├── .gitignore ├── README.md ├── adr.gif ├── common ├── __init__.py ├── agents │ ├── __init__.py │ ├── ddpg │ │ ├── __init__.py │ │ ├── ddpg.py │ │ └── replay_buffer.py │ ├── ddpg_actor.py │ └── svpg_simulator_agent.py ├── discriminator │ └── discriminator_rewarder.py ├── envs │ ├── __init__.py │ ├── assets │ │ ├── LICENSE.md │ │ ├── __init__.py │ │ ├── fetch │ │ │ ├── reach.xml │ │ │ ├── robot.xml │ │ │ └── shared.xml │ │ ├── pusher_3dof.xml │ │ └── stls │ │ │ └── fetch │ │ │ ├── base_link_collision.stl │ │ │ ├── bellows_link_collision.stl │ │ │ ├── elbow_flex_link_collision.stl │ │ │ ├── estop_link.stl │ │ │ ├── forearm_roll_link_collision.stl │ │ │ ├── gripper_link.stl │ │ │ ├── head_pan_link_collision.stl │ │ │ ├── head_tilt_link_collision.stl │ │ │ ├── l_wheel_link_collision.stl │ │ │ ├── laser_link.stl │ │ │ ├── r_wheel_link_collision.stl │ │ │ ├── shoulder_lift_link_collision.stl │ │ │ ├── shoulder_pan_link_collision.stl │ │ │ ├── torso_fixed_link.stl │ │ │ ├── torso_lift_link_collision.stl │ │ │ ├── upperarm_roll_link_collision.stl │ │ │ ├── wrist_flex_link_collision.stl │ │ │ └── wrist_roll_link_collision.stl │ ├── config │ │ ├── ErgoReacherRandomized │ │ │ ├── default-4dof.json │ │ │ ├── default-6dof.json │ │ │ ├── easy-4dof.json │ │ │ ├── fulldr-4dof.json │ │ │ ├── fulldr-6dof.json │ │ │ └── hard-4dof.json │ │ ├── ErgoReacherRandomizedBacklash │ │ │ ├── default-4dof.json │ │ │ ├── fulldr-4dof.json │ │ │ ├── fulldr-easy.json │ │ │ └── fulldr-hard.json │ │ ├── HalfCheetahRandomized │ │ │ └── default.json │ │ ├── HumanoidRandomized │ │ │ └── default.json │ │ ├── LunarLanderRandomized │ │ │ ├── 10.json │ │ │ ├── 16.json │ │ │ ├── debug.json │ │ │ ├── default.json │ │ │ ├── random2D_820.json │ │ │ ├── random_1720.json │ │ │ ├── random_620.json │ │ │ ├── random_811.json │ │ │ ├── random_812.json │ │ │ ├── random_813.json │ │ │ └── random_820.json │ │ ├── Pusher3DOFGeneralization │ │ │ ├── 00.json │ │ │ ├── 01.json │ │ │ ├── 02.json │ │ │ ├── 10.json │ │ │ ├── 11.json │ │ │ ├── 12.json │ │ │ ├── 20.json │ │ │ ├── 21.json │ │ │ └── 22.json │ │ ├── Pusher3DOFRandomized │ │ │ ├── default.json │ │ │ ├── fulldr-easy.json │ │ │ ├── fulldr-toohard.json │ │ │ ├── fulldr.json │ │ │ └── hard.json │ │ └── __init__.py │ ├── dimension.py │ ├── ergoreacher.py │ ├── ergoreacherbacklash.py │ ├── fetch.py │ ├── half_cheetah.py │ ├── humanoid.py │ ├── lunar_lander.py │ ├── pusher.py │ ├── pusher3dof.py │ ├── randomized_locomotion.py │ ├── randomized_vecenv.py │ └── wrappers.py ├── models │ ├── __init__.py │ ├── actor_critic.py │ └── discriminator.py ├── svpg │ ├── __init__.py │ ├── particles │ │ ├── __init__.py │ │ ├── distributions.py │ │ ├── svpg_particle.py │ │ └── utils.py │ ├── svpg.py │ └── svpg_utils.py └── utils │ ├── __init__.py │ ├── logging.py │ ├── plot_utils.py │ ├── policy_evaluator.py │ ├── recorder.py │ ├── rollout_evaluation.py │ ├── sim_agent_helper.py │ └── visualization.py ├── experiments ├── __init__.py └── domainrand │ ├── __init__.py │ ├── args.py │ ├── batch_reward_analysis.py │ ├── experiment_driver.py │ └── pusher_grid_generalization.py ├── real_robot.py ├── real_robot_torquesweep.py ├── scripts ├── README.md ├── docopts ├── docopts.sh ├── envs │ ├── bluewire │ │ └── manfred.sh │ ├── slurm │ │ ├── bhairav.sh │ │ └── manfred.sh │ └── uberduck │ │ └── bhairav.sh ├── experiments │ ├── lunar_lander.sh │ └── pusher_3dof.sh ├── launch.py ├── real-robot-read-dataset.py └── run.sh ├── setup.py ├── slurm.sh └── tests ├── 00-test-vecenv.py ├── 01-test-svpg-vectorized.py ├── 
02-test-svpg-policy-rollout-vectorized.py ├── 03-test-vanilla-fetchreach.py ├── 04-test-randomized-mujoco-api.py ├── 05-test-randomized-mujoco-viz.py ├── 06-test-randomized-ergoreach.py ├── 07-test-mujoco-3dof-keyboard-control.py ├── 08-test-mujoco-4dof-keyboard-control.py ├── 09-test-mujoco-3dof-auto.py ├── 10-test-mujoco-3dof-ranges.py ├── 11-test-randomized-ergoreach-halfdisk.py ├── 12-test-randomized-ergoreach-backlash-halfdisk.py ├── 13-test-randomized-humanoid.py ├── 14-test-randomized-halfcheetah.py └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Python template 3 | 4 | # Custom 5 | sftp-config* 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | *.egg-info/ 30 | .installed.cfg 31 | *.egg 32 | MANIFEST 33 | 34 | # PyInstaller 35 | # Usually these files are written by a python script from a template 36 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 37 | *.manifest 38 | *.spec 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | 44 | # Unit test / coverage reports 45 | htmlcov/ 46 | .tox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | local_settings.py 62 | db.sqlite3 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # pyenv 81 | .python-version 82 | 83 | # celery beat schedule file 84 | celerybeat-schedule 85 | 86 | # SageMath parsed files 87 | *.sage.py 88 | 89 | # Environments 90 | .env 91 | .venv 92 | env/ 93 | venv/ 94 | ENV/ 95 | env.bak/ 96 | venv.bak/ 97 | 98 | # Spyder project settings 99 | .spyderproject 100 | .spyproject 101 | 102 | # Rope project settings 103 | .ropeproject 104 | 105 | # mkdocs documentation 106 | /site 107 | 108 | # mypy 109 | .mypy_cache/ 110 | .idea/ 111 | diffsim.egg-info/ 112 | results/real-robot/*.hdf5 113 | results/real-robot/*.hdf5.xz 114 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Active Domain Randomization 2 | 3 | [Active Domain Randomization](https://arxiv.org/abs/1904.04762) (ADR) is a new method for improved, zero-shot transfer of robotic reinforcement learning policies. Building upon traditional domain randomization, which uniformly samples the _randomization space_, we show that replacing this with an active search for difficult MDP instances improves generalization and robustness in the resulting policies. 4 | 5 |

6 | ![Active Domain Randomization demo](adr.gif) 7 |

8 | 9 | Below is our code to reproduce the experiments in the paper. Please check out our [Domain Randomizer](https://github.com/montrealrobotics/domain-randomizer) repository if you're interested in an easy way to do domain randomization in parallel. 10 | 11 | ## Experiments 12 | 13 | ### Baseline Experiments 14 | 15 | #### Pure Baseline 16 | 17 | The most important flag here is `--initial-svpg-steps=1e6`, which ensures that only randomized environments are proposed until that step limit is reached (it never will be). The environment names (`randomized-env-id`) determine the range of randomization - `LunarLanderDefault-v0` has a single-valued range, so a `randomize()` call will always result in the same, default environment. Likewise, `LunarLanderRandomized-v0` has the full randomization range (in one dimension). 18 | 19 | On the command line, specify an experiment type from `[lunar|pusher|ergo]` to get defaults for that experiment. You can find a detailed list of command-line arguments in `experiments/domainrand/args.py`. 20 | 21 | ``` 22 | python -m experiments.domainrand.experiment_driver [lunar|pusher|ergo] \ 23 | --experiment-name=unfreeze-policy --freeze-discriminator \ 24 | --experiment-prefix="true-baseline" --agent-name=baseline --initial-svpg-steps=1e6 \ 25 | --continuous-svpg --freeze-svpg --seed={SEED} 26 | ``` 27 | 28 | #### Uniform Domain Randomization 29 | 30 | ``` 31 | python -m experiments.domainrand.experiment_driver [lunar|pusher|ergo] \ 32 | --experiment-name=unfreeze-policy --randomized-eval-env-id="[corresponding env ID]" \ 33 | --experiment-prefix="fulldr" --agent-name=fulldr --initial-svpg-steps=1e6 \ 34 | --continuous-svpg --freeze-svpg --seed={SEED} 35 | ``` 36 | 37 | ### Active Domain Randomization 38 | 39 | ``` 40 | python -m experiments.domainrand.experiment_driver [lunar|pusher|ergo] \ 41 | --experiment-name=unfreeze-policy --load-discriminator --randomized-eval-env-id="[corresponding env ID]" \ 42 | --freeze-discriminator --experiment-prefix="ours-agent-scratch" --seed={SEED} 43 | ``` 44 | 45 | ## Reference 46 | 47 | ``` 48 | @article{mehta2019adr, 49 | title={Active Domain Randomization}, 50 | author={Mehta, Bhairav and Diaz, Manfred and Golemo, Florian and Pal, Christopher and Paull, Liam}, 51 | url={https://arxiv.org/abs/1904.04762}, 52 | year={2019} 53 | } 54 | ``` 55 | 56 | Built by [@bhairavmehta95](https://bhairavmehta95.github.io), [@takeitallsource](https://github.com/takeitallsource), and [@fgolemo](https://github.com/fgolemo).
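## Randomization configs

Each entry in the JSON files under `common/envs/config/` maps onto a `Dimension` object (`common/envs/dimension.py`): `randomize()` draws the parameter uniformly from `[default * multiplier_min, default * multiplier_max]`, and `reset()` restores the default. A minimal, illustrative sketch (values taken from `LunarLanderRandomized/random_820.json`; in the experiments these objects are constructed for you by the environment wrappers, so you would not normally build them by hand, and the seed below is arbitrary):

```
from common.envs.dimension import Dimension

# main_engine: default 13.0, multipliers [0.615, 1.538] -> sampling range ~[8.0, 20.0]
main_engine = Dimension(default_value=13.0, seed=123,
                        multiplier_min=0.615, multiplier_max=1.538,
                        name='main_engine')

main_engine.randomize()            # uniform sample from [range_min, range_max]
print(main_engine.current_value)   # some value in [8.0, 20.0]
main_engine.reset()                # back to the default (13.0)
```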
57 | -------------------------------------------------------------------------------- /adr.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/adr.gif -------------------------------------------------------------------------------- /common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/__init__.py -------------------------------------------------------------------------------- /common/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/agents/__init__.py -------------------------------------------------------------------------------- /common/agents/ddpg/__init__.py: -------------------------------------------------------------------------------- 1 | from .ddpg import Actor, Critic, DDPG -------------------------------------------------------------------------------- /common/agents/ddpg/ddpg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | from common.models.actor_critic import Actor, Critic 7 | 8 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 9 | 10 | 11 | class DDPG(object): 12 | def __init__(self, state_dim, action_dim, agent_name='baseline', max_action=1.): 13 | self.actor = Actor(state_dim, action_dim, max_action).to(device) 14 | self.actor_target = Actor(state_dim, action_dim, max_action).to(device) 15 | self.actor_target.load_state_dict(self.actor.state_dict()) 16 | self.actor_optimizer = torch.optim.Adam(self.actor.parameters()) 17 | 18 | self.critic = Critic(state_dim, action_dim).to(device) 19 | self.critic_target = Critic(state_dim, action_dim).to(device) 20 | self.critic_target.load_state_dict(self.critic.state_dict()) 21 | self.critic_optimizer = torch.optim.Adam(self.critic.parameters()) 22 | 23 | self.max_action = max_action 24 | 25 | self.agent_name = agent_name 26 | 27 | def select_action(self, state): 28 | state = torch.FloatTensor(state).to(device) 29 | return self.actor(state).cpu().data.numpy() 30 | 31 | def train(self, replay_buffer, iterations, batch_size=100, discount=0.99, tau=0.005): 32 | for it in range(iterations): 33 | # Sample replay buffer 34 | x, y, u, r, d = replay_buffer.sample(batch_size) 35 | state = torch.FloatTensor(x).to(device) 36 | action = torch.FloatTensor(u).to(device) 37 | next_state = torch.FloatTensor(y).to(device) 38 | done = torch.FloatTensor(1 - d).to(device) 39 | reward = torch.FloatTensor(r).to(device) 40 | 41 | # Compute the target Q value 42 | target_Q = self.critic_target(next_state, self.actor_target(next_state)) 43 | target_Q = reward + (done * discount * target_Q).detach() 44 | 45 | # Get current Q estimate 46 | current_Q = self.critic(state, action) 47 | 48 | # Compute critic loss 49 | critic_loss = F.mse_loss(current_Q, target_Q) 50 | 51 | # Optimize the critic 52 | self.critic_optimizer.zero_grad() 53 | critic_loss.backward() 54 | self.critic_optimizer.step() 55 | 56 | # Compute actor loss 57 | actor_loss = -self.critic(state, self.actor(state)).mean() 58 | 59 | # Optimize the actor 60 | 
self.actor_optimizer.zero_grad() 61 | actor_loss.backward() 62 | self.actor_optimizer.step() 63 | 64 | # Update the frozen target models 65 | for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()): 66 | target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data) 67 | 68 | for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): 69 | target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data) 70 | 71 | def save(self, filename, directory): 72 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, filename)) 73 | torch.save(self.critic.state_dict(), '%s/%s_critic.pth' % (directory, filename)) 74 | 75 | def load(self, filename, directory): 76 | self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, filename), map_location=device)) 77 | self.critic.load_state_dict(torch.load('%s/%s_critic.pth' % (directory, filename), map_location=device)) 78 | 79 | # To ensure backwards compatibility D: 80 | def load_model(self): 81 | cur_dir = os.getcwd() 82 | actor_path = 'common/agents/ddpg/saved_model/{}_{}.pth'.format(self.agent_name, 'actor') 83 | critic_path = 'common/agents/ddpg/saved_model/{}_{}.pth'.format(self.agent_name, 'critic') 84 | 85 | self.actor.load_state_dict(torch.load(os.path.join(cur_dir, actor_path), map_location=device)) 86 | self.critic.load_state_dict(torch.load(os.path.join(cur_dir, critic_path), map_location=device)) 87 | -------------------------------------------------------------------------------- /common/agents/ddpg/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | # Code based on: 4 | # https://github.com/openai/baselines/blob/master/baselines/deepq/replay_buffer.py 5 | 6 | 7 | # Simple replay buffer 8 | class ReplayBuffer(object): 9 | def __init__(self, max_size=1e6): 10 | self.storage = [] 11 | self.max_size = int(max_size) 12 | self.next_idx = 0 13 | 14 | # Expects tuples of (state, next_state, action, reward, done) 15 | def add(self, data): 16 | if self.next_idx >= len(self.storage): 17 | self.storage.append(data) 18 | else: 19 | self.storage[self.next_idx] = data 20 | 21 | self.next_idx = (self.next_idx + 1) % self.max_size 22 | 23 | def sample(self, batch_size=100): 24 | ind = np.random.randint(0, len(self.storage), size=batch_size) 25 | x, y, u, r, d = [], [], [], [], [] 26 | 27 | for i in ind: 28 | X, Y, U, R, D = self.storage[i] 29 | x.append(np.array(X, copy=False)) 30 | y.append(np.array(Y, copy=False)) 31 | u.append(np.array(U, copy=False)) 32 | r.append(np.array(R, copy=False)) 33 | d.append(np.array(D, copy=False)) 34 | 35 | return np.array(x), np.array(y), np.array(u), np.array(r).reshape(-1, 1), np.array(d).reshape(-1, 1) -------------------------------------------------------------------------------- /common/agents/ddpg_actor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import logging 4 | from common.agents.ddpg import Actor 5 | 6 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 7 | 8 | 9 | class DDPGActor(object): 10 | def __init__(self, state_dim, action_dim, max_action=1, agent_name="baseline", load_agent=True, model_path=None): 11 | self.actor = Actor(state_dim, action_dim, max_action).to(device) 12 | self.agent_name = agent_name 13 | self.model_path = model_path 14 | 15 | if load_agent: 16 | self._load() 17 | 18 | def select_action(self, state): 19 | state 
= torch.FloatTensor(state).to(device) 20 | return self.actor(state).cpu().data.numpy() 21 | 22 | def _load(self): 23 | if self.model_path is not None: 24 | logging.info('Loading DDPG from: {}'.format(self.model_path)) 25 | self.actor.load_state_dict(torch.load(self.model_path, map_location=device)) 26 | else: 27 | cur_dir = os.getcwd() 28 | full_path = os.path.join(cur_dir, 'saved-models/policy/baseline_actor.pth') 29 | self.actor.load_state_dict(torch.load(full_path, map_location=device)) 30 | -------------------------------------------------------------------------------- /common/discriminator/discriminator_rewarder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | 7 | from common.models.discriminator import MLPDiscriminator 8 | 9 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 10 | 11 | 12 | class DiscriminatorRewarder(object): 13 | def __init__(self, reference_env, randomized_env_id, discriminator_batchsz, reward_scale, 14 | load_discriminator, discriminator_lr=3e-3, add_pz=True): 15 | self.discriminator = MLPDiscriminator( 16 | state_dim=reference_env.observation_space.shape[0], 17 | action_dim=reference_env.action_space.shape[0]).to(device) 18 | 19 | self.discriminator_criterion = nn.BCELoss() 20 | self.discriminator_optimizer = torch.optim.Adam(self.discriminator.parameters(), lr=discriminator_lr) 21 | self.reward_scale = reward_scale 22 | self.batch_size = discriminator_batchsz 23 | self.add_pz = add_pz 24 | 25 | if load_discriminator: 26 | self._load_discriminator(randomized_env_id) 27 | 28 | def calculate_rewards(self, randomized_trajectory): 29 | """Discriminator based reward calculation 30 | We want to use the negative of the adversarial calculation (Normally, -log(D)). We want to *reward* 31 | our simulator for making it easier to discriminate between the reference env + randomized ones 32 | """ 33 | score, _, _ = self.get_score(randomized_trajectory) 34 | reward = np.log(score) 35 | 36 | if self.add_pz: 37 | reward -= np.log(0.5) 38 | 39 | return self.reward_scale * reward 40 | 41 | def get_score(self, trajectory): 42 | """Discriminator based reward calculation 43 | We want to use the negative of the adversarial calculation (Normally, -log(D)).
We want to *reward* 44 | our simulator for making it easier to discriminate between the reference env + randomized onea 45 | """ 46 | traj_tensor = self._trajectory2tensor(trajectory).float() 47 | 48 | with torch.no_grad(): 49 | score = (self.discriminator(traj_tensor).cpu().detach().numpy()+1e-8) 50 | return score.mean(), np.median(score), np.sum(score) 51 | 52 | def train_discriminator(self, reference_trajectory, randomized_trajectory, iterations): 53 | """Trains discriminator to distinguish between reference and randomized state action tuples 54 | """ 55 | for _ in range(iterations): 56 | randind = np.random.randint(0, len(randomized_trajectory[0]), size=int(self.batch_size)) 57 | refind = np.random.randint(0, len(reference_trajectory[0]), size=int(self.batch_size)) 58 | 59 | randomized_batch = self._trajectory2tensor(randomized_trajectory[randind]) 60 | reference_batch = self._trajectory2tensor(reference_trajectory[refind]) 61 | 62 | g_o = self.discriminator(randomized_batch) 63 | e_o = self.discriminator(reference_batch) 64 | 65 | self.discriminator_optimizer.zero_grad() 66 | 67 | discrim_loss = self.discriminator_criterion(g_o, torch.ones((len(randomized_batch), 1), device=device)) + \ 68 | self.discriminator_criterion(e_o, torch.zeros((len(reference_batch), 1), device=device)) 69 | discrim_loss.backward() 70 | 71 | self.discriminator_optimizer.step() 72 | 73 | def _load_discriminator(self, name, path='saved-models/discriminator/discriminator_{}.pth'): 74 | self.discriminator.load_state_dict(torch.load(path.format(name), map_location=device)) 75 | 76 | def _save_discriminator(self, name, path='saved-models/discriminator/discriminator_{}.pth'): 77 | torch.save(self.discriminator.state_dict(), path.format(name)) 78 | 79 | def _trajectory2tensor(self, trajectory): 80 | return torch.from_numpy(trajectory).float().to(device) 81 | -------------------------------------------------------------------------------- /common/envs/assets/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | MODEL_PATH = os.path.abspath(os.path.dirname(__file__)) 4 | -------------------------------------------------------------------------------- /common/envs/assets/fetch/reach.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /common/envs/assets/fetch/robot.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /common/envs/assets/fetch/shared.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 
15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /common/envs/assets/pusher_3dof.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/base_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/base_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/bellows_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/bellows_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/elbow_flex_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/elbow_flex_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/estop_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/estop_link.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/forearm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/forearm_roll_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/gripper_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/gripper_link.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/head_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/head_pan_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/head_tilt_link_collision.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/head_tilt_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/l_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/l_wheel_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/laser_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/laser_link.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/r_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/r_wheel_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/shoulder_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/shoulder_lift_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/shoulder_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/shoulder_pan_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/torso_fixed_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/torso_fixed_link.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/torso_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/torso_lift_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/upperarm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/upperarm_roll_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/wrist_flex_link_collision.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/wrist_flex_link_collision.stl -------------------------------------------------------------------------------- /common/envs/assets/stls/fetch/wrist_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/wrist_roll_link_collision.stl -------------------------------------------------------------------------------- /common/envs/config/ErgoReacherRandomized/default-4dof.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 1, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 1, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.0 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 1, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 1.0 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 1, 25 | "multiplier_min": 1.0, 26 | "multiplier_max": 1.0 27 | }, 28 | { 29 | "name": "joint0maxtorque", 30 | "default": 18, 31 | "multiplier_min": 1.0, 32 | "multiplier_max": 1.0 33 | }, 34 | { 35 | "name": "joint1maxtorque", 36 | "default": 18, 37 | "multiplier_min": 1.0, 38 | "multiplier_max": 1.0 39 | }, 40 | { 41 | "name": "joint2maxtorque", 42 | "default": 18, 43 | "multiplier_min": 1.0, 44 | "multiplier_max": 1.0 45 | }, 46 | { 47 | "name": "joint3maxtorque", 48 | "default": 18, 49 | "multiplier_min": 1.0, 50 | "multiplier_max": 1.0 51 | } 52 | ] 53 | } 54 | -------------------------------------------------------------------------------- /common/envs/config/ErgoReacherRandomized/default-6dof.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 1, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 1, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.0 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 1, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 1.0 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 1, 25 | "multiplier_min": 1.0, 26 | "multiplier_max": 1.0 27 | }, 28 | { 29 | "name": "joint4gain", 30 | "default": 1, 31 | "multiplier_min": 1.0, 32 | "multiplier_max": 1.0 33 | }, 34 | { 35 | "name": "joint5gain", 36 | "default": 1, 37 | "multiplier_min": 1.0, 38 | "multiplier_max": 1.0 39 | }, 40 | { 41 | "name": "joint0maxtorque", 42 | "default": 18, 43 | "multiplier_min": 1.0, 44 | "multiplier_max": 1.0 45 | }, 46 | { 47 | "name": "joint1maxtorque", 48 | "default": 18, 49 | "multiplier_min": 1.0, 50 | "multiplier_max": 1.0 51 | }, 52 | { 53 | "name": "joint2maxtorque", 54 | "default": 18, 55 | "multiplier_min": 1.0, 56 | "multiplier_max": 1.0 57 | }, 58 | { 59 | "name": "joint3maxtorque", 60 | "default": 18, 61 | "multiplier_min": 1.0, 62 | "multiplier_max": 1.0 63 | }, 64 | { 65 | "name": "joint4maxtorque", 66 | "default": 18, 67 | "multiplier_min": 1.0, 68 | "multiplier_max": 1.0 69 | }, 70 | { 71 | "name": "joint5maxtorque", 72 | "default": 18, 73 | "multiplier_min": 1.0, 74 | "multiplier_max": 1.0 75 | } 76 | ] 77 | } 78 | 
-------------------------------------------------------------------------------- /common/envs/config/ErgoReacherRandomized/easy-4dof.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 1.0, 7 | "multiplier_min": 0.3, 8 | "multiplier_max": 2.0 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 1.0, 13 | "multiplier_min": 0.3, 14 | "multiplier_max": 2.0 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 1.0, 19 | "multiplier_min": 0.3, 20 | "multiplier_max": 2.0 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 1.0, 25 | "multiplier_min": 0.3, 26 | "multiplier_max": 2.0 27 | }, 28 | { 29 | "name": "joint0maxtorque", 30 | "default": 5.0, 31 | "multiplier_min": 1.0, 32 | "multiplier_max": 4.0 33 | }, 34 | { 35 | "name": "joint1maxtorque", 36 | "default": 5.0, 37 | "multiplier_min": 1.0, 38 | "multiplier_max": 4.0 39 | }, 40 | { 41 | "name": "joint2maxtorque", 42 | "default": 5.0, 43 | "multiplier_min": 1.0, 44 | "multiplier_max": 4.0 45 | }, 46 | { 47 | "name": "joint3maxtorque", 48 | "default": 5.0, 49 | "multiplier_min": 1.0, 50 | "multiplier_max": 4.0 51 | } 52 | ] 53 | } 54 | -------------------------------------------------------------------------------- /common/envs/config/ErgoReacherRandomized/fulldr-4dof.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 1, 7 | "multiplier_min": 0.1, 8 | "multiplier_max": 100 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 1, 13 | "multiplier_min": 0.1, 14 | "multiplier_max": 100 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 1, 19 | "multiplier_min": 0.1, 20 | "multiplier_max": 100 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 1, 25 | "multiplier_min": 0.1, 26 | "multiplier_max": 100 27 | }, 28 | { 29 | "name": "joint0maxtorque", 30 | "default": 18, 31 | "multiplier_min": 0.2222, 32 | "multiplier_max": 2.7778 33 | }, 34 | { 35 | "name": "joint1maxtorque", 36 | "default": 18, 37 | "multiplier_min": 0.2222, 38 | "multiplier_max": 2.7778 39 | }, 40 | { 41 | "name": "joint2maxtorque", 42 | "default": 18, 43 | "multiplier_min": 0.2222, 44 | "multiplier_max": 2.7778 45 | }, 46 | { 47 | "name": "joint3maxtorque", 48 | "default": 18, 49 | "multiplier_min": 0.2222, 50 | "multiplier_max": 2.7778 51 | } 52 | ] 53 | } 54 | -------------------------------------------------------------------------------- /common/envs/config/ErgoReacherRandomized/fulldr-6dof.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 1, 7 | "multiplier_min": 0.1, 8 | "multiplier_max": 100 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 1, 13 | "multiplier_min": 0.1, 14 | "multiplier_max": 100 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 1, 19 | "multiplier_min": 0.1, 20 | "multiplier_max": 100 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 1, 25 | "multiplier_min": 0.1, 26 | "multiplier_max": 100 27 | }, 28 | { 29 | "name": "joint4gain", 30 | "default": 1, 31 | "multiplier_min": 0.1, 32 | "multiplier_max": 100 33 | }, 34 | { 35 | "name": "joint5gain", 36 | "default": 1, 37 | "multiplier_min": 0.1, 38 | "multiplier_max": 100 39 | }, 40 | { 41 | "name": "joint0maxtorque", 42 
| "default": 18, 43 | "multiplier_min": 0.2222, 44 | "multiplier_max": 2.7778 45 | }, 46 | { 47 | "name": "joint1maxtorque", 48 | "default": 18, 49 | "multiplier_min": 0.2222, 50 | "multiplier_max": 2.7778 51 | }, 52 | { 53 | "name": "joint2maxtorque", 54 | "default": 18, 55 | "multiplier_min": 0.2222, 56 | "multiplier_max": 2.7778 57 | }, 58 | { 59 | "name": "joint3maxtorque", 60 | "default": 18, 61 | "multiplier_min": 0.2222, 62 | "multiplier_max": 2.7778 63 | }, 64 | { 65 | "name": "joint4maxtorque", 66 | "default": 18, 67 | "multiplier_min": 0.2222, 68 | "multiplier_max": 2.7778 69 | }, 70 | { 71 | "name": "joint5maxtorque", 72 | "default": 18, 73 | "multiplier_min": 0.2222, 74 | "multiplier_max": 2.7778 75 | } 76 | ] 77 | } 78 | -------------------------------------------------------------------------------- /common/envs/config/ErgoReacherRandomized/hard-4dof.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 0.2, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 0.2, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.0 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 0.2, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 1.0 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 0.2, 25 | "multiplier_min": 1.0, 26 | "multiplier_max": 1.0 27 | }, 28 | { 29 | "name": "joint0maxtorque", 30 | "default": 5.0, 31 | "multiplier_min": 1.0, 32 | "multiplier_max": 1.0 33 | }, 34 | { 35 | "name": "joint1maxtorque", 36 | "default": 5.0, 37 | "multiplier_min": 1.0, 38 | "multiplier_max": 1.0 39 | }, 40 | { 41 | "name": "joint2maxtorque", 42 | "default": 5.0, 43 | "multiplier_min": 1.0, 44 | "multiplier_max": 1.0 45 | }, 46 | { 47 | "name": "joint3maxtorque", 48 | "default": 5.0, 49 | "multiplier_min": 1.0, 50 | "multiplier_max": 1.0 51 | } 52 | ] 53 | } 54 | -------------------------------------------------------------------------------- /common/envs/config/ErgoReacherRandomizedBacklash/default-4dof.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherRandomizedBacklash-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 1000, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 1000, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.0 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 1000, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 1.0 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 1000, 25 | "multiplier_min": 1.0, 26 | "multiplier_max": 1.0 27 | }, 28 | { 29 | "name": "joint0backlash", 30 | "default": -2.30258509299, 31 | "multiplier_min": 1.0, 32 | "multiplier_max": 1.0 33 | }, 34 | { 35 | "name": "joint1backlash", 36 | "default": -2.30258509299, 37 | "multiplier_min": 1.0, 38 | "multiplier_max": 1.0 39 | }, 40 | { 41 | "name": "joint2backlash", 42 | "default": -2.30258509299, 43 | "multiplier_min": 1.0, 44 | "multiplier_max": 1.0 45 | }, 46 | { 47 | "name": "joint3backlash", 48 | "default": -2.30258509299, 49 | "multiplier_min": 1.0, 50 | "multiplier_max": 1.0 51 | } 52 | ] 53 | } 54 | -------------------------------------------------------------------------------- /common/envs/config/ErgoReacherRandomizedBacklash/fulldr-4dof.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherRandomizedBacklash-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 1000, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 7.0 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 1000, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 7.0 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 1000, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 7.0 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 1000, 25 | "multiplier_min": 1.0, 26 | "multiplier_max": 7.0 27 | }, 28 | { 29 | "name": "joint0backlash", 30 | "default": -2.302585, 31 | "multiplier_min": 4, 32 | "multiplier_max": 0 33 | }, 34 | { 35 | "name": "joint1backlash", 36 | "default": -2.302585, 37 | "multiplier_min": 4, 38 | "multiplier_max": 0 39 | }, 40 | { 41 | "name": "joint2backlash", 42 | "default": -2.302585, 43 | "multiplier_min": 4, 44 | "multiplier_max": 0 45 | }, 46 | { 47 | "name": "joint3backlash", 48 | "default": -2.302585, 49 | "multiplier_min": 4, 50 | "multiplier_max": 0 51 | } 52 | ] 53 | } 54 | -------------------------------------------------------------------------------- /common/envs/config/ErgoReacherRandomizedBacklash/fulldr-easy.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherRandomizedBacklashEasy-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 1000, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 5.0 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 1000, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 5.0 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 1000, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 5.0 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 1000, 25 | "multiplier_min": 1.0, 26 | "multiplier_max": 5.0 27 | }, 28 | { 29 | "name": "joint0backlash", 30 | "default": -0.91629, 31 | "multiplier_min": 7.5, 32 | "multiplier_max": 1.5 33 | }, 34 | { 35 | "name": "joint1backlash", 36 | "default": -0.91629, 37 | "multiplier_min": 7.5, 38 | "multiplier_max": 1.5 39 | }, 40 | { 41 | "name": "joint2backlash", 42 | "default": -0.91629, 43 | "multiplier_min": 7.5, 44 | "multiplier_max": 1.5 45 | }, 46 | { 47 | "name": "joint3backlash", 48 | "default": -0.91629, 49 | "multiplier_min": 7.5, 50 | "multiplier_max": 1.5 51 | } 52 | ] 53 | } 54 | -------------------------------------------------------------------------------- /common/envs/config/ErgoReacherRandomizedBacklash/fulldr-hard.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherRandomizedBacklashHard-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 1000, 7 | "multiplier_min": 5.0, 8 | "multiplier_max": 7.0 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 1000, 13 | "multiplier_min": 5.0, 14 | "multiplier_max": 7.0 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 1000, 19 | "multiplier_min": 5.0, 20 | "multiplier_max": 7.0 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 1000, 25 | "multiplier_min": 5.0, 26 | "multiplier_max": 7.0 27 | }, 28 | { 29 | "name": "joint0backlash", 30 | "default": -0.63147, 31 | "multiplier_min": 2.0, 32 | "multiplier_max": 1.5 33 | }, 34 | { 35 | "name": "joint1backlash", 36 | "default": -0.63147, 37 | "multiplier_min": 2.0, 38 | "multiplier_max": 1.5 39 | }, 40 | { 41 | "name": "joint2backlash", 42 | "default": -0.63147, 43 | 
"multiplier_min": 2.0, 44 | "multiplier_max": 1.5 45 | }, 46 | { 47 | "name": "joint3backlash", 48 | "default": -0.63147, 49 | "multiplier_min": 2.0, 50 | "multiplier_max": 1.5 51 | } 52 | ] 53 | } 54 | -------------------------------------------------------------------------------- /common/envs/config/HalfCheetahRandomized/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "HalfCheetahRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "foot", 6 | "default": 0.046, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.5 9 | }, 10 | { 11 | "name": "shin", 12 | "default": 0.046, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.5 15 | }, 16 | { 17 | "name": "thigh", 18 | "default": 0.046, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 1.5 21 | }, 22 | { 23 | "name": "torso", 24 | "default": 0.046, 25 | "multiplier_min": 1.0, 26 | "multiplier_max": 1.5 27 | }, 28 | { 29 | "name": "head", 30 | "default": 0.046, 31 | "multiplier_min": 1.0, 32 | "multiplier_max": 1.5 33 | } 34 | ], 35 | "geom_map" : { 36 | "foot": ["bfoot", "ffoot"], 37 | "shin": ["bshin", "fshin"], 38 | "thigh": ["bthigh", "fthigh"], 39 | "torso": ["torso"], 40 | "head": ["head"] 41 | }, 42 | "suffixes": { 43 | "foot": "0.046", 44 | "shin": "0.046", 45 | "thigh": "0.046", 46 | "torso": "0.046", 47 | "head": "0.046" 48 | } 49 | } -------------------------------------------------------------------------------- /common/envs/config/HumanoidRandomized/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "HumanoidRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "foot", 6 | "default": 0.075, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.5 9 | }, 10 | { 11 | "name": "thigh", 12 | "default": 0.06, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.5 15 | }, 16 | { 17 | "name": "shin", 18 | "default": 0.06, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 1.5 21 | }, 22 | { 23 | "name": "torso", 24 | "default": 0.07, 25 | "multiplier_min": 1.0, 26 | "multiplier_max": 1.5 27 | }, 28 | { 29 | "name": "head", 30 | "default": 0.09, 31 | "multiplier_min": 1.0, 32 | "multiplier_max": 1.5 33 | }, 34 | { 35 | "name": "hand", 36 | "default": 0.04, 37 | "multiplier_min": 1.0, 38 | "multiplier_max": 1.5 39 | } 40 | ], 41 | "geom_map" : { 42 | "head": ["head"], 43 | "thigh": ["right_thigh1", "left_thigh1"], 44 | "shin": ["right_shin1", "left_shin1"], 45 | "foot": ["left_foot", "right_foot"], 46 | "hand": ["left_hand", "right_hand"], 47 | "torso": ["torso1"] 48 | } 49 | } -------------------------------------------------------------------------------- /common/envs/config/LunarLanderRandomized/10.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "LunarLanderRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "main_engine", 6 | "default": 10.0, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /common/envs/config/LunarLanderRandomized/16.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "LunarLanderRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "main_engine", 6 | "default": 16.0, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /common/envs/config/LunarLanderRandomized/debug.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "env": "LunarLanderRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "main_engine", 6 | "default": 11.0, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /common/envs/config/LunarLanderRandomized/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "LunarLanderRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "main_engine", 6 | "default": 13.0, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /common/envs/config/LunarLanderRandomized/random2D_820.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "LunarLanderRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "main_engine", 6 | "default": 13.0, 7 | "multiplier_min": 0.615, 8 | "multiplier_max": 1.538 9 | }, 10 | { 11 | "name": "side_engine", 12 | "default": 0.6, 13 | "multiplier_min": 0.167, 14 | "multiplier_max": 3.333 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /common/envs/config/LunarLanderRandomized/random_1720.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "LunarLanderRandomized-Expert_0-v0", 3 | "dimensions": [ 4 | { 5 | "name": "main_engine", 6 | "default": 13, 7 | "multiplier_min": 1.308, 8 | "multiplier_max": 1.538 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /common/envs/config/LunarLanderRandomized/random_620.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "LunarLanderRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "main_engine", 6 | "default": 13.0, 7 | "multiplier_min": 0.462, 8 | "multiplier_max": 1.538 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /common/envs/config/LunarLanderRandomized/random_811.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "LunarLanderRandomized-Expert_0-v0", 3 | "dimensions": [ 4 | { 5 | "name": "main_engine", 6 | "default": 8, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.375 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /common/envs/config/LunarLanderRandomized/random_812.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "LunarLanderRandomized-Expert_0-v0", 3 | "dimensions": [ 4 | { 5 | "name": "main_engine", 6 | "default": 8, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.5 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /common/envs/config/LunarLanderRandomized/random_813.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "LunarLanderRandomized-Expert_0-v0", 3 | "dimensions": [ 4 | { 5 | "name": "main_engine", 6 | "default": 8, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.625 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /common/envs/config/LunarLanderRandomized/random_820.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "env": "LunarLanderRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "main_engine", 6 | "default": 13.0, 7 | "multiplier_min": 0.615, 8 | "multiplier_max": 1.538 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFGeneralization/00.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.5, 8 | "multiplier_max": 0.666 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 0.833, 14 | "multiplier_max": 1.0 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFGeneralization/01.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.666, 8 | "multiplier_max": 0.833 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 0.833, 14 | "multiplier_max": 1.0 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFGeneralization/02.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.833, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 0.833, 14 | "multiplier_max": 1.0 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFGeneralization/10.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.5, 8 | "multiplier_max": 0.666 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 0.666, 14 | "multiplier_max": 0.833 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFGeneralization/11.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.666, 8 | "multiplier_max": 0.833 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 0.666, 14 | "multiplier_max": 0.833 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFGeneralization/12.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.833, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 0.666, 14 | "multiplier_max": 0.833 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- 
/common/envs/config/Pusher3DOFGeneralization/20.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.5, 8 | "multiplier_max": 0.666 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 0.5, 14 | "multiplier_max": 0.666 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFGeneralization/21.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.666, 8 | "multiplier_max": 0.833 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 0.5, 14 | "multiplier_max": 0.666 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFGeneralization/22.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.833, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 0.5, 14 | "multiplier_max": 0.666 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFRandomized/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.0 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFRandomized/fulldr-easy.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFRandomizedEasy-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.666, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 0.666, 14 | "multiplier_max": 1.0 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFRandomized/fulldr-toohard.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.05, 8 | "multiplier_max": 0.05 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 0.05, 14 | "multiplier_max": 0.05 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFRandomized/fulldr.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.4, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "damping", 12 | 
"default": 0.025, 13 | "multiplier_min": 0.4, 14 | "multiplier_max": 1.0 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /common/envs/config/Pusher3DOFRandomized/hard.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "Pusher3DOFHard-v0", 3 | "dimensions": [ 4 | { 5 | "name": "frictionloss", 6 | "default": 0.01, 7 | "multiplier_min": 0.4, 8 | "multiplier_max": 0.5 9 | }, 10 | { 11 | "name": "damping", 12 | "default": 0.025, 13 | "multiplier_min": 0.4, 14 | "multiplier_max": 0.5 15 | } 16 | ] 17 | } -------------------------------------------------------------------------------- /common/envs/config/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | CONFIG_PATH = os.path.abspath(os.path.dirname(__file__)) 3 | -------------------------------------------------------------------------------- /common/envs/dimension.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Dimension(object): 5 | """Class which handles the machinery for doing BO over a particular dimensions 6 | """ 7 | def __init__(self, default_value, seed, multiplier_min=0.0, multiplier_max=1.0, name=None): 8 | """Generates datapoints at specified discretization, and initializes BO 9 | """ 10 | self.default_value = default_value 11 | self.current_value = default_value 12 | self.multiplier_min = multiplier_min 13 | self.multiplier_max = multiplier_max 14 | self.range_min = self.default_value * self.multiplier_min 15 | self.range_max = self.default_value * self.multiplier_max 16 | self.name = name 17 | 18 | # TODO: doesn't this change the random seed for all numpy uses? 19 | np.random.seed(seed) 20 | 21 | def _rescale(self, value): 22 | """Rescales normalized value to be within range of env. 
dimension 23 | """ 24 | return self.range_min + (self.range_max - self.range_min) * value 25 | 26 | def randomize(self): 27 | self.current_value = np.random.uniform(low=self.range_min, high=self.range_max) 28 | 29 | def reset(self): 30 | self.current_value = self.default_value 31 | 32 | def set(self, value): 33 | self.current_value = value 34 | 35 | -------------------------------------------------------------------------------- /common/envs/ergoreacher.py: -------------------------------------------------------------------------------- 1 | from gym_ergojr.envs import ErgoReacherEnv 2 | import numpy as np 3 | 4 | 5 | class ErgoReacherRandomizedEnv(ErgoReacherEnv): 6 | def __init__(self, **kwargs): 7 | self.dimensions = [] # this will be 8 elements long after wrapper init 8 | self.config_file = kwargs.get('config') 9 | 10 | del kwargs['config'] 11 | 12 | super().__init__(**kwargs) 13 | 14 | # # these two are affected by the DR 15 | # self.max_force 16 | # self.max_vel 17 | 18 | def step(self, action): 19 | observation, reward, done, info = super().step(action) 20 | info = {'goal_dist': self.dist.query()} 21 | return observation, reward, False, info # I'm not crazy excited about the lack of early stopping, but alright 22 | 23 | def _update_randomized_params(self): 24 | # these are used automatically in the `step` function 25 | self.max_force = np.zeros(6, np.float32) 26 | self.max_vel = np.zeros(6, np.float32) 27 | 28 | if self.simple: 29 | self.max_force[[1, 2, 4, 5]] = [x.current_value for x in self.dimensions[:4]] 30 | self.max_vel[[1, 2, 4, 5]] = [x.current_value for x in self.dimensions[4:]] 31 | else: 32 | self.max_force[:] = [x.current_value for x in self.dimensions[:6]] 33 | self.max_vel[:] = [x.current_value for x in self.dimensions[6:]] 34 | -------------------------------------------------------------------------------- /common/envs/ergoreacherbacklash.py: -------------------------------------------------------------------------------- 1 | from gym_ergojr.envs import ErgoReacherHeavyEnv 2 | import numpy as np 3 | 4 | 5 | class ErgoReacherRandomizedBacklashEnv(ErgoReacherHeavyEnv): 6 | def __init__(self, **kwargs): 7 | self.dimensions = [] # this will be 8 elements long after wrapper init 8 | self.config_file = kwargs.get('config') 9 | 10 | del kwargs['config'] 11 | 12 | super().__init__(**kwargs) 13 | 14 | # # these three are affected by the DR 15 | # self.max_force 16 | # backlash + self.force_urdf_reload 17 | 18 | def step(self, action): 19 | observation, reward, done, info = super().step(action) 20 | info = {'goal_dist': self.dist.query()} 21 | return observation, reward, False, info 22 | 23 | def _update_randomized_params(self): 24 | # the self.max_force is used automatically in the step function, 25 | # but for the backlash to take effect, self.reset() has to be called 26 | self.max_force = np.zeros(6, np.float32) 27 | backlash = np.zeros(6, np.float32) 28 | 29 | if self.simple: 30 | self.max_force[[0, 3]] = [1000, 1000] # setting these to default 31 | 32 | self.max_force[[1, 2, 4, 5]] = [x.current_value for x in self.dimensions[:4]] 33 | 34 | # The values coming into the backlash from the JSON are from -2.302585*4 = -9.2103 to 0 35 | # ...so that when we do e^[-9.2103,0] we get [0.0001,1] 36 | backlash[[1, 2, 4, 5]] = [np.power(np.e, x.current_value) for x in self.dimensions[4:]] 37 | self.update_backlash(backlash) 38 | else: 39 | raise NotImplementedError("just ping me and I'll write this if need be") 40 | # reason I haven't written this yet is because 41 | # the 
6dof+backlash task is wayyy too hard 42 | 43 | self.reset() 44 | -------------------------------------------------------------------------------- /common/envs/fetch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os.path as osp 3 | from gym.envs.robotics import rotations, robot_env, utils 4 | from gym.utils import EzPickle 5 | from common.envs.assets import MODEL_PATH 6 | 7 | 8 | def goal_distance(goal_a, goal_b): 9 | assert goal_a.shape == goal_b.shape 10 | return np.linalg.norm(goal_a - goal_b, axis=-1) 11 | 12 | 13 | class FetchEnv(robot_env.RobotEnv): 14 | """Superclass for all Fetch environments. 15 | """ 16 | 17 | def __init__( 18 | self, model_path, n_substeps, gripper_extra_height, block_gripper, 19 | has_object, target_in_the_air, target_offset, obj_range, target_range, 20 | distance_threshold, initial_qpos, reward_type, 21 | ): 22 | """Initializes a new Fetch environment. 23 | Args: 24 | model_path (string): path to the environments XML file 25 | n_substeps (int): number of substeps the simulation runs on every call to step 26 | gripper_extra_height (float): additional height above the table when positioning the gripper 27 | block_gripper (boolean): whether or not the gripper is blocked (i.e. not movable) or not 28 | has_object (boolean): whether or not the environment has an object 29 | target_in_the_air (boolean): whether or not the target should be in the air above the table or on the table surface 30 | target_offset (float or array with 3 elements): offset of the target 31 | obj_range (float): range of a uniform distribution for sampling initial object positions 32 | target_range (float): range of a uniform distribution for sampling a target 33 | distance_threshold (float): the threshold after which a goal is considered achieved 34 | initial_qpos (dict): a dictionary of joint names and values that define the initial configuration 35 | reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense 36 | """ 37 | self.gripper_extra_height = gripper_extra_height 38 | self.block_gripper = block_gripper 39 | self.has_object = has_object 40 | self.target_in_the_air = target_in_the_air 41 | self.target_offset = target_offset 42 | self.obj_range = obj_range 43 | self.target_range = target_range 44 | self.distance_threshold = distance_threshold 45 | self.reward_type = reward_type 46 | 47 | super(FetchEnv, self).__init__( 48 | model_path=model_path, n_substeps=n_substeps, n_actions=4, 49 | initial_qpos=initial_qpos) 50 | 51 | # GoalEnv methods 52 | # ---------------------------- 53 | 54 | def compute_reward(self, achieved_goal, goal, info): 55 | # Compute distance between goal and the achieved goal. 56 | d = goal_distance(achieved_goal, goal) 57 | if self.reward_type == 'sparse': 58 | return -(d > self.distance_threshold).astype(np.float32) 59 | else: 60 | return -d 61 | 62 | # RobotEnv methods 63 | # ---------------------------- 64 | 65 | def _step_callback(self): 66 | if self.block_gripper: 67 | self.sim.data.set_joint_qpos('robot0:l_gripper_finger_joint', 0.) 68 | self.sim.data.set_joint_qpos('robot0:r_gripper_finger_joint', 0.) 69 | self.sim.forward() 70 | 71 | def _set_action(self, action): 72 | assert action.shape == (4,) 73 | action = action.copy() # ensure that we don't change the action outside of this scope 74 | pos_ctrl, gripper_ctrl = action[:3], action[3] 75 | 76 | pos_ctrl *= 0.05 # limit maximum change in position 77 | rot_ctrl = [1., 0., 1., 0.] 
# fixed rotation of the end effector, expressed as a quaternion 78 | gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) 79 | assert gripper_ctrl.shape == (2,) 80 | if self.block_gripper: 81 | gripper_ctrl = np.zeros_like(gripper_ctrl) 82 | action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) 83 | 84 | # Apply action to simulation. 85 | utils.ctrl_set_action(self.sim, action) 86 | utils.mocap_set_action(self.sim, action) 87 | 88 | def _get_obs(self): 89 | # positions 90 | grip_pos = self.sim.data.get_site_xpos('robot0:grip') 91 | dt = self.sim.nsubsteps * self.sim.model.opt.timestep 92 | grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt 93 | robot_qpos, robot_qvel = utils.robot_get_obs(self.sim) 94 | if self.has_object: 95 | object_pos = self.sim.data.get_site_xpos('object0') 96 | # rotations 97 | object_rot = rotations.mat2euler(self.sim.data.get_site_xmat('object0')) 98 | # velocities 99 | object_velp = self.sim.data.get_site_xvelp('object0') * dt 100 | object_velr = self.sim.data.get_site_xvelr('object0') * dt 101 | # gripper state 102 | object_rel_pos = object_pos - grip_pos 103 | object_velp -= grip_velp 104 | else: 105 | object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.zeros(0) 106 | gripper_state = robot_qpos[-2:] 107 | gripper_vel = robot_qvel[-2:] * dt # change to a scalar if the gripper is made symmetric 108 | 109 | if not self.has_object: 110 | achieved_goal = grip_pos.copy() 111 | else: 112 | achieved_goal = np.squeeze(object_pos.copy()) 113 | obs = np.concatenate([ 114 | grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(), 115 | object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel, 116 | ]) 117 | 118 | return { 119 | 'observation': obs.copy(), 120 | 'achieved_goal': achieved_goal.copy(), 121 | 'desired_goal': self.goal.copy(), 122 | } 123 | 124 | def _viewer_setup(self): 125 | body_id = self.sim.model.body_name2id('robot0:gripper_link') 126 | lookat = self.sim.data.body_xpos[body_id] 127 | for idx, value in enumerate(lookat): 128 | self.viewer.cam.lookat[idx] = value 129 | self.viewer.cam.distance = 2.5 130 | self.viewer.cam.azimuth = 132. 131 | self.viewer.cam.elevation = -14. 132 | 133 | def _render_callback(self): 134 | # Visualize target. 135 | sites_offset = (self.sim.data.site_xpos - self.sim.model.site_pos).copy() 136 | site_id = self.sim.model.site_name2id('target0') 137 | self.sim.model.site_pos[site_id] = self.goal - sites_offset[0] 138 | self.sim.forward() 139 | 140 | def _reset_sim(self): 141 | self.sim.set_state(self.initial_state) 142 | 143 | # Randomize start position of object. 
144 | if self.has_object: 145 | object_xpos = self.initial_gripper_xpos[:2] 146 | while np.linalg.norm(object_xpos - self.initial_gripper_xpos[:2]) < 0.1: 147 | object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform(-self.obj_range, self.obj_range, size=2) 148 | object_qpos = self.sim.data.get_joint_qpos('object0:joint') 149 | assert object_qpos.shape == (7,) 150 | object_qpos[:2] = object_xpos 151 | self.sim.data.set_joint_qpos('object0:joint', object_qpos) 152 | 153 | self.sim.forward() 154 | return True 155 | 156 | def _sample_goal(self): 157 | if self.has_object: 158 | goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3) 159 | goal += self.target_offset 160 | goal[2] = self.height_offset 161 | if self.target_in_the_air and self.np_random.uniform() < 0.5: 162 | goal[2] += self.np_random.uniform(0, 0.45) 163 | else: 164 | goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-0.15, 0.15, size=3) 165 | return goal.copy() 166 | 167 | def _is_success(self, achieved_goal, desired_goal): 168 | d = goal_distance(achieved_goal, desired_goal) 169 | return (d < self.distance_threshold).astype(np.float32) 170 | 171 | def _env_setup(self, initial_qpos): 172 | for name, value in initial_qpos.items(): 173 | self.sim.data.set_joint_qpos(name, value) 174 | utils.reset_mocap_welds(self.sim) 175 | self.sim.forward() 176 | 177 | # Move end effector into position. 178 | gripper_target = np.array([-0.498, 0.005, -0.431 + self.gripper_extra_height]) + self.sim.data.get_site_xpos('robot0:grip') 179 | gripper_rotation = np.array([1., 0., 1., 0.]) 180 | self.sim.data.set_mocap_pos('robot0:mocap', gripper_target) 181 | self.sim.data.set_mocap_quat('robot0:mocap', gripper_rotation) 182 | for _ in range(10): 183 | self.sim.step() 184 | 185 | # Extract information for sampling goals. 
186 | self.initial_gripper_xpos = self.sim.data.get_site_xpos('robot0:grip').copy() 187 | if self.has_object: 188 | self.height_offset = self.sim.data.get_site_xpos('object0')[2] 189 | 190 | 191 | # Ensure we get the path separator correct on windows 192 | MODEL_XML_PATH = osp.join(MODEL_PATH, 'fetch', 'reach.xml') 193 | 194 | 195 | class FetchReachEnv(FetchEnv, EzPickle): 196 | def __init__(self, reward_type='sparse'): 197 | initial_qpos = { 198 | 'robot0:slide0': 0.4049, 199 | 'robot0:slide1': 0.48, 200 | 'robot0:slide2': 0.0, 201 | } 202 | FetchEnv.__init__( 203 | self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20, 204 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0, 205 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 206 | initial_qpos=initial_qpos, reward_type=reward_type) 207 | EzPickle.__init__(self) -------------------------------------------------------------------------------- /common/envs/half_cheetah.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | from common.envs.randomized_locomotion import RandomizedLocomotionEnv 6 | 7 | class HalfCheetahRandomizedEnv(RandomizedLocomotionEnv): 8 | def __init__(self, **kwargs): 9 | RandomizedLocomotionEnv.__init__(self, **kwargs) 10 | 11 | def step(self, action): 12 | xposbefore = self.sim.data.qpos[0] 13 | self.do_simulation(action, self.frame_skip) 14 | xposafter = self.sim.data.qpos[0] 15 | ob = self._get_obs() 16 | reward_ctrl = - 0.1 * np.square(action).sum() 17 | reward_run = (xposafter - xposbefore)/self.dt 18 | reward = reward_ctrl + reward_run 19 | done = False 20 | return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl) 21 | 22 | def _get_obs(self): 23 | return np.concatenate([ 24 | self.sim.data.qpos.flat[1:], 25 | self.sim.data.qvel.flat, 26 | ]) 27 | 28 | def reset_model(self): 29 | qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq) 30 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 31 | self.set_state(qpos, qvel) 32 | return self._get_obs() 33 | 34 | def viewer_setup(self): 35 | self.viewer.cam.distance = self.model.stat.extent * 0.5 36 | -------------------------------------------------------------------------------- /common/envs/humanoid.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from gym import utils 5 | from gym.envs.mujoco import mujoco_env 6 | import xml.etree.ElementTree as et 7 | 8 | import mujoco_py 9 | 10 | from common.envs.randomized_locomotion import RandomizedLocomotionEnv 11 | 12 | 13 | def mass_center(model, sim): 14 | mass = np.expand_dims(model.body_mass, 1) 15 | xpos = sim.data.xipos 16 | 17 | return (np.sum(mass * xpos, 0) / np.sum(mass))[0] 18 | 19 | 20 | # TODO: this class is not Thread-Safe 21 | class HumanoidRandomizedEnv(RandomizedLocomotionEnv): 22 | def __init__(self, **kwargs): 23 | RandomizedLocomotionEnv.__init__(self, **kwargs) 24 | 25 | def _get_obs(self): 26 | data = self.sim.data 27 | return np.concatenate([data.qpos.flat[2:], 28 | data.qvel.flat, 29 | data.cinert.flat, 30 | data.cvel.flat, 31 | data.qfrc_actuator.flat, 32 | data.cfrc_ext.flat]) 33 | 34 | def step(self, a): 35 | pos_before = mass_center(self.model, self.sim) 36 | self.do_simulation(a, self.frame_skip) 37 | pos_after = mass_center(self.model, self.sim) 38 | alive_bonus = 5.0 39 | 
data = self.sim.data 40 | lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep 41 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() 42 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum() 43 | quad_impact_cost = min(quad_impact_cost, 10) 44 | reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus 45 | qpos = self.sim.data.qpos 46 | done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0)) 47 | return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost, 48 | reward_alive=alive_bonus, reward_impact=-quad_impact_cost) 49 | 50 | def reset_model(self): 51 | c = 0.01 52 | self.set_state( 53 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq), 54 | self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv, ) 55 | ) 56 | return self._get_obs() 57 | 58 | def viewer_setup(self): 59 | self.viewer.cam.trackbodyid = 1 60 | self.viewer.cam.distance = self.model.stat.extent * 1.0 61 | self.viewer.cam.lookat[2] = 2.0 62 | 63 | self.viewer.cam.elevation = -20 64 | -------------------------------------------------------------------------------- /common/envs/pusher.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from gym import utils 5 | from gym.envs.mujoco import mujoco_env 6 | import xml.etree.ElementTree as et 7 | 8 | import mujoco_py 9 | 10 | PLANE_LOCATION_Z = -0.325 11 | 12 | 13 | # TODO: this class is not Thread-Safe 14 | class PusherRandomizedEnv(mujoco_env.MujocoEnv, utils.EzPickle): 15 | def __init__(self, **kwargs): 16 | utils.EzPickle.__init__(self) 17 | mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', frame_skip=5) 18 | 19 | # randomization 20 | self.reference_path = os.path.join(os.path.dirname(mujoco_env.__file__), "assets", 'pusher.xml') 21 | self.reference_xml = et.parse(self.reference_path) 22 | self.config_file = kwargs.get('config') 23 | self.dimensions = [] 24 | self._locate_randomize_parameters() 25 | 26 | def _locate_randomize_parameters(self): 27 | self.root = self.reference_xml.getroot() 28 | self.geom = self.root.find("./default/geom[@friction]") 29 | roll_link = self.root.find(".//body[@name='r_wrist_roll_link']") 30 | self.wrist = roll_link.findall("./geom[@type='capsule']") 31 | self.tips = roll_link.findall("./body[@name='tips_arm']/geom") 32 | self.object_body = self.root.find(".//body[@name='object']") 33 | self.object_body_geom = self.object_body.findall('./geom') 34 | self.goal_body = self.root.find(".//body[@name='goal']/geom") 35 | 36 | def _update_randomized_params(self): 37 | xml = self._create_xml() 38 | self._re_init(xml) 39 | 40 | def _re_init(self, xml): 41 | self.model = mujoco_py.load_model_from_xml(xml) 42 | self.sim = mujoco_py.MjSim(self.model) 43 | self.data = self.sim.data 44 | self.init_qpos = self.data.qpos.ravel().copy() 45 | self.init_qvel = self.data.qvel.ravel().copy() 46 | observation, _reward, done, _info = self.step(np.zeros(self.model.nu)) 47 | assert not done 48 | if self.viewer: 49 | self.viewer.update_sim(self.sim) 50 | 51 | def _create_xml(self): 52 | # TODO: I might speed this up, but I think is insignificant w.r.t to the model/sim creation... 53 | self._randomize_friction() 54 | self._randomize_density() 55 | self._randomize_size() 56 | 57 | return et.tostring(self.root, encoding='unicode', method='xml') 58 | 59 | # TODO: I'm making an assumption here that 3 places after the comma are good enough, are they? 
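    # NOTE on the TODO above: '{:3f}' is a minimum field width of 3 with the
    # default precision of 6, not "3 places after the comma"; '{:.3f}' would be
    # the spec that actually rounds to 3 decimals. As written, the XML attributes
    # set below are emitted with 6 decimal places.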
60 | def _randomize_friction(self): 61 | self.geom.set('friction', '{:3f} 0.1 0.1'.format(self.dimensions[0].current_value)) 62 | 63 | def _randomize_density(self): 64 | self.geom.set('density', '{:3f}'.format(self.dimensions[1].current_value)) 65 | 66 | def _randomize_size(self): 67 | size = self.dimensions[2].current_value 68 | 69 | # grabber 70 | grabber_width = size * 2 71 | self.wrist[0].set('fromto', '0 -{:3f} 0. 0.0 +{:3f} 0'.format(grabber_width, grabber_width)) 72 | self.wrist[1].set('fromto', '0 -{:3f} 0. {:3f} -{:3f} 0'.format(grabber_width, grabber_width, grabber_width)) 73 | self.wrist[2].set('fromto', '0 +{:3f} 0. {:3f} +{:3f} 0'.format(grabber_width, grabber_width, grabber_width)) 74 | self.tips[0].set('pos', '{:3f} -{:3f} 0.'.format(grabber_width, grabber_width)) 75 | self.tips[1].set('pos', '{:3f} {:3f} 0.'.format(grabber_width, grabber_width)) 76 | 77 | # object 78 | # self.object_body.set('pos', '0.45 -0.05 {:3f}'.format(PLANE_LOCATION_Z + size)) 79 | # for geom in self.object_body_geom: 80 | # geom.set('size', "{:3f} {:3f} {:3f}".format(size, size, size)) 81 | 82 | # goal 83 | # TODO: maybe a constant here? 1.6 is 0.08 / 0.05, the goal diam shrinks with the object diam 84 | # self.goal_body.set('size', "{:3f} 0.001 0.1".format(size * 1.6)) 85 | 86 | def step(self, a): 87 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") 88 | vec_2 = self.get_body_com("object") - self.get_body_com("goal") 89 | 90 | reward_near = - np.linalg.norm(vec_1) 91 | reward_dist = - np.linalg.norm(vec_2) 92 | reward_ctrl = - np.square(a).sum() 93 | reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near 94 | 95 | self.do_simulation(a, self.frame_skip) 96 | ob = self._get_obs() 97 | done = False 98 | return ob, reward, done, dict(reward_dist=reward_dist, 99 | reward_ctrl=reward_ctrl) 100 | 101 | def viewer_setup(self): 102 | self.viewer.cam.trackbodyid = -1 103 | self.viewer.cam.distance = 4.0 104 | 105 | def reset_model(self): 106 | qpos = self.init_qpos 107 | 108 | self.goal_pos = np.asarray([0, 0]) 109 | while True: 110 | self.cylinder_pos = np.concatenate([ 111 | self.np_random.uniform(low=-0.3, high=0, size=1), 112 | self.np_random.uniform(low=-0.2, high=0.2, size=1)]) 113 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17: 114 | break 115 | 116 | qpos[-4:-2] = self.cylinder_pos 117 | qpos[-2:] = self.goal_pos 118 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005, 119 | high=0.005, size=self.model.nv) 120 | qvel[-4:] = 0 121 | self.set_state(qpos, qvel) 122 | return self._get_obs() 123 | 124 | def _get_obs(self): 125 | return np.concatenate([ 126 | self.sim.data.qpos.flat[:7], 127 | self.sim.data.qvel.flat[:7], 128 | self.get_body_com("tips_arm"), 129 | self.get_body_com("object"), 130 | self.get_body_com("goal"), 131 | ]) -------------------------------------------------------------------------------- /common/envs/pusher3dof.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from gym import utils 5 | from gym.envs.mujoco import mujoco_env 6 | import xml.etree.ElementTree as et 7 | 8 | import mujoco_py 9 | 10 | 11 | class PusherEnv3DofEnv(mujoco_env.MujocoEnv, utils.EzPickle): 12 | def __init__(self, **kwargs): 13 | utils.EzPickle.__init__(self) 14 | self.reference_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), 15 | 'assets/pusher_3dof.xml') 16 | mujoco_env.MujocoEnv.__init__(self, self.reference_path, frame_skip=5) 17 | 18 | self.model.stat.extent = 10 
19 | 20 | # randomization 21 | self.reference_xml = et.parse(self.reference_path) 22 | self.config_file = kwargs.get('config') 23 | self.dimensions = [] 24 | self._locate_randomize_parameters() 25 | 26 | def _locate_randomize_parameters(self): 27 | self.root = self.reference_xml.getroot() 28 | end_effector = self.root.find(".//body[@name='distal_4']") 29 | self.wrist = end_effector.findall("./geom[@type='capsule']") 30 | self.tips = end_effector.findall(".//body[@name='tips_arm']/geom") 31 | self.object_body = self.root.find(".//body[@name='object']/geom") 32 | self.object_joints = self.root.findall(".//body[@name='object']/joint") 33 | 34 | def _update_randomized_params(self): 35 | xml = self._create_xml() 36 | self._re_init(xml) 37 | 38 | def _re_init(self, xml): 39 | self.model = mujoco_py.load_model_from_xml(xml) 40 | self.sim = mujoco_py.MjSim(self.model) 41 | self.data = self.sim.data 42 | self.init_qpos = self.data.qpos.ravel().copy() 43 | self.init_qvel = self.data.qvel.ravel().copy() 44 | observation, _reward, done, _info = self.step(np.zeros(self.model.nu)) 45 | assert not done 46 | if self.viewer: 47 | self.viewer.update_sim(self.sim) 48 | 49 | def _create_xml(self): 50 | # TODO: I might speed this up, but I think is insignificant w.r.t to the model/sim creation... 51 | self._randomize_friction() 52 | self._randomize_damping() 53 | # self._randomize_size() 54 | 55 | return et.tostring(self.root, encoding='unicode', method='xml') 56 | 57 | # TODO: I'm making an assumption here that 3 places after the comma are good enough, are they? 58 | def _randomize_friction(self): 59 | frictionloss = self.dimensions[0].current_value 60 | 61 | for joint in self.object_joints: 62 | joint.set('frictionloss', '{:3f}'.format(frictionloss)) 63 | 64 | def _randomize_damping(self): 65 | damping = self.dimensions[1].current_value 66 | for joint in self.object_joints: 67 | joint.set('damping', '{:3f}'.format(damping)) 68 | 69 | def _randomize_size(self): 70 | size = self.dimensions[2].current_value 71 | # grabber 72 | grabber_width = size * 2 73 | self.wrist[0].set('fromto', '0 -{:3f} 0. 0.0 +{:3f} 0'.format(grabber_width, grabber_width)) 74 | self.wrist[1].set('fromto', '0 -{:3f} 0. {:3f} -{:3f} 0'.format(grabber_width, grabber_width, grabber_width)) 75 | self.wrist[2].set('fromto', '0 +{:3f} 0. 
{:3f} +{:3f} 0'.format(grabber_width, grabber_width, grabber_width)) 76 | self.tips[0].set('pos', '{:3f} -{:3f} 0.'.format(grabber_width, grabber_width)) 77 | self.tips[1].set('pos', '{:3f} {:3f} 0.'.format(grabber_width, grabber_width)) 78 | 79 | def step(self, action): 80 | arm_dist = np.linalg.norm(self.get_body_com("object")[:2] - self.get_body_com("tips_arm")[:2]) 81 | goal_dist = np.linalg.norm(self.get_body_com("object")[:2] - self.get_body_com("goal")[:2]) 82 | 83 | # Reward from Soft Q Learning 84 | action_cost = np.square(action).sum() 85 | reward = -0.1 * action_cost - goal_dist 86 | 87 | self.do_simulation(action, self.frame_skip) 88 | ob = self._get_obs() 89 | done = False 90 | 91 | return ob, reward, done, {'arm_dist': arm_dist, 'goal_dist': goal_dist} 92 | 93 | def viewer_setup(self): 94 | coords = [.7, -.5, 0] 95 | for i in range(3): 96 | self.viewer.cam.lookat[i] = coords[i] 97 | self.viewer.cam.trackbodyid = -1 98 | self.viewer.cam.distance = 2 99 | 100 | def reset_model(self): 101 | qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos 102 | 103 | # Original 104 | # object_ = np.random.uniform(low=[.3,-1.0], high=[1.2,-0.4]) 105 | # goal = np.random.uniform(low=[.8,-1.2], high=[1.2,-0.8]) 106 | 107 | while True: 108 | # NOW RUNNING: "HARDER*" 109 | object_ = np.random.uniform(low=[.4,-1.0], high=[1.2,-0.5]) 110 | # object_ = np.random.uniform(low=[.5,-1.0], high=[1.2,-0.6]) 111 | goal = np.random.uniform(low=[.8,-1.2], high=[1.2,-0.8]) 112 | if np.linalg.norm(object_ - goal) > 0.45: 113 | break 114 | 115 | self.object = np.array(object_) 116 | self.goal = np.array(goal) 117 | 118 | qpos[-4:-2] = self.object 119 | qpos[-2:] = self.goal 120 | qvel = self.init_qvel 121 | qvel[-4:] = 0 122 | self.set_state(qpos, qvel) 123 | return self._get_obs() 124 | 125 | def _get_obs(self): 126 | return np.concatenate([ 127 | self.sim.data.qpos.flat[:-4], 128 | self.sim.data.qvel.flat[:-4], 129 | self.get_body_com("distal_4")[:2], 130 | self.get_body_com("object")[:2], 131 | self.get_body_com("goal")[:2], 132 | ]) 133 | -------------------------------------------------------------------------------- /common/envs/randomized_locomotion.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import json 4 | import numpy as np 5 | from gym import utils 6 | from gym.envs.mujoco import mujoco_env 7 | import xml.etree.ElementTree as et 8 | 9 | import mujoco_py 10 | 11 | 12 | def mass_center(model, sim): 13 | mass = np.expand_dims(model.body_mass, 1) 14 | xpos = sim.data.xipos 15 | 16 | return (np.sum(mass * xpos, 0) / np.sum(mass))[0] 17 | 18 | 19 | # TODO: this class is not Thread-Safe 20 | class RandomizedLocomotionEnv(mujoco_env.MujocoEnv, utils.EzPickle): 21 | def __init__(self, **kwargs): 22 | utils.EzPickle.__init__(self) 23 | mujoco_env.MujocoEnv.__init__(self, kwargs.get('xml_name'), frame_skip=5) 24 | 25 | # randomization 26 | self.reference_path = os.path.join(os.path.dirname(mujoco_env.__file__), "assets", kwargs.get('xml_name')) 27 | self.reference_xml = et.parse(self.reference_path) 28 | self.config_file = kwargs.get('config') 29 | self.dimensions = [] 30 | self.dimension_map = [] 31 | self.suffixes = [] 32 | self._locate_randomize_parameters() 33 | 34 | def _locate_randomize_parameters(self): 35 | self.root = self.reference_xml.getroot() 36 | with open(self.config_file, mode='r') as f: 37 | config = json.load(f) 38 | 39 | check_suffixes = config.get('suffixes', False) 40 | 41 | for entry in 
config['dimensions']: 42 | name = entry["name"] 43 | self.dimension_map.append([]) 44 | for geom in config["geom_map"][name]: 45 | self.dimension_map[-1].append(self.root.find(".//geom[@name='{}']".format(geom))) 46 | 47 | if check_suffixes: 48 | suffix = config['suffixes'].get(name, "") 49 | self.suffixes.append(suffix) 50 | else: 51 | self.suffixes.append("") 52 | 53 | def _create_xml(self): 54 | for i, bodypart in enumerate(self.dimensions): 55 | for geom in self.dimension_map[i]: 56 | suffix = self.suffixes[i] 57 | value = "{:3f} {}".format(self.dimensions[i].current_value, suffix) 58 | geom.set('size', '{}'.format(value)) 59 | 60 | return et.tostring(self.root, encoding='unicode', method='xml') 61 | 62 | def _update_randomized_params(self): 63 | xml = self._create_xml() 64 | self._re_init(xml) 65 | 66 | def _re_init(self, xml): 67 | self.model = mujoco_py.load_model_from_xml(xml) 68 | self.sim = mujoco_py.MjSim(self.model) 69 | self.data = self.sim.data 70 | self.init_qpos = self.data.qpos.ravel().copy() 71 | self.init_qvel = self.data.qvel.ravel().copy() 72 | observation, _reward, done, _info = self.step(np.zeros(self.model.nu)) 73 | assert not done 74 | if self.viewer: 75 | self.viewer.update_sim(self.sim) -------------------------------------------------------------------------------- /common/envs/randomized_vecenv.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import gym 4 | import numpy as np 5 | from multiprocessing import Process, Pipe 6 | from baselines.common.vec_env import VecEnv, CloudpickleWrapper 7 | 8 | from common.envs.wrappers import RandomizedEnvWrapper 9 | 10 | """File Description: 11 | Creates a vectorized environment with RandomizationEnvWrapper, which helps 12 | for fast / general Domain Randomization. 13 | The main thing to note here is unlike the OpenAI vectorized env, 14 | the step command does not automatically reset. 
15 | 16 | We also provide simple helper functions to randomize environments 17 | """ 18 | 19 | logger = logging.getLogger(__name__) 20 | 21 | 22 | def make_env(env_id, seed, rank): 23 | def _thunk(): 24 | env = gym.make(env_id) 25 | env = RandomizedEnvWrapper(env, seed + rank) 26 | 27 | env.seed(seed + rank) 28 | obs_shape = env.observation_space.shape # TODO: is this something we can remove 29 | 30 | return env 31 | 32 | return _thunk 33 | 34 | 35 | def make_vec_envs(env_name, seed, num_processes): 36 | envs = [make_env(env_name, seed, i) 37 | for i in range(num_processes)] 38 | envs = RandomizedSubprocVecEnv(envs) 39 | return envs 40 | 41 | 42 | def worker(remote, parent_remote, env_fn_wrapper): 43 | parent_remote.close() 44 | env = env_fn_wrapper.x() 45 | try: 46 | while True: 47 | cmd, data = remote.recv() 48 | if cmd == 'step': 49 | ob, reward, done, info = env.step(data) 50 | remote.send((ob, reward, done, info)) 51 | elif cmd == 'reset': 52 | ob = env.reset() 53 | remote.send(ob) 54 | elif cmd == 'render': 55 | remote.send(env.render(mode='rgb_array')) 56 | elif cmd == 'close': 57 | remote.close() 58 | break 59 | elif cmd == 'get_spaces': 60 | remote.send((env.observation_space, env.action_space, env.unwrapped.randomization_space)) 61 | elif cmd == 'get_dimension_name': 62 | remote.send(env.unwrapped.dimensions[data].name) 63 | elif cmd == 'rescale_dimension': 64 | dimension = data[0] 65 | array = data[1] 66 | rescaled = env.unwrapped.dimensions[dimension]._rescale(array) 67 | remote.send(rescaled) 68 | elif cmd == 'randomize': 69 | randomized_val = data 70 | env.randomize(randomized_val) 71 | remote.send(None) 72 | elif cmd == 'get_current_randomization_values': 73 | values = [] 74 | for dim in env.unwrapped.dimensions: 75 | values.append(dim.current_value) 76 | 77 | remote.send(values) 78 | else: 79 | raise NotImplementedError 80 | except KeyboardInterrupt: 81 | print('SubprocVecEnv worker: got KeyboardInterrupt') 82 | finally: 83 | env.close() 84 | 85 | 86 | class RandomizedSubprocVecEnv(VecEnv): 87 | """ 88 | VecEnv that runs multiple environments in parallel in subproceses and communicates with them via pipes. 89 | Recommended to use when num_envs > 1 and step() can be a bottleneck. 90 | """ 91 | 92 | # TODO: arg spaces is no longer used. Remove? 93 | def __init__(self, env_fns, spaces=None): 94 | """ 95 | Arguments: 96 | 97 | env_fns: iterable of callables - functions that create environments to run in subprocesses. 
Need to be cloud-pickleable 98 | """ 99 | self.waiting = False 100 | self.closed = False 101 | nenvs = len(env_fns) 102 | self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) 103 | self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) 104 | for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)] 105 | for p in self.ps: 106 | p.daemon = True # if the main process crashes, we should not cause things to hang 107 | p.start() 108 | for remote in self.work_remotes: 109 | remote.close() 110 | 111 | self.remotes[0].send(('get_spaces', None)) 112 | observation_space, action_space, randomization_space = self.remotes[0].recv() 113 | self.randomization_space = randomization_space 114 | self.viewer = None 115 | VecEnv.__init__(self, len(env_fns), observation_space, action_space) 116 | 117 | def step_async(self, actions): 118 | self._assert_not_closed() 119 | logger.debug('[step] => SENDING') 120 | for remote, action in zip(self.remotes, actions): 121 | remote.send(('step', action)) 122 | logger.debug('[step] => SENT') 123 | self.waiting = True 124 | 125 | def step_wait(self): 126 | self._assert_not_closed() 127 | logger.debug('[step] => WAITING') 128 | results = [remote.recv() for remote in self.remotes] 129 | self.waiting = False 130 | logger.debug('[step] => DONE') 131 | obs, rews, dones, infos = zip(*results) 132 | return np.stack(obs), np.stack(rews), np.stack(dones), infos 133 | 134 | def randomize(self, randomized_values): 135 | self._assert_not_closed() 136 | 137 | logger.debug('[randomize] => SENDING') 138 | for remote, val in zip(self.remotes, randomized_values): 139 | remote.send(('randomize', val)) 140 | results = [remote.recv() for remote in self.remotes] # TODO: why creating the array if you're not gonna use it 141 | logger.debug('[randomize] => SENT') 142 | self.waiting = False 143 | 144 | def get_current_params(self): 145 | logger.debug('[get_current_randomization_values] => SENDING') 146 | for remote in self.remotes: 147 | remote.send(('get_current_randomization_values', None)) 148 | result = [remote.recv() for remote in self.remotes] 149 | logger.debug('[get_current_randomization_values] => SENT') 150 | return np.stack(result) 151 | 152 | def get_dimension_name(self, dimension): 153 | logger.debug('[get_dimension_name] => SENDING') 154 | self.remotes[0].send(('get_dimension_name', dimension)) 155 | result = self.remotes[0].recv() 156 | logger.debug('[get_dimension_name] => SENT') 157 | return result 158 | 159 | def rescale(self, dimension, array): 160 | logger.debug('[rescale_dimension] => SENDING') 161 | data = (dimension, array) 162 | self.remotes[0].send(('rescale_dimension', data)) 163 | result = self.remotes[0].recv() 164 | logger.debug('[rescale_dimension] => SENT') 165 | return result 166 | 167 | def reset(self): 168 | self._assert_not_closed() 169 | logger.debug('[reset] => SENDING') 170 | for remote in self.remotes: 171 | remote.send(('reset', None)) 172 | result = [remote.recv() for remote in self.remotes] 173 | logger.debug('[reset] => SENT') 174 | return np.stack(result) 175 | 176 | def close(self): 177 | if self.closed: 178 | return 179 | if self.viewer is not None: 180 | self.viewer.close() 181 | self.close_extras() 182 | self.closed = True 183 | 184 | def close_extras(self): 185 | self.closed = True 186 | if self.waiting: 187 | for remote in self.remotes: 188 | remote.recv() 189 | for remote in self.remotes: 190 | remote.send(('close', None)) 191 | for p in self.ps: 192 | p.join() 193 | 194 
| def get_images(self): 195 | self._assert_not_closed() 196 | for pipe in self.remotes: 197 | pipe.send(('render', None)) 198 | imgs = [pipe.recv() for pipe in self.remotes] 199 | return imgs 200 | 201 | def _assert_not_closed(self): 202 | assert not self.closed, "Trying to operate on a SubprocVecEnv after calling close()" 203 | -------------------------------------------------------------------------------- /common/envs/wrappers.py: -------------------------------------------------------------------------------- 1 | from importlib import import_module 2 | 3 | import gym 4 | import json 5 | import numpy as np 6 | 7 | import gym.spaces as spaces 8 | import os.path as osp 9 | 10 | from enum import Enum 11 | 12 | from lxml import etree 13 | import numpy as np 14 | 15 | from common.envs.assets import MODEL_PATH 16 | from common.envs.dimension import Dimension 17 | 18 | 19 | class RandomizedEnvWrapper(gym.Wrapper): 20 | """Creates a randomization-enabled enviornment, which can change 21 | physics / simulation parameters without relaunching everything 22 | """ 23 | 24 | def __init__(self, env, seed): 25 | super(RandomizedEnvWrapper, self).__init__(env) 26 | self.config_file = self.unwrapped.config_file 27 | 28 | self._load_randomization_dimensions(seed) 29 | self.unwrapped._update_randomized_params() 30 | self.randomized_default = ['random'] * len(self.unwrapped.dimensions) 31 | 32 | def _load_randomization_dimensions(self, seed): 33 | """ Helper function to load environment defaults ranges 34 | """ 35 | self.unwrapped.dimensions = [] 36 | 37 | with open(self.config_file, mode='r') as f: 38 | config = json.load(f) 39 | 40 | for dimension in config['dimensions']: 41 | self.unwrapped.dimensions.append( 42 | Dimension( 43 | default_value=dimension['default'], 44 | seed=seed, 45 | multiplier_min=dimension['multiplier_min'], 46 | multiplier_max=dimension['multiplier_max'], 47 | name=dimension['name'] 48 | ) 49 | ) 50 | 51 | nrand = len(self.unwrapped.dimensions) 52 | self.unwrapped.randomization_space = spaces.Box(0, 1, shape=(nrand,), dtype=np.float32) 53 | 54 | # TODO: The default is not informative of the type of randomize_values 55 | # TODO: The .randomize API is counter intuitive... 
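    # Accepted entries in `randomized_values` (one per dimension), per the body below:
    #   'default'          -> reset that dimension to its default value
    #   a float in [0, 1]  -> rescaled into [range_min, range_max] via Dimension._rescale
    #   'random' (or -1)   -> sample uniformly from the dimension's range
    # e.g. wrapped_env.randomize(['default', 0.5, 'random'])   # illustrative call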
56 | def randomize(self, randomized_values=-1): 57 | """Creates a randomized environment, using the dimension and value specified 58 | to randomize over 59 | """ 60 | for dimension, randomized_value in enumerate(randomized_values): 61 | if randomized_value == 'default': 62 | self.unwrapped.dimensions[dimension].current_value = \ 63 | self.unwrapped.dimensions[dimension].default_value 64 | elif randomized_value != 'random' and randomized_value != -1: 65 | assert 0.0 <= randomized_value <= 1.0, "using incorrect: {}".format(randomized_value) 66 | self.unwrapped.dimensions[dimension].current_value = \ 67 | self.unwrapped.dimensions[dimension]._rescale(randomized_value) 68 | else: # random 69 | self.unwrapped.dimensions[dimension].randomize() 70 | 71 | self.unwrapped._update_randomized_params() 72 | 73 | def step(self, action): 74 | return self.env.step(action) 75 | 76 | def reset(self, **kwargs): 77 | return self.env.reset(**kwargs) -------------------------------------------------------------------------------- /common/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/models/__init__.py -------------------------------------------------------------------------------- /common/models/actor_critic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Actor(nn.Module): 7 | def __init__(self, state_dim, action_dim, max_action): 8 | super(Actor, self).__init__() 9 | 10 | self.l1 = nn.Linear(state_dim, 400) 11 | self.l2 = nn.Linear(400, 300) 12 | self.l3 = nn.Linear(300, action_dim) 13 | 14 | self.max_action = max_action 15 | 16 | def forward(self, x): 17 | x = F.relu(self.l1(x)) 18 | x = F.relu(self.l2(x)) 19 | x = self.max_action * torch.tanh(self.l3(x)) 20 | return x 21 | 22 | class Critic(nn.Module): 23 | def __init__(self, state_dim, action_dim): 24 | super(Critic, self).__init__() 25 | 26 | self.l1 = nn.Linear(state_dim + action_dim, 400) 27 | self.l2 = nn.Linear(400, 300) 28 | self.l3 = nn.Linear(300, 1) 29 | 30 | 31 | def forward(self, x, u): 32 | x = F.relu(self.l1(torch.cat([x, u], 1))) 33 | x = F.relu(self.l2(x)) 34 | x = self.l3(x) 35 | return x 36 | -------------------------------------------------------------------------------- /common/models/discriminator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class MLPDiscriminator(nn.Module): 5 | """Discriminator class based on Feedforward Network 6 | Input is a state-action-state' transition 7 | Output is probability that it was from a reference trajectory 8 | """ 9 | def __init__(self, state_dim, action_dim): 10 | super(MLPDiscriminator, self).__init__() 11 | 12 | self.l1 = nn.Linear((state_dim + action_dim + state_dim), 128) 13 | self.l2 = nn.Linear(128, 128) 14 | self.logic = nn.Linear(128, 1) 15 | 16 | self.logic.weight.data.mul_(0.1) 17 | self.logic.bias.data.mul_(0.0) 18 | 19 | # Tuple of S-A-S' 20 | def forward(self, x): 21 | x = torch.tanh(self.l1(x)) 22 | x = torch.tanh(self.l2(x)) 23 | x = self.logic(x) 24 | return torch.sigmoid(x) 25 | 26 | class GAILMLPDiscriminator(nn.Module): 27 | """Discriminator class based on Feedforward Network 28 | Input is a state-action-state' transition 29 | Output is probability that it was from a reference trajectory 30 | 
""" 31 | def __init__(self, state_dim, action_dim): 32 | super(GAILMLPDiscriminator, self).__init__() 33 | self.l1 = nn.Linear((state_dim + action_dim), 128) 34 | self.l2 = nn.Linear(128, 128) 35 | self.logic = nn.Linear(128, 1) 36 | 37 | self.logic.weight.data.mul_(0.1) 38 | self.logic.bias.data.mul_(0.0) 39 | 40 | # Tuple of S-A-S' 41 | def forward(self, x): 42 | x = torch.tanh(self.l1(x)) 43 | x = torch.tanh(self.l2(x)) 44 | x = self.logic(x) 45 | return torch.sigmoid(x) 46 | 47 | 48 | class LSTMDiscriminator(nn.Module): 49 | """Discriminator class based on Feedforward Network 50 | Input is a sequence of state-action-state' transitions 51 | Output is probability that it was from a reference trajectory 52 | """ 53 | def __init__(self, state_dim, batch_size, hidden_dim): 54 | self.lstm = nn.LSTM(state_dim, hidden_dim, num_layers=1) 55 | self.state_dim = state_dim 56 | 57 | self.hidden_dim = hidden_dim 58 | self.hidden2out = nn.Linear(hidden_dim, output_size) 59 | self.hidden = self._init_hidden() 60 | 61 | def _init_hidden(self): 62 | return (Variable(torch.zeros(1, self.batch_size, self.hidden_dim)), 63 | Variable(torch.zeros(1, self.batch_size, self.hidden_dim))) 64 | 65 | def forward(self, trajectory): 66 | self.hidden = self._init_hidden() 67 | 68 | predictions, (ht, ct) = self.lstm(trajectory, self.hidden) 69 | output = self.hidden2out(ht[-1]) 70 | return torch.sigmoid(output) 71 | -------------------------------------------------------------------------------- /common/svpg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/svpg/__init__.py -------------------------------------------------------------------------------- /common/svpg/particles/__init__.py: -------------------------------------------------------------------------------- 1 | from .svpg_particle import SVPGParticle -------------------------------------------------------------------------------- /common/svpg/particles/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from common.svpg.particles.utils import AddBias, init 6 | 7 | """ 8 | Modify standard PyTorch distributions so they are compatible with this code. 
9 | """ 10 | 11 | # 12 | # Standardize distribution interfaces 13 | # 14 | 15 | # Categorical 16 | FixedCategorical = torch.distributions.Categorical 17 | 18 | old_sample = FixedCategorical.sample 19 | FixedCategorical.sample = lambda self: old_sample(self).unsqueeze(-1) 20 | 21 | log_prob_cat = FixedCategorical.log_prob 22 | FixedCategorical.log_probs = lambda self, actions: log_prob_cat(self, actions.squeeze(-1)).view(actions.size(0), -1).sum(-1).unsqueeze(-1) 23 | 24 | FixedCategorical.mode = lambda self: self.probs.argmax(dim=-1, keepdim=True) 25 | 26 | 27 | # Normal 28 | FixedNormal = torch.distributions.Normal 29 | 30 | log_prob_normal = FixedNormal.log_prob 31 | FixedNormal.log_probs = lambda self, actions: log_prob_normal(self, actions).sum(-1, keepdim=True) 32 | 33 | normal_entropy = FixedNormal.entropy 34 | FixedNormal.entropy = lambda self: normal_entropy(self).sum(-1) 35 | 36 | FixedNormal.mode = lambda self: self.mean 37 | 38 | 39 | class Categorical(nn.Module): 40 | def __init__(self, num_inputs, num_outputs): 41 | super(Categorical, self).__init__() 42 | 43 | init_ = lambda m: init(m, 44 | nn.init.orthogonal_, 45 | lambda x: nn.init.constant_(x, 0), 46 | gain=0.01) 47 | 48 | self.linear = init_(nn.Linear(num_inputs, num_outputs)) 49 | 50 | def forward(self, x): 51 | x = self.linear(x) 52 | return FixedCategorical(logits=x) 53 | 54 | 55 | class DiagGaussian(nn.Module): 56 | def __init__(self, num_inputs, num_outputs): 57 | super(DiagGaussian, self).__init__() 58 | 59 | init_ = lambda m: init(m, 60 | nn.init.orthogonal_, 61 | lambda x: nn.init.constant_(x, 0)) 62 | 63 | self.fc_mean = init_(nn.Linear(num_inputs, num_outputs)) 64 | self.logstd = AddBias(torch.zeros(num_outputs)) 65 | 66 | def forward(self, x): 67 | action_mean = self.fc_mean(x) 68 | 69 | # An ugly hack for my KFAC implementation. 
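    # (The AddBias module applied below adds a learned bias to a zero tensor, so the
    #  log-std is a free, state-independent parameter rather than a network output.)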
70 | zeros = torch.zeros(action_mean.size()) 71 | if x.is_cuda: 72 | zeros = zeros.cuda() 73 | 74 | action_logstd = self.logstd(zeros) 75 | return FixedNormal(action_mean, action_logstd.exp()) -------------------------------------------------------------------------------- /common/svpg/particles/svpg_particle.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils import orthogonal_init 6 | from .distributions import Categorical, DiagGaussian 7 | 8 | 9 | class SVPGParticleCritic(nn.Module): 10 | def __init__(self, input_dim, output_dim, hidden_dim): 11 | super(SVPGParticleCritic, self).__init__() 12 | 13 | self.critic = nn.Sequential( 14 | orthogonal_init(nn.Linear(input_dim, hidden_dim)), 15 | nn.Tanh(), 16 | orthogonal_init(nn.Linear(hidden_dim, hidden_dim)), 17 | nn.Tanh(), 18 | orthogonal_init(nn.Linear(hidden_dim, 1)) 19 | ) 20 | 21 | def forward(self, x): 22 | return self.critic(x) 23 | 24 | class SVPGParticleActorBase(nn.Module): 25 | def __init__(self, input_dim, hidden_dim): 26 | super(SVPGParticleActorBase, self).__init__() 27 | 28 | self.actor_hidden = nn.Sequential( 29 | orthogonal_init(nn.Linear(input_dim, hidden_dim)), 30 | nn.Tanh(), 31 | orthogonal_init(nn.Linear(hidden_dim, hidden_dim)), 32 | nn.Tanh(), 33 | ) 34 | 35 | def forward(self, x): 36 | return self.actor_hidden(x) 37 | 38 | 39 | class SVPGParticle(nn.Module): 40 | """Implements a AC architecture for a Discrete Advantage 41 | Actor Critic Policy, used inside of SVPG 42 | """ 43 | def __init__(self, input_dim, output_dim, hidden_dim, discrete, freeze=False): 44 | super(SVPGParticle, self).__init__() 45 | 46 | self.critic = SVPGParticleCritic(input_dim, output_dim, hidden_dim) 47 | self.actor_hidden = SVPGParticleActorBase(input_dim, hidden_dim) 48 | 49 | if discrete: 50 | self.dist = Categorical(hidden_dim, output_dim) 51 | else: 52 | self.dist = DiagGaussian(hidden_dim, output_dim) 53 | 54 | if freeze: 55 | self.freeze() 56 | 57 | self.reset() 58 | 59 | def forward(self, x): 60 | actor_hidden = self.actor_hidden(x) 61 | dist = self.dist(actor_hidden) 62 | value = self.critic(x) 63 | 64 | return dist, value 65 | 66 | def freeze(self): 67 | for param in self.critic.parameters(): 68 | param.requires_grad = False 69 | 70 | for param in self.actor_hidden.parameters(): 71 | param.requires_grad = False 72 | 73 | for param in self.dist.parameters(): 74 | param.requires_grad = False 75 | 76 | def reset(self): 77 | self.saved_log_probs = [] 78 | self.saved_klds = [] 79 | self.rewards = [] -------------------------------------------------------------------------------- /common/svpg/particles/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | class AddBias(nn.Module): 6 | def __init__(self, bias): 7 | super(AddBias, self).__init__() 8 | self._bias = nn.Parameter(bias.unsqueeze(1)) 9 | 10 | def forward(self, x): 11 | if x.dim() == 2: 12 | bias = self._bias.t().view(1, -1) 13 | else: 14 | bias = self._bias.t().view(1, -1, 1, 1) 15 | 16 | return x + bias 17 | 18 | 19 | def init(module, weight_init, bias_init, gain=1): 20 | weight_init(module.weight.data, gain=gain) 21 | bias_init(module.bias.data) 22 | return module 23 | 24 | 25 | orthogonal_init = lambda m: init(module=m, 26 | weight_init=nn.init.orthogonal_, 27 | bias_init=lambda x: nn.init.constant_(x, 0), 28 | gain=np.sqrt(2)) 29 | 
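Note: `orthogonal_init` above is the wrapper applied to the actor/critic linear layers in svpg_particle.py (distributions.py calls the underlying `init` helper directly with its own gains). A minimal usage sketch, assuming the package is importable as shown and with arbitrary example layer sizes:

    import torch.nn as nn
    from common.svpg.particles.utils import orthogonal_init

    # Orthogonal weight init (gain sqrt(2)) with zero bias, as used for the
    # SVPG particle hidden layers.
    layer = orthogonal_init(nn.Linear(64, 32))
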
-------------------------------------------------------------------------------- /common/svpg/svpg_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import os 4 | 5 | 6 | def parameters_to_vector(parameters, grad=False, both=False): 7 | """Convert parameters or/and their gradients to one vector 8 | Arguments: 9 | parameters (Iterable[Variable]): an iterator of Variables that are the 10 | parameters of a model. 11 | grad (bool): Vectorizes gradients if true, otherwise vectorizes params 12 | both (bool): If True, vectorizes both parameters and their gradients, 13 | `grad` has no effect in this case. Otherwise vectorizes parameters 14 | or gradients according to `grad`. 15 | Returns: 16 | The parameters or/and their gradients (each) represented by a single 17 | vector (th.Tensor, not Variable) 18 | """ 19 | # Flag for the device where the parameter is located 20 | param_device = None 21 | 22 | if not both: 23 | vec = [] 24 | if not grad: 25 | for param in parameters: 26 | # Ensure the parameters are located in the same device 27 | param_device = _check_param_device(param, param_device) 28 | vec.append(param.data.view(-1)) 29 | else: 30 | for param in parameters: 31 | param_device = _check_param_device(param, param_device) 32 | vec.append(param.grad.data.view(-1)) 33 | return torch.cat(vec) 34 | else: 35 | vec_params, vec_grads = [], [] 36 | for param in parameters: 37 | param_device = _check_param_device(param, param_device) 38 | vec_params.append(param.data.view(-1)) 39 | vec_grads.append(param.grad.data.view(-1)) 40 | return torch.cat(vec_params), torch.cat(vec_grads) 41 | 42 | def vector_to_parameters(vec, parameters, grad=True): 43 | """Convert one vector to the parameters or gradients of the parameters 44 | Arguments: 45 | vec (torch.Tensor): a single vector represents the parameters of a model. 46 | parameters (Iterable[Variable]): an iterator of Variables that are the 47 | parameters of a model. 48 | grad (bool): True for assigning de-vectorized `vec` to gradients 49 | """ 50 | # Ensure vec of type Variable 51 | if not isinstance(vec, torch.cuda.FloatTensor): 52 | raise TypeError('expected torch.Tensor, but got: {}' 53 | .format(torch.typename(vec))) 54 | # Flag for the device where the parameter is located 55 | param_device = None 56 | 57 | # Pointer for slicing the vector for each parameter 58 | pointer = 0 59 | if grad: 60 | for param in parameters: 61 | # Ensure the parameters are located in the same device 62 | param_device = _check_param_device(param, param_device) 63 | # The length of the parameter 64 | num_param = torch.prod(torch.LongTensor(list(param.size()))) 65 | param.grad.data = vec[pointer:pointer + num_param].view( 66 | param.size()) 67 | # Increment the pointer 68 | pointer += num_param 69 | else: 70 | for param in parameters: 71 | # Ensure the parameters are located in the same device 72 | param_device = _check_param_device(param, param_device) 73 | # The length of the parameter 74 | num_param = torch.prod(torch.LongTensor(list(param.size()))) 75 | param.data = vec[pointer:pointer + num_param].view( 76 | param.size()) 77 | # Increment the pointer 78 | pointer += num_param 79 | 80 | 81 | def _check_param_device(param, old_param_device): 82 | """This helper function is to check if the parameters are located 83 | in the same device. Currently, the conversion between model parameters 84 | and single vector form is not supported for multiple allocations, 85 | e.g. 
parameters in different GPUs, or mixture of CPU/GPU. 86 | Arguments: 87 | param ([Variable]): a Variable of a parameter of a model 88 | old_param_device (int): the device where the first parameter of a 89 | model is allocated. 90 | Returns: 91 | old_param_device (int): report device for the first time 92 | """ 93 | 94 | # Meet the first parameter 95 | if old_param_device is None: 96 | old_param_device = param.get_device() if param.is_cuda else -1 97 | else: 98 | warn = False 99 | if param.is_cuda: # Check if in same GPU 100 | warn = (param.get_device() != old_param_device) 101 | else: # Check if in CPU 102 | warn = (old_param_device != -1) 103 | if warn: 104 | raise TypeError('Found two parameters on different devices, ' 105 | 'this is currently not supported.') 106 | return old_param_device -------------------------------------------------------------------------------- /common/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/utils/__init__.py -------------------------------------------------------------------------------- /common/utils/plot_utils.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import matplotlib 3 | # matplotlib.use('Agg') 4 | import matplotlib.pyplot as plt 5 | import seaborn as sns 6 | 7 | # styling 8 | # plt.style.use('seaborn-dark') 9 | # matplotlib.rcParams.update({'font.size': 22}) 10 | 11 | sns.set(font_scale=1.5) 12 | sns.set_style("whitegrid") 13 | COLORS = ["#95d0ff", "#966bff", "#ff6ad5", "#ffa58b", "#ff6a8b"] 14 | 15 | 16 | # sns.color_palette('bright', 6) 17 | 18 | 19 | def get_args(): 20 | parser = argparse.ArgumentParser(description='Domain Randomization Driver') 21 | parser.add_argument('--environment', type=str, 22 | choices=['lunar', 'pusher', 'ergo', 'ergosix', 'lunar2', 'lunarbootstrap']) 23 | parser.add_argument('--filter', type=float) 24 | return parser.parse_args() 25 | 26 | 27 | def get_config(environment): 28 | if environment == 'lunar': 29 | return { 30 | 'metrics': ['ref_learning_curve_{}', 'hard_learning_curve_{}', 'rand_learning_curve_{}'], 31 | 'solved': 200, 32 | 'xlim': (7.5, 20.0), 33 | 'ylim': (0, 330), 34 | 'start_index': 0, 35 | 'environment': environment, 36 | # 'labels': ['baseline', 'UDR', 'oracle', 'ADR (ours)'], 37 | 'labels': ['Oracle', 'Baseline', 'UDR', 'ADR (ours)'], 38 | 'title': 'Generalization Results (LunarLander)', 39 | # 'title': 'Oracle vs. 
UDR (LunarLander)', 40 | 'dimensions': 1, 41 | 'colors': COLORS, 42 | 'legend_loc': 'lower right', 43 | 'x_label': 'Main Engine Strength (MES)', 44 | 'y_label': 'Average Reward' 45 | } 46 | elif environment == 'lunar2': 47 | return { 48 | 'metrics': ['ref_learning_curve_{}', 'hard_learning_curve_{}'], 49 | 'solved': 200, 50 | 'xlim': (7.5, 20.0), 51 | 'ylim': (-100, 330), 52 | 'start_index': 0, 53 | 'environment': environment, 54 | 'labels': ['$Baseline$', '$UDR$', '$ADR (ours)$'], 55 | 'title': ['Learning Curve (LL), Reference Env.', 'Learning Curve (LL), Hard Env.'], 56 | 'dimensions': 1, 57 | 'colors': [COLORS[1], COLORS[2], COLORS[0]], 58 | 'legend_loc': 'best', 59 | 'x_label': 'Main Engine Strength (MES)', 60 | 'y_label': 'Average Reward' 61 | } 62 | elif environment == 'lunarbootstrap': 63 | return { 64 | 'metrics': ['ref_learning_curve_{}'], 65 | 'solved': 200, 66 | 'xlim': (7.5, 11), 67 | 'ylim': (-150, 330), 68 | 'start_index': 0, 69 | 'environment': environment, 70 | 'labels': ['$ADR(boostrapped)$', '$ADR(original)$'], 71 | 'title': ['Bootstrapped ADR (LL)'], 72 | 'dimensions': 1, 73 | 'colors': [COLORS[1], COLORS[0]], 74 | 'legend_loc': 'lower right', 75 | 'x_label': 'Main Engine Strength (MES)', 76 | 'y_label': 'Average Reward' 77 | } 78 | elif environment == 'pusher': 79 | return { 80 | 'metrics': ['ref_final_dists_{}', 'hard_final_dists_{}'], 81 | 'solved': 0.35, 82 | 'xlim': (0, 1.0), 83 | 'ylim': (0.1, 0.7), 84 | 'start_index': 0, 85 | 'environment': environment, 86 | 'labels': ['$UDR$', '$ADR (ours)$'], 87 | 'title': ['Learning Curve (Pusher), Reference Env.', 'Learning Curve (Pusher), Hard Env.'], 88 | 'dimensions': 2, 89 | 'colors': [COLORS[2], COLORS[0]], 90 | 'legend_loc': 'upper right', 91 | 'x_label': 'Agent Timesteps', 92 | 'y_label': 'Average Final Distance to Goal' 93 | } 94 | 95 | elif environment == 'ergo': 96 | return { 97 | 'metrics': ['ref_final_dists_{}', 'hard_final_dists_{}'], 98 | 'solved': None, 99 | 'xlim': (0, 1.0), 100 | 'ylim': (0, 0.2), 101 | 'start_index': 0, 102 | 'environment': environment, 103 | 'labels': ['$UDR$', '$ADR (ours)$'], 104 | 'title': ['Learning Curve (Ergo), Reference Env.', 'Learning Curve (Ergo), Hard Env.'], 105 | 'dimensions': 8, 106 | 'colors': [COLORS[2], COLORS[0]], 107 | 'legend_loc': 'upper right', 108 | 'x_label': 'Agent Timesteps', 109 | 'y_label': 'Average Final Distance to Goal' 110 | } 111 | 112 | 113 | def gen_plot(config, file_path, data, title=None, learning_curve=False): 114 | plt.figure(figsize=(6, 5)) 115 | 116 | plt.title(config['title'] if not title else title) 117 | plt.xlabel(config['x_label']) 118 | plt.ylabel(config['y_label']) 119 | 120 | plt.ylim(*config['ylim']) 121 | if config['solved']: 122 | # plt.axhline(config['solved'], color=COLORS[4], linestyle='--', label='$[Solved]$') # only for figure 1 123 | plt.axhline(config['solved'], color=COLORS[3], linestyle='--', label='$[Solved]$') 124 | 125 | # colors = config['colors'][::-1][1:] # only for figure 1 126 | colors = config['colors'] 127 | for i, entry in enumerate(data): 128 | timesteps, averaged_curve, sigma, convergence = entry 129 | sns.lineplot(timesteps, 130 | averaged_curve, 131 | c=colors[i], 132 | label=config['labels'][i]) 133 | if convergence is not None: 134 | plt.plot([timesteps[-1], timesteps[-1] + 0.5], 135 | [averaged_curve.values[-1], averaged_curve.values[-1]], 136 | color=colors[i], 137 | linestyle='--') 138 | 139 | plt.fill_between(x=timesteps, 140 | y1=averaged_curve + sigma, 141 | y2=averaged_curve - sigma, 142 | 
facecolor=colors[i], 143 | alpha=0.1) 144 | if learning_curve: 145 | plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0)) 146 | 147 | plt.legend(loc=config['legend_loc'], frameon=True, framealpha=0.5) 148 | plt.grid(b=False) 149 | 150 | # plt.show() 151 | 152 | plt.savefig(fname=file_path, 153 | bbox_inches='tight', 154 | pad_inches=0) 155 | plt.close() 156 | -------------------------------------------------------------------------------- /common/utils/policy_evaluator.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import numpy as np 4 | import gym 5 | 6 | 7 | class PolicyEvaluator: 8 | def __init__(self, env_id, seed, policy, eval_file_path): 9 | self.env = gym.make(env_id) 10 | self.env.seed(seed) 11 | self.policy = policy 12 | self.eval_file = open(eval_file_path, mode='w') 13 | 14 | def evaluate(self, iteration, episodes=10, debug=True): 15 | 16 | episodes_stats = [] 17 | cumulative_reward = 0.0 18 | 19 | for _ in range(episodes): 20 | obs = self.env.reset() 21 | 22 | steps = 0 23 | total_reward = 0.0 24 | done = False 25 | 26 | while not done: 27 | action = self.policy.select_action(np.array(obs)) 28 | obs, reward, done, _ = self.env.step(action) 29 | 30 | # stats 31 | steps += 1 32 | total_reward += reward 33 | cumulative_reward += reward 34 | 35 | if debug: 36 | self.env.render() 37 | 38 | episodes_stats.append({ 39 | 'steps': steps, 40 | 'reward': total_reward 41 | }) 42 | 43 | json.dump({ 44 | 'iteration': iteration, 45 | 'reward': cumulative_reward, 46 | 'episodes': episodes, 47 | 'stats': episodes_stats 48 | }, self.eval_file, indent=2, sort_keys=True) 49 | 50 | self.eval_file.flush() 51 | 52 | self.env.close() 53 | 54 | return cumulative_reward / episodes 55 | 56 | def close(self): 57 | self.eval_file.close() 58 | 59 | -------------------------------------------------------------------------------- /common/utils/rollout_evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | LUNAR_LANDER_SOLVED_SCORE = 200.0 4 | ERGO_SOLVED_DISTANCE = 0.025 5 | PUSHER_SOLVED_DISTANCE = 0.25 # Radius=0.17 6 | 7 | 8 | def evaluate_policy(nagents, env, agent_policy, replay_buffer, eval_episodes, max_steps, freeze_agent=True, 9 | return_rewards=False, add_noise=False, log_distances=True, 10 | gail_rewarder=None, noise_scale=0.1, min_buffer_len=1000): 11 | """Evaluates a given policy in a particular environment, 12 | returns an array of rewards received from the evaluation step. 
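    When `return_rewards` is True, the flattened per-episode rewards are returned
    together with the per-episode final distances to the goal; otherwise one
    trajectory per agent is returned, built by concatenating each step's
    (state, action, next_state) along the last axis.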
13 | """ 14 | 15 | states = [[] for _ in range(nagents)] 16 | actions = [[] for _ in range(nagents)] 17 | next_states = [[] for _ in range(nagents)] 18 | rewards = [[] for _ in range(nagents)] 19 | ep_rewards = [] 20 | final_dists = [] 21 | 22 | for ep in range(eval_episodes): 23 | agent_total_rewards = np.zeros(nagents) 24 | state = env.reset() 25 | 26 | done = [False] * nagents 27 | add_to_buffer = [True] * nagents 28 | steps = 0 29 | training_iters = 0 30 | 31 | while not all(done) and steps <= max_steps: 32 | action = agent_policy.select_action(np.array(state)) 33 | 34 | if add_noise: 35 | action = action + np.random.normal(0, noise_scale, size=action.shape) 36 | action = action.clip(-1, 1) 37 | 38 | next_state, reward, done, info = env.step(action) 39 | if gail_rewarder is not None: 40 | reward = gail_rewarder.get_reward(np.concatenate([state, action], axis=-1)) 41 | 42 | for i, st in enumerate(state): 43 | if add_to_buffer[i]: 44 | states[i].append(st) 45 | actions[i].append(action[i]) 46 | next_states[i].append(next_state[i]) 47 | rewards[i].append(reward[i]) 48 | agent_total_rewards[i] += reward[i] 49 | training_iters += 1 50 | 51 | if replay_buffer is not None: 52 | done_bool = 0 if steps + 1 == max_steps else float(done[i]) 53 | replay_buffer.add((state[i], next_state[i], action[i], reward[i], done_bool)) 54 | 55 | if done[i]: 56 | # Avoid duplicates 57 | add_to_buffer[i] = False 58 | 59 | if log_distances: 60 | final_dists.append(info[i]['goal_dist']) 61 | 62 | state = next_state 63 | steps += 1 64 | 65 | # Train for total number of env iterations 66 | if not freeze_agent and len(replay_buffer.storage) > min_buffer_len: 67 | agent_policy.train(replay_buffer=replay_buffer, iterations=training_iters) 68 | 69 | ep_rewards.append(agent_total_rewards) 70 | 71 | if return_rewards: 72 | return np.array(ep_rewards).flatten(), np.array(final_dists).flatten() 73 | 74 | trajectories = [] 75 | for i in range(nagents): 76 | trajectories.append(np.concatenate( 77 | [ 78 | np.array(states[i]), 79 | np.array(actions[i]), 80 | np.array(next_states[i]) 81 | ], axis=-1)) 82 | 83 | return trajectories 84 | 85 | 86 | def check_solved(env_name, criteria): 87 | if env_name.find('Lunar') != -1: 88 | return np.median(criteria) > LUNAR_LANDER_SOLVED_SCORE 89 | elif env_name.find('Ergo') != -1: 90 | return np.median(criteria) < ERGO_SOLVED_DISTANCE 91 | else: 92 | return np.median(criteria) < PUSHER_SOLVED_DISTANCE 93 | 94 | 95 | def check_new_best(env_name, new, current): 96 | if env_name.find('Lunar') != -1: 97 | return new > current 98 | else: 99 | return new < current 100 | -------------------------------------------------------------------------------- /common/utils/sim_agent_helper.py: -------------------------------------------------------------------------------- 1 | from common.agents.svpg_simulator_agent import SVPGSimulatorAgent 2 | 3 | def generate_simulator_agent(args): 4 | return SVPGSimulatorAgent( 5 | reference_env_id=args.reference_env_id, 6 | randomized_env_id=args.randomized_env_id, 7 | randomized_eval_env_id=args.randomized_eval_env_id, 8 | agent_name=args.agent_name, 9 | nagents=args.nagents, 10 | nparams=args.nparams, 11 | temperature=args.temperature, 12 | svpg_rollout_length=args.svpg_rollout_length, 13 | svpg_horizon=args.svpg_horizon, 14 | max_step_length=args.max_step_length, 15 | reward_scale=args.reward_scale, 16 | initial_svpg_steps=args.initial_svpg_steps, 17 | max_env_timesteps=args.max_env_timesteps, 18 | episodes_per_instance=args.episodes_per_instance, 19 | 
discrete_svpg=args.discrete_svpg, 20 | load_discriminator=args.load_discriminator, 21 | freeze_discriminator=args.freeze_discriminator, 22 | freeze_agent=args.freeze_agent, 23 | seed=args.seed, 24 | particle_path=args.particle_path, 25 | ) -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/experiments/__init__.py -------------------------------------------------------------------------------- /experiments/domainrand/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/experiments/domainrand/__init__.py -------------------------------------------------------------------------------- /experiments/domainrand/args.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import logging 3 | 4 | logger = logging.getLogger(__name__) 5 | 6 | 7 | def get_args(): 8 | parser = argparse.ArgumentParser(description='Domain Randomization Driver') 9 | 10 | subparsers = parser.add_subparsers(help='sub-command help', dest='subparser_name') 11 | 12 | lunar_subparser = subparsers.add_parser('lunar', help='lunar lander subparser') 13 | pusher_subparser = subparsers.add_parser('pusher', help='puhser-3dof subparser') 14 | ergo_subparser = subparsers.add_parser('ergo', help='ergoreacher subparser') 15 | backlash_subparser = subparsers.add_parser('backlash', help='ergoreacher with backlash subparser') 16 | ergosix_subparser = subparsers.add_parser('ergosix', help='ergoreacher 6dpf subparser') 17 | 18 | lunar_subparser.add_argument("--randomized-env-id", default="LunarLanderDefault-v0", 19 | type=str, help="Name of the reference environment") 20 | lunar_subparser.add_argument("--reference-env-id", default="LunarLanderDefault-v0", 21 | type=str, help="Name of the randomized environment") 22 | lunar_subparser.add_argument("--randomized-eval-env-id", default="LunarLanderRandomized-v0", 23 | type=str, help="Name of the randomized environment") 24 | lunar_subparser.add_argument("--nparams", default=1, type=int, help="Number of randomization parameters") 25 | lunar_subparser.add_argument("--eval-randomization-discretization", default=50, type=int, help="number of eval points") 26 | lunar_subparser.add_argument("--max-env-timesteps", default=1000, type=int, 27 | help="environment timeout") 28 | lunar_subparser.add_argument("--plot-frequency", default=5, type=int, help="how often to plot / log") 29 | lunar_subparser.add_argument("--nagents", default=10, type=int, 30 | help="Number of SVPG particle") 31 | 32 | pusher_subparser.add_argument("--randomized-env-id", default="Pusher3DOFDefault-v0", 33 | type=str, help="Name of the reference environment") 34 | pusher_subparser.add_argument("--reference-env-id", default="Pusher3DOFDefault-v0", 35 | type=str, help="Name of the randomized environment") 36 | pusher_subparser.add_argument("--randomized-eval-env-id", default="Pusher3DOFRandomized-v0", 37 | type=str, help="Name of the randomized environment") 38 | pusher_subparser.add_argument("--nparams", default=2, type=int, help="Number of randomization parameters") 39 | pusher_subparser.add_argument("--eval-randomization-discretization", default=20, type=int, help="number of eval points") 40 
| pusher_subparser.add_argument("--max-env-timesteps", default=100, type=int, 41 | help="environment timeout") 42 | pusher_subparser.add_argument("--plot-frequency", default=5, type=int, help="how often to plot / log") 43 | pusher_subparser.add_argument("--nagents", default=10, type=int, 44 | help="Number of SVPG particle") 45 | 46 | ergo_subparser.add_argument("--randomized-env-id", default="ErgoReacher4DOFDefault-v0", 47 | type=str, help="Name of the reference environment") 48 | ergo_subparser.add_argument("--reference-env-id", default="ErgoReacher4DOFDefault-v0", 49 | type=str, help="Name of the randomized environment") 50 | ergo_subparser.add_argument("--randomized-eval-env-id", default="ErgoReacher4DOFRandomizedEasy-v0", 51 | type=str, help="Name of the randomized environment") 52 | ergo_subparser.add_argument("--nparams", default=8, type=int, help="Number of randomization parameters") 53 | ergo_subparser.add_argument("--eval-randomization-discretization", default=5, type=int, help="number of eval points") 54 | ergo_subparser.add_argument("--max-env-timesteps", default=100, type=int, 55 | help="environment timeout") 56 | ergo_subparser.add_argument("--plot-frequency", default=50, type=int, help="how often to plot / log") 57 | ergo_subparser.add_argument("--nagents", default=10, type=int, 58 | help="Number of SVPG particle") 59 | 60 | backlash_subparser.add_argument("--randomized-env-id", default="ErgoReacherRandomizedBacklashEasy-v0", 61 | type=str, help="Name of the reference environment") 62 | backlash_subparser.add_argument("--reference-env-id", default="ErgoReacher-DualGoal-Easy-Default-Headless-v0", 63 | type=str, help="Name of the randomized environment") 64 | backlash_subparser.add_argument("--randomized-eval-env-id", default="ErgoReacherRandomizedBacklashEasy-v0", 65 | type=str, help="Name of the randomized environment") 66 | backlash_subparser.add_argument("--nparams", default=8, type=int, help="Number of randomization parameters") 67 | backlash_subparser.add_argument("--eval-randomization-discretization", default=20, type=int, help="number of eval points") 68 | backlash_subparser.add_argument("--max-env-timesteps", default=200, type=int, 69 | help="environment timeout") 70 | backlash_subparser.add_argument("--plot-frequency", default=50, type=int, help="how often to plot / log") 71 | backlash_subparser.add_argument("--nagents", default=10, type=int, 72 | help="Number of SVPG particle") 73 | 74 | ergosix_subparser.add_argument("--randomized-env-id", default="ErgoReacher-6Dof-Default-Headless-v0", 75 | type=str, help="Name of the reference environment") 76 | ergosix_subparser.add_argument("--reference-env-id", default="ErgoReacher-6Dof-Default-Headless-v0", 77 | type=str, help="Name of the randomized environment") 78 | ergosix_subparser.add_argument("--randomized-eval-env-id", default="ErgoReacher-6Dof-Randomized-Headless-v0", 79 | type=str, help="Name of the randomized environment") 80 | ergosix_subparser.add_argument("--nparams", default=12, type=int, help="Number of randomization parameters") 81 | ergosix_subparser.add_argument("--eval-randomization-discretization", default=20, type=int, help="number of eval points") 82 | ergosix_subparser.add_argument("--max-env-timesteps", default=100, type=int, 83 | help="environment timeout") 84 | ergosix_subparser.add_argument("--plot-frequency", default=5, type=int, help="how often to plot / log") 85 | ergosix_subparser.add_argument("--nagents", default=10, type=int, 86 | help="Number of SVPG particle") 87 | 88 | for subparser in 
[lunar_subparser, pusher_subparser, ergo_subparser, backlash_subparser, ergosix_subparser]: 89 | subparser.add_argument("--experiment-name", type=str, 90 | choices=['bootstrapping', 'unfreeze-policy']) 91 | subparser.add_argument("--experiment-prefix", default="experiment", type=str, help="Any custom string to attach") 92 | subparser.add_argument("--agent-name", default="baseline", type=str, 93 | help="Which Agent to benchmark") 94 | subparser.add_argument("--temperature", default=10.0, type=float, 95 | help="SVPG temperature") 96 | subparser.add_argument("--svpg-rollout-length", default=5, type=int, 97 | help="length of one svpg particle rollout") 98 | subparser.add_argument("--svpg-horizon", default=25, type=int, 99 | help="how often to fully reset svpg particles") 100 | 101 | subparser.add_argument("--max-step-length", default=0.05, 102 | type=float, help="step length / delta in parameters; If discrete, this is fixed, If continuous, this is max.") 103 | 104 | subparser.add_argument("--reward-scale", default=1.0, type=float, 105 | help="reward multipler for discriminator") 106 | subparser.add_argument("--initial-svpg-steps", default=0, type=float, 107 | help="number of svpg steps to take before updates") 108 | subparser.add_argument("--max-agent-timesteps", default=1e6, type=float, 109 | help="max iterations, counted in terms of AGENT env steps") 110 | subparser.add_argument("--episodes-per-instance", default=1, type=int, 111 | help="number of episodes to rollout the agent for per sim instance") 112 | 113 | subparser.add_argument("--kld-coefficient", default=0.00, type=float, help="kld coefficient for particles") 114 | subparser.add_argument("--discrete-svpg", action="store_true", help="discrete SVPG") 115 | subparser.add_argument("--continuous-svpg", action="store_true", help="continuous SVPG") 116 | subparser.add_argument("--save-particles", action="store_true", help="store the particle policies") 117 | subparser.add_argument("--particle-path", default="", type=str, help="where to load particles from") 118 | subparser.add_argument("--freeze-svpg", action="store_true", help="Freeze SVPG or not") 119 | 120 | subparser.add_argument("--pretrain-discriminator", help="pretrain discriminator or not") 121 | subparser.add_argument("--load-discriminator", action="store_true", help="load discriminator or not") 122 | subparser.add_argument("--load-agent", action="store_true", help="load an agent or not") 123 | subparser.add_argument("--freeze-discriminator", action="store_true", help="freeze discriminator (no training)") 124 | subparser.add_argument("--freeze-agent", action="store_true", help="freeze agent (no training)") 125 | 126 | subparser.add_argument("--seed", default=123, type=int) 127 | subparser.add_argument("--use-bootstrapping-results", action="store_true", help="where to look when running batch-reward-anaylsis") 128 | 129 | return parser.parse_args() 130 | 131 | def check_args(args): 132 | experiment_name = args.experiment_name 133 | 134 | assert args.nagents > 2, "TODO: Weird bug" 135 | assert args.discrete_svpg or args.continuous_svpg and not (args.discrete_svpg and args.continuous_svpg), "Specify continuous OR discrete" 136 | 137 | if experiment_name == 'batch-reward-anaylsis': 138 | assert args.load_agent 139 | assert args.episodes_per_instance >= 5, "Need to run atleast 5+ runs when doing reward plots" 140 | return 141 | elif experiment_name.find('reward') != -1: 142 | assert args.episodes_per_instance > 1, "Probably want more than just one eval_episode for evaluation?" 
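    # Bootstrapping experiments reuse a pretrained discriminator but must keep
    # training the agent policy, hence the two checks below.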
143 | elif experiment_name == 'bootstrapping': 144 | assert args.load_discriminator, "Need to load discriminator" 145 | assert args.freeze_agent == False, "Need to unfreeze agent" 146 | 147 | assert args.svpg_rollout_length < 25, "Rollout length likely too long - SVPG will likely need more frequent feedback" 148 | assert args.svpg_horizon > 10, "Horizon likely too short for consistency - might reset SVPG to random positions too frequently" 149 | assert args.episodes_per_instance > 0, "Must provide episodes_per_instance" 150 | 151 | if args.pretrain_discriminator: 152 | assert args.load_discriminator == True, "If pretraining, you should also load" 153 | 154 | if args.discrete_svpg: 155 | assert args.max_step_length < 0.1, "Step length for discrete_svpg too large" 156 | 157 | if args.initial_svpg_steps >= args.max_agent_timesteps: 158 | logger.warning("YOU WILL NOT TRAIN THE SVPG AGENT") 159 | 160 | if not args.freeze_discriminator and not args.load_discriminator: 161 | logger.warning("YOU ARE TRAINING THE DISCRIMINATOR FROM SCRATCH") 162 | 163 | if not args.load_agent: 164 | logger.warning("YOU ARE TRAINING THE AGENT POLICY FROM SCRATCH") 165 | 166 | if args.randomized_env_id == args.reference_env_id: 167 | logger.warning("REFERENCE AND RANDOMIZED IDs ARE SAME") -------------------------------------------------------------------------------- /experiments/domainrand/batch_reward_analysis.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('Agg') 3 | 4 | import re 5 | import os 6 | import glob 7 | import numpy as np 8 | import torch 9 | import gym 10 | import argparse 11 | import json 12 | import logging 13 | 14 | from itertools import combinations 15 | 16 | from common.utils.logging import setup_experiment_logs 17 | 18 | from experiments.domainrand.args import get_args, check_args 19 | 20 | 21 | def get_converged_modelpaths(paths): 22 | """ 23 | Function to find the learning curves and best generalization curves for each seed 24 | """ 25 | 26 | paper_path = paths['paper'] 27 | agent_paths = os.listdir(paper_path) 28 | 29 | learning_curves_files = glob.glob(os.path.join(os.getcwd(), paper_path, 'learning-curves*.npz')) 30 | generalization_files = glob.glob(os.path.join(os.getcwd(), paper_path, 'best-generalization*.npz')) 31 | 32 | print(learning_curves_files) 33 | 34 | learning_curves_combinations = combinations(learning_curves_files, 5) 35 | generalization_combinations = combinations(generalization_files, 5) 36 | 37 | agent_name_start = paper_path.find('v0') + 3 38 | agent_name_end = paper_path.find('-exp') 39 | 40 | agent_name = paper_path[agent_name_start:agent_name_end] 41 | 42 | return agent_name, list(learning_curves_files), generalization_files 43 | 44 | 45 | if __name__ == '__main__': 46 | args = get_args() 47 | experiment_name = 'unfreeze-policy' if not args.use_bootstrapping_results else 'bootstrapping' 48 | paths = setup_experiment_logs(experiment_name=experiment_name, args=args) 49 | check_args(args, experiment_name=experiment_name) 50 | 51 | agent_name, learning_curves_files, generalization_files = get_converged_modelpaths(paths) 52 | nseeds = len(learning_curves_files) 53 | 54 | nmetrics = len(np.load(learning_curves_files[0]).files) 55 | 56 | # Learning curves 57 | # Find Max Length and resize each array to that length 58 | 59 | # for combination in combinations: for lc in combination 60 | 61 | # for i, learning_curves_files in enumerate(learning_curves_combinations): 62 | # print(i, 
learning_curves_files, '\n\n') 63 | max_length = 0 64 | for lc in learning_curves_files: 65 | loaded_curve = np.load(lc)['ref_learning_curve_mean'] 66 | if loaded_curve.shape[0] > max_length: 67 | max_length = loaded_curve.shape[0] 68 | 69 | all_curves = np.zeros((nseeds, max_length)) 70 | all_metrics = {} 71 | 72 | for metric in np.load(learning_curves_files[0]).files: 73 | all_metrics[metric] = np.copy(all_curves) 74 | 75 | # Load each seed's metric (5 - 9 per file) 76 | for seed, lc in enumerate(learning_curves_files): 77 | loaded_curve = np.load(lc) 78 | for metric in loaded_curve.files: 79 | # hacky "Broadcast" of array 80 | length = len(loaded_curve[metric]) 81 | all_metrics[metric][seed][:length] = loaded_curve[metric] 82 | # If not same size, some will be 0s, do so we can use np.nanmean 83 | try: 84 | all_metrics[metric][seed][all_metrics[metric][seed] == 0] = np.nan 85 | except: 86 | pass 87 | 88 | all_metrics['label'] = np.array([agent_name]) 89 | 90 | np.savez(os.path.join(paths['paper'],'{}-{}-batched-learning-curves.npz'.format(0, agent_name)), **all_metrics) 91 | 92 | # Generalization Curves 93 | loaded_curve = np.load(generalization_files[0])['generalization_metric'] 94 | generalization_shape = loaded_curve.shape 95 | 96 | all_seeds_generalization = np.zeros((nseeds,) + generalization_shape) 97 | 98 | for seed, lc in enumerate(generalization_files): 99 | loaded_curve = np.load(lc) 100 | all_seeds_generalization[seed] = loaded_curve['generalization_metric'] 101 | 102 | np.savez(os.path.join(paths['paper'],'{}-batched-generalizations.npz'.format(agent_name)), 103 | all_seeds_generalization=all_seeds_generalization) 104 | -------------------------------------------------------------------------------- /experiments/domainrand/experiment_driver.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('Agg') 3 | 4 | import random 5 | import logging 6 | 7 | import numpy as np 8 | import torch 9 | import gym 10 | import argparse 11 | import os 12 | 13 | from common.agents.ddpg.ddpg import DDPG 14 | from common.agents.ddpg_actor import DDPGActor 15 | from common.utils.visualization import Visualizer 16 | from common.utils.sim_agent_helper import generate_simulator_agent 17 | from common.utils.logging import setup_experiment_logs, reshow_hyperparameters, StatsLogger 18 | 19 | from experiments.domainrand.args import get_args, check_args 20 | 21 | 22 | if __name__ == '__main__': 23 | args = get_args() 24 | paths = setup_experiment_logs(args) 25 | check_args(args) 26 | 27 | torch.manual_seed(args.seed) 28 | torch.cuda.manual_seed(args.seed) 29 | np.random.seed(args.seed) 30 | 31 | stats_logger = StatsLogger(args) 32 | visualizer = Visualizer(randomized_env_id=args.randomized_eval_env_id, seed=args.seed) 33 | 34 | reference_env = gym.make(args.reference_env_id) 35 | 36 | if args.freeze_agent: 37 | # only need the actor 38 | agent_policy = DDPGActor( 39 | state_dim=reference_env.observation_space.shape[0], 40 | action_dim=reference_env.action_space.shape[0], 41 | agent_name=args.agent_name, 42 | load_agent=args.load_agent 43 | ) 44 | else: 45 | agent_policy = DDPG( 46 | state_dim=reference_env.observation_space.shape[0], 47 | action_dim=reference_env.action_space.shape[0], 48 | agent_name=args.agent_name, 49 | ) 50 | 51 | if args.load_agent: 52 | agent_policy.load_model() 53 | 54 | 55 | simulator_agent = generate_simulator_agent(args) 56 | 57 | svpg_timesteps = 0 58 | 59 | while simulator_agent.agent_timesteps < 
args.max_agent_timesteps: 60 | if svpg_timesteps % args.plot_frequency == 0: 61 | generalization_metric = visualizer.generate_ground_truth(simulator_agent, agent_policy, svpg_timesteps, 62 | log_path=paths['groundtruth_logs']) 63 | 64 | np.savez('{}/generalization-seed{}.npz'.format(paths['paper'], args.seed), 65 | generalization_metric=generalization_metric, 66 | svpg_timesteps=svpg_timesteps, 67 | learning_curve_timesteps=simulator_agent.agent_timesteps 68 | ) 69 | 70 | visualizer.plot_reward(simulator_agent, agent_policy, 71 | svpg_timesteps, log_path=paths['policy_logs'], plot_path=paths['policy_plots']) 72 | visualizer.plot_value(simulator_agent, agent_policy, 73 | svpg_timesteps, log_path=paths['policy_logs'], plot_path=paths['policy_plots']) 74 | visualizer.plot_discriminator_reward(simulator_agent, agent_policy, 75 | svpg_timesteps, log_path=paths['policy_logs'], plot_path=paths['policy_plots']) 76 | 77 | if not args.freeze_svpg: 78 | visualizer.plot_sampling_frequency(simulator_agent, agent_policy, 79 | svpg_timesteps, log_path=paths['sampling_logs'], plot_path=paths['sampling_plots']) 80 | 81 | logging.info("SVPG TS: {}, Agent TS: {}".format(svpg_timesteps, simulator_agent.agent_timesteps)) 82 | 83 | solved, info = simulator_agent.select_action(agent_policy) 84 | svpg_timesteps += 1 85 | 86 | if info is not None: 87 | new_best = stats_logger.update(args, paths, info) 88 | 89 | if new_best: 90 | agent_policy.save(filename='best-seed{}'.format(args.seed), directory=paths['paper']) 91 | if args.save_particles: 92 | simulator_agent.svpg.save(directory=paths['particles']) 93 | 94 | generalization_metric = visualizer.generate_ground_truth(simulator_agent, agent_policy, svpg_timesteps, 95 | log_path=paths['groundtruth_logs']) 96 | 97 | np.savez('{}/best-generalization-seed{}.npz'.format(paths['paper'], args.seed), 98 | generalization_metric=generalization_metric, 99 | svpg_timesteps=svpg_timesteps, 100 | learning_curve_timesteps=simulator_agent.agent_timesteps 101 | ) 102 | 103 | if solved: 104 | logging.info("[SOLVED]") 105 | 106 | agent_policy.save(filename='final-seed{}'.format(args.seed), directory=paths['paper']) 107 | visualizer.plot_reward(simulator_agent, agent_policy, 108 | svpg_timesteps, log_path=paths['policy_logs'], plot_path=paths['policy_plots']) 109 | visualizer.plot_sampling_frequency(simulator_agent, agent_policy, 110 | svpg_timesteps, log_path=paths['sampling_logs'], plot_path=paths['sampling_plots']) 111 | reshow_hyperparameters(args, paths) 112 | -------------------------------------------------------------------------------- /experiments/domainrand/pusher_grid_generalization.py: -------------------------------------------------------------------------------- 1 | import matplotlib 2 | matplotlib.use('Agg') 3 | 4 | import random 5 | import logging 6 | 7 | import numpy as np 8 | import torch 9 | import gym 10 | import argparse 11 | import os 12 | import glob 13 | import json 14 | 15 | from common.agents.ddpg.ddpg import DDPG 16 | from common.agents.ddpg_actor import DDPGActor 17 | from common.agents.svpg_simulator_agent import SVPGSimulatorAgent 18 | from common.envs import * 19 | from common.utils.visualization import Visualizer 20 | from common.utils.logging import setup_experiment_logs, reshow_hyperparameters 21 | 22 | from experiments.domainrand.args import get_args, check_args 23 | 24 | from common.utils.rollout_evaluation import evaluate_policy 25 | from common.envs.randomized_vecenv import make_vec_envs 26 | 27 | NEVAL_EPISODES = 10 28 | N_PROCESSES = 5 29 | 
N_SEEDS = 5 30 | 31 | if __name__ == '__main__': 32 | args = get_args() 33 | paths = setup_experiment_logs(experiment_name='unfreeze-policy', args=args) 34 | check_args(args, experiment_name='unfreeze-policy') 35 | reference_env = gym.make(args.reference_env_id) 36 | 37 | torch.manual_seed(args.seed) 38 | torch.cuda.manual_seed(args.seed) 39 | np.random.seed(args.seed) 40 | 41 | environment_prototype = 'Pusher3DOFGeneralization{}{}-v0' 42 | 43 | rewards_grid = np.zeros((3, 3, 5, NEVAL_EPISODES)) 44 | finaldists_grid = np.zeros((3, 3, 5, NEVAL_EPISODES)) 45 | 46 | for i in range(3): 47 | for j in range(3): 48 | randomized_env = make_vec_envs(environment_prototype.format(i, j), args.seed + i + j, N_PROCESSES) 49 | actor_paths = glob.glob(os.path.join(os.getcwd(), paths['paper'], 'best-seed*_actor.pth')) 50 | print(actor_paths) 51 | for actor_idx, actor_path in enumerate(actor_paths): 52 | agent_policy = DDPGActor( 53 | state_dim=reference_env.observation_space.shape[0], 54 | action_dim=reference_env.action_space.shape[0], 55 | agent_name=args.agent_name, 56 | load_agent=True, 57 | model_path=actor_path 58 | ) 59 | 60 | rewards_rand, dist_rand = evaluate_policy(nagents=N_PROCESSES, 61 | env=randomized_env, 62 | agent_policy=agent_policy, 63 | replay_buffer=None, 64 | eval_episodes=NEVAL_EPISODES // N_PROCESSES, 65 | max_steps=args.max_env_timesteps, 66 | return_rewards=True, 67 | add_noise=False, 68 | log_distances=True) 69 | 70 | rewards_grid[i, j, actor_idx, :] = rewards_rand 71 | finaldists_grid[i, j, actor_idx, :] = dist_rand 72 | 73 | reshow_hyperparameters(args, paths) 74 | print(finaldists_grid) 75 | 76 | np.savez(os.path.join(paths['paper'], 'grid_generalization.npz'), 77 | rewards_grid=rewards_grid, 78 | finaldists_grid=finaldists_grid 79 | ) 80 | -------------------------------------------------------------------------------- /real_robot.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | 4 | matplotlib.use('Agg') 5 | 6 | import random 7 | import logging 8 | 9 | import time 10 | import numpy as np 11 | import torch 12 | import gym 13 | import argparse 14 | import os 15 | import os.path as osp 16 | 17 | from tqdm import tqdm, trange 18 | 19 | from common.agents.ddpg_actor import DDPGActor 20 | import poppy_helpers 21 | import gym_ergojr 22 | import cv2 23 | 24 | parser = argparse.ArgumentParser(description='Real Robot Experiment Driver') 25 | 26 | parser.add_argument('--nepisodes', type=int, default=25, help='Number of trials per *seed*') 27 | parser.add_argument('--experiment-prefix', type=str, default='real', help='Prefix to append to logs') 28 | parser.add_argument('--log-dir', type=str, default='results/real-robot', help='Log Directory Prefix') 29 | parser.add_argument('--model-dir', type=str, default='saved-models/real-robot', help='Model Directory Prefix') 30 | 31 | args = parser.parse_args() 32 | 33 | TIMESTAMP = time.strftime("%y%m%d-%H%M%S") 34 | MAX_EPISODE_STEPS = 100 35 | EPISODES = args.nepisodes 36 | 37 | # Policies to look for 38 | policies = ['baseline', 'usdr', 'adr'] 39 | 40 | env = gym.make('ErgoReacher-Live-v1') 41 | # env = gym.make('ErgoReacher-Graphical-Simple-Halfdisk-v1') 42 | 43 | npa = np.array 44 | 45 | img_buffer = [] 46 | 47 | if not osp.exists(args.log_dir): 48 | os.makedirs(args.log_dir) 49 | 50 | with h5py.File("{}/{}-{}.hdf5".format(args.log_dir, args.experiment_prefix, TIMESTAMP), "w") as f: 51 | for policy_type in tqdm(policies): 52 | log_group = f.create_group(policy_type) 
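        # Each policy type gets its own HDF5 group; the datasets below are indexed by
        # (model, episode, step) and hold rewards, goal distances, concatenated
        # (obs, action, next_obs) trajectories, and the raw 480x640x3 camera frames.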
53 | model_path = osp.join(args.model_dir, policy_type) 54 | 55 | no_models = len(os.listdir(model_path)) 56 | 57 | rewards = log_group.create_dataset("rewards", (no_models, EPISODES, MAX_EPISODE_STEPS), dtype=np.float32) 58 | distances = log_group.create_dataset("distances", (no_models, EPISODES, MAX_EPISODE_STEPS), dtype=np.float32) 59 | trajectories = log_group.create_dataset("trajectories", (no_models, EPISODES, MAX_EPISODE_STEPS, 24), 60 | dtype=np.float32) 61 | imgs = log_group.create_dataset("images", (no_models, EPISODES, MAX_EPISODE_STEPS, 480, 640, 3), 62 | dtype=np.uint8) 63 | 64 | tqdm.write('Starting analysis of {}'.format(policy_type)) 65 | 66 | for model_idx, actorpth in enumerate(tqdm(os.listdir(model_path))): 67 | agent_policy = DDPGActor( 68 | state_dim=env.observation_space.shape[0], 69 | action_dim=env.action_space.shape[0], 70 | agent_name='real-{}'.format(policy_type), 71 | load_agent=True, 72 | model_path=osp.join(model_path, actorpth) 73 | ) 74 | 75 | for ep_num in trange(EPISODES): 76 | obs = env.reset() 77 | done = False 78 | cumulative = 0 79 | counter = 0 80 | while not done and counter < MAX_EPISODE_STEPS: 81 | action = agent_policy.select_action(obs) 82 | nobs, reward, done, misc = env.step(action) 83 | # tqdm.write("obs: {} {} ".format(np.around(obs, 2), np.around(action, 2))) 84 | cumulative += reward 85 | trajectories[model_idx, ep_num, counter, :] = np.concatenate([obs, action, nobs]) 86 | rewards[model_idx, ep_num, counter] = reward 87 | distances[model_idx, ep_num, counter] = misc["distance"] 88 | imgs[model_idx, ep_num, counter, :, :, :] = np.copy(misc["img"]) 89 | # print( 90 | # np.around(trajectories[model_idx, ep_num, counter, :], 1), 91 | # np.around(rewards[model_idx, ep_num, counter], 4), 92 | # np.around(distances[model_idx, ep_num, counter], 4) 93 | # ) 94 | 95 | obs = np.copy(nobs) 96 | counter += 1 97 | 98 | tqdm.write('Episode: {}, Reward: {}'.format(ep_num, cumulative)) 99 | 100 | # write to disk after every model run 101 | f.flush() 102 | env.reset() 103 | -------------------------------------------------------------------------------- /real_robot_torquesweep.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import matplotlib 3 | 4 | matplotlib.use('Agg') 5 | import time 6 | import numpy as np 7 | import gym 8 | import argparse 9 | import os 10 | import os.path as osp 11 | 12 | from tqdm import tqdm, trange 13 | 14 | from common.agents.ddpg_actor import DDPGActor 15 | import poppy_helpers 16 | import gym_ergojr 17 | 18 | parser = argparse.ArgumentParser(description='Real Robot Experiment Driver') 19 | 20 | parser.add_argument('--nepisodes', type=int, default=25, help='Number of trials per *seed*') 21 | parser.add_argument('--torques', type=list, nargs='+', default=[25, 50, 100, 200, 400], 22 | help='torque settings to iterate') 23 | parser.add_argument('--experiment-prefix', type=str, default='real', help='Prefix to append to logs') 24 | parser.add_argument('--log-dir', type=str, default='results/real-robot', help='Log Directory Prefix') 25 | parser.add_argument('--model-dir', type=str, default='saved-models/real-robot', help='Model Directory Prefix') 26 | parser.add_argument('--cont', type=str, default='190329-180631', help='To continue existing file, enter timestamp here') 27 | 28 | args = parser.parse_args() 29 | 30 | if len(args.cont) == 0: 31 | TIMESTAMP = time.strftime("%y%m%d-%H%M%S") 32 | file_flag = "w" 33 | 34 | else: 35 | TIMESTAMP = args.cont 36 | file_flag = "r+" 37 | 38 | 
file_path = "{}/{}-{}.hdf5".format(args.log_dir, args.experiment_prefix, TIMESTAMP) 39 | 40 | MAX_EPISODE_STEPS = 100 41 | EPISODES = args.nepisodes 42 | TORQUES = args.torques 43 | 44 | # Policies to look for 45 | policies = ['baseline', 'usdr', 'adr'] 46 | 47 | env = gym.make('ErgoReacher-Live-v1') 48 | # env = gym.make('ErgoReacher-Graphical-Simple-Halfdisk-v1') 49 | 50 | npa = np.array 51 | 52 | img_buffer = [] 53 | 54 | if not osp.exists(args.log_dir): 55 | os.makedirs(args.log_dir) 56 | 57 | with h5py.File(file_path, file_flag) as f: 58 | for policy_type in tqdm(policies, desc="approaches"): 59 | if policy_type not in f: # if dataset doesn't have these tables 60 | log_group = f.create_group(policy_type) 61 | rewards = log_group.create_dataset("rewards", (no_models, len(TORQUES), EPISODES, MAX_EPISODE_STEPS), 62 | dtype=np.float32) 63 | distances = log_group.create_dataset("distances", (no_models, len(TORQUES), EPISODES, MAX_EPISODE_STEPS), 64 | dtype=np.float32) 65 | trajectories = log_group.create_dataset("trajectories", 66 | (no_models, len(TORQUES), EPISODES, MAX_EPISODE_STEPS, 24), 67 | dtype=np.float32) 68 | imgs = log_group.create_dataset("images", 69 | (no_models, len(TORQUES), EPISODES, MAX_EPISODE_STEPS, 480, 640, 3), 70 | dtype=np.uint8, compression="lzf") 71 | else: # if tables are in dataset, grab their pointers 72 | rewards = f.get("/{}/{}".format(policy_type, "rewards")) 73 | distances = f.get("/{}/{}".format(policy_type, "distances")) 74 | trajectories = f.get("/{}/{}".format(policy_type, "trajectories")) 75 | imgs = f.get("/{}/{}".format(policy_type, "images")) 76 | 77 | model_path = osp.join(args.model_dir, policy_type) 78 | 79 | no_models = len(os.listdir(model_path)) 80 | 81 | tqdm.write('Starting analysis of {}'.format(policy_type)) 82 | 83 | for model_idx, actorpth in enumerate(tqdm(os.listdir(model_path), desc="models....")): 84 | agent_policy = DDPGActor( 85 | state_dim=env.observation_space.shape[0], 86 | action_dim=env.action_space.shape[0], 87 | agent_name='real-{}'.format(policy_type), 88 | load_agent=True, 89 | model_path=osp.join(model_path, actorpth) 90 | ) 91 | 92 | for torque_idx, torque in enumerate(tqdm(TORQUES, desc="torques...")): 93 | 94 | for ep_num in trange(EPISODES, desc="episodes.."): 95 | non_zero_steps = np.count_nonzero(trajectories[model_idx, torque_idx, ep_num], axis=1) 96 | 97 | if np.count_nonzero(non_zero_steps) == 0: 98 | obs = env.reset() 99 | env.unwrapped.setSpeed(torque) 100 | done = False 101 | cumulative = 0 102 | counter = 0 103 | img_buffer = [] 104 | while counter < MAX_EPISODE_STEPS: 105 | action = agent_policy.select_action(obs) 106 | nobs, reward, _, misc = env.step(action) 107 | cumulative += reward 108 | trajectories[model_idx, torque_idx, ep_num, counter, :] = np.concatenate( 109 | [obs, action, nobs]) 110 | rewards[model_idx, torque_idx, ep_num, counter] = reward 111 | distances[model_idx, torque_idx, ep_num, counter] = misc["distance"] 112 | img_buffer.append(np.copy(misc["img"])) 113 | 114 | obs = np.copy(nobs) 115 | counter += 1 116 | 117 | imgs[model_idx, torque_idx, ep_num, :counter, :, :, :] = img_buffer 118 | 119 | # tqdm.write('Episode: {}, Reward: {}'.format(ep_num, cumulative)) 120 | 121 | # write to disk after every model run 122 | f.flush() 123 | 124 | env.reset() 125 | -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | # TL;DR Scripting 2 | 3 | There are two scripts that 
help execute the experiments 4 | as described in the [README](../README.md): 5 | 6 | 1. `scripts/multiseed.sh` for executing an experiment with multiple seeds. 7 | 2. `scripts/with_seed.sh` for executing one experiment with one seed. 8 | 9 | The abbreviated names for each experiment are defined in the `experiments.sh` 10 | script located in this folder. 11 | 12 | We currently support the following experiments: 13 | 14 | **Baselines** 15 | - `baseline_pure` 16 | - `baseline_fulldr` 17 | 18 | **Unfreeze Policy** 19 | - `unfreeze_policy_pretrained` 20 | - `unfreeze_policy_scratch` 21 | 22 | **Unfreeze Discriminator** 23 | - `unfreeze_discriminator_pretrained` 24 | - `unfreeze_discriminator_scratch` 25 | 26 | **SVPG 2D Full** 27 | - `svpg2d_ours` 28 | - `svpg2d_fulldr` 29 | 30 | #### Examples 31 | 32 | Use `multiseed.sh` to execute an experiment with multiple, consecutive seeds. 33 | The syntax for `multiseed.sh` is as follows: 34 | 35 | ```bash 36 | scripts/multiseed.sh [environment] [user] [experiment] [starting seed] [number of seeds] 37 | ``` 38 | 39 | For instance: 40 | 41 | ```bash 42 | scripts/multiseed.sh bluewire manfred svpg2d_fulldr 0 5 43 | ``` 44 | 45 | executes 5 seeds `[0, 1, 2, 3, 4]` of the `svpg2d_fulldr` experiment 46 | using the `manfred.sh` configuration for the `bluewire` environment. 47 | 48 | Alternatively, you can use `with_seed.sh` to run an experiment with a single seed. 49 | The syntax for `with_seed.sh` is as follows: 50 | 51 | ```bash 52 | scripts/with_seed.sh [environment] [user] [experiment] [seed] 53 | ``` 54 | 55 | Then, 56 | 57 | ```bash 58 | scripts/with_seed.sh slurm bhairav svpg2d_ours 1234 59 | ``` 60 | 61 | executes the `svpg2d_ours` experiment with `seed=1234` using Bhairav's Slurm configuration. 62 | 63 | ### NOTE 64 | **ALWAYS** execute the scripts from the main repo folder. 65 | 66 | # Custom Configurations 67 | 68 | This section explains how (and why you need) to create per-user, per-environment 69 | configurations to run the experiment scripts. 70 | 71 | ## Environments 72 | 73 | Since we currently run our experiments in multiple places 74 | (slurm, bluewire, uberduck, etc.) and may be adding more soon (e.g., 75 | AWS), the particularities of each environment are quite different. 76 | We therefore isolate them from the main scripting. 77 | 78 | There are currently 3 folders that group each 79 | user's particular settings for those environments. 80 | 81 | ``` 82 | scripts 83 | - bluewire (Manfred's PC at home) 84 | - slurm (Mila's cluster) 85 | - uberduck (Lab's computer) 86 | ``` 87 | 88 | ## Users 89 | 90 | Create a `[env]\[user].sh` file to configure your particular settings for the `[env]` environment.
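A minimal skeleton (hypothetical environment and user names; the paths are assumptions you should adapt to your own setup) only needs to activate the project's Python environment, put the repo on `PYTHONPATH`, and provide a display for rendering:

```bash
#!/usr/bin/env bash
# scripts/envs/myenv/myuser.sh -- illustrative skeleton, not an actual config in this repo

echo "configuring -> MyEnv Environment"

# activate whichever Python environment holds the project dependencies
source ~/venvs/diffsim/bin/activate

# make the repo importable when launching from the repo root
export PYTHONPATH="${PYTHONPATH}:`pwd`"

# headless rendering for environments that need a display
Xvfb :1 -screen 0 84x84x24 -ac +extension GLX +render -noreset &> xvfb.log &
export DISPLAY=:1
```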
91 | 92 | For instance, this is Bhairav's configuration for MILA's Slurm at `slurm\bhairav.sh` 93 | 94 | ```bash 95 | #!/usr/bin/env bash 96 | #SBATCH --gres=gpu 97 | #SBATCH --cpus-per-task=2 # maximum CPU cores per GPU request: 6 on Cedar, 16 on Graham 98 | #SBATCH --mem=36000M # memory per node 99 | #SBATCH --time=1-12:00 # time (DD-HH:MM) 100 | #SBATCH --qos=low 101 | #SBATCH --requeue 102 | #SBATCH --mail-user=noreply@domain.com 103 | #SBATCH --mail-type=ALL 104 | 105 | echo "Configuring Slurm Job Environment - $SLURM_JOB_ID" 106 | source activate ml 107 | cd ~/coding/diffsim 108 | 109 | export PYTHONPATH="${PYTHONPATH}:`pwd`/coding" 110 | export LD_LIBRARY_PATH=/Tmp/glx:$LD_LIBRARY_PATH 111 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/u/mehtabha/.mujoco/mjpro150/bin 112 | export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so.1.10 113 | Xvfb :$SLURM_JOB_ID -screen 0 84x84x24 -ac +extension GLX +render -noreset &> xvfb.log & 114 | export DISPLAY=:$SLURM_JOB_ID 115 | 116 | ``` 117 | 118 | Hence, if e.g., Bhairav wants to run 5 seeds (starting at 0) on slurm of the `svpg2d_ours` on slurm, 119 | he would have to execute, from the main `diffsim` folder, the following command: 120 | 121 | ``` 122 | scripts/multiseed.sh slurm bhairav svpg2d_ours 0 5 123 | ``` -------------------------------------------------------------------------------- /scripts/docopts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/scripts/docopts -------------------------------------------------------------------------------- /scripts/docopts.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # vim: set et sw=4 ts=4 sts=4: 3 | # 4 | # docopts helper for bash 5 | # 6 | # Usage: 7 | # source path/to/docopts.sh 8 | # docopts -A ARGS -h "$help" -V $version : "$@" 9 | # 10 | # the prefix docopt_* is used to export globals and functions 11 | # docopt_auto_parse() modify $HELP and $ARGS 12 | 13 | # compute this file dirpath: 14 | docopt_sh_me=$($(type -p greadlink readlink | head -1 ) -f "${BASH_SOURCE[0]}") 15 | docopt_sh_dir="$(dirname "$docopt_sh_me")" 16 | 17 | # fetch Usage: from the given filename 18 | # usually $0 in the main level script 19 | docopt_get_help_string() { 20 | local myfname=$1 21 | # filter the block (/!\ all blocks) starting at a "# Usage:" and ending 22 | # at an empty line, one level of comment markup is removed 23 | # 24 | ## sed -n -e '/^# Usage:/,/\(^# \?----\|^$\)/ { /----/ d; s/^# \?//p }' rock_no-stdin_example.sh 25 | 26 | # -n : no print output 27 | # -e : pass sed code inline 28 | # /^# Usage:/,/^$/ : filter range blocks from '# Usage:' to empty line 29 | # s/^# \?// : substitute comment marker and an optional space 30 | # p : print 31 | sed -n -e '/^# Usage:/,/^$/ s/^# \?//p' < $myfname 32 | } 33 | 34 | # fetch version information from the given filename or string 35 | # usually $0 in the main level script, or the help string extracted 36 | # by docopt_get_help_string() 37 | docopt_get_version_string() { 38 | if [[ -f "$1" ]] ; then 39 | # filter the block (all blocks) starting at a "# Usage:" and ending 40 | # at an empty line, one level of comment markup is removed 41 | sed -n -e '/^# ----/,/^$/ s/^# \?//p' < "$1" 42 | else 43 | # use docopts --separator behavior 44 | echo "$1" 45 | fi 46 | } 47 | 48 | # convert a repeatable option parsed by docopts into a bash ARRAY 49 | # ARGS['FILE,#']=3 
50 | # ARGS['FILE,0']=somefile1 51 | # ARGS['FILE,1']=somefile2 52 | # ARGS['FILE,2']=somefile3 53 | # Usage: myarray=( $(docopt_get_values ARGS FILE") ) 54 | docopt_get_values() { 55 | local opt=$2 56 | local ref="\${$1[$opt,#]}" 57 | local nb_val=$(eval echo "$ref") 58 | local i=0 59 | local vars="" 60 | while [[ $i -lt $nb_val ]] ; do 61 | ref="\${$1[$opt,$i]}" 62 | eval "vars+=\" $ref\"" 63 | i=$(($i + 1)) 64 | done 65 | echo $vars 66 | } 67 | 68 | # echo evaluable code to get alls the values into a bash array 69 | # Usage: eval "$(docopt_get_eval_array ARGS FILE myarray)" 70 | docopt_get_eval_array() { 71 | local ref="\${$1[$2,#]}" 72 | local nb_val=$(eval echo "$ref") 73 | local i=0 74 | local vars="" 75 | echo "declare -a $3" 76 | while [[ $i -lt $nb_val ]] ; do 77 | ref="\${$1[$2,$i]}" 78 | eval "echo \"$3+=( '$ref' )\"" 79 | i=$(($i + 1)) 80 | done 81 | } 82 | 83 | # Auto parser for the same docopts usage over scripts, for lazyness. 84 | # 85 | # It uses this convention: 86 | # - help string in: $HELP (modified at gobal scope) 87 | # - Usage is extracted by docopt_get_help_string at beginning of the script 88 | # - arguments are evaluated at global scope in the bash 4 assoc $ARGS 89 | # - no version information is handled 90 | # 91 | docopt_auto_parse() { 92 | local script_fname=$1 93 | shift 94 | # $HELP in global scope 95 | HELP="$(docopt_get_help_string "$script_fname")" 96 | # $ARGS[] assoc array must be declared outside of this function 97 | # or it's scope will be local, that's why we don't print it. 98 | scripts/docopts -A ARGS --no-declare -h "$HELP" : "$@" 99 | res=$? 100 | return $res 101 | } 102 | 103 | # Extract the raw value of a parsed docopts output. 104 | # arguments: 105 | # - assoc: the docopts assoc name 106 | # - key: the wanted key 107 | # - docopts_out: the full parsed output (before eval) 108 | docopt_get_raw_value() { 109 | local assoc=$1 110 | local key="$2" 111 | local docopts_out="$3" 112 | local kstr=$(printf "%s['%s']" $assoc "$key") 113 | # split on '=', outputs the remaining for the matching $1 114 | awk -F= "\$1 == \"$kstr\" {sub(\"^[^=]+=\", \"\", \$0);print}" <<<"$docopts_out" 115 | } 116 | 117 | # Debug, prints env varible ARGS or $1 formated as a bash 4 assoc array 118 | docopt_print_ARGS() { 119 | local assoc="$1" 120 | if [[ -z $assoc ]] ; then 121 | assoc=ARGS 122 | fi 123 | 124 | # bash dark magic copying $assoc argument to a local myassoc array 125 | # inspired by: 126 | # https://stackoverflow.com/questions/6660010/bash-how-to-assign-an-associative-array-to-another-variable-name-e-g-rename-t#8881121 127 | declare -A myassoc 128 | eval $(typeset -A -p $assoc|sed "s/ $assoc=/ myassoc=/") 129 | 130 | # loop on keys 131 | echo "docopt_print_ARGS => $assoc" 132 | local a 133 | for a in ${!myassoc[@]} ; do 134 | printf "%20s = %s\n" $a "${myassoc[$a]}" 135 | done 136 | } 137 | 138 | ## main code 139 | # --auto : don't forget to pass "$@" 140 | # Usage: source docopts.sh --auto "$@" 141 | if [[ "$1" == "--auto" ]] ; then 142 | shift 143 | # declare must be used at global scope to be accessible at 144 | # global level any were in the caller script. 
145 | declare -A ARGS 146 | eval "$(docopt_auto_parse "${BASH_SOURCE[1]}" "$@")" 147 | fi 148 | -------------------------------------------------------------------------------- /scripts/envs/bluewire/manfred.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | echo "configuring -> Bluewire Environment" 4 | 5 | source `which virtualenvwrapper.sh` 6 | workon diffsim 7 | export PYTHONPATH="${PYTHONPATH}:`pwd`" 8 | Xvfb :1 -screen 0 84x84x24 -ac +extension GLX +render -noreset &> xvfb.log & 9 | export DISPLAY=:1 10 | -------------------------------------------------------------------------------- /scripts/envs/slurm/bhairav.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #SBATCH --gres=gpu 3 | #SBATCH --cpus-per-task=2 # maximum CPU cores per GPU request: 6 on Cedar, 16 on Graham 4 | #SBATCH --mem=36000M # memory per node 5 | #SBATCH --time=1-12:00 # time (DD-HH:MM) 6 | #SBATCH --qos=low 7 | #SBATCH --requeue 8 | #SBATCH --mail-user=noreply@domain.com 9 | #SBATCH --mail-type=ALL 10 | 11 | echo "Configuring Slurm Job Environment - $SLURM_JOB_ID" 12 | source activate rl-local 13 | cd ~/coding/diffsim 14 | 15 | export PYTHONPATH="${PYTHONPATH}:`pwd`/coding" 16 | export LD_LIBRARY_PATH=/Tmp/glx:$LD_LIBRARY_PATH 17 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/u/mehtabha/.mujoco/mjpro150/bin 18 | export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so.1.10 19 | Xvfb :$SLURM_JOB_ID -screen 0 84x84x24 -ac +extension GLX +render -noreset &> xvfb.log & 20 | export DISPLAY=:$SLURM_JOB_ID -------------------------------------------------------------------------------- /scripts/envs/slurm/manfred.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash -------------------------------------------------------------------------------- /scripts/envs/uberduck/bhairav.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash -------------------------------------------------------------------------------- /scripts/experiments/lunar_lander.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | 4 | # =============== Lunar Lander ============ 5 | 6 | lunar_lander_baseline() { 7 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=true-baseline --seeds 5 \ 8 | with --freeze-discriminator \ 9 | --agent-name=baseline \ 10 | --initial-svpg-steps=1e6 \ 11 | --continuous-svpg \ 12 | --freeze-svpg 13 | } 14 | 15 | lunar_lander_full_dr() { 16 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=fulldr-baseline --seeds 5 \ 17 | with --randomized-env-id="LunarLanderRandomized-v0" \ 18 | --freeze-discriminator \ 19 | --agent-name=baseline-full-dr \ 20 | --initial-svpg-steps=1e6 \ 21 | --continuous-svpg \ 22 | --freeze-svpg 23 | } 24 | 25 | lunar_lander_expert_813() { 26 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=e813-baseline --seeds 5 \ 27 | with --randomized-env-id="LunarLanderRandomized-RandomM813-v0" \ 28 | --freeze-discriminator \ 29 | --agent-name=expert-813 \ 30 | --initial-svpg-steps=1e6 \ 31 | --continuous-svpg \ 32 | --freeze-svpg 33 | } 34 | 35 | lunar_lander_expert_811() { 36 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=e811-baseline --seeds 5 \ 37 | with --randomized-env-id="LunarLanderRandomized-RandomM811-v0" \ 38 | 
--freeze-discriminator \ 39 | --agent-name=expert-811 \ 40 | --initial-svpg-steps=1e6 \ 41 | --continuous-svpg \ 42 | --freeze-svpg 43 | } 44 | 45 | lunar_lander_ours_1d() { 46 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=ours-lunar1d --seeds 5 \ 47 | with --randomized-env-id="LunarLanderRandomized-v0" \ 48 | --agent-name=ours-lunar1d \ 49 | --continuous-svpg 50 | } 51 | 52 | lunar_lander_ours_1d_5p() { 53 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=ours-lunar1d-5p --seeds 5 \ 54 | with --randomized-env-id="LunarLanderRandomized-v0" \ 55 | --agent-name=ours-lunar1d-5p \ 56 | --continuous-svpg \ 57 | --nagents=5 58 | } 59 | 60 | lunar_lander_ours_1d_5p_6_20() { 61 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=ours-lunar1d-5p-620 --seeds 1 \ 62 | with --randomized-env-id="LunarLanderRandomized-RandomM620-v0" \ 63 | --agent-name=ours-lunar1d-5p-620 \ 64 | --continuous-svpg \ 65 | --nagents=5 66 | } -------------------------------------------------------------------------------- /scripts/experiments/pusher_3dof.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # =============== Pusher 3 DoF ============ 4 | 5 | pusher_3dof_baseline() { 6 | python scripts/launch.py --main=unfreeze_policy --setting=pusher --prefix=true-baseline --seeds 5 \ 7 | with --freeze-discriminator \ 8 | --agent-name=baseline \ 9 | --initial-svpg-steps=1e6 \ 10 | --continuous-svpg \ 11 | --freeze-svpg 12 | } 13 | 14 | pusher_3dof_full_dr() { 15 | python scripts/launch.py --main=unfreeze_policy --setting=pusher --prefix=fulldr-baseline --seeds 5 \ 16 | with --randomized-env-id="Pusher3DOFRandomized-v0" \ 17 | --freeze-discriminator \ 18 | --agent-name=baseline-full-dr \ 19 | --initial-svpg-steps=1e6 \ 20 | --continuous-svpg \ 21 | --freeze-svpg 22 | } 23 | 24 | pusher_3dof_ours() { 25 | python scripts/launch.py --main=unfreeze_policy --setting=pusher --prefix=ours-pusher-3p --seeds 5 \ 26 | with --randomized-env-id="Pusher3DOFRandomized-v0" \ 27 | --agent-name=ours-pusher \ 28 | --continuous-svpg 29 | } 30 | 31 | pusher_3dof_ours_5p() { 32 | python scripts/launch.py --main=unfreeze_policy --setting=pusher --prefix=ours-pusher-5p --seeds 5 \ 33 | with --randomized-env-id="Pusher3DOFRandomized-v0" \ 34 | --agent-name=ours-pusher-5p \ 35 | --continuous-svpg \ 36 | --nagents=5 37 | } -------------------------------------------------------------------------------- /scripts/launch.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | from datetime import timedelta 4 | from subprocess import Popen, PIPE 5 | import time 6 | 7 | EXPERIMENT_TEMPLATE = 'nohup python -m experiments.domainrand.{main} {setting} --experiment-prefix={prefix} ' \ 8 | '--seed={seed} {extra}' 9 | SLEEP_TIME = 30 # in seconds 10 | 11 | LOG_DIRECTORY = 'tails' 12 | LOG_FILENAME_TEMPLATE = '{dir}/{main}-{setting}-{prefix}-{seed}.log' 13 | 14 | 15 | def parse(): 16 | 17 | parser = argparse.ArgumentParser() 18 | 19 | parser.add_argument('--main') 20 | parser.add_argument('--setting', choices=['lunar', 'pusher', 'ergo']) 21 | parser.add_argument('--prefix') 22 | parser.add_argument('--first-seed', default=0, type=int) 23 | parser.add_argument('--seeds', type=int) 24 | 25 | parser.add_argument('with', choices=['with']) 26 | parser.add_argument('extra', nargs=argparse.REMAINDER) 27 | 28 | return parser.parse_args() 29 | 30 | 31 | def 
call_module(main, setting, prefix, seed, arguments): 32 | cmd = EXPERIMENT_TEMPLATE.format( 33 | main=main, 34 | setting=setting, 35 | prefix=prefix, 36 | seed=seed, 37 | extra=" ".join(arguments) 38 | ) 39 | args = cmd.split(' ') 40 | 41 | log_filename = LOG_FILENAME_TEMPLATE.format( 42 | dir=LOG_DIRECTORY, 43 | main=main, 44 | setting=setting, 45 | prefix=prefix, 46 | seed=seed 47 | ) 48 | log_file = open(log_filename, '+w') 49 | 50 | handler = Popen(args=args, stdin=log_file, stdout=log_file, stderr=log_file) 51 | 52 | return handler 53 | 54 | 55 | def call_multi_seed(main, setting, prefix, initial_seed, seeds, extra): 56 | process_handlers = [] 57 | 58 | if not os.path.exists(LOG_DIRECTORY): 59 | os.mkdir(LOG_DIRECTORY) 60 | 61 | for index in range(seeds): 62 | handler = call_module( 63 | main=main, 64 | setting=setting, 65 | prefix=prefix, 66 | seed=index + initial_seed, 67 | arguments=extra 68 | ) 69 | process_handlers.append(handler) 70 | print("{} Seeds with PID = [{}]".format(seeds, ", ".join(list(map(lambda p: str(p.pid), process_handlers))))) 71 | return process_handlers 72 | 73 | 74 | def is_process_running(p): 75 | return p.poll() is None 76 | 77 | 78 | def wait_all(process_handlers): 79 | _time = time.time() 80 | 81 | while any(map(is_process_running, process_handlers)): 82 | print('\rWaiting for all seeds to finish...', end='') 83 | time.sleep(SLEEP_TIME) 84 | _time = time.time() - _time - SLEEP_TIME 85 | 86 | return _time 87 | 88 | 89 | def exit_status(process_handlers): 90 | return list(map(lambda p: str(p.poll()), process_handlers)) 91 | 92 | 93 | def run_experiment(args): 94 | print('Experiments') 95 | print('===================================') 96 | print("Launching experiment <{experiment}> with <{setting}>.".format(experiment=args.main, 97 | setting=args.setting)) 98 | process_handlers = call_multi_seed( 99 | main=args.main, 100 | setting=args.setting, 101 | prefix=args.prefix, 102 | initial_seed=args.first_seed, 103 | seeds=args.seeds, 104 | extra=args.extra 105 | ) 106 | _time = wait_all(process_handlers) 107 | print() 108 | print('<-------- COMPLETED -------------->') 109 | seeds_status = exit_status(process_handlers) 110 | print('Seeds Exit Status = [{}]'.format(",".join(seeds_status))) 111 | print('Elapsed Time = {}'.format(str(timedelta(seconds=_time)))) 112 | print('===================================') 113 | 114 | return all(int(status) == 0 for status in seeds_status) # if all exit statuses r 0 115 | 116 | 117 | def collect_data(args): 118 | print() 119 | print('Data Collection') 120 | print('===================================') 121 | print("Launching data recollection of <{experiment}> with <{setting}>.".format(experiment=args.main, 122 | setting=args.setting)) 123 | process_handlers = call_multi_seed( 124 | main='batch_reward_analysis', 125 | setting=args.setting, 126 | prefix=args.prefix, 127 | initial_seed=args.first_seed, 128 | seeds=args.seeds, 129 | extra=args.extra 130 | ) 131 | _time = wait_all(process_handlers) 132 | print() 133 | seeds_status = exit_status(process_handlers) 134 | print('<-------- COMPLETED -------------->') 135 | print('Seeds Exit Status = [{}]'.format(",".join(seeds_status))) 136 | print('Elapsed Time = {}'.format(str(timedelta(seconds=_time)))) 137 | print('===================================') 138 | 139 | return all(int(status) == 0 for status in seeds_status) # if all exit statuses r 0 140 | 141 | 142 | def launch(): 143 | 144 | print('<---- RUNNING ---->') 145 | 146 | args = parse() 147 | 148 | steps = [ 149 | 
run_experiment, 150 | collect_data 151 | ] 152 | 153 | done = False 154 | for step in steps: 155 | done = step(args) 156 | if not done: 157 | break 158 | print() 159 | print('<---- DONE: {} --->'.format(done)) 160 | 161 | 162 | if __name__ == '__main__': 163 | launch() 164 | -------------------------------------------------------------------------------- /scripts/real-robot-read-dataset.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import h5py 3 | import cv2 4 | import matplotlib.pyplot as plt 5 | 6 | 7 | npa = np.array 8 | 9 | # f = h5py.File('../results/real-robot/real-190328-170248.hdf5', 'r') # smaller demo file with only 2 rollouts per model 10 | f = h5py.File('../results/real-robot/real-190328-174502.hdf5', 'r') # 25 rollouts per model 11 | 12 | experiments = ['adr', 'baseline', 'usdr'] 13 | 14 | for e in experiments: 15 | print(e + "/distances\t\t", f.get(e + "/distances").shape) 16 | print(e + "/rewards\t\t", f.get(e + "/rewards").shape) 17 | print(e + "/images\t\t\t", f.get(e + "/images").shape) 18 | print(e + "/trajectories\t", f.get(e + "/trajectories").shape) 19 | 20 | # example replay: 21 | model_type = "adr" 22 | model_no = 3 23 | run = 1 24 | 25 | # for frame in f.get("{}/images".format(model_type))[model_no, run]: 26 | # if np.count_nonzero(frame) > 0: 27 | # cv2.imshow("Replay", frame) 28 | # cv2.waitKey(20) 29 | # 30 | # frame_len = 0 31 | # for frame in f.get("{}/trajectories".format(model_type))[model_no, run]: 32 | # if np.count_nonzero(frame) > 0: 33 | # print (np.around(frame,1)) 34 | # frame_len+=1 35 | # 36 | # x = np.arange(frame_len) 37 | # 38 | # for motor in range(4): 39 | # plt.plot(x, f.get("{}/trajectories".format(model_type))[model_no, run, :frame_len, motor+14], label="motor "+str(motor+1)) 40 | # 41 | # plt.plot(x, 5*f.get("{}/distances".format(model_type))[model_no, run, :frame_len], label="distance to goal x 5") 42 | # plt.hlines(0.025*5, 0, frame_len, label="solved", linestyles="dotted") 43 | # plt.ylim((-1,1)) 44 | # plt.legend() 45 | # plt.tight_layout() 46 | # plt.show() 47 | 48 | # max_frame_len = 0 49 | 50 | # for color, model_type in zip(["red", "green", "blue"], experiments): 51 | # print (model_type, color) 52 | # 53 | # for model_no in range(5): 54 | # for run in range(len(f.get("{}/trajectories".format(model_type))[model_no, :])): 55 | # frame_len = np.count_nonzero( 56 | # np.count_nonzero(f.get("{}/trajectories".format(model_type))[model_no, run], axis=1)) 57 | # if frame_len > max_frame_len: 58 | # max_frame_len = frame_len 59 | # x = np.arange(frame_len) 60 | # plt.plot(x, f.get("{}/distances".format(model_type))[model_no, run, :frame_len], c=color) 61 | # 62 | # plt.hlines(0.025, 0, max_frame_len, label="solved", linestyles="dotted") 63 | # plt.legend() 64 | # plt.tight_layout() 65 | # plt.title("Distances Of All Rollouts Over Time") 66 | # plt.show() 67 | 68 | 69 | 70 | 71 | #### HISTOGRAM BAD 72 | 73 | # for color, model_type in zip(["red", "green", "blue"], experiments): 74 | # print (model_type, color) 75 | # values = [] 76 | # 77 | # for model_no in range(5): 78 | # for run in range(len(f.get("{}/trajectories".format(model_type))[model_no, :])): 79 | # frame_len = np.count_nonzero( 80 | # np.count_nonzero(f.get("{}/trajectories".format(model_type))[model_no, run], axis=1)) 81 | # values.append(frame_len) 82 | # plt.hist(values, alpha=0.5, color=color, label=model_type) 83 | # 84 | # plt.legend() 85 | # plt.tight_layout() 86 | # plt.title("Distances Of All Rollouts 
Over Time") 87 | # plt.show() 88 | 89 | #### FINAL DISTANCE PLOT 90 | 91 | 92 | pos = 1 93 | val = [] 94 | 95 | colors = ["red", "green", "blue"] 96 | 97 | for color, model_type in zip(colors, experiments): 98 | print (model_type, color) 99 | 100 | values_model = [] 101 | 102 | for model_no in range(5): 103 | for run in range(len(f.get("{}/trajectories".format(model_type))[model_no, :])): 104 | frame_len = np.count_nonzero( 105 | np.count_nonzero(f.get("{}/trajectories".format(model_type))[model_no, run], axis=1)) 106 | values_model.append(f.get("{}/distances".format(model_type))[model_no, run, frame_len-1]) 107 | 108 | # plt.scatter(np.ones(len(values))*pos, values, alpha=0.5, c=color, label=model_type) 109 | 110 | val.append(values_model) 111 | 112 | pos += 1 113 | 114 | bplot = plt.boxplot(npa(val).T, labels=experiments, patch_artist=True) 115 | 116 | cm = plt.cm.get_cmap('viridis') 117 | colors = [cm(val/3) for val in range(3)] 118 | 119 | for patch, color in zip(bplot['boxes'], colors): 120 | patch.set_facecolor(color) 121 | 122 | plt.legend() 123 | # plt.tight_layout() 124 | plt.title("Real Robot Rollout Performance Box Plots\n" 125 | "5 policies per approach, 25 runs per policy") 126 | plt.show() 127 | 128 | 129 | -------------------------------------------------------------------------------- /scripts/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Usage: run.sh --env=ENV --user=USR EXPERIMENT 4 | # 5 | # Run the named EXPERIMENT after sourcing the matching environment and 6 | # user configuration from scripts/envs/. 7 | # 8 | # Arguments: 9 | # EXPERIMENT experiment to run 10 | # 11 | # Options: 12 | # -h --help 13 | # --env=E Environment 14 | # --user=U User configuration 15 | # 16 | 17 | call_experiment() { 18 | $1 19 | } 20 | 21 | # load the environment configuration 22 | environment() { 23 | source scripts/envs/$1/$2.sh 24 | } 25 | 26 | # experiments 27 | source scripts/experiments/lunar_lander.sh 28 | source scripts/experiments/pusher_3dof.sh 29 | 30 | # parsing named arguments 31 | source scripts/docopts.sh --auto "$@" 32 | 33 | environment ${ARGS['--env']} ${ARGS['--user']} 34 | call_experiment ${ARGS['EXPERIMENT']} -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup(name='diffsim', 4 | version='1.0', 5 | install_requires=['gym>=0.5', 6 | 'sklearn', 7 | 'torch', 8 | 'numpy', 9 | 'matplotlib', 10 | 'scipy', 11 | 'bayesian-optimization', 12 | 'box2d', 13 | 'box2d-kengz', 14 | 'mujoco_py', 15 | 'lxml', 16 | 'tqdm', 17 | 'gym_ergojr>=1.2'] 18 | ) 19 | -------------------------------------------------------------------------------- /slurm.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | #SBATCH --gres=gpu 3 | #SBATCH --cpus-per-task=2 # maximum CPU cores per GPU request: 6 on Cedar, 16 on Graham 4 | #SBATCH --mem=36000M # memory per node 5 | #SBATCH --time=1-12:00 # time (DD-HH:MM) 6 | #SBATCH --qos=low 7 | #SBATCH --requeue 8 | #SBATCH --mail-user=noreply@domain.com 9 | #SBATCH --mail-type=ALL 10 | 11 | echo "Configuring Slurm Job Environment - $SLURM_JOB_ID" 12 | source activate rl-local 13 | cd ~/coding/diffsim 14 | 15 | export PYTHONPATH="${PYTHONPATH}:`pwd`/coding" 16 | # python -m experiments.domainrand.experiment_driver lunar --experiment-name=gail-baseline --initial-svpg-steps=1e6 --freeze-svpg
--prerecorded-trajectories --expert-trajectories-file="reference_trajectories_trained_16" --continuous-svpg --randomized-env-id="LunarLanderRandomized-v0" --experiment-prefix="gailbaseline16" --seed=1 & 17 | # python -m experiments.domainrand.experiment_driver lunar --experiment-name=gail-baseline --initial-svpg-steps=1e6 --freeze-svpg --prerecorded-trajectories --expert-trajectories-file="reference_trajectories_trained_16" --continuous-svpg --randomized-env-id="LunarLanderRandomized-v0" --experiment-prefix="gailbaseline16" --seed=2 18 | 19 | python -m experiments.domainrand.experiment_driver lunar --experiment-name=adaptive-randomization --particle-path="saved-models/particles/" --reward-scale=-1.0 --kld-coefficient=0.01 --prerecorded-trajectories --expert-trajectories-file="reference_trajectories_trained_16" --continuous-svpg --randomized-env-id="LunarLanderRandomized-v0" --experiment-prefix="adrplus16" --seed=2 & 20 | 21 | python -m experiments.domainrand.experiment_driver lunar --experiment-name=adaptive-randomization --particle-path="saved-models/particles/" --reward-scale=-1.0 --kld-coefficient=0.01 --prerecorded-trajectories --expert-trajectories-file="reference_trajectories_trained_16" --continuous-svpg --randomized-env-id="LunarLanderRandomized-v0" --experiment-prefix="adrplus16" --seed=3 22 | -------------------------------------------------------------------------------- /tests/00-test-vecenv.py: -------------------------------------------------------------------------------- 1 | from common.envs import LunarLanderRandomized 2 | from common.envs.randomized_vecenv import make_vec_envs 3 | 4 | def _create_envs(seed, reference_env_id='LunarLanderDefault-v0', 5 | randomized_env_id='LunarLanderRandomized-v0'): 6 | 7 | reference_env = make_vec_envs(reference_env_id, seed, num_processes=3) 8 | randomized_env = make_vec_envs(randomized_env_id, seed, num_processes=3) 9 | 10 | return reference_env, randomized_env 11 | 12 | 13 | reference_env, randomized_env = _create_envs(1) 14 | obs = randomized_env.reset() 15 | print(randomized_env.get_current_params()) 16 | 17 | for _ in range(3): 18 | randomized_env.randomize(randomized_values=[['random'], ['random'], ['random']]) 19 | print(randomized_env.get_current_params()) 20 | 21 | print("2D Lunar Lander Randomization") 22 | reference_env, randomized_env = _create_envs(1, randomized_env_id='LunarLanderRandomized2D-v0') 23 | obs = randomized_env.reset() 24 | print(randomized_env.get_current_params()) 25 | 26 | for _ in range(3): 27 | randomized_env.randomize(randomized_values=[['random', 'random'], ['random', 'random'], ['random', 'random']]) 28 | print(randomized_env.get_current_params()) 29 | 30 | print("2D - Setting One Value") 31 | randomized_env.randomize(randomized_values=[[0.0, 'random'], [0.5, 'random'], [1.0, 'random']]) 32 | print(randomized_env.get_current_params()) 33 | 34 | print("2D - Setting Both Values") 35 | randomized_env.randomize(randomized_values=[[0.0, 1.0], [0.5, 0.5], [1.0, 0.0]]) 36 | print(randomized_env.get_current_params()) -------------------------------------------------------------------------------- /tests/01-test-svpg-vectorized.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from common.svpg.svpg import SVPG 3 | from common.envs.randomized_vecenv import make_vec_envs 4 | 5 | def _create_envs(seed, nagents, reference_env_id='LunarLanderDefault-v0', 6 | randomized_env_id='LunarLanderRandomized-v0'): 7 | 8 | reference_env = 
make_vec_envs(reference_env_id, seed, nagents) 9 | randomized_env = make_vec_envs(randomized_env_id, seed, nagents) 10 | 11 | return reference_env, randomized_env 12 | 13 | nagents = 3 14 | svpg = SVPG(nagents) 15 | reference_env, randomized_env = _create_envs(seed=123, nagents=nagents) 16 | 17 | simulation_settings = svpg.step() 18 | assert (nagents, svpg.svpg_rollout_length, svpg.nparams) == simulation_settings.shape 19 | 20 | simulation_settings = np.transpose(simulation_settings, (1, 0, 2)) 21 | 22 | for t in range(svpg.svpg_rollout_length): 23 | print("Current Timestep: {}".format(t)) 24 | print([simulation_settings[t]]) 25 | randomized_env.randomize(randomized_values=simulation_settings[t]) 26 | print(randomized_env.get_current_params()) 27 | 28 | 29 | -------------------------------------------------------------------------------- /tests/02-test-svpg-policy-rollout-vectorized.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from common.svpg.svpg import SVPG 3 | from common.envs.randomized_vecenv import make_vec_envs 4 | 5 | def _create_envs(seed, nagents, reference_env_id='LunarLanderDefault-v0', 6 | randomized_env_id='LunarLanderRandomized-v0'): 7 | 8 | reference_env = make_vec_envs(reference_env_id, seed, nagents) 9 | randomized_env = make_vec_envs(randomized_env_id, seed, nagents) 10 | 11 | return reference_env, randomized_env 12 | 13 | nagents = 3 14 | svpg = SVPG(nagents) 15 | reference_env, randomized_env = _create_envs(seed=123, nagents=nagents) 16 | 17 | simulation_settings = svpg.step() 18 | assert (nagents, svpg.svpg_rollout_length, svpg.nparams) == simulation_settings.shape 19 | 20 | simulation_settings = np.transpose(simulation_settings, (1, 0, 2)) 21 | 22 | for t in range(svpg.svpg_rollout_length): 23 | print("Current Timestep: {}".format(t)) 24 | print([simulation_settings[t]]) 25 | randomized_env.randomize(randomized_values=simulation_settings[t]) 26 | print(randomized_env.get_current_params()) 27 | 28 | 29 | -------------------------------------------------------------------------------- /tests/03-test-vanilla-fetchreach.py: -------------------------------------------------------------------------------- 1 | import common.envs 2 | import gym 3 | import time 4 | 5 | env = gym.make("FetchReachDenseDS-v1") 6 | 7 | print("action dim: {}, obs dim: {}".format(env.action_space, env.observation_space)) 8 | 9 | # exploration 10 | exploration_actions = [ # the actions are for the end effector, thus implying IK 11 | [1, 0, 0, 0], # forward 12 | [-1, 0, 0, 0], # backward 13 | [0, 1, 0, 0], # left (from robot's perspective 14 | [0, -1, 0, 0], # right 15 | [0, 0, 1, 0], # up 16 | [0, 0, -1, 0] # down 17 | # [0, 0, 0, 1], # gripper open/close, unused in fetch 18 | # [0, 0, 0, -1] # gripper open/close, unused in fetch 19 | ] 20 | exploration_length = 50 21 | env.reset() 22 | done = False 23 | i = 0 24 | exploration_action_idx = 0 25 | while True: 26 | action = exploration_actions[exploration_action_idx] 27 | obs, rew, done, misc = env.step(action) 28 | env.render() 29 | i += 1 30 | if i % exploration_length == 0: 31 | exploration_action_idx += 1 32 | if exploration_action_idx == len(exploration_actions): 33 | break 34 | time.sleep(0.02) 35 | 36 | # # randome movement 37 | # for i in range(5): 38 | # env.reset() 39 | # done = False 40 | # 41 | # while not done: 42 | # action = env.action_space.sample() 43 | # obs, rew, done, misc = env.step(action) 44 | # print (obs, rew, misc) 45 | # env.render() 46 | 
-------------------------------------------------------------------------------- /tests/04-test-randomized-mujoco-api.py: -------------------------------------------------------------------------------- 1 | import time 2 | from timeit import default_timer as timer 3 | import numpy as np 4 | import tqdm 5 | import gym 6 | import common.envs 7 | from common.envs.wrappers import RandomizedEnvWrapper 8 | 9 | np.random.seed(1234) 10 | 11 | 12 | env = gym.make('PusherRandomized-v0') 13 | env = RandomizedEnvWrapper(env=env, seed=0) 14 | 15 | obs = env.reset() 16 | 17 | start = timer() 18 | for i in tqdm.tqdm(range(int(1e6))): 19 | env.randomize(randomized_values=["random", "random", "random"]) 20 | print(timer() - start) 21 | -------------------------------------------------------------------------------- /tests/05-test-randomized-mujoco-viz.py: -------------------------------------------------------------------------------- 1 | import time 2 | from timeit import default_timer as timer 3 | import numpy as np 4 | import tqdm 5 | import gym 6 | import common.envs 7 | from common.envs.wrappers import RandomizedEnvWrapper 8 | 9 | np.random.seed(1234) 10 | 11 | 12 | env = gym.make('Pusher3DOFRandomized-v0') 13 | env = RandomizedEnvWrapper(env=env, seed=0) 14 | 15 | # obs = env.reset() 16 | 17 | start = timer() 18 | for i in tqdm.tqdm(range(100)): 19 | env.randomize(randomized_values=["random", "random", "random"]) 20 | env.reset() 21 | for _ in range(200): 22 | obs, reward, done, _ = env.step(env.action_space.sample()) 23 | env.render() 24 | print(obs) 25 | 26 | env.close() 27 | print(timer() - start) 28 | -------------------------------------------------------------------------------- /tests/06-test-randomized-ergoreach.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import gym_ergojr 3 | import time 4 | from tqdm import tqdm 5 | from common.envs.wrappers import RandomizedEnvWrapper 6 | 7 | # MODE = "MANUAL" # slow but lets you see what's happening 8 | MODE = "SPEED" # as fast as possible 9 | 10 | def no_op(x): 11 | pass 12 | 13 | 14 | if MODE == "MANUAL": 15 | env = gym.make("ErgoReacherRandomized-Graphical-v0") # looks nice 16 | timer = time.sleep 17 | else: 18 | env = gym.make("ErgoReacherRandomized-Headless-v0") # runs fast 19 | timer = no_op 20 | 21 | env = RandomizedEnvWrapper(env=env, seed=0) 22 | 23 | for _ in tqdm(range(100)): 24 | env.reset() 25 | env.randomize(randomized_values=["random"] * 8) # 8 values to randomize over 26 | 27 | while True: 28 | action = env.action_space.sample() 29 | obs, rew, done, misc = env.step(action) 30 | timer(0.05) 31 | 32 | if done: 33 | break -------------------------------------------------------------------------------- /tests/07-test-mujoco-3dof-keyboard-control.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # manual 3 | 4 | """ 5 | This script allows you to manually control the 3-DoF pusher simulation 6 | using the keyboard arrow keys. 7 | """ 8 | 9 | import sys 10 | from pynput import keyboard 11 | from pynput.keyboard import Key 12 | import numpy as np 13 | import gym 14 | import common.envs 15 | from common.envs.wrappers import RandomizedEnvWrapper 16 | 17 | env = gym.make('Pusher3DOFUberHard-v0') 18 | env = RandomizedEnvWrapper(env=env, seed=0) 19 | 20 | reward = 0.
21 | 22 | print('hi') 23 | env.randomize(randomized_values=["random", "random"]) 24 | env.reset() 25 | env.render() 26 | 27 | ACTIONS = [ 28 | np.array([0.0, 0.0, 1.0]), 29 | np.array([0.0, 0.0, -1.0]), 30 | np.array([0.0, 1.0, 0.0]), 31 | np.array([0.0, -1.0, 0.0]), 32 | np.array([1.0, 0.0, 0.0]), 33 | np.array([-1.0, 0.0, 0.0]) 34 | ] 35 | 36 | ACTION_KEYS = [Key.up, Key.down, Key.page_up , Key.page_down, Key.right, Key.left] 37 | 38 | 39 | 40 | def on_press(key): 41 | global reward 42 | if key in ACTION_KEYS: 43 | s_, r, d, info = env.step(ACTIONS[ACTION_KEYS.index(key)]) 44 | env.render() 45 | reward += r 46 | 47 | if d: 48 | print(info['goal_dist'], reward) 49 | env.randomize(randomized_values=["random", "random"]) 50 | env.reset() 51 | reward = 0 52 | 53 | with keyboard.Listener(on_press=on_press) as listener: 54 | listener.join() 55 | 56 | env.close() 57 | -------------------------------------------------------------------------------- /tests/08-test-mujoco-4dof-keyboard-control.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # manual 3 | 4 | """ 5 | This script allows you to manually control the simulator or Duckiebot 6 | using the keyboard arrows. 7 | """ 8 | 9 | import sys 10 | from pynput import keyboard 11 | from pynput.keyboard import Key 12 | import numpy as np 13 | import gym 14 | import common.envs 15 | from common.envs.wrappers import RandomizedEnvWrapper 16 | 17 | env = gym.make("ErgoReacher4DOFRandomizedHardVisual-v0") 18 | env = RandomizedEnvWrapper(env=env, seed=0) 19 | 20 | env.reset() 21 | env.render() 22 | 23 | ACTION_KEYS = [Key.up, Key.down, Key.page_up , Key.page_down, Key.right, Key.left, Key.home, Key.end, Key.alt] 24 | 25 | def on_press(key): 26 | if key == Key.tab: env.reset() 27 | if key in ACTION_KEYS: 28 | action = np.zeros(4) 29 | if key != Key.alt: 30 | index = ACTION_KEYS.index(key) 31 | multiplier = 1 if index % 2 == 0 else -1 32 | 33 | act_idx = index // 2 34 | action[act_idx] = multiplier 35 | 36 | s_, r, d, info = env.step(action) 37 | print(info) 38 | env.render() 39 | if d: 40 | env.randomize(randomized_values=["random", "random", "random"]) 41 | env.reset() 42 | 43 | with keyboard.Listener(on_press=on_press) as listener: 44 | listener.join() 45 | 46 | env.close() -------------------------------------------------------------------------------- /tests/09-test-mujoco-3dof-auto.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import time 3 | 4 | import numpy as np 5 | import gym 6 | from tqdm import tqdm 7 | 8 | import common.envs 9 | from common.envs.wrappers import RandomizedEnvWrapper 10 | 11 | env = gym.make('Pusher3DOFRandomized-v0') 12 | env = RandomizedEnvWrapper(env=env, seed=0)# env.randomize() 13 | 14 | actions = [ 15 | np.array([0.0, 0.0, 1.0]), 16 | np.array([0.0, 0.0, -1.0]), 17 | np.array([0.0, 1.0, 0.0]), 18 | np.array([0.0, -1.0, 0.0]), 19 | np.array([1.0, 0.0, 0.0]), 20 | np.array([-1.0, 0.0, 0.0]) 21 | ] 22 | actions.reverse() 23 | 24 | action_change_freq = 50 25 | 26 | for env_idx in tqdm(range(10)): 27 | env.reset() 28 | env.render() 29 | 30 | for action in actions: 31 | 32 | for _ in range(action_change_freq): 33 | _, _, _, _ = env.step(action) 34 | env.render() 35 | time.sleep(0.01) 36 | 37 | # print (np.min(env.unwrapped.qposes, axis=0),np.max(env.unwrapped.qposes, axis=0)) 38 | -------------------------------------------------------------------------------- /tests/10-test-mujoco-3dof-ranges.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import time 3 | 4 | import numpy as np 5 | import gym 6 | from tqdm import tqdm 7 | 8 | import common.envs 9 | from common.envs.wrappers import RandomizedEnvWrapper 10 | 11 | env = gym.make('Pusher3DOFRandomized-v0') 12 | env = RandomizedEnvWrapper(env=env, seed=0)# env.randomize() 13 | 14 | actions = [ 15 | np.array([0.0, 0.0, 1.0]), 16 | np.array([0.0, 0.0, -1.0]), 17 | np.array([0.0, 1.0, 0.0]), 18 | np.array([0.0, -1.0, 0.0]), 19 | np.array([1.0, 0.0, 0.0]), 20 | np.array([-1.0, 0.0, 0.0]) 21 | ] 22 | actions.reverse() 23 | 24 | discretization = 50 25 | 26 | randomized_values = ["default", "default"] 27 | values = np.linspace(0, 1, discretization) 28 | 29 | for dim in range(2): 30 | for i in tqdm(range(discretization)): 31 | rands = list(randomized_values)  # copy, so the other dimension stays at "default" 32 | rands[dim] = values[i] 33 | env.randomize(rands) 34 | env.reset() 35 | 36 | for _ in range(50): 37 | env.step(env.action_space.sample()) 38 | env.render() 39 | time.sleep(0.01) 40 | 41 | # print (np.min(env.unwrapped.qposes, axis=0),np.max(env.unwrapped.qposes, axis=0)) 42 | -------------------------------------------------------------------------------- /tests/11-test-randomized-ergoreach-halfdisk.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import gym_ergojr 3 | import time 4 | from tqdm import tqdm 5 | from common.envs.wrappers import RandomizedEnvWrapper 6 | 7 | # MODE = "MANUAL" # slow but lets you see what's happening 8 | MODE = "SPEED" # as fast as possible 9 | 10 | def no_op(x): 11 | pass 12 | 13 | 14 | if MODE == "MANUAL": 15 | env = gym.make("ErgoReacher-Halfdisk-Randomized-Graphical-v0") # looks nice 16 | timer = time.sleep 17 | else: 18 | env = gym.make("ErgoReacher-Halfdisk-Randomized-Headless-v0") # runs fast 19 | timer = no_op 20 | 21 | env = RandomizedEnvWrapper(env=env, seed=0) 22 | 23 | for _ in tqdm(range(100)): 24 | env.reset() 25 | env.randomize(randomized_values=["random"] * 8) # 8 values to randomize over 26 | 27 | while True: 28 | action = env.action_space.sample() 29 | obs, rew, done, misc = env.step(action) 30 | timer(0.05) 31 | 32 | if done: 33 | break -------------------------------------------------------------------------------- /tests/12-test-randomized-ergoreach-backlash-halfdisk.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import gym_ergojr 3 | import time 4 | from tqdm import tqdm 5 | from common.envs.wrappers import RandomizedEnvWrapper 6 | 7 | MODE = "MANUAL" # slow but lets you see what's happening 8 | # MODE = "SPEED" # as fast as possible 9 | 10 | def no_op(x): 11 | pass 12 | 13 | 14 | if MODE == "MANUAL": 15 | env = gym.make("ErgoReacher-DualGoal-Default-Graphical-v0") # looks nice 16 | timer = time.sleep 17 | else: 18 | env = gym.make("ErgoReacher-Halfdisk-Backlash-Randomized-Headless-v0") # runs fast 19 | timer = no_op 20 | 21 | env = RandomizedEnvWrapper(env=env, seed=0) 22 | 23 | for _ in tqdm(range(100)): 24 | env.reset() 25 | env.randomize(randomized_values=["random"] * 8) # 8 values to randomize over 26 | 27 | while True: 28 | action = env.action_space.sample() 29 | # action = [-1,0,0,0] 30 | obs, rew, done, misc = env.step(action) 31 | timer(0.05) 32 | 33 | if done: 34 | break -------------------------------------------------------------------------------- /tests/13-test-randomized-humanoid.py:
-------------------------------------------------------------------------------- 1 | # /usr/bin/env python 2 | 3 | import time 4 | import gym 5 | import common.envs 6 | from common.envs.wrappers import RandomizedEnvWrapper 7 | 8 | env = gym.make('HumanoidRandomizedEnv-v0') 9 | env = RandomizedEnvWrapper(env=env, seed=0) 10 | 11 | reward = 0. 12 | 13 | env.randomize(randomized_values=["random", "random", "random", "random", "random", "random"]) 14 | env.reset() 15 | env.render() 16 | 17 | d = False 18 | while True: 19 | s_, r, d, info = env.step(env.action_space.sample()) 20 | time.sleep(0.1) 21 | 22 | if d: 23 | env.randomize( 24 | randomized_values=["random", "random", "random", "random", "random", "random"]) 25 | env.reset() 26 | env.render() 27 | -------------------------------------------------------------------------------- /tests/14-test-randomized-halfcheetah.py: -------------------------------------------------------------------------------- 1 | # /usr/bin/env python 2 | 3 | import time 4 | import gym 5 | import common.envs 6 | from common.envs.wrappers import RandomizedEnvWrapper 7 | 8 | env = gym.make('HalfCheetahRandomizedEnv-v0') 9 | env = RandomizedEnvWrapper(env=env, seed=0) 10 | 11 | reward = 0. 12 | 13 | env.randomize(randomized_values=["random", "random", "random", "random"]) 14 | env.reset() 15 | env.render() 16 | 17 | d = False 18 | t = 0 19 | while True: 20 | s_, r, d, info = env.step(env.action_space.sample()) 21 | t += 1 22 | 23 | if d or t > 1000: 24 | t = 0 25 | env.randomize( 26 | randomized_values=["random", "random", "random", "random"]) 27 | env.reset() 28 | env.render() 29 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/tests/__init__.py --------------------------------------------------------------------------------
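
How the launch scripts above fit together, as a usage sketch: `scripts/run.sh` parses `--env`/`--user` with docopts, sources the matching file under `scripts/envs/<env>/<user>.sh`, and then calls the named experiment function, which in turn runs `scripts/launch.py` to spawn one process per seed. The invocation below is illustrative only — it assumes you run from the repository root, that the docopts binary is on your PATH, and that the `slurm`/`bhairav` environment file applies to your cluster; the experiment name and flags are copied from `scripts/experiments/lunar_lander.sh`.

```bash
# Shell front end: source the environment config, then call the experiment function.
bash scripts/run.sh --env=slurm --user=bhairav lunar_lander_ours_1d

# Equivalent direct call to the Python launcher (the body of lunar_lander_ours_1d):
python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=ours-lunar1d --seeds 5 \
    with --randomized-env-id="LunarLanderRandomized-v0" \
         --agent-name=ours-lunar1d \
         --continuous-svpg
```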