├── .gitignore
├── README.md
├── adr.gif
├── common
│   ├── __init__.py
│   ├── agents
│   │   ├── __init__.py
│   │   ├── ddpg
│   │   │   ├── __init__.py
│   │   │   ├── ddpg.py
│   │   │   └── replay_buffer.py
│   │   ├── ddpg_actor.py
│   │   └── svpg_simulator_agent.py
│   ├── discriminator
│   │   └── discriminator_rewarder.py
│   ├── envs
│   │   ├── __init__.py
│   │   ├── assets
│   │   │   ├── LICENSE.md
│   │   │   ├── __init__.py
│   │   │   ├── fetch
│   │   │   │   ├── reach.xml
│   │   │   │   ├── robot.xml
│   │   │   │   └── shared.xml
│   │   │   ├── pusher_3dof.xml
│   │   │   └── stls
│   │   │       └── fetch
│   │   │           ├── base_link_collision.stl
│   │   │           ├── bellows_link_collision.stl
│   │   │           ├── elbow_flex_link_collision.stl
│   │   │           ├── estop_link.stl
│   │   │           ├── forearm_roll_link_collision.stl
│   │   │           ├── gripper_link.stl
│   │   │           ├── head_pan_link_collision.stl
│   │   │           ├── head_tilt_link_collision.stl
│   │   │           ├── l_wheel_link_collision.stl
│   │   │           ├── laser_link.stl
│   │   │           ├── r_wheel_link_collision.stl
│   │   │           ├── shoulder_lift_link_collision.stl
│   │   │           ├── shoulder_pan_link_collision.stl
│   │   │           ├── torso_fixed_link.stl
│   │   │           ├── torso_lift_link_collision.stl
│   │   │           ├── upperarm_roll_link_collision.stl
│   │   │           ├── wrist_flex_link_collision.stl
│   │   │           └── wrist_roll_link_collision.stl
│   │   ├── config
│   │   │   ├── ErgoReacherRandomized
│   │   │   │   ├── default-4dof.json
│   │   │   │   ├── default-6dof.json
│   │   │   │   ├── easy-4dof.json
│   │   │   │   ├── fulldr-4dof.json
│   │   │   │   ├── fulldr-6dof.json
│   │   │   │   └── hard-4dof.json
│   │   │   ├── ErgoReacherRandomizedBacklash
│   │   │   │   ├── default-4dof.json
│   │   │   │   ├── fulldr-4dof.json
│   │   │   │   ├── fulldr-easy.json
│   │   │   │   └── fulldr-hard.json
│   │   │   ├── HalfCheetahRandomized
│   │   │   │   └── default.json
│   │   │   ├── HumanoidRandomized
│   │   │   │   └── default.json
│   │   │   ├── LunarLanderRandomized
│   │   │   │   ├── 10.json
│   │   │   │   ├── 16.json
│   │   │   │   ├── debug.json
│   │   │   │   ├── default.json
│   │   │   │   ├── random2D_820.json
│   │   │   │   ├── random_1720.json
│   │   │   │   ├── random_620.json
│   │   │   │   ├── random_811.json
│   │   │   │   ├── random_812.json
│   │   │   │   ├── random_813.json
│   │   │   │   └── random_820.json
│   │   │   ├── Pusher3DOFGeneralization
│   │   │   │   ├── 00.json
│   │   │   │   ├── 01.json
│   │   │   │   ├── 02.json
│   │   │   │   ├── 10.json
│   │   │   │   ├── 11.json
│   │   │   │   ├── 12.json
│   │   │   │   ├── 20.json
│   │   │   │   ├── 21.json
│   │   │   │   └── 22.json
│   │   │   ├── Pusher3DOFRandomized
│   │   │   │   ├── default.json
│   │   │   │   ├── fulldr-easy.json
│   │   │   │   ├── fulldr-toohard.json
│   │   │   │   ├── fulldr.json
│   │   │   │   └── hard.json
│   │   │   └── __init__.py
│   │   ├── dimension.py
│   │   ├── ergoreacher.py
│   │   ├── ergoreacherbacklash.py
│   │   ├── fetch.py
│   │   ├── half_cheetah.py
│   │   ├── humanoid.py
│   │   ├── lunar_lander.py
│   │   ├── pusher.py
│   │   ├── pusher3dof.py
│   │   ├── randomized_locomotion.py
│   │   ├── randomized_vecenv.py
│   │   └── wrappers.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── actor_critic.py
│   │   └── discriminator.py
│   ├── svpg
│   │   ├── __init__.py
│   │   ├── particles
│   │   │   ├── __init__.py
│   │   │   ├── distributions.py
│   │   │   ├── svpg_particle.py
│   │   │   └── utils.py
│   │   ├── svpg.py
│   │   └── svpg_utils.py
│   └── utils
│       ├── __init__.py
│       ├── logging.py
│       ├── plot_utils.py
│       ├── policy_evaluator.py
│       ├── recorder.py
│       ├── rollout_evaluation.py
│       ├── sim_agent_helper.py
│       └── visualization.py
├── experiments
│   ├── __init__.py
│   └── domainrand
│       ├── __init__.py
│       ├── args.py
│       ├── batch_reward_analysis.py
│       ├── experiment_driver.py
│       └── pusher_grid_generalization.py
├── real_robot.py
├── real_robot_torquesweep.py
├── scripts
│   ├── README.md
│   ├── docopts
│   ├── docopts.sh
│   ├── envs
│   │   ├── bluewire
│   │   │   └── manfred.sh
│   │   ├── slurm
│   │   │   ├── bhairav.sh
│   │   │   └── manfred.sh
│   │   └── uberduck
│   │       └── bhairav.sh
│   ├── experiments
│   │   ├── lunar_lander.sh
│   │   └── pusher_3dof.sh
│   ├── launch.py
│   ├── real-robot-read-dataset.py
│   └── run.sh
├── setup.py
├── slurm.sh
└── tests
    ├── 00-test-vecenv.py
    ├── 01-test-svpg-vectorized.py
    ├── 02-test-svpg-policy-rollout-vectorized.py
    ├── 03-test-vanilla-fetchreach.py
    ├── 04-test-randomized-mujoco-api.py
    ├── 05-test-randomized-mujoco-viz.py
    ├── 06-test-randomized-ergoreach.py
    ├── 07-test-mujoco-3dof-keyboard-control.py
    ├── 08-test-mujoco-4dof-keyboard-control.py
    ├── 09-test-mujoco-3dof-auto.py
    ├── 10-test-mujoco-3dof-ranges.py
    ├── 11-test-randomized-ergoreach-halfdisk.py
    ├── 12-test-randomized-ergoreach-backlash-halfdisk.py
    ├── 13-test-randomized-humanoid.py
    ├── 14-test-randomized-halfcheetah.py
    └── __init__.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | ### Python template
3 |
4 | # Custom
5 | sftp-config*
6 |
7 | # Byte-compiled / optimized / DLL files
8 | __pycache__/
9 | *.py[cod]
10 | *$py.class
11 |
12 | # C extensions
13 | *.so
14 |
15 | # Distribution / packaging
16 | .Python
17 | build/
18 | develop-eggs/
19 | dist/
20 | downloads/
21 | eggs/
22 | .eggs/
23 | lib/
24 | lib64/
25 | parts/
26 | sdist/
27 | var/
28 | wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .coverage
48 | .coverage.*
49 | .cache
50 | nosetests.xml
51 | coverage.xml
52 | *.cover
53 | .hypothesis/
54 | .pytest_cache/
55 |
56 | # Translations
57 | *.mo
58 | *.pot
59 |
60 | # Django stuff:
61 | local_settings.py
62 | db.sqlite3
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # pyenv
81 | .python-version
82 |
83 | # celery beat schedule file
84 | celerybeat-schedule
85 |
86 | # SageMath parsed files
87 | *.sage.py
88 |
89 | # Environments
90 | .env
91 | .venv
92 | env/
93 | venv/
94 | ENV/
95 | env.bak/
96 | venv.bak/
97 |
98 | # Spyder project settings
99 | .spyderproject
100 | .spyproject
101 |
102 | # Rope project settings
103 | .ropeproject
104 |
105 | # mkdocs documentation
106 | /site
107 |
108 | # mypy
109 | .mypy_cache/
110 | .idea/
111 | diffsim.egg-info/
112 | results/real-robot/*.hdf5
113 | results/real-robot/*.hdf5.xz
114 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Active Domain Randomization
2 |
3 | [Active Domain Randomization](https://arxiv.org/abs/1904.04762) (ADR) is a new method for improved, zero-shot transfer of robotic reinforcement learning policies. Building upon traditional domain randomization, which uniformly samples the _randomization space_, we show that replacing this with an active search for difficult MDP instances improves generalization and robustness in the resulting policies.
4 |
5 |
6 | <img src="adr.gif" />
7 |
8 |
9 | Below is our code to reproduce the experiments in the paper. Please check out our [Domain Randomizer](https://github.com/montrealrobotics/domain-randomizer) repository if you're interested in an easy way to do domain randomization in parallel.
10 |
11 | ## Experiments
12 |
13 | ### Baseline Experiments
14 |
15 | #### Pure Baseline
16 |
17 | The most important flag here is `--initial-svpg-steps=1e6`, which will make sure that only randomized environments are proposed until that step limit is reached (it never will be). The environment names (`randomized-env-id`) handle the range of randomization - `LunarLanderDefault-v0` has a single-valued range, so a `randomize()` call will always result in the same, default environment. Likewise, `LunarLanderRandomized-v0` has the full randomization range (in one dimension).
18 |
19 | On the command line, specify an experiment type from `[lunar|pusher|ergo]` to get defaults for that experiment. You can find a detailed list of command-line arguments in `experiments/domainrand/args.py`.
20 |
21 | ```
22 | python -m experiments.domainrand.experiment_driver [lunar|pusher|ergo] \
23 | --experiment-name=unfreeze-policy --freeze-discriminator \
24 | --experiment-prefix="true-baseline" --agent-name=baseline --initial-svpg-steps=1e6 \
25 | --continuous-svpg --freeze-svpg --seed={SEED}
26 | ```
27 |
28 | #### Uniform Domain Randomization
29 |
30 | ```
31 | python -m experiments.domainrand.experiment_driver [lunar|pusher|ergo] \
32 | --experiment-name=unfreeze-policy --randomized-eval-env-id="[corresponding env ID]" \
33 | --experiment-prefix="fulldr" --agent-name=fulldr --initial-svpg-steps=1e6 \
34 | --continuous-svpg --freeze-svpg --seed={SEED}
35 | ```
36 |
37 | ### Active Domain Randomization
38 |
39 | ```
40 | python -m experiments.domainrand.experiment_driver [lunar|pusher|ergo] \
41 | --experiment-name=unfreeze-policy --load-discriminator --randomized-eval-env-id="[corresponding env ID]" \
42 | --freeze-discriminator --experiment-prefix="ours-agent-scratch" --seed={SEED}
43 | ```
44 |
45 | ## Reference
46 |
47 | ```
48 | @article{mehta2019adr,
49 | title={Active Domain Randomization},
50 | author={Mehta, Bhairav and Diaz, Manfred and Golemo, Florian and Pal, Christopher and Paull, Liam},
51 | url={https://arxiv.org/abs/1904.04762},
52 | year={2019}
53 | }
54 | ```
55 |
56 | Built by [@bhairavmehta95](https://bhairavmehta95.github.io), [@takeitallsource](https://github.com/takeitallsource), and [@fgolemo](https://github.com/fgolemo).
57 |
--------------------------------------------------------------------------------
/adr.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/adr.gif
--------------------------------------------------------------------------------
/common/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/__init__.py
--------------------------------------------------------------------------------
/common/agents/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/agents/__init__.py
--------------------------------------------------------------------------------
/common/agents/ddpg/__init__.py:
--------------------------------------------------------------------------------
1 | from .ddpg import Actor, Critic, DDPG
--------------------------------------------------------------------------------
/common/agents/ddpg/ddpg.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 | from common.models.actor_critic import Actor, Critic
7 |
8 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9 |
10 |
11 | class DDPG(object):
12 |     def __init__(self, state_dim, action_dim, agent_name='baseline', max_action=1.):
13 |         self.actor = Actor(state_dim, action_dim, max_action).to(device)
14 |         self.actor_target = Actor(state_dim, action_dim, max_action).to(device)
15 |         self.actor_target.load_state_dict(self.actor.state_dict())
16 |         self.actor_optimizer = torch.optim.Adam(self.actor.parameters())
17 |
18 |         self.critic = Critic(state_dim, action_dim).to(device)
19 |         self.critic_target = Critic(state_dim, action_dim).to(device)
20 |         self.critic_target.load_state_dict(self.critic.state_dict())
21 |         self.critic_optimizer = torch.optim.Adam(self.critic.parameters())
22 |
23 |         self.max_action = max_action
24 |
25 |         self.agent_name = agent_name
26 |
27 |     def select_action(self, state):
28 |         state = torch.FloatTensor(state).to(device)
29 |         return self.actor(state).cpu().data.numpy()
30 |
31 |     def train(self, replay_buffer, iterations, batch_size=100, discount=0.99, tau=0.005):
32 |         for it in range(iterations):
33 |             # Sample replay buffer
34 |             x, y, u, r, d = replay_buffer.sample(batch_size)
35 |             state = torch.FloatTensor(x).to(device)
36 |             action = torch.FloatTensor(u).to(device)
37 |             next_state = torch.FloatTensor(y).to(device)
38 |             done = torch.FloatTensor(1 - d).to(device)
39 |             reward = torch.FloatTensor(r).to(device)
40 |
41 |             # Compute the target Q value
42 |             target_Q = self.critic_target(next_state, self.actor_target(next_state))
43 |             target_Q = reward + (done * discount * target_Q).detach()
44 |
45 |             # Get current Q estimate
46 |             current_Q = self.critic(state, action)
47 |
48 |             # Compute critic loss
49 |             critic_loss = F.mse_loss(current_Q, target_Q)
50 |
51 |             # Optimize the critic
52 |             self.critic_optimizer.zero_grad()
53 |             critic_loss.backward()
54 |             self.critic_optimizer.step()
55 |
56 |             # Compute actor loss
57 |             actor_loss = -self.critic(state, self.actor(state)).mean()
58 |
59 |             # Optimize the actor
60 |             self.actor_optimizer.zero_grad()
61 |             actor_loss.backward()
62 |             self.actor_optimizer.step()
63 |
64 |             # Update the frozen target models
65 |             for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()):
66 |                 target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)
67 |
68 |             for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()):
69 |                 target_param.data.copy_(tau * param.data + (1 - tau) * target_param.data)
70 |
71 |     def save(self, filename, directory):
72 |         torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, filename))
73 |         torch.save(self.critic.state_dict(), '%s/%s_critic.pth' % (directory, filename))
74 |
75 |     def load(self, filename, directory):
76 |         self.actor.load_state_dict(torch.load('%s/%s_actor.pth' % (directory, filename), map_location=device))
77 |         self.critic.load_state_dict(torch.load('%s/%s_critic.pth' % (directory, filename), map_location=device))
78 |
79 |     # To ensure backwards compatibility D:
80 |     def load_model(self):
81 |         cur_dir = os.getcwd()
82 |         actor_path = 'common/agents/ddpg/saved_model/{}_{}.pth'.format(self.agent_name, 'actor')
83 |         critic_path = 'common/agents/ddpg/saved_model/{}_{}.pth'.format(self.agent_name, 'critic')
84 |
85 |         self.actor.load_state_dict(torch.load(os.path.join(cur_dir, actor_path), map_location=device))
86 |         self.critic.load_state_dict(torch.load(os.path.join(cur_dir, critic_path), map_location=device))
87 |
--------------------------------------------------------------------------------
/common/agents/ddpg/replay_buffer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | # Code based on:
4 | # https://github.com/openai/baselines/blob/master/baselines/deepq/replay_buffer.py
5 |
6 |
7 | # Simple replay buffer
8 | class ReplayBuffer(object):
9 |     def __init__(self, max_size=1e6):
10 |         self.storage = []
11 |         self.max_size = int(max_size)
12 |         self.next_idx = 0
13 |
14 |     # Expects tuples of (state, next_state, action, reward, done)
15 |     def add(self, data):
16 |         if self.next_idx >= len(self.storage):
17 |             self.storage.append(data)
18 |         else:
19 |             self.storage[self.next_idx] = data
20 |
21 |         self.next_idx = (self.next_idx + 1) % self.max_size
22 |
23 |     def sample(self, batch_size=100):
24 |         ind = np.random.randint(0, len(self.storage), size=batch_size)
25 |         x, y, u, r, d = [], [], [], [], []
26 |
27 |         for i in ind:
28 |             X, Y, U, R, D = self.storage[i]
29 |             x.append(np.array(X, copy=False))
30 |             y.append(np.array(Y, copy=False))
31 |             u.append(np.array(U, copy=False))
32 |             r.append(np.array(R, copy=False))
33 |             d.append(np.array(D, copy=False))
34 |
35 |         return np.array(x), np.array(y), np.array(u), np.array(r).reshape(-1, 1), np.array(d).reshape(-1, 1)
--------------------------------------------------------------------------------
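
The two files above implement the policy-learning loop used by the simulator agents: transitions are stored in `ReplayBuffer` as `(state, next_state, action, reward, done)` tuples and `DDPG.train()` samples minibatches from it. A minimal usage sketch follows; the environment choice, noise scale, and loop constants are illustrative, and it assumes the classic (pre-0.26) Gym `reset`/`step` API.

```
import gym
import numpy as np

from common.agents.ddpg import DDPG
from common.agents.ddpg.replay_buffer import ReplayBuffer

env = gym.make('LunarLanderContinuous-v2')   # illustrative environment choice
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]

agent = DDPG(state_dim, action_dim, max_action=float(env.action_space.high[0]))
buffer = ReplayBuffer(max_size=1e6)

state, done = env.reset(), False
for step in range(10000):
    # Act with Gaussian exploration noise and store the transition in the expected tuple order
    action = agent.select_action(np.array(state))
    action = np.clip(action + np.random.normal(0, 0.1, size=action_dim), -1.0, 1.0)
    next_state, reward, done, _ = env.step(action)
    buffer.add((state, next_state, action, reward, float(done)))
    state = env.reset() if done else next_state

    # One gradient step per environment step once the buffer has enough samples
    if len(buffer.storage) > 1000:
        agent.train(buffer, iterations=1, batch_size=100)
```
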
/common/agents/ddpg_actor.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import logging
4 | from common.agents.ddpg import Actor
5 |
6 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
7 |
8 |
9 | class DDPGActor(object):
10 |     def __init__(self, state_dim, action_dim, max_action=1, agent_name="baseline", load_agent=True, model_path=None):
11 |         self.actor = Actor(state_dim, action_dim, max_action).to(device)
12 |         self.agent_name = agent_name
13 |         self.model_path = model_path
14 |
15 |         if load_agent:
16 |             self._load()
17 |
18 |     def select_action(self, state):
19 |         state = torch.FloatTensor(state).to(device)
20 |         return self.actor(state).cpu().data.numpy()
21 |
22 |     def _load(self):
23 |         if self.model_path is not None:
24 |             logging.info('Loading DDPG from: {}'.format(self.model_path))
25 |             self.actor.load_state_dict(torch.load(self.model_path, map_location=device))
26 |         else:
27 |             cur_dir = os.getcwd()
28 |             full_path = os.path.join(cur_dir, 'saved-models/policy/baseline_actor.pth')
29 |             self.actor.load_state_dict(torch.load(full_path, map_location=device))
30 |
--------------------------------------------------------------------------------
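
`DDPGActor` is the inference-only counterpart of `DDPG` above: it loads just the actor network for rollouts and evaluation. A minimal sketch with illustrative dimensions (they must match whatever environment the checkpoint was trained on); when `model_path` is omitted it falls back to `saved-models/policy/baseline_actor.pth`.

```
import numpy as np

from common.agents.ddpg_actor import DDPGActor

# Dimensions and checkpoint path are illustrative
policy = DDPGActor(state_dim=8, action_dim=2, agent_name='baseline',
                   load_agent=True, model_path='saved-models/policy/baseline_actor.pth')

action = policy.select_action(np.zeros(8, dtype=np.float32))
```
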
/common/discriminator/discriminator_rewarder.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import torch
4 | import torch.nn as nn
5 | from torch.autograd import Variable
6 |
7 | from common.models.discriminator import MLPDiscriminator
8 |
9 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
10 |
11 |
12 | class DiscriminatorRewarder(object):
13 |     def __init__(self, reference_env, randomized_env_id, discriminator_batchsz, reward_scale,
14 |                  load_discriminator, discriminator_lr=3e-3, add_pz=True):
15 |         self.discriminator = MLPDiscriminator(
16 |             state_dim=reference_env.observation_space.shape[0],
17 |             action_dim=reference_env.action_space.shape[0]).to(device)
18 |
19 |         self.discriminator_criterion = nn.BCELoss()
20 |         self.discriminator_optimizer = torch.optim.Adam(self.discriminator.parameters(), lr=discriminator_lr)
21 |         self.reward_scale = reward_scale
22 |         self.batch_size = discriminator_batchsz
23 |         self.add_pz = add_pz
24 |
25 |         if load_discriminator:
26 |             self._load_discriminator(randomized_env_id)
27 |
28 |     def calculate_rewards(self, randomized_trajectory):
29 |         """Discriminator-based reward calculation.
30 |         We use the negative of the usual adversarial formulation (normally -log(D)): the simulator is *rewarded*
31 |         for proposing environments whose trajectories are easy to discriminate from the reference environment's.
32 |         """
33 |         score, _, _ = self.get_score(randomized_trajectory)
34 |         reward = np.log(score)
35 |
36 |         if self.add_pz:
37 |             reward -= np.log(0.5)
38 |
39 |         return self.reward_scale * reward
40 |
41 |     def get_score(self, trajectory):
42 |         """Scores a trajectory with the discriminator.
43 |         Returns the mean, median, and sum of the per-sample discriminator outputs
44 |         (the estimated probability that each tuple came from a randomized environment).
45 |         """
46 |         traj_tensor = self._trajectory2tensor(trajectory).float()
47 |
48 |         with torch.no_grad():
49 |             score = (self.discriminator(traj_tensor).cpu().detach().numpy() + 1e-8)
50 |         return score.mean(), np.median(score), np.sum(score)
51 |
52 |     def train_discriminator(self, reference_trajectory, randomized_trajectory, iterations):
53 |         """Trains the discriminator to distinguish reference from randomized state-action tuples
54 |         """
55 |         for _ in range(iterations):
56 |             randind = np.random.randint(0, len(randomized_trajectory[0]), size=int(self.batch_size))
57 |             refind = np.random.randint(0, len(reference_trajectory[0]), size=int(self.batch_size))
58 |
59 |             randomized_batch = self._trajectory2tensor(randomized_trajectory[randind])
60 |             reference_batch = self._trajectory2tensor(reference_trajectory[refind])
61 |
62 |             g_o = self.discriminator(randomized_batch)
63 |             e_o = self.discriminator(reference_batch)
64 |
65 |             self.discriminator_optimizer.zero_grad()
66 |
67 |             discrim_loss = self.discriminator_criterion(g_o, torch.ones((len(randomized_batch), 1), device=device)) + \
68 |                 self.discriminator_criterion(e_o, torch.zeros((len(reference_batch), 1), device=device))
69 |             discrim_loss.backward()
70 |
71 |             self.discriminator_optimizer.step()
72 |
73 |     def _load_discriminator(self, name, path='saved-models/discriminator/discriminator_{}.pth'):
74 |         self.discriminator.load_state_dict(torch.load(path.format(name), map_location=device))
75 |
76 |     def _save_discriminator(self, name, path='saved-models/discriminator/discriminator_{}.pth'):
77 |         torch.save(self.discriminator.state_dict(), path.format(name))
78 |
79 |     def _trajectory2tensor(self, trajectory):
80 |         return torch.from_numpy(trajectory).float().to(device)
81 |
--------------------------------------------------------------------------------
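
The rewarder above supplies the ADR training signal: the discriminator is trained to label randomized rollouts as 1 and reference rollouts as 0, and `calculate_rewards` turns its confidence into a reward for the SVPG particles that proposed the randomization. Below is a sketch of the intended call pattern; `reference_env`, `ref_traj`, and `rand_traj` are assumed inputs (the per-step feature layout must match what `MLPDiscriminator` in `common/models/discriminator.py`, not shown above, expects), and the scale value is illustrative.

```
from common.discriminator.discriminator_rewarder import DiscriminatorRewarder

# reference_env: an un-randomized environment instance, used only to size the discriminator
rewarder = DiscriminatorRewarder(
    reference_env=reference_env,
    randomized_env_id='LunarLanderRandomized-v0',
    discriminator_batchsz=128,
    reward_scale=1.0,            # illustrative value
    load_discriminator=False,
)

# ref_traj / rand_traj: NumPy arrays of per-step features from rolling out the same policy
# in the reference and randomized environments (format assumed here)
rewarder.train_discriminator(ref_traj, rand_traj, iterations=5)

# Higher when the randomized rollout is easy to tell apart from the reference rollout
svpg_reward = rewarder.calculate_rewards(rand_traj)
```
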
/common/envs/assets/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | MODEL_PATH = os.path.abspath(os.path.dirname(__file__))
4 |
--------------------------------------------------------------------------------
/common/envs/assets/fetch/reach.xml:
--------------------------------------------------------------------------------
(MuJoCo XML for the Fetch reach scene; the markup was stripped when this dump was generated and is not reproduced here.)
--------------------------------------------------------------------------------
/common/envs/assets/fetch/robot.xml:
--------------------------------------------------------------------------------
(MuJoCo XML describing the Fetch robot; the markup was stripped when this dump was generated and is not reproduced here.)
--------------------------------------------------------------------------------
/common/envs/assets/fetch/shared.xml:
--------------------------------------------------------------------------------
(Shared MuJoCo XML assets/defaults for the Fetch environments; the markup was stripped when this dump was generated and is not reproduced here.)
--------------------------------------------------------------------------------
/common/envs/assets/pusher_3dof.xml:
--------------------------------------------------------------------------------
(MuJoCo XML for the 3-DoF pusher environment; the markup was stripped when this dump was generated and is not reproduced here.)
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/base_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/base_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/bellows_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/bellows_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/elbow_flex_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/elbow_flex_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/estop_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/estop_link.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/forearm_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/forearm_roll_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/gripper_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/gripper_link.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/head_pan_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/head_pan_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/head_tilt_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/head_tilt_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/l_wheel_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/l_wheel_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/laser_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/laser_link.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/r_wheel_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/r_wheel_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/shoulder_lift_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/shoulder_lift_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/shoulder_pan_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/shoulder_pan_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/torso_fixed_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/torso_fixed_link.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/torso_lift_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/torso_lift_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/upperarm_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/upperarm_roll_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/wrist_flex_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/wrist_flex_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/assets/stls/fetch/wrist_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/envs/assets/stls/fetch/wrist_roll_link_collision.stl
--------------------------------------------------------------------------------
/common/envs/config/ErgoReacherRandomized/default-4dof.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "ErgoReacherRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "joint0gain",
6 | "default": 1,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.0
9 | },
10 | {
11 | "name": "joint1gain",
12 | "default": 1,
13 | "multiplier_min": 1.0,
14 | "multiplier_max": 1.0
15 | },
16 | {
17 | "name": "joint2gain",
18 | "default": 1,
19 | "multiplier_min": 1.0,
20 | "multiplier_max": 1.0
21 | },
22 | {
23 | "name": "joint3gain",
24 | "default": 1,
25 | "multiplier_min": 1.0,
26 | "multiplier_max": 1.0
27 | },
28 | {
29 | "name": "joint0maxtorque",
30 | "default": 18,
31 | "multiplier_min": 1.0,
32 | "multiplier_max": 1.0
33 | },
34 | {
35 | "name": "joint1maxtorque",
36 | "default": 18,
37 | "multiplier_min": 1.0,
38 | "multiplier_max": 1.0
39 | },
40 | {
41 | "name": "joint2maxtorque",
42 | "default": 18,
43 | "multiplier_min": 1.0,
44 | "multiplier_max": 1.0
45 | },
46 | {
47 | "name": "joint3maxtorque",
48 | "default": 18,
49 | "multiplier_min": 1.0,
50 | "multiplier_max": 1.0
51 | }
52 | ]
53 | }
54 |
--------------------------------------------------------------------------------
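
Each entry in a config's `dimensions` list becomes a randomized simulation parameter: the sampled range runs from `default * multiplier_min` to `default * multiplier_max` (see `common/envs/dimension.py` later in this listing), so a file like the one above with both multipliers at 1.0 pins every value to its default. The sketch below shows how such a file could be read; the `load_dimensions` helper is illustrative and the repository's own loading code under `common/envs/` is not reproduced in this excerpt.

```
import json

from common.envs.dimension import Dimension

def load_dimensions(config_path, seed=123):
    """Illustrative loader: build one Dimension per config entry."""
    with open(config_path) as f:
        config = json.load(f)

    return [
        Dimension(default_value=dim['default'], seed=seed,
                  multiplier_min=dim['multiplier_min'],
                  multiplier_max=dim['multiplier_max'],
                  name=dim['name'])
        for dim in config['dimensions']
    ]

dims = load_dimensions('common/envs/config/ErgoReacherRandomized/default-4dof.json')
for d in dims:
    d.randomize()                    # uniform sample in [default * min, default * max]
    print(d.name, d.current_value)   # stays at the default here, since both multipliers are 1.0
```
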
/common/envs/config/ErgoReacherRandomized/default-6dof.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "ErgoReacherRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "joint0gain",
6 | "default": 1,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.0
9 | },
10 | {
11 | "name": "joint1gain",
12 | "default": 1,
13 | "multiplier_min": 1.0,
14 | "multiplier_max": 1.0
15 | },
16 | {
17 | "name": "joint2gain",
18 | "default": 1,
19 | "multiplier_min": 1.0,
20 | "multiplier_max": 1.0
21 | },
22 | {
23 | "name": "joint3gain",
24 | "default": 1,
25 | "multiplier_min": 1.0,
26 | "multiplier_max": 1.0
27 | },
28 | {
29 | "name": "joint4gain",
30 | "default": 1,
31 | "multiplier_min": 1.0,
32 | "multiplier_max": 1.0
33 | },
34 | {
35 | "name": "joint5gain",
36 | "default": 1,
37 | "multiplier_min": 1.0,
38 | "multiplier_max": 1.0
39 | },
40 | {
41 | "name": "joint0maxtorque",
42 | "default": 18,
43 | "multiplier_min": 1.0,
44 | "multiplier_max": 1.0
45 | },
46 | {
47 | "name": "joint1maxtorque",
48 | "default": 18,
49 | "multiplier_min": 1.0,
50 | "multiplier_max": 1.0
51 | },
52 | {
53 | "name": "joint2maxtorque",
54 | "default": 18,
55 | "multiplier_min": 1.0,
56 | "multiplier_max": 1.0
57 | },
58 | {
59 | "name": "joint3maxtorque",
60 | "default": 18,
61 | "multiplier_min": 1.0,
62 | "multiplier_max": 1.0
63 | },
64 | {
65 | "name": "joint4maxtorque",
66 | "default": 18,
67 | "multiplier_min": 1.0,
68 | "multiplier_max": 1.0
69 | },
70 | {
71 | "name": "joint5maxtorque",
72 | "default": 18,
73 | "multiplier_min": 1.0,
74 | "multiplier_max": 1.0
75 | }
76 | ]
77 | }
78 |
--------------------------------------------------------------------------------
/common/envs/config/ErgoReacherRandomized/easy-4dof.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "ErgoReacherRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "joint0gain",
6 | "default": 1.0,
7 | "multiplier_min": 0.3,
8 | "multiplier_max": 2.0
9 | },
10 | {
11 | "name": "joint1gain",
12 | "default": 1.0,
13 | "multiplier_min": 0.3,
14 | "multiplier_max": 2.0
15 | },
16 | {
17 | "name": "joint2gain",
18 | "default": 1.0,
19 | "multiplier_min": 0.3,
20 | "multiplier_max": 2.0
21 | },
22 | {
23 | "name": "joint3gain",
24 | "default": 1.0,
25 | "multiplier_min": 0.3,
26 | "multiplier_max": 2.0
27 | },
28 | {
29 | "name": "joint0maxtorque",
30 | "default": 5.0,
31 | "multiplier_min": 1.0,
32 | "multiplier_max": 4.0
33 | },
34 | {
35 | "name": "joint1maxtorque",
36 | "default": 5.0,
37 | "multiplier_min": 1.0,
38 | "multiplier_max": 4.0
39 | },
40 | {
41 | "name": "joint2maxtorque",
42 | "default": 5.0,
43 | "multiplier_min": 1.0,
44 | "multiplier_max": 4.0
45 | },
46 | {
47 | "name": "joint3maxtorque",
48 | "default": 5.0,
49 | "multiplier_min": 1.0,
50 | "multiplier_max": 4.0
51 | }
52 | ]
53 | }
54 |
--------------------------------------------------------------------------------
/common/envs/config/ErgoReacherRandomized/fulldr-4dof.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "ErgoReacherRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "joint0gain",
6 | "default": 1,
7 | "multiplier_min": 0.1,
8 | "multiplier_max": 100
9 | },
10 | {
11 | "name": "joint1gain",
12 | "default": 1,
13 | "multiplier_min": 0.1,
14 | "multiplier_max": 100
15 | },
16 | {
17 | "name": "joint2gain",
18 | "default": 1,
19 | "multiplier_min": 0.1,
20 | "multiplier_max": 100
21 | },
22 | {
23 | "name": "joint3gain",
24 | "default": 1,
25 | "multiplier_min": 0.1,
26 | "multiplier_max": 100
27 | },
28 | {
29 | "name": "joint0maxtorque",
30 | "default": 18,
31 | "multiplier_min": 0.2222,
32 | "multiplier_max": 2.7778
33 | },
34 | {
35 | "name": "joint1maxtorque",
36 | "default": 18,
37 | "multiplier_min": 0.2222,
38 | "multiplier_max": 2.7778
39 | },
40 | {
41 | "name": "joint2maxtorque",
42 | "default": 18,
43 | "multiplier_min": 0.2222,
44 | "multiplier_max": 2.7778
45 | },
46 | {
47 | "name": "joint3maxtorque",
48 | "default": 18,
49 | "multiplier_min": 0.2222,
50 | "multiplier_max": 2.7778
51 | }
52 | ]
53 | }
54 |
--------------------------------------------------------------------------------
/common/envs/config/ErgoReacherRandomized/fulldr-6dof.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "ErgoReacherRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "joint0gain",
6 | "default": 1,
7 | "multiplier_min": 0.1,
8 | "multiplier_max": 100
9 | },
10 | {
11 | "name": "joint1gain",
12 | "default": 1,
13 | "multiplier_min": 0.1,
14 | "multiplier_max": 100
15 | },
16 | {
17 | "name": "joint2gain",
18 | "default": 1,
19 | "multiplier_min": 0.1,
20 | "multiplier_max": 100
21 | },
22 | {
23 | "name": "joint3gain",
24 | "default": 1,
25 | "multiplier_min": 0.1,
26 | "multiplier_max": 100
27 | },
28 | {
29 | "name": "joint4gain",
30 | "default": 1,
31 | "multiplier_min": 0.1,
32 | "multiplier_max": 100
33 | },
34 | {
35 | "name": "joint5gain",
36 | "default": 1,
37 | "multiplier_min": 0.1,
38 | "multiplier_max": 100
39 | },
40 | {
41 | "name": "joint0maxtorque",
42 | "default": 18,
43 | "multiplier_min": 0.2222,
44 | "multiplier_max": 2.7778
45 | },
46 | {
47 | "name": "joint1maxtorque",
48 | "default": 18,
49 | "multiplier_min": 0.2222,
50 | "multiplier_max": 2.7778
51 | },
52 | {
53 | "name": "joint2maxtorque",
54 | "default": 18,
55 | "multiplier_min": 0.2222,
56 | "multiplier_max": 2.7778
57 | },
58 | {
59 | "name": "joint3maxtorque",
60 | "default": 18,
61 | "multiplier_min": 0.2222,
62 | "multiplier_max": 2.7778
63 | },
64 | {
65 | "name": "joint4maxtorque",
66 | "default": 18,
67 | "multiplier_min": 0.2222,
68 | "multiplier_max": 2.7778
69 | },
70 | {
71 | "name": "joint5maxtorque",
72 | "default": 18,
73 | "multiplier_min": 0.2222,
74 | "multiplier_max": 2.7778
75 | }
76 | ]
77 | }
78 |
--------------------------------------------------------------------------------
/common/envs/config/ErgoReacherRandomized/hard-4dof.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "ErgoReacherRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "joint0gain",
6 | "default": 0.2,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.0
9 | },
10 | {
11 | "name": "joint1gain",
12 | "default": 0.2,
13 | "multiplier_min": 1.0,
14 | "multiplier_max": 1.0
15 | },
16 | {
17 | "name": "joint2gain",
18 | "default": 0.2,
19 | "multiplier_min": 1.0,
20 | "multiplier_max": 1.0
21 | },
22 | {
23 | "name": "joint3gain",
24 | "default": 0.2,
25 | "multiplier_min": 1.0,
26 | "multiplier_max": 1.0
27 | },
28 | {
29 | "name": "joint0maxtorque",
30 | "default": 5.0,
31 | "multiplier_min": 1.0,
32 | "multiplier_max": 1.0
33 | },
34 | {
35 | "name": "joint1maxtorque",
36 | "default": 5.0,
37 | "multiplier_min": 1.0,
38 | "multiplier_max": 1.0
39 | },
40 | {
41 | "name": "joint2maxtorque",
42 | "default": 5.0,
43 | "multiplier_min": 1.0,
44 | "multiplier_max": 1.0
45 | },
46 | {
47 | "name": "joint3maxtorque",
48 | "default": 5.0,
49 | "multiplier_min": 1.0,
50 | "multiplier_max": 1.0
51 | }
52 | ]
53 | }
54 |
--------------------------------------------------------------------------------
/common/envs/config/ErgoReacherRandomizedBacklash/default-4dof.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "ErgoReacherRandomizedBacklash-v0",
3 | "dimensions": [
4 | {
5 | "name": "joint0gain",
6 | "default": 1000,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.0
9 | },
10 | {
11 | "name": "joint1gain",
12 | "default": 1000,
13 | "multiplier_min": 1.0,
14 | "multiplier_max": 1.0
15 | },
16 | {
17 | "name": "joint2gain",
18 | "default": 1000,
19 | "multiplier_min": 1.0,
20 | "multiplier_max": 1.0
21 | },
22 | {
23 | "name": "joint3gain",
24 | "default": 1000,
25 | "multiplier_min": 1.0,
26 | "multiplier_max": 1.0
27 | },
28 | {
29 | "name": "joint0backlash",
30 | "default": -2.30258509299,
31 | "multiplier_min": 1.0,
32 | "multiplier_max": 1.0
33 | },
34 | {
35 | "name": "joint1backlash",
36 | "default": -2.30258509299,
37 | "multiplier_min": 1.0,
38 | "multiplier_max": 1.0
39 | },
40 | {
41 | "name": "joint2backlash",
42 | "default": -2.30258509299,
43 | "multiplier_min": 1.0,
44 | "multiplier_max": 1.0
45 | },
46 | {
47 | "name": "joint3backlash",
48 | "default": -2.30258509299,
49 | "multiplier_min": 1.0,
50 | "multiplier_max": 1.0
51 | }
52 | ]
53 | }
54 |
--------------------------------------------------------------------------------
/common/envs/config/ErgoReacherRandomizedBacklash/fulldr-4dof.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "ErgoReacherRandomizedBacklash-v0",
3 | "dimensions": [
4 | {
5 | "name": "joint0gain",
6 | "default": 1000,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 7.0
9 | },
10 | {
11 | "name": "joint1gain",
12 | "default": 1000,
13 | "multiplier_min": 1.0,
14 | "multiplier_max": 7.0
15 | },
16 | {
17 | "name": "joint2gain",
18 | "default": 1000,
19 | "multiplier_min": 1.0,
20 | "multiplier_max": 7.0
21 | },
22 | {
23 | "name": "joint3gain",
24 | "default": 1000,
25 | "multiplier_min": 1.0,
26 | "multiplier_max": 7.0
27 | },
28 | {
29 | "name": "joint0backlash",
30 | "default": -2.302585,
31 | "multiplier_min": 4,
32 | "multiplier_max": 0
33 | },
34 | {
35 | "name": "joint1backlash",
36 | "default": -2.302585,
37 | "multiplier_min": 4,
38 | "multiplier_max": 0
39 | },
40 | {
41 | "name": "joint2backlash",
42 | "default": -2.302585,
43 | "multiplier_min": 4,
44 | "multiplier_max": 0
45 | },
46 | {
47 | "name": "joint3backlash",
48 | "default": -2.302585,
49 | "multiplier_min": 4,
50 | "multiplier_max": 0
51 | }
52 | ]
53 | }
54 |
--------------------------------------------------------------------------------
/common/envs/config/ErgoReacherRandomizedBacklash/fulldr-easy.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "ErgoReacherRandomizedBacklashEasy-v0",
3 | "dimensions": [
4 | {
5 | "name": "joint0gain",
6 | "default": 1000,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 5.0
9 | },
10 | {
11 | "name": "joint1gain",
12 | "default": 1000,
13 | "multiplier_min": 1.0,
14 | "multiplier_max": 5.0
15 | },
16 | {
17 | "name": "joint2gain",
18 | "default": 1000,
19 | "multiplier_min": 1.0,
20 | "multiplier_max": 5.0
21 | },
22 | {
23 | "name": "joint3gain",
24 | "default": 1000,
25 | "multiplier_min": 1.0,
26 | "multiplier_max": 5.0
27 | },
28 | {
29 | "name": "joint0backlash",
30 | "default": -0.91629,
31 | "multiplier_min": 7.5,
32 | "multiplier_max": 1.5
33 | },
34 | {
35 | "name": "joint1backlash",
36 | "default": -0.91629,
37 | "multiplier_min": 7.5,
38 | "multiplier_max": 1.5
39 | },
40 | {
41 | "name": "joint2backlash",
42 | "default": -0.91629,
43 | "multiplier_min": 7.5,
44 | "multiplier_max": 1.5
45 | },
46 | {
47 | "name": "joint3backlash",
48 | "default": -0.91629,
49 | "multiplier_min": 7.5,
50 | "multiplier_max": 1.5
51 | }
52 | ]
53 | }
54 |
--------------------------------------------------------------------------------
/common/envs/config/ErgoReacherRandomizedBacklash/fulldr-hard.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "ErgoReacherRandomizedBacklashHard-v0",
3 | "dimensions": [
4 | {
5 | "name": "joint0gain",
6 | "default": 1000,
7 | "multiplier_min": 5.0,
8 | "multiplier_max": 7.0
9 | },
10 | {
11 | "name": "joint1gain",
12 | "default": 1000,
13 | "multiplier_min": 5.0,
14 | "multiplier_max": 7.0
15 | },
16 | {
17 | "name": "joint2gain",
18 | "default": 1000,
19 | "multiplier_min": 5.0,
20 | "multiplier_max": 7.0
21 | },
22 | {
23 | "name": "joint3gain",
24 | "default": 1000,
25 | "multiplier_min": 5.0,
26 | "multiplier_max": 7.0
27 | },
28 | {
29 | "name": "joint0backlash",
30 | "default": -0.63147,
31 | "multiplier_min": 2.0,
32 | "multiplier_max": 1.5
33 | },
34 | {
35 | "name": "joint1backlash",
36 | "default": -0.63147,
37 | "multiplier_min": 2.0,
38 | "multiplier_max": 1.5
39 | },
40 | {
41 | "name": "joint2backlash",
42 | "default": -0.63147,
43 | "multiplier_min": 2.0,
44 | "multiplier_max": 1.5
45 | },
46 | {
47 | "name": "joint3backlash",
48 | "default": -0.63147,
49 | "multiplier_min": 2.0,
50 | "multiplier_max": 1.5
51 | }
52 | ]
53 | }
54 |
--------------------------------------------------------------------------------
/common/envs/config/HalfCheetahRandomized/default.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "HalfCheetahRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "foot",
6 | "default": 0.046,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.5
9 | },
10 | {
11 | "name": "shin",
12 | "default": 0.046,
13 | "multiplier_min": 1.0,
14 | "multiplier_max": 1.5
15 | },
16 | {
17 | "name": "thigh",
18 | "default": 0.046,
19 | "multiplier_min": 1.0,
20 | "multiplier_max": 1.5
21 | },
22 | {
23 | "name": "torso",
24 | "default": 0.046,
25 | "multiplier_min": 1.0,
26 | "multiplier_max": 1.5
27 | },
28 | {
29 | "name": "head",
30 | "default": 0.046,
31 | "multiplier_min": 1.0,
32 | "multiplier_max": 1.5
33 | }
34 | ],
35 | "geom_map" : {
36 | "foot": ["bfoot", "ffoot"],
37 | "shin": ["bshin", "fshin"],
38 | "thigh": ["bthigh", "fthigh"],
39 | "torso": ["torso"],
40 | "head": ["head"]
41 | },
42 | "suffixes": {
43 | "foot": "0.046",
44 | "shin": "0.046",
45 | "thigh": "0.046",
46 | "torso": "0.046",
47 | "head": "0.046"
48 | }
49 | }
--------------------------------------------------------------------------------
/common/envs/config/HumanoidRandomized/default.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "HumanoidRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "foot",
6 | "default": 0.075,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.5
9 | },
10 | {
11 | "name": "thigh",
12 | "default": 0.06,
13 | "multiplier_min": 1.0,
14 | "multiplier_max": 1.5
15 | },
16 | {
17 | "name": "shin",
18 | "default": 0.06,
19 | "multiplier_min": 1.0,
20 | "multiplier_max": 1.5
21 | },
22 | {
23 | "name": "torso",
24 | "default": 0.07,
25 | "multiplier_min": 1.0,
26 | "multiplier_max": 1.5
27 | },
28 | {
29 | "name": "head",
30 | "default": 0.09,
31 | "multiplier_min": 1.0,
32 | "multiplier_max": 1.5
33 | },
34 | {
35 | "name": "hand",
36 | "default": 0.04,
37 | "multiplier_min": 1.0,
38 | "multiplier_max": 1.5
39 | }
40 | ],
41 | "geom_map" : {
42 | "head": ["head"],
43 | "thigh": ["right_thigh1", "left_thigh1"],
44 | "shin": ["right_shin1", "left_shin1"],
45 | "foot": ["left_foot", "right_foot"],
46 | "hand": ["left_hand", "right_hand"],
47 | "torso": ["torso1"]
48 | }
49 | }
--------------------------------------------------------------------------------
/common/envs/config/LunarLanderRandomized/10.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "LunarLanderRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "main_engine",
6 | "default": 10.0,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.0
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/common/envs/config/LunarLanderRandomized/16.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "LunarLanderRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "main_engine",
6 | "default": 16.0,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.0
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/common/envs/config/LunarLanderRandomized/debug.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "LunarLanderRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "main_engine",
6 | "default": 11.0,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.0
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/common/envs/config/LunarLanderRandomized/default.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "LunarLanderRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "main_engine",
6 | "default": 13.0,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.0
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/common/envs/config/LunarLanderRandomized/random2D_820.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "LunarLanderRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "main_engine",
6 | "default": 13.0,
7 | "multiplier_min": 0.615,
8 | "multiplier_max": 1.538
9 | },
10 | {
11 | "name": "side_engine",
12 | "default": 0.6,
13 | "multiplier_min": 0.167,
14 | "multiplier_max": 3.333
15 | }
16 | ]
17 | }
--------------------------------------------------------------------------------
/common/envs/config/LunarLanderRandomized/random_1720.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "LunarLanderRandomized-Expert_0-v0",
3 | "dimensions": [
4 | {
5 | "name": "main_engine",
6 | "default": 13,
7 | "multiplier_min": 1.308,
8 | "multiplier_max": 1.538
9 | }
10 | ]
11 | }
--------------------------------------------------------------------------------
/common/envs/config/LunarLanderRandomized/random_620.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "LunarLanderRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "main_engine",
6 | "default": 13.0,
7 | "multiplier_min": 0.462,
8 | "multiplier_max": 1.538
9 | }
10 | ]
11 | }
--------------------------------------------------------------------------------
/common/envs/config/LunarLanderRandomized/random_811.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "LunarLanderRandomized-Expert_0-v0",
3 | "dimensions": [
4 | {
5 | "name": "main_engine",
6 | "default": 8,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.375
9 | }
10 | ]
11 | }
--------------------------------------------------------------------------------
/common/envs/config/LunarLanderRandomized/random_812.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "LunarLanderRandomized-Expert_0-v0",
3 | "dimensions": [
4 | {
5 | "name": "main_engine",
6 | "default": 8,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.5
9 | }
10 | ]
11 | }
--------------------------------------------------------------------------------
/common/envs/config/LunarLanderRandomized/random_813.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "LunarLanderRandomized-Expert_0-v0",
3 | "dimensions": [
4 | {
5 | "name": "main_engine",
6 | "default": 8,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.625
9 | }
10 | ]
11 | }
--------------------------------------------------------------------------------
/common/envs/config/LunarLanderRandomized/random_820.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "LunarLanderRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "main_engine",
6 | "default": 13.0,
7 | "multiplier_min": 0.615,
8 | "multiplier_max": 1.538
9 | }
10 | ]
11 | }
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFGeneralization/00.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFDefault-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.5,
8 | "multiplier_max": 0.666
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.833,
14 | "multiplier_max": 1.0
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFGeneralization/01.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFDefault-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.666,
8 | "multiplier_max": 0.833
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.833,
14 | "multiplier_max": 1.0
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFGeneralization/02.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFDefault-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.833,
8 | "multiplier_max": 1.0
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.833,
14 | "multiplier_max": 1.0
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFGeneralization/10.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFDefault-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.5,
8 | "multiplier_max": 0.666
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.666,
14 | "multiplier_max": 0.833
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFGeneralization/11.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFDefault-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.666,
8 | "multiplier_max": 0.833
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.666,
14 | "multiplier_max": 0.833
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFGeneralization/12.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFDefault-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.833,
8 | "multiplier_max": 1.0
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.666,
14 | "multiplier_max": 0.833
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFGeneralization/20.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFDefault-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.5,
8 | "multiplier_max": 0.666
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.5,
14 | "multiplier_max": 0.666
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFGeneralization/21.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFDefault-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.666,
8 | "multiplier_max": 0.833
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.5,
14 | "multiplier_max": 0.666
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFGeneralization/22.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFDefault-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.833,
8 | "multiplier_max": 1.0
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.5,
14 | "multiplier_max": 0.666
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFRandomized/default.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFDefault-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 1.0,
8 | "multiplier_max": 1.0
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 1.0,
14 | "multiplier_max": 1.0
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFRandomized/fulldr-easy.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFRandomizedEasy-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.666,
8 | "multiplier_max": 1.0
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.666,
14 | "multiplier_max": 1.0
15 | }
16 | ]
17 | }
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFRandomized/fulldr-toohard.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFDefault-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.05,
8 | "multiplier_max": 0.05
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.05,
14 | "multiplier_max": 0.05
15 | }
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFRandomized/fulldr.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFRandomized-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.4,
8 | "multiplier_max": 1.0
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.4,
14 | "multiplier_max": 1.0
15 | }
16 | ]
17 | }
--------------------------------------------------------------------------------
/common/envs/config/Pusher3DOFRandomized/hard.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "Pusher3DOFHard-v0",
3 | "dimensions": [
4 | {
5 | "name": "frictionloss",
6 | "default": 0.01,
7 | "multiplier_min": 0.4,
8 | "multiplier_max": 0.5
9 | },
10 | {
11 | "name": "damping",
12 | "default": 0.025,
13 | "multiplier_min": 0.4,
14 | "multiplier_max": 0.5
15 | }
16 | ]
17 | }
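
These configs are consumed by the RandomizedEnvWrapper / Dimension machinery listed further below: each entry's randomization interval is simply default * [multiplier_min, multiplier_max]. A quick illustration of that arithmetic for the "hard" config above (the helper function is only for illustration and is not part of the repository):

    # Reproduces the range computation done in common/envs/dimension.py.
    def randomization_range(default, multiplier_min, multiplier_max):
        return default * multiplier_min, default * multiplier_max

    print(randomization_range(0.01, 0.4, 0.5))    # frictionloss -> (0.004, 0.005)
    print(randomization_range(0.025, 0.4, 0.5))   # damping      -> (0.01, 0.0125)
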
--------------------------------------------------------------------------------
/common/envs/config/__init__.py:
--------------------------------------------------------------------------------
1 | import os
2 | CONFIG_PATH = os.path.abspath(os.path.dirname(__file__))
3 |
--------------------------------------------------------------------------------
/common/envs/dimension.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class Dimension(object):
5 |     """Class which handles the machinery for randomizing a particular dimension
6 | """
7 | def __init__(self, default_value, seed, multiplier_min=0.0, multiplier_max=1.0, name=None):
8 |         """Stores the default value and derives the randomization range from the multipliers
9 | """
10 | self.default_value = default_value
11 | self.current_value = default_value
12 | self.multiplier_min = multiplier_min
13 | self.multiplier_max = multiplier_max
14 | self.range_min = self.default_value * self.multiplier_min
15 | self.range_max = self.default_value * self.multiplier_max
16 | self.name = name
17 |
18 | # TODO: doesn't this change the random seed for all numpy uses?
19 | np.random.seed(seed)
20 |
21 | def _rescale(self, value):
22 | """Rescales normalized value to be within range of env. dimension
23 | """
24 | return self.range_min + (self.range_max - self.range_min) * value
25 |
26 | def randomize(self):
27 | self.current_value = np.random.uniform(low=self.range_min, high=self.range_max)
28 |
29 | def reset(self):
30 | self.current_value = self.default_value
31 |
32 | def set(self, value):
33 | self.current_value = value
34 |
35 |
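
For illustration, a minimal usage sketch of Dimension, using the damping entry from the Pusher3DOF "fulldr" config above (the seed is arbitrary and the repo is assumed to be on PYTHONPATH):

    from common.envs.dimension import Dimension

    dim = Dimension(default_value=0.025, seed=123,
                    multiplier_min=0.4, multiplier_max=1.0, name='damping')
    dim.randomize()               # draws uniformly from [0.01, 0.025]
    print(dim.name, dim.current_value)
    print(dim._rescale(0.5))      # maps a normalized value in [0, 1] into that range -> 0.0175
    dim.reset()                   # back to the default value
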
--------------------------------------------------------------------------------
/common/envs/ergoreacher.py:
--------------------------------------------------------------------------------
1 | from gym_ergojr.envs import ErgoReacherEnv
2 | import numpy as np
3 |
4 |
5 | class ErgoReacherRandomizedEnv(ErgoReacherEnv):
6 | def __init__(self, **kwargs):
7 | self.dimensions = [] # this will be 8 elements long after wrapper init
8 | self.config_file = kwargs.get('config')
9 |
10 | del kwargs['config']
11 |
12 | super().__init__(**kwargs)
13 |
14 | # # these two are affected by the DR
15 | # self.max_force
16 | # self.max_vel
17 |
18 | def step(self, action):
19 | observation, reward, done, info = super().step(action)
20 | info = {'goal_dist': self.dist.query()}
21 | return observation, reward, False, info # I'm not crazy excited about the lack of early stopping, but alright
22 |
23 | def _update_randomized_params(self):
24 | # these are used automatically in the `step` function
25 | self.max_force = np.zeros(6, np.float32)
26 | self.max_vel = np.zeros(6, np.float32)
27 |
28 | if self.simple:
29 | self.max_force[[1, 2, 4, 5]] = [x.current_value for x in self.dimensions[:4]]
30 | self.max_vel[[1, 2, 4, 5]] = [x.current_value for x in self.dimensions[4:]]
31 | else:
32 | self.max_force[:] = [x.current_value for x in self.dimensions[:6]]
33 | self.max_vel[:] = [x.current_value for x in self.dimensions[6:]]
34 |
--------------------------------------------------------------------------------
/common/envs/ergoreacherbacklash.py:
--------------------------------------------------------------------------------
1 | from gym_ergojr.envs import ErgoReacherHeavyEnv
2 | import numpy as np
3 |
4 |
5 | class ErgoReacherRandomizedBacklashEnv(ErgoReacherHeavyEnv):
6 | def __init__(self, **kwargs):
7 | self.dimensions = [] # this will be 8 elements long after wrapper init
8 | self.config_file = kwargs.get('config')
9 |
10 | del kwargs['config']
11 |
12 | super().__init__(**kwargs)
13 |
14 | # # these three are affected by the DR
15 | # self.max_force
16 | # backlash + self.force_urdf_reload
17 |
18 | def step(self, action):
19 | observation, reward, done, info = super().step(action)
20 | info = {'goal_dist': self.dist.query()}
21 | return observation, reward, False, info
22 |
23 | def _update_randomized_params(self):
24 | # the self.max_force is used automatically in the step function,
25 | # but for the backlash to take effect, self.reset() has to be called
26 | self.max_force = np.zeros(6, np.float32)
27 | backlash = np.zeros(6, np.float32)
28 |
29 | if self.simple:
30 | self.max_force[[0, 3]] = [1000, 1000] # setting these to default
31 |
32 | self.max_force[[1, 2, 4, 5]] = [x.current_value for x in self.dimensions[:4]]
33 |
34 | # The values coming into the backlash from the JSON are from -2.302585*4 = -9.2103 to 0
35 | # ...so that when we do e^[-9.2103,0] we get [0.0001,1]
36 | backlash[[1, 2, 4, 5]] = [np.power(np.e, x.current_value) for x in self.dimensions[4:]]
37 | self.update_backlash(backlash)
38 | else:
39 | raise NotImplementedError("just ping me and I'll write this if need be")
40 | # reason I haven't written this yet is because
41 | # the 6dof+backlash task is wayyy too hard
42 |
43 | self.reset()
44 |
--------------------------------------------------------------------------------
/common/envs/fetch.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os.path as osp
3 | from gym.envs.robotics import rotations, robot_env, utils
4 | from gym.utils import EzPickle
5 | from common.envs.assets import MODEL_PATH
6 |
7 |
8 | def goal_distance(goal_a, goal_b):
9 | assert goal_a.shape == goal_b.shape
10 | return np.linalg.norm(goal_a - goal_b, axis=-1)
11 |
12 |
13 | class FetchEnv(robot_env.RobotEnv):
14 | """Superclass for all Fetch environments.
15 | """
16 |
17 | def __init__(
18 | self, model_path, n_substeps, gripper_extra_height, block_gripper,
19 | has_object, target_in_the_air, target_offset, obj_range, target_range,
20 | distance_threshold, initial_qpos, reward_type,
21 | ):
22 | """Initializes a new Fetch environment.
23 | Args:
24 |             model_path (string): path to the environment's XML file
25 | n_substeps (int): number of substeps the simulation runs on every call to step
26 | gripper_extra_height (float): additional height above the table when positioning the gripper
27 |             block_gripper (boolean): whether or not the gripper is blocked (i.e. not movable)
28 | has_object (boolean): whether or not the environment has an object
29 |             target_in_the_air (boolean): whether the target should be in the air above the table or on the table surface
30 | target_offset (float or array with 3 elements): offset of the target
31 | obj_range (float): range of a uniform distribution for sampling initial object positions
32 | target_range (float): range of a uniform distribution for sampling a target
33 | distance_threshold (float): the threshold after which a goal is considered achieved
34 | initial_qpos (dict): a dictionary of joint names and values that define the initial configuration
35 | reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense
36 | """
37 | self.gripper_extra_height = gripper_extra_height
38 | self.block_gripper = block_gripper
39 | self.has_object = has_object
40 | self.target_in_the_air = target_in_the_air
41 | self.target_offset = target_offset
42 | self.obj_range = obj_range
43 | self.target_range = target_range
44 | self.distance_threshold = distance_threshold
45 | self.reward_type = reward_type
46 |
47 | super(FetchEnv, self).__init__(
48 | model_path=model_path, n_substeps=n_substeps, n_actions=4,
49 | initial_qpos=initial_qpos)
50 |
51 | # GoalEnv methods
52 | # ----------------------------
53 |
54 | def compute_reward(self, achieved_goal, goal, info):
55 | # Compute distance between goal and the achieved goal.
56 | d = goal_distance(achieved_goal, goal)
57 | if self.reward_type == 'sparse':
58 | return -(d > self.distance_threshold).astype(np.float32)
59 | else:
60 | return -d
61 |
62 | # RobotEnv methods
63 | # ----------------------------
64 |
65 | def _step_callback(self):
66 | if self.block_gripper:
67 | self.sim.data.set_joint_qpos('robot0:l_gripper_finger_joint', 0.)
68 | self.sim.data.set_joint_qpos('robot0:r_gripper_finger_joint', 0.)
69 | self.sim.forward()
70 |
71 | def _set_action(self, action):
72 | assert action.shape == (4,)
73 | action = action.copy() # ensure that we don't change the action outside of this scope
74 | pos_ctrl, gripper_ctrl = action[:3], action[3]
75 |
76 | pos_ctrl *= 0.05 # limit maximum change in position
77 | rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion
78 | gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl])
79 | assert gripper_ctrl.shape == (2,)
80 | if self.block_gripper:
81 | gripper_ctrl = np.zeros_like(gripper_ctrl)
82 | action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl])
83 |
84 | # Apply action to simulation.
85 | utils.ctrl_set_action(self.sim, action)
86 | utils.mocap_set_action(self.sim, action)
87 |
88 | def _get_obs(self):
89 | # positions
90 | grip_pos = self.sim.data.get_site_xpos('robot0:grip')
91 | dt = self.sim.nsubsteps * self.sim.model.opt.timestep
92 | grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt
93 | robot_qpos, robot_qvel = utils.robot_get_obs(self.sim)
94 | if self.has_object:
95 | object_pos = self.sim.data.get_site_xpos('object0')
96 | # rotations
97 | object_rot = rotations.mat2euler(self.sim.data.get_site_xmat('object0'))
98 | # velocities
99 | object_velp = self.sim.data.get_site_xvelp('object0') * dt
100 | object_velr = self.sim.data.get_site_xvelr('object0') * dt
101 | # gripper state
102 | object_rel_pos = object_pos - grip_pos
103 | object_velp -= grip_velp
104 | else:
105 | object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.zeros(0)
106 | gripper_state = robot_qpos[-2:]
107 | gripper_vel = robot_qvel[-2:] * dt # change to a scalar if the gripper is made symmetric
108 |
109 | if not self.has_object:
110 | achieved_goal = grip_pos.copy()
111 | else:
112 | achieved_goal = np.squeeze(object_pos.copy())
113 | obs = np.concatenate([
114 | grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(),
115 | object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel,
116 | ])
117 |
118 | return {
119 | 'observation': obs.copy(),
120 | 'achieved_goal': achieved_goal.copy(),
121 | 'desired_goal': self.goal.copy(),
122 | }
123 |
124 | def _viewer_setup(self):
125 | body_id = self.sim.model.body_name2id('robot0:gripper_link')
126 | lookat = self.sim.data.body_xpos[body_id]
127 | for idx, value in enumerate(lookat):
128 | self.viewer.cam.lookat[idx] = value
129 | self.viewer.cam.distance = 2.5
130 | self.viewer.cam.azimuth = 132.
131 | self.viewer.cam.elevation = -14.
132 |
133 | def _render_callback(self):
134 | # Visualize target.
135 | sites_offset = (self.sim.data.site_xpos - self.sim.model.site_pos).copy()
136 | site_id = self.sim.model.site_name2id('target0')
137 | self.sim.model.site_pos[site_id] = self.goal - sites_offset[0]
138 | self.sim.forward()
139 |
140 | def _reset_sim(self):
141 | self.sim.set_state(self.initial_state)
142 |
143 | # Randomize start position of object.
144 | if self.has_object:
145 | object_xpos = self.initial_gripper_xpos[:2]
146 | while np.linalg.norm(object_xpos - self.initial_gripper_xpos[:2]) < 0.1:
147 | object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform(-self.obj_range, self.obj_range, size=2)
148 | object_qpos = self.sim.data.get_joint_qpos('object0:joint')
149 | assert object_qpos.shape == (7,)
150 | object_qpos[:2] = object_xpos
151 | self.sim.data.set_joint_qpos('object0:joint', object_qpos)
152 |
153 | self.sim.forward()
154 | return True
155 |
156 | def _sample_goal(self):
157 | if self.has_object:
158 | goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3)
159 | goal += self.target_offset
160 | goal[2] = self.height_offset
161 | if self.target_in_the_air and self.np_random.uniform() < 0.5:
162 | goal[2] += self.np_random.uniform(0, 0.45)
163 | else:
164 | goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-0.15, 0.15, size=3)
165 | return goal.copy()
166 |
167 | def _is_success(self, achieved_goal, desired_goal):
168 | d = goal_distance(achieved_goal, desired_goal)
169 | return (d < self.distance_threshold).astype(np.float32)
170 |
171 | def _env_setup(self, initial_qpos):
172 | for name, value in initial_qpos.items():
173 | self.sim.data.set_joint_qpos(name, value)
174 | utils.reset_mocap_welds(self.sim)
175 | self.sim.forward()
176 |
177 | # Move end effector into position.
178 | gripper_target = np.array([-0.498, 0.005, -0.431 + self.gripper_extra_height]) + self.sim.data.get_site_xpos('robot0:grip')
179 | gripper_rotation = np.array([1., 0., 1., 0.])
180 | self.sim.data.set_mocap_pos('robot0:mocap', gripper_target)
181 | self.sim.data.set_mocap_quat('robot0:mocap', gripper_rotation)
182 | for _ in range(10):
183 | self.sim.step()
184 |
185 | # Extract information for sampling goals.
186 | self.initial_gripper_xpos = self.sim.data.get_site_xpos('robot0:grip').copy()
187 | if self.has_object:
188 | self.height_offset = self.sim.data.get_site_xpos('object0')[2]
189 |
190 |
191 | # Ensure we get the path separator correct on windows
192 | MODEL_XML_PATH = osp.join(MODEL_PATH, 'fetch', 'reach.xml')
193 |
194 |
195 | class FetchReachEnv(FetchEnv, EzPickle):
196 | def __init__(self, reward_type='sparse'):
197 | initial_qpos = {
198 | 'robot0:slide0': 0.4049,
199 | 'robot0:slide1': 0.48,
200 | 'robot0:slide2': 0.0,
201 | }
202 | FetchEnv.__init__(
203 | self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20,
204 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
205 | obj_range=0.15, target_range=0.15, distance_threshold=0.05,
206 | initial_qpos=initial_qpos, reward_type=reward_type)
207 | EzPickle.__init__(self)
--------------------------------------------------------------------------------
/common/envs/half_cheetah.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | from common.envs.randomized_locomotion import RandomizedLocomotionEnv
6 |
7 | class HalfCheetahRandomizedEnv(RandomizedLocomotionEnv):
8 | def __init__(self, **kwargs):
9 | RandomizedLocomotionEnv.__init__(self, **kwargs)
10 |
11 | def step(self, action):
12 | xposbefore = self.sim.data.qpos[0]
13 | self.do_simulation(action, self.frame_skip)
14 | xposafter = self.sim.data.qpos[0]
15 | ob = self._get_obs()
16 | reward_ctrl = - 0.1 * np.square(action).sum()
17 | reward_run = (xposafter - xposbefore)/self.dt
18 | reward = reward_ctrl + reward_run
19 | done = False
20 | return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
21 |
22 | def _get_obs(self):
23 | return np.concatenate([
24 | self.sim.data.qpos.flat[1:],
25 | self.sim.data.qvel.flat,
26 | ])
27 |
28 | def reset_model(self):
29 | qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
30 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
31 | self.set_state(qpos, qvel)
32 | return self._get_obs()
33 |
34 | def viewer_setup(self):
35 | self.viewer.cam.distance = self.model.stat.extent * 0.5
36 |
--------------------------------------------------------------------------------
/common/envs/humanoid.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | from gym import utils
5 | from gym.envs.mujoco import mujoco_env
6 | import xml.etree.ElementTree as et
7 |
8 | import mujoco_py
9 |
10 | from common.envs.randomized_locomotion import RandomizedLocomotionEnv
11 |
12 |
13 | def mass_center(model, sim):
14 | mass = np.expand_dims(model.body_mass, 1)
15 | xpos = sim.data.xipos
16 |
17 | return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
18 |
19 |
20 | # TODO: this class is not Thread-Safe
21 | class HumanoidRandomizedEnv(RandomizedLocomotionEnv):
22 | def __init__(self, **kwargs):
23 | RandomizedLocomotionEnv.__init__(self, **kwargs)
24 |
25 | def _get_obs(self):
26 | data = self.sim.data
27 | return np.concatenate([data.qpos.flat[2:],
28 | data.qvel.flat,
29 | data.cinert.flat,
30 | data.cvel.flat,
31 | data.qfrc_actuator.flat,
32 | data.cfrc_ext.flat])
33 |
34 | def step(self, a):
35 | pos_before = mass_center(self.model, self.sim)
36 | self.do_simulation(a, self.frame_skip)
37 | pos_after = mass_center(self.model, self.sim)
38 | alive_bonus = 5.0
39 | data = self.sim.data
40 | lin_vel_cost = 0.25 * (pos_after - pos_before) / self.model.opt.timestep
41 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
42 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
43 | quad_impact_cost = min(quad_impact_cost, 10)
44 | reward = lin_vel_cost - quad_ctrl_cost - quad_impact_cost + alive_bonus
45 | qpos = self.sim.data.qpos
46 | done = bool((qpos[2] < 1.0) or (qpos[2] > 2.0))
47 | return self._get_obs(), reward, done, dict(reward_linvel=lin_vel_cost, reward_quadctrl=-quad_ctrl_cost,
48 | reward_alive=alive_bonus, reward_impact=-quad_impact_cost)
49 |
50 | def reset_model(self):
51 | c = 0.01
52 | self.set_state(
53 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
54 | self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv, )
55 | )
56 | return self._get_obs()
57 |
58 | def viewer_setup(self):
59 | self.viewer.cam.trackbodyid = 1
60 | self.viewer.cam.distance = self.model.stat.extent * 1.0
61 | self.viewer.cam.lookat[2] = 2.0
62 |
63 | self.viewer.cam.elevation = -20
64 |
--------------------------------------------------------------------------------
/common/envs/pusher.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | from gym import utils
5 | from gym.envs.mujoco import mujoco_env
6 | import xml.etree.ElementTree as et
7 |
8 | import mujoco_py
9 |
10 | PLANE_LOCATION_Z = -0.325
11 |
12 |
13 | # TODO: this class is not Thread-Safe
14 | class PusherRandomizedEnv(mujoco_env.MujocoEnv, utils.EzPickle):
15 | def __init__(self, **kwargs):
16 | utils.EzPickle.__init__(self)
17 | mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', frame_skip=5)
18 |
19 | # randomization
20 | self.reference_path = os.path.join(os.path.dirname(mujoco_env.__file__), "assets", 'pusher.xml')
21 | self.reference_xml = et.parse(self.reference_path)
22 | self.config_file = kwargs.get('config')
23 | self.dimensions = []
24 | self._locate_randomize_parameters()
25 |
26 | def _locate_randomize_parameters(self):
27 | self.root = self.reference_xml.getroot()
28 | self.geom = self.root.find("./default/geom[@friction]")
29 | roll_link = self.root.find(".//body[@name='r_wrist_roll_link']")
30 | self.wrist = roll_link.findall("./geom[@type='capsule']")
31 | self.tips = roll_link.findall("./body[@name='tips_arm']/geom")
32 | self.object_body = self.root.find(".//body[@name='object']")
33 | self.object_body_geom = self.object_body.findall('./geom')
34 | self.goal_body = self.root.find(".//body[@name='goal']/geom")
35 |
36 | def _update_randomized_params(self):
37 | xml = self._create_xml()
38 | self._re_init(xml)
39 |
40 | def _re_init(self, xml):
41 | self.model = mujoco_py.load_model_from_xml(xml)
42 | self.sim = mujoco_py.MjSim(self.model)
43 | self.data = self.sim.data
44 | self.init_qpos = self.data.qpos.ravel().copy()
45 | self.init_qvel = self.data.qvel.ravel().copy()
46 | observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
47 | assert not done
48 | if self.viewer:
49 | self.viewer.update_sim(self.sim)
50 |
51 | def _create_xml(self):
52 | # TODO: I might speed this up, but I think is insignificant w.r.t to the model/sim creation...
53 | self._randomize_friction()
54 | self._randomize_density()
55 | self._randomize_size()
56 |
57 | return et.tostring(self.root, encoding='unicode', method='xml')
58 |
59 | # TODO: I'm making an assumption here that 3 places after the comma are good enough, are they?
60 | def _randomize_friction(self):
61 |         self.geom.set('friction', '{:.3f} 0.1 0.1'.format(self.dimensions[0].current_value))
62 |
63 | def _randomize_density(self):
64 |         self.geom.set('density', '{:.3f}'.format(self.dimensions[1].current_value))
65 |
66 | def _randomize_size(self):
67 | size = self.dimensions[2].current_value
68 |
69 | # grabber
70 | grabber_width = size * 2
71 |         self.wrist[0].set('fromto', '0 -{:.3f} 0. 0.0 +{:.3f} 0'.format(grabber_width, grabber_width))
72 |         self.wrist[1].set('fromto', '0 -{:.3f} 0. {:.3f} -{:.3f} 0'.format(grabber_width, grabber_width, grabber_width))
73 |         self.wrist[2].set('fromto', '0 +{:.3f} 0. {:.3f} +{:.3f} 0'.format(grabber_width, grabber_width, grabber_width))
74 |         self.tips[0].set('pos', '{:.3f} -{:.3f} 0.'.format(grabber_width, grabber_width))
75 |         self.tips[1].set('pos', '{:.3f} {:.3f} 0.'.format(grabber_width, grabber_width))
76 |
77 | # object
78 | # self.object_body.set('pos', '0.45 -0.05 {:3f}'.format(PLANE_LOCATION_Z + size))
79 | # for geom in self.object_body_geom:
80 | # geom.set('size', "{:3f} {:3f} {:3f}".format(size, size, size))
81 |
82 | # goal
83 | # TODO: maybe a constant here? 1.6 is 0.08 / 0.05, the goal diam shrinks with the object diam
84 | # self.goal_body.set('size', "{:3f} 0.001 0.1".format(size * 1.6))
85 |
86 | def step(self, a):
87 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
88 | vec_2 = self.get_body_com("object") - self.get_body_com("goal")
89 |
90 | reward_near = - np.linalg.norm(vec_1)
91 | reward_dist = - np.linalg.norm(vec_2)
92 | reward_ctrl = - np.square(a).sum()
93 | reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
94 |
95 | self.do_simulation(a, self.frame_skip)
96 | ob = self._get_obs()
97 | done = False
98 | return ob, reward, done, dict(reward_dist=reward_dist,
99 | reward_ctrl=reward_ctrl)
100 |
101 | def viewer_setup(self):
102 | self.viewer.cam.trackbodyid = -1
103 | self.viewer.cam.distance = 4.0
104 |
105 | def reset_model(self):
106 | qpos = self.init_qpos
107 |
108 | self.goal_pos = np.asarray([0, 0])
109 | while True:
110 | self.cylinder_pos = np.concatenate([
111 | self.np_random.uniform(low=-0.3, high=0, size=1),
112 | self.np_random.uniform(low=-0.2, high=0.2, size=1)])
113 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
114 | break
115 |
116 | qpos[-4:-2] = self.cylinder_pos
117 | qpos[-2:] = self.goal_pos
118 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
119 | high=0.005, size=self.model.nv)
120 | qvel[-4:] = 0
121 | self.set_state(qpos, qvel)
122 | return self._get_obs()
123 |
124 | def _get_obs(self):
125 | return np.concatenate([
126 | self.sim.data.qpos.flat[:7],
127 | self.sim.data.qvel.flat[:7],
128 | self.get_body_com("tips_arm"),
129 | self.get_body_com("object"),
130 | self.get_body_com("goal"),
131 | ])
--------------------------------------------------------------------------------
/common/envs/pusher3dof.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import numpy as np
4 | from gym import utils
5 | from gym.envs.mujoco import mujoco_env
6 | import xml.etree.ElementTree as et
7 |
8 | import mujoco_py
9 |
10 |
11 | class PusherEnv3DofEnv(mujoco_env.MujocoEnv, utils.EzPickle):
12 | def __init__(self, **kwargs):
13 | utils.EzPickle.__init__(self)
14 | self.reference_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
15 | 'assets/pusher_3dof.xml')
16 | mujoco_env.MujocoEnv.__init__(self, self.reference_path, frame_skip=5)
17 |
18 | self.model.stat.extent = 10
19 |
20 | # randomization
21 | self.reference_xml = et.parse(self.reference_path)
22 | self.config_file = kwargs.get('config')
23 | self.dimensions = []
24 | self._locate_randomize_parameters()
25 |
26 | def _locate_randomize_parameters(self):
27 | self.root = self.reference_xml.getroot()
28 | end_effector = self.root.find(".//body[@name='distal_4']")
29 | self.wrist = end_effector.findall("./geom[@type='capsule']")
30 | self.tips = end_effector.findall(".//body[@name='tips_arm']/geom")
31 | self.object_body = self.root.find(".//body[@name='object']/geom")
32 | self.object_joints = self.root.findall(".//body[@name='object']/joint")
33 |
34 | def _update_randomized_params(self):
35 | xml = self._create_xml()
36 | self._re_init(xml)
37 |
38 | def _re_init(self, xml):
39 | self.model = mujoco_py.load_model_from_xml(xml)
40 | self.sim = mujoco_py.MjSim(self.model)
41 | self.data = self.sim.data
42 | self.init_qpos = self.data.qpos.ravel().copy()
43 | self.init_qvel = self.data.qvel.ravel().copy()
44 | observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
45 | assert not done
46 | if self.viewer:
47 | self.viewer.update_sim(self.sim)
48 |
49 | def _create_xml(self):
50 | # TODO: I might speed this up, but I think is insignificant w.r.t to the model/sim creation...
51 | self._randomize_friction()
52 | self._randomize_damping()
53 | # self._randomize_size()
54 |
55 | return et.tostring(self.root, encoding='unicode', method='xml')
56 |
57 | # TODO: I'm making an assumption here that 3 places after the comma are good enough, are they?
58 | def _randomize_friction(self):
59 | frictionloss = self.dimensions[0].current_value
60 |
61 | for joint in self.object_joints:
62 |             joint.set('frictionloss', '{:.3f}'.format(frictionloss))
63 |
64 | def _randomize_damping(self):
65 | damping = self.dimensions[1].current_value
66 | for joint in self.object_joints:
67 |             joint.set('damping', '{:.3f}'.format(damping))
68 |
69 | def _randomize_size(self):
70 | size = self.dimensions[2].current_value
71 | # grabber
72 | grabber_width = size * 2
73 |         self.wrist[0].set('fromto', '0 -{:.3f} 0. 0.0 +{:.3f} 0'.format(grabber_width, grabber_width))
74 |         self.wrist[1].set('fromto', '0 -{:.3f} 0. {:.3f} -{:.3f} 0'.format(grabber_width, grabber_width, grabber_width))
75 |         self.wrist[2].set('fromto', '0 +{:.3f} 0. {:.3f} +{:.3f} 0'.format(grabber_width, grabber_width, grabber_width))
76 |         self.tips[0].set('pos', '{:.3f} -{:.3f} 0.'.format(grabber_width, grabber_width))
77 |         self.tips[1].set('pos', '{:.3f} {:.3f} 0.'.format(grabber_width, grabber_width))
78 |
79 | def step(self, action):
80 | arm_dist = np.linalg.norm(self.get_body_com("object")[:2] - self.get_body_com("tips_arm")[:2])
81 | goal_dist = np.linalg.norm(self.get_body_com("object")[:2] - self.get_body_com("goal")[:2])
82 |
83 | # Reward from Soft Q Learning
84 | action_cost = np.square(action).sum()
85 | reward = -0.1 * action_cost - goal_dist
86 |
87 | self.do_simulation(action, self.frame_skip)
88 | ob = self._get_obs()
89 | done = False
90 |
91 | return ob, reward, done, {'arm_dist': arm_dist, 'goal_dist': goal_dist}
92 |
93 | def viewer_setup(self):
94 | coords = [.7, -.5, 0]
95 | for i in range(3):
96 | self.viewer.cam.lookat[i] = coords[i]
97 | self.viewer.cam.trackbodyid = -1
98 | self.viewer.cam.distance = 2
99 |
100 | def reset_model(self):
101 | qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
102 |
103 | # Original
104 | # object_ = np.random.uniform(low=[.3,-1.0], high=[1.2,-0.4])
105 | # goal = np.random.uniform(low=[.8,-1.2], high=[1.2,-0.8])
106 |
107 | while True:
108 | # NOW RUNNING: "HARDER*"
109 | object_ = np.random.uniform(low=[.4,-1.0], high=[1.2,-0.5])
110 | # object_ = np.random.uniform(low=[.5,-1.0], high=[1.2,-0.6])
111 | goal = np.random.uniform(low=[.8,-1.2], high=[1.2,-0.8])
112 | if np.linalg.norm(object_ - goal) > 0.45:
113 | break
114 |
115 | self.object = np.array(object_)
116 | self.goal = np.array(goal)
117 |
118 | qpos[-4:-2] = self.object
119 | qpos[-2:] = self.goal
120 | qvel = self.init_qvel
121 | qvel[-4:] = 0
122 | self.set_state(qpos, qvel)
123 | return self._get_obs()
124 |
125 | def _get_obs(self):
126 | return np.concatenate([
127 | self.sim.data.qpos.flat[:-4],
128 | self.sim.data.qvel.flat[:-4],
129 | self.get_body_com("distal_4")[:2],
130 | self.get_body_com("object")[:2],
131 | self.get_body_com("goal")[:2],
132 | ])
133 |
--------------------------------------------------------------------------------
/common/envs/randomized_locomotion.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import json
4 | import numpy as np
5 | from gym import utils
6 | from gym.envs.mujoco import mujoco_env
7 | import xml.etree.ElementTree as et
8 |
9 | import mujoco_py
10 |
11 |
12 | def mass_center(model, sim):
13 | mass = np.expand_dims(model.body_mass, 1)
14 | xpos = sim.data.xipos
15 |
16 | return (np.sum(mass * xpos, 0) / np.sum(mass))[0]
17 |
18 |
19 | # TODO: this class is not Thread-Safe
20 | class RandomizedLocomotionEnv(mujoco_env.MujocoEnv, utils.EzPickle):
21 | def __init__(self, **kwargs):
22 | utils.EzPickle.__init__(self)
23 | mujoco_env.MujocoEnv.__init__(self, kwargs.get('xml_name'), frame_skip=5)
24 |
25 | # randomization
26 | self.reference_path = os.path.join(os.path.dirname(mujoco_env.__file__), "assets", kwargs.get('xml_name'))
27 | self.reference_xml = et.parse(self.reference_path)
28 | self.config_file = kwargs.get('config')
29 | self.dimensions = []
30 | self.dimension_map = []
31 | self.suffixes = []
32 | self._locate_randomize_parameters()
33 |
34 | def _locate_randomize_parameters(self):
35 | self.root = self.reference_xml.getroot()
36 | with open(self.config_file, mode='r') as f:
37 | config = json.load(f)
38 |
39 | check_suffixes = config.get('suffixes', False)
40 |
41 | for entry in config['dimensions']:
42 | name = entry["name"]
43 | self.dimension_map.append([])
44 | for geom in config["geom_map"][name]:
45 | self.dimension_map[-1].append(self.root.find(".//geom[@name='{}']".format(geom)))
46 |
47 | if check_suffixes:
48 | suffix = config['suffixes'].get(name, "")
49 | self.suffixes.append(suffix)
50 | else:
51 | self.suffixes.append("")
52 |
53 | def _create_xml(self):
54 | for i, bodypart in enumerate(self.dimensions):
55 | for geom in self.dimension_map[i]:
56 | suffix = self.suffixes[i]
57 |                 value = "{:.3f} {}".format(self.dimensions[i].current_value, suffix)
58 | geom.set('size', '{}'.format(value))
59 |
60 | return et.tostring(self.root, encoding='unicode', method='xml')
61 |
62 | def _update_randomized_params(self):
63 | xml = self._create_xml()
64 | self._re_init(xml)
65 |
66 | def _re_init(self, xml):
67 | self.model = mujoco_py.load_model_from_xml(xml)
68 | self.sim = mujoco_py.MjSim(self.model)
69 | self.data = self.sim.data
70 | self.init_qpos = self.data.qpos.ravel().copy()
71 | self.init_qvel = self.data.qvel.ravel().copy()
72 | observation, _reward, done, _info = self.step(np.zeros(self.model.nu))
73 | assert not done
74 | if self.viewer:
75 | self.viewer.update_sim(self.sim)
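
RandomizedLocomotionEnv expects its JSON config to carry a "geom_map" (and optionally "suffixes") in addition to the "dimensions" list, since _locate_randomize_parameters resolves each dimension name to named geoms in the MuJoCo XML and _create_xml rewrites their 'size' attribute. A hypothetical config fragment with that shape, written as a Python dict (only the key names mirror the code above; the geom names and values are placeholders):

    config = {
        "env": "HumanoidRandomized-v0",
        "dimensions": [
            {"name": "torso", "default": 0.07, "multiplier_min": 0.5, "multiplier_max": 1.5}
        ],
        "geom_map": {
            "torso": ["torso1_geom", "uwaist_geom"]   # geoms whose 'size' attribute gets rewritten
        },
        "suffixes": {
            "torso": "0.07"                           # optional second size component appended after the value
        }
    }
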
--------------------------------------------------------------------------------
/common/envs/randomized_vecenv.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import gym
4 | import numpy as np
5 | from multiprocessing import Process, Pipe
6 | from baselines.common.vec_env import VecEnv, CloudpickleWrapper
7 |
8 | from common.envs.wrappers import RandomizedEnvWrapper
9 |
10 | """File Description:
11 | Creates a vectorized environment with RandomizedEnvWrapper, which helps
12 | with fast / general Domain Randomization.
13 | The main thing to note here is unlike the OpenAI vectorized env,
14 | the step command does not automatically reset.
15 |
16 | We also provide simple helper functions to randomize environments
17 | """
18 |
19 | logger = logging.getLogger(__name__)
20 |
21 |
22 | def make_env(env_id, seed, rank):
23 | def _thunk():
24 | env = gym.make(env_id)
25 | env = RandomizedEnvWrapper(env, seed + rank)
26 |
27 | env.seed(seed + rank)
28 | obs_shape = env.observation_space.shape # TODO: is this something we can remove
29 |
30 | return env
31 |
32 | return _thunk
33 |
34 |
35 | def make_vec_envs(env_name, seed, num_processes):
36 | envs = [make_env(env_name, seed, i)
37 | for i in range(num_processes)]
38 | envs = RandomizedSubprocVecEnv(envs)
39 | return envs
40 |
41 |
42 | def worker(remote, parent_remote, env_fn_wrapper):
43 | parent_remote.close()
44 | env = env_fn_wrapper.x()
45 | try:
46 | while True:
47 | cmd, data = remote.recv()
48 | if cmd == 'step':
49 | ob, reward, done, info = env.step(data)
50 | remote.send((ob, reward, done, info))
51 | elif cmd == 'reset':
52 | ob = env.reset()
53 | remote.send(ob)
54 | elif cmd == 'render':
55 | remote.send(env.render(mode='rgb_array'))
56 | elif cmd == 'close':
57 | remote.close()
58 | break
59 | elif cmd == 'get_spaces':
60 | remote.send((env.observation_space, env.action_space, env.unwrapped.randomization_space))
61 | elif cmd == 'get_dimension_name':
62 | remote.send(env.unwrapped.dimensions[data].name)
63 | elif cmd == 'rescale_dimension':
64 | dimension = data[0]
65 | array = data[1]
66 | rescaled = env.unwrapped.dimensions[dimension]._rescale(array)
67 | remote.send(rescaled)
68 | elif cmd == 'randomize':
69 | randomized_val = data
70 | env.randomize(randomized_val)
71 | remote.send(None)
72 | elif cmd == 'get_current_randomization_values':
73 | values = []
74 | for dim in env.unwrapped.dimensions:
75 | values.append(dim.current_value)
76 |
77 | remote.send(values)
78 | else:
79 | raise NotImplementedError
80 | except KeyboardInterrupt:
81 | print('SubprocVecEnv worker: got KeyboardInterrupt')
82 | finally:
83 | env.close()
84 |
85 |
86 | class RandomizedSubprocVecEnv(VecEnv):
87 | """
88 |     VecEnv that runs multiple environments in parallel in subprocesses and communicates with them via pipes.
89 | Recommended to use when num_envs > 1 and step() can be a bottleneck.
90 | """
91 |
92 | # TODO: arg spaces is no longer used. Remove?
93 | def __init__(self, env_fns, spaces=None):
94 | """
95 | Arguments:
96 |
97 | env_fns: iterable of callables - functions that create environments to run in subprocesses. Need to be cloud-pickleable
98 | """
99 | self.waiting = False
100 | self.closed = False
101 | nenvs = len(env_fns)
102 | self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)])
103 | self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn)))
104 | for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)]
105 | for p in self.ps:
106 | p.daemon = True # if the main process crashes, we should not cause things to hang
107 | p.start()
108 | for remote in self.work_remotes:
109 | remote.close()
110 |
111 | self.remotes[0].send(('get_spaces', None))
112 | observation_space, action_space, randomization_space = self.remotes[0].recv()
113 | self.randomization_space = randomization_space
114 | self.viewer = None
115 | VecEnv.__init__(self, len(env_fns), observation_space, action_space)
116 |
117 | def step_async(self, actions):
118 | self._assert_not_closed()
119 | logger.debug('[step] => SENDING')
120 | for remote, action in zip(self.remotes, actions):
121 | remote.send(('step', action))
122 | logger.debug('[step] => SENT')
123 | self.waiting = True
124 |
125 | def step_wait(self):
126 | self._assert_not_closed()
127 | logger.debug('[step] => WAITING')
128 | results = [remote.recv() for remote in self.remotes]
129 | self.waiting = False
130 | logger.debug('[step] => DONE')
131 | obs, rews, dones, infos = zip(*results)
132 | return np.stack(obs), np.stack(rews), np.stack(dones), infos
133 |
134 | def randomize(self, randomized_values):
135 | self._assert_not_closed()
136 |
137 | logger.debug('[randomize] => SENDING')
138 | for remote, val in zip(self.remotes, randomized_values):
139 | remote.send(('randomize', val))
140 | results = [remote.recv() for remote in self.remotes] # TODO: why creating the array if you're not gonna use it
141 | logger.debug('[randomize] => SENT')
142 | self.waiting = False
143 |
144 | def get_current_params(self):
145 | logger.debug('[get_current_randomization_values] => SENDING')
146 | for remote in self.remotes:
147 | remote.send(('get_current_randomization_values', None))
148 | result = [remote.recv() for remote in self.remotes]
149 | logger.debug('[get_current_randomization_values] => SENT')
150 | return np.stack(result)
151 |
152 | def get_dimension_name(self, dimension):
153 | logger.debug('[get_dimension_name] => SENDING')
154 | self.remotes[0].send(('get_dimension_name', dimension))
155 | result = self.remotes[0].recv()
156 | logger.debug('[get_dimension_name] => SENT')
157 | return result
158 |
159 | def rescale(self, dimension, array):
160 | logger.debug('[rescale_dimension] => SENDING')
161 | data = (dimension, array)
162 | self.remotes[0].send(('rescale_dimension', data))
163 | result = self.remotes[0].recv()
164 | logger.debug('[rescale_dimension] => SENT')
165 | return result
166 |
167 | def reset(self):
168 | self._assert_not_closed()
169 | logger.debug('[reset] => SENDING')
170 | for remote in self.remotes:
171 | remote.send(('reset', None))
172 | result = [remote.recv() for remote in self.remotes]
173 | logger.debug('[reset] => SENT')
174 | return np.stack(result)
175 |
176 | def close(self):
177 | if self.closed:
178 | return
179 | if self.viewer is not None:
180 | self.viewer.close()
181 | self.close_extras()
182 | self.closed = True
183 |
184 | def close_extras(self):
185 | self.closed = True
186 | if self.waiting:
187 | for remote in self.remotes:
188 | remote.recv()
189 | for remote in self.remotes:
190 | remote.send(('close', None))
191 | for p in self.ps:
192 | p.join()
193 |
194 | def get_images(self):
195 | self._assert_not_closed()
196 | for pipe in self.remotes:
197 | pipe.send(('render', None))
198 | imgs = [pipe.recv() for pipe in self.remotes]
199 | return imgs
200 |
201 | def _assert_not_closed(self):
202 | assert not self.closed, "Trying to operate on a SubprocVecEnv after calling close()"
203 |
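
A minimal driver sketch for the vectorized API above (the env id and sizes are illustrative; note that, as the module docstring says, step() does not auto-reset, and on platforms that spawn rather than fork processes this should run under an if __name__ == '__main__': guard):

    import numpy as np
    from common.envs.randomized_vecenv import make_vec_envs

    envs = make_vec_envs('Pusher3DOFRandomized-v0', seed=0, num_processes=4)
    envs.randomize([['random', 'random']] * 4)       # one (frictionloss, damping) setting per worker
    obs = envs.reset()
    for _ in range(5):
        actions = np.stack([envs.action_space.sample() for _ in range(4)])
        obs, rewards, dones, infos = envs.step(actions)
    print(envs.get_current_params())                 # per-worker randomization values
    envs.close()
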
--------------------------------------------------------------------------------
/common/envs/wrappers.py:
--------------------------------------------------------------------------------
1 | from importlib import import_module
2 |
3 | import gym
4 | import json
5 | import numpy as np
6 |
7 | import gym.spaces as spaces
8 | import os.path as osp
9 |
10 | from enum import Enum
11 |
12 | from lxml import etree
13 | import numpy as np
14 |
15 | from common.envs.assets import MODEL_PATH
16 | from common.envs.dimension import Dimension
17 |
18 |
19 | class RandomizedEnvWrapper(gym.Wrapper):
20 |     """Creates a randomization-enabled environment, which can change
21 | physics / simulation parameters without relaunching everything
22 | """
23 |
24 | def __init__(self, env, seed):
25 | super(RandomizedEnvWrapper, self).__init__(env)
26 | self.config_file = self.unwrapped.config_file
27 |
28 | self._load_randomization_dimensions(seed)
29 | self.unwrapped._update_randomized_params()
30 | self.randomized_default = ['random'] * len(self.unwrapped.dimensions)
31 |
32 | def _load_randomization_dimensions(self, seed):
33 |         """ Helper function to load the environment's default randomization ranges
34 | """
35 | self.unwrapped.dimensions = []
36 |
37 | with open(self.config_file, mode='r') as f:
38 | config = json.load(f)
39 |
40 | for dimension in config['dimensions']:
41 | self.unwrapped.dimensions.append(
42 | Dimension(
43 | default_value=dimension['default'],
44 | seed=seed,
45 | multiplier_min=dimension['multiplier_min'],
46 | multiplier_max=dimension['multiplier_max'],
47 | name=dimension['name']
48 | )
49 | )
50 |
51 | nrand = len(self.unwrapped.dimensions)
52 | self.unwrapped.randomization_space = spaces.Box(0, 1, shape=(nrand,), dtype=np.float32)
53 |
54 | # TODO: The default is not informative of the type of randomize_values
55 | # TODO: The .randomize API is counter intuitive...
56 | def randomize(self, randomized_values=-1):
57 | """Creates a randomized environment, using the dimension and value specified
58 | to randomize over
59 | """
60 | for dimension, randomized_value in enumerate(randomized_values):
61 | if randomized_value == 'default':
62 | self.unwrapped.dimensions[dimension].current_value = \
63 | self.unwrapped.dimensions[dimension].default_value
64 | elif randomized_value != 'random' and randomized_value != -1:
65 | assert 0.0 <= randomized_value <= 1.0, "using incorrect: {}".format(randomized_value)
66 | self.unwrapped.dimensions[dimension].current_value = \
67 | self.unwrapped.dimensions[dimension]._rescale(randomized_value)
68 | else: # random
69 | self.unwrapped.dimensions[dimension].randomize()
70 |
71 | self.unwrapped._update_randomized_params()
72 |
73 | def step(self, action):
74 | return self.env.step(action)
75 |
76 | def reset(self, **kwargs):
77 | return self.env.reset(**kwargs)
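
A sketch of how the wrapper's randomize() API is driven, mirroring the semantics above: 'default' restores the default value, 'random' (or -1) samples uniformly from the dimension's range, and a float in [0, 1] is rescaled into that range. It assumes the env id has been registered by this repo (the Pusher3DOF configs reference it) and that the underlying class defines config_file and _update_randomized_params:

    import gym
    from common.envs.wrappers import RandomizedEnvWrapper

    env = RandomizedEnvWrapper(gym.make('Pusher3DOFRandomized-v0'), seed=0)
    env.randomize(['random', 'random'])   # sample both frictionloss and damping uniformly
    env.randomize(['default', 0.25])      # default frictionloss, damping at 25% of its range
    obs = env.reset()
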
--------------------------------------------------------------------------------
/common/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/models/__init__.py
--------------------------------------------------------------------------------
/common/models/actor_critic.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class Actor(nn.Module):
7 | def __init__(self, state_dim, action_dim, max_action):
8 | super(Actor, self).__init__()
9 |
10 | self.l1 = nn.Linear(state_dim, 400)
11 | self.l2 = nn.Linear(400, 300)
12 | self.l3 = nn.Linear(300, action_dim)
13 |
14 | self.max_action = max_action
15 |
16 | def forward(self, x):
17 | x = F.relu(self.l1(x))
18 | x = F.relu(self.l2(x))
19 | x = self.max_action * torch.tanh(self.l3(x))
20 | return x
21 |
22 | class Critic(nn.Module):
23 | def __init__(self, state_dim, action_dim):
24 | super(Critic, self).__init__()
25 |
26 | self.l1 = nn.Linear(state_dim + action_dim, 400)
27 | self.l2 = nn.Linear(400, 300)
28 | self.l3 = nn.Linear(300, 1)
29 |
30 |
31 | def forward(self, x, u):
32 | x = F.relu(self.l1(torch.cat([x, u], 1)))
33 | x = F.relu(self.l2(x))
34 | x = self.l3(x)
35 | return x
36 |
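
A shape sketch for the DDPG actor/critic pair above (the dimensions are arbitrary):

    import torch
    from common.models.actor_critic import Actor, Critic

    actor = Actor(state_dim=10, action_dim=2, max_action=1.0)
    critic = Critic(state_dim=10, action_dim=2)

    states = torch.randn(32, 10)
    actions = actor(states)             # (32, 2), squashed into [-max_action, max_action]
    q_values = critic(states, actions)  # (32, 1)
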
--------------------------------------------------------------------------------
/common/models/discriminator.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 | class MLPDiscriminator(nn.Module):
5 | """Discriminator class based on Feedforward Network
6 | Input is a state-action-state' transition
7 | Output is probability that it was from a reference trajectory
8 | """
9 | def __init__(self, state_dim, action_dim):
10 | super(MLPDiscriminator, self).__init__()
11 |
12 | self.l1 = nn.Linear((state_dim + action_dim + state_dim), 128)
13 | self.l2 = nn.Linear(128, 128)
14 | self.logic = nn.Linear(128, 1)
15 |
16 | self.logic.weight.data.mul_(0.1)
17 | self.logic.bias.data.mul_(0.0)
18 |
19 | # Tuple of S-A-S'
20 | def forward(self, x):
21 | x = torch.tanh(self.l1(x))
22 | x = torch.tanh(self.l2(x))
23 | x = self.logic(x)
24 | return torch.sigmoid(x)
25 |
26 | class GAILMLPDiscriminator(nn.Module):
27 | """Discriminator class based on Feedforward Network
28 |     Input is a state-action pair
29 | Output is probability that it was from a reference trajectory
30 | """
31 | def __init__(self, state_dim, action_dim):
32 | super(GAILMLPDiscriminator, self).__init__()
33 | self.l1 = nn.Linear((state_dim + action_dim), 128)
34 | self.l2 = nn.Linear(128, 128)
35 | self.logic = nn.Linear(128, 1)
36 |
37 | self.logic.weight.data.mul_(0.1)
38 | self.logic.bias.data.mul_(0.0)
39 |
40 |     # Tuple of S-A
41 | def forward(self, x):
42 | x = torch.tanh(self.l1(x))
43 | x = torch.tanh(self.l2(x))
44 | x = self.logic(x)
45 | return torch.sigmoid(x)
46 |
47 |
48 | class LSTMDiscriminator(nn.Module):
49 |     """Discriminator class based on an LSTM
50 | Input is a sequence of state-action-state' transitions
51 | Output is probability that it was from a reference trajectory
52 | """
53 |     def __init__(self, state_dim, batch_size, hidden_dim, output_size=1):
54 |         super(LSTMDiscriminator, self).__init__()
55 |         self.lstm = nn.LSTM(state_dim, hidden_dim, num_layers=1)
56 |         self.state_dim, self.batch_size = state_dim, batch_size
57 |         self.hidden_dim = hidden_dim
58 |         self.hidden2out = nn.Linear(hidden_dim, output_size)  # single probability by default
59 |         self.hidden = self._init_hidden()
60 | 
61 |     def _init_hidden(self):
62 |         return (torch.zeros(1, self.batch_size, self.hidden_dim),
63 |                 torch.zeros(1, self.batch_size, self.hidden_dim))
64 |
65 | def forward(self, trajectory):
66 | self.hidden = self._init_hidden()
67 |
68 | predictions, (ht, ct) = self.lstm(trajectory, self.hidden)
69 | output = self.hidden2out(ht[-1])
70 | return torch.sigmoid(output)
71 |
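
A usage sketch for MLPDiscriminator (the dimensions are arbitrary); the GAIL variant is driven the same way with state-action inputs:

    import torch
    from common.models.discriminator import MLPDiscriminator

    state_dim, action_dim = 8, 2
    disc = MLPDiscriminator(state_dim, action_dim)

    # A batch of 32 (s, a, s') transitions, concatenated along the feature axis.
    transitions = torch.randn(32, state_dim + action_dim + state_dim)
    p_reference = disc(transitions)     # shape (32, 1), probabilities in (0, 1)
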
--------------------------------------------------------------------------------
/common/svpg/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/svpg/__init__.py
--------------------------------------------------------------------------------
/common/svpg/particles/__init__.py:
--------------------------------------------------------------------------------
1 | from .svpg_particle import SVPGParticle
--------------------------------------------------------------------------------
/common/svpg/particles/distributions.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from common.svpg.particles.utils import AddBias, init
6 |
7 | """
8 | Modify standard PyTorch distributions so they are compatible with this code.
9 | """
10 |
11 | #
12 | # Standardize distribution interfaces
13 | #
14 |
15 | # Categorical
16 | FixedCategorical = torch.distributions.Categorical
17 |
18 | old_sample = FixedCategorical.sample
19 | FixedCategorical.sample = lambda self: old_sample(self).unsqueeze(-1)
20 |
21 | log_prob_cat = FixedCategorical.log_prob
22 | FixedCategorical.log_probs = lambda self, actions: log_prob_cat(self, actions.squeeze(-1)).view(actions.size(0), -1).sum(-1).unsqueeze(-1)
23 |
24 | FixedCategorical.mode = lambda self: self.probs.argmax(dim=-1, keepdim=True)
25 |
26 |
27 | # Normal
28 | FixedNormal = torch.distributions.Normal
29 |
30 | log_prob_normal = FixedNormal.log_prob
31 | FixedNormal.log_probs = lambda self, actions: log_prob_normal(self, actions).sum(-1, keepdim=True)
32 |
33 | normal_entropy = FixedNormal.entropy
34 | FixedNormal.entropy = lambda self: normal_entropy(self).sum(-1)
35 |
36 | FixedNormal.mode = lambda self: self.mean
37 |
38 |
39 | class Categorical(nn.Module):
40 | def __init__(self, num_inputs, num_outputs):
41 | super(Categorical, self).__init__()
42 |
43 | init_ = lambda m: init(m,
44 | nn.init.orthogonal_,
45 | lambda x: nn.init.constant_(x, 0),
46 | gain=0.01)
47 |
48 | self.linear = init_(nn.Linear(num_inputs, num_outputs))
49 |
50 | def forward(self, x):
51 | x = self.linear(x)
52 | return FixedCategorical(logits=x)
53 |
54 |
55 | class DiagGaussian(nn.Module):
56 | def __init__(self, num_inputs, num_outputs):
57 | super(DiagGaussian, self).__init__()
58 |
59 | init_ = lambda m: init(m,
60 | nn.init.orthogonal_,
61 | lambda x: nn.init.constant_(x, 0))
62 |
63 | self.fc_mean = init_(nn.Linear(num_inputs, num_outputs))
64 | self.logstd = AddBias(torch.zeros(num_outputs))
65 |
66 | def forward(self, x):
67 | action_mean = self.fc_mean(x)
68 |
69 | # An ugly hack for my KFAC implementation.
70 | zeros = torch.zeros(action_mean.size())
71 | if x.is_cuda:
72 | zeros = zeros.cuda()
73 |
74 | action_logstd = self.logstd(zeros)
75 | return FixedNormal(action_mean, action_logstd.exp())
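
The patched distributions above expose a uniform sample / log_probs / mode / entropy interface; a small sketch of DiagGaussian (sizes arbitrary):

    import torch
    from common.svpg.particles.distributions import DiagGaussian

    dist_head = DiagGaussian(num_inputs=64, num_outputs=2)
    features = torch.randn(5, 64)        # e.g. the actor's hidden features
    dist = dist_head(features)           # a FixedNormal with a learnable log-std

    actions = dist.sample()              # shape (5, 2)
    log_probs = dist.log_probs(actions)  # summed over action dims -> shape (5, 1)
    entropy = dist.entropy()             # summed over action dims -> shape (5,)
    greedy = dist.mode()                 # the mean action
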
--------------------------------------------------------------------------------
/common/svpg/particles/svpg_particle.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | from .utils import orthogonal_init
6 | from .distributions import Categorical, DiagGaussian
7 |
8 |
9 | class SVPGParticleCritic(nn.Module):
10 | def __init__(self, input_dim, output_dim, hidden_dim):
11 | super(SVPGParticleCritic, self).__init__()
12 |
13 | self.critic = nn.Sequential(
14 | orthogonal_init(nn.Linear(input_dim, hidden_dim)),
15 | nn.Tanh(),
16 | orthogonal_init(nn.Linear(hidden_dim, hidden_dim)),
17 | nn.Tanh(),
18 | orthogonal_init(nn.Linear(hidden_dim, 1))
19 | )
20 |
21 | def forward(self, x):
22 | return self.critic(x)
23 |
24 | class SVPGParticleActorBase(nn.Module):
25 | def __init__(self, input_dim, hidden_dim):
26 | super(SVPGParticleActorBase, self).__init__()
27 |
28 | self.actor_hidden = nn.Sequential(
29 | orthogonal_init(nn.Linear(input_dim, hidden_dim)),
30 | nn.Tanh(),
31 | orthogonal_init(nn.Linear(hidden_dim, hidden_dim)),
32 | nn.Tanh(),
33 | )
34 |
35 | def forward(self, x):
36 | return self.actor_hidden(x)
37 |
38 |
39 | class SVPGParticle(nn.Module):
40 |     """Implements an advantage actor-critic (A2C) architecture with either a
41 |     discrete or continuous policy head, used inside of SVPG
42 | """
43 | def __init__(self, input_dim, output_dim, hidden_dim, discrete, freeze=False):
44 | super(SVPGParticle, self).__init__()
45 |
46 | self.critic = SVPGParticleCritic(input_dim, output_dim, hidden_dim)
47 | self.actor_hidden = SVPGParticleActorBase(input_dim, hidden_dim)
48 |
49 | if discrete:
50 | self.dist = Categorical(hidden_dim, output_dim)
51 | else:
52 | self.dist = DiagGaussian(hidden_dim, output_dim)
53 |
54 | if freeze:
55 | self.freeze()
56 |
57 | self.reset()
58 |
59 | def forward(self, x):
60 | actor_hidden = self.actor_hidden(x)
61 | dist = self.dist(actor_hidden)
62 | value = self.critic(x)
63 |
64 | return dist, value
65 |
66 | def freeze(self):
67 | for param in self.critic.parameters():
68 | param.requires_grad = False
69 |
70 | for param in self.actor_hidden.parameters():
71 | param.requires_grad = False
72 |
73 | for param in self.dist.parameters():
74 | param.requires_grad = False
75 |
76 | def reset(self):
77 | self.saved_log_probs = []
78 | self.saved_klds = []
79 | self.rewards = []
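
A sketch of how a particle is queried (continuous case; the dimensions are arbitrary). The per-step bookkeeping lists (saved_log_probs, rewards) are filled in by the SVPG driver during rollouts:

    import torch
    from common.svpg.particles import SVPGParticle

    particle = SVPGParticle(input_dim=3, output_dim=3, hidden_dim=64, discrete=False)
    state = torch.randn(1, 3)
    dist, value = particle(state)       # action distribution and state-value estimate
    action = dist.sample()
    particle.saved_log_probs.append(dist.log_probs(action))
    particle.rewards.append(0.0)        # replaced by the simulator-side reward in practice
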
--------------------------------------------------------------------------------
/common/svpg/particles/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch.nn as nn
3 |
4 |
5 | class AddBias(nn.Module):
6 | def __init__(self, bias):
7 | super(AddBias, self).__init__()
8 | self._bias = nn.Parameter(bias.unsqueeze(1))
9 |
10 | def forward(self, x):
11 | if x.dim() == 2:
12 | bias = self._bias.t().view(1, -1)
13 | else:
14 | bias = self._bias.t().view(1, -1, 1, 1)
15 |
16 | return x + bias
17 |
18 |
19 | def init(module, weight_init, bias_init, gain=1):
20 | weight_init(module.weight.data, gain=gain)
21 | bias_init(module.bias.data)
22 | return module
23 |
24 |
25 | orthogonal_init = lambda m: init(module=m,
26 | weight_init=nn.init.orthogonal_,
27 | bias_init=lambda x: nn.init.constant_(x, 0),
28 | gain=np.sqrt(2))
29 |
--------------------------------------------------------------------------------
/common/svpg/svpg_utils.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import numpy as np
3 | import os
4 |
5 |
6 | def parameters_to_vector(parameters, grad=False, both=False):
7 | """Convert parameters or/and their gradients to one vector
8 | Arguments:
9 | parameters (Iterable[Variable]): an iterator of Variables that are the
10 | parameters of a model.
11 | grad (bool): Vectorizes gradients if true, otherwise vectorizes params
12 | both (bool): If True, vectorizes both parameters and their gradients,
13 | `grad` has no effect in this case. Otherwise vectorizes parameters
14 | or gradients according to `grad`.
15 | Returns:
16 | The parameters or/and their gradients (each) represented by a single
17 | vector (th.Tensor, not Variable)
18 | """
19 | # Flag for the device where the parameter is located
20 | param_device = None
21 |
22 | if not both:
23 | vec = []
24 | if not grad:
25 | for param in parameters:
26 | # Ensure the parameters are located in the same device
27 | param_device = _check_param_device(param, param_device)
28 | vec.append(param.data.view(-1))
29 | else:
30 | for param in parameters:
31 | param_device = _check_param_device(param, param_device)
32 | vec.append(param.grad.data.view(-1))
33 | return torch.cat(vec)
34 | else:
35 | vec_params, vec_grads = [], []
36 | for param in parameters:
37 | param_device = _check_param_device(param, param_device)
38 | vec_params.append(param.data.view(-1))
39 | vec_grads.append(param.grad.data.view(-1))
40 | return torch.cat(vec_params), torch.cat(vec_grads)
41 |
42 | def vector_to_parameters(vec, parameters, grad=True):
43 | """Convert one vector to the parameters or gradients of the parameters
44 | Arguments:
45 | vec (torch.Tensor): a single vector represents the parameters of a model.
46 | parameters (Iterable[Variable]): an iterator of Variables that are the
47 | parameters of a model.
48 | grad (bool): True for assigning de-vectorized `vec` to gradients
49 | """
50 |     # Ensure vec is a torch.cuda.FloatTensor
51 | if not isinstance(vec, torch.cuda.FloatTensor):
52 | raise TypeError('expected torch.Tensor, but got: {}'
53 | .format(torch.typename(vec)))
54 | # Flag for the device where the parameter is located
55 | param_device = None
56 |
57 | # Pointer for slicing the vector for each parameter
58 | pointer = 0
59 | if grad:
60 | for param in parameters:
61 | # Ensure the parameters are located in the same device
62 | param_device = _check_param_device(param, param_device)
63 | # The length of the parameter
64 | num_param = torch.prod(torch.LongTensor(list(param.size())))
65 | param.grad.data = vec[pointer:pointer + num_param].view(
66 | param.size())
67 | # Increment the pointer
68 | pointer += num_param
69 | else:
70 | for param in parameters:
71 | # Ensure the parameters are located in the same device
72 | param_device = _check_param_device(param, param_device)
73 | # The length of the parameter
74 | num_param = torch.prod(torch.LongTensor(list(param.size())))
75 | param.data = vec[pointer:pointer + num_param].view(
76 | param.size())
77 | # Increment the pointer
78 | pointer += num_param
79 |
80 |
81 | def _check_param_device(param, old_param_device):
82 | """This helper function is to check if the parameters are located
83 | in the same device. Currently, the conversion between model parameters
84 | and single vector form is not supported for multiple allocations,
85 | e.g. parameters in different GPUs, or mixture of CPU/GPU.
86 | Arguments:
87 | param ([Variable]): a Variable of a parameter of a model
88 | old_param_device (int): the device where the first parameter of a
89 | model is allocated.
90 | Returns:
91 | old_param_device (int): report device for the first time
92 | """
93 |
94 | # Meet the first parameter
95 | if old_param_device is None:
96 | old_param_device = param.get_device() if param.is_cuda else -1
97 | else:
98 | warn = False
99 | if param.is_cuda: # Check if in same GPU
100 | warn = (param.get_device() != old_param_device)
101 | else: # Check if in CPU
102 | warn = (old_param_device != -1)
103 | if warn:
104 | raise TypeError('Found two parameters on different devices, '
105 | 'this is currently not supported.')
106 | return old_param_device
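
parameters_to_vector / vector_to_parameters flatten a particle's gradients for the SVPG update and write the modified gradients back. A round-trip sketch (it assumes a CUDA build of PyTorch, since vector_to_parameters as written above insists on a torch.cuda.FloatTensor):

    import torch
    import torch.nn as nn
    from common.svpg.svpg_utils import parameters_to_vector, vector_to_parameters

    model = nn.Linear(4, 2).cuda()
    model(torch.randn(8, 4).cuda()).sum().backward()

    flat_params, flat_grads = parameters_to_vector(model.parameters(), both=True)
    vector_to_parameters(flat_grads * 0.5, model.parameters(), grad=True)  # scale and write back
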
--------------------------------------------------------------------------------
/common/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/common/utils/__init__.py
--------------------------------------------------------------------------------
/common/utils/plot_utils.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import matplotlib
3 | # matplotlib.use('Agg')
4 | import matplotlib.pyplot as plt
5 | import seaborn as sns
6 |
7 | # styling
8 | # plt.style.use('seaborn-dark')
9 | # matplotlib.rcParams.update({'font.size': 22})
10 |
11 | sns.set(font_scale=1.5)
12 | sns.set_style("whitegrid")
13 | COLORS = ["#95d0ff", "#966bff", "#ff6ad5", "#ffa58b", "#ff6a8b"]
14 |
15 |
16 | # sns.color_palette('bright', 6)
17 |
18 |
19 | def get_args():
20 | parser = argparse.ArgumentParser(description='Domain Randomization Driver')
21 | parser.add_argument('--environment', type=str,
22 | choices=['lunar', 'pusher', 'ergo', 'ergosix', 'lunar2', 'lunarbootstrap'])
23 | parser.add_argument('--filter', type=float)
24 | return parser.parse_args()
25 |
26 |
27 | def get_config(environment):
28 | if environment == 'lunar':
29 | return {
30 | 'metrics': ['ref_learning_curve_{}', 'hard_learning_curve_{}', 'rand_learning_curve_{}'],
31 | 'solved': 200,
32 | 'xlim': (7.5, 20.0),
33 | 'ylim': (0, 330),
34 | 'start_index': 0,
35 | 'environment': environment,
36 | # 'labels': ['baseline', 'UDR', 'oracle', 'ADR (ours)'],
37 | 'labels': ['Oracle', 'Baseline', 'UDR', 'ADR (ours)'],
38 | 'title': 'Generalization Results (LunarLander)',
39 | # 'title': 'Oracle vs. UDR (LunarLander)',
40 | 'dimensions': 1,
41 | 'colors': COLORS,
42 | 'legend_loc': 'lower right',
43 | 'x_label': 'Main Engine Strength (MES)',
44 | 'y_label': 'Average Reward'
45 | }
46 | elif environment == 'lunar2':
47 | return {
48 | 'metrics': ['ref_learning_curve_{}', 'hard_learning_curve_{}'],
49 | 'solved': 200,
50 | 'xlim': (7.5, 20.0),
51 | 'ylim': (-100, 330),
52 | 'start_index': 0,
53 | 'environment': environment,
54 | 'labels': ['$Baseline$', '$UDR$', '$ADR (ours)$'],
55 | 'title': ['Learning Curve (LL), Reference Env.', 'Learning Curve (LL), Hard Env.'],
56 | 'dimensions': 1,
57 | 'colors': [COLORS[1], COLORS[2], COLORS[0]],
58 | 'legend_loc': 'best',
59 | 'x_label': 'Main Engine Strength (MES)',
60 | 'y_label': 'Average Reward'
61 | }
62 | elif environment == 'lunarbootstrap':
63 | return {
64 | 'metrics': ['ref_learning_curve_{}'],
65 | 'solved': 200,
66 | 'xlim': (7.5, 11),
67 | 'ylim': (-150, 330),
68 | 'start_index': 0,
69 | 'environment': environment,
70 |             'labels': ['$ADR (bootstrapped)$', '$ADR (original)$'],
71 | 'title': ['Bootstrapped ADR (LL)'],
72 | 'dimensions': 1,
73 | 'colors': [COLORS[1], COLORS[0]],
74 | 'legend_loc': 'lower right',
75 | 'x_label': 'Main Engine Strength (MES)',
76 | 'y_label': 'Average Reward'
77 | }
78 | elif environment == 'pusher':
79 | return {
80 | 'metrics': ['ref_final_dists_{}', 'hard_final_dists_{}'],
81 | 'solved': 0.35,
82 | 'xlim': (0, 1.0),
83 | 'ylim': (0.1, 0.7),
84 | 'start_index': 0,
85 | 'environment': environment,
86 | 'labels': ['$UDR$', '$ADR (ours)$'],
87 | 'title': ['Learning Curve (Pusher), Reference Env.', 'Learning Curve (Pusher), Hard Env.'],
88 | 'dimensions': 2,
89 | 'colors': [COLORS[2], COLORS[0]],
90 | 'legend_loc': 'upper right',
91 | 'x_label': 'Agent Timesteps',
92 | 'y_label': 'Average Final Distance to Goal'
93 | }
94 |
95 | elif environment == 'ergo':
96 | return {
97 | 'metrics': ['ref_final_dists_{}', 'hard_final_dists_{}'],
98 | 'solved': None,
99 | 'xlim': (0, 1.0),
100 | 'ylim': (0, 0.2),
101 | 'start_index': 0,
102 | 'environment': environment,
103 | 'labels': ['$UDR$', '$ADR (ours)$'],
104 | 'title': ['Learning Curve (Ergo), Reference Env.', 'Learning Curve (Ergo), Hard Env.'],
105 | 'dimensions': 8,
106 | 'colors': [COLORS[2], COLORS[0]],
107 | 'legend_loc': 'upper right',
108 | 'x_label': 'Agent Timesteps',
109 | 'y_label': 'Average Final Distance to Goal'
110 | }
111 |
112 |
113 | def gen_plot(config, file_path, data, title=None, learning_curve=False):
114 | plt.figure(figsize=(6, 5))
115 |
116 | plt.title(config['title'] if not title else title)
117 | plt.xlabel(config['x_label'])
118 | plt.ylabel(config['y_label'])
119 |
120 | plt.ylim(*config['ylim'])
121 | if config['solved']:
122 | # plt.axhline(config['solved'], color=COLORS[4], linestyle='--', label='$[Solved]$') # only for figure 1
123 | plt.axhline(config['solved'], color=COLORS[3], linestyle='--', label='$[Solved]$')
124 |
125 | # colors = config['colors'][::-1][1:] # only for figure 1
126 | colors = config['colors']
127 | for i, entry in enumerate(data):
128 | timesteps, averaged_curve, sigma, convergence = entry
129 | sns.lineplot(timesteps,
130 | averaged_curve,
131 | c=colors[i],
132 | label=config['labels'][i])
133 | if convergence is not None:
134 | plt.plot([timesteps[-1], timesteps[-1] + 0.5],
135 | [averaged_curve.values[-1], averaged_curve.values[-1]],
136 | color=colors[i],
137 | linestyle='--')
138 |
139 | plt.fill_between(x=timesteps,
140 | y1=averaged_curve + sigma,
141 | y2=averaged_curve - sigma,
142 | facecolor=colors[i],
143 | alpha=0.1)
144 | if learning_curve:
145 | plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0))
146 |
147 | plt.legend(loc=config['legend_loc'], frameon=True, framealpha=0.5)
148 | plt.grid(b=False)
149 |
150 | # plt.show()
151 |
152 | plt.savefig(fname=file_path,
153 | bbox_inches='tight',
154 | pad_inches=0)
155 | plt.close()
156 |
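
A hypothetical sketch of how `get_config` and `gen_plot` fit together. `gen_plot` unpacks each `data` entry as `(timesteps, averaged_curve, sigma, convergence)` and accesses `averaged_curve.values` when drawing a convergence marker, so a pandas Series is a natural fit. The curve values and output file name below are made up for illustration.

```python
import numpy as np
import pandas as pd

from common.utils.plot_utils import get_config, gen_plot

# Hypothetical usage: one averaged curve with a shaded +/- sigma band, no convergence marker.
config = get_config('lunar')
timesteps = np.linspace(8.0, 20.0, 50)                         # main engine strength sweep
averaged_curve = pd.Series(150.0 + 50.0 * np.random.rand(50))  # placeholder rewards
sigma = pd.Series(10.0 * np.ones(50))
data = [(timesteps, averaged_curve, sigma, None)]
gen_plot(config, file_path='lunar-generalization.png', data=data)
```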
--------------------------------------------------------------------------------
/common/utils/policy_evaluator.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 | import numpy as np
4 | import gym
5 |
6 |
7 | class PolicyEvaluator:
8 | def __init__(self, env_id, seed, policy, eval_file_path):
9 | self.env = gym.make(env_id)
10 | self.env.seed(seed)
11 | self.policy = policy
12 | self.eval_file = open(eval_file_path, mode='w')
13 |
14 | def evaluate(self, iteration, episodes=10, debug=True):
15 |
16 | episodes_stats = []
17 | cumulative_reward = 0.0
18 |
19 | for _ in range(episodes):
20 | obs = self.env.reset()
21 |
22 | steps = 0
23 | total_reward = 0.0
24 | done = False
25 |
26 | while not done:
27 | action = self.policy.select_action(np.array(obs))
28 | obs, reward, done, _ = self.env.step(action)
29 |
30 | # stats
31 | steps += 1
32 | total_reward += reward
33 | cumulative_reward += reward
34 |
35 | if debug:
36 | self.env.render()
37 |
38 | episodes_stats.append({
39 | 'steps': steps,
40 | 'reward': total_reward
41 | })
42 |
43 | json.dump({
44 | 'iteration': iteration,
45 | 'reward': cumulative_reward,
46 | 'episodes': episodes,
47 | 'stats': episodes_stats
48 | }, self.eval_file, indent=2, sort_keys=True)
49 |
50 | self.eval_file.flush()
51 |
52 | self.env.close()
53 |
54 | return cumulative_reward / episodes
55 |
56 | def close(self):
57 | self.eval_file.close()
58 |
59 |
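
A hypothetical sketch of wiring `PolicyEvaluator` up with one of the repo's actors. The environment id, agent name, and output path are assumptions, and a trained actor is assumed to be available on disk.

```python
import gym

from common.envs import *  # registers the *Default-v0 / *Randomized-v0 ids
from common.agents.ddpg_actor import DDPGActor
from common.utils.policy_evaluator import PolicyEvaluator

# Hypothetical usage: evaluate a loaded actor for a few episodes and log the stats as JSON.
env_id = 'LunarLanderDefault-v0'
probe_env = gym.make(env_id)
policy = DDPGActor(state_dim=probe_env.observation_space.shape[0],
                   action_dim=probe_env.action_space.shape[0],
                   agent_name='baseline', load_agent=True)
evaluator = PolicyEvaluator(env_id, seed=123, policy=policy,
                            eval_file_path='results/baseline-eval.json')
mean_reward = evaluator.evaluate(iteration=0, episodes=5, debug=False)
evaluator.close()
print(mean_reward)
```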
--------------------------------------------------------------------------------
/common/utils/rollout_evaluation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | LUNAR_LANDER_SOLVED_SCORE = 200.0
4 | ERGO_SOLVED_DISTANCE = 0.025
5 | PUSHER_SOLVED_DISTANCE = 0.25 # Radius=0.17
6 |
7 |
8 | def evaluate_policy(nagents, env, agent_policy, replay_buffer, eval_episodes, max_steps, freeze_agent=True,
9 | return_rewards=False, add_noise=False, log_distances=True,
10 | gail_rewarder=None, noise_scale=0.1, min_buffer_len=1000):
11 |     """Evaluates a given policy in a (vectorized) environment. If `return_rewards`
12 |     is True, returns per-episode rewards and final distances to the goal; otherwise
13 |     returns per-agent (state, action, next_state) trajectories."""
14 |
15 | states = [[] for _ in range(nagents)]
16 | actions = [[] for _ in range(nagents)]
17 | next_states = [[] for _ in range(nagents)]
18 | rewards = [[] for _ in range(nagents)]
19 | ep_rewards = []
20 | final_dists = []
21 |
22 | for ep in range(eval_episodes):
23 | agent_total_rewards = np.zeros(nagents)
24 | state = env.reset()
25 |
26 | done = [False] * nagents
27 | add_to_buffer = [True] * nagents
28 | steps = 0
29 | training_iters = 0
30 |
31 | while not all(done) and steps <= max_steps:
32 | action = agent_policy.select_action(np.array(state))
33 |
34 | if add_noise:
35 | action = action + np.random.normal(0, noise_scale, size=action.shape)
36 | action = action.clip(-1, 1)
37 |
38 | next_state, reward, done, info = env.step(action)
39 | if gail_rewarder is not None:
40 | reward = gail_rewarder.get_reward(np.concatenate([state, action], axis=-1))
41 |
42 | for i, st in enumerate(state):
43 | if add_to_buffer[i]:
44 | states[i].append(st)
45 | actions[i].append(action[i])
46 | next_states[i].append(next_state[i])
47 | rewards[i].append(reward[i])
48 | agent_total_rewards[i] += reward[i]
49 | training_iters += 1
50 |
51 | if replay_buffer is not None:
52 | done_bool = 0 if steps + 1 == max_steps else float(done[i])
53 | replay_buffer.add((state[i], next_state[i], action[i], reward[i], done_bool))
54 |
55 | if done[i]:
56 | # Avoid duplicates
57 | add_to_buffer[i] = False
58 |
59 | if log_distances:
60 | final_dists.append(info[i]['goal_dist'])
61 |
62 | state = next_state
63 | steps += 1
64 |
65 | # Train for total number of env iterations
66 | if not freeze_agent and len(replay_buffer.storage) > min_buffer_len:
67 | agent_policy.train(replay_buffer=replay_buffer, iterations=training_iters)
68 |
69 | ep_rewards.append(agent_total_rewards)
70 |
71 | if return_rewards:
72 | return np.array(ep_rewards).flatten(), np.array(final_dists).flatten()
73 |
74 | trajectories = []
75 | for i in range(nagents):
76 | trajectories.append(np.concatenate(
77 | [
78 | np.array(states[i]),
79 | np.array(actions[i]),
80 | np.array(next_states[i])
81 | ], axis=-1))
82 |
83 | return trajectories
84 |
85 |
86 | def check_solved(env_name, criteria):
87 | if env_name.find('Lunar') != -1:
88 | return np.median(criteria) > LUNAR_LANDER_SOLVED_SCORE
89 | elif env_name.find('Ergo') != -1:
90 | return np.median(criteria) < ERGO_SOLVED_DISTANCE
91 | else:
92 | return np.median(criteria) < PUSHER_SOLVED_DISTANCE
93 |
94 |
95 | def check_new_best(env_name, new, current):
96 | if env_name.find('Lunar') != -1:
97 | return new > current
98 | else:
99 | return new < current
100 |
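
A hypothetical sketch mirroring the call made in `experiments/domainrand/pusher_grid_generalization.py`: evaluate a loaded actor on a vectorized randomized environment and check the solved criterion. The environment id, agent name, and a previously trained actor are assumptions.

```python
from common.agents.ddpg_actor import DDPGActor
from common.envs.randomized_vecenv import make_vec_envs
from common.utils.rollout_evaluation import evaluate_policy, check_solved

# Hypothetical usage: 5 parallel envs, 2 episodes each -> 10 reward / final-distance samples.
nagents = 5
env = make_vec_envs('Pusher3DOFRandomized-v0', 123, nagents)
policy = DDPGActor(state_dim=env.observation_space.shape[0],
                   action_dim=env.action_space.shape[0],
                   agent_name='ours-pusher', load_agent=True)
rewards, final_dists = evaluate_policy(nagents=nagents, env=env, agent_policy=policy,
                                       replay_buffer=None, eval_episodes=2, max_steps=100,
                                       return_rewards=True, add_noise=False, log_distances=True)
print(check_solved('Pusher3DOFRandomized-v0', final_dists))
```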
--------------------------------------------------------------------------------
/common/utils/sim_agent_helper.py:
--------------------------------------------------------------------------------
1 | from common.agents.svpg_simulator_agent import SVPGSimulatorAgent
2 |
3 | def generate_simulator_agent(args):
4 | return SVPGSimulatorAgent(
5 | reference_env_id=args.reference_env_id,
6 | randomized_env_id=args.randomized_env_id,
7 | randomized_eval_env_id=args.randomized_eval_env_id,
8 | agent_name=args.agent_name,
9 | nagents=args.nagents,
10 | nparams=args.nparams,
11 | temperature=args.temperature,
12 | svpg_rollout_length=args.svpg_rollout_length,
13 | svpg_horizon=args.svpg_horizon,
14 | max_step_length=args.max_step_length,
15 | reward_scale=args.reward_scale,
16 | initial_svpg_steps=args.initial_svpg_steps,
17 | max_env_timesteps=args.max_env_timesteps,
18 | episodes_per_instance=args.episodes_per_instance,
19 | discrete_svpg=args.discrete_svpg,
20 | load_discriminator=args.load_discriminator,
21 | freeze_discriminator=args.freeze_discriminator,
22 | freeze_agent=args.freeze_agent,
23 | seed=args.seed,
24 | particle_path=args.particle_path,
25 | )
--------------------------------------------------------------------------------
/experiments/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/experiments/__init__.py
--------------------------------------------------------------------------------
/experiments/domainrand/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/experiments/domainrand/__init__.py
--------------------------------------------------------------------------------
/experiments/domainrand/args.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import logging
3 |
4 | logger = logging.getLogger(__name__)
5 |
6 |
7 | def get_args():
8 | parser = argparse.ArgumentParser(description='Domain Randomization Driver')
9 |
10 | subparsers = parser.add_subparsers(help='sub-command help', dest='subparser_name')
11 |
12 | lunar_subparser = subparsers.add_parser('lunar', help='lunar lander subparser')
13 |     pusher_subparser = subparsers.add_parser('pusher', help='pusher-3dof subparser')
14 | ergo_subparser = subparsers.add_parser('ergo', help='ergoreacher subparser')
15 | backlash_subparser = subparsers.add_parser('backlash', help='ergoreacher with backlash subparser')
16 |     ergosix_subparser = subparsers.add_parser('ergosix', help='ergoreacher 6dof subparser')
17 |
18 |     lunar_subparser.add_argument("--randomized-env-id", default="LunarLanderDefault-v0",
19 |                                  type=str, help="Name of the randomized environment")
20 |     lunar_subparser.add_argument("--reference-env-id", default="LunarLanderDefault-v0",
21 |                                  type=str, help="Name of the reference environment")
22 |     lunar_subparser.add_argument("--randomized-eval-env-id", default="LunarLanderRandomized-v0",
23 |                                  type=str, help="Name of the randomized evaluation environment")
24 | lunar_subparser.add_argument("--nparams", default=1, type=int, help="Number of randomization parameters")
25 | lunar_subparser.add_argument("--eval-randomization-discretization", default=50, type=int, help="number of eval points")
26 | lunar_subparser.add_argument("--max-env-timesteps", default=1000, type=int,
27 | help="environment timeout")
28 | lunar_subparser.add_argument("--plot-frequency", default=5, type=int, help="how often to plot / log")
29 | lunar_subparser.add_argument("--nagents", default=10, type=int,
30 |                                  help="Number of SVPG particles")
31 |
32 |     pusher_subparser.add_argument("--randomized-env-id", default="Pusher3DOFDefault-v0",
33 |                                   type=str, help="Name of the randomized environment")
34 |     pusher_subparser.add_argument("--reference-env-id", default="Pusher3DOFDefault-v0",
35 |                                   type=str, help="Name of the reference environment")
36 |     pusher_subparser.add_argument("--randomized-eval-env-id", default="Pusher3DOFRandomized-v0",
37 |                                   type=str, help="Name of the randomized evaluation environment")
38 | pusher_subparser.add_argument("--nparams", default=2, type=int, help="Number of randomization parameters")
39 | pusher_subparser.add_argument("--eval-randomization-discretization", default=20, type=int, help="number of eval points")
40 | pusher_subparser.add_argument("--max-env-timesteps", default=100, type=int,
41 | help="environment timeout")
42 | pusher_subparser.add_argument("--plot-frequency", default=5, type=int, help="how often to plot / log")
43 | pusher_subparser.add_argument("--nagents", default=10, type=int,
44 |                                   help="Number of SVPG particles")
45 |
46 |     ergo_subparser.add_argument("--randomized-env-id", default="ErgoReacher4DOFDefault-v0",
47 |                                 type=str, help="Name of the randomized environment")
48 |     ergo_subparser.add_argument("--reference-env-id", default="ErgoReacher4DOFDefault-v0",
49 |                                 type=str, help="Name of the reference environment")
50 |     ergo_subparser.add_argument("--randomized-eval-env-id", default="ErgoReacher4DOFRandomizedEasy-v0",
51 |                                 type=str, help="Name of the randomized evaluation environment")
52 | ergo_subparser.add_argument("--nparams", default=8, type=int, help="Number of randomization parameters")
53 | ergo_subparser.add_argument("--eval-randomization-discretization", default=5, type=int, help="number of eval points")
54 | ergo_subparser.add_argument("--max-env-timesteps", default=100, type=int,
55 | help="environment timeout")
56 | ergo_subparser.add_argument("--plot-frequency", default=50, type=int, help="how often to plot / log")
57 | ergo_subparser.add_argument("--nagents", default=10, type=int,
58 |                                 help="Number of SVPG particles")
59 |
60 |     backlash_subparser.add_argument("--randomized-env-id", default="ErgoReacherRandomizedBacklashEasy-v0",
61 |                                     type=str, help="Name of the randomized environment")
62 |     backlash_subparser.add_argument("--reference-env-id", default="ErgoReacher-DualGoal-Easy-Default-Headless-v0",
63 |                                     type=str, help="Name of the reference environment")
64 |     backlash_subparser.add_argument("--randomized-eval-env-id", default="ErgoReacherRandomizedBacklashEasy-v0",
65 |                                     type=str, help="Name of the randomized evaluation environment")
66 | backlash_subparser.add_argument("--nparams", default=8, type=int, help="Number of randomization parameters")
67 | backlash_subparser.add_argument("--eval-randomization-discretization", default=20, type=int, help="number of eval points")
68 | backlash_subparser.add_argument("--max-env-timesteps", default=200, type=int,
69 | help="environment timeout")
70 | backlash_subparser.add_argument("--plot-frequency", default=50, type=int, help="how often to plot / log")
71 | backlash_subparser.add_argument("--nagents", default=10, type=int,
72 |                                     help="Number of SVPG particles")
73 |
74 |     ergosix_subparser.add_argument("--randomized-env-id", default="ErgoReacher-6Dof-Default-Headless-v0",
75 |                                    type=str, help="Name of the randomized environment")
76 |     ergosix_subparser.add_argument("--reference-env-id", default="ErgoReacher-6Dof-Default-Headless-v0",
77 |                                    type=str, help="Name of the reference environment")
78 |     ergosix_subparser.add_argument("--randomized-eval-env-id", default="ErgoReacher-6Dof-Randomized-Headless-v0",
79 |                                    type=str, help="Name of the randomized evaluation environment")
80 | ergosix_subparser.add_argument("--nparams", default=12, type=int, help="Number of randomization parameters")
81 | ergosix_subparser.add_argument("--eval-randomization-discretization", default=20, type=int, help="number of eval points")
82 | ergosix_subparser.add_argument("--max-env-timesteps", default=100, type=int,
83 | help="environment timeout")
84 | ergosix_subparser.add_argument("--plot-frequency", default=5, type=int, help="how often to plot / log")
85 | ergosix_subparser.add_argument("--nagents", default=10, type=int,
86 |                                    help="Number of SVPG particles")
87 |
88 | for subparser in [lunar_subparser, pusher_subparser, ergo_subparser, backlash_subparser, ergosix_subparser]:
89 | subparser.add_argument("--experiment-name", type=str,
90 | choices=['bootstrapping', 'unfreeze-policy'])
91 | subparser.add_argument("--experiment-prefix", default="experiment", type=str, help="Any custom string to attach")
92 | subparser.add_argument("--agent-name", default="baseline", type=str,
93 | help="Which Agent to benchmark")
94 | subparser.add_argument("--temperature", default=10.0, type=float,
95 | help="SVPG temperature")
96 | subparser.add_argument("--svpg-rollout-length", default=5, type=int,
97 | help="length of one svpg particle rollout")
98 | subparser.add_argument("--svpg-horizon", default=25, type=int,
99 | help="how often to fully reset svpg particles")
100 |
101 | subparser.add_argument("--max-step-length", default=0.05,
102 | type=float, help="step length / delta in parameters; If discrete, this is fixed, If continuous, this is max.")
103 |
104 | subparser.add_argument("--reward-scale", default=1.0, type=float,
105 |                                help="reward multiplier for the discriminator")
106 | subparser.add_argument("--initial-svpg-steps", default=0, type=float,
107 | help="number of svpg steps to take before updates")
108 | subparser.add_argument("--max-agent-timesteps", default=1e6, type=float,
109 | help="max iterations, counted in terms of AGENT env steps")
110 | subparser.add_argument("--episodes-per-instance", default=1, type=int,
111 | help="number of episodes to rollout the agent for per sim instance")
112 |
113 | subparser.add_argument("--kld-coefficient", default=0.00, type=float, help="kld coefficient for particles")
114 | subparser.add_argument("--discrete-svpg", action="store_true", help="discrete SVPG")
115 | subparser.add_argument("--continuous-svpg", action="store_true", help="continuous SVPG")
116 | subparser.add_argument("--save-particles", action="store_true", help="store the particle policies")
117 | subparser.add_argument("--particle-path", default="", type=str, help="where to load particles from")
118 | subparser.add_argument("--freeze-svpg", action="store_true", help="Freeze SVPG or not")
119 |
120 |         subparser.add_argument("--pretrain-discriminator", action="store_true", help="pretrain discriminator or not")
121 | subparser.add_argument("--load-discriminator", action="store_true", help="load discriminator or not")
122 | subparser.add_argument("--load-agent", action="store_true", help="load an agent or not")
123 | subparser.add_argument("--freeze-discriminator", action="store_true", help="freeze discriminator (no training)")
124 | subparser.add_argument("--freeze-agent", action="store_true", help="freeze agent (no training)")
125 |
126 | subparser.add_argument("--seed", default=123, type=int)
127 |         subparser.add_argument("--use-bootstrapping-results", action="store_true", help="where to look when running batch-reward-analysis")
128 |
129 | return parser.parse_args()
130 |
131 | def check_args(args, experiment_name=None):
132 |     experiment_name = experiment_name or args.experiment_name
133 |
134 | assert args.nagents > 2, "TODO: Weird bug"
135 |     assert (args.discrete_svpg or args.continuous_svpg) and not (args.discrete_svpg and args.continuous_svpg), "Specify continuous OR discrete"
136 |
137 |     if experiment_name == 'batch-reward-analysis':
138 |         assert args.load_agent
139 |         assert args.episodes_per_instance >= 5, "Need to run at least 5 runs when doing reward plots"
140 | return
141 | elif experiment_name.find('reward') != -1:
142 | assert args.episodes_per_instance > 1, "Probably want more than just one eval_episode for evaluation?"
143 | elif experiment_name == 'bootstrapping':
144 | assert args.load_discriminator, "Need to load discriminator"
145 |         assert not args.freeze_agent, "Need to unfreeze agent"
146 |
147 | assert args.svpg_rollout_length < 25, "Rollout length likely too long - SVPG will likely need more frequent feedback"
148 | assert args.svpg_horizon > 10, "Horizon likely too short for consistency - might reset SVPG to random positions too frequently"
149 | assert args.episodes_per_instance > 0, "Must provide episodes_per_instance"
150 |
151 | if args.pretrain_discriminator:
152 |         assert args.load_discriminator, "If pretraining, you should also load the discriminator"
153 |
154 | if args.discrete_svpg:
155 | assert args.max_step_length < 0.1, "Step length for discrete_svpg too large"
156 |
157 | if args.initial_svpg_steps >= args.max_agent_timesteps:
158 | logger.warning("YOU WILL NOT TRAIN THE SVPG AGENT")
159 |
160 | if not args.freeze_discriminator and not args.load_discriminator:
161 | logger.warning("YOU ARE TRAINING THE DISCRIMINATOR FROM SCRATCH")
162 |
163 | if not args.load_agent:
164 | logger.warning("YOU ARE TRAINING THE AGENT POLICY FROM SCRATCH")
165 |
166 | if args.randomized_env_id == args.reference_env_id:
167 | logger.warning("REFERENCE AND RANDOMIZED IDs ARE SAME")
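
A hypothetical sketch of how the parser is consumed; `sys.argv` is patched here purely for illustration, whereas in the repo the flags come from the shell scripts under `scripts/experiments/`.

```python
import sys

from experiments.domainrand.args import get_args, check_args

# Hypothetical usage: parse a lunar-lander run the way experiment_driver.py would see it.
sys.argv = ['experiment_driver.py', 'lunar',
            '--experiment-name=unfreeze-policy',
            '--agent-name=ours-lunar1d',
            '--continuous-svpg',
            '--load-discriminator',
            '--seed=1']
args = get_args()
check_args(args)
print(args.randomized_env_id, args.nagents, args.max_agent_timesteps)
```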
--------------------------------------------------------------------------------
/experiments/domainrand/batch_reward_analysis.py:
--------------------------------------------------------------------------------
1 | import matplotlib
2 | matplotlib.use('Agg')
3 |
4 | import re
5 | import os
6 | import glob
7 | import numpy as np
8 | import torch
9 | import gym
10 | import argparse
11 | import json
12 | import logging
13 |
14 | from itertools import combinations
15 |
16 | from common.utils.logging import setup_experiment_logs
17 |
18 | from experiments.domainrand.args import get_args, check_args
19 |
20 |
21 | def get_converged_modelpaths(paths):
22 | """
23 | Function to find the learning curves and best generalization curves for each seed
24 | """
25 |
26 | paper_path = paths['paper']
27 | agent_paths = os.listdir(paper_path)
28 |
29 | learning_curves_files = glob.glob(os.path.join(os.getcwd(), paper_path, 'learning-curves*.npz'))
30 | generalization_files = glob.glob(os.path.join(os.getcwd(), paper_path, 'best-generalization*.npz'))
31 |
32 | print(learning_curves_files)
33 |
34 | learning_curves_combinations = combinations(learning_curves_files, 5)
35 | generalization_combinations = combinations(generalization_files, 5)
36 |
37 | agent_name_start = paper_path.find('v0') + 3
38 | agent_name_end = paper_path.find('-exp')
39 |
40 | agent_name = paper_path[agent_name_start:agent_name_end]
41 |
42 | return agent_name, list(learning_curves_files), generalization_files
43 |
44 |
45 | if __name__ == '__main__':
46 | args = get_args()
47 | experiment_name = 'unfreeze-policy' if not args.use_bootstrapping_results else 'bootstrapping'
48 | paths = setup_experiment_logs(experiment_name=experiment_name, args=args)
49 | check_args(args, experiment_name=experiment_name)
50 |
51 | agent_name, learning_curves_files, generalization_files = get_converged_modelpaths(paths)
52 | nseeds = len(learning_curves_files)
53 |
54 | nmetrics = len(np.load(learning_curves_files[0]).files)
55 |
56 | # Learning curves
57 | # Find Max Length and resize each array to that length
58 |
59 | # for combination in combinations: for lc in combination
60 |
61 | # for i, learning_curves_files in enumerate(learning_curves_combinations):
62 | # print(i, learning_curves_files, '\n\n')
63 | max_length = 0
64 | for lc in learning_curves_files:
65 | loaded_curve = np.load(lc)['ref_learning_curve_mean']
66 | if loaded_curve.shape[0] > max_length:
67 | max_length = loaded_curve.shape[0]
68 |
69 | all_curves = np.zeros((nseeds, max_length))
70 | all_metrics = {}
71 |
72 | for metric in np.load(learning_curves_files[0]).files:
73 | all_metrics[metric] = np.copy(all_curves)
74 |
75 | # Load each seed's metric (5 - 9 per file)
76 | for seed, lc in enumerate(learning_curves_files):
77 | loaded_curve = np.load(lc)
78 | for metric in loaded_curve.files:
79 | # hacky "Broadcast" of array
80 | length = len(loaded_curve[metric])
81 | all_metrics[metric][seed][:length] = loaded_curve[metric]
82 |             # If the curves are not the same length, the zero padding is converted to NaN so np.nanmean can be used
83 | try:
84 | all_metrics[metric][seed][all_metrics[metric][seed] == 0] = np.nan
85 | except:
86 | pass
87 |
88 | all_metrics['label'] = np.array([agent_name])
89 |
90 | np.savez(os.path.join(paths['paper'],'{}-{}-batched-learning-curves.npz'.format(0, agent_name)), **all_metrics)
91 |
92 | # Generalization Curves
93 | loaded_curve = np.load(generalization_files[0])['generalization_metric']
94 | generalization_shape = loaded_curve.shape
95 |
96 | all_seeds_generalization = np.zeros((nseeds,) + generalization_shape)
97 |
98 | for seed, lc in enumerate(generalization_files):
99 | loaded_curve = np.load(lc)
100 | all_seeds_generalization[seed] = loaded_curve['generalization_metric']
101 |
102 | np.savez(os.path.join(paths['paper'],'{}-batched-generalizations.npz'.format(agent_name)),
103 | all_seeds_generalization=all_seeds_generalization)
104 |
--------------------------------------------------------------------------------
/experiments/domainrand/experiment_driver.py:
--------------------------------------------------------------------------------
1 | import matplotlib
2 | matplotlib.use('Agg')
3 |
4 | import random
5 | import logging
6 |
7 | import numpy as np
8 | import torch
9 | import gym
10 | import argparse
11 | import os
12 |
13 | from common.agents.ddpg.ddpg import DDPG
14 | from common.agents.ddpg_actor import DDPGActor
15 | from common.utils.visualization import Visualizer
16 | from common.utils.sim_agent_helper import generate_simulator_agent
17 | from common.utils.logging import setup_experiment_logs, reshow_hyperparameters, StatsLogger
18 |
19 | from experiments.domainrand.args import get_args, check_args
20 |
21 |
22 | if __name__ == '__main__':
23 | args = get_args()
24 | paths = setup_experiment_logs(args)
25 | check_args(args)
26 |
27 | torch.manual_seed(args.seed)
28 | torch.cuda.manual_seed(args.seed)
29 | np.random.seed(args.seed)
30 |
31 | stats_logger = StatsLogger(args)
32 | visualizer = Visualizer(randomized_env_id=args.randomized_eval_env_id, seed=args.seed)
33 |
34 | reference_env = gym.make(args.reference_env_id)
35 |
36 | if args.freeze_agent:
37 | # only need the actor
38 | agent_policy = DDPGActor(
39 | state_dim=reference_env.observation_space.shape[0],
40 | action_dim=reference_env.action_space.shape[0],
41 | agent_name=args.agent_name,
42 | load_agent=args.load_agent
43 | )
44 | else:
45 | agent_policy = DDPG(
46 | state_dim=reference_env.observation_space.shape[0],
47 | action_dim=reference_env.action_space.shape[0],
48 | agent_name=args.agent_name,
49 | )
50 |
51 | if args.load_agent:
52 | agent_policy.load_model()
53 |
54 |
55 | simulator_agent = generate_simulator_agent(args)
56 |
57 | svpg_timesteps = 0
58 |
59 | while simulator_agent.agent_timesteps < args.max_agent_timesteps:
60 | if svpg_timesteps % args.plot_frequency == 0:
61 | generalization_metric = visualizer.generate_ground_truth(simulator_agent, agent_policy, svpg_timesteps,
62 | log_path=paths['groundtruth_logs'])
63 |
64 | np.savez('{}/generalization-seed{}.npz'.format(paths['paper'], args.seed),
65 | generalization_metric=generalization_metric,
66 | svpg_timesteps=svpg_timesteps,
67 | learning_curve_timesteps=simulator_agent.agent_timesteps
68 | )
69 |
70 | visualizer.plot_reward(simulator_agent, agent_policy,
71 | svpg_timesteps, log_path=paths['policy_logs'], plot_path=paths['policy_plots'])
72 | visualizer.plot_value(simulator_agent, agent_policy,
73 | svpg_timesteps, log_path=paths['policy_logs'], plot_path=paths['policy_plots'])
74 | visualizer.plot_discriminator_reward(simulator_agent, agent_policy,
75 | svpg_timesteps, log_path=paths['policy_logs'], plot_path=paths['policy_plots'])
76 |
77 | if not args.freeze_svpg:
78 | visualizer.plot_sampling_frequency(simulator_agent, agent_policy,
79 | svpg_timesteps, log_path=paths['sampling_logs'], plot_path=paths['sampling_plots'])
80 |
81 | logging.info("SVPG TS: {}, Agent TS: {}".format(svpg_timesteps, simulator_agent.agent_timesteps))
82 |
83 | solved, info = simulator_agent.select_action(agent_policy)
84 | svpg_timesteps += 1
85 |
86 | if info is not None:
87 | new_best = stats_logger.update(args, paths, info)
88 |
89 | if new_best:
90 | agent_policy.save(filename='best-seed{}'.format(args.seed), directory=paths['paper'])
91 | if args.save_particles:
92 | simulator_agent.svpg.save(directory=paths['particles'])
93 |
94 | generalization_metric = visualizer.generate_ground_truth(simulator_agent, agent_policy, svpg_timesteps,
95 | log_path=paths['groundtruth_logs'])
96 |
97 | np.savez('{}/best-generalization-seed{}.npz'.format(paths['paper'], args.seed),
98 | generalization_metric=generalization_metric,
99 | svpg_timesteps=svpg_timesteps,
100 | learning_curve_timesteps=simulator_agent.agent_timesteps
101 | )
102 |
103 | if solved:
104 | logging.info("[SOLVED]")
105 |
106 | agent_policy.save(filename='final-seed{}'.format(args.seed), directory=paths['paper'])
107 | visualizer.plot_reward(simulator_agent, agent_policy,
108 | svpg_timesteps, log_path=paths['policy_logs'], plot_path=paths['policy_plots'])
109 | visualizer.plot_sampling_frequency(simulator_agent, agent_policy,
110 | svpg_timesteps, log_path=paths['sampling_logs'], plot_path=paths['sampling_plots'])
111 | reshow_hyperparameters(args, paths)
112 |
--------------------------------------------------------------------------------
/experiments/domainrand/pusher_grid_generalization.py:
--------------------------------------------------------------------------------
1 | import matplotlib
2 | matplotlib.use('Agg')
3 |
4 | import random
5 | import logging
6 |
7 | import numpy as np
8 | import torch
9 | import gym
10 | import argparse
11 | import os
12 | import glob
13 | import json
14 |
15 | from common.agents.ddpg.ddpg import DDPG
16 | from common.agents.ddpg_actor import DDPGActor
17 | from common.agents.svpg_simulator_agent import SVPGSimulatorAgent
18 | from common.envs import *
19 | from common.utils.visualization import Visualizer
20 | from common.utils.logging import setup_experiment_logs, reshow_hyperparameters
21 |
22 | from experiments.domainrand.args import get_args, check_args
23 |
24 | from common.utils.rollout_evaluation import evaluate_policy
25 | from common.envs.randomized_vecenv import make_vec_envs
26 |
27 | NEVAL_EPISODES = 10
28 | N_PROCESSES = 5
29 | N_SEEDS = 5
30 |
31 | if __name__ == '__main__':
32 | args = get_args()
33 | paths = setup_experiment_logs(experiment_name='unfreeze-policy', args=args)
34 | check_args(args, experiment_name='unfreeze-policy')
35 | reference_env = gym.make(args.reference_env_id)
36 |
37 | torch.manual_seed(args.seed)
38 | torch.cuda.manual_seed(args.seed)
39 | np.random.seed(args.seed)
40 |
41 | environment_prototype = 'Pusher3DOFGeneralization{}{}-v0'
42 |
43 |     rewards_grid = np.zeros((3, 3, N_SEEDS, NEVAL_EPISODES))
44 |     finaldists_grid = np.zeros((3, 3, N_SEEDS, NEVAL_EPISODES))
45 |
46 | for i in range(3):
47 | for j in range(3):
48 | randomized_env = make_vec_envs(environment_prototype.format(i, j), args.seed + i + j, N_PROCESSES)
49 | actor_paths = glob.glob(os.path.join(os.getcwd(), paths['paper'], 'best-seed*_actor.pth'))
50 | print(actor_paths)
51 | for actor_idx, actor_path in enumerate(actor_paths):
52 | agent_policy = DDPGActor(
53 | state_dim=reference_env.observation_space.shape[0],
54 | action_dim=reference_env.action_space.shape[0],
55 | agent_name=args.agent_name,
56 | load_agent=True,
57 | model_path=actor_path
58 | )
59 |
60 | rewards_rand, dist_rand = evaluate_policy(nagents=N_PROCESSES,
61 | env=randomized_env,
62 | agent_policy=agent_policy,
63 | replay_buffer=None,
64 | eval_episodes=NEVAL_EPISODES // N_PROCESSES,
65 | max_steps=args.max_env_timesteps,
66 | return_rewards=True,
67 | add_noise=False,
68 | log_distances=True)
69 |
70 | rewards_grid[i, j, actor_idx, :] = rewards_rand
71 | finaldists_grid[i, j, actor_idx, :] = dist_rand
72 |
73 | reshow_hyperparameters(args, paths)
74 | print(finaldists_grid)
75 |
76 | np.savez(os.path.join(paths['paper'], 'grid_generalization.npz'),
77 | rewards_grid=rewards_grid,
78 | finaldists_grid=finaldists_grid
79 | )
80 |
--------------------------------------------------------------------------------
/real_robot.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import matplotlib
3 |
4 | matplotlib.use('Agg')
5 |
6 | import random
7 | import logging
8 |
9 | import time
10 | import numpy as np
11 | import torch
12 | import gym
13 | import argparse
14 | import os
15 | import os.path as osp
16 |
17 | from tqdm import tqdm, trange
18 |
19 | from common.agents.ddpg_actor import DDPGActor
20 | import poppy_helpers
21 | import gym_ergojr
22 | import cv2
23 |
24 | parser = argparse.ArgumentParser(description='Real Robot Experiment Driver')
25 |
26 | parser.add_argument('--nepisodes', type=int, default=25, help='Number of trials per *seed*')
27 | parser.add_argument('--experiment-prefix', type=str, default='real', help='Prefix to append to logs')
28 | parser.add_argument('--log-dir', type=str, default='results/real-robot', help='Log Directory Prefix')
29 | parser.add_argument('--model-dir', type=str, default='saved-models/real-robot', help='Model Directory Prefix')
30 |
31 | args = parser.parse_args()
32 |
33 | TIMESTAMP = time.strftime("%y%m%d-%H%M%S")
34 | MAX_EPISODE_STEPS = 100
35 | EPISODES = args.nepisodes
36 |
37 | # Policies to look for
38 | policies = ['baseline', 'usdr', 'adr']
39 |
40 | env = gym.make('ErgoReacher-Live-v1')
41 | # env = gym.make('ErgoReacher-Graphical-Simple-Halfdisk-v1')
42 |
43 | npa = np.array
44 |
45 | img_buffer = []
46 |
47 | if not osp.exists(args.log_dir):
48 | os.makedirs(args.log_dir)
49 |
50 | with h5py.File("{}/{}-{}.hdf5".format(args.log_dir, args.experiment_prefix, TIMESTAMP), "w") as f:
51 | for policy_type in tqdm(policies):
52 | log_group = f.create_group(policy_type)
53 | model_path = osp.join(args.model_dir, policy_type)
54 |
55 | no_models = len(os.listdir(model_path))
56 |
57 | rewards = log_group.create_dataset("rewards", (no_models, EPISODES, MAX_EPISODE_STEPS), dtype=np.float32)
58 | distances = log_group.create_dataset("distances", (no_models, EPISODES, MAX_EPISODE_STEPS), dtype=np.float32)
59 | trajectories = log_group.create_dataset("trajectories", (no_models, EPISODES, MAX_EPISODE_STEPS, 24),
60 | dtype=np.float32)
61 | imgs = log_group.create_dataset("images", (no_models, EPISODES, MAX_EPISODE_STEPS, 480, 640, 3),
62 | dtype=np.uint8)
63 |
64 | tqdm.write('Starting analysis of {}'.format(policy_type))
65 |
66 | for model_idx, actorpth in enumerate(tqdm(os.listdir(model_path))):
67 | agent_policy = DDPGActor(
68 | state_dim=env.observation_space.shape[0],
69 | action_dim=env.action_space.shape[0],
70 | agent_name='real-{}'.format(policy_type),
71 | load_agent=True,
72 | model_path=osp.join(model_path, actorpth)
73 | )
74 |
75 | for ep_num in trange(EPISODES):
76 | obs = env.reset()
77 | done = False
78 | cumulative = 0
79 | counter = 0
80 | while not done and counter < MAX_EPISODE_STEPS:
81 | action = agent_policy.select_action(obs)
82 | nobs, reward, done, misc = env.step(action)
83 | # tqdm.write("obs: {} {} ".format(np.around(obs, 2), np.around(action, 2)))
84 | cumulative += reward
85 | trajectories[model_idx, ep_num, counter, :] = np.concatenate([obs, action, nobs])
86 | rewards[model_idx, ep_num, counter] = reward
87 | distances[model_idx, ep_num, counter] = misc["distance"]
88 | imgs[model_idx, ep_num, counter, :, :, :] = np.copy(misc["img"])
89 | # print(
90 | # np.around(trajectories[model_idx, ep_num, counter, :], 1),
91 | # np.around(rewards[model_idx, ep_num, counter], 4),
92 | # np.around(distances[model_idx, ep_num, counter], 4)
93 | # )
94 |
95 | obs = np.copy(nobs)
96 | counter += 1
97 |
98 | tqdm.write('Episode: {}, Reward: {}'.format(ep_num, cumulative))
99 |
100 | # write to disk after every model run
101 | f.flush()
102 | env.reset()
103 |
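
A hypothetical sketch of reading the HDF5 log written above. The file name is an example; the dataset shapes follow the `create_dataset` calls in this script.

```python
import h5py
import numpy as np

# Hypothetical usage: summarize per-policy rewards from a recorded real-robot run.
with h5py.File('results/real-robot/real-190329-180631.hdf5', 'r') as f:
    for policy_type in ['baseline', 'usdr', 'adr']:
        rewards = np.array(f[policy_type]['rewards'])      # (models, episodes, steps)
        distances = np.array(f[policy_type]['distances'])  # (models, episodes, steps)
        print(policy_type, rewards.shape, rewards.sum(axis=-1).mean(), distances.min())
```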
--------------------------------------------------------------------------------
/real_robot_torquesweep.py:
--------------------------------------------------------------------------------
1 | import h5py
2 | import matplotlib
3 |
4 | matplotlib.use('Agg')
5 | import time
6 | import numpy as np
7 | import gym
8 | import argparse
9 | import os
10 | import os.path as osp
11 |
12 | from tqdm import tqdm, trange
13 |
14 | from common.agents.ddpg_actor import DDPGActor
15 | import poppy_helpers
16 | import gym_ergojr
17 |
18 | parser = argparse.ArgumentParser(description='Real Robot Experiment Driver')
19 |
20 | parser.add_argument('--nepisodes', type=int, default=25, help='Number of trials per *seed*')
21 | parser.add_argument('--torques', type=int, nargs='+', default=[25, 50, 100, 200, 400],
22 | help='torque settings to iterate')
23 | parser.add_argument('--experiment-prefix', type=str, default='real', help='Prefix to append to logs')
24 | parser.add_argument('--log-dir', type=str, default='results/real-robot', help='Log Directory Prefix')
25 | parser.add_argument('--model-dir', type=str, default='saved-models/real-robot', help='Model Directory Prefix')
26 | parser.add_argument('--cont', type=str, default='190329-180631', help='To continue existing file, enter timestamp here')
27 |
28 | args = parser.parse_args()
29 |
30 | if len(args.cont) == 0:
31 | TIMESTAMP = time.strftime("%y%m%d-%H%M%S")
32 | file_flag = "w"
33 |
34 | else:
35 | TIMESTAMP = args.cont
36 | file_flag = "r+"
37 |
38 | file_path = "{}/{}-{}.hdf5".format(args.log_dir, args.experiment_prefix, TIMESTAMP)
39 |
40 | MAX_EPISODE_STEPS = 100
41 | EPISODES = args.nepisodes
42 | TORQUES = args.torques
43 |
44 | # Policies to look for
45 | policies = ['baseline', 'usdr', 'adr']
46 |
47 | env = gym.make('ErgoReacher-Live-v1')
48 | # env = gym.make('ErgoReacher-Graphical-Simple-Halfdisk-v1')
49 |
50 | npa = np.array
51 |
52 | img_buffer = []
53 |
54 | if not osp.exists(args.log_dir):
55 | os.makedirs(args.log_dir)
56 |
57 | with h5py.File(file_path, file_flag) as f:
58 |     for policy_type in tqdm(policies, desc="approaches"):
59 |         # model_path / no_models are needed below to size the datasets
60 |         model_path = osp.join(args.model_dir, policy_type)
61 |         no_models = len(os.listdir(model_path))
62 |
63 |         if policy_type not in f:  # if dataset doesn't have these tables
64 |             log_group = f.create_group(policy_type)
65 |             rewards = log_group.create_dataset("rewards", (no_models, len(TORQUES), EPISODES, MAX_EPISODE_STEPS),
66 |                                                dtype=np.float32)
67 |             distances = log_group.create_dataset("distances", (no_models, len(TORQUES), EPISODES, MAX_EPISODE_STEPS),
68 |                                                  dtype=np.float32)
69 |             trajectories = log_group.create_dataset("trajectories",
70 |                                                     (no_models, len(TORQUES), EPISODES, MAX_EPISODE_STEPS, 24),
71 |                                                     dtype=np.float32)
72 |             imgs = log_group.create_dataset("images",
73 |                                             (no_models, len(TORQUES), EPISODES, MAX_EPISODE_STEPS, 480, 640, 3),
74 |                                             dtype=np.uint8, compression="lzf")
75 |         else:  # if tables are in dataset, grab their pointers
76 |             rewards = f.get("/{}/{}".format(policy_type, "rewards"))
77 |             distances = f.get("/{}/{}".format(policy_type, "distances"))
78 |             trajectories = f.get("/{}/{}".format(policy_type, "trajectories"))
79 |             imgs = f.get("/{}/{}".format(policy_type, "images"))
80 |
81 | tqdm.write('Starting analysis of {}'.format(policy_type))
82 |
83 | for model_idx, actorpth in enumerate(tqdm(os.listdir(model_path), desc="models....")):
84 | agent_policy = DDPGActor(
85 | state_dim=env.observation_space.shape[0],
86 | action_dim=env.action_space.shape[0],
87 | agent_name='real-{}'.format(policy_type),
88 | load_agent=True,
89 | model_path=osp.join(model_path, actorpth)
90 | )
91 |
92 | for torque_idx, torque in enumerate(tqdm(TORQUES, desc="torques...")):
93 |
94 | for ep_num in trange(EPISODES, desc="episodes.."):
95 | non_zero_steps = np.count_nonzero(trajectories[model_idx, torque_idx, ep_num], axis=1)
96 |
97 | if np.count_nonzero(non_zero_steps) == 0:
98 | obs = env.reset()
99 | env.unwrapped.setSpeed(torque)
100 | done = False
101 | cumulative = 0
102 | counter = 0
103 | img_buffer = []
104 | while counter < MAX_EPISODE_STEPS:
105 | action = agent_policy.select_action(obs)
106 | nobs, reward, _, misc = env.step(action)
107 | cumulative += reward
108 | trajectories[model_idx, torque_idx, ep_num, counter, :] = np.concatenate(
109 | [obs, action, nobs])
110 | rewards[model_idx, torque_idx, ep_num, counter] = reward
111 | distances[model_idx, torque_idx, ep_num, counter] = misc["distance"]
112 | img_buffer.append(np.copy(misc["img"]))
113 |
114 | obs = np.copy(nobs)
115 | counter += 1
116 |
117 | imgs[model_idx, torque_idx, ep_num, :counter, :, :, :] = img_buffer
118 |
119 | # tqdm.write('Episode: {}, Reward: {}'.format(ep_num, cumulative))
120 |
121 | # write to disk after every model run
122 | f.flush()
123 |
124 | env.reset()
125 |
--------------------------------------------------------------------------------
/scripts/README.md:
--------------------------------------------------------------------------------
1 | # TL;DR Scripting
2 |
3 | There are two scripts that are helpful to execute the experiments
4 | as described in the [README](../README.md):
5 |
6 | 1. `scripts/multiseed.sh` for executing an experiment with multiple seeds.
7 | 2. `scripts/with_seed.sh` for executing one experiment with one seed.
8 |
9 | The abbreviated names for each experiment are defined in the `experiments.sh`
10 | script located in this folder.
11 |
12 | We currently support the following experiments:
13 |
14 | **Baselines**
15 | - `baseline_pure`
16 | - `baseline_fulldr`
17 |
18 | **Unfreeze Policy**
19 | - `unfreeze_policy_pretrained`
20 | - `unfreeze_policy_scratch`
21 |
22 | **Unfreeze Discriminator**
23 | - `unfreeze_discriminator_pretrained`
24 | - `unfreeze_discriminator_scratch`
25 |
26 | **SVPG 2D Full**
27 | - `svpg2d_ours`
28 | - `svpg2d_fulldr`
29 |
30 | #### Examples
31 |
32 | Use `multiseed.sh` to execute an experiment with multiple, consecutive seeds.
33 | The syntax for `multiseed.sh` is as follows:
34 |
35 | ```bash
36 | scripts/multiseed.sh [environment] [user] [experiment] [starting seed] [number of seeds]
37 | ```
38 |
39 | For instance:
40 |
41 | ```bash
42 | scripts/multiseed.sh bluewire manfred svpg2d_fulldr 0 5
43 | ```
44 |
45 | executes 5 seeds `[0, 1, 2, 3, 4]` of the `svpg2d_fulldr` experiment
46 | using `manfred.sh` configuration for the `bluewire` environment.
47 |
48 | Alternatively, you can use `with_seed.sh` to run an experiment with only 1 seed.
49 | The syntax for `with_seed.sh` is as follows:
50 |
51 | ```bash
52 | scripts/with_seed.sh [environment] [user] [experiment] [seed]
53 | ```
54 |
55 | Then,
56 |
57 | ```bash
58 | scripts/with_seed.sh slurm bhairav svpg2d_ours 1234
59 | ```
60 |
61 | executes `svpg2d_ours` experiment with `seed=1234` using Bhairav's slurm configuration.
62 |
63 | ### NOTE
64 | **ALWAYS!!!!** execute the scripts from the repo main folder.
65 |
66 | # Custom Configurations
67 |
68 | This section explains how (and why you need) to create per user/per
69 | environment configuration to run the experiments scripts.
70 |
71 | ## Environments
72 |
73 | As we currently have multiple places where we can run our experiments
74 | (slurm, bluewire, uberduck, etc.), and we may be adding more soon (e.g.,
75 | AWS), the particularities of each environment are quite different.
76 | Therefore, we need to isolate them from our main scripting.
77 |
78 | There are currently 3 folders to group each
79 | user's particular settings for any of those environments.
80 |
81 | ```
82 | scripts/envs
83 | - bluewire (Manfred's PC at home)
84 | - slurm (Mila's cluster)
85 | - uberduck (Lab's computer)
86 | ```
87 |
88 | ## Users
89 |
90 | Create an `[env]/[user].sh` file to configure your particular setting in the `[env]` environment.
91 |
92 | For instance, this is Bhairav's configuration for Mila's Slurm cluster at `slurm/bhairav.sh`:
93 |
94 | ```bash
95 | #!/usr/bin/env bash
96 | #SBATCH --gres=gpu
97 | #SBATCH --cpus-per-task=2 # maximum CPU cores per GPU request: 6 on Cedar, 16 on Graham
98 | #SBATCH --mem=36000M # memory per node
99 | #SBATCH --time=1-12:00 # time (DD-HH:MM)
100 | #SBATCH --qos=low
101 | #SBATCH --requeue
102 | #SBATCH --mail-user=noreply@domain.com
103 | #SBATCH --mail-type=ALL
104 |
105 | echo "Configuring Slurm Job Environment - $SLURM_JOB_ID"
106 | source activate ml
107 | cd ~/coding/diffsim
108 |
109 | export PYTHONPATH="${PYTHONPATH}:`pwd`/coding"
110 | export LD_LIBRARY_PATH=/Tmp/glx:$LD_LIBRARY_PATH
111 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/u/mehtabha/.mujoco/mjpro150/bin
112 | export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so.1.10
113 | Xvfb :$SLURM_JOB_ID -screen 0 84x84x24 -ac +extension GLX +render -noreset &> xvfb.log &
114 | export DISPLAY=:$SLURM_JOB_ID
115 |
116 | ```
117 |
118 | Hence, if, for example, Bhairav wants to run 5 seeds (starting at 0) of the `svpg2d_ours` experiment on slurm,
119 | he would have to execute the following command from the main `diffsim` folder:
120 |
121 | ```
122 | scripts/multiseed.sh slurm bhairav svpg2d_ours 0 5
123 | ```
--------------------------------------------------------------------------------
/scripts/docopts:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/scripts/docopts
--------------------------------------------------------------------------------
/scripts/docopts.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | # vim: set et sw=4 ts=4 sts=4:
3 | #
4 | # docopts helper for bash
5 | #
6 | # Usage:
7 | # source path/to/docopts.sh
8 | # docopts -A ARGS -h "$help" -V $version : "$@"
9 | #
10 | # the prefix docopt_* is used to export globals and functions
11 | # docopt_auto_parse() modify $HELP and $ARGS
12 |
13 | # compute this file dirpath:
14 | docopt_sh_me=$($(type -p greadlink readlink | head -1 ) -f "${BASH_SOURCE[0]}")
15 | docopt_sh_dir="$(dirname "$docopt_sh_me")"
16 |
17 | # fetch Usage: from the given filename
18 | # usually $0 in the main level script
19 | docopt_get_help_string() {
20 | local myfname=$1
21 | # filter the block (/!\ all blocks) starting at a "# Usage:" and ending
22 | # at an empty line, one level of comment markup is removed
23 | #
24 | ## sed -n -e '/^# Usage:/,/\(^# \?----\|^$\)/ { /----/ d; s/^# \?//p }' rock_no-stdin_example.sh
25 |
26 | # -n : no print output
27 | # -e : pass sed code inline
28 | # /^# Usage:/,/^$/ : filter range blocks from '# Usage:' to empty line
29 | # s/^# \?// : substitute comment marker and an optional space
30 | # p : print
31 | sed -n -e '/^# Usage:/,/^$/ s/^# \?//p' < $myfname
32 | }
33 |
34 | # fetch version information from the given filename or string
35 | # usually $0 in the main level script, or the help string extracted
36 | # by docopt_get_help_string()
37 | docopt_get_version_string() {
38 | if [[ -f "$1" ]] ; then
39 | # filter the block (all blocks) starting at a "# Usage:" and ending
40 | # at an empty line, one level of comment markup is removed
41 | sed -n -e '/^# ----/,/^$/ s/^# \?//p' < "$1"
42 | else
43 | # use docopts --separator behavior
44 | echo "$1"
45 | fi
46 | }
47 |
48 | # convert a repeatable option parsed by docopts into a bash ARRAY
49 | # ARGS['FILE,#']=3
50 | # ARGS['FILE,0']=somefile1
51 | # ARGS['FILE,1']=somefile2
52 | # ARGS['FILE,2']=somefile3
53 | # Usage: myarray=( $(docopt_get_values ARGS FILE) )
54 | docopt_get_values() {
55 | local opt=$2
56 | local ref="\${$1[$opt,#]}"
57 | local nb_val=$(eval echo "$ref")
58 | local i=0
59 | local vars=""
60 | while [[ $i -lt $nb_val ]] ; do
61 | ref="\${$1[$opt,$i]}"
62 | eval "vars+=\" $ref\""
63 | i=$(($i + 1))
64 | done
65 | echo $vars
66 | }
67 |
68 | # echo evaluable code to get alls the values into a bash array
69 | # Usage: eval "$(docopt_get_eval_array ARGS FILE myarray)"
70 | docopt_get_eval_array() {
71 | local ref="\${$1[$2,#]}"
72 | local nb_val=$(eval echo "$ref")
73 | local i=0
74 | local vars=""
75 | echo "declare -a $3"
76 | while [[ $i -lt $nb_val ]] ; do
77 | ref="\${$1[$2,$i]}"
78 | eval "echo \"$3+=( '$ref' )\""
79 | i=$(($i + 1))
80 | done
81 | }
82 |
83 | # Auto parser for the same docopts usage over scripts, for laziness.
84 | #
85 | # It uses this convention:
86 | #  - help string in: $HELP (modified at global scope)
87 | # - Usage is extracted by docopt_get_help_string at beginning of the script
88 | # - arguments are evaluated at global scope in the bash 4 assoc $ARGS
89 | # - no version information is handled
90 | #
91 | docopt_auto_parse() {
92 | local script_fname=$1
93 | shift
94 | # $HELP in global scope
95 | HELP="$(docopt_get_help_string "$script_fname")"
96 | # $ARGS[] assoc array must be declared outside of this function
97 |     # or its scope will be local; that's why we don't print it.
98 | scripts/docopts -A ARGS --no-declare -h "$HELP" : "$@"
99 | res=$?
100 | return $res
101 | }
102 |
103 | # Extract the raw value of a parsed docopts output.
104 | # arguments:
105 | # - assoc: the docopts assoc name
106 | # - key: the wanted key
107 | # - docopts_out: the full parsed output (before eval)
108 | docopt_get_raw_value() {
109 | local assoc=$1
110 | local key="$2"
111 | local docopts_out="$3"
112 | local kstr=$(printf "%s['%s']" $assoc "$key")
113 | # split on '=', outputs the remaining for the matching $1
114 | awk -F= "\$1 == \"$kstr\" {sub(\"^[^=]+=\", \"\", \$0);print}" <<<"$docopts_out"
115 | }
116 |
117 | # Debug: prints the env variable ARGS or $1 formatted as a bash 4 assoc array
118 | docopt_print_ARGS() {
119 | local assoc="$1"
120 | if [[ -z $assoc ]] ; then
121 | assoc=ARGS
122 | fi
123 |
124 | # bash dark magic copying $assoc argument to a local myassoc array
125 | # inspired by:
126 | # https://stackoverflow.com/questions/6660010/bash-how-to-assign-an-associative-array-to-another-variable-name-e-g-rename-t#8881121
127 | declare -A myassoc
128 | eval $(typeset -A -p $assoc|sed "s/ $assoc=/ myassoc=/")
129 |
130 | # loop on keys
131 | echo "docopt_print_ARGS => $assoc"
132 | local a
133 | for a in ${!myassoc[@]} ; do
134 | printf "%20s = %s\n" $a "${myassoc[$a]}"
135 | done
136 | }
137 |
138 | ## main code
139 | # --auto : don't forget to pass "$@"
140 | # Usage: source docopts.sh --auto "$@"
141 | if [[ "$1" == "--auto" ]] ; then
142 | shift
143 | # declare must be used at global scope to be accessible at
144 |     # global level anywhere in the caller script.
145 | declare -A ARGS
146 | eval "$(docopt_auto_parse "${BASH_SOURCE[1]}" "$@")"
147 | fi
148 |
--------------------------------------------------------------------------------
/scripts/envs/bluewire/manfred.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | echo "configuring -> Bluewire Environment"
4 |
5 | source `which virtualenvwrapper.sh`
6 | workon diffsim
7 | export PYTHONPATH="${PYTHONPATH}:`pwd`"
8 | Xvfb :1 -screen 0 84x84x24 -ac +extension GLX +render -noreset &> xvfb.log &
9 | export DISPLAY=:1
10 |
--------------------------------------------------------------------------------
/scripts/envs/slurm/bhairav.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #SBATCH --gres=gpu
3 | #SBATCH --cpus-per-task=2 # maximum CPU cores per GPU request: 6 on Cedar, 16 on Graham
4 | #SBATCH --mem=36000M # memory per node
5 | #SBATCH --time=1-12:00 # time (DD-HH:MM)
6 | #SBATCH --qos=low
7 | #SBATCH --requeue
8 | #SBATCH --mail-user=noreply@domain.com
9 | #SBATCH --mail-type=ALL
10 |
11 | echo "Configuring Slurm Job Environment - $SLURM_JOB_ID"
12 | source activate rl-local
13 | cd ~/coding/diffsim
14 |
15 | export PYTHONPATH="${PYTHONPATH}:`pwd`/coding"
16 | export LD_LIBRARY_PATH=/Tmp/glx:$LD_LIBRARY_PATH
17 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/u/mehtabha/.mujoco/mjpro150/bin
18 | export LD_PRELOAD=/usr/lib/x86_64-linux-gnu/libGLEW.so.1.10
19 | Xvfb :$SLURM_JOB_ID -screen 0 84x84x24 -ac +extension GLX +render -noreset &> xvfb.log &
20 | export DISPLAY=:$SLURM_JOB_ID
--------------------------------------------------------------------------------
/scripts/envs/slurm/manfred.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
--------------------------------------------------------------------------------
/scripts/envs/uberduck/bhairav.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
--------------------------------------------------------------------------------
/scripts/experiments/lunar_lander.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 |
4 | # =============== Lunar Lander ============
5 |
6 | lunar_lander_baseline() {
7 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=true-baseline --seeds 5 \
8 | with --freeze-discriminator \
9 | --agent-name=baseline \
10 | --initial-svpg-steps=1e6 \
11 | --continuous-svpg \
12 | --freeze-svpg
13 | }
14 |
15 | lunar_lander_full_dr() {
16 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=fulldr-baseline --seeds 5 \
17 | with --randomized-env-id="LunarLanderRandomized-v0" \
18 | --freeze-discriminator \
19 | --agent-name=baseline-full-dr \
20 | --initial-svpg-steps=1e6 \
21 | --continuous-svpg \
22 | --freeze-svpg
23 | }
24 |
25 | lunar_lander_expert_813() {
26 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=e813-baseline --seeds 5 \
27 | with --randomized-env-id="LunarLanderRandomized-RandomM813-v0" \
28 | --freeze-discriminator \
29 | --agent-name=expert-813 \
30 | --initial-svpg-steps=1e6 \
31 | --continuous-svpg \
32 | --freeze-svpg
33 | }
34 |
35 | lunar_lander_expert_811() {
36 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=e811-baseline --seeds 5 \
37 | with --randomized-env-id="LunarLanderRandomized-RandomM811-v0" \
38 | --freeze-discriminator \
39 | --agent-name=expert-811 \
40 | --initial-svpg-steps=1e6 \
41 | --continuous-svpg \
42 | --freeze-svpg
43 | }
44 |
45 | lunar_lander_ours_1d() {
46 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=ours-lunar1d --seeds 5 \
47 | with --randomized-env-id="LunarLanderRandomized-v0" \
48 | --agent-name=ours-lunar1d \
49 | --continuous-svpg
50 | }
51 |
52 | lunar_lander_ours_1d_5p() {
53 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=ours-lunar1d-5p --seeds 5 \
54 | with --randomized-env-id="LunarLanderRandomized-v0" \
55 | --agent-name=ours-lunar1d-5p \
56 | --continuous-svpg \
57 | --nagents=5
58 | }
59 |
60 | lunar_lander_ours_1d_5p_6_20() {
61 | python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=ours-lunar1d-5p-620 --seeds 1 \
62 | with --randomized-env-id="LunarLanderRandomized-RandomM620-v0" \
63 | --agent-name=ours-lunar1d-5p-620 \
64 | --continuous-svpg \
65 | --nagents=5
66 | }
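67 |
68 | # These functions are not run directly: scripts/run.sh sources this file and
69 | # dispatches its EXPERIMENT argument to one of them (e.g. lunar_lander_baseline).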
--------------------------------------------------------------------------------
/scripts/experiments/pusher_3dof.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # =============== Pusher 3 DoF ============
4 |
5 | pusher_3dof_baseline() {
6 | python scripts/launch.py --main=unfreeze_policy --setting=pusher --prefix=true-baseline --seeds 5 \
7 | with --freeze-discriminator \
8 | --agent-name=baseline \
9 | --initial-svpg-steps=1e6 \
10 | --continuous-svpg \
11 | --freeze-svpg
12 | }
13 |
14 | pusher_3dof_full_dr() {
15 | python scripts/launch.py --main=unfreeze_policy --setting=pusher --prefix=fulldr-baseline --seeds 5 \
16 | with --randomized-env-id="Pusher3DOFRandomized-v0" \
17 | --freeze-discriminator \
18 | --agent-name=baseline-full-dr \
19 | --initial-svpg-steps=1e6 \
20 | --continuous-svpg \
21 | --freeze-svpg
22 | }
23 |
24 | pusher_3dof_ours() {
25 | python scripts/launch.py --main=unfreeze_policy --setting=pusher --prefix=ours-pusher-3p --seeds 5 \
26 | with --randomized-env-id="Pusher3DOFRandomized-v0" \
27 | --agent-name=ours-pusher \
28 | --continuous-svpg
29 | }
30 |
31 | pusher_3dof_ours_5p() {
32 | python scripts/launch.py --main=unfreeze_policy --setting=pusher --prefix=ours-pusher-5p --seeds 5 \
33 | with --randomized-env-id="Pusher3DOFRandomized-v0" \
34 | --agent-name=ours-pusher-5p \
35 | --continuous-svpg \
36 | --nagents=5
37 | }
--------------------------------------------------------------------------------
/scripts/launch.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | from datetime import timedelta
4 | from subprocess import Popen, PIPE
5 | import time
6 |
7 | EXPERIMENT_TEMPLATE = 'nohup python -m experiments.domainrand.{main} {setting} --experiment-prefix={prefix} ' \
8 | '--seed={seed} {extra}'
9 | SLEEP_TIME = 30 # in seconds
10 |
11 | LOG_DIRECTORY = 'tails'
12 | LOG_FILENAME_TEMPLATE = '{dir}/{main}-{setting}-{prefix}-{seed}.log'
13 |
14 |
15 | def parse():
16 |
17 | parser = argparse.ArgumentParser()
18 |
19 | parser.add_argument('--main')
20 | parser.add_argument('--setting', choices=['lunar', 'pusher', 'ergo'])
21 | parser.add_argument('--prefix')
22 | parser.add_argument('--first-seed', default=0, type=int)
23 | parser.add_argument('--seeds', type=int)
24 |
25 | parser.add_argument('with', choices=['with'])
26 | parser.add_argument('extra', nargs=argparse.REMAINDER)
27 |
28 | return parser.parse_args()
29 |
30 |
31 | def call_module(main, setting, prefix, seed, arguments):
32 | cmd = EXPERIMENT_TEMPLATE.format(
33 | main=main,
34 | setting=setting,
35 | prefix=prefix,
36 | seed=seed,
37 | extra=" ".join(arguments)
38 | )
39 | args = cmd.split(' ')
40 |
41 | log_filename = LOG_FILENAME_TEMPLATE.format(
42 | dir=LOG_DIRECTORY,
43 | main=main,
44 | setting=setting,
45 | prefix=prefix,
46 | seed=seed
47 | )
48 | log_file = open(log_filename, 'w+')
49 |
50 | handler = Popen(args=args, stdin=log_file, stdout=log_file, stderr=log_file)
51 |
52 | return handler
53 |
54 |
55 | def call_multi_seed(main, setting, prefix, initial_seed, seeds, extra):
56 | process_handlers = []
57 |
58 | if not os.path.exists(LOG_DIRECTORY):
59 | os.mkdir(LOG_DIRECTORY)
60 |
61 | for index in range(seeds):
62 | handler = call_module(
63 | main=main,
64 | setting=setting,
65 | prefix=prefix,
66 | seed=index + initial_seed,
67 | arguments=extra
68 | )
69 | process_handlers.append(handler)
70 | print("{} Seeds with PID = [{}]".format(seeds, ", ".join(list(map(lambda p: str(p.pid), process_handlers)))))
71 | return process_handlers
72 |
73 |
74 | def is_process_running(p):
75 | return p.poll() is None
76 |
77 |
78 | def wait_all(process_handlers):
79 | _time = time.time()
80 |
81 | while any(map(is_process_running, process_handlers)):
82 | print('\rWaiting for all seeds to finish...', end='')
83 | time.sleep(SLEEP_TIME)
84 | _time = time.time() - _time - SLEEP_TIME
85 |
86 | return _time
87 |
88 |
89 | def exit_status(process_handlers):
90 | return list(map(lambda p: str(p.poll()), process_handlers))
91 |
92 |
93 | def run_experiment(args):
94 | print('Experiments')
95 | print('===================================')
96 | print("Launching experiment <{experiment}> with <{setting}>.".format(experiment=args.main,
97 | setting=args.setting))
98 | process_handlers = call_multi_seed(
99 | main=args.main,
100 | setting=args.setting,
101 | prefix=args.prefix,
102 | initial_seed=args.first_seed,
103 | seeds=args.seeds,
104 | extra=args.extra
105 | )
106 | _time = wait_all(process_handlers)
107 | print()
108 | print('<-------- COMPLETED -------------->')
109 | seeds_status = exit_status(process_handlers)
110 | print('Seeds Exit Status = [{}]'.format(",".join(seeds_status)))
111 | print('Elapsed Time = {}'.format(str(timedelta(seconds=_time))))
112 | print('===================================')
113 |
114 | return all(int(status) == 0 for status in seeds_status)  # True iff every seed exited with status 0
115 |
116 |
117 | def collect_data(args):
118 | print()
119 | print('Data Collection')
120 | print('===================================')
121 | print("Launching data recollection of <{experiment}> with <{setting}>.".format(experiment=args.main,
122 | setting=args.setting))
123 | process_handlers = call_multi_seed(
124 | main='batch_reward_analysis',
125 | setting=args.setting,
126 | prefix=args.prefix,
127 | initial_seed=args.first_seed,
128 | seeds=args.seeds,
129 | extra=args.extra
130 | )
131 | _time = wait_all(process_handlers)
132 | print()
133 | seeds_status = exit_status(process_handlers)
134 | print('<-------- COMPLETED -------------->')
135 | print('Seeds Exit Status = [{}]'.format(",".join(seeds_status)))
136 | print('Elapsed Time = {}'.format(str(timedelta(seconds=_time))))
137 | print('===================================')
138 |
139 | return all(int(status) == 0 for status in seeds_status)  # True iff every seed exited with status 0
140 |
141 |
142 | def launch():
143 |
144 | print('<---- RUNNING ---->')
145 |
146 | args = parse()
147 |
148 | steps = [
149 | run_experiment,
150 | collect_data
151 | ]
152 |
153 | done = False
154 | for step in steps:
155 | done = step(args)
156 | if not done:
157 | break
158 | print()
159 | print('<---- DONE: {} --->'.format(done))
160 |
161 |
162 | if __name__ == '__main__':
163 | launch()
164 |
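165 | # Example invocation, mirroring lunar_lander_baseline in scripts/experiments/lunar_lander.sh
166 | # (flags after "with" are appended to the generated `python -m experiments.domainrand.<main>` command):
167 | #   python scripts/launch.py --main=unfreeze_policy --setting=lunar --prefix=true-baseline --seeds 5 \
168 | #       with --freeze-discriminator --agent-name=baseline --initial-svpg-steps=1e6 --continuous-svpg --freeze-svpg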
--------------------------------------------------------------------------------
/scripts/real-robot-read-dataset.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import h5py
3 | import cv2
4 | import matplotlib.pyplot as plt
5 |
6 |
7 | npa = np.array
8 |
9 | # f = h5py.File('../results/real-robot/real-190328-170248.hdf5', 'r') # smaller demo file with only 2 rollouts per model
10 | f = h5py.File('../results/real-robot/real-190328-174502.hdf5', 'r') # 25 rollouts per model
11 |
12 | experiments = ['adr', 'baseline', 'usdr']  # top-level HDF5 groups, one per training approach
13 |
14 | for e in experiments:
15 | print(e + "/distances\t\t", f.get(e + "/distances").shape)
16 | print(e + "/rewards\t\t", f.get(e + "/rewards").shape)
17 | print(e + "/images\t\t\t", f.get(e + "/images").shape)
18 | print(e + "/trajectories\t", f.get(e + "/trajectories").shape)
19 |
20 | # example replay:
21 | model_type = "adr"
22 | model_no = 3
23 | run = 1
24 |
25 | # for frame in f.get("{}/images".format(model_type))[model_no, run]:
26 | # if np.count_nonzero(frame) > 0:
27 | # cv2.imshow("Replay", frame)
28 | # cv2.waitKey(20)
29 | #
30 | # frame_len = 0
31 | # for frame in f.get("{}/trajectories".format(model_type))[model_no, run]:
32 | # if np.count_nonzero(frame) > 0:
33 | # print (np.around(frame,1))
34 | # frame_len+=1
35 | #
36 | # x = np.arange(frame_len)
37 | #
38 | # for motor in range(4):
39 | # plt.plot(x, f.get("{}/trajectories".format(model_type))[model_no, run, :frame_len, motor+14], label="motor "+str(motor+1))
40 | #
41 | # plt.plot(x, 5*f.get("{}/distances".format(model_type))[model_no, run, :frame_len], label="distance to goal x 5")
42 | # plt.hlines(0.025*5, 0, frame_len, label="solved", linestyles="dotted")
43 | # plt.ylim((-1,1))
44 | # plt.legend()
45 | # plt.tight_layout()
46 | # plt.show()
47 |
48 | # max_frame_len = 0
49 |
50 | # for color, model_type in zip(["red", "green", "blue"], experiments):
51 | # print (model_type, color)
52 | #
53 | # for model_no in range(5):
54 | # for run in range(len(f.get("{}/trajectories".format(model_type))[model_no, :])):
55 | # frame_len = np.count_nonzero(
56 | # np.count_nonzero(f.get("{}/trajectories".format(model_type))[model_no, run], axis=1))
57 | # if frame_len > max_frame_len:
58 | # max_frame_len = frame_len
59 | # x = np.arange(frame_len)
60 | # plt.plot(x, f.get("{}/distances".format(model_type))[model_no, run, :frame_len], c=color)
61 | #
62 | # plt.hlines(0.025, 0, max_frame_len, label="solved", linestyles="dotted")
63 | # plt.legend()
64 | # plt.tight_layout()
65 | # plt.title("Distances Of All Rollouts Over Time")
66 | # plt.show()
67 |
68 |
69 |
70 |
71 | #### HISTOGRAM BAD
72 |
73 | # for color, model_type in zip(["red", "green", "blue"], experiments):
74 | # print (model_type, color)
75 | # values = []
76 | #
77 | # for model_no in range(5):
78 | # for run in range(len(f.get("{}/trajectories".format(model_type))[model_no, :])):
79 | # frame_len = np.count_nonzero(
80 | # np.count_nonzero(f.get("{}/trajectories".format(model_type))[model_no, run], axis=1))
81 | # values.append(frame_len)
82 | # plt.hist(values, alpha=0.5, color=color, label=model_type)
83 | #
84 | # plt.legend()
85 | # plt.tight_layout()
86 | # plt.title("Distances Of All Rollouts Over Time")
87 | # plt.show()
88 |
89 | #### FINAL DISTANCE PLOT
90 |
91 |
92 | pos = 1
93 | val = []
94 |
95 | colors = ["red", "green", "blue"]
96 |
97 | for color, model_type in zip(colors, experiments):
98 | print(model_type, color)
99 |
100 | values_model = []
101 |
102 | for model_no in range(5):
103 | for run in range(len(f.get("{}/trajectories".format(model_type))[model_no, :])):
104 | frame_len = np.count_nonzero(
105 | np.count_nonzero(f.get("{}/trajectories".format(model_type))[model_no, run], axis=1))
106 | values_model.append(f.get("{}/distances".format(model_type))[model_no, run, frame_len-1])
107 |
108 | # plt.scatter(np.ones(len(values))*pos, values, alpha=0.5, c=color, label=model_type)
109 |
110 | val.append(values_model)
111 |
112 | pos += 1
113 |
114 | bplot = plt.boxplot(npa(val).T, labels=experiments, patch_artist=True)
115 |
116 | cm = plt.cm.get_cmap('viridis')
117 | colors = [cm(i / 3) for i in range(3)]  # avoid shadowing the boxplot data stored in `val`
118 |
119 | for patch, color in zip(bplot['boxes'], colors):
120 | patch.set_facecolor(color)
121 |
122 | plt.legend()
123 | # plt.tight_layout()
124 | plt.title("Real Robot Rollout Performance Box Plots\n"
125 | "5 policies per approach, 25 runs per policy")
126 | plt.show()
127 |
128 |
129 |
--------------------------------------------------------------------------------
/scripts/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # Usage: run.sh --env=ENV --user=USR EXPERIMENT
4 | #
5 | # Load a machine/user environment configuration, source the experiment
6 | # definitions, and run the requested EXPERIMENT function.
7 | #
8 | # Arguments:
9 | # EXPERIMENT experiment to run
10 | #
11 | # Options:
12 | # -h --help
13 | # --env=E Environment
14 | # --user=U User configuration
15 | #
16 |
17 | call_experiment() {
18 | $1
19 | }
20 |
21 | # load the environment configuration
22 | environment() {
23 | source scripts/envs/$1/$2.sh
24 | }
25 |
26 | # experiments
27 | source scripts/experiments/lunar_lander.sh
28 | source scripts/experiments/pusher_3dof.sh
29 |
30 | # parsing named arguments
31 | source scripts/docopts.sh --auto "$@"
32 |
33 | environment ${ARGS['--env']} ${ARGS['--user']}
34 | call_experiment ${ARGS['EXPERIMENT']}
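35 |
36 | # Example (from the repo root, assuming the env/user configs under scripts/envs/
37 | # and the experiment functions sourced above):
38 | #   bash scripts/run.sh --env=slurm --user=bhairav lunar_lander_baseline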
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | setup(name='diffsim',
4 | version='1.0',
5 | install_requires=['gym>=0.5',
6 | 'sklearn',
7 | 'torch',
8 | 'numpy',
9 | 'matplotlib',
10 | 'scipy',
11 | 'bayesian-optimization',
12 | 'box2d',
13 | 'box2d-kengz',
14 | 'mujoco_py',
15 | 'lxml',
16 | 'tqdm',
17 | 'gym_ergojr>=1.2']
18 | )
19 |
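20 | # Typical usage (standard setuptools workflow, not specific to this package):
21 | #   pip install -e .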
--------------------------------------------------------------------------------
/slurm.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #SBATCH --gres=gpu
3 | #SBATCH --cpus-per-task=2 # maximum CPU cores per GPU request: 6 on Cedar, 16 on Graham
4 | #SBATCH --mem=36000M # memory per node
5 | #SBATCH --time=1-12:00 # time (DD-HH:MM)
6 | #SBATCH --qos=low
7 | #SBATCH --requeue
8 | #SBATCH --mail-user=noreply@domain.com
9 | #SBATCH --mail-type=ALL
10 |
11 | echo "Configuring Slurm Job Environment - $SLURM_JOB_ID"
12 | source activate rl-local
13 | cd ~/coding/diffsim
14 |
15 | export PYTHONPATH="${PYTHONPATH}:`pwd`/coding"
16 | # python -m experiments.domainrand.experiment_driver lunar --experiment-name=gail-baseline --initial-svpg-steps=1e6 --freeze-svpg --prerecorded-trajectories --expert-trajectories-file="reference_trajectories_trained_16" --continuous-svpg --randomized-env-id="LunarLanderRandomized-v0" --experiment-prefix="gailbaseline16" --seed=1 &
17 | # python -m experiments.domainrand.experiment_driver lunar --experiment-name=gail-baseline --initial-svpg-steps=1e6 --freeze-svpg --prerecorded-trajectories --expert-trajectories-file="reference_trajectories_trained_16" --continuous-svpg --randomized-env-id="LunarLanderRandomized-v0" --experiment-prefix="gailbaseline16" --seed=2
18 |
19 | python -m experiments.domainrand.experiment_driver lunar --experiment-name=adaptive-randomization --particle-path="saved-models/particles/" --reward-scale=-1.0 --kld-coefficient=0.01 --prerecorded-trajectories --expert-trajectories-file="reference_trajectories_trained_16" --continuous-svpg --randomized-env-id="LunarLanderRandomized-v0" --experiment-prefix="adrplus16" --seed=2 &
20 |
21 | python -m experiments.domainrand.experiment_driver lunar --experiment-name=adaptive-randomization --particle-path="saved-models/particles/" --reward-scale=-1.0 --kld-coefficient=0.01 --prerecorded-trajectories --expert-trajectories-file="reference_trajectories_trained_16" --continuous-svpg --randomized-env-id="LunarLanderRandomized-v0" --experiment-prefix="adrplus16" --seed=3
22 |
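23 | # Submit this script to the cluster scheduler with sbatch, e.g.:
24 | #   sbatch slurm.sh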
--------------------------------------------------------------------------------
/tests/00-test-vecenv.py:
--------------------------------------------------------------------------------
1 | from common.envs import LunarLanderRandomized
2 | from common.envs.randomized_vecenv import make_vec_envs
3 |
4 | def _create_envs(seed, reference_env_id='LunarLanderDefault-v0',
5 | randomized_env_id='LunarLanderRandomized-v0'):
6 |
7 | reference_env = make_vec_envs(reference_env_id, seed, num_processes=3)
8 | randomized_env = make_vec_envs(randomized_env_id, seed, num_processes=3)
9 |
10 | return reference_env, randomized_env
11 |
12 |
13 | reference_env, randomized_env = _create_envs(1)
14 | obs = randomized_env.reset()
15 | print(randomized_env.get_current_params())
16 |
17 | for _ in range(3):
18 | randomized_env.randomize(randomized_values=[['random'], ['random'], ['random']])  # one settings list per parallel env
19 | print(randomized_env.get_current_params())
20 |
21 | print("2D Lunar Lander Randomization")
22 | reference_env, randomized_env = _create_envs(1, randomized_env_id='LunarLanderRandomized2D-v0')
23 | obs = randomized_env.reset()
24 | print(randomized_env.get_current_params())
25 |
26 | for _ in range(3):
27 | randomized_env.randomize(randomized_values=[['random', 'random'], ['random', 'random'], ['random', 'random']])
28 | print(randomized_env.get_current_params())
29 |
30 | print("2D - Setting One Value")
31 | randomized_env.randomize(randomized_values=[[0.0, 'random'], [0.5, 'random'], [1.0, 'random']])
32 | print(randomized_env.get_current_params())
33 |
34 | print("2D - Setting Both Values")
35 | randomized_env.randomize(randomized_values=[[0.0, 1.0], [0.5, 0.5], [1.0, 0.0]])
36 | print(randomized_env.get_current_params())
--------------------------------------------------------------------------------
/tests/01-test-svpg-vectorized.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from common.svpg.svpg import SVPG
3 | from common.envs.randomized_vecenv import make_vec_envs
4 |
5 | def _create_envs(seed, nagents, reference_env_id='LunarLanderDefault-v0',
6 | randomized_env_id='LunarLanderRandomized-v0'):
7 |
8 | reference_env = make_vec_envs(reference_env_id, seed, nagents)
9 | randomized_env = make_vec_envs(randomized_env_id, seed, nagents)
10 |
11 | return reference_env, randomized_env
12 |
13 | nagents = 3
14 | svpg = SVPG(nagents)
15 | reference_env, randomized_env = _create_envs(seed=123, nagents=nagents)
16 |
17 | simulation_settings = svpg.step()
18 | assert (nagents, svpg.svpg_rollout_length, svpg.nparams) == simulation_settings.shape
19 |
20 | simulation_settings = np.transpose(simulation_settings, (1, 0, 2))
21 |
22 | for t in range(svpg.svpg_rollout_length):
23 | print("Current Timestep: {}".format(t))
24 | print([simulation_settings[t]])
25 | randomized_env.randomize(randomized_values=simulation_settings[t])
26 | print(randomized_env.get_current_params())
27 |
28 |
29 |
--------------------------------------------------------------------------------
/tests/02-test-svpg-policy-rollout-vectorized.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from common.svpg.svpg import SVPG
3 | from common.envs.randomized_vecenv import make_vec_envs
4 |
5 | def _create_envs(seed, nagents, reference_env_id='LunarLanderDefault-v0',
6 | randomized_env_id='LunarLanderRandomized-v0'):
7 |
8 | reference_env = make_vec_envs(reference_env_id, seed, nagents)
9 | randomized_env = make_vec_envs(randomized_env_id, seed, nagents)
10 |
11 | return reference_env, randomized_env
12 |
13 | nagents = 3
14 | svpg = SVPG(nagents)
15 | reference_env, randomized_env = _create_envs(seed=123, nagents=nagents)
16 |
17 | simulation_settings = svpg.step()
18 | assert (nagents, svpg.svpg_rollout_length, svpg.nparams) == simulation_settings.shape
19 |
20 | simulation_settings = np.transpose(simulation_settings, (1, 0, 2))
21 |
22 | for t in range(svpg.svpg_rollout_length):
23 | print("Current Timestep: {}".format(t))
24 | print([simulation_settings[t]])
25 | randomized_env.randomize(randomized_values=simulation_settings[t])
26 | print(randomized_env.get_current_params())
27 |
28 |
29 |
--------------------------------------------------------------------------------
/tests/03-test-vanilla-fetchreach.py:
--------------------------------------------------------------------------------
1 | import common.envs
2 | import gym
3 | import time
4 |
5 | env = gym.make("FetchReachDenseDS-v1")
6 |
7 | print("action dim: {}, obs dim: {}".format(env.action_space, env.observation_space))
8 |
9 | # exploration
10 | exploration_actions = [ # the actions are for the end effector, thus implying IK
11 | [1, 0, 0, 0], # forward
12 | [-1, 0, 0, 0], # backward
13 | [0, 1, 0, 0], # left (from robot's perspective)
14 | [0, -1, 0, 0], # right
15 | [0, 0, 1, 0], # up
16 | [0, 0, -1, 0] # down
17 | # [0, 0, 0, 1], # gripper open/close, unused in fetch
18 | # [0, 0, 0, -1] # gripper open/close, unused in fetch
19 | ]
20 | exploration_length = 50
21 | env.reset()
22 | done = False
23 | i = 0
24 | exploration_action_idx = 0
25 | while True:
26 | action = exploration_actions[exploration_action_idx]
27 | obs, rew, done, misc = env.step(action)
28 | env.render()
29 | i += 1
30 | if i % exploration_length == 0:
31 | exploration_action_idx += 1
32 | if exploration_action_idx == len(exploration_actions):
33 | break
34 | time.sleep(0.02)
35 |
36 | # # random movement
37 | # for i in range(5):
38 | # env.reset()
39 | # done = False
40 | #
41 | # while not done:
42 | # action = env.action_space.sample()
43 | # obs, rew, done, misc = env.step(action)
44 | # print (obs, rew, misc)
45 | # env.render()
46 |
--------------------------------------------------------------------------------
/tests/04-test-randomized-mujoco-api.py:
--------------------------------------------------------------------------------
1 | import time
2 | from timeit import default_timer as timer
3 | import numpy as np
4 | import tqdm
5 | import gym
6 | import common.envs
7 | from common.envs.wrappers import RandomizedEnvWrapper
8 |
9 | np.random.seed(1234)
10 |
11 |
12 | env = gym.make('PusherRandomized-v0')
13 | env = RandomizedEnvWrapper(env=env, seed=0)
14 |
15 | obs = env.reset()
16 |
17 | start = timer()
18 | for i in tqdm.tqdm(range(int(1e6))):
19 | env.randomize(randomized_values=["random", "random", "random"])
20 | print(timer() - start)
21 |
--------------------------------------------------------------------------------
/tests/05-test-randomized-mujoco-viz.py:
--------------------------------------------------------------------------------
1 | import time
2 | from timeit import default_timer as timer
3 | import numpy as np
4 | import tqdm
5 | import gym
6 | import common.envs
7 | from common.envs.wrappers import RandomizedEnvWrapper
8 |
9 | np.random.seed(1234)
10 |
11 |
12 | env = gym.make('Pusher3DOFRandomized-v0')
13 | env = RandomizedEnvWrapper(env=env, seed=0)
14 |
15 | # obs = env.reset()
16 |
17 | start = timer()
18 | for i in tqdm.tqdm(range(100)):
19 | env.randomize(randomized_values=["random", "random", "random"])
20 | env.reset()
21 | for _ in range(200):
22 | obs, reward, done, _ = env.step(env.action_space.sample())
23 | env.render()
24 | print(obs)
25 |
26 | env.close()
27 | print(timer() - start)
28 |
--------------------------------------------------------------------------------
/tests/06-test-randomized-ergoreach.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import gym_ergojr
3 | import time
4 | from tqdm import tqdm
5 | from common.envs.wrappers import RandomizedEnvWrapper
6 |
7 | # MODE = "MANUAL" # slow but let's you see what's happening
8 | MODE = "SPEED" # as fast as possible
9 |
10 | def no_op(x):
11 | pass
12 |
13 |
14 | if MODE == "MANUAL":
15 | env = gym.make("ErgoReacherRandomized-Graphical-v0") # looks nice
16 | timer = time.sleep
17 | else:
18 | env = gym.make("ErgoReacherRandomized-Headless-v0") # runs fast
19 | timer = no_op
20 |
21 | env = RandomizedEnvWrapper(env=env, seed=0)
22 |
23 | for _ in tqdm(range(100)):
24 | env.reset()
25 | env.randomize(randomized_values=["random"] * 8) # 8 values to randomize over
26 |
27 | while True:
28 | action = env.action_space.sample()
29 | obs, rew, done, misc = env.step(action)
30 | timer(0.05)
31 |
32 | if done:
33 | break
--------------------------------------------------------------------------------
/tests/07-test-mujoco-3dof-keyboard-control.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # manual
3 |
4 | """
5 | This script allows you to manually control the simulator or Duckiebot
6 | using the keyboard arrows.
7 | """
8 |
9 | import sys
10 | from pynput import keyboard
11 | from pynput.keyboard import Key
12 | import numpy as np
13 | import gym
14 | import common.envs
15 | from common.envs.wrappers import RandomizedEnvWrapper
16 |
17 | env = gym.make('Pusher3DOFUberHard-v0')
18 | env = RandomizedEnvWrapper(env=env, seed=0)
19 |
20 | reward = 0.
21 |
22 | print('hi')
23 | env.randomize(randomized_values=["random", "random"])
24 | env.reset()
25 | env.render()
26 |
27 | ACTIONS = [
28 | np.array([0.0, 0.0, 1.0]),
29 | np.array([0.0, 0.0, -1.0]),
30 | np.array([0.0, 1.0, 0.0]),
31 | np.array([0.0, -1.0, 0.0]),
32 | np.array([1.0, 0.0, 0.0]),
33 | np.array([-1.0, 0.0, 0.0])
34 | ]
35 |
36 | ACTION_KEYS = [Key.up, Key.down, Key.page_up, Key.page_down, Key.right, Key.left]
37 |
38 |
39 |
40 | def on_press(key):
41 | global reward
42 | if key in ACTION_KEYS:
43 | s_, r, d, info = env.step(ACTIONS[ACTION_KEYS.index(key)])
44 | env.render()
45 | reward += r
46 |
47 | if d:
48 | print(info['goal_dist'], reward)
49 | env.randomize(randomized_values=["random", "random"])
50 | env.reset()
51 | reward = 0
52 |
53 | with keyboard.Listener(on_press=on_press) as listener:
54 | listener.join()
55 |
56 | env.close()
57 |
--------------------------------------------------------------------------------
/tests/08-test-mujoco-4dof-keyboard-control.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # manual
3 |
4 | """
5 | This script allows you to manually control the simulator or Duckiebot
6 | using the keyboard arrows.
7 | """
8 |
9 | import sys
10 | from pynput import keyboard
11 | from pynput.keyboard import Key
12 | import numpy as np
13 | import gym
14 | import common.envs
15 | from common.envs.wrappers import RandomizedEnvWrapper
16 |
17 | env = gym.make("ErgoReacher4DOFRandomizedHardVisual-v0")
18 | env = RandomizedEnvWrapper(env=env, seed=0)
19 |
20 | env.reset()
21 | env.render()
22 |
23 | ACTION_KEYS = [Key.up, Key.down, Key.page_up, Key.page_down, Key.right, Key.left, Key.home, Key.end, Key.alt]
24 |
25 | def on_press(key):
26 | if key == Key.tab: env.reset()
27 | if key in ACTION_KEYS:
28 | action = np.zeros(4)
29 | if key != Key.alt:
30 | index = ACTION_KEYS.index(key)
31 | multiplier = 1 if index % 2 == 0 else -1
32 |
33 | act_idx = index // 2
34 | action[act_idx] = multiplier
35 |
36 | s_, r, d, info = env.step(action)
37 | print(info)
38 | env.render()
39 | if d:
40 | env.randomize(randomized_values=["random", "random", "random"])
41 | env.reset()
42 |
43 | with keyboard.Listener(on_press=on_press) as listener:
44 | listener.join()
45 |
46 | env.close()
--------------------------------------------------------------------------------
/tests/09-test-mujoco-3dof-auto.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import time
3 |
4 | import numpy as np
5 | import gym
6 | from tqdm import tqdm
7 |
8 | import common.envs
9 | from common.envs.wrappers import RandomizedEnvWrapper
10 |
11 | env = gym.make('Pusher3DOFRandomized-v0')
12 | env = RandomizedEnvWrapper(env=env, seed=0)  # env.randomize()
13 |
14 | actions = [
15 | np.array([0.0, 0.0, 1.0]),
16 | np.array([0.0, 0.0, -1.0]),
17 | np.array([0.0, 1.0, 0.0]),
18 | np.array([0.0, -1.0, 0.0]),
19 | np.array([1.0, 0.0, 0.0]),
20 | np.array([-1.0, 0.0, 0.0])
21 | ]
22 | actions.reverse()
23 |
24 | action_change_freq = 50
25 |
26 | for env_idx in tqdm(range(10)):
27 | env.reset()
28 | env.render()
29 |
30 | for action in actions:
31 |
32 | for _ in range(action_change_freq):
33 | _, _, _, _ = env.step(action)
34 | env.render()
35 | time.sleep(0.01)
36 |
37 | # print (np.min(env.unwrapped.qposes, axis=0),np.max(env.unwrapped.qposes, axis=0))
38 |
--------------------------------------------------------------------------------
/tests/10-test-mujoco-3dof-ranges.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | import time
3 |
4 | import numpy as np
5 | import gym
6 | from tqdm import tqdm
7 |
8 | import common.envs
9 | from common.envs.wrappers import RandomizedEnvWrapper
10 |
11 | env = gym.make('Pusher3DOFRandomized-v0')
12 | env = RandomizedEnvWrapper(env=env, seed=0)  # env.randomize()
13 |
14 | actions = [
15 | np.array([0.0, 0.0, 1.0]),
16 | np.array([0.0, 0.0, -1.0]),
17 | np.array([0.0, 1.0, 0.0]),
18 | np.array([0.0, -1.0, 0.0]),
19 | np.array([1.0, 0.0, 0.0]),
20 | np.array([-1.0, 0.0, 0.0])
21 | ]
22 | actions.reverse()
23 |
24 | discretization = 50
25 |
26 | randomized_values = ["default", "default"]
27 | values = np.linspace(0, 1, discretization)
28 |
29 | for dim in range(2):
30 | for i in tqdm(range(discretization)):
31 | rands = list(randomized_values)  # copy so the other dimension stays at "default"
32 | rands[dim] = values[i]
33 | env.randomize(rands)
34 | env.reset()
35 |
36 | for _ in range(50):
37 | env.step(env.action_space.sample())
38 | env.render()
39 | time.sleep(0.01)
40 |
41 | # print (np.min(env.unwrapped.qposes, axis=0),np.max(env.unwrapped.qposes, axis=0))
42 |
--------------------------------------------------------------------------------
/tests/11-test-randomized-ergoreach-halfdisk.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import gym_ergojr
3 | import time
4 | from tqdm import tqdm
5 | from common.envs.wrappers import RandomizedEnvWrapper
6 |
7 | # MODE = "MANUAL" # slow but let's you see what's happening
8 | MODE = "SPEED" # as fast as possible
9 |
10 | def no_op(x):
11 | pass
12 |
13 |
14 | if MODE == "MANUAL":
15 | env = gym.make("ErgoReacher-Halfdisk-Randomized-Graphical-v0") # looks nice
16 | timer = time.sleep
17 | else:
18 | env = gym.make("ErgoReacher-Halfdisk-Randomized-Headless-v0") # runs fast
19 | timer = no_op
20 |
21 | env = RandomizedEnvWrapper(env=env, seed=0)
22 |
23 | for _ in tqdm(range(100)):
24 | env.reset()
25 | env.randomize(randomized_values=["random"] * 8) # 8 values to randomize over
26 |
27 | while True:
28 | action = env.action_space.sample()
29 | obs, rew, done, misc = env.step(action)
30 | timer(0.05)
31 |
32 | if done:
33 | break
--------------------------------------------------------------------------------
/tests/12-test-randomized-ergoreach-backlash-halfdisk.py:
--------------------------------------------------------------------------------
1 | import gym
2 | import gym_ergojr
3 | import time
4 | from tqdm import tqdm
5 | from common.envs.wrappers import RandomizedEnvWrapper
6 |
7 | MODE = "MANUAL" # slow but let's you see what's happening
8 | # MODE = "SPEED" # as fast as possible
9 |
10 | def no_op(x):
11 | pass
12 |
13 |
14 | if MODE == "MANUAL":
15 | env = gym.make("ErgoReacher-DualGoal-Default-Graphical-v0") # looks nice
16 | timer = time.sleep
17 | else:
18 | env = gym.make("ErgoReacher-Halfdisk-Backlash-Randomized-Headless-v0") # runs fast
19 | timer = no_op
20 |
21 | env = RandomizedEnvWrapper(env=env, seed=0)
22 |
23 | for _ in tqdm(range(100)):
24 | env.reset()
25 | env.randomize(randomized_values=["random"] * 8) # 8 values to randomize over
26 |
27 | while True:
28 | action = env.action_space.sample()
29 | # action = [-1,0,0,0]
30 | obs, rew, done, misc = env.step(action)
31 | timer(0.05)
32 |
33 | if done:
34 | break
--------------------------------------------------------------------------------
/tests/13-test-randomized-humanoid.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import time
4 | import gym
5 | import common.envs
6 | from common.envs.wrappers import RandomizedEnvWrapper
7 |
8 | env = gym.make('HumanoidRandomizedEnv-v0')
9 | env = RandomizedEnvWrapper(env=env, seed=0)
10 |
11 | reward = 0.
12 |
13 | env.randomize(randomized_values=["random", "random", "random", "random", "random", "random"])
14 | env.reset()
15 | env.render()
16 |
17 | d = False
18 | while True:
19 | s_, r, d, info = env.step(env.action_space.sample())
20 | time.sleep(0.1)
21 |
22 | if d:
23 | env.randomize(
24 | randomized_values=["random", "random", "random", "random", "random", "random"])
25 | env.reset()
26 | env.render()
27 |
--------------------------------------------------------------------------------
/tests/14-test-randomized-halfcheetah.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import time
4 | import gym
5 | import common.envs
6 | from common.envs.wrappers import RandomizedEnvWrapper
7 |
8 | env = gym.make('HalfCheetahRandomizedEnv-v0')
9 | env = RandomizedEnvWrapper(env=env, seed=0)
10 |
11 | reward = 0.
12 |
13 | env.randomize(randomized_values=["random", "random", "random", "random"])
14 | env.reset()
15 | env.render()
16 |
17 | d = False
18 | t = 0
19 | while True:
20 | s_, r, d, info = env.step(env.action_space.sample())
21 | t += 1
22 |
23 | if d or t > 1000:
24 | t = 0
25 | env.randomize(
26 | randomized_values=["random", "random", "random", "random"])
27 | env.reset()
28 | env.render()
29 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/montrealrobotics/active-domainrand/e6dc550734f9e5946d3300f26d21412f3ebd418c/tests/__init__.py
--------------------------------------------------------------------------------