├── .gitignore ├── readme.md ├── requirements.txt ├── result └── slbo.png ├── setup.py └── slbo ├── __init__.py ├── algos ├── __init__.py ├── mbrl │ ├── __init__.py │ └── slbo.py └── mfrl │ ├── __init__.py │ ├── ppo.py │ └── trpo.py ├── configs ├── __init__.py ├── config.py ├── slbo_config.yaml └── trpo_config.yaml ├── envs ├── __init__.py ├── mujoco │ ├── __init__.py │ ├── gym │ │ ├── __init__.py │ │ ├── ant_env.py │ │ ├── half_cheetah_env.py │ │ ├── hopper_env.py │ │ ├── swimmer_env.py │ │ └── walker2d_env.py │ ├── mujoco_envs.py │ └── rllab │ │ ├── __init__.py │ │ ├── ant_env.py │ │ ├── half_cheetah_env.py │ │ ├── hopper_env.py │ │ ├── humanoid_env.py │ │ ├── mujoco_models │ │ ├── ant.xml │ │ ├── green_ball.xml │ │ ├── half_cheetah.xml │ │ ├── hill_ant_env.xml.mako │ │ ├── hill_half_cheetah_env.xml.mako │ │ ├── hill_hopper_env.xml.mako │ │ ├── hill_swimmer3d_env.xml.mako │ │ ├── hill_walker2d_env.xml.mako │ │ ├── hopper.xml │ │ ├── humanoid.xml │ │ ├── inverted_double_pendulum.xml │ │ ├── inverted_double_pendulum.xml.mako │ │ ├── point.xml │ │ ├── red_ball.xml │ │ ├── simple_humanoid.xml │ │ ├── swimmer.xml │ │ ├── swimmer3d.xml │ │ ├── utils.mako │ │ └── walker2d.xml │ │ ├── rllab_ant.xml │ │ ├── rllab_half_cheetah.xml │ │ ├── rllab_hopper.xml │ │ ├── rllab_simple_humanoid.xml │ │ ├── rllab_swimmer.xml │ │ ├── rllab_walker2d.xml │ │ ├── swimmer_env.py │ │ └── walker2d_env.py ├── virtual_env.py └── wrapped_envs.py ├── misc ├── __init__.py ├── distributions.py ├── logger.py ├── ou_noise.py ├── param.py └── utils.py ├── models ├── __init__.py ├── actor.py ├── actor_critic.py ├── actor_layer.py ├── critic.py ├── dynamics.py ├── initializer.py ├── normalizers.py └── utils.py ├── scripts ├── run_slbo.py └── run_trpo.py └── storages ├── __init__.py ├── off_policy_buffer.py └── on_policy_buffer.py /.gitignore: -------------------------------------------------------------------------------- 1 | /envs/mujoco/rllab/mujoco_models/ 2 | /.idea 3 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # SLBO_PyTorch 2 | A PyTorch reimplementation of SLBO 3 | 4 | # Dependency 5 | 6 | Please refer to ./requirements.txt. 7 | 8 | # Usage 9 | 10 | python ./scripts/run_slbo.py 11 | 12 | hyperparams in ./configs/slbo_config.yaml 13 | 14 | # Result 15 | 16 | ![results on modified hopper](./result/slbo.png) 17 | 18 | # Credits 19 | 1. [kostrikov/pytorch-trpo](https://github.com/ikostrikov/pytorch-trpo) 20 | 2. 
[facebookresearch/slbo](https://github.com/facebookresearch/slbo) 21 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | atari-py==0.2.6 2 | attrs==19.3.0 3 | box2d-py==2.3.8 4 | cloudpickle==1.2.1 5 | colorama==0.4.3 6 | filelock==3.0.12 7 | gtimer==1.0.0b5 8 | gym~=0.17.2 9 | matplotlib==3.1.2 10 | more-itertools==8.0.2 11 | mpi4py==3.0.3 12 | mujoco-py==2.0.2.5 13 | multidict==4.7.5 14 | munch==2.5.0 15 | numpy~=1.18.5 16 | pybullet==2.6.1 17 | PyYAML~=5.3.1 18 | stable-baselines==2.10.0 19 | tensorboard==1.14.0 20 | tensorflow==1.14.0 21 | tensorflow-estimator==1.14.0 22 | termcolor==1.1.0 23 | torch~=1.6.0 24 | scipy~=1.5.1 25 | tqdm~=4.46.1 26 | -------------------------------------------------------------------------------- /result/slbo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangsy/slbo_pytorch/7b1283968a82c939725c2705e7315d1f3759ee29/result/slbo.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | from setuptools import setup 3 | 4 | setup( 5 | name='slbo_pytorch', 6 | auther='Shengyi Jiang', 7 | author_email='shengyi.jiang@outlook.com', 8 | packages=find_packages(), 9 | package_data={}, 10 | install_requires=[ 11 | 'torch>=1.4.0', 12 | 'gym>=0.17.0', 13 | 'numpy', 14 | 'stable_baselines', 15 | 'pyglib', 16 | 'scipy', 17 | ]) 18 | -------------------------------------------------------------------------------- /slbo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangsy/slbo_pytorch/7b1283968a82c939725c2705e7315d1f3759ee29/slbo/__init__.py -------------------------------------------------------------------------------- /slbo/algos/__init__.py: -------------------------------------------------------------------------------- 1 | from slbo.algos.mbrl.slbo import SLBO 2 | from slbo.algos.mfrl import PPO, TRPO 3 | -------------------------------------------------------------------------------- /slbo/algos/mbrl/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangsy/slbo_pytorch/7b1283968a82c939725c2705e7315d1f3759ee29/slbo/algos/mbrl/__init__.py -------------------------------------------------------------------------------- /slbo/algos/mbrl/slbo.py: -------------------------------------------------------------------------------- 1 | from operator import itemgetter 2 | import torch 3 | 4 | from slbo.models.dynamics import Dynamics 5 | from slbo.models.normalizers import Normalizers 6 | 7 | 8 | class SLBO: 9 | def __init__(self, dynamics: Dynamics, normalizers: Normalizers, batch_size: int, num_updates: int, 10 | num_rollout_steps, l2_reg_coef, lr, max_grad_norm=2): 11 | self.dynamics = dynamics 12 | self.normalizers = normalizers 13 | 14 | self.num_updates = num_updates 15 | self.num_rollout_steps = num_rollout_steps 16 | self.batch_size = batch_size 17 | self.l2_reg_coef = l2_reg_coef 18 | self.max_grad_norm = max_grad_norm 19 | 20 | self.dynamics_optimizer = torch.optim.Adam(self.dynamics.parameters(), lr) 21 | 22 | def update(self, model_buffer) -> dict: 23 | 24 | gen = model_buffer.get_sequential_batch_generator(self.batch_size, 
self.num_rollout_steps) 25 | 26 | model_loss_epoch = 0. 27 | l2_loss_epoch = 0. 28 | for _ in range(self.num_updates): 29 | try: 30 | state_sequences, action_sequences, next_state_sequences, mask_sequences = \ 31 | itemgetter(*['states', 'actions', 'next_states', 'masks'])(next(gen)) 32 | except StopIteration: 33 | gen = model_buffer.get_sequential_batch_generator(self.batch_size, self.num_rollout_steps) 34 | state_sequences, action_sequences, next_state_sequences, mask_sequences = \ 35 | itemgetter(*['states', 'actions', 'next_states', 'masks'])(next(gen)) 36 | 37 | cur_states = state_sequences[:, 0] 38 | model_loss = 0. 39 | 40 | for i in range(self.num_rollout_steps): 41 | next_states = self.dynamics(cur_states, action_sequences[:, i]) 42 | diffs = next_states - cur_states - next_state_sequences[:, i] + state_sequences[:, i] 43 | weighted_diffs = diffs / torch.clamp(self.normalizers.diff_normalizer.std, min=1e-6) 44 | model_loss += weighted_diffs.pow(2).mean(-1).sqrt() 45 | 46 | if i < self.num_rollout_steps - 1: 47 | cur_states = state_sequences[:, i + 1] + \ 48 | mask_sequences[:, i] * (next_states - state_sequences[:, i + 1]) 49 | 50 | model_loss = model_loss.mean() / self.num_rollout_steps 51 | params = self.dynamics.parameters() 52 | l2_loss = self.l2_reg_coef * torch.stack([torch.norm(t, p=2) for t in params]).sum() 53 | 54 | model_loss_epoch += model_loss.item() 55 | l2_loss_epoch += l2_loss.item() 56 | 57 | self.dynamics_optimizer.zero_grad() 58 | (model_loss + l2_loss).backward() 59 | torch.nn.utils.clip_grad_norm_(self.dynamics.parameters(), self.max_grad_norm) 60 | self.dynamics_optimizer.step() 61 | 62 | model_loss_epoch /= self.num_updates 63 | return {'model_loss': model_loss_epoch, 'l2_loss': l2_loss_epoch} 64 | -------------------------------------------------------------------------------- /slbo/algos/mfrl/__init__.py: -------------------------------------------------------------------------------- 1 | from slbo.algos.mfrl.ppo import PPO 2 | from slbo.algos.mfrl.trpo import TRPO -------------------------------------------------------------------------------- /slbo/algos/mfrl/ppo.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | 5 | from slbo.models.actor_critic import ActorCritic 6 | 7 | 8 | class PPO: 9 | def __init__(self, actor_critic: ActorCritic, clip_param: float, num_grad_updates: int, batch_size: int, 10 | value_loss_coef: float, entropy_coef: float, lr: float = None, max_grad_norm: float = None, 11 | use_clipped_value_loss=True, verbose=0): 12 | self.actor_critic = actor_critic 13 | 14 | self.clip_param = clip_param 15 | self.num_grad_updates = num_grad_updates 16 | self.batch_size = batch_size 17 | self.value_loss_coef = value_loss_coef 18 | self.entropy_coef = entropy_coef 19 | 20 | self.max_grad_norm = max_grad_norm 21 | self.use_clipped_value_loss = use_clipped_value_loss 22 | 23 | self.optimizer = optim.Adam(actor_critic.parameters(), lr=lr) 24 | 25 | self.verbose = verbose 26 | 27 | def update(self, policy_buffer) -> dict: 28 | advantage = policy_buffer.returns[:-1] - policy_buffer.values[:-1] 29 | advantage = (advantage - advantage.mean()) / (advantage.std() + 1e-5) 30 | 31 | value_loss_epoch = 0 32 | action_loss_epoch = 0 33 | dist_entropy_epoch = 0 34 | 35 | for _ in range(self.num_grad_updates): 36 | 37 | data_generator = policy_buffer.get_batch_generator(self.batch_size, advantage) 38 | 39 | for sample in data_generator: 40 | states, 
actions, value_preds, returns, old_action_log_probs, adv_targets = \ 41 | sample['states'], sample['actions'], sample['values'], \ 42 | sample['returns'], sample['action_log_probs'], sample['adv_targets'] 43 | 44 | values, action_log_probs, dist_entropy = self.actor_critic.evaluate_action(states, actions) 45 | 46 | ratio = torch.exp(action_log_probs - old_action_log_probs) 47 | surr1 = ratio * adv_targets 48 | surr2 = torch.clamp(ratio, 1.0 - self.clip_param, 1.0 + self.clip_param) * adv_targets 49 | 50 | action_loss = -torch.min(surr1, surr2).mean() 51 | 52 | if self.use_clipped_value_loss: 53 | value_pred_clipped = value_preds + \ 54 | (values - value_preds).clamp(-self.clip_param, self.clip_param) 55 | value_losses = (values - returns).pow(2) 56 | value_losses_clipped = ( 57 | value_pred_clipped - returns).pow(2) 58 | value_loss = 0.5 * torch.max(value_losses, 59 | value_losses_clipped).mean() 60 | else: 61 | value_loss = 0.5 * (returns - values).pow(2).mean() 62 | 63 | self.optimizer.zero_grad() 64 | (value_loss * self.value_loss_coef + action_loss - 65 | dist_entropy * self.entropy_coef).backward() 66 | nn.utils.clip_grad_norm_(self.actor_critic.parameters(), 67 | self.max_grad_norm) 68 | self.optimizer.step() 69 | 70 | value_loss_epoch += value_loss.item() 71 | action_loss_epoch += action_loss.item() 72 | dist_entropy_epoch += dist_entropy.item() 73 | 74 | num_updates = self.num_grad_updates * self.batch_size 75 | 76 | value_loss_epoch /= num_updates 77 | action_loss_epoch /= num_updates 78 | dist_entropy_epoch /= num_updates 79 | 80 | return {'value_loss': value_loss_epoch, 'action_loss': action_loss_epoch, 81 | 'dist_entropy': dist_entropy_epoch} 82 | -------------------------------------------------------------------------------- /slbo/algos/mfrl/trpo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.optimize 3 | import torch 4 | 5 | from slbo.models import Actor, VCritic 6 | from slbo.models.utils import get_flat_params, set_flat_params, get_flat_grad 7 | try: 8 | from slbo.misc import logger 9 | except ImportError: 10 | from stable_baselines import logger 11 | 12 | 13 | # noinspection DuplicatedCode 14 | class TRPO: 15 | def __init__(self, actor: Actor, critic: VCritic, max_kld=1e-2, l2_reg_coef=1e-3, damping=0.1, 16 | entropy_coef=0., line_search_accepted_ratio=0.1, verbose=0): 17 | 18 | self.actor = actor 19 | self.critic = critic 20 | 21 | self.max_kld = max_kld 22 | self.l2_reg = l2_reg_coef 23 | self.damping = damping 24 | self.linesearch_accepted_ratio = line_search_accepted_ratio 25 | self.entropy_coef = entropy_coef 26 | 27 | self.verbose = verbose 28 | 29 | @staticmethod 30 | def get_conjugate_gradient(Avp, b, nsteps, residual_tol=1e-10): 31 | x = torch.zeros_like(b) 32 | r = b.clone() 33 | p = b.clone() 34 | rdotr = torch.dot(r, r) 35 | for i in range(nsteps): 36 | _Avp = Avp(p) 37 | alpha = rdotr / torch.dot(p, _Avp) 38 | x += alpha * p 39 | r -= alpha * _Avp 40 | new_rdotr = torch.dot(r, r) 41 | beta = new_rdotr / rdotr 42 | p = r + beta * p 43 | rdotr = new_rdotr 44 | if rdotr < residual_tol: 45 | break 46 | return x 47 | 48 | def linesearch(self, f, init_params, fullstep, expected_improve_rate, max_backtracks=10): 49 | with torch.no_grad(): 50 | fval = f() 51 | for (_n_backtracks, stepfrac) in enumerate(.5 ** np.arange(max_backtracks)): 52 | new_params = init_params + stepfrac * fullstep 53 | set_flat_params(self.actor, new_params) 54 | newfval = f() 55 | actual_improve = fval - newfval 56 | 
expected_improve = expected_improve_rate * stepfrac 57 | ratio = actual_improve / expected_improve 58 | if self.verbose > 0: 59 | logger.log("a/e/r ", actual_improve.item(), expected_improve.item(), ratio.item()) 60 | if ratio.item() > self.linesearch_accepted_ratio and actual_improve.item() > 0: 61 | return True, new_params 62 | return False, init_params 63 | 64 | # noinspection DuplicatedCode 65 | def update_critic(self, states, targets): 66 | def get_value_loss(params): 67 | set_flat_params(self.critic, torch.tensor(params)) 68 | for param in self.critic.parameters(): 69 | if param.grad is not None: 70 | param.grad.data.fill_(0) 71 | 72 | values = self.critic(states) 73 | value_loss_ = (values - targets).pow(2).mean() 74 | 75 | loss = value_loss_ 76 | for param in self.critic.parameters(): 77 | loss += param.pow(2).sum() * self.l2_reg 78 | loss.backward() 79 | return loss.data.cpu().double().numpy(), get_flat_grad(self.critic).data.cpu().double().numpy() 80 | 81 | flat_params, value_loss, _ = scipy.optimize.fmin_l_bfgs_b(get_value_loss, 82 | get_flat_params(self.critic).cpu().double().numpy(), 83 | maxiter=25) 84 | set_flat_params(self.critic, torch.tensor(flat_params)) 85 | return value_loss 86 | 87 | def update(self, policy_buffer) -> dict: 88 | advantages = policy_buffer.returns[:-1] - policy_buffer.values[:-1] 89 | advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-5) 90 | 91 | data_generator = policy_buffer.get_batch_generator(advantages=advantages) 92 | 93 | value_loss_epoch = 0. 94 | action_loss_epoch = 0. 95 | 96 | for sample in data_generator: 97 | states, actions, returns, adv_targets = \ 98 | sample['states'], sample['actions'], sample['returns'], sample['adv_targets'] 99 | 100 | value_loss = self.update_critic(states, returns) 101 | fixed_log_prob = self.actor.evaluate_action(states, actions)[0].detach() 102 | 103 | def get_action_loss(): 104 | log_prob, entropy = self.actor.evaluate_action(states, actions) 105 | action_loss_ = - adv_targets * torch.exp(log_prob - fixed_log_prob) - self.entropy_coef * entropy 106 | return action_loss_.mean() 107 | 108 | def get_kl(): 109 | *_, action_means, action_logstds, action_stds = self.actor.act(states) 110 | 111 | fixed_action_means = action_means.detach() 112 | fixed_action_logstds = action_logstds.detach() 113 | fixed_action_stds = action_stds.detach() 114 | kl = action_logstds - fixed_action_logstds + \ 115 | (fixed_action_stds.pow(2) + (fixed_action_means - action_means).pow(2)) / \ 116 | (2.0 * action_stds.pow(2)) - 0.5 117 | return kl.sum(1, keepdim=True) 118 | 119 | action_loss = get_action_loss() 120 | action_loss_grad = torch.autograd.grad(action_loss, self.actor.parameters()) 121 | flat_action_loss_grad = torch.cat([grad.view(-1) for grad in action_loss_grad]).data 122 | 123 | def Fvp(v): 124 | kl = get_kl() 125 | kl = kl.mean() 126 | 127 | kld_grad = torch.autograd.grad(kl, self.actor.parameters(), create_graph=True) 128 | flat_kld_grad = torch.cat([grad.view(-1) for grad in kld_grad]) 129 | 130 | kl_v = (flat_kld_grad * v).sum() 131 | kld_grad_grad = torch.autograd.grad(kl_v, self.actor.parameters()) 132 | flat_kld_grad_grad = torch.cat([grad.contiguous().view(-1) for grad in kld_grad_grad]).data 133 | 134 | return flat_kld_grad_grad + v * self.damping 135 | 136 | stepdir = self.get_conjugate_gradient(Fvp, -flat_action_loss_grad, 10) 137 | 138 | shs = 0.5 * (stepdir * Fvp(stepdir)).sum(0) 139 | 140 | lm = torch.sqrt(shs / self.max_kld) 141 | fullstep = stepdir / lm 142 | 143 | neggdotstepdir = 
(-flat_action_loss_grad * stepdir).sum(0, keepdim=True) 144 | if self.verbose > 0: 145 | logger.info(("lagrange multiplier:", lm, "grad_norm:", flat_action_loss_grad.norm())) 146 | 147 | prev_params = get_flat_params(self.actor) 148 | success, new_params = self.linesearch(get_action_loss, prev_params, fullstep, neggdotstepdir / lm) 149 | set_flat_params(self.actor, new_params) 150 | 151 | value_loss_epoch += value_loss 152 | action_loss_epoch += action_loss.item() 153 | 154 | return {'action_loss': action_loss_epoch, 'value_loss': value_loss_epoch} 155 | -------------------------------------------------------------------------------- /slbo/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangsy/slbo_pytorch/7b1283968a82c939725c2705e7315d1f3759ee29/slbo/configs/__init__.py -------------------------------------------------------------------------------- /slbo/configs/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import yaml 4 | from munch import DefaultMunch 5 | from yaml import Loader 6 | import collections 7 | 8 | try: 9 | from slbo.misc import logger 10 | except ImportError: 11 | from stable_baselines import logger 12 | 13 | 14 | def flatten(d, parent_key='', sep='.'): 15 | items = [] 16 | for k, v in d.items(): 17 | new_key = parent_key + sep + k if parent_key else k 18 | if isinstance(v, collections.MutableMapping): 19 | items.extend(flatten(v, new_key, sep=sep).items()) 20 | else: 21 | items.append((new_key, str(v))) 22 | return dict(items) 23 | 24 | 25 | class Config: 26 | def __new__(cls, config_path='config.yaml'): 27 | if not config_path.startswith('/'): 28 | config_path = os.path.join(os.path.dirname(__file__), config_path) 29 | logger.info('Loading configs from {}.'.format(config_path)) 30 | with open(config_path, 'r', encoding='utf-8') as f: 31 | config_dict = yaml.load(f, Loader=Loader) 32 | config = DefaultMunch.fromDict(config_dict, object()) 33 | config_dict = flatten(config_dict) 34 | logged_config_dict = {} 35 | for key, value in config_dict.items(): 36 | if key.find('.') >= 0: 37 | logged_config_dict[key] = value 38 | return config, logged_config_dict 39 | -------------------------------------------------------------------------------- /slbo/configs/slbo_config.yaml: -------------------------------------------------------------------------------- 1 | mf_algo: 'trpo' 2 | proj_dir: '/home/liuxh/Documents/slbo' 3 | result_dir: './result' 4 | use_cuda: True 5 | seed: 0 6 | verbose: 0 7 | model_load_path: ~ 8 | buffer_load_path: ~ 9 | save_freq: 2 10 | eval_freq: 1 11 | 12 | env: 13 | env_name: 'Hopper-v2' 14 | num_real_envs: 1 15 | num_virtual_envs: 8 16 | gamma: 0.99 17 | max_episode_steps: 500 18 | 19 | ou_noise: 20 | theta: 0.15 21 | sigma: 0.3 22 | 23 | trpo: 24 | entropy_coef: 0.005 25 | max_kld: 0.01 26 | num_env_steps: 500 # 500 x 8 = 4000 27 | critic_hidden_dims: [64, 64] 28 | actor_hidden_dims: [32, 32] 29 | use_limited_ent_actor: True 30 | use_gae: True 31 | gae_lambda: 0.95 32 | use_proper_time_limits: True 33 | log_interval: 5 34 | l2_reg_coef : 0. 
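# Note: trpo.num_env_steps above appears to be counted per virtual env, so one
# policy update consumes 500 x env.num_virtual_envs (= 4000) transitions,
# matching slbo.num_env_steps below (this reading follows the inline
# "# 500 x 8 = 4000" comment; it is an inference, not stated elsewhere).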
35 | norm_reward: False 36 | 37 | slbo: 38 | num_env_steps: 4000 39 | num_epochs: 100 # collect num_env_steps per epoch 40 | num_iters: 20 # number of iteration per epoch 41 | num_model_updates: 100 # number of model updates per iteration 42 | num_policy_updates: 40 # number of policy updates per iteration 43 | use_prev_data: True 44 | dynamics_hidden_dims: [500, 500] 45 | num_rollout_steps: 2 46 | batch_size: 128 47 | buffer_size: 200000 48 | lr: 0.001 49 | l2_reg_coef: 0.00001 50 | log_interval: 1 51 | start_strategy: 'reset' # choose from 'reset' and 'buffer' 52 | -------------------------------------------------------------------------------- /slbo/configs/trpo_config.yaml: -------------------------------------------------------------------------------- 1 | mf_algo: 'trpo' 2 | proj_dir: '/home/polixir/jiangsy/slbo' 3 | result_dir: './result' 4 | use_cuda: False 5 | seed: 0 6 | verbose: 0 7 | model_load_path: ~ 8 | buffer_load_path: ~ 9 | save_interval: 10 10 | log_interval: 1 11 | eval_interval: 10 12 | 13 | env: 14 | env_name: 'Hopper-v2' 15 | num_envs: 1 16 | gamma: 0.99 17 | 18 | ou_noise: 19 | theta: 0.15 20 | sigma: 0.3 21 | 22 | trpo: 23 | total_env_steps: 2000000 24 | entropy_coef: 0. 25 | max_kld: 0.01 26 | num_env_steps: 2048 27 | critic_hidden_dims: [64, 64] 28 | actor_hidden_dims: [64, 64] 29 | use_gae: True 30 | gae_lambda: 0.95 31 | use_proper_time_limits: True 32 | -------------------------------------------------------------------------------- /slbo/envs/__init__.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import gym 3 | import numpy as np 4 | import torch 5 | from stable_baselines import logger 6 | 7 | from slbo.storages.off_policy_buffer import OffPolicyBuffer 8 | 9 | 10 | class BaseBatchedEnv(gym.Env, abc.ABC): 11 | n_envs: int 12 | 13 | @abc.abstractmethod 14 | def step(self, actions): 15 | pass 16 | 17 | def reset(self): 18 | return self.partial_reset(range(self.n_envs)) 19 | 20 | @abc.abstractmethod 21 | def partial_reset(self, indices): 22 | pass 23 | 24 | def set_state(self, state): 25 | logger.warn('`set_state` is not implemented') 26 | 27 | 28 | class BaseModelBasedEnv(gym.Env, abc.ABC): 29 | @abc.abstractmethod 30 | def mb_step(self, states: np.ndarray, actions: np.ndarray, next_states: np.ndarray): 31 | raise NotImplementedError 32 | 33 | def verify(self, n=2000, eps=1e-4): 34 | buffer = OffPolicyBuffer(n, self.observation_space.shape, 1, self.action_space) 35 | state = self.reset() 36 | for _ in range(n): 37 | action = self.action_space.sample() 38 | next_state, reward, done, _ = self.step(action) 39 | 40 | mask = torch.tensor([0.0] if done else [1.0], dtype=torch.float32) 41 | 42 | buffer.insert(torch.tensor(state), torch.tensor(action), torch.tensor(reward), 43 | torch.tensor(next_state), torch.tensor(mask)) 44 | 45 | state = next_state 46 | if done: 47 | state = self.reset() 48 | 49 | rewards_, dones_ = self.mb_step(buffer.states.numpy(), buffer.actions.numpy(), buffer.next_states.numpy()) 50 | diff = (buffer.rewards.numpy() - rewards_[:, np.newaxis]) * buffer.masks.numpy() 51 | l_inf = np.abs(diff).max() 52 | logger.info('reward difference: %.6f', l_inf) 53 | 54 | assert np.allclose(dones_, buffer.masks), 'reward model is inaccurate' 55 | assert l_inf < eps, 'done model is inaccurate' 56 | 57 | def seed(self, seed: int = None): 58 | pass 59 | 60 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangsy/slbo_pytorch/7b1283968a82c939725c2705e7315d1f3759ee29/slbo/envs/mujoco/__init__.py -------------------------------------------------------------------------------- /slbo/envs/mujoco/gym/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangsy/slbo_pytorch/7b1283968a82c939725c2705e7315d1f3759ee29/slbo/envs/mujoco/gym/__init__.py -------------------------------------------------------------------------------- /slbo/envs/mujoco/gym/ant_env.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco import ant 2 | import numpy as np 3 | from stable_baselines import logger 4 | 5 | from slbo.envs import BaseModelBasedEnv 6 | 7 | 8 | # noinspection DuplicatedCode 9 | class AntEnv(ant.AntEnv, BaseModelBasedEnv): 10 | def __init__(self, use_approximated_vel=True): 11 | logger.warn('Modified Gym Envs!') 12 | self.rescale_action = False 13 | self.use_approximated_vel = use_approximated_vel 14 | 15 | ant.AntEnv.__init__(self) 16 | BaseModelBasedEnv.__init__(self) 17 | 18 | def get_body_xmat(self, body_name): 19 | return self.sim.data.get_body_xmat(body_name) 20 | 21 | def get_body_comvel(self, body_name): 22 | return self.sim.data.get_body_xvelp(body_name) 23 | 24 | def _get_obs(self): 25 | return np.concatenate([ 26 | self.sim.data.qpos.flat, # 15 27 | self.sim.data.qvel.flat, # 14 28 | self.get_body_xmat("torso").flat, # 9 29 | self.get_body_com("torso"), # 9 30 | self.get_body_comvel("torso"), # 3 31 | ]).reshape(-1) 32 | 33 | def step(self, action): 34 | pre_pos = self.sim.data.qpos[0] 35 | self.do_simulation(action, self.frame_skip) 36 | post_pos = self.sim.data.qpos[0] 37 | if self.use_approximated_vel: 38 | fwd_reward = (post_pos - pre_pos) / self.dt 39 | else: 40 | fwd_reward = self.get_body_comvel('torso')[0] 41 | ctrl_reward = - .5 * np.square(action).sum() 42 | # make sure the reward can be recovered from state and action completely 43 | contact_reward = - 0. 44 | survive_reward = 1.0 45 | reward = fwd_reward + ctrl_reward + contact_reward + survive_reward 46 | state = self.state_vector() 47 | done = not(np.isfinite(state).all() and 0.2 <= state[2] <= 1.0) 48 | ob = self._get_obs() 49 | return ob, reward, done, {} 50 | 51 | def mb_step(self, states: np.ndarray, actions: np.ndarray, next_states: np.ndarray): 52 | if self.use_approximated_vel: 53 | reward_forward = (next_states[:, 0] - states[:, 0]) / self.dt 54 | else: 55 | reward_forward = next_states[..., -3] 56 | 57 | ctrl_cost = .5 * np.square(actions).sum(-1) 58 | contact_cost = 0. 59 | survive_reward = 1.0 60 | reward = reward_forward - ctrl_cost - contact_cost + survive_reward 61 | notdone = np.all(0.2 <= next_states[..., 2] <= 1.0, axis=0) 62 | return reward, 1. 
- notdone 63 | 64 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/gym/half_cheetah_env.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco import half_cheetah 2 | import numpy as np 3 | from stable_baselines import logger 4 | 5 | from slbo.envs import BaseModelBasedEnv 6 | 7 | 8 | # noinspection DuplicatedCode 9 | class HalfCheetahEnv(half_cheetah.HalfCheetahEnv, BaseModelBasedEnv): 10 | def __init__(self, use_approximated_vel=True): 11 | self.use_approximated_vel = use_approximated_vel 12 | self.rescale_action = False 13 | if not self.use_approximated_vel: 14 | logger.warn('Modified Gym Env!') 15 | 16 | half_cheetah.HalfCheetahEnv.__init__(self) 17 | BaseModelBasedEnv.__init__(self) 18 | 19 | def get_body_comvel(self, body_name): 20 | return self.sim.data.get_body_xvelp(body_name) 21 | 22 | def _get_obs(self): 23 | return np.concatenate([ 24 | self.model.data.qpos.flat, # 9 25 | self.model.data.qvel.flat, # 9 26 | self.get_body_com("torso").flat, # 3 27 | self.get_body_comvel("torso").flat, # 3 28 | ]) 29 | 30 | def step(self, action: np.ndarray): 31 | pre_pos = self.sim.data.qpos[0] 32 | self.do_simulation(action, self.frame_skip) 33 | post_pos = self.sim.data.qpos[0] 34 | if self.use_approximated_vel: 35 | fwd_reward = (post_pos - pre_pos) / self.dt 36 | else: 37 | fwd_reward = self.get_body_comvel('torso')[0] 38 | ctrl_reward = - 0.1 * np.square(action).sum() 39 | reward = ctrl_reward + fwd_reward 40 | obs = self._get_obs() 41 | return obs, reward, False, {} 42 | 43 | def mb_step(self, states, actions, next_states): 44 | ctrl_rewards = - 0.1 * np.square(actions).sum(-1) 45 | if self.use_approximated_vel: 46 | fwd_rewards = (next_states[:, 0] - states[:, 0]) / self.dt 47 | else: 48 | fwd_rewards = next_states[:, 21] 49 | rewards = fwd_rewards + ctrl_rewards 50 | return rewards, np.zeros_like(rewards, dtype=np.bool) 51 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/gym/hopper_env.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco import hopper 2 | import numpy as np 3 | from stable_baselines import logger 4 | 5 | from slbo.envs import BaseModelBasedEnv 6 | 7 | 8 | # noinspection DuplicatedCode 9 | class HopperEnv(hopper.HopperEnv, BaseModelBasedEnv): 10 | def __init__(self, use_approximated_vel=True): 11 | self.use_approximated_vel = use_approximated_vel 12 | self.rescale_action = False 13 | 14 | if not self.use_approximated_vel: 15 | logger.warn('Modified Gym Env!') 16 | hopper.HopperEnv.__init__(self) 17 | BaseModelBasedEnv.__init__(self) 18 | 19 | def get_body_comvel(self, body_name): 20 | return self.sim.data.get_body_xvelp(body_name) 21 | 22 | def _get_obs(self): 23 | return np.concatenate([ 24 | self.sim.data.qpos.flat, # 6 25 | self.sim.data.qvel.flat, # 6 26 | self.get_body_com("torso").flat, # 3 27 | self.get_body_comvel("torso").flat, # 3 28 | ]) 29 | 30 | def step(self, action): 31 | pre_pos = self.sim.data.qpos[0] 32 | self.do_simulation(action, self.frame_skip) 33 | post_pos, height, ang = self.sim.data.qpos[0:3] 34 | if self.use_approximated_vel: 35 | fwd_reward = (post_pos - pre_pos) / self.dt 36 | else: 37 | fwd_reward = self.get_body_comvel('torso')[0] 38 | survive_reward = 1.0 39 | ctrl_reward = -1e-3 * np.square(action).sum() 40 | reward = fwd_reward + survive_reward + ctrl_reward 41 | s = self.state_vector() 42 | done = not (np.isfinite(s).all() 
and (np.abs(s[2:]) < 100).all() and 43 | (height > .7) and (abs(ang) < .2)) 44 | ob = self._get_obs() 45 | return ob, reward, done, {} 46 | 47 | def mb_step(self, states, actions, next_states): 48 | if self.use_approximated_vel: 49 | fwd_reward = (next_states[:, 0] - states[:, 0]) / self.dt 50 | else: 51 | fwd_reward = next_states[:, -3] 52 | 53 | survive_reward = 1.0 54 | ctrl_reward = -1e-3 * np.square(actions).sum(-1) 55 | 56 | reward = fwd_reward + survive_reward + ctrl_reward 57 | 58 | done = ~((next_states[:, 2:12] < 100).all(axis=-1) & 59 | (next_states[:, 1] > 0.7) & 60 | (np.abs(next_states[:, 2]) < 0.2)) 61 | return reward, done 62 | 63 | 64 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/gym/swimmer_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym.envs.mujoco import swimmer 3 | from stable_baselines import logger 4 | 5 | from slbo.envs import BaseModelBasedEnv 6 | 7 | 8 | # noinspection DuplicatedCode 9 | class SwimmerEnv(swimmer.SwimmerEnv, BaseModelBasedEnv): 10 | def __init__(self, use_approximated_vel=True): 11 | self.use_approximated_vel = use_approximated_vel 12 | self.rescale_action = False 13 | 14 | if not self.use_approximated_vel: 15 | logger.warn('Modified Gym Env!') 16 | 17 | swimmer.SwimmerEnv.__init__(self) 18 | BaseModelBasedEnv.__init__(self) 19 | 20 | def get_body_comvel(self, body_name): 21 | return self.sim.data.get_body_xvelp(body_name) 22 | 23 | def _get_obs(self): 24 | return np.concatenate([ 25 | self.model.data.qpos.flat, # 5 26 | self.model.data.qvel.flat, # 5 27 | self.get_body_com("torso").flat, # 3 28 | self.get_body_comvel("torso").flat, # 3 29 | ]).reshape(-1) 30 | 31 | def step(self, action): 32 | pre_pos = self.sim.data.qpos[0] 33 | self.do_simulation(action, self.frame_skip) 34 | post_pos, height, ang = self.sim.data.qpos[0:3] 35 | if self.use_approximated_vel: 36 | fwd_reward = (post_pos - pre_pos) / self.dt 37 | else: 38 | fwd_reward = self.get_body_comvel('torso')[0] 39 | ctrl_reward = - 0.0001 * np.square(action).sum() 40 | reward = fwd_reward + ctrl_reward 41 | obs = self._get_obs() 42 | return obs, reward, False, {} 43 | 44 | def mb_step(self, states: np.ndarray, actions: np.ndarray, next_states: np.ndarray): 45 | ctrl_reward = - 0.0001 * np.square(actions).sum(-1) 46 | fwd_reward = next_states[:, -3] 47 | reward = fwd_reward + ctrl_reward 48 | return reward, False 49 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/gym/walker2d_env.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco import walker2d 2 | import numpy as np 3 | from stable_baselines import logger 4 | 5 | from slbo.envs import BaseModelBasedEnv 6 | 7 | 8 | # noinspection DuplicatedCode 9 | class Walker2DEnv(walker2d.Walker2dEnv, BaseModelBasedEnv): 10 | def __init__(self, use_approximated_vel=True): 11 | self.use_approximated_vel = use_approximated_vel 12 | self.rescale_action = False 13 | 14 | if not self.use_approximated_vel: 15 | logger.warn('Modified Gym Env!') 16 | 17 | walker2d.Walker2dEnv.__init__(self) 18 | BaseModelBasedEnv.__init__(self) 19 | 20 | def get_body_comvel(self, body_name): 21 | return self.sim.data.get_body_xvelp(body_name) 22 | 23 | def _get_obs(self): 24 | return np.concatenate([ 25 | self.model.data.qpos.flat, 26 | self.model.data.qvel.flat, 27 | self.get_body_com("torso").flat, 28 | self.get_body_comvel("torso").flat 
29 | ]) 30 | 31 | def step(self, action): 32 | pre_pos = self.sim.data.qpos[0] 33 | self.do_simulation(action, self.frame_skip) 34 | post_pos, height, ang = self.sim.data.qpos[0:3] 35 | if self.use_approximated_vel: 36 | fwd_reward = (post_pos - pre_pos) / self.dt 37 | else: 38 | fwd_reward = self.get_body_comvel('torso')[0] 39 | survive_reward = 1.0 40 | ctrl_reward = - 1e-3 * np.square(action).sum() 41 | reward = fwd_reward + survive_reward + ctrl_reward 42 | done = not (0.8 < height < 2.0 and -1.0 < ang < 1.0) 43 | ob = self._get_obs() 44 | return ob, reward, done, {} 45 | 46 | def mb_step(self, states, actions, next_states): 47 | if self.use_approximated_vel: 48 | fwd_rewards = (states[:, 0] - next_states[:, 0]) / self.dt 49 | else: 50 | fwd_rewards = next_states[:, 21] 51 | survive_rewards = 1.0 52 | ctrl_rewards = - 1e-3 * np.square(actions).sum(-1) 53 | rewards = fwd_rewards + survive_rewards + ctrl_rewards 54 | dones = ~((0.8 < next_states[:, 1] < 2.0) & 55 | (-1.0 < next_states[:, 2] < 1.0)) 56 | return rewards, dones 57 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/mujoco_envs.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from slbo.envs.mujoco.gym.ant_env import AntEnv 4 | from slbo.envs.mujoco.gym.half_cheetah_env import HalfCheetahEnv 5 | from slbo.envs.mujoco.gym.hopper_env import HopperEnv 6 | from slbo.envs.mujoco.gym.swimmer_env import SwimmerEnv 7 | from slbo.envs.mujoco.gym.walker2d_env import Walker2DEnv 8 | from slbo.envs.mujoco.rllab.ant_env import RLLabAntEnv 9 | from slbo.envs.mujoco.rllab.half_cheetah_env import RLLabHalfCheetahEnv 10 | from slbo.envs.mujoco.rllab.hopper_env import RLLabHopperEnv 11 | from slbo.envs.mujoco.rllab.humanoid_env import RLLabSimpleHumanoidEnv 12 | from slbo.envs.mujoco.rllab.swimmer_env import RLLabSwimmerEnv 13 | from slbo.envs.mujoco.rllab.walker2d_env import RLLabWalker2dEnv 14 | try: 15 | from slbo.misc import logger 16 | except ImportError: 17 | from stable_baselines import logger 18 | 19 | 20 | def make_mujoco_env(env_name: str): 21 | envs = { 22 | 'HalfCheetah-v2': HalfCheetahEnv, 23 | 'Walker2D-v2': Walker2DEnv, 24 | 'Ant-v2': AntEnv, 25 | 'Hopper-v2': HopperEnv, 26 | 'Swimmer-v2': SwimmerEnv, 27 | 'RLLabHalfCheetah-v2': RLLabHalfCheetahEnv, 28 | 'RLLabWalker2D-v2': RLLabWalker2dEnv, 29 | 'RLLabAnt-v2': RLLabAntEnv, 30 | 'RLLabHopper-v2': RLLabHopperEnv, 31 | 'RLLabSwimmer-v2': RLLabSwimmerEnv, 32 | 'RLLabHumanoid-v2': RLLabSimpleHumanoidEnv 33 | } 34 | env = envs[env_name]() 35 | if not hasattr(env, 'reward_range'): 36 | env.reward_range = (-np.inf, np.inf) 37 | if not hasattr(env, 'metadata'): 38 | env.metadata = {} 39 | env.seed(np.random.randint(2 ** 60)) 40 | return env 41 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangsy/slbo_pytorch/7b1283968a82c939725c2705e7315d1f3759ee29/slbo/envs/mujoco/rllab/__init__.py -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/ant_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from gym import utils 5 | from gym.envs.mujoco import mujoco_env 6 | 7 | from slbo.envs import BaseModelBasedEnv 8 | 9 | 10 | class RLLabAntEnv(mujoco_env.MujocoEnv, 
utils.EzPickle, BaseModelBasedEnv): 11 | def __init__(self): 12 | self.rescale_action = True 13 | 14 | mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), 'rllab_ant.xml'), 1) 15 | utils.EzPickle.__init__(self) 16 | 17 | def get_body_xmat(self, body_name): 18 | return self.sim.data.get_body_xmat(body_name) 19 | 20 | def get_body_comvel(self, body_name): 21 | return self.sim.data.get_body_xvelp(body_name) 22 | 23 | def _get_obs(self): 24 | return np.concatenate([ 25 | self.sim.data.qpos.flat, # 15 26 | self.sim.data.qvel.flat, # 14 27 | self.get_body_xmat("torso").flat, # 9 28 | self.get_body_com("torso").flat, # 9 (should be 3?) 29 | self.get_body_comvel("torso").flat, # 3 30 | ]).reshape(-1) 31 | 32 | def step(self, action: np.ndarray): 33 | self.do_simulation(action, self.frame_skip) 34 | comvel = self.get_body_comvel("torso") 35 | fwd_reward = comvel[0] 36 | scaling = (self.action_space.high - self.action_space.low) * 0.5 37 | ctrl_cost = 0.5 * 1e-2 * np.sum(np.square(action / scaling)) 38 | contact_cost = 0. 39 | survive_reward = 0.05 40 | reward = fwd_reward - ctrl_cost - contact_cost + survive_reward 41 | state = self.state_vector() 42 | done = not (np.isfinite(state).all() and 0.2 <= state[2] <= 1.0) 43 | obs = self._get_obs() 44 | return obs, float(reward), done, {} 45 | 46 | def mb_step(self, states: np.ndarray, actions: np.ndarray, next_states: np.ndarray): 47 | comvel = next_states[..., -3:] 48 | fwd_reward = comvel[..., 0] 49 | scaling = (self.action_space.high - self.action_space.low) * 0.5 50 | ctrl_cost = 0.5 * 1e-2 * np.sum(np.square(actions / scaling), axis=-1) 51 | contact_cost = 0. 52 | survive_reward = 0.05 53 | reward = fwd_reward - ctrl_cost - contact_cost + survive_reward 54 | notdone = np.all([next_states[..., 2] >= 0.2, next_states[..., 2] <= 1.0], axis=0) 55 | return reward, 1. 
- notdone 56 | 57 | def reset_model(self): 58 | qpos = self.init_qpos + self.np_random.normal(size=self.init_qpos.shape) * 0.01 59 | qvel = self.init_qvel + self.np_random.normal(size=self.init_qvel.shape) * 0.1 60 | self.set_state(qpos, qvel) 61 | return self._get_obs() 62 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/half_cheetah_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from gym import utils 5 | from gym.envs.mujoco import mujoco_env 6 | 7 | from slbo.envs import BaseModelBasedEnv 8 | 9 | 10 | class RLLabHalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle, BaseModelBasedEnv): 11 | def __init__(self): 12 | self.rescale_action = True 13 | 14 | mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), 'rllab_half_cheetah.xml'), 1) 15 | utils.EzPickle.__init__(self) 16 | 17 | def get_body_xmat(self, body_name): 18 | return self.sim.data.get_body_xmat(body_name) 19 | 20 | def get_body_comvel(self, body_name): 21 | return self.sim.data.get_body_xvelp(body_name) 22 | 23 | def _get_obs(self): 24 | return np.concatenate([ 25 | self.sim.data.qpos.flat, # 9 26 | self.sim.data.qvel.flat, # 9 27 | self.get_body_com("torso").flat, # 3 28 | self.get_body_comvel("torso").flat, # 3 29 | ]) 30 | 31 | def step(self, action: np.ndarray): 32 | self.do_simulation(action, self.frame_skip) 33 | action = np.clip(action, self.action_space.low, self.action_space.high) 34 | fwd_reward = self.get_body_comvel("torso")[0] 35 | ctrl_reward = - 0.05 * np.sum(np.square(action)) 36 | reward = ctrl_reward + fwd_reward 37 | obs = self._get_obs() 38 | return obs, reward, False, {} 39 | 40 | def mb_step(self, states, actions, next_states): 41 | actions = np.clip(actions, self.action_space.low, self.action_space.high) 42 | ctrl_rewards = - 0.05 * np.sum(np.square(actions), axis=-1) 43 | fwd_rewards = next_states[..., 21] 44 | rewards = fwd_rewards + ctrl_rewards 45 | return rewards, np.zeros_like(fwd_rewards, dtype=np.bool) 46 | 47 | def reset_model(self): 48 | qpos = self.init_qpos + self.np_random.normal(size=self.init_qpos.shape) * 0.01 49 | qvel = self.init_qvel + self.np_random.normal(size=self.init_qvel.shape) * 0.1 50 | self.set_state(qpos, qvel) 51 | return self._get_obs() 52 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/hopper_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | from gym import utils 5 | from gym.envs.mujoco import mujoco_env 6 | 7 | from slbo.envs import BaseModelBasedEnv 8 | 9 | 10 | class RLLabHopperEnv(mujoco_env.MujocoEnv, utils.EzPickle, BaseModelBasedEnv): 11 | def __init__(self): 12 | self.rescale_action = True 13 | 14 | mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), 'rllab_hopper.xml'), 1) 15 | utils.EzPickle.__init__(self) 16 | 17 | def get_body_comvel(self, body_name): 18 | return self.sim.data.get_body_xvelp(body_name) 19 | 20 | def _get_obs(self): 21 | return np.concatenate([ 22 | self.sim.data.qpos.flat, # 6 23 | self.sim.data.qvel.flat, # 6 24 | self.get_body_com("torso").flat, # 3 25 | self.get_body_comvel("torso"), # 3 26 | ]) 27 | 28 | def step(self, action: np.ndarray): 29 | self.do_simulation(action, self.frame_skip) 30 | scaling = 0.5 * (self.action_space.high - self.action_space.low) 31 | vel = self.get_body_comvel("torso")[0] 32 | alive_bonus 
= 1.0 33 | reward = vel + alive_bonus - 0.005 * np.sum(np.square(action / scaling)) 34 | # FIXME 35 | state = self.state_vector() 36 | done = not (np.isfinite(state).all() and 37 | (np.abs(state[3:]) < 100).all() and (state[0] > .7) and 38 | (abs(state[2]) < .2)) 39 | obs = self._get_obs() 40 | return obs, reward, done, {} 41 | 42 | def mb_step(self, states, actions, next_states): 43 | scaling = (self.action_space.high - self.action_space.low) * 0.5 44 | vel = next_states[:, -3] 45 | alive_bonus = 1.0 46 | reward = vel + alive_bonus - 0.005 * np.sum(np.square(actions / scaling), axis=-1) 47 | 48 | done = ~((next_states[:, 3:12] < 100).all(axis=-1) & 49 | (next_states[:, 0] > 0.7) & 50 | (np.abs(next_states[:, 2]) < 0.2)) 51 | return reward, done 52 | 53 | def reset_model(self): 54 | qpos = self.init_qpos + self.np_random.normal(size=self.init_qpos.shape) * 0.01 55 | qvel = self.init_qvel + self.np_random.normal(size=self.init_qvel.shape) * 0.1 56 | self.set_state(qpos, qvel) 57 | return self._get_obs() -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/humanoid_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gym.utils as utils 4 | import numpy as np 5 | from gym.envs.mujoco import mujoco_env 6 | 7 | from slbo.envs import BaseModelBasedEnv 8 | 9 | 10 | class RLLabSimpleHumanoidEnv(mujoco_env.MujocoEnv, utils.EzPickle, BaseModelBasedEnv): 11 | def __init__(self): 12 | self.rescale_action = True 13 | 14 | mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), 'rllab_simple_humanoid.xml'), 1) 15 | utils.EzPickle.__init__(self) 16 | 17 | def get_body_xmat(self, body_name): 18 | return self.sim.data.get_body_xmat(body_name) 19 | 20 | def get_body_comvel(self, body_name): 21 | return self.sim.data.get_body_xvelp(body_name) 22 | 23 | def _get_obs(self): 24 | data = self.sim.data 25 | return np.concatenate([ 26 | data.qpos.flat, # 17 27 | data.qvel.flat, # 16 28 | self.get_body_com("torso").flat, # 3 29 | self.get_body_comvel("torso").flat, # 3 30 | ]) 31 | 32 | def step(self, actions: np.ndarray): 33 | alive_bonus = 0.2 34 | comvel = self.get_body_comvel("torso") 35 | lin_vel_reward = comvel[0] 36 | scaling = 0.5 * (self.action_space.high - self.action_space.low) 37 | ctrl_cost = 5e-4 * np.sum(np.square(actions / scaling)) 38 | impact_cost = 0. 39 | vel_deviation_cost = 5e-3 * np.sum(np.square(comvel[1:])) 40 | reward = lin_vel_reward + alive_bonus - ctrl_cost - impact_cost - vel_deviation_cost 41 | done = not (0.8 <= self.sim.data.qpos.flat[2] <= 2.0) 42 | next_obs = self._get_obs() 43 | return next_obs, reward, done, {} 44 | 45 | def mb_step(self, states, actions, next_states): 46 | scaling = 0.5 * (self.action_space.high - self.action_space.low) 47 | 48 | alive_bonus = 0.2 49 | lin_vel_reward = next_states[:, 36] 50 | ctrl_cost = 5.e-4 * np.square(actions / scaling).sum(axis=1) 51 | impact_cost = 0. 
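# Index bookkeeping (from _get_obs above): the observation is laid out as
# [qpos (17), qvel (16), torso com (3), torso comvel (3)], so the comvel
# occupies indices 36-38. next_states[:, 36] above is therefore the forward
# center-of-mass velocity used for lin_vel_reward, and next_states[:, 37:39]
# below are its lateral/vertical components penalized by vel_deviation_cost.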
52 | vel_deviation_cost = 5.e-3 * np.square(next_states[:, 37:39]).sum(axis=1) 53 | reward = lin_vel_reward + alive_bonus - ctrl_cost - impact_cost - vel_deviation_cost 54 | 55 | dones = not (0.8 <= next_states[:, 2] <= 2.0) 56 | return reward, dones 57 | 58 | def reset_model(self): 59 | qpos = self.init_qpos + self.np_random.normal(size=self.init_qpos.shape) * 0.01 60 | qvel = self.init_qvel + self.np_random.normal(size=self.init_qvel.shape) * 0.1 61 | self.set_state(qpos, qvel) 62 | return self._get_obs() 63 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/ant.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 81 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/green_ball.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/half_cheetah.xml: -------------------------------------------------------------------------------- 1 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 96 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/hill_ant_env.xml.mako: -------------------------------------------------------------------------------- 1 | <% 2 | difficulty = opts.get("difficulty", 1.0) 3 | texturedir = opts.get("texturedir", "/tmp/mujoco_textures") 4 | hfield_file = opts.get("hfield_file", "/tmp/mujoco_terrains/hills.png") 5 | %> 6 | 7 | 8 | 88 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/hill_half_cheetah_env.xml.mako: -------------------------------------------------------------------------------- 1 | 35 | <% 36 | difficulty = opts.get("difficulty", 1.0) 37 | texturedir = opts.get("texturedir", "/tmp/mujoco_textures") 38 | hfield_file = opts.get("hfield_file", "/tmp/mujoco_terrains/hills.png") 39 | %> 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 103 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/hill_hopper_env.xml.mako: -------------------------------------------------------------------------------- 1 | <% 2 | difficulty = opts.get("difficulty", 1.0) 3 | texturedir = opts.get("texturedir", "/tmp/mujoco_textures") 4 | hfield_file = opts.get("hfield_file", "/tmp/mujoco_terrains/hills.png") 5 | %> 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 53 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/hill_swimmer3d_env.xml.mako: -------------------------------------------------------------------------------- 1 | <% 2 | difficulty = opts.get("difficulty", 1.0) 3 | texturedir = opts.get("texturedir", "/tmp/mujoco_textures") 4 | hfield_file = opts.get("hfield_file", "/tmp/mujoco_terrains/hills.png") 5 | %> 6 | 7 | 8 | 9 | 10 | 11 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/hill_walker2d_env.xml.mako: -------------------------------------------------------------------------------- 1 | <% 2 | 
difficulty = opts.get("difficulty", 1.0) 3 | texturedir = opts.get("texturedir", "/tmp/mujoco_textures") 4 | hfield_file = opts.get("hfield_file", "/tmp/mujoco_terrains/hills.png") 5 | %> 6 | 7 | 8 | 68 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/hopper.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 46 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/humanoid.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/inverted_double_pendulum.xml: -------------------------------------------------------------------------------- 1 | 16 | 17 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 29 | 30 | 31 | 99 | 100 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/inverted_double_pendulum.xml.mako: -------------------------------------------------------------------------------- 1 | 16 | <% 17 | noise = opts.get("noise", False) 18 | pole1_height = 0.6 19 | pole2_height = 0.6 20 | if noise: 21 | import numpy as np 22 | pole1_height = pole1_height + np.random.uniform(-0.1, 0.4) 23 | pole2_height = pole2_height + np.random.uniform(-0.1, 0.4) 24 | %> 25 | 26 | 27 | 28 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 40 | 41 | 42 | 110 | 111 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/point.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 32 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/red_ball.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/simple_humanoid.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 125 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/swimmer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- 
/slbo/envs/mujoco/rllab/mujoco_models/swimmer3d.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/utils.mako: -------------------------------------------------------------------------------- 1 | <%def name="make_maze(structure, height, size_scaling)"> 2 | % for i in xrange(len(structure)): 3 | % for j in xrange(len(structure[0])): 4 | % if str(structure[i][j]) == '1': 5 | 15 | % endif 16 | % endfor 17 | % endfor 18 | 19 | 20 | <%def name="make_contacts(geom_name, structure)"> 21 | % for i in xrange(len(structure)): 22 | % for j in xrange(len(structure[0])): 23 | % if str(structure[i][j]) == '1': 24 | 28 | % endif 29 | % endfor 30 | % endfor 31 | 32 | 33 | <%def name="find_goal_range(structure, size_scaling)"> 34 | <% 35 | found = False 36 | goal_range = [] 37 | for i in xrange(len(structure)): 38 | for j in xrange(len(structure[0])): 39 | if structure[i][j] == 'g': 40 | goal_range.append(j*size_scaling-size_scaling*0.5), 41 | goal_range.append(j*size_scaling+size_scaling*0.5), 42 | goal_range.append(i*size_scaling-size_scaling*0.5), 43 | goal_range.append(i*size_scaling+size_scaling*0.5), 44 | found = True 45 | break 46 | if found: 47 | break 48 | %> 49 | 50 | 51 | 52 | <%def name="find_robot(structure, size_scaling, z_offset=0)"> 53 | <% 54 | robot_pos = [0, 0, z_offset] 55 | found = False 56 | for i in xrange(len(structure)): 57 | for j in xrange(len(structure[0])): 58 | if structure[i][j] == 'r': 59 | robot_pos[0] = j*size_scaling 60 | robot_pos[1] = i*size_scaling 61 | found = True 62 | break 63 | if found: 64 | break 65 | %> 66 | ${' '.join(map(str, robot_pos))} 67 | 68 | 69 | <%def name="encode_map(structure, size_scaling)"> 70 | <% 71 | data = [] 72 | data.append(len(structure)) 73 | data.append(len(structure[0])) 74 | data.append(size_scaling) 75 | for i in xrange(len(structure)): 76 | for j in xrange(len(structure[0])): 77 | if structure[i][j] == 1: 78 | data.append(1) 79 | elif structure[i][j] == 'g': 80 | data.append(2) 81 | else: 82 | data.append(0) 83 | %> 84 | ${' '.join(map(str, data))} 85 | 86 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/mujoco_models/walker2d.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 61 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/rllab_ant.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 81 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/rllab_half_cheetah.xml: -------------------------------------------------------------------------------- 1 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 96 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/rllab_hopper.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 46 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/rllab_simple_humanoid.xml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 125 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/rllab_swimmer.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/rllab_walker2d.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 61 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/swimmer_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gym.utils as utils 4 | import numpy as np 5 | from gym.envs.mujoco import mujoco_env 6 | 7 | from slbo.envs import BaseModelBasedEnv 8 | 9 | 10 | class RLLabSwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle, BaseModelBasedEnv): 11 | def __init__(self): 12 | self.rescale_action = True 13 | 14 | mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), 'rllab_swimmer.xml'), 50) 15 | utils.EzPickle.__init__(self) 16 | 17 | def get_body_xmat(self, body_name): 18 | return self.sim.data.get_body_xmat(body_name) 19 | 20 | def get_body_comvel(self, body_name): 21 | return self.sim.data.get_body_xvelp(body_name) 22 | 23 | def _get_obs(self): 24 | return np.concatenate([ 25 | self.sim.data.qpos.flat, # 5 26 | self.sim.data.qvel.flat, # 5 27 | self.get_body_com("torso").flat, # 3 28 | self.get_body_comvel("torso").flat, # 3 29 | ]).reshape(-1) 30 | 31 | def step(self, action: np.ndarray): 32 | self.do_simulation(action, self.frame_skip) 33 | scaling = 0.5 * (self.action_space.high - self.action_space.low) 34 | ctrl_cost = 0.005 * np.sum(np.square(action / scaling)) 35 | fwd_reward = self.get_body_comvel("torso")[0] 36 | reward = fwd_reward - ctrl_cost 37 | obs = self._get_obs() 38 | return obs, reward, False, {} 39 | 40 | def mb_step(self, states: np.ndarray, actions: np.ndarray, next_states: np.ndarray): 41 | scaling = 0.5 * (self.action_space.high - self.action_space.low) 42 | ctrl_cost = 0.005 * np.sum(np.square(actions / scaling), axis=-1) 43 | fwd_reward = next_states[:, -3] 44 | reward = fwd_reward - ctrl_cost 45 | return reward, np.zeros_like(reward, dtype=np.bool) 46 | 47 | def reset_model(self): 48 | qpos = self.init_qpos + self.np_random.normal(size=self.init_qpos.shape) * 0.01 49 | qvel = self.init_qvel + self.np_random.normal(size=self.init_qvel.shape) * 0.1 50 | self.set_state(qpos, qvel) 51 | return self._get_obs() 52 | -------------------------------------------------------------------------------- /slbo/envs/mujoco/rllab/walker2d_env.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import gym.utils as utils 4 | import numpy as np 5 | from gym.envs.mujoco import mujoco_env 6 | 7 | from slbo.envs import BaseModelBasedEnv 8 | 9 | 10 | class RLLabWalker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle, BaseModelBasedEnv): 11 | def __init__(self): 12 | self.rescale_action = True 13 | 14 | mujoco_env.MujocoEnv.__init__(self, os.path.join(os.path.dirname(__file__), 'rllab_walker2d.xml'), 1) 15 | utils.EzPickle.__init__(self) 16 | 17 | def get_body_xmat(self, 
body_name): 18 | return self.sim.data.get_body_xmat(body_name) 19 | 20 | def get_body_comvel(self, body_name): 21 | return self.sim.data.get_body_xvelp(body_name) 22 | 23 | def _get_obs(self): 24 | return np.concatenate([ 25 | self.sim.data.qpos.flat, 26 | self.sim.data.qvel.flat, 27 | self.get_body_com("torso").flat, 28 | self.get_body_comvel("torso").flat 29 | ]) 30 | 31 | def step(self, action: np.ndarray): 32 | self.do_simulation(action, self.frame_skip) 33 | fwd_reward = self.get_body_comvel("torso")[0] 34 | scaling = 0.5 * (self.action_space.high - self.action_space.low) 35 | ctrl_cost = 1e-3 * np.sum(np.square(action / scaling)) 36 | alive_bonus = 1. 37 | reward = fwd_reward - ctrl_cost + alive_bonus 38 | qpos = self.sim.data.qpos 39 | done = not (0.8 < qpos[0] < 2.0 and -1.0 < qpos[2] < 1.0) 40 | obs = self._get_obs() 41 | return obs, reward, done, {} 42 | 43 | def mb_step(self, states, actions, next_states): 44 | scaling = 0.5 * (self.action_space.high - self.action_space.low) 45 | reward_ctrl = -0.001 * np.sum(np.square(actions / scaling), axis=-1) 46 | reward_fwd = next_states[:, 21] 47 | alive_bonus = 1. 48 | rewards = reward_ctrl + reward_fwd + alive_bonus 49 | dones = ~((0.8 < next_states[:, 0]) & (next_states[:, 0] < 2.0) & (-1.0 < next_states[:, 2]) & (next_states[:, 2] < 1.0))  # vectorized per-sample termination check 50 | return rewards, dones 51 | 52 | def reset_model(self): 53 | qpos = self.init_qpos + self.np_random.normal(size=self.init_qpos.shape) * 0.01 54 | qvel = self.init_qvel + self.np_random.normal(size=self.init_qvel.shape) * 0.1 55 | self.set_state(qpos, qvel) 56 | return self._get_obs() 57 | -------------------------------------------------------------------------------- /slbo/envs/virtual_env.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | from stable_baselines.common.vec_env.base_vec_env import VecEnv 4 | import torch 5 | 6 | from slbo.envs import BaseModelBasedEnv 7 | from slbo.models.dynamics import Dynamics 8 | 9 | 10 | class VirtualEnv(gym.Env): 11 | def __init__(self, dynamics: Dynamics, env: BaseModelBasedEnv, seed): 12 | super().__init__() 13 | self.observation_space = env.observation_space 14 | self.action_space = env.action_space 15 | 16 | self.state_dim = self.observation_space.shape[0] 17 | self.action_dim = self.action_space.shape[0] 18 | 19 | self.dynamics = dynamics 20 | self.device = next(self.dynamics.parameters()).device 21 | self.env = env 22 | self.env.seed(seed) 23 | 24 | self.state = np.zeros([self.observation_space.shape[0]], dtype=np.float32) 25 | 26 | def _rescale_action(self, action): 27 | lo, hi = self.action_space.low, self.action_space.high 28 | return lo + (action + 1.)
* 0.5 * (hi - lo) 29 | 30 | def step_await(self, action: np.ndarray): 31 | states = self.state.reshape([1, self.state_dim]) 32 | actions = action.reshape([1, self.action_dim]) 33 | rescaled_actions = self._rescale_action(action).reshape([1, self.action_dim]) 34 | with torch.no_grad(): 35 | next_states = self.dynamics(torch.tensor(states, device=self.device, dtype=torch.float32), 36 | torch.tensor(actions, device=self.device, dtype=torch.float32)).cpu().numpy() 37 | rewards, dones = self.env.mb_step(states, rescaled_actions, next_states) 38 | reward, done = rewards[0], dones[0] 39 | self.state = next_states[0] 40 | return self.state.copy(), reward.copy(), done.copy(), {} 41 | 42 | def reset(self) -> np.ndarray: 43 | self.state = self.env.reset() 44 | return self.state.copy() 45 | 46 | def set_state(self, state: np.ndarray): 47 | self.state = state.copy() 48 | 49 | def render(self, mode='human'): 50 | raise NotImplemented 51 | 52 | 53 | class VecVirtualEnv(VecEnv): 54 | def __init__(self, dynamics: Dynamics, env: BaseModelBasedEnv, num_envs, seed, max_episode_steps=1000, 55 | auto_reset=True): 56 | super(VecEnv, self).__init__() 57 | self.observation_space = env.observation_space 58 | self.action_space = env.action_space 59 | 60 | self.state_dim = self.observation_space.shape[0] 61 | self.action_dim = self.action_space.shape[0] 62 | self.num_envs = num_envs 63 | self.max_episode_steps = max_episode_steps 64 | self.auto_reset = auto_reset 65 | 66 | self.dynamics = dynamics 67 | self.device = next(self.dynamics.parameters()).device 68 | self.env = env 69 | self.env.seed(seed) 70 | 71 | self.elapsed_steps = np.zeros([self.num_envs], dtype=np.int32) 72 | self.episode_rewards = np.zeros([self.num_envs]) 73 | 74 | self.states = np.zeros([self.num_envs, self.observation_space.shape[0]], dtype=np.float32) 75 | 76 | def _rescale_action(self, actions: np.array): 77 | lo, hi = self.action_space.low, self.action_space.high 78 | return lo + (actions + 1.) 
* 0.5 * (hi - lo) 79 | 80 | def step_async(self, actions): 81 | self.actions = actions 82 | 83 | def step_wait(self): 84 | rescaled_actions = self._rescale_action(self.actions) 85 | self.elapsed_steps += 1 86 | with torch.no_grad(): 87 | next_states = self.dynamics(torch.tensor(self.states, device=self.device, dtype=torch.float32), 88 | torch.tensor(self.actions, device=self.device, dtype=torch.float32)).cpu().numpy() 89 | rewards, dones = self.env.mb_step(self.states, rescaled_actions, next_states) 90 | self.episode_rewards += rewards 91 | self.states = next_states.copy() 92 | timeouts = self.elapsed_steps == self.max_episode_steps 93 | dones |= timeouts 94 | info_dicts = [{} for _ in range(self.num_envs)] 95 | for i, (done, timeout) in enumerate(zip(dones, timeouts)): 96 | if done: 97 | info = {'episode': {'r': self.episode_rewards[i], 'l': self.elapsed_steps[i]}} 98 | if timeout: 99 | info.update({'TimeLimit.truncated': True}) 100 | info_dicts[i] = info 101 | else: 102 | info_dicts[i] = {} 103 | if self.auto_reset: 104 | self.reset(np.argwhere(dones).squeeze(axis=-1)) 105 | return self.states.copy(), rewards.copy(), dones.copy(), info_dicts 106 | 107 | # if indices = None, every env will be reset 108 | def reset(self, indices=None) -> np.ndarray: 109 | # have to distinguish [] and None 110 | indices = np.arange(self.num_envs) if indices is None else indices 111 | if np.size(indices) == 0: 112 | return np.array([]) 113 | states = np.array([self.env.reset() for _ in indices]) 114 | self.states[indices] = states 115 | self.elapsed_steps[indices] = 0 116 | self.episode_rewards[indices] = 0. 117 | return states.copy() 118 | 119 | # if indices = None, every env will be set 120 | def set_state(self, states: np.ndarray, indices=None): 121 | indices = indices or np.arange(self.num_envs) 122 | assert states.ndim == 2 and states.shape[0] == indices.shape[0] 123 | self.states[indices] = states.copy() 124 | # set_state should reset reward and length 125 | self.elapsed_steps[indices] = 0 126 | self.episode_rewards[indices] = 0. 
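    # Usage sketch (illustrative; the actual construction lives in make_vec_virtual_envs()
    # in slbo/envs/wrapped_envs.py):
    #     venv = VecVirtualEnv(dynamics, make_mujoco_env(env_name), num_envs, seed)
    #     states = venv.reset()
    #     venv.step_async(actions)        # actions are expected in [-1, 1]
    #     states, rewards, dones, infos = venv.step_wait()
    # Next states are predicted by the learned dynamics model; rewards and terminations
    # are computed analytically by the wrapped real env's mb_step(), which receives the
    # actions rescaled to the env's true action bounds.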
127 | 128 | def close(self): 129 | pass 130 | 131 | def seed(self, seed): 132 | return self.env.seed(seed) 133 | 134 | def render(self, mode='human'): 135 | raise NotImplemented 136 | 137 | def set_attr(self, attr_name, value, indices=None): 138 | raise NotImplemented 139 | 140 | def get_attr(self, attr_name, indices=None): 141 | raise NotImplemented 142 | 143 | def env_method(self, method_name, *method_args, indices=None, **method_kwargs): 144 | raise NotImplemented 145 | 146 | -------------------------------------------------------------------------------- /slbo/envs/wrapped_envs.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Optional 3 | 4 | import gym 5 | from gym.wrappers import TimeLimit 6 | import torch 7 | from stable_baselines import bench 8 | from stable_baselines.common.vec_env import VecEnvWrapper 9 | from stable_baselines.common.vec_env.dummy_vec_env import DummyVecEnv 10 | from stable_baselines.common.vec_env.subproc_vec_env import SubprocVecEnv 11 | from stable_baselines.common.vec_env.vec_normalize import VecNormalize 12 | 13 | from slbo.envs.mujoco.mujoco_envs import make_mujoco_env 14 | from slbo.envs.virtual_env import VirtualEnv, VecVirtualEnv 15 | from slbo.models.dynamics import Dynamics 16 | 17 | 18 | def make_env(env_id, seed, rank, log_dir, allow_early_resets, max_episode_steps, test=True): 19 | def _thunk(): 20 | if test: 21 | env = gym.make(env_id) 22 | else: 23 | env = make_mujoco_env(env_id) 24 | env = TimeLimit(env, max_episode_steps) 25 | 26 | env.seed(seed + rank) 27 | log_dir_ = os.path.join(log_dir, str(rank)) if log_dir is not None else log_dir 28 | env = bench.Monitor(env, log_dir_, allow_early_resets=allow_early_resets) 29 | 30 | return env 31 | 32 | return _thunk 33 | 34 | 35 | def make_vec_envs(env_name: str, 36 | seed: int, 37 | num_envs: int, 38 | gamma: float, 39 | log_dir: Optional[str], 40 | device: torch.device, 41 | allow_early_resets: bool, 42 | max_episode_steps: int = 1000, 43 | norm_reward=True, 44 | norm_obs=True, 45 | test=False, 46 | ): 47 | envs = [ 48 | make_env(env_name, seed, i, log_dir, allow_early_resets, max_episode_steps, test) 49 | for i in range(num_envs) 50 | ] 51 | 52 | if len(envs) > 1: 53 | envs = SubprocVecEnv(envs) 54 | else: 55 | envs = DummyVecEnv(envs) 56 | 57 | if len(envs.observation_space.shape) == 1: 58 | if gamma is None: 59 | envs = VecNormalize(envs, norm_reward=False, norm_obs=norm_obs) 60 | else: 61 | envs = VecNormalize(envs, gamma=gamma, norm_reward=norm_reward, norm_obs=norm_obs) 62 | 63 | envs = VecPyTorch(envs, device) 64 | 65 | return envs 66 | 67 | 68 | def make_vec_virtual_envs(env_name: str, 69 | dynamics: Dynamics, 70 | seed: int, 71 | num_envs: int, 72 | gamma: Optional[float], 73 | device: torch.device, 74 | allow_early_resets: bool, 75 | max_episode_steps: int = 1000, 76 | norm_reward=False, 77 | norm_obs=False, 78 | ): 79 | envs = VecVirtualEnv(dynamics, make_mujoco_env(env_name), num_envs, seed, max_episode_steps) 80 | 81 | if len(envs.observation_space.shape) == 1 and (norm_reward or norm_obs): 82 | if gamma is None: 83 | envs = VecNormalize(envs, norm_reward=False, norm_obs=norm_obs) 84 | else: 85 | envs = VecNormalize(envs, gamma=gamma, norm_reward=norm_reward, norm_obs=norm_obs) 86 | 87 | envs = VecPyTorch(envs, device) 88 | 89 | return envs 90 | 91 | 92 | class VecPyTorch(VecEnvWrapper): 93 | def __init__(self, venv, device): 94 | super(VecPyTorch, self).__init__(venv) 95 | self.device = device 96 | 97 | def 
reset(self): 98 | obs = self.venv.reset() 99 | obs = torch.from_numpy(obs).float().to(self.device) 100 | return obs 101 | 102 | def step_async(self, actions): 103 | if isinstance(actions, torch.LongTensor): 104 | actions = actions.squeeze(1) 105 | actions = actions.cpu().numpy() 106 | self.venv.step_async(actions) 107 | 108 | def step_wait(self): 109 | obs, reward, done, info = self.venv.step_wait() 110 | obs = torch.from_numpy(obs).float().to(self.device) 111 | reward = torch.from_numpy(reward).unsqueeze(dim=1).float() 112 | return obs, reward, done, info 113 | 114 | 115 | def get_vec_normalize(venv): 116 | if isinstance(venv, VecNormalize): 117 | return venv 118 | elif hasattr(venv, 'venv'): 119 | return get_vec_normalize(venv.venv) 120 | 121 | return None 122 | 123 | 124 | -------------------------------------------------------------------------------- /slbo/misc/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangsy/slbo_pytorch/7b1283968a82c939725c2705e7315d1f3759ee29/slbo/misc/__init__.py -------------------------------------------------------------------------------- /slbo/misc/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.distributions import Distribution, Normal 3 | import math 4 | 5 | class TanhNormal(Distribution): 6 | """ 7 | Represent distribution of X where 8 | Z ~ N(mean, std) 9 | X ~ tanh(Z) 10 | Note: this is not very numerically stable. 11 | """ 12 | def __init__(self, mean, std, epsilon=1e-6): 13 | """ 14 | :param mean: Mean of the normal distribution 15 | :param std: Std of the normal distribution 16 | :param epsilon: Numerical stability epsilon when computing log-prob. 17 | """ 18 | super().__init__() 19 | self.normal_mean = mean 20 | self.normal_std = std 21 | self.normal = Normal(mean, std) 22 | self.epsilon = epsilon 23 | 24 | def log_prob(self, value, pre_tanh_value=None): 25 | if pre_tanh_value is None: 26 | pre_tanh_value = torch.log((1 + value) / (1 - value)) / 2 27 | return self.normal.log_prob(pre_tanh_value) - torch.log(1 - value * value + self.epsilon) 28 | 29 | def log_probs(self, value, pre_tanh_value): 30 | return self.log_prob(value, pre_tanh_value).sum(-1, keepdim=True) 31 | 32 | def sample(self, sample_shape=torch.Size([])): 33 | z = self.normal.sample(sample_shape) 34 | return torch.tanh(z), z 35 | 36 | def rsample(self, sample_shape=torch.Size([]), return_pretanh_value=False): 37 | z = ( 38 | self.normal_mean + 39 | self.normal_std * 40 | Normal( 41 | torch.zeros_like(self.normal_mean), 42 | torch.ones_like(self.normal_std) 43 | ).sample() 44 | ) 45 | z.requires_grad_() 46 | return torch.tanh(z), z 47 | 48 | def entropy(self): 49 | return self.normal.entropy().sum(-1) 50 | 51 | def mode(self): 52 | return torch.tanh(self.normal_mean), self.normal_mean 53 | 54 | 55 | class FixedLimitedEntNormal(torch.distributions.Normal): 56 | def log_probs(self, actions): 57 | return super().log_prob(actions).sum(-1, keepdim=True) 58 | 59 | def entropy(self): 60 | limit = 2.
61 | lo, hi = (-limit - self.loc) / self.scale / math.sqrt(2), (limit - self.loc) / self.scale / math.sqrt(2) 62 | return (0.5 * (self.scale.log() + math.log(2 * math.pi) / 2) * (hi.erf() - lo.erf()) + 0.5 * 63 | (torch.exp(-hi * hi) * hi - torch.exp(-lo * lo) * lo)).sum(-1) 64 | 65 | def mode(self): 66 | return self.mean 67 | 68 | 69 | class FixedCategorical(torch.distributions.Categorical): 70 | def sample(self, **kwargs): 71 | return super().sample(**kwargs).unsqueeze(-1) 72 | 73 | def log_probs(self, actions): 74 | return ( 75 | super() 76 | .log_prob(actions.squeeze(-1)) 77 | .view(actions.size(0), -1) 78 | .sum(-1) 79 | .unsqueeze(-1) 80 | ) 81 | 82 | def mode(self): 83 | return self.probs.argmax(dim=-1, keepdim=True) 84 | 85 | 86 | class FixedNormal(torch.distributions.Normal): 87 | 88 | def log_probs(self, actions): 89 | return super().log_prob(actions).sum(-1, keepdim=True) 90 | 91 | def entropy(self): 92 | return super().entropy().sum(-1) 93 | 94 | def mode(self): 95 | return self.mean 96 | 97 | 98 | class FixedBernoulli(torch.distributions.Bernoulli): 99 | 100 | def log_probs(self, actions): 101 | return super().log_prob(actions).view(actions.size(0), -1).sum(-1, keepdim=True) 102 | 103 | def entropy(self): 104 | return super().entropy().sum(-1) 105 | 106 | def mode(self): 107 | return torch.gt(self.probs, 0.5).float() 108 | 109 | -------------------------------------------------------------------------------- /slbo/misc/ou_noise.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from slbo.models.actor import Actor 4 | 5 | 6 | class OUNoise(object): 7 | 8 | def __init__(self, action_space, mu=0.0, theta=0.15, sigma=0.3): 9 | self.mu = mu 10 | self.theta = theta 11 | self.sigma = sigma 12 | self.action_space = action_space 13 | self.state = None 14 | self.actor = None 15 | 16 | self.shape = action_space.shape 17 | 18 | self.reset() 19 | 20 | def reset(self): 21 | self.state = torch.ones(self.shape) * self.mu 22 | 23 | def next(self): 24 | delta = self.theta * (self.mu - self.state) + self.sigma * torch.randn_like(self.state) 25 | self.state = self.state + delta 26 | return self.state 27 | 28 | def act(self, states): 29 | result = self.actor.act(states) 30 | return (result[0] + self.next(), *result[1:]) 31 | 32 | def wrap(self, actor: Actor): 33 | self.actor = actor 34 | self.state = self.state.to(next(actor.parameters()).device) 35 | return self 36 | 37 | -------------------------------------------------------------------------------- /slbo/misc/param.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | def get_flat_params_from(model: nn.Module): 7 | params = [] 8 | for param in model.parameters(): 9 | params.append(param.view(-1)) 10 | 11 | flat_params = torch.cat(params) 12 | return flat_params 13 | 14 | 15 | def set_flat_params_to(model: nn.Module, flat_params): 16 | prev_ind = 0 17 | for param in model.parameters(): 18 | flat_size = int(np.prod(list(param.size()))) 19 | param.data.copy_(flat_params[prev_ind:prev_ind + flat_size].view(param.size())) 20 | prev_ind += flat_size 21 | 22 | 23 | def get_flat_grad_from(inputs, grad_grad=False): 24 | grads = [] 25 | for param in inputs: 26 | if grad_grad: 27 | grads.append(param.grad.grad.view(-1)) 28 | else: 29 | if param.grad is None: 30 | grads.append(torch.zeros(param.view(-1).shape)) 31 | else: 32 | grads.append(param.grad.view(-1)) 33 | 34 | flat_grad = 
torch.cat(grads) 35 | return flat_grad 36 | 37 | 38 | def compute_flat_grad(output, inputs, filter_input_ids: set, retain_graph=False, create_graph=False): 39 | filter_input_ids = filter_input_ids.copy() 40 | if create_graph: 41 | retain_graph = True 42 | 43 | inputs = list(inputs) 44 | params = [] 45 | for i, param in enumerate(inputs): 46 | if i not in filter_input_ids: 47 | params.append(param) 48 | 49 | grads = torch.autograd.grad(output, params, retain_graph=retain_graph, create_graph=create_graph) 50 | 51 | j = 0 52 | out_grads = [] 53 | for i, param in enumerate(inputs): 54 | if i in filter_input_ids: 55 | out_grads.append(torch.zeros(param.view(-1).shape, device=param.device, dtype=param.dtype)) 56 | else: 57 | out_grads.append(grads[j].view(-1)) 58 | j += 1 59 | grads = torch.cat(out_grads) 60 | 61 | for param in params: 62 | param.grad = None 63 | return grads -------------------------------------------------------------------------------- /slbo/misc/utils.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | from torch.utils.tensorboard import SummaryWriter 5 | 6 | from slbo.envs.wrapped_envs import make_vec_envs, get_vec_normalize 7 | 8 | 9 | def log_and_write(logger, writer: SummaryWriter, log_infos: List, global_step: int): 10 | for idx, (name, value) in enumerate(log_infos): 11 | if logger is not None: 12 | logger.logkv('{}.'.format(idx) + name.split('/')[-1], value) 13 | if writer is not None and name.find('/') > -1: 14 | writer.add_scalar(name, value, global_step=global_step) 15 | if logger is not None: 16 | logger.dumpkvs() 17 | 18 | 19 | def collect_traj(actor, envs, buffer, total_step): 20 | episode_rewards = [] 21 | episode_lengths = [] 22 | 23 | step = 0 24 | while step < total_step: 25 | states = envs.reset() 26 | dones = False 27 | traj = {'states': [], 'actions': [], 'rewards': [], 'next_states': [], 'masks': []} 28 | while not dones: 29 | with torch.no_grad(): 30 | actions, *_ = actor.act(states, deterministic=False) 31 | 32 | new_states, rewards, dones, infos = envs.step(actions) 33 | mask = torch.tensor([[0.0] if done_ else [1.0] for done_ in dones], dtype=torch.float32) 34 | 35 | traj['states'].append(states) 36 | traj['actions'].append(actions) 37 | traj['next_states'].append(new_states) 38 | traj['rewards'].append(rewards) 39 | traj['masks'].append(mask) 40 | 41 | states = new_states 42 | 43 | for info_ in infos: 44 | if 'episode' in info_.keys(): 45 | episode_rewards.append(info_['episode']['r']) 46 | episode_lengths.append(info_['episode']['l']) 47 | 48 | traj_len = len(traj['actions']) 49 | step += traj_len 50 | buffer.add_traj(traj) 51 | 52 | return episode_rewards, episode_lengths 53 | 54 | 55 | def evaluate(actor, env_name, seed, num_episode, eval_log_dir, 56 | device, max_episode_steps=1000, norm_reward=False, norm_obs=True, obs_rms=None, test=True): 57 | eval_envs = make_vec_envs(env_name, seed + 1, 1, None, eval_log_dir, device, True, 58 | max_episode_steps, norm_reward, norm_obs, test) 59 | 60 | vec_norm = get_vec_normalize(eval_envs) 61 | if vec_norm is not None and norm_obs: 62 | assert obs_rms is not None 63 | vec_norm.training = False 64 | vec_norm.obs_rms = obs_rms 65 | 66 | eval_episode_rewards = [] 67 | eval_episode_lengths = [] 68 | 69 | obs = eval_envs.reset() 70 | 71 | while len(eval_episode_rewards) < num_episode: 72 | with torch.no_grad(): 73 | action, *_ = actor.act(obs, deterministic=True) 74 | 75 | obs, _, done, infos =
eval_envs.step(action) 76 | 77 | for info in infos: 78 | if 'episode' in info.keys(): 79 | eval_episode_rewards.append(info['episode']['r']) 80 | eval_episode_lengths.append(info['episode']['l']) 81 | 82 | eval_envs.close() 83 | 84 | return eval_episode_rewards, eval_episode_lengths 85 | -------------------------------------------------------------------------------- /slbo/models/__init__.py: -------------------------------------------------------------------------------- 1 | from slbo.models.actor import Actor 2 | from slbo.models.actor_critic import ActorCritic 3 | from slbo.models.critic import VCritic, QCritic 4 | from slbo.models.dynamics import Dynamics 5 | from slbo.models.normalizers import Normalizers 6 | -------------------------------------------------------------------------------- /slbo/models/actor.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from typing import List, Callable, Optional 4 | 5 | from slbo.models.initializer import normc_init 6 | from slbo.models.utils import MLP, init 7 | from slbo.models.actor_layer import * 8 | 9 | 10 | class Actor(nn.Module): 11 | def __init__(self, state_dim: int, action_space, hidden_dims: List[int], 12 | state_normalizer: Optional[nn.Module], use_limited_entropy=False): 13 | super(Actor, self).__init__() 14 | self.state_dim = state_dim 15 | self.action_dim = action_space 16 | self.hidden_dims = hidden_dims 17 | 18 | self.actor_feature = MLP(state_dim, hidden_dims[-1], hidden_dims[:-1], 19 | activation='Tanh', last_activation='Tanh') 20 | self.state_normalizer = state_normalizer or nn.Identity() 21 | 22 | if action_space.__class__.__name__ == "Discrete": 23 | action_dim = action_space.n 24 | self.actor = CategoricalActorLayer(hidden_dims[-1], action_dim) 25 | elif action_space.__class__.__name__ == "Box": 26 | action_dim = action_space.shape[0] 27 | if use_limited_entropy: 28 | self.actor = LimitedEntGaussianActorLayer(hidden_dims[-1], action_dim, use_state_dependent_std=False) 29 | else: 30 | self.actor = GaussianActorLayer(hidden_dims[-1], action_dim, use_state_dependent_std=False) 31 | elif action_space.__class__.__name__ == "MultiBinary": 32 | action_dim = action_space.shape[0] 33 | self.actor = BernoulliActorLayer(hidden_dims[-1], action_dim) 34 | else: 35 | raise NotImplemented 36 | 37 | init_ = lambda m: init(m, normc_init, lambda x: nn.init.constant_(x, 0)) 38 | self.actor_feature.init(init_, init_) 39 | 40 | def act(self, states, deterministic=False, reparamterize=False): 41 | states = self.state_normalizer(states) 42 | action_features = self.actor_feature(states) 43 | action_dists, action_means, log_stds = self.actor(action_features) 44 | 45 | if deterministic: 46 | actions = action_dists.mode() 47 | else: 48 | if reparamterize: 49 | actions = action_dists.rsample() 50 | else: 51 | actions = action_dists.sample() 52 | 53 | log_probs = action_dists.log_probs(actions) 54 | entropy = action_dists.entropy().mean() 55 | 56 | return actions, log_probs, entropy, action_means, log_stds, log_stds.exp() 57 | 58 | def evaluate_action(self, states, actions): 59 | states = self.state_normalizer(states) 60 | action_feature = self.actor_feature(states) 61 | action_dist, *_ = self.actor(action_feature) 62 | 63 | log_probs = action_dist.log_probs(actions) 64 | entropy = action_dist.entropy().mean() 65 | 66 | return log_probs, entropy 67 | -------------------------------------------------------------------------------- /slbo/models/actor_critic.py: 
-------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | 5 | from slbo.models.actor_layer import * 6 | from slbo.models.utils import MLP, init 7 | 8 | 9 | class ActorCritic(nn.Module): 10 | 11 | def __init__(self, dim_state, action_space, actor_hidden_dims: List[int], critic_hidden_dims: List[int], 12 | normalizer: nn.Module = None): 13 | super(ActorCritic, self).__init__() 14 | 15 | self.actor_feature = MLP(dim_state, actor_hidden_dims[-1], actor_hidden_dims[:-1], 16 | activation='Tanh', last_activation='Tanh') 17 | self.critic = MLP(dim_state, 1, critic_hidden_dims, activation='Tanh', last_activation='Identity') 18 | self.normalizer = normalizer or nn.Identity() 19 | 20 | init_ = lambda m: init(m, lambda x: nn.init.orthogonal_(x, np.sqrt(2)), lambda x: nn.init.constant_(x, 0)) 21 | self.actor_feature.init(init_, init_) 22 | self.critic.init(init_, init_) 23 | 24 | self.train() 25 | 26 | if action_space.__class__.__name__ == "Discrete": 27 | dim_action = action_space.n 28 | self.actor = CategoricalActorLayer(actor_hidden_dims[-1], dim_action) 29 | elif action_space.__class__.__name__ == "Box": 30 | dim_action = action_space.shape[0] 31 | self.actor = GaussianActorLayer(actor_hidden_dims[-1], dim_action, use_state_dependent_std=False) 32 | elif action_space.__class__.__name__ == "MultiBinary": 33 | dim_action = action_space.shape[0] 34 | self.actor = BernoulliActorLayer(actor_hidden_dims[-1], dim_action) 35 | 36 | def act(self, states, deterministic=False, reparamterize=False): 37 | action_feature, value = self.actor_feature(states), self.critic(states) 38 | action_dist, *_ = self.actor(action_feature) 39 | 40 | if deterministic: 41 | action = action_dist.mode() 42 | else: 43 | if reparamterize: 44 | action = action_dist.rsample() 45 | else: 46 | action = action_dist.sample() 47 | 48 | action_log_prob = action_dist.log_probs(action) 49 | dist_entropy = action_dist.entropy().mean() 50 | 51 | return value, action, action_log_prob, dist_entropy 52 | 53 | def criticize(self, states): 54 | values = self.critic(states) 55 | return values 56 | 57 | def evaluate_action(self, state, action): 58 | action_feature, value = self.actor_feature(state), self.critic(state) 59 | action_dist = self.actor(action_feature) 60 | 61 | action_log_probs = action_dist.log_prob(action).sum(-1, keepdim=True) 62 | dist_entropy = action_dist.entropy().mean() 63 | 64 | return value, action_log_probs, dist_entropy 65 | 66 | -------------------------------------------------------------------------------- /slbo/models/actor_layer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | from slbo.misc.distributions import FixedNormal, FixedCategorical, FixedBernoulli, TanhNormal, FixedLimitedEntNormal 5 | from slbo.models.utils import init 6 | 7 | 8 | class CategoricalActorLayer(nn.Module): 9 | def __init__(self, num_inputs, num_outputs): 10 | super(CategoricalActorLayer, self).__init__() 11 | 12 | self.actor = nn.Linear(num_inputs, num_outputs) 13 | init(self.actor, lambda x: nn.init.orthogonal_(x, 0.01), lambda x: nn.init.constant_(x, 0)) 14 | 15 | def forward(self, x): 16 | x = self.actor(x) 17 | return FixedCategorical(logits=x) 18 | 19 | 20 | class GaussianActorLayer(nn.Module): 21 | def __init__(self, num_inputs, num_outputs, use_state_dependent_std): 22 | super(GaussianActorLayer, self).__init__() 23 | 24 | self.actor_mean = nn.Linear(num_inputs, num_outputs) 
25 | init(self.actor_mean, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0)) 26 | self.use_state_dependent_std = use_state_dependent_std 27 | if self.use_state_dependent_std: 28 | self.actor_logstd = nn.Linear(num_inputs, num_outputs) 29 | init(self.actor_logstd, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0)) 30 | 31 | else: 32 | self.logstd = nn.Parameter(torch.zeros(num_outputs), requires_grad=True) 33 | 34 | def forward(self, x): 35 | action_mean = self.actor_mean(x) 36 | 37 | if self.use_state_dependent_std: 38 | logstd = self.actor_logstd(x) 39 | else: 40 | logstd = self.logstd 41 | 42 | return FixedNormal(action_mean, logstd.exp()), action_mean, logstd 43 | 44 | 45 | class LimitedEntGaussianActorLayer(nn.Module): 46 | def __init__(self, num_inputs, num_outputs, use_state_dependent_std): 47 | super(LimitedEntGaussianActorLayer, self).__init__() 48 | 49 | self.actor_mean = nn.Linear(num_inputs, num_outputs) 50 | init(self.actor_mean, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0)) 51 | self.use_state_dependent_std = use_state_dependent_std 52 | if self.use_state_dependent_std: 53 | self.actor_logstd = nn.Linear(num_inputs, num_outputs) 54 | init(self.actor_logstd, nn.init.orthogonal_, lambda x: nn.init.constant_(x, 0)) 55 | 56 | else: 57 | self.logstd = nn.Parameter(torch.zeros(num_outputs), requires_grad=True) 58 | 59 | def forward(self, x): 60 | action_mean = self.actor_mean(x) 61 | 62 | if self.use_state_dependent_std: 63 | logstd = self.actor_logstd(x) 64 | else: 65 | logstd = self.logstd 66 | 67 | return FixedLimitedEntNormal(action_mean, logstd.exp()), action_mean, logstd 68 | 69 | 70 | class BernoulliActorLayer(nn.Module): 71 | def __init__(self, num_inputs, num_outputs): 72 | super(BernoulliActorLayer, self).__init__() 73 | 74 | self.actor = nn.Linear(num_inputs, num_outputs) 75 | init(self.actor, nn.init.orthogonal_, lambda x: nn.init. 
constant_(x, 0)) 76 | 77 | def forward(self, x): 78 | x = self.actor(x) 79 | return FixedBernoulli(logits=x) 80 | 81 | 82 | class TanhGaussainActorLayer(nn.Module): 83 | def __init__(self, num_inputs, num_outputs, state_dependent_std, init_w=1e-3): 84 | super(TanhGaussainActorLayer, self).__init__() 85 | 86 | self.actor_mean = nn.Linear(num_inputs, num_outputs) 87 | init(self.actor_mean, lambda x: nn.init.uniform_(x, -init_w, init_w), 88 | lambda x: nn.init.uniform_(x, -init_w, init_w)) 89 | self.state_dependent_std = state_dependent_std 90 | if self.state_dependent_std: 91 | self.actor_logstd = nn.Linear(num_inputs, num_outputs) 92 | init(self.actor_mean, lambda x: nn.init.uniform_(x, -init_w, init_w), 93 | lambda x: nn.init.uniform_(x, -init_w, init_w)) 94 | else: 95 | self.logstd = nn.Parameter(torch.zeros(num_outputs), requires_grad=True) 96 | 97 | def forward(self, x): 98 | action_mean = self.actor_mean(x) 99 | 100 | if self.state_dependent_std: 101 | action_logstd = self.actor_logstd(x) 102 | else: 103 | action_logstd = self.logstd 104 | 105 | action_logstd = torch.clamp(action_logstd, -20, 2) 106 | 107 | return TanhNormal(action_mean, action_logstd.exp()), torch.tanh(action_mean), action_logstd 108 | -------------------------------------------------------------------------------- /slbo/models/critic.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from slbo.models.initializer import normc_init 7 | from slbo.models.utils import MLP, init 8 | 9 | 10 | class QCritic(nn.Module): 11 | def __init__(self, dim_state: int, dim_action: int, hidden_states: List[int]): 12 | super(QCritic, self).__init__() 13 | self.critic = MLP(dim_state + dim_action, hidden_states, 1) 14 | 15 | def forward(self, state, action): 16 | x = torch.cat([state, action], dim=-1) 17 | return self.critic(x) 18 | 19 | 20 | class VCritic(nn.Module): 21 | def __init__(self, dim_state: int, hidden_dims: List[int], state_normalizer=None, activation='Tanh'): 22 | super(VCritic, self).__init__() 23 | self.critic = MLP(dim_state, 1, hidden_dims, activation=activation) 24 | self.normalizer = state_normalizer or nn.Identity() 25 | 26 | init_ = lambda m: init(m, normc_init, lambda x: nn.init.constant_(x, 0)) 27 | self.critic.init(init_, init_) 28 | 29 | def forward(self, state): 30 | state = self.normalizer(state) 31 | return self.critic(state) 32 | -------------------------------------------------------------------------------- /slbo/models/dynamics.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | from slbo.models.initializer import truncated_norm_init 7 | from slbo.models.normalizers import Normalizers 8 | from slbo.models.utils import MLP, init 9 | 10 | 11 | class Dynamics(nn.Module): 12 | def __init__(self, state_dim: int, action_dim: int, hidden_dims: List[int], normalizer: Normalizers): 13 | super(Dynamics, self).__init__() 14 | self.dim_state = state_dim 15 | self.dim_action = action_dim 16 | self.normalizer = normalizer 17 | self.diff_dynamics = MLP(state_dim + action_dim, state_dim, hidden_dims, activation='ReLU') 18 | 19 | init_ = lambda m: init(m, truncated_norm_init, lambda x: nn.init.constant_(x, 0)) 20 | self.diff_dynamics.init(init_, init_) 21 | 22 | def forward(self, state, action): 23 | # action clip is the best normalization according to the authors 24 | x = 
torch.cat([self.normalizer.state_normalizer(state), action.clamp(-1., 1.)], dim=-1) 25 | normalized_diff = self.diff_dynamics(x) 26 | next_states = state + self.normalizer.diff_normalizer(normalized_diff, inverse=True) 27 | next_states = self.normalizer.state_normalizer(self.normalizer.state_normalizer(next_states).clamp(-100, 100), 28 | inverse=True) 29 | return next_states 30 | 31 | 32 | -------------------------------------------------------------------------------- /slbo/models/initializer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def normc_init(tensor, std=1.0, **kwargs): 5 | tensor.data.normal_(0, 1) 6 | tensor.data *= std / np.sqrt(tensor.data.pow(2).sum(1, keepdim=True)) 7 | 8 | 9 | def fanin_init(tensor, **kwargs): 10 | size = tensor.size() 11 | if len(size) == 2: 12 | fan_in = size[0] 13 | elif len(size) > 2: 14 | fan_in = np.prod(size[1:]) 15 | else: 16 | raise Exception("Shape must be have dimension at least 2.") 17 | bound = 1. / np.sqrt(fan_in) 18 | return tensor.data.uniform_(-bound, bound) 19 | 20 | 21 | def truncated_norm_init(tensor, mean=0, std=1e-5, **kwargs): 22 | size = tensor.shape 23 | tmp = tensor.new_empty(size + (4,)).normal_() 24 | valid = (tmp < 2) & (tmp > -2) 25 | ind = valid.max(-1, keepdim=True)[1] 26 | tensor.data.copy_(tmp.gather(-1, ind).squeeze(-1)) 27 | tensor.data.mul_(std).add_(mean) 28 | return tensor 29 | 30 | -------------------------------------------------------------------------------- /slbo/models/normalizers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributions.kl as kl 3 | import torch.nn as nn 4 | from typing import List 5 | try: 6 | from slbo.misc import logger 7 | except ImportError: 8 | from stable_baselines import logger 9 | 10 | 11 | class GaussianNormalizer(nn.Module): 12 | def __init__(self, shape: List[int], eps=1e-8, verbose=0): 13 | super().__init__() 14 | 15 | self.shape = shape 16 | self.verbose = verbose 17 | 18 | self.mean = torch.zeros(shape, dtype=torch.float32) 19 | self.std = torch.ones(shape, dtype=torch.float32) 20 | self.eps = eps 21 | self.n = 0 22 | 23 | def forward(self, x: torch.Tensor, inverse=False): 24 | if inverse: 25 | return x * self.std + self.mean 26 | return (x - self.mean) / (torch.clamp(self.std, min=self.eps)) 27 | 28 | def to(self, *args, **kwargs): 29 | self.mean = self.mean.to(*args, **kwargs) 30 | self.std = self.std.to(*args, **kwargs) 31 | 32 | # noinspection DuplicatedCode 33 | # samples in [batch_size, ...] 
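    # The update below folds a batch of samples into the running (mean, std, n) using the
    # standard parallel mean/variance combination (Chan et al.):
    #   new_mean = old_mean + delta * n / (old_n + n)
    #   new_var  = (old_n * old_std**2 + n * batch_var + old_n * n * delta**2 / (old_n + n)) / (old_n + n)
    # where n is the batch size and delta = batch_mean - old_mean. The KL divergence between
    # the new and old Gaussians is computed only for optional debug logging.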
34 | def update(self, samples: torch.Tensor): 35 | old_mean, old_std, old_n = self.mean, self.std, self.n 36 | samples = samples - old_mean 37 | n = samples.shape[0] 38 | delta = samples.mean(dim=0) 39 | new_n = old_n + n 40 | new_mean = old_mean + delta * n / new_n 41 | new_std = torch.sqrt((old_std**2 * old_n + samples.var(dim=0) * n + delta**2 * old_n * n / new_n) / new_n) 42 | kl_old_new = kl.kl_divergence(torch.distributions.Normal(new_mean, torch.clamp(new_std, 1e-20)), 43 | torch.distributions.Normal(old_mean, torch.clamp(old_std, 1e-20))).sum() 44 | self.mean, self.std, self.n = new_mean, new_std, new_n 45 | 46 | if self.verbose > 0: 47 | logger.debug("updating Normalizer<%s>, KL divergence = %.6f", self.name, kl_old_new) 48 | 49 | # noinspection PyMethodOverriding 50 | def state_dict(self, *args, **kwargs): 51 | return {'mean': self.mean, 'std': self.std, 'n': self.n} 52 | 53 | # noinspection PyMethodOverriding 54 | def load_state_dict(self, state_dict): 55 | self.mean = state_dict['mean'] 56 | self.std = state_dict['std'] 57 | self.n = state_dict['n'] 58 | 59 | 60 | class Normalizers(nn.Module): 61 | def __init__(self, dim_action: int, dim_state: int, verbose=0): 62 | super().__init__() 63 | # action_normalizer is not used 64 | self.action_normalizer = GaussianNormalizer([dim_action], verbose=verbose) 65 | self.state_normalizer = GaussianNormalizer([dim_state], verbose=verbose) 66 | self.diff_normalizer = GaussianNormalizer([dim_state], verbose=verbose) 67 | 68 | def forward(self): 69 | raise NotImplemented 70 | 71 | def to(self, *args, **kwargs): 72 | self.action_normalizer.to(*args, **kwargs) 73 | self.state_normalizer.to(*args, **kwargs) 74 | self.diff_normalizer.to(*args, **kwargs) 75 | 76 | # noinspection PyMethodOverriding 77 | def state_dict(self, *args, **kwargs): 78 | return {'action_normalizer': self.action_normalizer.state_dict(), 79 | 'state_normalizer': self.state_normalizer.state_dict(), 80 | 'diff_normalizer': self.diff_normalizer.state_dict()} 81 | 82 | # noinspection PyMethodOverriding, PyTypeChecker 83 | def load_state_dict(self, state_dict): 84 | self.action_normalizer.load_state_dict(state_dict['action_normalizer']) 85 | self.state_normalizer.load_state_dict(state_dict['state_normalizer']) 86 | self.diff_normalizer.load_state_dict(state_dict['diff_normalizer']) 87 | 88 | 89 | 90 | -------------------------------------------------------------------------------- /slbo/models/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | 6 | class MLP(nn.Module): 7 | def __init__(self, input_dim, output_dim, hidden_dims, activation='Tanh', last_activation='Identity', biases=None): 8 | super(MLP, self).__init__() 9 | sizes_list = hidden_dims.copy() 10 | self.activation = getattr(nn, activation)() 11 | self.last_activation = getattr(nn, last_activation)() 12 | sizes_list.insert(0, input_dim) 13 | biases = [True] * len(sizes_list) if biases is None else biases.copy() 14 | 15 | layers = [] 16 | if 1 < len(sizes_list): 17 | for i in range(len(sizes_list) - 1): 18 | layers.append(nn.Linear(sizes_list[i], sizes_list[i + 1], bias=biases[i])) 19 | self.last_layer = nn.Linear(sizes_list[-1], output_dim) 20 | self.layers = nn.ModuleList(layers) 21 | 22 | def forward(self, x): 23 | for layer in self.layers: 24 | x = layer(x) 25 | x = self.activation(x) 26 | x = self.last_layer(x) 27 | x = self.last_activation(x) 28 | return x 29 | 30 | def init(self, init_fn, last_init_fn): 
31 | for layer in self.layers: 32 | init_fn(layer) 33 | last_init_fn(self.last_layer) 34 | 35 | 36 | def soft_update(source_model: nn.Module, target_model: nn.Module, tau): 37 | for target_param, param in zip(target_model.parameters(), source_model.parameters()): 38 | target_param.data.copy_(target_param.data * (1.0 - tau) + param.data * tau) 39 | 40 | 41 | def copy_model_params_from_to(source, target): 42 | for target_param, param in zip(target.parameters(), source.parameters()): 43 | target_param.data.copy_(param.data) 44 | 45 | 46 | def init(module, weight_init=None, bias_init=None): 47 | if weight_init: 48 | weight_init(module.weight.data) 49 | if bias_init: 50 | bias_init(module.bias.data) 51 | 52 | 53 | def get_flat_params(model): 54 | params = [] 55 | for param in model.parameters(): 56 | params.append(param.data.view(-1)) 57 | 58 | flat_params = torch.cat(params) 59 | return flat_params 60 | 61 | 62 | def set_flat_params(model, flat_params): 63 | prev_ind = 0 64 | for param in model.parameters(): 65 | flat_size = int(np.prod(list(param.size()))) 66 | param.data.copy_( 67 | flat_params[prev_ind:prev_ind + flat_size].view(param.size())) 68 | prev_ind += flat_size 69 | 70 | 71 | def get_flat_grad(net, grad_grad=False): 72 | grads = [] 73 | for param in net.parameters(): 74 | if grad_grad: 75 | grads.append(param.grad.grad.view(-1)) 76 | else: 77 | grads.append(param.grad.view(-1)) 78 | 79 | flat_grad = torch.cat(grads) 80 | return flat_grad 81 | 82 | -------------------------------------------------------------------------------- /slbo/scripts/run_trpo.py: -------------------------------------------------------------------------------- 1 | import shutil 2 | import time 3 | from collections import deque 4 | 5 | import numpy as np 6 | import torch 7 | import torch.backends.cudnn 8 | 9 | from torch.utils.tensorboard import SummaryWriter 10 | import os 11 | 12 | from slbo.algos.mfrl.trpo import TRPO 13 | from slbo.configs.config import Config 14 | from slbo.envs.wrapped_envs import make_vec_envs, get_vec_normalize 15 | from slbo.models import Actor, VCritic 16 | from slbo.misc.utils import evaluate, log_and_write 17 | from slbo.storages.on_policy_buffer import OnPolicyBuffer 18 | try: 19 | from slbo.misc import logger 20 | except ImportError: 21 | from stable_baselines import logger 22 | 23 | 24 | # noinspection DuplicatedCode 25 | def main(): 26 | logger.info('Test script for TRPO') 27 | config, hparam_dict = Config('trpo_config.yaml') 28 | 29 | torch.manual_seed(config.seed) 30 | # noinspection PyUnresolvedReferences 31 | torch.cuda.manual_seed_all(config.seed) 32 | 33 | if config.use_cuda and torch.cuda.is_available() and config.cuda_deterministic: 34 | torch.backends.cudnn.benchmark = False 35 | torch.backends.cudnn.deterministic = True 36 | 37 | import datetime 38 | current_time = datetime.datetime.now().strftime('%b%d_%H%M%S') 39 | log_dir = os.path.join(config.proj_dir, config.result_dir, current_time, 'log') 40 | eval_log_dir = os.path.join(config.proj_dir, config.result_dir, current_time, 'log_eval') 41 | save_dir = os.path.join(config.proj_dir, config.result_dir, current_time, 'save') 42 | os.makedirs(log_dir, exist_ok=True) 43 | os.makedirs(eval_log_dir, exist_ok=True) 44 | os.makedirs(save_dir, exist_ok=True) 45 | writer = SummaryWriter(log_dir=log_dir) 46 | writer.add_hparams(hparam_dict, {}) 47 | 48 | # save current version of code 49 | shutil.copytree(config.proj_dir, save_dir + '/code', ignore=shutil.ignore_patterns('result', 'data', 'ref')) 50 | 51 | 
torch.set_num_threads(1) 52 | device = torch.device('cuda' if config.use_cuda else 'cpu') 53 | 54 | envs = make_vec_envs(config.env.env_name, config.seed, config.env.num_envs, config.env.gamma, log_dir, device, 55 | allow_early_resets=False, norm_reward=True, norm_obs=True, test=True) 56 | 57 | state_dim = envs.observation_space.shape[0] 58 | action_space = envs.action_space 59 | action_dim = action_space.shape[0] 60 | 61 | actor = Actor(state_dim, action_space, hidden_dims=config.trpo.actor_hidden_dims, 62 | state_normalizer=None) 63 | critic = VCritic(state_dim, hidden_dims=config.trpo.critic_hidden_dims, state_normalizer=None) 64 | actor.to(device) 65 | critic.to(device) 66 | 67 | agent = TRPO(actor, critic,) 68 | 69 | on_policy_buffer = \ 70 | OnPolicyBuffer(config.trpo.num_env_steps, config.env.num_envs, envs.observation_space.shape, envs.action_space, 71 | use_gae=config.trpo.use_gae, gamma=config.env.gamma, gae_lambda=config.trpo.gae_lambda, 72 | use_proper_time_limits=config.trpo.use_proper_time_limits, ) 73 | 74 | state = envs.reset() 75 | # noinspection PyUnresolvedReferences 76 | on_policy_buffer.states[0].copy_(state) 77 | on_policy_buffer.to(device) 78 | 79 | episode_rewards = deque(maxlen=10) 80 | episode_lengths = deque(maxlen=10) 81 | 82 | start = time.time() 83 | num_updates = config.trpo.total_env_steps // config.trpo.num_env_steps // config.env.num_envs 84 | 85 | for j in range(num_updates): 86 | 87 | for step in range(config.trpo.num_env_steps): 88 | with torch.no_grad(): 89 | action, action_log_prob, dist_entropy, *_ = actor.act(on_policy_buffer.states[step]) 90 | value = critic(on_policy_buffer.states[step]) 91 | 92 | state, reward, done, info = envs.step(action) 93 | 94 | for info_ in info: 95 | if 'episode' in info_.keys(): 96 | episode_rewards.append(info_['episode']['r']) 97 | episode_lengths.append(info_['episode']['l']) 98 | 99 | mask = torch.tensor([[0.0] if done_ else [1.0] for done_ in done], dtype=torch.float32) 100 | bad_mask = torch.tensor([[0.0] if 'bad_transition' in info_.keys() else [1.0] for info_ in info], 101 | dtype=torch.float32) 102 | on_policy_buffer.insert(states=state, actions=action, action_log_probs=action_log_prob, 103 | values=value, rewards=reward, masks=mask, bad_masks=bad_mask) 104 | 105 | with torch.no_grad(): 106 | next_value = critic(on_policy_buffer.states[-1]) 107 | 108 | on_policy_buffer.compute_returns(next_value) 109 | losses = agent.update(on_policy_buffer) 110 | on_policy_buffer.after_update() 111 | 112 | if j % config.save_interval == 0 or j == num_updates - 1: 113 | save_path = os.path.join(save_dir, config.mf_algo) 114 | try: 115 | os.makedirs(save_path) 116 | except OSError: 117 | pass 118 | 119 | logger.info('Model saved.') 120 | torch.save([actor.state_dict(), critic.state_dict(), 121 | getattr(get_vec_normalize(envs), 'obs_rms', None)], 122 | os.path.join(save_path, config.env.env_name + ".pt")) 123 | 124 | serial_timsteps = (j + 1) * config.trpo.num_env_steps 125 | total_num_steps = config.env.num_envs * serial_timsteps 126 | end = time.time() 127 | 128 | fps = int(total_num_steps / (end - start)) 129 | 130 | if j % config.log_interval == 0 and len(episode_rewards) > 0: 131 | log_info = [('serial_timesteps', serial_timsteps), ('total_timesteps', total_num_steps), 132 | ('ep_rew_mean', np.mean(episode_rewards)), ('ep_len_mean', np.mean(episode_lengths)), 133 | ('fps', fps), ('time_elapsed', end - start)] 134 | 135 | for loss_name, loss_value in losses.items(): 136 | log_info.append((loss_name, loss_value)) 137 | 
log_and_write(logger, writer, log_info, global_step=j) 138 | 139 | if (config.eval_interval is not None and len(episode_rewards) > 0 140 | and j % config.eval_interval == 0): 141 | obs_rms = get_vec_normalize(envs).obs_rms 142 | eval_episode_rewards, eval_episode_lengths = \ 143 | evaluate(actor, config.env.env_name, config.seed, 144 | num_episode=10, eval_log_dir=None, device=device, norm_reward=True, norm_obs=True, 145 | obs_rms=obs_rms, test=True) 146 | 147 | logger.info('Evaluation:') 148 | log_and_write(logger, writer, [('eval_ep_rew_mean', np.mean(eval_episode_rewards)), 149 | ('eval_ep_rew_min', np.min(eval_episode_rewards)), 150 | ('eval_ep_rew_max', np.max(eval_episode_rewards))], global_step=j) 151 | 152 | envs.close() 153 | 154 | 155 | if __name__ == "__main__": 156 | main() 157 | -------------------------------------------------------------------------------- /slbo/storages/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jiangsy/slbo_pytorch/7b1283968a82c939725c2705e7315d1f3759ee29/slbo/storages/__init__.py -------------------------------------------------------------------------------- /slbo/storages/off_policy_buffer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler 3 | import numpy as np 4 | 5 | 6 | class OffPolicyBuffer(object): 7 | def __init__(self, buffer_size, num_envs, state_dim, action_dim): 8 | self.buffer_size = buffer_size 9 | self.num_envs = num_envs 10 | self.states = torch.zeros(buffer_size, num_envs, state_dim) 11 | self.next_states = torch.zeros(buffer_size, num_envs, state_dim) 12 | self.rewards = torch.zeros(buffer_size, num_envs, 1) 13 | self.actions = torch.zeros(buffer_size, num_envs, action_dim) 14 | self.masks = torch.ones(buffer_size, num_envs, 1) 15 | self.bad_masks = torch.ones(buffer_size, num_envs, 1) 16 | 17 | self.buffer_size = buffer_size 18 | self.index = 0 19 | self.size = 0 20 | self.device = torch.device('cpu') 21 | 22 | def to(self, device): 23 | self.states = self.states.to(device) 24 | self.next_states = self.next_states.to(device) 25 | self.rewards = self.rewards.to(device) 26 | self.actions = self.actions.to(device) 27 | self.masks = self.masks.to(device) 28 | self.bad_masks = self.bad_masks.to(device) 29 | 30 | self.device = device 31 | 32 | def add_buffer(self, buffer): 33 | for idx in range(buffer.size): 34 | self.insert(buffer.states[idx], buffer.actions[idx], buffer.rewards[idx], buffer.next_states[idx], 35 | buffer.masks[idx], buffer.bad_masks[idx]) 36 | 37 | def insert(self, states, actions, rewards, next_states, masks, bad_masks): 38 | self.states[self.index, :, :].copy_(states) 39 | self.actions[self.index, :, :].copy_(actions) 40 | self.rewards[self.index, :, :].copy_(rewards) 41 | self.next_states[self.index, :, :].copy_(next_states) 42 | self.masks[self.index, :, :].copy_(masks) 43 | self.bad_masks[self.index, :, :].copy_(bad_masks) 44 | 45 | self.index = (self.index + 1) % self.buffer_size 46 | self.size = min(self.size + 1, self.buffer_size) 47 | 48 | def clear(self): 49 | self.index = 0 50 | self.size = 0 51 | 52 | def get_batch_generator(self, batch_size): 53 | sampler = BatchSampler(SubsetRandomSampler(range(self.size * self.num_envs)), batch_size, drop_last=True) 54 | 55 | for indices in sampler: 56 | states = self.states.view(-1, *self.states.shape[2:])[indices] 57 | actions = self.actions.view(-1, 
self.actions.shape[-1])[indices] 58 | rewards = self.rewards.view(-1, 1)[indices] 59 | next_states = self.next_states.view(-1, *self.states.shape[2:])[indices] 60 | masks = self.masks.view(-1, 1)[indices] 61 | bad_masks = self.bad_masks.view(-1, 1)[indices] 62 | 63 | yield {'states': states, 'actions': actions, 'rewards': rewards, 'next_states': next_states, 64 | 'masks': masks, 'bad_masks': bad_masks} 65 | 66 | def get_sequential_batch_generator(self, batch_size, num_steps): 67 | sampler = BatchSampler(SubsetRandomSampler(range(self.size - num_steps)), 68 | int(batch_size / self.num_envs), drop_last=True) 69 | 70 | for indices in sampler: 71 | indices = np.array(indices) 72 | states = torch.zeros(batch_size, num_steps, *self.states.shape[2:], device=self.device) 73 | next_states = torch.zeros(batch_size, num_steps, *self.next_states.shape[2:], device=self.device) 74 | actions = torch.zeros([batch_size, num_steps, self.actions.shape[-1]], device=self.device) 75 | rewards = torch.zeros([batch_size, num_steps, 1], device=self.device) 76 | masks = torch.zeros([batch_size, num_steps, 1], device=self.device) 77 | bad_masks = torch.zeros([batch_size, num_steps, 1], device=self.device) 78 | for step in range(num_steps): 79 | states[:, step, :].copy_(self.states[indices + step].view(-1, *self.states.shape[2:])) 80 | next_states[:, step, :].copy_(self.next_states[indices + step].view(-1, *self.next_states.shape[2:])) 81 | actions[:, step, :].copy_(self.actions[indices + step].view(-1, self.actions.shape[-1])) 82 | rewards[:, step, :].copy_(self.rewards[indices + step].view(-1, 1)) 83 | masks[:, step, :].copy_(self.masks[indices + step].view(-1, 1)) 84 | bad_masks[:, step, :].copy_(self.bad_masks[indices + step].view(-1, 1)) 85 | 86 | yield {'states': states, 'actions': actions, 'masks': masks, 'next_states':next_states, 87 | 'rewards': rewards, 'bad_masks': bad_masks} 88 | 89 | def load(self, file_name): 90 | raise NotImplemented -------------------------------------------------------------------------------- /slbo/storages/on_policy_buffer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch.utils.data.sampler import BatchSampler, SubsetRandomSampler 3 | 4 | 5 | class OnPolicyBuffer(object): 6 | def __init__(self, num_steps, num_envs, obs_shape, action_space, 7 | use_gae=True, gamma=0.99, gae_lambda=0.95, use_proper_time_limits=True): 8 | self.num_steps = num_steps 9 | self.num_envs = num_envs 10 | self.states = torch.zeros(num_steps + 1, num_envs, *obs_shape) 11 | self.rewards = torch.zeros(num_steps, num_envs, 1) 12 | self.values = torch.zeros(num_steps + 1, num_envs, 1) 13 | self.returns = torch.zeros(num_steps + 1, num_envs, 1) 14 | self.action_log_probs = torch.zeros(num_steps, num_envs, 1) 15 | if action_space.__class__.__name__ == 'Discrete': 16 | action_shape = 1 17 | else: 18 | action_shape = action_space.shape[0] 19 | self.actions = torch.zeros(num_steps, num_envs, action_shape) 20 | if action_space.__class__.__name__ == 'Discrete': 21 | self.actions = self.actions.long() 22 | self.masks = torch.ones(num_steps + 1, num_envs, 1) 23 | 24 | self.bad_masks = torch.ones(num_steps + 1, num_envs, 1) 25 | 26 | self.num_steps = num_steps 27 | self.step = 0 28 | 29 | self.use_gae = use_gae 30 | self.gamma = gamma 31 | self.gae_lambda = gae_lambda 32 | self.use_proper_time_limits = use_proper_time_limits 33 | 34 | def to(self, device): 35 | self.states = self.states.to(device) 36 | self.rewards = self.rewards.to(device) 37 | 
self.values = self.values.to(device) 38 | self.returns = self.returns.to(device) 39 | self.action_log_probs = self.action_log_probs.to(device) 40 | self.actions = self.actions.to(device) 41 | self.masks = self.masks.to(device) 42 | self.bad_masks = self.bad_masks.to(device) 43 | 44 | def insert(self, states, actions, action_log_probs, 45 | values, rewards, masks, bad_masks): 46 | self.states[self.step + 1].copy_(states) 47 | self.actions[self.step].copy_(actions) 48 | self.action_log_probs[self.step].copy_(action_log_probs) 49 | self.values[self.step].copy_(values) 50 | self.rewards[self.step].copy_(rewards) 51 | self.masks[self.step + 1].copy_(masks) 52 | self.bad_masks[self.step + 1].copy_(bad_masks) 53 | 54 | self.step = (self.step + 1) % self.num_steps 55 | 56 | def after_update(self): 57 | self.states[0].copy_(self.states[-1]) 58 | self.masks[0].copy_(self.masks[-1]) 59 | self.bad_masks[0].copy_(self.bad_masks[-1]) 60 | 61 | def compute_returns(self, next_value): 62 | if self.use_proper_time_limits: 63 | if self.use_gae: 64 | self.values[-1] = next_value 65 | gae = 0 66 | for step in reversed(range(self.num_steps)): 67 | delta = self.rewards[step] + self.gamma * self.values[step + 1] * self.masks[step + 1] - \ 68 | self.values[step] 69 | gae = delta + self.gamma * self.gae_lambda * self.masks[step + 1] * gae 70 | gae = gae * self.bad_masks[step + 1] 71 | self.returns[step] = gae + self.values[step] 72 | else: 73 | self.returns[-1] = next_value 74 | for step in reversed(range(self.num_steps)): 75 | self.returns[step] = (self.returns[step + 1] * 76 | self.gamma * self.masks[step + 1] + self.rewards[step]) * self.bad_masks[step + 1] \ 77 | + (1 - self.bad_masks[step + 1]) * self.values[step] 78 | else: 79 | if self.use_gae: 80 | self.values[-1] = next_value 81 | gae = 0 82 | for step in reversed(range(self.num_steps)): 83 | delta = self.rewards[step] + self.gamma * self.values[step + 1] * self.masks[step + 1] - self.values[step] 84 | gae = delta + self.gamma * self.gae_lambda * self.masks[step + 1] * gae 85 | self.returns[step] = gae + self.values[step] 86 | else: 87 | self.returns[-1] = next_value 88 | for step in reversed(range(self.num_steps)): 89 | self.returns[step] = self.returns[step + 1] * self.gamma * self.masks[step + 1] + self.rewards[step] 90 | 91 | def get_batch_generator(self, batch_size=None, advantages=None): 92 | batch_size = self.num_steps * self.num_envs if batch_size is None else batch_size 93 | sampler = BatchSampler(SubsetRandomSampler(range(self.num_steps * self.num_envs)), batch_size, drop_last=True) 94 | 95 | for indices in sampler: 96 | states = self.states[:-1].view(-1, *self.states.size()[2:])[indices] 97 | actions = self.actions.view(-1, self.actions.size(-1))[indices] 98 | values = self.values[:-1].view(-1, 1)[indices] 99 | returns = self.returns[:-1].view(-1, 1)[indices] 100 | masks = self.masks[:-1].view(-1, 1)[indices] 101 | action_log_probs = self.action_log_probs.view(-1, 1)[indices] 102 | if advantages is None: 103 | adv_targets = None 104 | else: 105 | adv_targets = advantages.view(-1, 1)[indices] 106 | 107 | yield {'states': states, 'actions': actions, 'values': values, 'returns': returns, 108 | 'masks': masks, 'action_log_probs': action_log_probs, 'adv_targets': adv_targets} 109 | --------------------------------------------------------------------------------