├── __init__.py ├── common ├── __init__.py ├── adr │ ├── __init__.py │ └── adr.py ├── svpg │ ├── __init__.py │ ├── particles │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── distributions.py │ │ └── svpg_particle.py │ ├── svpg_utils.py │ └── svpg.py ├── agents │ ├── __init__.py │ ├── ddpg │ │ ├── __init__.py │ │ ├── replay_buffer.py │ │ └── ddpg.py │ └── selfplay_models.py ├── models │ ├── __init__.py │ ├── actor_critic.py │ └── discriminator.py ├── randomizer │ ├── controllers │ │ ├── __init__.py │ │ ├── miscalibrated_push_controller.py │ │ ├── pick_and_place_controller.py │ │ └── hook_controller.py │ ├── residual_envs │ │ ├── __init__.py │ │ └── residual_fetch_push_env.py │ ├── config │ │ ├── __init__.py │ │ ├── ErgoPushRandomized │ │ │ ├── hard.json │ │ │ ├── default.json │ │ │ └── random.json │ │ ├── FetchPushRandomized │ │ │ ├── default.json │ │ │ └── random.json │ │ ├── FetchSlideRandomized │ │ │ ├── default.json │ │ │ └── random.json │ │ ├── ResidualPushRandomized │ │ │ ├── default.json │ │ │ └── random.json │ │ ├── FetchHookRandomized │ │ │ ├── default.json │ │ │ └── random.json │ │ ├── ResidualComplexHookRandomized │ │ │ ├── default.json │ │ │ └── random.json │ │ ├── ResidualFetchHookRandomized │ │ │ ├── default.json │ │ │ └── random.json │ │ └── ErgoReacherRandomized │ │ │ ├── default.json │ │ │ ├── random.json │ │ │ └── hard.json │ ├── assets │ │ ├── stls │ │ │ ├── estop_link.stl │ │ │ ├── laser_link.stl │ │ │ ├── gripper_link.stl │ │ │ ├── torso_fixed_link.stl │ │ │ ├── base_link_collision.stl │ │ │ ├── bellows_link_collision.stl │ │ │ ├── head_pan_link_collision.stl │ │ │ ├── l_wheel_link_collision.stl │ │ │ ├── r_wheel_link_collision.stl │ │ │ ├── elbow_flex_link_collision.stl │ │ │ ├── head_tilt_link_collision.stl │ │ │ ├── torso_lift_link_collision.stl │ │ │ ├── wrist_flex_link_collision.stl │ │ │ ├── wrist_roll_link_collision.stl │ │ │ ├── forearm_roll_link_collision.stl │ │ │ ├── shoulder_lift_link_collision.stl │ │ │ ├── shoulder_pan_link_collision.stl │ │ │ └── upperarm_roll_link_collision.stl │ │ ├── textures │ │ │ ├── block.png │ │ │ └── block_hidden.png │ │ ├── push.xml │ │ ├── slide.xml │ │ ├── hook.xml │ │ ├── shared.xml │ │ ├── pusher.xml │ │ └── robot.xml │ ├── test.py │ ├── randomized_fetchslide.py │ ├── dimension.py │ ├── randomized_fetchpush.py │ ├── randomized_residual_push.py │ ├── randomized_ergo_pusher.py │ ├── randomized_ergoreacher.py │ ├── miscalibrated_push_controller.py │ ├── ergo_objects.py │ ├── wrappers.py │ ├── residual_fetch_push_env.py │ ├── __init__.py │ ├── complex_hook_env.py │ ├── randomized_fetch.py │ ├── residual_push.py │ └── fetch_hook_env.py └── discriminator │ └── discriminator_rewarder.py ├── utils ├── __init__.py ├── learning_curve.py └── plot_utils.py ├── architecture-selfplay.png ├── setup.py ├── ergo_expts.sh ├── .gitignore ├── experiments ├── evaluate_ergo_envs.py └── ddpg_train.py ├── README.md ├── arguments.py └── realrobot_evals └── realrobot_test.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /common/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /common/adr/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /common/svpg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /common/agents/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /common/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /common/agents/ddpg/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /common/randomizer/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /common/randomizer/residual_envs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /common/svpg/particles/__init__.py: -------------------------------------------------------------------------------- 1 | from .svpg_particle import SVPGParticle -------------------------------------------------------------------------------- /common/randomizer/config/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | CONFIG_PATH = os.path.abspath(os.path.dirname(__file__)) 3 | -------------------------------------------------------------------------------- /architecture-selfplay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/architecture-selfplay.png -------------------------------------------------------------------------------- /common/randomizer/assets/stls/estop_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/estop_link.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/laser_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/laser_link.stl -------------------------------------------------------------------------------- /common/randomizer/assets/textures/block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/textures/block.png -------------------------------------------------------------------------------- /common/randomizer/assets/stls/gripper_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/gripper_link.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/torso_fixed_link.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/torso_fixed_link.stl -------------------------------------------------------------------------------- /common/randomizer/assets/textures/block_hidden.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/textures/block_hidden.png -------------------------------------------------------------------------------- /common/randomizer/assets/stls/base_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/base_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/bellows_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/bellows_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/head_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/head_pan_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/l_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/l_wheel_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/r_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/r_wheel_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/elbow_flex_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/elbow_flex_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/head_tilt_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/head_tilt_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/torso_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/torso_lift_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/wrist_flex_link_collision.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/wrist_flex_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/wrist_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/wrist_roll_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/forearm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/forearm_roll_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/shoulder_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/shoulder_lift_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/shoulder_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/shoulder_pan_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/assets/stls/upperarm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/montrealrobotics/unsupervised-adr/HEAD/common/randomizer/assets/stls/upperarm_roll_link_collision.stl -------------------------------------------------------------------------------- /common/randomizer/config/ErgoPushRandomized/hard.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoPusherRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "friction", 6 | "default": 0.1, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='unsupadr', 5 | version='1.0', 6 | packages=['common'], 7 | install_requires=[ 8 | "torch>=1.0", "tqdm", "numpy", "matplotlib", 'gym>=0.10', 9 | 'mujoco_py>=1.15' 10 | ]) 11 | -------------------------------------------------------------------------------- /common/randomizer/config/ErgoPushRandomized/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoPusherRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "friction", 6 | "default": 0.4, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /common/randomizer/config/ErgoPushRandomized/random.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoPusherRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "friction", 6 
| "default": 0.4, 7 | "multiplier_min": 0.25, 8 | "multiplier_max": 2.0 9 | } 10 | ] 11 | } -------------------------------------------------------------------------------- /common/randomizer/config/FetchPushRandomized/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "FetchPushDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "friction", 6 | "default": 0.18, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /common/randomizer/config/FetchPushRandomized/random.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "FetchPushDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "friction", 6 | "default": 0.18, 7 | "multiplier_min": 0.1, 8 | "multiplier_max": 1.11 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /common/randomizer/config/FetchSlideRandomized/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "FetchPushDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "damping", 6 | "default": 0.01, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /common/randomizer/config/FetchSlideRandomized/random.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "FetchPushDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "friction", 6 | "default": 0.18, 7 | "multiplier_min": 0.2, 8 | "multiplier_max": 2.22 9 | } 10 | ] 11 | } 12 | -------------------------------------------------------------------------------- /common/randomizer/config/ResidualPushRandomized/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ResidualPushDefaultEnv-v0", 3 | "dimensions": [ 4 | { 5 | "name": "damping", 6 | "default": 0.01, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "friction", 12 | "default": 0.18, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.0 15 | } 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /common/randomizer/config/ResidualPushRandomized/random.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ResidualPushRandomizedEnv-v0", 3 | "dimensions": [ 4 | { 5 | "name": "damping", 6 | "default": 0.1, 7 | "multiplier_min": 0.1, 8 | "multiplier_max": 3 9 | }, 10 | 11 | { 12 | "name": "friction", 13 | "default": 0.18, 14 | "multiplier_min": 0.2777, 15 | "multiplier_max": 2.222 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /common/randomizer/test.py: -------------------------------------------------------------------------------- 1 | import randomizer 2 | import gym 3 | import numpy as np 4 | from randomizer.wrappers import RandomizedEnvWrapper 5 | 6 | 7 | env = gym.make("FetchPushRandomizedEnv-v0") 8 | # env = RandomizedEnvWrapper(env, seed=12) 9 | obs = env.reset() 10 | 11 | 12 | for i in range(1000): 13 | obs, rew, _, _ = env.step(env.action_space.sample()) 14 | env.render() 15 | if i % 100 == 0: 16 | # print('done') 17 | print(rew) 18 | # Randomize the environment 19 | env.randomize([-1]) 20 | env.reset() 21 | 
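For reference (not part of test.py above), the wrapped usage that test.py leaves commented out looks roughly like the sketch below. It is only a sketch: the env id FetchPushRandomizedEnv-v0 is the one test.py makes, and it assumes that common/randomizer/__init__.py (not shown in this dump) registers that id together with its JSON config so that RandomizedEnvWrapper can read env.unwrapped.config_file.

import gym
import common.randomizer                                    # assumed to register the *Randomized* gym ids
from common.randomizer.wrappers import RandomizedEnvWrapper

env = RandomizedEnvWrapper(gym.make("FetchPushRandomizedEnv-v0"), seed=12)

# FetchPushRandomized's config has a single dimension (friction), so the list
# passed to randomize() has exactly one entry:
env.randomize(['default'])   # pin the dimension to its default value
env.randomize(['random'])    # draw uniformly inside [range_min, range_max]
env.randomize([-1])          # -1 is shorthand for 'random'
env.randomize([0.25])        # a float in [0, 1] is rescaled linearly into the range

obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())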
-------------------------------------------------------------------------------- /common/randomizer/config/FetchHookRandomized/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ResidualHookRandomizedEnv-v0", 3 | "dimensions": [ 4 | { 5 | "name": "block_mass", 6 | "default": 4, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "hook_mass", 12 | "default": 4, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.0 15 | }, 16 | { 17 | "name": "friction", 18 | "default": 1, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 1.0 21 | } 22 | ] 23 | } -------------------------------------------------------------------------------- /common/randomizer/config/ResidualComplexHookRandomized/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ResidualHookRandomizedEnv-v0", 3 | "dimensions": [ 4 | { 5 | "name": "block_mass", 6 | "default": 4, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "hook_mass", 12 | "default": 4, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.0 15 | }, 16 | { 17 | "name": "friction", 18 | "default": 1, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 1.0 21 | } 22 | ] 23 | } -------------------------------------------------------------------------------- /common/randomizer/config/ResidualFetchHookRandomized/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ResidualHookRandomizedEnv-v0", 3 | "dimensions": [ 4 | { 5 | "name": "block_mass", 6 | "default": 4, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "hook_mass", 12 | "default": 4, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.0 15 | }, 16 | { 17 | "name": "friction", 18 | "default": 1, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 1.0 21 | } 22 | ] 23 | } -------------------------------------------------------------------------------- /common/randomizer/config/FetchHookRandomized/random.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ResidualHookRandomizedEnv-v0", 3 | "dimensions": [ 4 | { 5 | "name": "block_mass", 6 | "default": 2, 7 | "multiplier_min": 0.6, 8 | "multiplier_max": 4 9 | }, 10 | { 11 | "name": "hook_mass", 12 | "default": 4, 13 | "multiplier_min": 0.6, 14 | "multiplier_max": 2.5 15 | }, 16 | { 17 | "name": "friction", 18 | "default": 1, 19 | "multiplier_min": 0.5, 20 | "multiplier_max": 1.2 21 | } 22 | ] 23 | } 24 | 25 | -------------------------------------------------------------------------------- /common/randomizer/config/ResidualComplexHookRandomized/random.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ResidualHookRandomizedEnv-v0", 3 | "dimensions": [ 4 | { 5 | "name": "block_mass", 6 | "default": 4, 7 | "multiplier_min": 0.3, 8 | "multiplier_max": 3 9 | }, 10 | { 11 | "name": "hook_mass", 12 | "default": 4, 13 | "multiplier_min": 0.6, 14 | "multiplier_max": 4 15 | }, 16 | { 17 | "name": "friction", 18 | "default": 1, 19 | "multiplier_min": 0.8, 20 | "multiplier_max": 2.3 21 | } 22 | ] 23 | } 24 | 25 | -------------------------------------------------------------------------------- /common/randomizer/config/ResidualFetchHookRandomized/random.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ResidualHookRandomizedEnv-v0", 3 | "dimensions": [ 4 
| { 5 | "name": "block_mass", 6 | "default": 2, 7 | "multiplier_min": 0.3, 8 | "multiplier_max": 4 9 | }, 10 | { 11 | "name": "hook_mass", 12 | "default": 4, 13 | "multiplier_min": 0.6, 14 | "multiplier_max": 2.5 15 | }, 16 | { 17 | "name": "friction", 18 | "default": 1, 19 | "multiplier_min": 0.5, 20 | "multiplier_max": 1.2 21 | } 22 | ] 23 | } 24 | 25 | -------------------------------------------------------------------------------- /common/svpg/particles/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch.nn as nn 3 | 4 | 5 | class AddBias(nn.Module): 6 | def __init__(self, bias): 7 | super(AddBias, self).__init__() 8 | self._bias = nn.Parameter(bias.unsqueeze(1)) 9 | 10 | def forward(self, x): 11 | if x.dim() == 2: 12 | bias = self._bias.t().view(1, -1) 13 | else: 14 | bias = self._bias.t().view(1, -1, 1, 1) 15 | 16 | return x + bias 17 | 18 | 19 | def init(module, weight_init, bias_init, gain=1): 20 | weight_init(module.weight.data, gain=gain) 21 | bias_init(module.bias.data) 22 | return module 23 | 24 | 25 | orthogonal_init = lambda m: init(module=m, 26 | weight_init=nn.init.orthogonal_, 27 | bias_init=lambda x: nn.init.constant_(x, 0), 28 | gain=np.sqrt(2)) 29 | -------------------------------------------------------------------------------- /ergo_expts.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH --partition=long 3 | #SBATCH --gres=gpu:1 # Ask for 1 GPU 4 | # SBATCH --nodes=5 5 | # SBATCH --ntasks=8 6 | #SBATCH --mem=36G # Ask for 10 GB of RAM 7 | #SBATCH --time=72:00:00 # The job will run for 3 hours 8 | #SBATCH --array=1-5 9 | #SBATCH --mail-user=sharathraparthy@gmail.com 10 | #SBATCH --mail-type=ALL 11 | #SBATCH --mail-type=BEGIN 12 | #SBATCH --mail-type=END 13 | #SBATCH --mail-type=FAIL 14 | #SBATCH --mail-type=REQUEUE 15 | #SBATCH -o /network/home/raparths/onlysp-reacher-adr-ergo-%j.out 16 | 17 | module load python/3.6 18 | # module load mujoco/2.0 19 | module load cuda/10.0 20 | source $HOME/sharath/bin/activate 21 | 22 | python ddpg_train.py --sp-percent 1.0 --approach 'unsupervised-default' --only-sp=True --env_name='ErgoReacherRandomizedEnv-Headless-v0' --n-params=8 23 | -------------------------------------------------------------------------------- /common/randomizer/randomized_fetchslide.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | from gym import utils 5 | from .randomized_fetch import RandomizedFetchEnv 6 | 7 | 8 | # Ensure we get the path separator correct on windows 9 | MODEL_XML_PATH = os.path.join('fetch', 'slide.xml') 10 | 11 | 12 | class FetchSlideRandomizedEnv(RandomizedFetchEnv, utils.EzPickle): 13 | def __init__(self, reward_type='sparse', **kwargs): 14 | initial_qpos = { 15 | 'robot0:slide0': 0.05, 16 | 'robot0:slide1': 0.48, 17 | 'robot0:slide2': 0.0, 18 | 'object0:joint': [1.7, 1.1, 0.41, 1., 0., 0., 0.], 19 | } 20 | RandomizedFetchEnv.__init__( 21 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, 22 | gripper_extra_height=-0.02, target_in_the_air=False, target_offset=np.array([0.4, 0.0, 0.0]), 23 | obj_range=0.1, target_range=0.3, distance_threshold=0.05, 24 | initial_qpos=initial_qpos, reward_type=reward_type, **kwargs) 25 | -------------------------------------------------------------------------------- /common/randomizer/dimension.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class Dimension(object): 5 | """Class which handles the machinery of randomizing a particular dimension 6 | """ 7 | def __init__(self, default_value, multiplier_min=0.0, multiplier_max=1.0, name=None): 8 | self.default_value = default_value 9 | self.current_value = default_value 10 | self.multiplier_min = multiplier_min 11 | self.multiplier_max = multiplier_max 12 | self.range_min = self.default_value * self.multiplier_min 13 | self.range_max = self.default_value * self.multiplier_max 14 | self.name = name 15 | 16 | def randomize(self): 17 | self.current_value = np.random.uniform(low=self.range_min, high=self.range_max) 18 | 19 | def rescale(self, value): 20 | return self.range_min + (self.range_max - self.range_min) * value 21 | 22 | def reset(self): 23 | self.current_value = self.default_value 24 | 25 | def set(self, value): 26 | self.current_value = value 27 | 28 | -------------------------------------------------------------------------------- /common/models/actor_critic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class Actor(nn.Module): 7 | def __init__(self, state_dim, action_dim, max_action): 8 | super(Actor, self).__init__() 9 | 10 | self.l1 = nn.Linear(state_dim, 400) 11 | self.l2 = nn.Linear(400, 300) 12 | self.l3 = nn.Linear(300, action_dim) 13 | 14 | self.max_action = max_action 15 | 16 | def forward(self, x): 17 | x = F.relu(self.l1(x)) 18 | x = F.relu(self.l2(x)) 19 | x = self.max_action * torch.tanh(self.l3(x)) 20 | return x 21 | 22 | class Critic(nn.Module): 23 | def __init__(self, state_dim, action_dim): 24 | super(Critic, self).__init__() 25 | 26 | self.l1 = nn.Linear(state_dim + action_dim, 400) 27 | self.l2 = nn.Linear(400, 300) 28 | self.l3 = nn.Linear(300, 1) 29 | 30 | def forward(self, x, u): 31 | x = F.relu(self.l1(torch.cat([x, u], 1))) 32 | x = F.relu(self.l2(x)) 33 | x = self.l3(x) 34 | return x 35 | -------------------------------------------------------------------------------- /common/randomizer/randomized_fetchpush.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from gym import utils 4 | from gym.envs.mujoco import mujoco_env 5 | 6 | from .randomized_fetch import RandomizedFetchEnv 7 | 8 | # Ensure we get the path separator correct on windows 9 | MODEL_XML_PATH = os.path.join('fetch', 'push.xml') 10 | 11 | class FetchPushRandomizedEnv(RandomizedFetchEnv, utils.EzPickle): 12 | def __init__(self, reward_type='dense', **kwargs): 13 | initial_qpos = { 14 | 'robot0:slide0': 0.405, 15 | 'robot0:slide1': 0.48, 16 | 'robot0:slide2': 0.0, 17 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 18 | } 19 | 20 | RandomizedFetchEnv.__init__(self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, 21 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0, 22 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 23 | initial_qpos=initial_qpos, reward_type=reward_type, **kwargs) 24 | 25 | utils.EzPickle.__init__(self) -------------------------------------------------------------------------------- /common/randomizer/randomized_residual_push.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from gym import utils 4 | from gym.envs.mujoco 
import mujoco_env 5 | 6 | from .residual_push import RandomizedResidualPushEnv 7 | 8 | # Ensure we get the path separator correct on windows 9 | MODEL_XML_PATH = os.path.join('fetch', 'push.xml') 10 | 11 | # RANDOMIZED RESIDUAL PUSH ENVIRONMENT 12 | class ResidualPushRandomizedEnv(RandomizedResidualPushEnv, utils.EzPickle): 13 | def __init__(self, reward_type='sparse', **kwargs): 14 | initial_qpos = { 15 | 'robot0:slide0': 0.405, 16 | 'robot0:slide1': 0.48, 17 | 'robot0:slide2': 0.0, 18 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 19 | } 20 | RandomizedResidualPushEnv.__init__(self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, 21 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0, 22 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 23 | initial_qpos=initial_qpos, reward_type=reward_type, **kwargs) 24 | 25 | utils.EzPickle.__init__(self) -------------------------------------------------------------------------------- /common/randomizer/config/ErgoReacherRandomized/default.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherDefault-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 1, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 1, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.0 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 1, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 1.0 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 1, 25 | "multiplier_min": 1.0, 26 | "multiplier_max": 1.0 27 | }, 28 | { 29 | "name": "joint0maxtorque", 30 | "default": 18, 31 | "multiplier_min": 1.0, 32 | "multiplier_max": 1.0 33 | }, 34 | { 35 | "name": "joint1maxtorque", 36 | "default": 18, 37 | "multiplier_min": 1.0, 38 | "multiplier_max": 1.0 39 | }, 40 | { 41 | "name": "joint2maxtorque", 42 | "default": 18, 43 | "multiplier_min": 1.0, 44 | "multiplier_max": 1.0 45 | }, 46 | { 47 | "name": "joint3maxtorque", 48 | "default": 18, 49 | "multiplier_min": 1.0, 50 | "multiplier_max": 1.0 51 | } 52 | ] 53 | } -------------------------------------------------------------------------------- /common/randomizer/config/ErgoReacherRandomized/random.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherRandomized-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 1, 7 | "multiplier_min": 0.3, 8 | "multiplier_max": 2.0 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 1, 13 | "multiplier_min": 0.3, 14 | "multiplier_max": 2.0 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 1, 19 | "multiplier_min": 0.3, 20 | "multiplier_max": 2.0 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 1, 25 | "multiplier_min": 0.3, 26 | "multiplier_max": 2.0 27 | }, 28 | { 29 | "name": "joint0maxtorque", 30 | "default": 18, 31 | "multiplier_min": 1.0, 32 | "multiplier_max": 4.0 33 | }, 34 | { 35 | "name": "joint1maxtorque", 36 | "default": 18, 37 | "multiplier_min": 1.0, 38 | "multiplier_max": 4.0 39 | }, 40 | { 41 | "name": "joint2maxtorque", 42 | "default": 18, 43 | "multiplier_min": 1.0, 44 | "multiplier_max": 4.0 45 | }, 46 | { 47 | "name": "joint3maxtorque", 48 | "default": 18, 49 | "multiplier_min": 1.0, 50 | "multiplier_max": 4.0 51 | } 52 | ] 53 | } -------------------------------------------------------------------------------- /common/randomizer/config/ErgoReacherRandomized/hard.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "env": "ErgoReacherRandomizedHard-v0", 3 | "dimensions": [ 4 | { 5 | "name": "joint0gain", 6 | "default": 0.25, 7 | "multiplier_min": 1.0, 8 | "multiplier_max": 1.0 9 | }, 10 | { 11 | "name": "joint1gain", 12 | "default": 0.25, 13 | "multiplier_min": 1.0, 14 | "multiplier_max": 1.0 15 | }, 16 | { 17 | "name": "joint2gain", 18 | "default": 0.25, 19 | "multiplier_min": 1.0, 20 | "multiplier_max": 1.0 21 | }, 22 | { 23 | "name": "joint3gain", 24 | "default": 0.25, 25 | "multiplier_min": 1.0, 26 | "multiplier_max": 1.0 27 | }, 28 | { 29 | "name": "joint0maxtorque", 30 | "default": 5.0, 31 | "multiplier_min": 1.0, 32 | "multiplier_max": 1.0 33 | }, 34 | { 35 | "name": "joint1maxtorque", 36 | "default": 5.0, 37 | "multiplier_min": 1.0, 38 | "multiplier_max": 1.0 39 | }, 40 | { 41 | "name": "joint2maxtorque", 42 | "default": 5.0, 43 | "multiplier_min": 1.0, 44 | "multiplier_max": 1.0 45 | }, 46 | { 47 | "name": "joint3maxtorque", 48 | "default": 5.0, 49 | "multiplier_min": 1.0, 50 | "multiplier_max": 1.0 51 | } 52 | ] 53 | } -------------------------------------------------------------------------------- /common/randomizer/assets/push.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /common/randomizer/assets/slide.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /utils/learning_curve.py: -------------------------------------------------------------------------------- 1 | from arguments import get_args 2 | import numpy as np 3 | import os 4 | import matplotlib.pyplot as plt 5 | from utils.plot_utils import learning_curve 6 | 7 | if __name__=='__main__': 8 | args = get_args() 9 | # load the model param 10 | selfplay_percent = [0.0, 1.0, 1.0] 11 | x_gen_labels = np.linspace(0, 199, 199) 12 | approach = ["udr", "adr", "unsupervised-default"] 13 | PLOTCOLORS = ['hotpink', 'red', 'darkolivegreen', 'hotpink', 'blue'] 14 | save_plots = os.getcwd() + f'/plots/{args.env_name}/' 15 | if not os.path.isdir(save_plots): 16 | os.makedirs(save_plots) 17 | plt.rcParams["figure.figsize"] = (10, 6) 18 | SEED = [42, 41, 44, 43] 19 | 20 | for selfplay_index, a in enumerate(approach): 21 | mean, std = learning_curve(args, selfplay_percent, selfplay_index, a, SEED) 22 | mean = np.reshape(mean, (-1)) 23 | std = np.reshape(std, (-1)) 24 | plt.plot(x_gen_labels, mean, label=f'{a} - sp{selfplay_percent[selfplay_index]}', alpha=0.7) 25 | plt.fill_between(x_gen_labels, mean + std/2, mean - std/2, alpha=0.2) 26 | plt.title(f'Learning Curve for {args.env_name} | Mode : {args.mode}') 27 | plt.xlabel("Number of evaluations | 1 eval per 5000 timesteps") 28 | plt.ylabel("Average Distance") 29 | plt.ylim(0, 0.3) 30 | plt.xlim(0, 200) 31 | plt.legend() 32 | plt.savefig(f'{save_plots}/{args.mode}_generalization_sp{selfplay_percent[1]}.png', figsize=(10, 6)) 33 | plt.show() -------------------------------------------------------------------------------- /common/agents/ddpg/replay_buffer.py: 
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class ReplayBuffer(object): 6 | 7 | def __init__(self, state_dim, action_dim, max_size=int(1e6)): 8 | self.max_size = max_size 9 | self.ptr = 0 10 | self.size = 0 11 | 12 | self.state = np.zeros((max_size, state_dim)) 13 | self.action = np.zeros((max_size, action_dim)) 14 | self.next_state = np.zeros((max_size, state_dim)) 15 | self.reward = np.zeros((max_size, 1)) 16 | self.not_done = np.zeros((max_size, 1)) 17 | 18 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 19 | 20 | def add(self, state, action, next_state, reward, done): 21 | self.state[self.ptr] = state 22 | self.action[self.ptr] = action 23 | self.next_state[self.ptr] = next_state 24 | self.reward[self.ptr] = reward 25 | self.not_done[self.ptr] = 1. - done 26 | 27 | self.ptr = (self.ptr + 1) % self.max_size 28 | self.size = min(self.size + 1, self.max_size) 29 | 30 | def sample(self, batch_size): 31 | ind = np.random.randint(0, self.size, size=batch_size) 32 | 33 | return ( 34 | torch.FloatTensor(self.state[ind]).to(self.device), 35 | torch.FloatTensor(self.action[ind]).to(self.device), 36 | torch.FloatTensor(self.next_state[ind]).to(self.device), 37 | torch.FloatTensor(self.reward[ind]).to(self.device), 38 | torch.FloatTensor(self.not_done[ind]).to(self.device) 39 | ) -------------------------------------------------------------------------------- /common/models/discriminator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class MLPDiscriminator(nn.Module): 5 | """Discriminator class based on Feedforward Network 6 | Input is a state-action-state' transition 7 | Output is probability that it was from a reference trajectory 8 | """ 9 | def __init__(self, state_dim, action_dim): 10 | super(MLPDiscriminator, self).__init__() 11 | 12 | self.l1 = nn.Linear((state_dim + action_dim + state_dim), 128) 13 | self.l2 = nn.Linear(128, 128) 14 | self.logic = nn.Linear(128, 1) 15 | 16 | self.logic.weight.data.mul_(0.1) 17 | self.logic.bias.data.mul_(0.0) 18 | 19 | # Tuple of S-A-S' 20 | def forward(self, x): 21 | x = torch.tanh(self.l1(x)) 22 | x = torch.tanh(self.l2(x)) 23 | x = self.logic(x) 24 | return torch.sigmoid(x) 25 | 26 | class GAILMLPDiscriminator(nn.Module): 27 | """Discriminator class based on Feedforward Network 28 | Input is a state-action-state' transition 29 | Output is probability that it was from a reference trajectory 30 | """ 31 | def __init__(self, state_dim, action_dim): 32 | super(GAILMLPDiscriminator, self).__init__() 33 | self.l1 = nn.Linear((state_dim + action_dim), 128) 34 | self.l2 = nn.Linear(128, 128) 35 | self.logic = nn.Linear(128, 1) 36 | 37 | self.logic.weight.data.mul_(0.1) 38 | self.logic.bias.data.mul_(0.0) 39 | 40 | # Tuple of S-A-S' 41 | def forward(self, x): 42 | x = torch.tanh(self.l1(x)) 43 | x = torch.tanh(self.l2(x)) 44 | x = self.logic(x) 45 | return torch.sigmoid(x) -------------------------------------------------------------------------------- /common/randomizer/randomized_ergo_pusher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import gym_ergojr 3 | from gym_ergojr.envs.ergo_pusher_env import ErgoPusherEnv 4 | 5 | def goal_distance(goal_a, goal_b): 6 | assert goal_a.shape == goal_b.shape 7 | return np.linalg.norm(goal_a - goal_b, axis=-1) 8 | 9 | 10 | class 
ErgoPusherRandomizedEnv(ErgoPusherEnv): 11 | def __init__(self, **kwargs): 12 | self.dimensions = [] 13 | self.config_file = kwargs.get('config') 14 | del kwargs['config'] 15 | 16 | super().__init__(**kwargs) 17 | self.reward_type = 'sparse' 18 | self.distance_threshold = 0.05 19 | 20 | # # this is affected by the DR 21 | # self.puck.friction 22 | 23 | def step(self, action): 24 | obs, reward, done, info = super().step(action) 25 | observation = {"observation": obs, 26 | "achieved_goal": obs[6:8], 27 | "desired_goal": obs[8:]} 28 | return observation, reward, done, info 29 | 30 | def reset(self, forced=False): 31 | obs = super(ErgoPusherRandomizedEnv, self).reset() 32 | observation = {"observation": obs, 33 | "achieved_goal": obs[6:8], 34 | "desired_goal": obs[8:]} 35 | return observation 36 | 37 | def update_randomized_params(self): 38 | 39 | self.puck.friction = self.dimensions[0].current_value 40 | 41 | def compute_reward(self, achieved_goal, goal, info): 42 | # Compute distance between goal and the achieved goal. 43 | d = goal_distance(achieved_goal, goal) 44 | if self.reward_type == 'sparse': 45 | return -(d > self.distance_threshold).astype(np.float32) 46 | else: 47 | return -d -------------------------------------------------------------------------------- /common/randomizer/randomized_ergoreacher.py: -------------------------------------------------------------------------------- 1 | from gym_ergojr.envs import ErgoReacherEnv 2 | import numpy as np 3 | 4 | 5 | class ErgoReacherRandomizedEnv(ErgoReacherEnv): 6 | def __init__(self, **kwargs): 7 | self.dimensions = [] # this will be 8 elements long after wrapper init 8 | self.config_file = kwargs.get('config') 9 | 10 | del kwargs['config'] 11 | 12 | super().__init__(**kwargs) 13 | 14 | # # these two are affected by the DR 15 | # self.max_force 16 | # self.max_vel 17 | 18 | def step(self, action): 19 | obs, reward, done, info = super().step(action) 20 | info = {'distance': self.dist.query()} 21 | observation = {"observation": obs, 22 | "achieved_goal": np.asarray(self.get_tip()[0][1:]), 23 | "desired_goal": obs[8:]} 24 | return observation, reward, done, info 25 | 26 | def reset(self, forced=False): 27 | obs = super(ErgoReacherRandomizedEnv, self).reset() 28 | observation = {"observation": obs, 29 | "achieved_goal": np.asarray(self.get_tip()[0][1:]), 30 | "desired_goal": obs[8:]} 31 | return observation 32 | 33 | def update_randomized_params(self): 34 | # these are used automatically in the `step` function 35 | self.max_force = np.zeros(6, np.float32) 36 | self.max_vel = np.zeros(6, np.float32) 37 | 38 | if self.simple: 39 | self.max_force[[1, 2, 4, 5]] = [x.current_value for x in self.dimensions[:4]] 40 | self.max_vel[[1, 2, 4, 5]] = [x.current_value for x in self.dimensions[4:]] 41 | else: 42 | self.max_force[:] = [x.current_value for x in self.dimensions[:6]] 43 | self.max_vel[:] = [x.current_value for x in self.dimensions[6:]] -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | saved-models/ 2 | .idea/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 
32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | db.sqlite3 61 | 62 | # Flask stuff: 63 | instance/ 64 | .webassets-cache 65 | 66 | # Scrapy stuff: 67 | .scrapy 68 | 69 | # Sphinx documentation 70 | docs/_build/ 71 | 72 | # PyBuilder 73 | target/ 74 | 75 | # Jupyter Notebook 76 | .ipynb_checkpoints 77 | 78 | # pyenv 79 | .python-version 80 | 81 | # celery beat schedule file 82 | celerybeat-schedule 83 | 84 | # SageMath parsed files 85 | *.sage.py 86 | 87 | # Environments 88 | .env 89 | .venv 90 | env/ 91 | venv/ 92 | ENV/ 93 | env.bak/ 94 | venv.bak/ 95 | 96 | # Spyder project settings 97 | .spyderproject 98 | .spyproject 99 | 100 | # Rope project settings 101 | .ropeproject 102 | 103 | # mkdocs documentation 104 | /site 105 | 106 | # mypy 107 | .mypy_cache/ 108 | .DS_Store 109 | 110 | *.pth 111 | *.pt 112 | *.npy 113 | -------------------------------------------------------------------------------- /common/randomizer/assets/hook.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /common/svpg/particles/distributions.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from common.svpg.particles.utils import AddBias, init 6 | 7 | """ 8 | Modify standard PyTorch distributions so they are compatible with this code. 
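Concretely, both Categorical and Normal gain .log_probs() and .mode() helpers with consistent shapes (and Normal's entropy is summed over the action dimension), so the SVPG particles can treat discrete and continuous action spaces through the same interface.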
9 | """ 10 | 11 | # Categorical 12 | FixedCategorical = torch.distributions.Categorical 13 | 14 | old_sample = FixedCategorical.sample 15 | FixedCategorical.sample = lambda self: old_sample(self).unsqueeze(-1) 16 | 17 | log_prob_cat = FixedCategorical.log_prob 18 | FixedCategorical.log_probs = lambda self, actions: log_prob_cat(self, actions.squeeze(-1)).view(actions.size(0), -1).sum(-1).unsqueeze(-1) 19 | 20 | FixedCategorical.mode = lambda self: self.probs.argmax(dim=-1, keepdim=True) 21 | 22 | 23 | # Normal 24 | FixedNormal = torch.distributions.Normal 25 | 26 | log_prob_normal = FixedNormal.log_prob 27 | FixedNormal.log_probs = lambda self, actions: log_prob_normal(self, actions).sum(-1, keepdim=True) 28 | 29 | normal_entropy = FixedNormal.entropy 30 | FixedNormal.entropy = lambda self: normal_entropy(self).sum(-1) 31 | 32 | FixedNormal.mode = lambda self: self.mean 33 | 34 | 35 | class Categorical(nn.Module): 36 | def __init__(self, num_inputs, num_outputs): 37 | super(Categorical, self).__init__() 38 | 39 | init_ = lambda m: init(m, 40 | nn.init.orthogonal_, 41 | lambda x: nn.init.constant_(x, 0), 42 | gain=0.01) 43 | 44 | self.linear = init_(nn.Linear(num_inputs, num_outputs)) 45 | 46 | def forward(self, x): 47 | x = self.linear(x) 48 | return FixedCategorical(logits=x) 49 | 50 | 51 | class DiagGaussian(nn.Module): 52 | def __init__(self, num_inputs, num_outputs): 53 | super(DiagGaussian, self).__init__() 54 | 55 | init_ = lambda m: init(m, 56 | nn.init.orthogonal_, 57 | lambda x: nn.init.constant_(x, 0)) 58 | 59 | self.fc_mean = init_(nn.Linear(num_inputs, num_outputs)) 60 | self.logstd = AddBias(torch.zeros(num_outputs)) 61 | 62 | def forward(self, x): 63 | action_mean = self.fc_mean(x) 64 | 65 | # An ugly hack for my KFAC implementation. 66 | zeros = torch.zeros(action_mean.size()) 67 | if x.is_cuda: 68 | zeros = zeros.cuda() 69 | 70 | action_logstd = self.logstd(zeros) 71 | return FixedNormal(action_mean, action_logstd.exp()) -------------------------------------------------------------------------------- /common/randomizer/controllers/miscalibrated_push_controller.py: -------------------------------------------------------------------------------- 1 | from .pick_and_place_controller import get_move_action 2 | 3 | import numpy as np 4 | 5 | DEBUG = False 6 | 7 | 8 | def get_push_control(obs, atol=1e-3, block_width=0.15, workspace_height=0.1): 9 | 10 | gripper_position = obs['observation'][:3] 11 | block_position = obs['observation'][3:6] 12 | goal = obs['desired_goal'] 13 | 14 | desired_block_angle = np.arctan2(goal[0] - block_position[0], goal[1] - block_position[1]) 15 | gripper_angle = np.arctan2(goal[0] - gripper_position[0], goal[1] - gripper_position[1]) 16 | 17 | push_position = block_position.copy() 18 | push_position[0] += -1. * np.sin(desired_block_angle) * block_width / 2. 19 | push_position[1] += -1. * np.cos(desired_block_angle) * block_width / 2. 
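# The x/y offsets above place the push point half a block-width behind the
# block, on the side opposite the goal, so pushing from there drives the block
# toward the goal; the small z offset below keeps the target slightly above
# the block's centre height.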
20 | push_position[2] += 0.005 21 | 22 | # If the block is already at the place position, do nothing 23 | if np.sum(np.subtract(block_position, goal)**2) < atol: 24 | if DEBUG: 25 | print("The block is already at the place position; do nothing") 26 | return np.array([0., 0., 0., 0.]) 27 | 28 | # Angle between gripper and goal vs block and goal is roughly the same 29 | angle_diff = abs((desired_block_angle - gripper_angle + np.pi) % (2*np.pi) - np.pi) 30 | 31 | gripper_sq_distance = (gripper_position[0] - goal[0])**2 + (gripper_position[1] - goal[1])**2 32 | block_sq_distance = (block_position[0] - goal[0])**2 + (block_position[1] - goal[1])**2 33 | 34 | if (gripper_position[2] - push_position[2])**2 < atol and angle_diff < np.pi/4 and block_sq_distance < gripper_sq_distance: 35 | 36 | # Push towards the goal 37 | target_position = goal 38 | target_position[2] = gripper_position[2] 39 | if DEBUG: 40 | print("Push") 41 | return get_move_action(obs, target_position, atol=atol, gain=5.0) 42 | 43 | # If the gripper is above the push position 44 | if (gripper_position[0] - push_position[0])**2 + (gripper_position[1] - push_position[1])**2 < atol: 45 | 46 | # Move down to prepare for push 47 | if DEBUG: 48 | print("Move down to prepare for push") 49 | return get_move_action(obs, push_position, atol=atol) 50 | 51 | 52 | # Else move the gripper to above the push 53 | target_position = push_position.copy() 54 | target_position[2] += workspace_height 55 | if DEBUG: 56 | print("Move to above the push position") 57 | 58 | return get_move_action(obs, target_position, atol=atol) 59 | 60 | 61 | -------------------------------------------------------------------------------- /common/randomizer/miscalibrated_push_controller.py: -------------------------------------------------------------------------------- 1 | from .controllers.pick_and_place_controller import get_move_action 2 | 3 | import numpy as np 4 | 5 | DEBUG = False 6 | 7 | 8 | def get_push_control(obs, atol=1e-3, block_width=0.15, workspace_height=0.1): 9 | 10 | gripper_position = obs['observation'][:3] 11 | block_position = obs['observation'][3:6] 12 | goal = obs['desired_goal'] 13 | 14 | desired_block_angle = np.arctan2(goal[0] - block_position[0], goal[1] - block_position[1]) 15 | gripper_angle = np.arctan2(goal[0] - gripper_position[0], goal[1] - gripper_position[1]) 16 | 17 | push_position = block_position.copy() 18 | push_position[0] += -1. * np.sin(desired_block_angle) * block_width / 2. 19 | push_position[1] += -1. * np.cos(desired_block_angle) * block_width / 2. 
20 | push_position[2] += 0.005 21 | 22 | # If the block is already at the place position, do nothing 23 | if np.sum(np.subtract(block_position, goal)**2) < atol: 24 | if DEBUG: 25 | print("The block is already at the place position; do nothing") 26 | return np.array([0., 0., 0., 0.]) 27 | 28 | # Angle between gripper and goal vs block and goal is roughly the same 29 | angle_diff = abs((desired_block_angle - gripper_angle + np.pi) % (2*np.pi) - np.pi) 30 | 31 | gripper_sq_distance = (gripper_position[0] - goal[0])**2 + (gripper_position[1] - goal[1])**2 32 | block_sq_distance = (block_position[0] - goal[0])**2 + (block_position[1] - goal[1])**2 33 | 34 | if (gripper_position[2] - push_position[2])**2 < atol and angle_diff < np.pi/4 and block_sq_distance < gripper_sq_distance: 35 | 36 | # Push towards the goal 37 | target_position = goal 38 | target_position[2] = gripper_position[2] 39 | if DEBUG: 40 | print("Push") 41 | return get_move_action(obs, target_position, atol=atol, gain=5.0) 42 | 43 | # If the gripper is above the push position 44 | if (gripper_position[0] - push_position[0])**2 + (gripper_position[1] - push_position[1])**2 < atol: 45 | 46 | # Move down to prepare for push 47 | if DEBUG: 48 | print("Move down to prepare for push") 49 | return get_move_action(obs, push_position, atol=atol) 50 | 51 | 52 | # Else move the gripper to above the push 53 | target_position = push_position.copy() 54 | target_position[2] += workspace_height 55 | if DEBUG: 56 | print("Move to above the push position") 57 | 58 | return get_move_action(obs, target_position, atol=atol) 59 | 60 | 61 | -------------------------------------------------------------------------------- /common/svpg/particles/svpg_particle.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | from .utils import orthogonal_init 6 | from .distributions import Categorical, DiagGaussian 7 | 8 | 9 | class SVPGParticleCritic(nn.Module): 10 | def __init__(self, input_dim, output_dim, hidden_dim): 11 | super(SVPGParticleCritic, self).__init__() 12 | 13 | self.critic = nn.Sequential( 14 | orthogonal_init(nn.Linear(input_dim, hidden_dim)), 15 | nn.Tanh(), 16 | orthogonal_init(nn.Linear(hidden_dim, hidden_dim)), 17 | nn.Tanh(), 18 | orthogonal_init(nn.Linear(hidden_dim, 1)) 19 | ) 20 | 21 | def forward(self, x): 22 | return self.critic(x) 23 | 24 | class SVPGParticleActorBase(nn.Module): 25 | def __init__(self, input_dim, hidden_dim): 26 | super(SVPGParticleActorBase, self).__init__() 27 | 28 | self.actor_hidden = nn.Sequential( 29 | orthogonal_init(nn.Linear(input_dim, hidden_dim)), 30 | nn.Tanh(), 31 | orthogonal_init(nn.Linear(hidden_dim, hidden_dim)), 32 | nn.Tanh(), 33 | ) 34 | 35 | def forward(self, x): 36 | return self.actor_hidden(x) 37 | 38 | 39 | class SVPGParticle(nn.Module): 40 | """Implements a AC architecture for a Discrete Advantage 41 | Actor Critic Policy, used inside of SVPG 42 | """ 43 | def __init__(self, input_dim, output_dim, hidden_dim, discrete, freeze=False): 44 | super(SVPGParticle, self).__init__() 45 | 46 | self.critic = SVPGParticleCritic(input_dim, output_dim, hidden_dim) 47 | self.actor_hidden = SVPGParticleActorBase(input_dim, hidden_dim) 48 | 49 | if discrete: 50 | self.dist = Categorical(hidden_dim, output_dim) 51 | else: 52 | self.dist = DiagGaussian(hidden_dim, output_dim) 53 | 54 | if freeze: 55 | self.freeze() 56 | 57 | self.reset() 58 | 59 | def forward(self, x): 60 | actor_hidden = 
self.actor_hidden(x) 61 | dist = self.dist(actor_hidden) 62 | value = self.critic(x) 63 | 64 | return dist, value 65 | 66 | def freeze(self): 67 | for param in self.critic.parameters(): 68 | param.requires_grad = False 69 | 70 | for param in self.actor_hidden.parameters(): 71 | param.requires_grad = False 72 | 73 | for param in self.dist.parameters(): 74 | param.requires_grad = False 75 | 76 | def reset(self): 77 | self.saved_log_probs = [] 78 | self.saved_klds = [] 79 | self.rewards = [] -------------------------------------------------------------------------------- /common/randomizer/ergo_objects.py: -------------------------------------------------------------------------------- 1 | import random 2 | import os 3 | import gym 4 | import numpy as np 5 | from gym import spaces 6 | from tqdm import tqdm 7 | from gym_ergojr.sim.objects import Puck 8 | import pybullet as p 9 | 10 | PUSHER_GOAL_X = [-.2, -.1] # Get three points 11 | PUSHER_GOAL_Y = [-.1, .05] # Get three points 12 | # Define a circle equation with three points. After that choose four equidistant points on that circle. 13 | 14 | circle1 = np.array([[-0.1, -0.1], 15 | [-0.15, -0.1], 16 | [-0.1, 0.05]]) 17 | shift = [-0.029, -0.0475, -0.066] 18 | 19 | 20 | def define_circle(p1, p2, p3): 21 | temp = p2[0] * p2[0] + p2[1] * p2[1] 22 | bc = (p1[0] * p1[0] + p1[1] * p1[1] - temp) / 2 23 | cd = (temp - p3[0] * p3[0] - p3[1] * p3[1]) / 2 24 | det = (p1[0] - p2[0]) * (p2[1] - p3[1]) - (p2[0] - p3[0]) * (p1[1] - p2[1]) 25 | 26 | if abs(det) < 1.0e-6: 27 | return (None, np.inf) 28 | 29 | # Center of circle 30 | cx = (bc*(p2[1] - p3[1]) - cd*(p1[1] - p2[1])) / det 31 | cy = ((p1[0] - p2[0]) * cd - (p2[0] - p3[0]) * bc) / det 32 | 33 | radius = np.sqrt((cx - p1[0])**2 + (cy - p1[1])**2) 34 | return ((cx, cy), radius) 35 | 36 | 37 | def angle(line1, line2): 38 | return np.arccos(np.dot(line1, line2)/(np.linalg.norm(line1)*np.linalg.norm(line2))) 39 | 40 | 41 | goal_pos = np.asarray([[-0.154, 0.0, 0], [-0.15, 0.032, 0], [-0.15, -0.032, 0], [-0.137, 0.062, 0], 42 | [-0.137, -0.062, 0], [-0.117, 0.088, 0], [-0.117, -0.088, 0], [-0.172, 0.0, 0], 43 | [-0.168, 0.032, 0], [-0.168, -0.032, 0],[-0.155, 0.062, 0],[-0.155, -0.062, 0], [-0.135, 0.088, 0], 44 | [-0.135, -0.088, 0], [-0.191, 0.0, 0], [-0.191, -0.0, 0], [-0.186, 0.032, 0], [-0.186, -0.032, 0], 45 | [-0.174, 0.062, 0], [-0.174, -0.062, 0], [-0.154, 0.088, 0], [-0.154, -0.08, 0]]) # Don't hard code. 
Change afterwards 46 | 47 | puck_positions = np.array([[-0.07, 0.05], 48 | [-0.85, 0.65], 49 | [-0.10, 0.08]]) 50 | class PuckEval(Puck): 51 | def __init__(self, friction=0.4): 52 | super(PuckEval, self).__init__() 53 | self.goal = None 54 | 55 | def reset(self): 56 | super().reset() 57 | 58 | def hard_reset(self): 59 | super().hard_reset() 60 | 61 | def add_target(self): 62 | self.dbo.goal = goal_pos[int(os.environ['goal_index'])] 63 | self.obj_visual = p.createVisualShape( 64 | p.GEOM_CYLINDER, radius=0.02, length=0.01, rgbaColor=[0, 1, 0, 1]) 65 | self.target = p.createMultiBody( 66 | baseVisualShapeIndex=self.obj_visual, basePosition=self.dbo.goal) 67 | 68 | -------------------------------------------------------------------------------- /common/randomizer/wrappers.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import json 3 | import numpy as np 4 | 5 | import gym.spaces as spaces 6 | 7 | from common.randomizer.dimension import Dimension 8 | 9 | 10 | class RandomizedEnvWrapper(gym.Wrapper): 11 | """Creates a randomization-enabled enviornment, which can change 12 | physics / simulation parameters without relaunching everything 13 | """ 14 | 15 | def __init__(self, env, seed): 16 | super(RandomizedEnvWrapper, self).__init__(env) 17 | self.config_file = self.unwrapped.config_file 18 | 19 | self._load_randomization_dimensions(seed) 20 | self.unwrapped.update_randomized_params() 21 | self.randomized_default = ['random'] * len(self.unwrapped.dimensions) 22 | 23 | def _load_randomization_dimensions(self, seed): 24 | """ Helper function to load environment defaults ranges 25 | """ 26 | self.unwrapped.dimensions = [] 27 | 28 | with open(self.config_file, mode='r') as f: 29 | config = json.load(f) 30 | 31 | for dimension in config['dimensions']: 32 | self.unwrapped.dimensions.append( 33 | Dimension( 34 | default_value=dimension['default'], 35 | multiplier_min=dimension['multiplier_min'], 36 | multiplier_max=dimension['multiplier_max'], 37 | name=dimension['name'] 38 | ) 39 | ) 40 | 41 | nrand = len(self.unwrapped.dimensions) 42 | self.unwrapped.randomization_space = spaces.Box(0, 1, shape=(nrand,), dtype=np.float32) 43 | 44 | def randomize(self, randomized_values=[-1]): 45 | """Sets the parameter values such that a call to`update_randomized_params()` 46 | will generate an environment with those settings. 47 | 48 | Passing a list of 'default' strings will give the default value 49 | Passing a list of 'random' strings will give a purely random value for that dimension 50 | Passing a list of -1 integers will have the same effect. 
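Passing floats in [0, 1] rescales each dimension linearly between its
configured range_min and range_max (see Dimension.rescale).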
51 | """ 52 | for dimension, randomized_value in enumerate(randomized_values): 53 | if randomized_value == 'default': 54 | self.unwrapped.dimensions[dimension].current_value = \ 55 | self.unwrapped.dimensions[dimension].default_value 56 | elif randomized_value != 'random' and randomized_value != -1: 57 | assert 0.0 <= randomized_value <= 1.0, "using incorrect: {}".format(randomized_value) 58 | self.unwrapped.dimensions[dimension].current_value = \ 59 | self.unwrapped.dimensions[dimension].rescale(randomized_value) 60 | else: # random 61 | self.unwrapped.dimensions[dimension].randomize() 62 | 63 | self.unwrapped.update_randomized_params() 64 | 65 | def step(self, action): 66 | return self.env.step(action) 67 | 68 | def reset(self, **kwargs): 69 | return self.env.reset(**kwargs) -------------------------------------------------------------------------------- /experiments/evaluate_ergo_envs.py: -------------------------------------------------------------------------------- 1 | import re 2 | from common.randomizer.wrappers import RandomizedEnvWrapper 3 | from locale import atof 4 | import time 5 | import torch 6 | import common.randomizer as randomizer 7 | from arguments import get_args 8 | import gym 9 | import gym_ergojr 10 | import numpy as np 11 | import os.path as osp 12 | import os 13 | import time 14 | from common.agents.ddpg.ddpg import DDPG 15 | 16 | 17 | def atof(text): 18 | try: 19 | retval = float(text) 20 | except ValueError: 21 | retval = text 22 | return retval 23 | 24 | 25 | def natural_keys(text): 26 | return [atof(c) for c in re.split(r'[+-]?([0-9]+(?:[.][0-9]*)?|[.][0-9]+)', text) ] 27 | 28 | 29 | if __name__ == '__main__': 30 | args = get_args() 31 | # load the model param 32 | SEED = [41, 42, 43, 44] 33 | APPROACH = ['udr'] 34 | SP_PERCENT = [0.0] 35 | for i, approach in enumerate(APPROACH): 36 | for seed in SEED: 37 | print('------------------------------------------------------------------------') 38 | print(f'Approach : {approach} | Seed : {seed} | Env : {args.env_name}') 39 | print('------------------------------------------------------------------------') 40 | args.save_dir = osp.join('/saved_models', 'sp{}polyak{}'.format(SP_PERCENT[i], args.sp_polyak) + '-' + approach) 41 | model_path = osp.join(os.getcwd() + args.save_dir, str(seed), args.env_name) 42 | models_path = os.listdir(model_path + '/') 43 | 44 | # List the file names that start with model and sort according to number. 45 | models_path = list(filter(lambda x: x.lower().endswith("actor.pth"), models_path)) 46 | models_path.sort(key=natural_keys) 47 | 48 | env = gym.make(args.env_name) 49 | env = RandomizedEnvWrapper(env, seed=12) 50 | env.randomize(['default'] * args.n_param) 51 | env.seed(args.seed + 100) 52 | eval_episodes = 1 53 | 54 | state_dim = env.observation_space.shape[0] 55 | action_dim = env.action_space.shape[0] 56 | max_action = float(env.action_space.high[0]) 57 | goal_dim = 2 58 | final_dist = [] 59 | policy = DDPG(args, state_dim, action_dim, max_action, goal_dim) 60 | for model in models_path: 61 | print(model) 62 | policy.load(model, model_path) 63 | avg_reward = 0. 
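            # Roll out the loaded checkpoint for `eval_episodes` episodes, accumulating the
            # per-step reward and the end-of-episode distance reported in `info["distance"]`.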
64 | avg_dist = 0 65 | model_dist = [] 66 | for key in range(eval_episodes): 67 | obs = env.reset() 68 | done = False 69 | while not done: 70 | time.sleep(0.01) 71 | action = policy.select_action(np.array(obs, dtype=np.float64)) 72 | obs, reward, done, info = env.step(action) 73 | avg_reward += reward 74 | 75 | avg_dist += info["distance"] 76 | final_dist.append(avg_dist/eval_episodes) 77 | np.save(model_path + f'{args.mode}_evaluation.npy', np.asarray(final_dist)) 78 | 79 | 80 | -------------------------------------------------------------------------------- /utils/plot_utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import numpy as np 3 | import os 4 | import os.path as osp 5 | 6 | 7 | def learning_curve(args, selfplay_percent, selplay_index, approach, seed): 8 | ''' 9 | args: arguments 10 | selfplay_percent: list of selfplay percentages (list) 11 | selplay_index: index of particular approach (integer) 12 | approach: list of different approaches ( Ex: ['udr', 'adr', 'unsupervised-default'] 13 | seed: list of seed numbers 14 | return: returns mean and standard deviation across all seeds. 15 | ''' 16 | default_dist = [] 17 | for s in seed: 18 | a = [] 19 | save_dir = osp.join(os.getcwd() + '/' + args.save_dir, "sp" + str(selfplay_percent[selplay_index]) + "polyak" + 20 | str(args.polyak) + '-' + str(approach), str(s) + '/') 21 | array = np.load(save_dir + f'{args.env_name}{args.mode}_evaluation.npy', allow_pickle=True) 22 | for arr in array: 23 | a.append(arr) 24 | array = np.asarray(a) 25 | print(f'Array shape : {array.shape} | Seed : {s} | Approach : {approach} | selplay_index : {selplay_index}') 26 | default_dist.append([array[:199]]) 27 | 28 | mean = np.mean(np.asarray(default_dist), axis=0) 29 | N = args.window_size 30 | mode = 'causal' 31 | mean = smooth(np.reshape(mean, (mean.shape[1])), N, mode=mode) 32 | std = np.std(np.asarray(default_dist), axis=0) 33 | std = smooth(np.reshape(std, (std.shape[1])), N, mode=mode) 34 | 35 | return mean, std 36 | 37 | 38 | def smooth(y, radius, mode='two_sided', valid_only=False): 39 | ''' 40 | Smooth signal y, where radius is determines the size of the window 41 | mode='twosided': 42 | average over the window [max(index - radius, 0), min(index + radius, len(y)-1)] 43 | mode='causal': 44 | average over the window [max(index - radius, 0), index] 45 | valid_only: put nan in entries where the full-sized window is not available 46 | ''' 47 | assert mode in ('two_sided', 'causal') 48 | if len(y) < 2*radius+1: 49 | return np.ones_like(y) * y.mean() 50 | elif mode == 'two_sided': 51 | convkernel = np.ones(2 * radius+1) 52 | out = np.convolve(y, convkernel,mode='same') / np.convolve(np.ones_like(y), convkernel, mode='same') 53 | if valid_only: 54 | out[:radius] = out[-radius:] = np.nan 55 | elif mode == 'causal': 56 | convkernel = np.ones(radius) 57 | out = np.convolve(y, convkernel,mode='full') / np.convolve(np.ones_like(y), convkernel, mode='full') 58 | out = out[:-radius+1] 59 | if valid_only: 60 | out[:radius] = np.nan 61 | return out 62 | 63 | 64 | def sampling_plot(args, seed, sp_index, approach='adr'): 65 | 66 | for s in seed: 67 | save_dir = osp.join(args.save_dir, "sp" + str(sp_index) + "polyak" + 68 | str(args.polyak) + '-' + str(approach), str(s), args.env_name + '/') 69 | print(save_dir) 70 | alice_envs = np.load(save_dir + f'alice_envs.npy', allow_pickle=True) 71 | envs = alice_envs 72 | envs = list(itertools.chain(*envs)) 73 | list_ = np.reshape(envs, (-1, 1)) 74 | return 
list_ 75 | 76 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Self-Supervised Active Domain Randomization 2 | This repository provides official code base for the paper "Generating Automatic Curricula via Self-Supervised Active Domain Randomization". 3 | ![Self-Supervised Active Domain Randomization Architecture](architecture-selfplay.png) 4 | 5 | ## Experiments 6 | We perform our experiments on ErgoReacher, a 4 DoF arm and ErgoPusher, a 3-DoF arm from both in simulation and on the real robot. 7 | ### Important Flags 8 | There are few important flags which differentiate various experiments. `--approach` specifies the approach ['udr' | 'unsupervised-default' | 'unsupervised-adr'], `--sp-percent` flag specifies the self-play percentage. For all out experiments we use either `--sp-percent=1.0` (full self-play/completely unsupervised) or `--sp-percent=0.0` (no self-play,/completely supervised) depending on the `--approach`. `--only-sp` specifies where the bob operates. It is `True` for `--approach=unsupervised-default` and `False` for `--approach=unsupervised-adr`. For all the reacher experiments we used `--n-params=8` and for pusher experiments we used `--n-params=1`. 9 | 10 | ### Uniform Domain Randomization 11 | For `ErgoReacher` baseline experiments: 12 | 13 | `python experiments/ddpg_train.py --sp-percent 0.0 --approach 'udr' --env-name='ErgoReacherRandomizedEnv-Headless-v0' --n-params=8 ` 14 | 15 | For `ErgoPusher` baseline experiments: 16 | 17 | `python experiments/ddpg_train.py --sp-percent 0.0 --approach 'udr' --env-name='ErgoPushRandomizedEnv-Headless-v0' --n-params=1` 18 | 19 | ### Unsupervised Default 20 | For `ErgoReacher` experiments: 21 | 22 | `python experiments/ddpg_train.py --sp-percent 1.0 --approach 'unsupervised-default' --only-sp=True --env-name='ErgoReacherRandomizedEnv-Headless-v0' --n-params=8 ` 23 | 24 | For `ErgoPusher` experiments: 25 | 26 | `python experiments/ddpg_train.py --sp-percent 1.0 --approach 'unsupervised-default' --only-sp=True --env-name='ErgoPushRandomizedEnv-Headless-v0' --n-params=1` 27 | 28 | ### Self-Supervised Active Domain Randomization 29 | For `ErgoReacher` experiments: 30 | 31 | `python experiments/ddpg_train.py --sp-percent 1.0 --approach 'unsupervised-adr' --only-sp=True --env-name='ErgoReacherRandomizedEnv-Headless-v0' --n-params=8` 32 | 33 | For `ErgoPusher` experiments: 34 | 35 | `python experiments/ddpg_train.py --sp-percent 1.0 --approach 'unsupervised-adr' --only-sp=True --env-name='ErgoPushRandomizedEnv-Headless-v0' --n-params=1` 36 | 37 | ## Evaluations 38 | In order to evaluate the trained models on simulator, on the command line execute the following. 39 | 40 | Here `--mode` can be `[default | hard]`. Here is an example to evaluate `ErgoReacher` in default environment. You can likewise evaluate `ErgoPusher` with different modes. 
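For instance, an `ErgoPusher` evaluation in the hard environment would take the form `python experiments/evaluate_ergo_envs.py --env-name "ErgoPushRandomizedEnv-Headless-v0" --mode='hard' --sp-polyak 0.95 --n-params=1` (assuming the same flag conventions as the `ErgoReacher` command shown below).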
41 | 42 | `python experiments/evaluate_ergo_envs.py --env-name "ErgoReacherRandomizedEnv-Headless-v0" --mode='default' --sp-polyak 0.95 --n-params=8` 43 | 44 | ## Reference 45 | 46 | ``` 47 | @article{raparthy2020selfsupadr, 48 | title={Generating Automatic Curricula via Self-Supervised Active Domain Randomization}, 49 | author={Raparthy, Sharath Chandra and Mehta, Bhairav and Golemo, Florian and Paull, Liam}, 50 | url={https://arxiv.org/abs/2002.07911}, 51 | year={2020} 52 | } 53 | ``` 54 | 55 | Built by [@Sharath](https://sharathraparthy.github.io/) and [@Bhairav Mehta](https://bhairavmehta95.github.io/) 56 | -------------------------------------------------------------------------------- /common/discriminator/discriminator_rewarder.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import torch 4 | import torch.nn as nn 5 | from torch.autograd import Variable 6 | 7 | from common.models.discriminator import MLPDiscriminator 8 | 9 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 10 | 11 | 12 | class DiscriminatorRewarder(object): 13 | def __init__(self, state_dim, action_dim, discriminator_batchsz, reward_scale, 14 | load_discriminator=False, discriminator_lr=3e-3, add_pz=True): 15 | self.discriminator = MLPDiscriminator( 16 | state_dim=state_dim, 17 | action_dim=action_dim 18 | ) 19 | 20 | self.discriminator_criterion = nn.BCELoss() 21 | self.discriminator_optimizer = torch.optim.Adam(self.discriminator.parameters(), lr=discriminator_lr) 22 | self.reward_scale = reward_scale 23 | self.batch_size = discriminator_batchsz 24 | self.add_pz = add_pz 25 | 26 | if load_discriminator: 27 | self._load_discriminator(randomized_env_id) 28 | 29 | def calculate_rewards(self, randomized_trajectory): 30 | """Discriminator based reward calculation 31 | We want to use the negative of the adversarial calculation (Normally, -log(D)). We want to *reward* 32 | our simulator for making it easier to discriminate between the reference env + randomized onea 33 | """ 34 | score, _, _ = self.get_score(randomized_trajectory) 35 | reward = np.log(score) 36 | 37 | if self.add_pz: 38 | reward -= np.log(0.5) 39 | 40 | return self.reward_scale * reward 41 | 42 | def get_score(self, trajectory): 43 | """Discriminator based reward calculation 44 | We want to use the negative of the adversarial calculation (Normally, -log(D)). 
We want to *reward* 45 | our simulator for making it easier to discriminate between the reference env + randomized onea 46 | """ 47 | traj_tensor = self._trajectory2tensor(trajectory).float() 48 | 49 | with torch.no_grad(): 50 | score = (self.discriminator(traj_tensor).cpu().detach().numpy()+1e-8) 51 | return score.mean(), np.median(score), np.sum(score) 52 | 53 | def train_discriminator(self, reference_trajectory, randomized_trajectory, iterations): 54 | """Trains discriminator to distinguish between reference and randomized state action tuples 55 | """ 56 | for _ in range(iterations): 57 | randind = np.random.randint(0, len(randomized_trajectory[0]), size=int(self.batch_size)) 58 | refind = np.random.randint(0, len(reference_trajectory[0]), size=int(self.batch_size)) 59 | 60 | randomized_batch = self._trajectory2tensor(randomized_trajectory[randind]) 61 | reference_batch = self._trajectory2tensor(reference_trajectory[refind]) 62 | 63 | g_o = self.discriminator(randomized_batch) 64 | e_o = self.discriminator(reference_batch) 65 | 66 | self.discriminator_optimizer.zero_grad() 67 | 68 | discrim_loss = self.discriminator_criterion(g_o, torch.ones((len(randomized_batch), 1), device=device)) + \ 69 | self.discriminator_criterion(e_o, torch.zeros((len(reference_batch), 1), device=device)) 70 | discrim_loss.backward() 71 | 72 | self.discriminator_optimizer.step() 73 | 74 | def _load_discriminator(self, name, path='saved-models/discriminator/discriminator_{}.pth'): 75 | self.discriminator.load_state_dict(torch.load(path.format(name), map_location=device)) 76 | 77 | def _save_discriminator(self, name, path='saved-models/discriminator/discriminator_{}.pth'): 78 | torch.save(self.discriminator.state_dict(), path.format(name)) 79 | 80 | def _trajectory2tensor(self, trajectory): 81 | return torch.from_numpy(trajectory).float().to(device) 82 | -------------------------------------------------------------------------------- /common/agents/selfplay_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.optim as optim 4 | from torch.distributions import Bernoulli 5 | import numpy as np 6 | 7 | eps = np.finfo(np.float32).eps.item() 8 | 9 | 10 | class BernoulliPolicyFetch(nn.Module): 11 | def __init__(self, goal_dim, action_dim=1): 12 | super(BernoulliPolicyFetch, self).__init__() 13 | self.base = nn.Sequential( 14 | nn.Linear(goal_dim * 2, 300), 15 | nn.ReLU(), 16 | nn.Linear(300, 300), 17 | nn.ReLU(), 18 | ) 19 | 20 | self.out = nn.Linear(300, 1) 21 | self.value = nn.Linear(300, 1) 22 | 23 | self.saved_log_probs = [] 24 | self.rewards = [] 25 | self.values = [] 26 | 27 | def forward(self, x): 28 | x = self.base(x) 29 | termination_prob = self.out(x) 30 | value = self.value(x) 31 | return torch.sigmoid(termination_prob), value 32 | 33 | 34 | class AlicePolicyFetch: 35 | def __init__(self, args, goal_dim, action_dim=1): 36 | self.policy = BernoulliPolicyFetch(goal_dim, action_dim=1) 37 | self.optimizer = optim.Adam(self.policy.parameters(), lr=3e-3) 38 | # self.comm = setup_mpi(self.policy) 39 | self.args = args 40 | 41 | def select_action(self, state, deterministic=False, save_log_probs=True): 42 | state = torch.from_numpy(state).float().unsqueeze(0) 43 | probs, value = self.policy(state) 44 | m = Bernoulli(probs) 45 | action = m.sample() 46 | if save_log_probs: 47 | self.policy.saved_log_probs.append(m.log_prob(action)) 48 | self.policy.values.append(value) 49 | 50 | return action.cpu().data.numpy()[0] 51 | 52 
| def log(self, reward): 53 | self.policy.rewards.append(reward) 54 | 55 | def load_from_file(self, file): 56 | self.policy.load_state_dict(torch.load(file)) 57 | 58 | def load_from_policy(self, original): 59 | with torch.no_grad(): 60 | for param, target_param in zip(self.policy.parameters(), original.policy.parameters()): 61 | param.data.copy_((1 - self.args.polyak) * param.data + self.args.polyak * target_param.data) 62 | param.requires_grad = False 63 | 64 | def perturb(self, alpha, weight_noise, bias_noise): 65 | with torch.no_grad(): 66 | weight_noise = torch.from_numpy(alpha * weight_noise).float() 67 | bias_noise = torch.from_numpy(alpha * bias_noise).float() 68 | for param, noise in zip(self.policy.parameters(), [weight_noise, bias_noise]): 69 | param.add_(noise) 70 | param.requires_grad = False 71 | 72 | def finish_episode(self, gamma): 73 | R = 0 74 | returns = [] 75 | 76 | # Normalize 77 | self.policy.rewards = np.array(self.policy.rewards) 78 | self.policy.rewards = (self.policy.rewards - self.policy.rewards.mean()) / (self.policy.rewards.std() + eps) 79 | 80 | for r in self.policy.rewards[::-1]: 81 | R = r + gamma * R 82 | returns.insert(0, torch.FloatTensor([R]).unsqueeze(1)) 83 | 84 | log_probs = torch.cat(self.policy.saved_log_probs) 85 | returns = torch.cat(returns).detach() 86 | values = torch.cat(self.policy.values) 87 | 88 | advantage = returns - values 89 | 90 | actor_loss = -(log_probs * advantage.detach()).mean() 91 | critic_loss = advantage.pow(2).mean() 92 | loss = actor_loss + 0.5 * critic_loss 93 | 94 | self.optimizer.zero_grad() 95 | loss.backward() 96 | # sync_grads(self.comm, self.policy) 97 | self.optimizer.step() 98 | 99 | self.policy.rewards = [] 100 | self.policy.saved_log_probs = [] 101 | self.policy.values = [] -------------------------------------------------------------------------------- /common/randomizer/residual_fetch_push_env.py: -------------------------------------------------------------------------------- 1 | from .miscalibrated_push_controller import get_push_control 2 | 3 | from gym.envs.robotics import FetchPushEnv 4 | 5 | import gym 6 | from gym import error, spaces, utils 7 | from gym.utils import seeding 8 | 9 | import numpy as np 10 | import time 11 | 12 | 13 | class ResidualSlipperyPushEnv(gym.Env): 14 | 15 | def __init__(self, *args, **kwargs): 16 | self.fetch_env = gym.make("FetchPush-v1") 17 | 18 | for i in range(len(self.fetch_env.env.sim.model.geom_friction)): 19 | self.fetch_env.env.sim.model.geom_friction[i] = [18e-2, 5.e-3, 1e-4] 20 | 21 | self.metadata = self.fetch_env.metadata 22 | self.hardcoded_controller = None 23 | self.action_space = self.fetch_env.action_space 24 | self.observation_space = self.fetch_env.observation_space 25 | 26 | 27 | def step(self, residual_action): 28 | residual_action = 2. 
* residual_action 29 | 30 | action = np.add(residual_action, get_push_control(self._last_observation)) 31 | action = np.clip(action, -1, 1) 32 | observation, reward, done, debug_info = self.fetch_env.step(action) 33 | self._last_observation = observation 34 | 35 | return observation, reward, done, debug_info 36 | 37 | def reset(self): 38 | observation = self.fetch_env.reset() 39 | self._last_observation = observation 40 | return observation 41 | 42 | def seed(self, seed=0): 43 | return self.fetch_env.seed(seed=seed) 44 | 45 | def render(self, mode="human", *args, **kwargs): 46 | # See https://github.com/openai/gym/issues/1081 47 | self.fetch_env.env._render_callback() 48 | if mode == 'rgb_array': 49 | self.fetch_env.env._get_viewer().render() 50 | width, height = 3350, 1800 51 | data = self.fetch_env.env._get_viewer().read_pixels(width, height, depth=False) 52 | # original image is upside-down, so flip it 53 | return data[::-1, :, :] 54 | elif mode == 'human': 55 | self.fetch_env.env._get_viewer().render() 56 | 57 | return self.fetch_env.render(*args, **kwargs) 58 | 59 | def compute_reward(self, *args, **kwargs): 60 | return self.fetch_env.compute_reward(*args, **kwargs) 61 | 62 | class SlipperyPushEnv(gym.Env): 63 | 64 | def __init__(self, *args, **kwargs): 65 | self.fetch_env = gym.make("FetchPush-v1") 66 | 67 | for i in range(len(self.fetch_env.env.sim.model.geom_friction)): 68 | self.fetch_env.env.sim.model.geom_friction[i] = [18e-2, 5.e-3, 1e-4] 69 | 70 | self.metadata = self.fetch_env.metadata 71 | 72 | self.action_space = self.fetch_env.action_space 73 | self.observation_space = self.fetch_env.observation_space 74 | 75 | def step(self, action): 76 | return self.fetch_env.step(action) 77 | 78 | def reset(self): 79 | return self.fetch_env.reset() 80 | 81 | def seed(self, seed=0): 82 | return self.fetch_env.seed(seed=seed) 83 | 84 | def render(self, mode="human", *args, **kwargs): 85 | # See https://github.com/openai/gym/issues/1081 86 | self.fetch_env.env._render_callback() 87 | if mode == 'rgb_array': 88 | self.fetch_env.env._get_viewer().render() 89 | width, height = 3350, 1800 90 | data = self.fetch_env.env._get_viewer().read_pixels(width, height, depth=False) 91 | # original image is upside-down, so flip it 92 | return data[::-1, :, :] 93 | elif mode == 'human': 94 | self.fetch_env.env._get_viewer().render() 95 | 96 | return self.fetch_env.render(*args, **kwargs) 97 | 98 | def compute_reward(self, *args, **kwargs): 99 | return self.fetch_env.compute_reward(*args, **kwargs) 100 | 101 | -------------------------------------------------------------------------------- /common/randomizer/controllers/pick_and_place_controller.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | DEBUG = False 4 | 5 | def get_move_action(observation, target_position, atol=1e-3, gain=10., close_gripper=False): 6 | """ 7 | Move an end effector to a position and orientation. 8 | """ 9 | # Get the currents 10 | current_position = observation['observation'][:3] 11 | 12 | action = gain * np.subtract(target_position, current_position) 13 | if close_gripper: 14 | gripper_action = -1. 15 | else: 16 | gripper_action = 0. 
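    # Proportional control toward the target position, with the gripper command appended as the
    # fourth action dimension.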
17 | action = np.hstack((action, gripper_action)) 18 | 19 | return action 20 | 21 | def block_is_grasped(obs, relative_grasp_position, atol=1e-3): 22 | return block_inside_grippers(obs, relative_grasp_position, atol=atol) and grippers_are_closed(obs, atol=atol) 23 | 24 | def block_inside_grippers(obs, relative_grasp_position, atol=1e-3): 25 | gripper_position = obs['observation'][:3] 26 | block_position = obs['observation'][3:6] 27 | 28 | relative_position = np.subtract(gripper_position, block_position) 29 | 30 | return np.sum(np.subtract(relative_position, relative_grasp_position)**2) < atol 31 | 32 | def grippers_are_closed(obs, atol=1e-3): 33 | gripper_state = obs['observation'][9:11] 34 | return abs(gripper_state[0] - 0.024) < atol 35 | 36 | def grippers_are_open(obs, atol=1e-3): 37 | gripper_state = obs['observation'][9:11] 38 | return abs(gripper_state[0] - 0.05) < atol 39 | 40 | def get_pick_and_place_control(obs, relative_grasp_position=(0., 0., -0.02), workspace_height=0.1, atol=1e-3, gain=10): 41 | """ 42 | Returns 43 | ------- 44 | action : [float] * 4 45 | """ 46 | gripper_position = obs['observation'][:3] 47 | block_position = obs['observation'][3:6] 48 | place_position = obs['desired_goal'] 49 | 50 | # If the gripper is already grasping the block 51 | if block_is_grasped(obs, relative_grasp_position, atol=atol): 52 | 53 | # If the block is already at the place position, do nothing except keep the gripper closed 54 | if np.sum(np.subtract(block_position, place_position)**2) < atol: 55 | if DEBUG: 56 | print("The block is already at the place position; do nothing") 57 | return np.array([0., 0., 0., -1.]) 58 | 59 | # Move to the place position while keeping the gripper closed 60 | target_position = np.add(place_position, relative_grasp_position) 61 | target_position[2] += workspace_height/2. 
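        # Offset the target upward by half the workspace height so the grasped block is carried
        # clear of the table on the way to the place position.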
62 | if DEBUG: 63 | print("Move to above the place position") 64 | return get_move_action(obs, target_position, atol=atol, close_gripper=True, gain=gain) 65 | 66 | # If the block is ready to be grasped 67 | if block_inside_grippers(obs, relative_grasp_position, atol=atol): 68 | 69 | # Close the grippers 70 | if DEBUG: 71 | print("Close the grippers") 72 | return np.array([0., 0., 0., -1.]) 73 | 74 | # If the gripper is above the block 75 | if (gripper_position[0] - block_position[0])**2 + (gripper_position[1] - block_position[1])**2 < atol: 76 | 77 | # If the grippers are closed, open them 78 | if not grippers_are_open(obs, atol=atol): 79 | if DEBUG: 80 | print("Open the grippers") 81 | return np.array([0., 0., 0., 1.]) 82 | 83 | # Move down to grasp 84 | target_position = np.add(block_position, relative_grasp_position) 85 | if DEBUG: 86 | print("Move down to grasp") 87 | return get_move_action(obs, target_position, atol=atol, gain=gain) 88 | 89 | 90 | # Else move the gripper to above the block 91 | target_position = np.add(block_position, relative_grasp_position) 92 | target_position[2] += workspace_height 93 | if DEBUG: 94 | print("Move to above the block") 95 | return get_move_action(obs, target_position, atol=atol, gain=gain) 96 | 97 | 98 | -------------------------------------------------------------------------------- /common/randomizer/residual_envs/residual_fetch_push_env.py: -------------------------------------------------------------------------------- 1 | from randomizer.controllers.miscalibrated_push_controller import get_push_control 2 | 3 | from gym.envs.robotics import FetchPushEnv 4 | 5 | import gym 6 | from gym import error, spaces, utils 7 | from gym.utils import seeding 8 | 9 | import numpy as np 10 | import time 11 | 12 | 13 | class ResidualSlipperyPushEnv(gym.Env): 14 | 15 | def __init__(self, *args, **kwargs): 16 | self.fetch_env = gym.make("FetchPush-v1") 17 | 18 | for i in range(len(self.fetch_env.env.sim.model.geom_friction)): 19 | self.fetch_env.env.sim.model.geom_friction[i] = [18e-2, 5.e-3, 1e-4] 20 | 21 | self.metadata = self.fetch_env.metadata 22 | self.hardcoded_controller = None 23 | self.action_space = self.fetch_env.action_space 24 | self.observation_space = self.fetch_env.observation_space 25 | 26 | 27 | def step(self, residual_action): 28 | residual_action = 2. 
* residual_action 29 | 30 | action = np.add(residual_action, get_push_control(self._last_observation)) 31 | action = np.clip(action, -1, 1) 32 | observation, reward, done, debug_info = self.fetch_env.step(action) 33 | self._last_observation = observation 34 | 35 | return observation, reward, done, debug_info 36 | 37 | def reset(self): 38 | observation = self.fetch_env.reset() 39 | self._last_observation = observation 40 | return observation 41 | 42 | def seed(self, seed=0): 43 | return self.fetch_env.seed(seed=seed) 44 | 45 | def render(self, mode="human", *args, **kwargs): 46 | # See https://github.com/openai/gym/issues/1081 47 | self.fetch_env.env._render_callback() 48 | if mode == 'rgb_array': 49 | self.fetch_env.env._get_viewer(mode=mode).render() 50 | width, height = 3350, 1800 51 | data = self.fetch_env.env._get_viewer().read_pixels(width, height, depth=False) 52 | # original image is upside-down, so flip it 53 | return data[::-1, :, :] 54 | elif mode == 'human': 55 | self.fetch_env.env._get_viewer().render() 56 | 57 | return self.fetch_env.render(*args, **kwargs) 58 | 59 | def compute_reward(self, *args, **kwargs): 60 | return self.fetch_env.compute_reward(*args, **kwargs) 61 | 62 | class SlipperyPushEnv(gym.Env): 63 | 64 | def __init__(self, *args, **kwargs): 65 | self.fetch_env = gym.make("FetchPush-v1") 66 | 67 | for i in range(len(self.fetch_env.env.sim.model.geom_friction)): 68 | self.fetch_env.env.sim.model.geom_friction[i] = [18e-2, 5.e-3, 1e-4] 69 | 70 | self.metadata = self.fetch_env.metadata 71 | 72 | self.action_space = self.fetch_env.action_space 73 | self.observation_space = self.fetch_env.observation_space 74 | 75 | def step(self, action): 76 | return self.fetch_env.step(action) 77 | 78 | def reset(self): 79 | return self.fetch_env.reset() 80 | 81 | def seed(self, seed=0): 82 | return self.fetch_env.seed(seed=seed) 83 | 84 | def render(self, mode="human", *args, **kwargs): 85 | # See https://github.com/openai/gym/issues/1081 86 | self.fetch_env.env._render_callback() 87 | if mode == 'rgb_array': 88 | self.fetch_env.env._get_viewer().render() 89 | width, height = 3350, 1800 90 | data = self.fetch_env.env._get_viewer().read_pixels(width, height, depth=False) 91 | # original image is upside-down, so flip it 92 | return data[::-1, :, :] 93 | elif mode == 'human': 94 | self.fetch_env.env._get_viewer().render() 95 | 96 | return self.fetch_env.render(*args, **kwargs) 97 | 98 | def compute_reward(self, *args, **kwargs): 99 | return self.fetch_env.compute_reward(*args, **kwargs) 100 | 101 | -------------------------------------------------------------------------------- /common/adr/adr.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from common.discriminator.discriminator_rewarder import DiscriminatorRewarder 4 | from common.svpg.svpg import SVPG 5 | 6 | class ADR: 7 | def __init__( 8 | self, 9 | nparticles, 10 | nparams, 11 | state_dim, 12 | action_dim, 13 | temperature, 14 | svpg_rollout_length, 15 | svpg_horizon, 16 | max_step_length, 17 | reward_scale, 18 | initial_svpg_steps, 19 | seed, 20 | discriminator_batchsz 21 | ): 22 | assert nparticles > 2 23 | 24 | self.nparticles = nparticles 25 | self.nparams = nparams 26 | 27 | self.svpg_rollout_length = svpg_rollout_length 28 | self.svpg_horizon = svpg_horizon 29 | self.initial_svpg_steps = initial_svpg_steps 30 | 31 | self.seed = seed 32 | self.svpg_timesteps = 0 33 | 34 | self.discriminator_rewarder = DiscriminatorRewarder( 35 | state_dim=state_dim, 36 | 
action_dim=action_dim, 37 | discriminator_batchsz=discriminator_batchsz, 38 | reward_scale=reward_scale, 39 | ) 40 | 41 | self.svpg = SVPG( 42 | nparticles=nparticles, 43 | nparams=self.nparams, 44 | max_step_length=max_step_length, 45 | svpg_rollout_length=svpg_rollout_length, 46 | svpg_horizon=svpg_horizon, 47 | temperature=temperature, 48 | ) 49 | 50 | self.parameter_settings = np.ones( 51 | (self.nparticles, 52 | self.svpg_horizon, 53 | self.svpg.svpg_rollout_length, 54 | self.svpg.nparams) 55 | ) * -1 56 | 57 | def score_trajectories(self, randomized_trajectories): 58 | rewards = np.zeros((self.nparticles, self.svpg.svpg_rollout_length)) 59 | 60 | for i in range(self.nparticles): 61 | for t in range(self.svpg.svpg_rollout_length): 62 | # flatten and combine all randomized and reference trajectories for discriminator 63 | randomized_discrim_score_mean, _, _ = \ 64 | self.discriminator_rewarder.get_score(randomized_trajectories[i][t]) 65 | 66 | rewards[i][t] = randomized_discrim_score_mean 67 | 68 | return rewards 69 | 70 | def step_particles(self): 71 | if self.svpg_timesteps >= self.initial_svpg_steps: 72 | # Get sim instances from SVPG policy 73 | simulation_instances = self.svpg.step() 74 | 75 | index = self.svpg_timesteps % self.svpg_horizon 76 | self.parameter_settings[:, index, :, :] = simulation_instances 77 | 78 | else: 79 | # Creates completely randomized environment 80 | simulation_instances = np.ones((self.nparticles, 81 | self.svpg.svpg_rollout_length, 82 | self.svpg.nparams)) * -1 83 | 84 | assert (self.nparticles, self.svpg.svpg_rollout_length, self.svpg.nparams) \ 85 | == simulation_instances.shape 86 | 87 | # Reshape to work with vectorized environments 88 | simulation_instances = np.transpose(simulation_instances, (1, 0, 2)) 89 | 90 | self.svpg_timesteps += 1 91 | return simulation_instances 92 | 93 | def train(self, reference_trajectories, randomized_trajectories): 94 | rewards = self.score_trajectories(randomized_trajectories) 95 | self._train_particles(rewards) 96 | self._train_discriminator(reference_trajectories, randomized_trajectories) 97 | 98 | def _train_discriminator(self, reference_trajectories, randomized_trajectories): 99 | flattened_randomized = [randomized_trajectories[i][t] for i in range(self.nagents)] 100 | flattened_randomized = np.concatenate(flattened_randomized) 101 | 102 | flattened_reference = [reference_trajectories[i][t] for i in range(self.nagents)] 103 | flattened_reference = np.concatenate(flattened_reference) 104 | 105 | self.discriminator_rewarder.train_discriminator( 106 | flattened_reference, 107 | flattened_randomized, 108 | iterations=len(flattened_randomized) 109 | ) 110 | 111 | def _train_particles(self, rewards): 112 | self.svpg.train(rewards) 113 | 114 | -------------------------------------------------------------------------------- /common/randomizer/assets/shared.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /common/svpg/svpg_utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import os 4 | 5 | 6 | def 
parameters_to_vector(parameters, grad=False, both=False): 7 | """Convert parameters or/and their gradients to one vector 8 | Arguments: 9 | parameters (Iterable[Variable]): an iterator of Variables that are the 10 | parameters of a model. 11 | grad (bool): Vectorizes gradients if true, otherwise vectorizes params 12 | both (bool): If True, vectorizes both parameters and their gradients, 13 | `grad` has no effect in this case. Otherwise vectorizes parameters 14 | or gradients according to `grad`. 15 | Returns: 16 | The parameters or/and their gradients (each) represented by a single 17 | vector (th.Tensor, not Variable) 18 | """ 19 | # Flag for the device where the parameter is located 20 | param_device = None 21 | 22 | if not both: 23 | vec = [] 24 | if not grad: 25 | for param in parameters: 26 | # Ensure the parameters are located in the same device 27 | param_device = _check_param_device(param, param_device) 28 | vec.append(param.data.view(-1)) 29 | else: 30 | for param in parameters: 31 | param_device = _check_param_device(param, param_device) 32 | vec.append(param.grad.data.view(-1)) 33 | return torch.cat(vec) 34 | else: 35 | vec_params, vec_grads = [], [] 36 | for param in parameters: 37 | param_device = _check_param_device(param, param_device) 38 | vec_params.append(param.data.view(-1)) 39 | vec_grads.append(param.grad.data.view(-1)) 40 | return torch.cat(vec_params), torch.cat(vec_grads) 41 | 42 | def vector_to_parameters(vec, parameters, grad=True): 43 | """Convert one vector to the parameters or gradients of the parameters 44 | Arguments: 45 | vec (torch.Tensor): a single vector represents the parameters of a model. 46 | parameters (Iterable[Variable]): an iterator of Variables that are the 47 | parameters of a model. 48 | grad (bool): True for assigning de-vectorized `vec` to gradients 49 | """ 50 | # Ensure vec of type Variable 51 | if not isinstance(vec, torch.cuda.FloatTensor): 52 | raise TypeError('expected torch.Tensor, but got: {}' 53 | .format(torch.typename(vec))) 54 | # Flag for the device where the parameter is located 55 | param_device = None 56 | 57 | # Pointer for slicing the vector for each parameter 58 | pointer = 0 59 | if grad: 60 | for param in parameters: 61 | # Ensure the parameters are located in the same device 62 | param_device = _check_param_device(param, param_device) 63 | # The length of the parameter 64 | num_param = torch.prod(torch.LongTensor(list(param.size()))) 65 | param.grad.data = vec[pointer:pointer + num_param].view( 66 | param.size()) 67 | # Increment the pointer 68 | pointer += num_param 69 | else: 70 | for param in parameters: 71 | # Ensure the parameters are located in the same device 72 | param_device = _check_param_device(param, param_device) 73 | # The length of the parameter 74 | num_param = torch.prod(torch.LongTensor(list(param.size()))) 75 | param.data = vec[pointer:pointer + num_param].view( 76 | param.size()) 77 | # Increment the pointer 78 | pointer += num_param 79 | 80 | 81 | def _check_param_device(param, old_param_device): 82 | """This helper function is to check if the parameters are located 83 | in the same device. Currently, the conversion between model parameters 84 | and single vector form is not supported for multiple allocations, 85 | e.g. parameters in different GPUs, or mixture of CPU/GPU. 86 | Arguments: 87 | param ([Variable]): a Variable of a parameter of a model 88 | old_param_device (int): the device where the first parameter of a 89 | model is allocated. 
90 | Returns: 91 | old_param_device (int): report device for the first time 92 | """ 93 | 94 | # Meet the first parameter 95 | if old_param_device is None: 96 | old_param_device = param.get_device() if param.is_cuda else -1 97 | else: 98 | warn = False 99 | if param.is_cuda: # Check if in same GPU 100 | warn = (param.get_device() != old_param_device) 101 | else: # Check if in CPU 102 | warn = (old_param_device != -1) 103 | if warn: 104 | raise TypeError('Found two parameters on different devices, ' 105 | 'this is currently not supported.') 106 | return old_param_device -------------------------------------------------------------------------------- /arguments.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | """ 4 | Here are the param for the training 5 | 6 | """ 7 | def get_args(): 8 | parser = argparse.ArgumentParser() 9 | # the environment setting 10 | parser.add_argument('--n-epochs', type=int, default=50, help='the number of epochs to train the agent') 11 | parser.add_argument('--n-cycles', type=int, default=50, help='the times to collect samples per epoch') 12 | parser.add_argument('--n-batches', type=int, default=40, help='the times to update the network') 13 | parser.add_argument('--save-interval', type=int, default=5, help='the interval that save the trajectory') 14 | parser.add_argument('--seed', type=int, default=123, help='random seed') 15 | parser.add_argument('--num-workers', type=int, default=1, help='the number of cpus to collect samples') 16 | parser.add_argument('--replay-strategy', type=str, default='future', help='the HER strategy') 17 | parser.add_argument('--clip-return', type=float, default=50, help='if clip the returns') 18 | parser.add_argument('--save-dir', type=str, default='saved_models/', help='the path to save the models') 19 | parser.add_argument('--noise-eps', type=float, default=0.2, help='noise eps') 20 | parser.add_argument('--random-eps', type=float, default=0.3, help='random eps') 21 | parser.add_argument('--buffer-size', type=int, default=int(1e6), help='the size of the buffer') 22 | parser.add_argument('--replay-k', type=int, default=4, help='ratio to be replace') 23 | parser.add_argument('--clip-obs', type=float, default=200, help='the clip ratio') 24 | parser.add_argument('--batch-size', type=int, default=256, help='the sample batch size') 25 | parser.add_argument('--gamma', type=float, default=0.98, help='the discount factor') 26 | parser.add_argument('--action-l2', type=float, default=1, help='l2 reg') 27 | parser.add_argument('--lr-actor', type=float, default=0.001, help='the learning rate of the actor') 28 | parser.add_argument('--lr-critic', type=float, default=0.001, help='the learning rate of the critic') 29 | parser.add_argument('--polyak', type=float, default=0.95, help='the average coefficient') 30 | parser.add_argument('--n-test-rollouts', type=int, default=10, help='the number of tests') 31 | parser.add_argument('--clip-range', type=float, default=5, help='the clip range') 32 | parser.add_argument('--demo-length', type=int, default=20, help='the demo length') 33 | parser.add_argument('--cuda', action='store_true', help='if use gpu do the acceleration') 34 | parser.add_argument('--num-rollouts-per-mpi', type=int, default=10, help='the rollouts per mpi') 35 | parser.add_argument('--sp-polyak', type=float, default=0.1, help='Polyak Averaging Coefficient') 36 | parser.add_argument('--sp-gamma', type=float, default=0.1, help='Self play gamma') 37 | 
parser.add_argument('--sp-percent', type=float, default=0.1, help='Self Play Percentage') 38 | parser.add_argument('--friction', type=float, default=0.18, help='friction parameter to set') 39 | parser.add_argument('--approach', type=str, default='baseline', help='Different approaches for experiments') 40 | parser.add_argument('--svpg-rollout-length', type=int, default=5) 41 | parser.add_argument('--nmpi', type=int, default=8) # TODO! 42 | parser.add_argument('--mode', type=str, default='default') 43 | parser.add_argument('--window-size', type=int, default=20) 44 | parser.add_argument("--policy-name", default="OurDDPG") # Policy name 45 | parser.add_argument("--env-name", default="ErgoPushRandomizedEnv-Headless-v0") # OpenAI gym environment name 46 | parser.add_argument("--start-timesteps", default=1e4, 47 | type=int) # How many time steps purely random policy is run for 48 | parser.add_argument("--eval-freq", default=5e3, type=float) # How often (time steps) we evaluate 49 | parser.add_argument("--max-timesteps", default=1e6, type=float) # Max time steps to run environment for 50 | parser.add_argument("--save-models", default=True, action="store_true") # Whether or not models are saved 51 | parser.add_argument("--expl-noise", default=0.1, type=float) # Std of Gaussian exploration noise 52 | parser.add_argument("--discount", default=0.99, type=float) # Discount factor 53 | parser.add_argument("--tau", default=0.005, type=float) # Target network update rate 54 | parser.add_argument("--policy-noise", default=0.2, type=float) # Noise added to target policy during critic update 55 | parser.add_argument("--noise-clip", default=0.5, type=float) # Range to clip target policy noise 56 | parser.add_argument("--policy-freq", default=2, type=int) # Frequency of delayed policy updates 57 | parser.add_argument("--nparticles", default=1, type=int) 58 | parser.add_argument('--n-params', type=int, default=1) 59 | parser.add_argument('--only-sp', type=bool, default=False) 60 | 61 | 62 | # Add more arguments if needed. 63 | 64 | args = parser.parse_args() 65 | 66 | return args 67 | -------------------------------------------------------------------------------- /common/randomizer/controllers/hook_controller.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | DEBUG = False 4 | 5 | def get_move_action(observation, target_position, atol=1e-3, gain=10., close_gripper=False): 6 | """ 7 | Move an end effector to a position and orientation. 8 | """ 9 | # Get the currents 10 | current_position = observation['observation'][:3] 11 | 12 | action = gain * np.subtract(target_position, current_position) 13 | if close_gripper: 14 | gripper_action = -1. 15 | else: 16 | gripper_action = 0. 
17 | action = np.hstack((action, gripper_action)) 18 | 19 | return action 20 | 21 | def block_is_grasped(obs, gripper_position, block_position, relative_grasp_position, atol=1e-3): 22 | block_inside = block_inside_grippers(gripper_position, block_position, relative_grasp_position, atol=atol) 23 | grippers_closed = grippers_are_closed(obs, atol=atol) 24 | 25 | return block_inside and grippers_closed 26 | 27 | def block_inside_grippers(gripper_position, block_position, relative_grasp_position, atol=1e-3): 28 | relative_position = np.subtract(gripper_position, block_position) 29 | 30 | return np.sum(np.subtract(relative_position, relative_grasp_position)**2) < atol 31 | 32 | def grippers_are_closed(obs, atol=1e-3): 33 | gripper_state = obs['observation'][9:11] 34 | return abs(gripper_state[0]) - 0.024 <= atol 35 | 36 | def grippers_are_open(obs, atol=1e-3): 37 | gripper_state = obs['observation'][9:11] 38 | 39 | return abs(gripper_state[0] - 0.05) <= atol 40 | 41 | 42 | def pick_at_position(obs, block_position, place_position, relative_grasp_position=(0., 0., -0.02), workspace_height=0.1, atol=1e-3): 43 | """ 44 | Returns 45 | ------- 46 | action : [float] * 4 47 | """ 48 | gripper_position = obs['observation'][:3] 49 | 50 | # If the gripper is already grasping the block 51 | if block_is_grasped(obs, gripper_position, block_position, relative_grasp_position, atol=atol): 52 | 53 | # If the block is already at the place position, do nothing except keep the gripper closed 54 | if np.sum(np.subtract(block_position, place_position)**2) < atol: 55 | if DEBUG: 56 | print("The block is already at the place position; do nothing") 57 | return np.array([0., 0., 0., -1.]) 58 | 59 | # Move to the place position while keeping the gripper closed 60 | target_position = np.add(place_position, relative_grasp_position) 61 | target_position[2] += workspace_height/2. 
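        # Keep the carry target slightly above the place position so the grasped object clears
        # the table while it is moved into place.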
62 | if DEBUG: 63 | print("Move to above the place position") 64 | return get_move_action(obs, target_position, atol=atol, close_gripper=True) 65 | 66 | # If the block is ready to be grasped 67 | if block_inside_grippers(gripper_position, block_position, relative_grasp_position, atol=atol): 68 | 69 | # Close the grippers 70 | if DEBUG: 71 | print("Close the grippers") 72 | return np.array([0., 0., 0., -1.]) 73 | 74 | # If the gripper is above the block 75 | target_position = np.add(block_position, relative_grasp_position) 76 | if (gripper_position[0] - target_position[0])**2 + (gripper_position[1] - target_position[1])**2 < atol: 77 | 78 | # If the grippers are closed, open them 79 | if not grippers_are_open(obs, atol=atol): 80 | if DEBUG: 81 | print("Open the grippers") 82 | return np.array([0., 0., 0., 1.]) 83 | 84 | # Move down to grasp 85 | if DEBUG: 86 | print("Move down to grasp") 87 | return get_move_action(obs, target_position, atol=atol) 88 | 89 | 90 | # Else move the gripper to above the block 91 | target_position[2] += workspace_height 92 | if DEBUG: 93 | print("Move to above the block") 94 | return get_move_action(obs, target_position, atol=atol) 95 | 96 | 97 | def get_hook_control(obs, atol=1e-2): 98 | """ 99 | Returns 100 | ------- 101 | action : [float] * 4 102 | """ 103 | gripper_position = obs['observation'][:3] 104 | block_position = obs['observation'][3:6] 105 | hook_position = obs['observation'][25:28] 106 | place_position = obs['desired_goal'] 107 | 108 | # Done 109 | if abs(block_position[0] - place_position[0]) + abs(block_position[1] - place_position[1]) <= 1e-2: 110 | if DEBUG: 111 | print("DONE") 112 | return np.array([0., 0., 0., -1.]) 113 | 114 | # Grasp and lift the hook 115 | if not block_is_grasped(obs, gripper_position, hook_position, relative_grasp_position=(0., 0., -0.05), atol=atol): 116 | if DEBUG: 117 | print("Grasping and lifting the hook") 118 | hook_target = hook_position.copy() 119 | hook_target[2] = 0.5 120 | return pick_at_position(obs, hook_position, hook_target, relative_grasp_position=(0., 0., -0.05)) 121 | 122 | # Align the hook to sweep 123 | hook_target = np.array([block_position[0] - 0.5, block_position[1] - 0.05, 0.45]) 124 | 125 | if hook_position[0] >= hook_target[0] + 0.1 or hook_position[1] + 0.1 <= hook_target[1]: 126 | if DEBUG: 127 | print("Aligning to sweep", hook_position[0] + atol, hook_target[0], hook_position[1] + atol, hook_target[1]) 128 | return get_move_action(obs, hook_target, close_gripper=True) 129 | 130 | if DEBUG: 131 | print("Sweeping back") 132 | 133 | direction = np.subtract(place_position, block_position) 134 | direction = direction[:2] / np.linalg.norm(direction[:2]) 135 | 136 | return np.array([0.4 * direction[0], 0.4 * direction[1], 0., -1.]) 137 | 138 | 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /realrobot_evals/realrobot_test.py: -------------------------------------------------------------------------------- 1 | import h5py 2 | import torch 3 | import time 4 | import numpy as np 5 | import gym 6 | import argparse 7 | import os 8 | import os.path as osp 9 | from tqdm import tqdm, trange 10 | 11 | from common.agents.ddpg.ddpg import DDPG 12 | import gym_ergojr 13 | 14 | parser = argparse.ArgumentParser(description='Real Robot Experiment Driver') 15 | 16 | parser.add_argument('--nepisodes', type=int, default=25, help='Number of trials per *seed*') 17 | parser.add_argument('--experiment-prefix', type=str, default='real', help='Prefix to append to 
logs') 18 | parser.add_argument('--log-dir', type=str, default='/data/fgolemo/UADR-results/real-robot', help='Log Directory Prefix') 19 | parser.add_argument('--model-dir', type=str, default='saved_models/', help='Model Directory Prefix') # TODO 20 | parser.add_argument('--cont', type=str, default='190329-180631', help='To continue existing file, enter timestamp here') 21 | parser.add_argument('--cont', type=str, default='', help='To continue existing file, enter timestamp here') 22 | 23 | args = parser.parse_args() 24 | 25 | if len(args.cont) == 0: 26 | TIMESTAMP = time.strftime("%y%m%d-%H%M%S") 27 | file_flag = "w" 28 | 29 | else: 30 | TIMESTAMP = args.cont 31 | file_flag = "r+" 32 | 33 | file_path = "{}/{}-{}.hdf5".format(args.log_dir, args.experiment_prefix, TIMESTAMP) 34 | 35 | MAX_EPISODE_STEPS = 100 36 | EPISODES = args.nepisodes 37 | SEED = ["31", "32", "33", "34", "35"] 38 | # Policies to look for 39 | policies = ['udr', 'adr-1.0', 'baseline-sp-1.0'] 40 | 41 | env_name = "ErgoReacher-MultiGoal-Live-v1" 42 | npa = np.array 43 | 44 | img_buffer = [] 45 | 46 | if not osp.exists(args.log_dir): 47 | os.makedirs(args.log_dir) 48 | 49 | 50 | env = gym.make(env_name) 51 | state_dim = env.observation_space.shape[0] 52 | action_dim = env.action_space.shape[0] 53 | 54 | n_episodes = 3 # num of episodes to run 55 | max_timesteps = 100 # max timesteps in one episode 56 | render = True # render the environment 57 | save_gif = False # png images are saved in gif folder 58 | 59 | state_dim = env.observation_space.shape[0] 60 | action_dim = env.action_space.shape[0] 61 | max_action = float(env.action_space.high[0]) 62 | goal_dim = 2 63 | 64 | policy = DDPG(args, state_dim, action_dim, max_action, goal_dim) 65 | 66 | obs = env.reset() 67 | 68 | 69 | with h5py.File(file_path, file_flag) as f: 70 | for policy_type in tqdm(policies, desc="approaches"): 71 | for torque_idx, torque in enumerate(tqdm(SEED, desc="torques...")): 72 | 73 | model_path = osp.join(args.model_dir, policy_type, f"Variant-{torque}") 74 | print(model_path) 75 | no_models = len(os.listdir(model_path)) 76 | 77 | if policy_type not in f: # if dataset doesn't have these tables 78 | log_group = f.create_group(policy_type) 79 | rewards = log_group.create_dataset("rewards", (no_models, len(SEED), EPISODES, MAX_EPISODE_STEPS), 80 | dtype=np.float32) 81 | distances = log_group.create_dataset("distances", (no_models, len(SEED), EPISODES, MAX_EPISODE_STEPS), 82 | dtype=np.float32) 83 | trajectories = log_group.create_dataset("trajectories", 84 | (no_models, len(SEED), EPISODES, MAX_EPISODE_STEPS, 24), 85 | dtype=np.float32) 86 | imgs = log_group.create_dataset("images", 87 | (no_models, len(SEED), EPISODES, MAX_EPISODE_STEPS, 480, 640, 3), 88 | dtype=np.uint8, compression="lzf") 89 | else: # if tables are in dataset, grab their pointers 90 | rewards = f.get(f"/{policy_type}/rewards") 91 | distances = f.get(f"/{policy_type}/distances") 92 | trajectories = f.get(f"/{policy_type}/trajectories") 93 | imgs = f.get(f"/{policy_type}/images") 94 | 95 | tqdm.write(f'Starting analysis of {policy_type} - variant {torque}') 96 | 97 | for model_idx, actorpth in enumerate(tqdm(os.listdir(model_path), desc="models....")): 98 | # Load model weights 99 | policy.load(os.path.join(model_path, actorpth)) 100 | 101 | for ep_num in trange(EPISODES, desc="episodes.."): 102 | non_zero_steps = np.count_nonzero(trajectories[model_idx, torque_idx, ep_num], axis=1) 103 | 104 | if np.count_nonzero(non_zero_steps) == 0: 105 | obs = env.reset() 106 | done = False 107 | 
cumulative = 0 108 | counter = 0 109 | img_buffer = [] 110 | while counter < MAX_EPISODE_STEPS: 111 | with torch.no_grad(): 112 | # print(counter, obs) 113 | action = policy.select_action(np.array(obs)) 114 | 115 | nobs, reward, _, misc = env.step(action) 116 | cumulative += reward 117 | 118 | trajectories[model_idx, torque_idx, ep_num, counter, :] = np.concatenate( 119 | [obs, action, nobs]) 120 | rewards[model_idx, torque_idx, ep_num, counter] = reward 121 | distances[model_idx, torque_idx, ep_num, counter] = misc["distance"] 122 | img_buffer.append(np.copy(misc["img"])) 123 | 124 | obs = np.copy(nobs) 125 | counter += 1 126 | 127 | imgs[model_idx, torque_idx, ep_num, :counter, :, :, :] = img_buffer 128 | 129 | f.flush() 130 | 131 | env.reset() 132 | -------------------------------------------------------------------------------- /common/randomizer/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | import os.path as osp 3 | 4 | 5 | register( 6 | id='FetchPushRandomizedEnv-v0', 7 | entry_point='common.randomizer.randomized_fetchpush:FetchPushRandomizedEnv', 8 | max_episode_steps=50, 9 | kwargs={ 10 | 'config': 'common/randomizer/config/FetchPushRandomized/random.json', 11 | 'xml_name': 'push.xml' 12 | } 13 | ) 14 | register( 15 | id='FetchSlideRandomizedEnv-v0', 16 | entry_point='common.randomizer.randomized_fetchslide:FetchSlideRandomizedEnv', 17 | max_episode_steps=50, 18 | kwargs={ 19 | 'config': 'common/randomizer/config/FetchSlideRandomized/random.json', 20 | 'xml_name': 'slide.xml' 21 | } 22 | ) 23 | 24 | register( 25 | id='FetchHookRandomizedEnv-v0', 26 | entry_point='common.randomizer.randomized_residual_hook:FetchHookEnv', 27 | max_episode_steps=100, 28 | kwargs={ 29 | 'config': 'common/randomizer/config/FetchHookRandomized/random.json', 30 | 'xml_name': 'hook.xml' 31 | } 32 | ) 33 | register( 34 | id='FetchHookDefaultEnv-v0', 35 | entry_point='common.randomizer.randomized_residual_hook:FetchHookEnv', 36 | max_episode_steps=100, 37 | kwargs={ 38 | 'config': 'common/randomizer/config/FetchHookRandomized/default.json', 39 | 'xml_name': 'hook.xml' 40 | } 41 | ) 42 | register( 43 | id='NoisyFetchHookRandomizedEnv-v0', 44 | entry_point='common.randomizer.randomized_residual_hook:NoisyResidualFetchHookEnv', 45 | max_episode_steps=100, 46 | kwargs={ 47 | 'config': 'common/randomizer/config/FetchHookRandomized/random.json', 48 | 'xml_name': 'hook.xml' 49 | } 50 | ) 51 | register( 52 | id='NoisyFetchHookDefaultEnv-v0', 53 | entry_point='common.randomizer.randomized_residual_hook:NoisyResidualFetchHookEnv', 54 | max_episode_steps=100, 55 | kwargs={ 56 | 'config': 'common/randomizer/config/FetchHookRandomized/default.json', 57 | 'xml_name': 'hook.xml' 58 | } 59 | ) 60 | register( 61 | id='ErgoPushRandomizedEnv-Headless-v0', 62 | entry_point='common.randomizer.randomized_ergo_pusher:ErgoPusherRandomizedEnv', 63 | max_episode_steps=100, 64 | reward_threshold=0, 65 | kwargs={ 66 | 'config': 'common/randomizer/config/ErgoPushRandomized/random.json', 67 | 'headless': True 68 | } 69 | ) 70 | register( 71 | id='ErgoPushDefaultEnv-Headless-v0', 72 | entry_point='common.randomizer.randomized_ergo_pusher:ErgoPusherRandomizedEnv', 73 | max_episode_steps=100, 74 | reward_threshold=0, 75 | kwargs={ 76 | 'config': 'common/randomizer/config/ErgoPushRandomized/default.json', 77 | 'headless': True 78 | } 79 | ) 80 | register( 81 | id='ErgoPushHardEnv-Headless-v0', 82 | 
entry_point='common.randomizer.randomized_ergo_pusher:ErgoPusherRandomizedEnv', 83 | max_episode_steps=100, 84 | reward_threshold=0, 85 | kwargs={ 86 | 'config': 'common/randomizer/config/ErgoPushRandomized/hard.json', 87 | 'headless': True 88 | } 89 | ) 90 | register( 91 | id='ErgoReacherRandomizedEnv-Headless-v0', 92 | entry_point='common.randomizer.randomized_ergoreacher:ErgoReacherRandomizedEnv', 93 | max_episode_steps=100, 94 | reward_threshold=0, 95 | kwargs={ 96 | 'config': 'common/randomizer/config/ErgoReacherRandomized/random.json', 97 | 'headless': True, 98 | 'simple': True, 99 | 'goal_halfsphere': True, 100 | 'multi_goal': True 101 | } 102 | ) 103 | register( 104 | id='ErgoReacherDefaultEnv-Headless-v0', 105 | entry_point='common.randomizer.randomized_ergoreacher:ErgoReacherRandomizedEnv', 106 | max_episode_steps=100, 107 | kwargs={ 108 | 'config': 'common/randomizer/config/ErgoReacherRandomized/default.json', 109 | 'headless': True, 110 | 'simple': True, 111 | 'goal_halfsphere': True, 112 | 'multi_goal': True 113 | } 114 | ) 115 | register( 116 | id='ErgoReacherHardEnv-Headless-v0', 117 | entry_point='common.randomizer.randomized_ergoreacher:ErgoReacherRandomizedEnv', 118 | max_episode_steps=100, 119 | kwargs={ 120 | 'config': 'common/randomizer/config/ErgoReacherRandomized/hard.json', 121 | 'headless': True, 122 | 'simple': True, 123 | 'goal_halfsphere': True, 124 | 'multi_goal': True 125 | } 126 | ) 127 | register( 128 | id='ErgoReacherDefaultEnv-Graphical-v0', 129 | entry_point='common.randomizer.randomized_ergoreacher:ErgoReacherRandomizedEnv', 130 | max_episode_steps=100, 131 | kwargs={ 132 | 'config': 'common/randomizer/config/ErgoReacherRandomized/hard.json', 133 | 'headless': False, 134 | 'simple': True, 135 | 'goal_halfsphere': True, 136 | 'multi_goal': True 137 | } 138 | ) 139 | register( 140 | id='ErgoPushRandomizedEnv-Graphical-v0', 141 | entry_point='common.randomizer.randomized_ergo_pusher:ErgoPusherRandomizedEnv', 142 | max_episode_steps=100, 143 | reward_threshold=0, 144 | kwargs={ 145 | 'config': 'common/randomizer/config/ErgoPushRandomized/random.json', 146 | 'headless': False 147 | } 148 | ) 149 | register( 150 | id='ErgoPushHardEnv-Graphical-v0', 151 | entry_point='common.randomizer.randomized_ergo_pusher:ErgoPusherRandomizedEnv', 152 | max_episode_steps=100, 153 | reward_threshold=0, 154 | kwargs={ 155 | 'config': 'common/randomizer/config/ErgoPushRandomized/hard.json', 156 | 'headless': False 157 | } 158 | ) 159 | 160 | register( 161 | id='ResFetchPushRandomizedEnv-v0', 162 | entry_point='common.randomizer.randomized_residual_push:ResidualPushRandomizedEnv', 163 | max_episode_steps=50, 164 | kwargs={ 165 | 'config': 'common/randomizer/config/FetchPushRandomized/random.json', 166 | 'xml_name': 'push.xml' 167 | } 168 | ) 169 | register( 170 | id='ResFetchPushDefaultEnv-v0', 171 | entry_point='common.randomizer.randomized_residual_push:ResidualPushRandomizedEnv', 172 | max_episode_steps=50, 173 | kwargs={ 174 | 'config': 'common/randomizer/config/FetchPushRandomized/default.json', 175 | 'xml_name': 'push.xml' 176 | } 177 | ) -------------------------------------------------------------------------------- /common/randomizer/assets/pusher.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 102 | -------------------------------------------------------------------------------- /experiments/ddpg_train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import 
torch 3 | import gym 4 | import argparse 5 | import os 6 | import gym_ergojr 7 | from common.agents.ddpg.replay_buffer import ReplayBuffer 8 | from common.agents.ddpg.ddpg import DDPG 9 | import common.randomizer 10 | from common.randomizer.wrappers import RandomizedEnvWrapper 11 | from common.adr.adr import ADR 12 | import multiprocessing as mp 13 | from arguments import get_args 14 | 15 | 16 | def get_env_params(env): 17 | observation = env.reset() 18 | # close the environment 19 | params = {'obs': observation["observation"].shape[0], 'goal': observation["achieved_goal"].shape[0], 'action': env.action_space.shape[0], 20 | 'action_max': env.action_space.high[0], 'max_timesteps': env._max_episode_steps} 21 | return params 22 | 23 | 24 | args = get_args() 25 | args.save_models = True 26 | args.num_workers = 8 27 | 28 | env = gym.make(args.env_name) 29 | env_param = get_env_params(env) 30 | 31 | # jobid = os.environ['SLURM_ARRAY_TASK_ID'] 32 | # seed = [40, 41, 42, 43, 44] 33 | # args.seed = seed[int(jobid) - 1] # Set seeds 34 | 35 | env.seed(args.seed) 36 | torch.manual_seed(args.seed) 37 | np.random.seed(args.seed) 38 | args.nparticles = mp.cpu_count() - 1 39 | 40 | state_dim = env_param["obs"] 41 | action_dim = env_param["action"] 42 | max_action = env_param["action_max"] 43 | goal_dim = env_param["goal"] 44 | total_timesteps = 0 45 | timesteps_since_eval = 0 46 | episode_num = 0 47 | done = True 48 | env = RandomizedEnvWrapper(env, seed=args.seed) 49 | env.reset() 50 | svpg_rewards = [] 51 | 52 | # Initialize policy 53 | policy = DDPG(args, state_dim, action_dim, max_action, goal_dim) 54 | 55 | replay_buffer = ReplayBuffer(state_dim, action_dim) 56 | 57 | # ADR integration 58 | adr = ADR( 59 | nparticles=args.num_workers, 60 | nparams=args.n_params, 61 | state_dim=1, 62 | action_dim=1, 63 | temperature=10, 64 | svpg_rollout_length=args.svpg_rollout_length, 65 | svpg_horizon=25, 66 | max_step_length=0.05, 67 | reward_scale=1, 68 | initial_svpg_steps=0, 69 | seed=args.seed, 70 | discriminator_batchsz=320, 71 | ) 72 | count = 0 73 | args.save_dir = os.getcwd() + '/' + os.path.join(args.save_dir, 74 | 'sp{}polyak{}'.format(args.sp_percent, args.polyak) + '-' + args.approach, 75 | str(args.seed)) 76 | model_path = os.path.join(args.save_dir, args.env_name) 77 | 78 | if not os.path.isdir(model_path): 79 | os.makedirs(model_path) 80 | args.save_dir = model_path 81 | alice_envs = [] 82 | alice_envs_total = [] 83 | 84 | while total_timesteps < args.max_timesteps: 85 | if done: 86 | env.randomize([-1] * args.n_params) # Randomize the environment 87 | if total_timesteps != 0: 88 | print("Total T: {} Episode Num: {} Episode T: {} Reward: {}".format(total_timesteps, episode_num, 89 | episode_timesteps, episode_reward)) 90 | # Reset environment 91 | obs = env.reset() 92 | done = False 93 | episode_reward = 0 94 | episode_timesteps = 0 95 | episode_num += 1 96 | 97 | env_settings = adr.step_particles() 98 | env_settings = np.ascontiguousarray(env_settings) 99 | count = 0 100 | svpg_index = total_timesteps % args.svpg_rollout_length 101 | 102 | if np.random.random() < args.sp_percent: # Self-play loop 103 | env.randomize(["default"] * args.n_params) 104 | 105 | bobs_goal_state, alice_time = policy.alice_loop(args, env, env_param) # Alice Loop 106 | 107 | if total_timesteps % int(1e4) == 0: 108 | alice_envs_total.append(alice_envs) 109 | alice_envs = [] 110 | 111 | if not args.only_sp: 112 | multiplier = np.clip(env_settings[svpg_index][0][:args.n_params], 0, 1.0) 113 | alice_envs.append(multiplier) 114 
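                # `multiplier` holds one ADR/SVPG particle's proposed randomization
                # setting for this rollout step: adr.step_particles() returns
                # normalised parameter values, clipped to [0, 1] above, which the
                # RandomizedEnvWrapper presumably rescales into the physical ranges
                # declared in the env's JSON config (common/randomizer/config/...).
                # Illustrative usage, assuming n_params == 2 (values made up):
                #     env.randomize([0.25, 0.90])        # scaled dynamics for Bob
                #     env.randomize(["default"] * 2)     # back to nominal dynamics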
| env.randomize(multiplier) # Randomize the env for bob 115 | bob_time, done = policy.bob_loop(env, env_param, bobs_goal_state, alice_time, replay_buffer) # Bob Loop 116 | alice_reward = policy.train_alice(alice_time, bob_time) # Train alice 117 | svpg_rewards.append(alice_reward) 118 | if len(svpg_rewards) == args.num_workers * args.svpg_rollout_length: # ADR training 119 | all_rewards = np.reshape(np.asarray(svpg_rewards), (args.num_workers, args.svpg_rollout_length)) 120 | adr._train_particles(all_rewards) 121 | svpg_rewards = [] 122 | else: 123 | env.randomize(["default"] * args.n_params) 124 | bob_time, done = policy.bob_loop(env, env_param, bobs_goal_state, alice_time, replay_buffer) # Bob Loop with only_sp=True 125 | alice_reward = policy.train_alice(alice_time, bob_time) 126 | else: 127 | observation = env.reset() 128 | obs = observation["observation"] 129 | # Select action randomly or according to policy 130 | if total_timesteps < args.start_timesteps: 131 | action = env.action_space.sample() 132 | else: 133 | action = policy.select_action(np.array(obs)) 134 | if args.expl_noise != 0: 135 | action = (action + np.random.normal(0, args.expl_noise, size=env.action_space.shape[0])).clip( 136 | env.action_space.low, env.action_space.high) 137 | 138 | # Perform action 139 | new_obs, reward, done, _ = env.step(action) 140 | done_bool = 0 if episode_timesteps + 1 == env_param['max_timesteps'] else float(done) 141 | done = done or episode_timesteps + 1 == env_param['max_timesteps'] 142 | episode_reward += reward 143 | 144 | # Store data in replay buffer 145 | replay_buffer.add(obs, action, new_obs["observation"], reward, done_bool) 146 | obs = new_obs["observation"] 147 | # Train the policy after collecting sufficient data 148 | if total_timesteps >= args.start_timesteps: 149 | policy.train(replay_buffer, args.batch_size) 150 | 151 | if timesteps_since_eval >= args.eval_freq: 152 | timesteps_since_eval %= args.eval_freq 153 | 154 | if args.save_models: policy.save(f'model_{total_timesteps}', 155 | directory=args.save_dir) 156 | np.save(f"{args.save_dir}/alice_envs.npy", alice_envs_total) 157 | print("---------------------------------------") 158 | print("Env Name: %s | Seed : %s | sp-percent : %s" % (args.env_name, args.seed, args.sp_percent)) 159 | print("---------------------------------------") 160 | 161 | episode_timesteps += 1 162 | total_timesteps += 1 163 | timesteps_since_eval += 1 164 | 165 | if args.save_models: policy.save(f'model_{total_timesteps}', directory=args.save_dir) 166 | -------------------------------------------------------------------------------- /common/agents/ddpg/ddpg.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | from common.agents.selfplay_models import AlicePolicyFetch 7 | 8 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 9 | 10 | 11 | # Re-tuned version of Deep Deterministic Policy Gradients (DDPG) 12 | # Paper: https://arxiv.org/abs/1509.02971 13 | 14 | 15 | class Actor(nn.Module): 16 | def __init__(self, state_dim, action_dim, max_action): 17 | super(Actor, self).__init__() 18 | 19 | self.l1 = nn.Linear(state_dim, 400) 20 | self.l2 = nn.Linear(400, 300) 21 | self.l3 = nn.Linear(300, action_dim) 22 | 23 | self.max_action = max_action 24 | 25 | def forward(self, state): 26 | a = F.relu(self.l1(state)) 27 | a = F.relu(self.l2(a)) 28 | return self.max_action * 
torch.tanh(self.l3(a)) 29 | 30 | 31 | class Critic(nn.Module): 32 | def __init__(self, state_dim, action_dim): 33 | super(Critic, self).__init__() 34 | 35 | self.l1 = nn.Linear(state_dim + action_dim, 400) 36 | self.l2 = nn.Linear(400, 300) 37 | self.l3 = nn.Linear(300, 1) 38 | 39 | def forward(self, state, action): 40 | q = F.relu(self.l1(torch.cat([state, action], 1))) 41 | q = F.relu(self.l2(q)) 42 | return self.l3(q) 43 | 44 | 45 | class DDPG(object): 46 | def __init__(self, args, state_dim, action_dim, max_action, goal_dim, discount=0.99, tau=0.005): 47 | self.args = args 48 | self.goal_dim = goal_dim 49 | self.actor = Actor(state_dim, action_dim, max_action).to(device) # Bob's policy 50 | self.actor_target = copy.deepcopy(self.actor) # Alice acting policy 51 | self.actor_optimizer = torch.optim.Adam(self.actor.parameters()) 52 | 53 | self.critic = Critic(state_dim, action_dim).to(device) # Bob's critic network 54 | self.critic_target = copy.deepcopy(self.critic) # Alice's critic network 55 | self.critic_optimizer = torch.optim.Adam(self.critic.parameters()) 56 | 57 | self.alice_policy = AlicePolicyFetch(self.args, goal_dim, action_dim=1) # Alice Policy 58 | 59 | self.discount = discount 60 | self.tau = tau 61 | 62 | def select_action(self, state): 63 | state = torch.FloatTensor(state.reshape(1, -1)).to(device) 64 | return self.actor(state).cpu().data.numpy().flatten() 65 | 66 | 67 | def train(self, replay_buffer, batch_size=100): 68 | 69 | # Sample replay buffer 70 | state, action, next_state, reward, not_done = replay_buffer.sample(batch_size) 71 | 72 | # Compute the target Q value 73 | target_Q = self.critic_target(next_state, self.actor_target(next_state)) 74 | target_Q = reward + (not_done * self.discount * target_Q).detach() 75 | 76 | # Get current Q estimate 77 | current_Q = self.critic(state, action) 78 | 79 | # Compute critic loss 80 | critic_loss = F.mse_loss(current_Q, target_Q) 81 | 82 | # Optimize the critic 83 | self.critic_optimizer.zero_grad() 84 | critic_loss.backward() 85 | self.critic_optimizer.step() 86 | 87 | # Compute actor loss 88 | actor_loss = -self.critic(state, self.actor(state)).mean() 89 | 90 | # Optimize the actor 91 | self.actor_optimizer.zero_grad() 92 | actor_loss.backward() 93 | self.actor_optimizer.step() 94 | 95 | # Update the frozen target models 96 | for param, target_param in zip(self.critic.parameters(), self.critic_target.parameters()): 97 | target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) 98 | 99 | for param, target_param in zip(self.actor.parameters(), self.actor_target.parameters()): 100 | target_param.data.copy_(self.tau * param.data + (1 - self.tau) * target_param.data) 101 | 102 | def alice_loop(self, args, env, env_params): 103 | alice_done = False 104 | alice_time = 0 105 | env.randomize(["default"] * args.n_params) 106 | observation = env.reset() 107 | 108 | obs = observation["observation"] 109 | ag = observation["achieved_goal"] 110 | alice_state = np.concatenate([ag, np.zeros(ag.shape[0])]) 111 | 112 | while not alice_done and (alice_time < env_params['max_timesteps']): 113 | observation = torch.FloatTensor(obs.reshape(1, -1)).to(device) 114 | pi = self.actor_target(observation) 115 | action = pi.cpu().data.numpy().flatten() 116 | 117 | observation_new, reward, env_done, _ = env.step(action) 118 | obs_new = observation_new 119 | ag_new = observation_new["achieved_goal"] 120 | alice_signal = self.alice_policy.select_action(alice_state) 121 | bobs_goal_state = ag_new 122 | 123 | # Stopping Criteria 
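            # Alice stops when the env terminates, when the step budget runs out,
            # or when her stop-signal fires (after at least one step); her last
            # achieved_goal is handed to Bob as `bobs_goal_state`. train_alice()
            # then pays her
            #     reward_alice = sp_gamma * max(0, bob_time - alice_time)
            # so she is rewarded for goals she reaches quickly but Bob reaches
            # slowly, the usual asymmetric self-play incentive. E.g. with an
            # illustrative sp_gamma = 0.1, alice_time = 10 and bob_time = 30 give
            # reward_alice = 0.1 * 20 = 2.0.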
124 | if alice_signal > np.random.random(): alice_done = True 125 | alice_done = env_done or alice_time + 1 == env_params['max_timesteps'] or alice_signal and alice_time >= 1 126 | if not alice_done: 127 | alice_state[env_params["goal"]:] = ag_new 128 | bobs_goal_state = ag_new 129 | alice_time += 1 130 | self.alice_policy.log(0.0) 131 | obs = obs_new["observation"] 132 | ag = ag_new 133 | 134 | return bobs_goal_state, alice_time 135 | 136 | def bob_loop(self, env, env_params, bobs_goal_state, alice_time, replay_buffer): 137 | observation = env.reset() 138 | obs = observation["observation"] 139 | bob_done = False 140 | bob_time = 0 141 | while not bob_done and alice_time + bob_time < env_params['max_timesteps']: 142 | action = self.select_action(obs) 143 | new_obs, reward, env_done, _ = env.step(action) 144 | bob_signal = self._check_closeness(new_obs["achieved_goal"], bobs_goal_state) 145 | bob_done = env_done or bob_signal or bob_done 146 | 147 | if not bob_done: 148 | replay_buffer.add(obs, action, new_obs["observation"], reward, bob_done) 149 | obs = new_obs["observation"] 150 | bob_time += 1 151 | 152 | return bob_time, bob_done 153 | 154 | def train_alice(self, alice_time, bob_time): 155 | reward_alice = self.args.sp_gamma * max(0, bob_time - alice_time) 156 | self.alice_policy.log(reward_alice) 157 | self.alice_policy.finish_episode(gamma=0.99) 158 | return reward_alice 159 | 160 | @staticmethod 161 | def _check_closeness(state, goal): 162 | dist = np.linalg.norm(state - goal) 163 | return dist < 0.025 164 | 165 | def save(self, filename, directory): 166 | torch.save(self.actor.state_dict(), '%s/%s_actor.pth' % (directory, filename)) 167 | torch.save(self.critic.state_dict(), '%s/%s_critic.pth' % (directory, filename)) 168 | 169 | def load(self, filename, directory): 170 | self.actor.load_state_dict(torch.load('%s/%s' % (directory, filename), map_location=lambda storage, loc: storage)) 171 | # self.critic.load_state_dict(torch.load('%s/%s_critic.pth' % (directory, filename), map_location='cpu')) 172 | -------------------------------------------------------------------------------- /common/randomizer/assets/robot.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | -------------------------------------------------------------------------------- /common/randomizer/complex_hook_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym import utils, spaces 4 | from gym.utils import seeding 5 | from gym.envs.robotics import rotations, fetch_env 6 | import gym 7 | 8 | 9 | import glob 10 | import os 11 | 12 | 13 | from .controllers.hook_controller import get_hook_control 14 | 15 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) 16 | 17 | 18 | class ComplexHookSingleObjectEnv(fetch_env.FetchEnv, utils.EzPickle): 19 | def __init__(self, xml_file=None): 20 | 
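        # The dict below pins the robot's base slide joints and sets the free
        # joints of the block (object0) and the hook; xml_file defaults to
        # assets/hook.xml, and ComplexHookEnv further down instead passes one of
        # the pre-generated train/test scene XMLs. _goal_pos and _object_xpos are
        # nominal poses that _sample_goal() and _reset_sim() perturb by about
        # +/- 0.05 m on every reset.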
initial_qpos = { 21 | 'robot0:slide0': 0.405, 22 | 'robot0:slide1': 0.48, 23 | 'robot0:slide2': 0.0, 24 | 'object0:joint': [1.25, 0.53, 0.6, 1., 0., 0., 0.], 25 | 'hook:joint': [1.35, 0.35, 0.6, 1., 0., 0., 0.], 26 | } 27 | 28 | if xml_file is None: 29 | xml_file = os.path.join(DIR_PATH, 'assets', 'hook.xml') 30 | 31 | self._goal_pos = np.array([1.65, 0.75, 0.42]) 32 | self._object_xpos = np.array([1.8, 0.75]) 33 | 34 | fetch_env.FetchEnv.__init__( 35 | self, xml_file, has_object=True, block_gripper=False, n_substeps=20, 36 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0, 37 | obj_range=None, target_range=None, distance_threshold=0.05, 38 | initial_qpos=initial_qpos, reward_type='sparse') 39 | 40 | utils.EzPickle.__init__(self) 41 | 42 | 43 | 44 | def render(self, mode="human", *args, **kwargs): 45 | # See https://github.com/openai/gym/issues/1081 46 | self._render_callback() 47 | if mode == 'rgb_array': 48 | self._get_viewer().render() 49 | width, height = 3350, 1800 50 | data = self._get_viewer().read_pixels(width, height, depth=False) 51 | # original image is upside-down, so flip it 52 | return data[::-1, :, :] 53 | elif mode == 'human': 54 | self._get_viewer().render() 55 | 56 | return super(ComplexHookSingleObjectEnv, self).render(*args, **kwargs) 57 | 58 | def _sample_goal(self): 59 | goal_pos = self._goal_pos.copy() 60 | goal_pos[:2] += self.np_random.uniform(-0.05, 0.05) 61 | return goal_pos 62 | 63 | def _viewer_setup(self): 64 | body_id = self.sim.model.body_name2id('robot0:gripper_link') 65 | lookat = self.sim.data.body_xpos[body_id] 66 | for idx, value in enumerate(lookat): 67 | self.viewer.cam.lookat[idx] = value 68 | self.viewer.cam.distance = 2.5 69 | self.viewer.cam.azimuth = 180. 70 | self.viewer.cam.elevation = -24. 71 | 72 | def _reset_sim(self): 73 | self.sim.set_state(self.initial_state) 74 | 75 | object_xpos_x = 1.65 + self.np_random.uniform(-0.05, 0.05) 76 | while True: 77 | object_xpos_x = 1.8 + self.np_random.uniform(-0.05, 0.05) 78 | object_xpos_y = 0.75 + self.np_random.uniform(-0.05, 0.05) 79 | if (object_xpos_x - self._goal_pos[0]) ** 2 + (object_xpos_y - self._goal_pos[1]) ** 2 >= 0.01: 80 | break 81 | self._object_xpos = np.array([object_xpos_x, object_xpos_y]) 82 | 83 | object_qpos = self.sim.data.get_joint_qpos('object0:joint') 84 | assert object_qpos.shape == (7,) 85 | object_qpos[:2] = self._object_xpos 86 | self.sim.data.set_joint_qpos('object0:joint', object_qpos) 87 | 88 | self.sim.forward() 89 | return True 90 | 91 | def _get_obs(self): 92 | obs = fetch_env.FetchEnv._get_obs(self) 93 | 94 | grip_pos = self.sim.data.get_site_xpos('robot0:grip') 95 | dt = self.sim.nsubsteps * self.sim.model.opt.timestep 96 | grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt 97 | 98 | hook_pos = self.sim.data.get_site_xpos('hook') 99 | # rotations 100 | hook_rot = rotations.mat2euler(self.sim.data.get_site_xmat('hook')) 101 | # velocities 102 | hook_velp = self.sim.data.get_site_xvelp('hook') * dt 103 | hook_velr = self.sim.data.get_site_xvelr('hook') * dt 104 | # gripper state 105 | hook_rel_pos = hook_pos - grip_pos 106 | hook_velp -= grip_velp 107 | 108 | hook_observation = np.concatenate([hook_pos, hook_rot, hook_velp, hook_velr, hook_rel_pos]) 109 | 110 | obs['observation'] = np.concatenate([obs['observation'], hook_observation]) 111 | 112 | return obs 113 | 114 | 115 | class ResidualComplexHookSingleObjectEnv(ComplexHookSingleObjectEnv): 116 | def step(self, residual_action): 117 | residual_action = 2. 
* residual_action 118 | 119 | action = np.add(residual_action, get_hook_control(self._last_observation)) 120 | action = np.clip(action, -1, 1) 121 | observation, reward, done, debug_info = ComplexHookSingleObjectEnv.step(self, action) 122 | 123 | self._last_observation = observation 124 | 125 | return observation, reward, done, debug_info 126 | 127 | def compute_reward(self, *args, **kwargs): 128 | return ComplexHookSingleObjectEnv.compute_reward(self, *args, **kwargs) 129 | 130 | def reset(self): 131 | observation = ComplexHookSingleObjectEnv.reset(self) 132 | self._last_observation = observation 133 | 134 | return observation 135 | 136 | 137 | class TwoFrameResidualComplexHookSingleObjectEnv(ComplexHookSingleObjectEnv): 138 | def __init__(self, xml_file): 139 | super(TwoFrameResidualComplexHookSingleObjectEnv, self).__init__(xml_file) 140 | self.observation_space.spaces['observation'] = spaces.Box(low=np.hstack( 141 | (self.observation_space.spaces['observation'].low, self.observation_space.spaces['observation'].low)), 142 | high=np.hstack((self.observation_space.spaces[ 143 | 'observation'].high, 144 | self.observation_space.spaces[ 145 | 'observation'].high)), 146 | dtype=np.float32) 147 | 148 | def step(self, residual_action): 149 | residual_action = 2. * residual_action 150 | 151 | action = np.add(residual_action, get_hook_control(self._last_observation)) 152 | action = np.clip(action, -1, 1) 153 | observation, reward, done, debug_info = ComplexHookSingleObjectEnv.step(self, action) 154 | 155 | obs_out = observation.copy() 156 | obs_out['observation'] = np.hstack((self._last_observation['observation'], observation['observation'])) 157 | self._last_observation = observation 158 | 159 | return obs_out, reward, done, debug_info 160 | 161 | def reset(self): 162 | observation = ComplexHookSingleObjectEnv.reset(self) 163 | self._last_observation = observation.copy() 164 | observation['observation'] = np.hstack((self._last_observation['observation'], observation['observation'])) 165 | return observation 166 | 167 | 168 | class ComplexHookEnv(gym.Env): 169 | def __init__(self, train=True, num_scenes=100, scene_offset=0, subenv_type='normal', **kwargs): 170 | if train: 171 | xml_files = list(glob.glob(os.path.join(DIR_PATH, 'assets', 'fetch_complex_objects', 'fetch', 172 | 'train_scene_hook_*.xml'))) 173 | else: 174 | xml_files = list(glob.glob(os.path.join(DIR_PATH, 'assets', 'fetch_complex_objects', 'fetch', 175 | 'test_scene_hook_*.xml'))) 176 | 177 | xml_files = xml_files[scene_offset:scene_offset + num_scenes] 178 | 179 | self._subenvs = [] 180 | 181 | self.seed() 182 | 183 | for xml_file in xml_files: 184 | try: 185 | if subenv_type == 'residual': 186 | subenv = ResidualComplexHookSingleObjectEnv(xml_file) 187 | elif subenv_type == 'twoframe_residual': 188 | subenv = TwoFrameResidualComplexHookSingleObjectEnv(xml_file) 189 | elif subenv_type == 'normal': 190 | subenv = ComplexHookSingleObjectEnv(xml_file) 191 | else: 192 | print('subenv_type not recognized') 193 | self._subenvs.append(subenv) 194 | except: 195 | print("FAILED; skipping") 196 | 197 | self._num_envs = len(self._subenvs) 198 | 199 | self.action_space = self._subenvs[0].action_space 200 | self.observation = self._subenvs[0].observation_space 201 | self.metadata = self._subenvs[0].metadata 202 | self.config_file = kwargs.get('config') 203 | self.dimensions = [] 204 | self.reset() 205 | 206 | def _randomize_block_mass(self): 207 | block_mass = self.dimensions[0].current_value 208 | self._env.sim.model.body_mass[-2] = 
block_mass 209 | 210 | def _randomize_hook_mass(self): 211 | hook_mass = self.dimensions[1].current_value 212 | self._env.sim.model.body_mass[-1] = hook_mass 213 | 214 | def _randomize_friction(self): 215 | current_friction = self.dimensions[2].current_value 216 | for i in range(len(self._env.sim.model.geom_friction)): 217 | self._env.sim.model.geom_friction[i] = [current_friction, 5.e-3, 1e-4] 218 | 219 | def update_randomized_params(self): 220 | self._randomize_block_mass() 221 | self._randomize_hook_mass() 222 | self._randomize_friction() 223 | 224 | def reset(self): 225 | env_id = self.np_random.randint(self._num_envs) 226 | self._env = self._subenvs[env_id] 227 | return self._env.reset() 228 | 229 | def step(self, action): 230 | return self._env.step(action) 231 | 232 | def render(self, *args, **kwargs): 233 | return self._env.render(*args, **kwargs) 234 | 235 | def seed(self, seed=0): 236 | self.np_random, seed = seeding.np_random(seed) 237 | 238 | for subenv in self._subenvs: 239 | subenv.seed(seed=self.np_random.randint(1e8)) 240 | 241 | return [seed] 242 | 243 | def compute_reward(self, *args, **kwargs): 244 | return self._env.compute_reward(*args, **kwargs) 245 | 246 | 247 | class ResidualComplexHookEnv(ComplexHookEnv): 248 | def __init__(self, **kwargs): 249 | ComplexHookEnv.__init__(self, train=True, subenv_type='residual', **kwargs) 250 | 251 | 252 | class TwoFrameResidualComplexHookEnv(ComplexHookEnv): 253 | def __init__(self): 254 | ComplexHookEnv.__init__(self, train=True, subenv_type='twoframe_residual') 255 | 256 | 257 | -------------------------------------------------------------------------------- /common/randomizer/randomized_fetch.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | import xml.etree.ElementTree as et 5 | 6 | from gym.envs.robotics import rotations, robot_env, utils 7 | 8 | import mujoco_py 9 | 10 | def goal_distance(goal_a, goal_b): 11 | assert goal_a.shape == goal_b.shape 12 | return np.linalg.norm(goal_a - goal_b, axis=-1) 13 | 14 | 15 | class RandomizedFetchEnv(robot_env.RobotEnv): 16 | """Superclass for all Fetch environments. 17 | """ 18 | 19 | def __init__( 20 | self, model_path, n_substeps, gripper_extra_height, block_gripper, 21 | has_object, target_in_the_air, target_offset, obj_range, target_range, 22 | distance_threshold, initial_qpos, reward_type, **kwargs, 23 | ): 24 | """Initializes a new Fetch environment. 25 | 26 | Args: 27 | model_path (string): path to the environments XML file 28 | n_substeps (int): number of substeps the simulation runs on every call to step 29 | gripper_extra_height (float): additional height above the table when positioning the gripper 30 | block_gripper (boolean): whether or not the gripper is blocked (i.e. not movable) or not 31 | has_object (boolean): whether or not the environment has an object 32 | target_in_the_air (boolean): whether or not the target should be in the air above the table or on the table surface 33 | target_offset (float or array with 3 elements): offset of the target 34 | obj_range (float): range of a uniform distribution for sampling initial object positions 35 | target_range (float): range of a uniform distribution for sampling a target 36 | distance_threshold (float): the threshold after which a goal is considered achieved 37 | initial_qpos (dict): a dictionary of joint names and values that define the initial configuration 38 | reward_type ('sparse' or 'dense'): the reward type, i.e. 
sparse or dense 39 | """ 40 | self.gripper_extra_height = gripper_extra_height 41 | self.block_gripper = block_gripper 42 | self.has_object = has_object 43 | self.target_in_the_air = target_in_the_air 44 | self.target_offset = target_offset 45 | self.obj_range = obj_range 46 | self.target_range = target_range 47 | self.distance_threshold = distance_threshold 48 | self.reward_type = reward_type 49 | 50 | super(RandomizedFetchEnv, self).__init__( 51 | model_path=model_path, n_substeps=n_substeps, n_actions=4, 52 | initial_qpos=initial_qpos) 53 | 54 | # randomization 55 | self.xml_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "assets") 56 | self.reference_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "assets", kwargs.get('xml_name')) 57 | self.reference_xml = et.parse(self.reference_path) 58 | self.config_file = kwargs.get('config') 59 | self.dimensions = [] 60 | self.dimension_map = [] 61 | self.suffixes = [] 62 | self._locate_randomize_parameters() 63 | 64 | # Randomization methods 65 | # ---------------------------- 66 | def _locate_randomize_parameters(self): 67 | self.root = self.reference_xml.getroot() 68 | self.object_joints = self.root.findall(".//body[@name='object0']/joint") 69 | 70 | def _randomize_damping(self): 71 | damping = self.dimensions[0].current_value 72 | for joint in self.object_joints: 73 | joint.set('damping', '{:3f}'.format(damping)) 74 | 75 | def _randomize_friction(self): 76 | current_friction = self.dimensions[0].current_value 77 | for i in range(len(self.sim.model.geom_friction)): 78 | self.sim.model.geom_friction[i] = [current_friction, 5.e-3, 1e-4] 79 | 80 | def _create_xml(self): 81 | self._randomize_damping() 82 | return et.tostring(self.root, encoding='unicode', method='xml') 83 | 84 | def update_randomized_params(self): 85 | self._randomize_friction() 86 | xml = self._create_xml() 87 | # self._re_init(xml) 88 | 89 | def _re_init(self, xml): 90 | # TODO: Now, likely needs rank 91 | randomized_path = os.path.join(self.xml_dir, "randomizedfetch.xml") 92 | with open(randomized_path, 'wb') as fp: 93 | fp.write(xml.encode()) 94 | fp.flush() 95 | 96 | self.model = mujoco_py.load_model_from_path(randomized_path) 97 | self.sim = mujoco_py.MjSim(self.model) 98 | self.data = self.sim.data 99 | self.init_qpos = self.data.qpos.ravel().copy() 100 | self.init_qvel = self.data.qvel.ravel().copy() 101 | observation, _reward, done, _info = self.step(np.zeros(4)) 102 | assert not done 103 | if self.viewer: 104 | self.viewer.update_sim(self.sim) 105 | 106 | # GoalEnv methods 107 | # ---------------------------- 108 | 109 | def compute_reward(self, achieved_goal, goal, info): 110 | # Compute distance between goal and the achieved goal. 111 | d = goal_distance(achieved_goal, goal) 112 | if self.reward_type == 'sparse': 113 | return -(d > self.distance_threshold).astype(np.float32) 114 | else: 115 | return -d 116 | 117 | # RobotEnv methods 118 | # ---------------------------- 119 | 120 | def _step_callback(self): 121 | if self.block_gripper: 122 | self.sim.data.set_joint_qpos('robot0:l_gripper_finger_joint', 0.) 123 | self.sim.data.set_joint_qpos('robot0:r_gripper_finger_joint', 0.) 124 | self.sim.forward() 125 | 126 | def _set_action(self, action): 127 | assert action.shape == (4,) 128 | action = action.copy() # ensure that we don't change the action outside of this scope 129 | pos_ctrl, gripper_ctrl = action[:3], action[3] 130 | 131 | pos_ctrl *= 0.05 # limit maximum change in position 132 | rot_ctrl = [1., 0., 1., 0.] 
# fixed rotation of the end effector, expressed as a quaternion 133 | gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) 134 | assert gripper_ctrl.shape == (2,) 135 | if self.block_gripper: 136 | gripper_ctrl = np.zeros_like(gripper_ctrl) 137 | action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) 138 | 139 | # Apply action to simulation. 140 | utils.ctrl_set_action(self.sim, action) 141 | utils.mocap_set_action(self.sim, action) 142 | 143 | def _get_obs(self): 144 | # positions 145 | grip_pos = self.sim.data.get_site_xpos('robot0:grip') 146 | dt = self.sim.nsubsteps * self.sim.model.opt.timestep 147 | grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt 148 | robot_qpos, robot_qvel = utils.robot_get_obs(self.sim) 149 | if self.has_object: 150 | object_pos = self.sim.data.get_site_xpos('object0') 151 | # rotations 152 | object_rot = rotations.mat2euler(self.sim.data.get_site_xmat('object0')) 153 | # velocities 154 | object_velp = self.sim.data.get_site_xvelp('object0') * dt 155 | object_velr = self.sim.data.get_site_xvelr('object0') * dt 156 | # gripper state 157 | object_rel_pos = object_pos - grip_pos 158 | object_velp -= grip_velp 159 | else: 160 | object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.zeros(0) 161 | gripper_state = robot_qpos[-2:] 162 | gripper_vel = robot_qvel[-2:] * dt # change to a scalar if the gripper is made symmetric 163 | 164 | if not self.has_object: 165 | achieved_goal = grip_pos.copy() 166 | else: 167 | achieved_goal = np.squeeze(object_pos.copy()) 168 | obs = np.concatenate([ 169 | grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(), 170 | object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel, 171 | ]) 172 | 173 | return { 174 | 'observation': obs.copy(), 175 | 'achieved_goal': achieved_goal.copy(), 176 | 'desired_goal': self.goal.copy(), 177 | } 178 | 179 | def _viewer_setup(self): 180 | body_id = self.sim.model.body_name2id('robot0:gripper_link') 181 | lookat = self.sim.data.body_xpos[body_id] 182 | for idx, value in enumerate(lookat): 183 | self.viewer.cam.lookat[idx] = value 184 | self.viewer.cam.distance = 2.5 185 | self.viewer.cam.azimuth = 132. 186 | self.viewer.cam.elevation = -14. 187 | 188 | def _render_callback(self): 189 | # Visualize target. 190 | sites_offset = (self.sim.data.site_xpos - self.sim.model.site_pos).copy() 191 | site_id = self.sim.model.site_name2id('target0') 192 | self.sim.model.site_pos[site_id] = self.goal - sites_offset[0] 193 | self.sim.forward() 194 | 195 | def _reset_sim(self): 196 | self.sim.set_state(self.initial_state) 197 | 198 | # Randomize start position of object. 
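        # Rejection sampling: redraw the object's XY offset (within +/- obj_range
        # of the gripper) until the object starts at least 0.1 m away from the
        # gripper, so it never spawns directly underneath it.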
199 | if self.has_object: 200 | object_xpos = self.initial_gripper_xpos[:2] 201 | while np.linalg.norm(object_xpos - self.initial_gripper_xpos[:2]) < 0.1: 202 | object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform(-self.obj_range, self.obj_range, size=2) 203 | object_qpos = self.sim.data.get_joint_qpos('object0:joint') 204 | assert object_qpos.shape == (7,) 205 | object_qpos[:2] = object_xpos 206 | self.sim.data.set_joint_qpos('object0:joint', object_qpos) 207 | 208 | self.sim.forward() 209 | return True 210 | 211 | def _sample_goal(self): 212 | if self.has_object: 213 | goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3) 214 | goal += self.target_offset 215 | goal[2] = self.height_offset 216 | if self.target_in_the_air and self.np_random.uniform() < 0.5: 217 | goal[2] += self.np_random.uniform(0, 0.45) 218 | else: 219 | goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-0.15, 0.15, size=3) 220 | return goal.copy() 221 | 222 | def _is_success(self, achieved_goal, desired_goal): 223 | d = goal_distance(achieved_goal, desired_goal) 224 | return (d < self.distance_threshold).astype(np.float32) 225 | 226 | def _env_setup(self, initial_qpos): 227 | for name, value in initial_qpos.items(): 228 | self.sim.data.set_joint_qpos(name, value) 229 | utils.reset_mocap_welds(self.sim) 230 | self.sim.forward() 231 | 232 | # Move end effector into position. 233 | gripper_target = np.array([-0.498, 0.005, -0.431 + self.gripper_extra_height]) + self.sim.data.get_site_xpos('robot0:grip') 234 | gripper_rotation = np.array([1., 0., 1., 0.]) 235 | self.sim.data.set_mocap_pos('robot0:mocap', gripper_target) 236 | self.sim.data.set_mocap_quat('robot0:mocap', gripper_rotation) 237 | for _ in range(10): 238 | self.sim.step() 239 | 240 | # Extract information for sampling goals. 241 | self.initial_gripper_xpos = self.sim.data.get_site_xpos('robot0:grip').copy() 242 | if self.has_object: 243 | self.height_offset = self.sim.data.get_site_xpos('object0')[2] 244 | 245 | def render(self, mode='human', width=500, height=500): 246 | return super(RandomizedFetchEnv, self).render() 247 | -------------------------------------------------------------------------------- /common/randomizer/residual_push.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | 4 | import xml.etree.ElementTree as et 5 | 6 | from .residual_envs.residual_fetch_push_env import ResidualSlipperyPushEnv 7 | from gym.envs.robotics import rotations, robot_env, utils 8 | import mujoco_py 9 | 10 | def goal_distance(goal_a, goal_b): 11 | assert goal_a.shape == goal_b.shape 12 | return np.linalg.norm(goal_a - goal_b, axis=-1) 13 | 14 | 15 | class RandomizedResidualPushEnv(ResidualSlipperyPushEnv): 16 | """Superclass for all Fetch environments. 17 | """ 18 | 19 | def __init__( 20 | self, model_path, n_substeps, gripper_extra_height, block_gripper, 21 | has_object, target_in_the_air, target_offset, obj_range, target_range, 22 | distance_threshold, initial_qpos, reward_type, **kwargs 23 | ): 24 | """Initializes a new Fetch environment. 25 | 26 | Args: 27 | model_path (string): path to the environments XML file 28 | n_substeps (int): number of substeps the simulation runs on every call to step 29 | gripper_extra_height (float): additional height above the table when positioning the gripper 30 | block_gripper (boolean): whether or not the gripper is blocked (i.e. 
not movable) or not 31 | has_object (boolean): whether or not the environment has an object 32 | target_in_the_air (boolean): whether or not the target should be in the air above the table or on the table surface 33 | target_offset (float or array with 3 elements): offset of the target 34 | obj_range (float): range of a uniform distribution for sampling initial object positions 35 | target_range (float): range of a uniform distribution for sampling a target 36 | distance_threshold (float): the threshold after which a goal is considered achieved 37 | initial_qpos (dict): a dictionary of joint names and values that define the initial configuration 38 | reward_type ('sparse' or 'dense'): the reward type, i.e. sparse or dense 39 | """ 40 | 41 | self.gripper_extra_height = gripper_extra_height 42 | self.block_gripper = block_gripper 43 | self.has_object = has_object 44 | self.target_in_the_air = target_in_the_air 45 | self.target_offset = target_offset 46 | self.obj_range = obj_range 47 | self.target_range = target_range 48 | self.distance_threshold = distance_threshold 49 | self.reward_type = reward_type 50 | 51 | super(RandomizedResidualPushEnv, self).__init__( 52 | model_path=model_path, n_substeps=n_substeps, n_actions=4, 53 | initial_qpos=initial_qpos) 54 | 55 | # randomization 56 | self.xml_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), "assets") 57 | self.reference_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "assets", 58 | kwargs.get('xml_name')) 59 | self.reference_xml = et.parse(self.reference_path) 60 | self.config_file = kwargs.get('config') 61 | self.dimensions = [] 62 | self.dimension_map = [] 63 | self.suffixes = [] 64 | self.reset() 65 | self._locate_randomize_parameters() 66 | 67 | 68 | # Randomization methods 69 | # ---------------------------- 70 | def _locate_randomize_parameters(self): 71 | self.root = self.reference_xml.getroot() 72 | self.object_joints = self.root.findall(".//body[@name='object0']/joint") 73 | 74 | def _randomize_damping(self): 75 | damping = self.dimensions[0].current_value 76 | for joint in self.object_joints: 77 | joint.set('damping', '{:3f}'.format(damping)) 78 | 79 | def _randomize_friction(self): 80 | current_friction = self.dimensions[0].current_value 81 | for i in range(len(self.fetch_env.env.sim.model.geom_friction)): 82 | self.fetch_env.env.sim.model.geom_friction[i] = [current_friction, 5.e-3, 1e-4] 83 | 84 | def _create_xml(self): 85 | self._randomize_friction() 86 | return et.tostring(self.root, encoding='unicode', method='xml') 87 | 88 | def update_randomized_params(self): 89 | self._randomize_friction() 90 | xml = self._create_xml() 91 | # # self._re_init(xml) 92 | 93 | def _re_init(self, xml): 94 | # TODO: Now, likely needs rank 95 | randomized_path = os.path.join(self.xml_dir, "pusher.xml") 96 | with open(randomized_path, 'wb') as fp: 97 | fp.write(xml.encode()) 98 | fp.flush() 99 | 100 | self.model = mujoco_py.load_model_from_path(randomized_path) 101 | self.sim = mujoco_py.MjSim(self.model) 102 | self.data = self.sim.data 103 | self.init_qpos = self.data.qpos.ravel().copy() 104 | self.init_qvel = self.data.qvel.ravel().copy() 105 | 106 | observation, _reward, done, _info = self.step(np.zeros(4)) 107 | assert not done 108 | if self.viewer: 109 | self.viewer.update_sim(self.sim) 110 | 111 | # GoalEnv methods 112 | # ---------------------------- 113 | 114 | def compute_reward(self, achieved_goal, goal, info): 115 | # Compute distance between goal and the achieved goal. 
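        # 'sparse' returns 0.0 when the achieved goal lies within
        # distance_threshold of the desired goal and -1.0 otherwise; 'dense'
        # returns the negative distance itself. For instance, with
        # distance_threshold = 0.05: d = 0.03 -> 0.0 (sparse) / -0.03 (dense),
        # d = 0.20 -> -1.0 (sparse) / -0.20 (dense).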
116 | d = goal_distance(achieved_goal, goal) 117 | if self.reward_type == 'sparse': 118 | return -(d > self.distance_threshold).astype(np.float32) 119 | else: 120 | return -d 121 | 122 | # RobotEnv methods 123 | # ---------------------------- 124 | 125 | def _step_callback(self): 126 | if self.block_gripper: 127 | self.sim.data.set_joint_qpos('robot0:l_gripper_finger_joint', 0.) 128 | self.sim.data.set_joint_qpos('robot0:r_gripper_finger_joint', 0.) 129 | self.sim.forward() 130 | 131 | def _set_action(self, action): 132 | assert action.shape == (4,) 133 | action = action.copy() # ensure that we don't change the action outside of this scope 134 | pos_ctrl, gripper_ctrl = action[:3], action[3] 135 | 136 | pos_ctrl *= 0.05 # limit maximum change in position 137 | rot_ctrl = [1., 0., 1., 0.] # fixed rotation of the end effector, expressed as a quaternion 138 | gripper_ctrl = np.array([gripper_ctrl, gripper_ctrl]) 139 | assert gripper_ctrl.shape == (2,) 140 | if self.block_gripper: 141 | gripper_ctrl = np.zeros_like(gripper_ctrl) 142 | action = np.concatenate([pos_ctrl, rot_ctrl, gripper_ctrl]) 143 | 144 | # Apply action to simulation. 145 | utils.ctrl_set_action(self.sim, action) 146 | utils.mocap_set_action(self.sim, action) 147 | 148 | 149 | def _get_obs(self): 150 | # positions 151 | grip_pos = self.sim.data.get_site_xpos('robot0:grip') 152 | dt = self.sim.nsubsteps * self.sim.model.opt.timestep 153 | grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt 154 | robot_qpos, robot_qvel = utils.robot_get_obs(self.sim) 155 | if self.has_object: 156 | object_pos = self.sim.data.get_site_xpos('object0') 157 | # rotations 158 | object_rot = rotations.mat2euler(self.sim.data.get_site_xmat('object0')) 159 | # velocities 160 | object_velp = self.sim.data.get_site_xvelp('object0') * dt 161 | object_velr = self.sim.data.get_site_xvelr('object0') * dt 162 | # gripper state 163 | object_rel_pos = object_pos - grip_pos 164 | object_velp -= grip_velp 165 | else: 166 | object_pos = object_rot = object_velp = object_velr = object_rel_pos = np.zeros(0) 167 | gripper_state = robot_qpos[-2:] 168 | gripper_vel = robot_qvel[-2:] * dt # change to a scalar if the gripper is made symmetric 169 | 170 | if not self.has_object: 171 | achieved_goal = grip_pos.copy() 172 | else: 173 | achieved_goal = np.squeeze(object_pos.copy()) 174 | obs = np.concatenate([ 175 | grip_pos, object_pos.ravel(), object_rel_pos.ravel(), gripper_state, object_rot.ravel(), 176 | object_velp.ravel(), object_velr.ravel(), grip_velp, gripper_vel, 177 | ]) 178 | 179 | return { 180 | 'observation': obs.copy(), 181 | 'achieved_goal': achieved_goal.copy(), 182 | 'desired_goal': self.goal.copy(), 183 | } 184 | 185 | # def _viewer_setup(self): 186 | # body_id = self.sim.model.body_name2id('robot0:gripper_link') 187 | # lookat = self.sim.data.body_xpos[body_id] 188 | # for idx, value in enumerate(lookat): 189 | # self.viewer.cam.lookat[idx] = value 190 | # self.viewer.cam.distance = 2.5 191 | # self.viewer.cam.azimuth = 132. 192 | # self.viewer.cam.elevation = -14. 193 | 194 | def _render_callback(self): 195 | # Visualize target. 196 | sites_offset = (self.sim.data.site_xpos - self.sim.model.site_pos).copy() 197 | site_id = self.sim.model.site_name2id('target0') 198 | self.sim.model.site_pos[site_id] = self.goal - sites_offset[0] 199 | self.sim.forward() 200 | 201 | def _reset_sim(self): 202 | self.sim.set_state(self.initial_state) 203 | 204 | # Randomize start position of object. 
205 | if self.has_object: 206 | object_xpos = self.initial_gripper_xpos[:2] 207 | while np.linalg.norm(object_xpos - self.initial_gripper_xpos[:2]) < 0.1: 208 | object_xpos = self.initial_gripper_xpos[:2] + self.np_random.uniform(-self.obj_range, self.obj_range, 209 | size=2) 210 | object_qpos = self.sim.data.get_joint_qpos('object0:joint') 211 | assert object_qpos.shape == (7,) 212 | object_qpos[:2] = object_xpos 213 | self.sim.data.set_joint_qpos('object0:joint', object_qpos) 214 | 215 | self.sim.forward() 216 | return True 217 | 218 | def _sample_goal(self): 219 | if self.has_object: 220 | goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-self.target_range, self.target_range, size=3) 221 | goal += self.target_offset 222 | goal[2] = self.height_offset 223 | if self.target_in_the_air and self.np_random.uniform() < 0.5: 224 | goal[2] += self.np_random.uniform(0, 0.45) 225 | else: 226 | goal = self.initial_gripper_xpos[:3] + self.np_random.uniform(-0.15, 0.15, size=3) 227 | return goal.copy() 228 | 229 | def _is_success(self, achieved_goal, desired_goal): 230 | d = goal_distance(achieved_goal, desired_goal) 231 | return (d < self.distance_threshold).astype(np.float32) 232 | 233 | def _env_setup(self, initial_qpos): 234 | for name, value in initial_qpos.items(): 235 | self.sim.data.set_joint_qpos(name, value) 236 | utils.reset_mocap_welds(self.sim) 237 | self.sim.forward() 238 | 239 | # Move end effector into position. 240 | gripper_target = np.array([-0.498, 0.005, -0.431 + self.gripper_extra_height]) + self.sim.data.get_site_xpos( 241 | 'robot0:grip') 242 | gripper_rotation = np.array([1., 0., 1., 0.]) 243 | self.sim.data.set_mocap_pos('robot0:mocap', gripper_target) 244 | self.sim.data.set_mocap_quat('robot0:mocap', gripper_rotation) 245 | for _ in range(10): 246 | self.sim.step() 247 | 248 | # Extract information for sampling goals. 
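        # initial_gripper_xpos anchors goal sampling: _sample_goal() above draws
        # goals uniformly within +/- target_range of it, snaps them to the table
        # surface via height_offset (the object's initial height), and, when
        # target_in_the_air is set, lifts about half of them by up to 0.45 m.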
249 | self.initial_gripper_xpos = self.sim.data.get_site_xpos('robot0:grip').copy() 250 | if self.has_object: 251 | self.height_offset = self.sim.data.get_site_xpos('object0')[2] 252 | 253 | def render(self, mode='human', width=500, height=500): 254 | 255 | return super(RandomizedResidualPushEnv, self).render() -------------------------------------------------------------------------------- /common/svpg/svpg.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | import torch.optim as optim 4 | from torch.distributions.kl import kl_divergence 5 | 6 | from scipy.spatial.distance import squareform, pdist 7 | 8 | from common.svpg.particles import SVPGParticle 9 | from common.svpg.svpg_utils import * 10 | 11 | 12 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 13 | torch.set_default_tensor_type(torch.FloatTensor) 14 | eps = np.finfo(np.float32).eps.item() 15 | 16 | LEARNING_RATE = 3e-4 17 | HIDDEN_DIMENSIONS = 100 18 | 19 | class SVPG: 20 | """Class that implements Stein Variational Policy Gradient 21 | Input is the current randomization settings, and output is either a 22 | direction to move in (Discrete - for 1D/2D) or a delta across all parameter (Continuous) 23 | """ 24 | def __init__(self, nparticles, nparams, max_step_length, svpg_rollout_length, svpg_horizon, temperature, discrete=False, kld_coefficient=0.0): 25 | self.particles = [] 26 | self.prior_particles = [] 27 | self.optimizers = [] 28 | self.max_step_length = max_step_length 29 | self.svpg_rollout_length = svpg_rollout_length 30 | self.svpg_horizon = svpg_horizon 31 | self.temperature = temperature 32 | self.nparticles = nparticles 33 | self.gamma = 0.99 34 | 35 | self.nparams = nparams 36 | self.noutputs = nparams * 2 if discrete else nparams 37 | self.discrete = discrete 38 | self.kld_coefficient = kld_coefficient 39 | 40 | self.last_states = np.random.uniform(0, 1, (self.nparticles, self.nparams)) 41 | self.timesteps = np.zeros(self.nparticles) 42 | 43 | for i in range(self.nparticles): 44 | # Initialize each of the individual particles 45 | policy = SVPGParticle(input_dim=self.nparams, 46 | output_dim=self.noutputs, 47 | hidden_dim=HIDDEN_DIMENSIONS, 48 | discrete=discrete).to(device) 49 | 50 | prior_policy = SVPGParticle(input_dim=self.nparams, 51 | output_dim=self.noutputs, 52 | hidden_dim=HIDDEN_DIMENSIONS, 53 | discrete=discrete, 54 | freeze=True).to(device) 55 | 56 | optimizer = optim.Adam(policy.parameters(), lr=LEARNING_RATE) 57 | self.particles.append(policy) 58 | self.prior_particles.append(prior_policy) 59 | self.optimizers.append(optimizer) 60 | 61 | def _squared_dist(self, X): 62 | # Compute squared distance matrix using torch.pdist 63 | dists = torch.pdist(X) 64 | inds_l, inds_r = np.triu_indices(X.shape[-2], 1) 65 | res = torch.zeros(*X.shape[:-2], X.shape[-2], X.shape[-2], dtype=X.dtype, device=X.device) 66 | res[..., inds_l, inds_r] = dists 67 | res[..., inds_r, inds_l] = dists 68 | 69 | return res 70 | 71 | def _Kxx_dxKxx(self, X): 72 | """ 73 | Computes covariance matrix K(X,X) and its gradient w.r.t. X 74 | for RBF kernel with design matrix X, as in the second term in eqn (8) 75 | of reference SVGD paper. 76 | 77 | Args: 78 | X (Tensor): (S, P), design matrix of samples, where S is num of 79 | samples, P is the dim of each sample which stacks all params 80 | into a (1, P) row. Thus P could be 1 millions. 
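        The kernel used below is the RBF kernel
            K(x_i, x_j) = exp(-||x_i - x_j||^2 / (2 * h^2)),
        with the bandwidth h set by the median heuristic,
            h = sqrt(0.5 * median(||x_i - x_j||^2) / log(n + 1)),
        n being the number of particles; Kxx is K(X, X) and dxKxx is the
        kernel-gradient (repulsive) term that enters the SVGD update.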
81 | """ 82 | 83 | X_np = X.cpu().data.numpy() 84 | pairwise_dists = squareform(pdist(X_np))**2 85 | 86 | # Median trick 87 | h = np.median(pairwise_dists) 88 | h = np.sqrt(0.5 * h / np.log(self.nparticles+1)) 89 | 90 | # Compute RBF Kernel 91 | Kxx = torch.exp(-torch.from_numpy(pairwise_dists).to(device).float() / h**2 / 2) 92 | 93 | # Compute kernel gradient 94 | dxKxx = -(Kxx).matmul(X) 95 | sumKxx = Kxx.sum(1) 96 | for i in range(X.shape[1]): 97 | dxKxx[:, i] = dxKxx[:, i] + X[:, i].matmul(sumKxx) 98 | dxKxx /= (h ** 2) 99 | 100 | return Kxx, dxKxx 101 | 102 | def select_action(self, policy_idx, state): 103 | state = torch.from_numpy(state).float().unsqueeze(0).to(device) 104 | policy = self.particles[policy_idx] 105 | prior_policy = self.prior_particles[policy_idx] 106 | dist, value = policy(state) 107 | prior_dist, _ = prior_policy(state) 108 | 109 | action = dist.sample() 110 | 111 | policy.saved_log_probs.append(dist.log_prob(action)) 112 | policy.saved_klds.append(kl_divergence(dist, prior_dist)) 113 | 114 | # TODO: Gross temporary hack 115 | if self.nparams == 1 or self.discrete: 116 | action = action.item() 117 | else: 118 | action = action.squeeze().cpu().detach().numpy() 119 | 120 | return action, value 121 | 122 | def compute_returns(self, next_value, rewards, masks, klds): 123 | R = next_value 124 | returns = [] 125 | for step in reversed(range(len(rewards))): 126 | # Eq. 80: https://arxiv.org/abs/1704.06440 127 | R = self.gamma * masks[step] * R + (rewards[step] - self.kld_coefficient * klds[step]) 128 | returns.insert(0, R) 129 | 130 | return returns 131 | 132 | def step(self): 133 | """Rollout trajectories, starting from random initializations, 134 | of randomization settings, each of svpg_rollout_length size 135 | Then, send it to agent for further training and reward calculation 136 | """ 137 | self.simulation_instances = np.zeros((self.nparticles, self.svpg_rollout_length, self.nparams)) 138 | 139 | # Store the values of each state - for advantage estimation 140 | self.values = torch.zeros((self.nparticles, self.svpg_rollout_length, 1)).float().to(device) 141 | # Store the last states for each particle (calculating rewards) 142 | self.masks = np.ones((self.nparticles, self.svpg_rollout_length)) 143 | 144 | for i in range(self.nparticles): 145 | self.particles[i].reset() 146 | current_sim_params = self.last_states[i] 147 | 148 | for t in range(self.svpg_rollout_length): 149 | self.simulation_instances[i][t] = current_sim_params 150 | 151 | action, value = self.select_action(i, current_sim_params) 152 | self.values[i][t] = value 153 | 154 | action = self._process_action(action) 155 | clipped_action = action * self.max_step_length 156 | next_params = np.clip(current_sim_params + clipped_action, 0, 1) 157 | 158 | if np.array_equal(next_params, current_sim_params) or self.timesteps[i] + 1 == self.svpg_horizon: 159 | next_params = np.random.uniform(0, 1, (self.nparams,)) 160 | 161 | self.masks[i][t] = 0 # done = True 162 | self.timesteps[i] = 0 163 | 164 | current_sim_params = next_params 165 | self.timesteps[i] += 1 166 | 167 | self.last_states[i] = current_sim_params 168 | 169 | return np.array(self.simulation_instances) 170 | 171 | def train(self, simulator_rewards): 172 | """Train SVPG agent with rewards from rollouts 173 | """ 174 | policy_grads = [] 175 | parameters = [] 176 | 177 | for i in range(self.nparticles): 178 | policy_grad_particle = [] 179 | 180 | # Calculate the value of last state - for Return Computation 181 | _, next_value = self.select_action(i, 
self.last_states[i]) 182 | 183 | particle_rewards = torch.from_numpy(simulator_rewards[i]).float().to(device) 184 | masks = torch.from_numpy(self.masks[i]).float().to(device) 185 | 186 | # Calculate entropy-augmented returns, advantages 187 | returns = self.compute_returns(next_value, particle_rewards, masks, self.particles[i].saved_klds) 188 | returns = torch.cat(returns).detach() 189 | advantages = returns - self.values[i] 190 | # for s, v, r in zip(self.simulation_instances[i], self.values[i], simulator_rewards[i]): 191 | # print('Setting: {}, Reward: {}, Value: {}'.format(s,r,v)) 192 | 193 | # logprob * A = policy gradient (before backwards) 194 | for log_prob, advantage in zip(self.particles[i].saved_log_probs, advantages): 195 | policy_grad_particle.append(log_prob * advantage.detach()) 196 | 197 | # Compute value loss, update critic 198 | self.optimizers[i].zero_grad() 199 | critic_loss = 0.5 * advantages.pow(2).mean() 200 | critic_loss.backward(retain_graph=True) 201 | self.optimizers[i].step() 202 | 203 | # Store policy gradients for SVPG update 204 | self.optimizers[i].zero_grad() 205 | policy_grad = -torch.cat(policy_grad_particle).mean() 206 | policy_grad.backward() 207 | 208 | # Vectorize parameters and PGs 209 | vec_param, vec_policy_grad = parameters_to_vector( 210 | self.particles[i].parameters(), both=True) 211 | 212 | policy_grads.append(vec_policy_grad.unsqueeze(0)) 213 | parameters.append(vec_param.unsqueeze(0)) 214 | 215 | # calculating the kernel matrix and its gradients 216 | parameters = torch.cat(parameters) 217 | Kxx, dxKxx = self._Kxx_dxKxx(parameters) 218 | 219 | policy_grads = 1 / self.temperature * torch.cat(policy_grads) 220 | grad_logp = torch.mm(Kxx, policy_grads) 221 | 222 | grad_theta = (grad_logp + dxKxx) / self.nparticles 223 | # explicitly deleting variables does not release memory :( 224 | 225 | # update param gradients 226 | for i in range(self.nparticles): 227 | vector_to_parameters(grad_theta[i], 228 | self.particles[i].parameters(), grad=True) 229 | self.optimizers[i].step() 230 | 231 | def save(self, directory): 232 | for i in range(self.nparticles): 233 | torch.save(self.particles[i].state_dict(), '{}/particle_{}.pth'.format(directory, i)) 234 | torch.save(self.particles[i].critic.state_dict(), '{}/particle_critic{}.pth'.format(directory, i)) 235 | 236 | def load(self, directory): 237 | return # TODO: Does this solve the value function issue? 238 | for i in range(self.nparticles): 239 | prior = torch.load('{}/particle_{}.pth'.format(directory, i)) 240 | particle = self.particles[i].state_dict() 241 | actor = {k: v for k, v in prior.items() if k.find('critic') == -1} 242 | 243 | # Only load the actor! 244 | self.particles[i].load_state_dict(actor, strict=False) 245 | 246 | def load_prior_particles(self, directory): 247 | for i in range(self.nparticles): 248 | self.prior_particles[i].load_state_dict(torch.load('{}/particle_{}.pth'.format(directory, i), map_location=device)) 249 | self.prior_particles[i].freeze() 250 | 251 | def _process_action(self, action): 252 | """Transform policy output into environment-action 253 | """ 254 | if self.discrete: 255 | if self.nparams == 1: 256 | if action == 0: 257 | action = [-1.] 258 | elif action == 1: 259 | action = [1.] 260 | elif self.nparams == 2: 261 | if action == 0: 262 | action = [-1., 0] 263 | elif action == 1: 264 | action = [1., 0] 265 | elif action == 2: 266 | action = [0, -1.] 267 | elif action == 3: 268 | action = [0, 1.] 
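        # Continuous case below: a scalar action is clipped to [-1, 1], while a
        # vector action is L2-normalised to a unit step direction; step() then
        # scales it by max_step_length before moving the randomization setting.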
269 | else: 270 | if isinstance(action, float): 271 | action = np.clip(action, -1, 1) 272 | else: 273 | action /= np.linalg.norm(action, ord=2) 274 | 275 | return np.array(action) 276 | -------------------------------------------------------------------------------- /common/randomizer/fetch_hook_env.py: -------------------------------------------------------------------------------- 1 | from gym.envs.robotics import rotations, fetch_env 2 | from gym import utils, spaces 3 | import numpy as np 4 | import os 5 | 6 | from .controllers.hook_controller import get_hook_control 7 | 8 | import pdb 9 | 10 | DIR_PATH = os.path.dirname(os.path.abspath(__file__)) 11 | 12 | 13 | class FetchHookEnv(fetch_env.FetchEnv, utils.EzPickle): 14 | def __init__(self, xml_file=None): 15 | initial_qpos = { 16 | 'robot0:slide0': 0.405, 17 | 'robot0:slide1': 0.48, 18 | 'robot0:slide2': 0.0, 19 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 20 | 'hook:joint': [1.35, 0.35, 0.4, 1., 0., 0., 0.], 21 | } 22 | 23 | if xml_file is None: 24 | xml_file = os.path.join(DIR_PATH, 'assets_residual', 'hook.xml') 25 | 26 | self._goal_pos = np.array([1.65, 0.75, 0.42]) 27 | self._object_xpos = np.array([1.8, 0.75]) 28 | 29 | fetch_env.FetchEnv.__init__( 30 | self, xml_file, has_object=True, block_gripper=False, n_substeps=20, 31 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0, 32 | obj_range=None, target_range=None, distance_threshold=0.05, 33 | initial_qpos=initial_qpos, reward_type='sparse') 34 | 35 | utils.EzPickle.__init__(self) 36 | 37 | def render(self, mode="human", *args, **kwargs): 38 | # See https://github.com/openai/gym/issues/1081 39 | self._render_callback() 40 | if mode == 'rgb_array': 41 | self._get_viewer().render() 42 | width, height = 3350, 1800 43 | data = self._get_viewer().read_pixels(width, height, depth=False) 44 | # original image is upside-down, so flip it 45 | return data[::-1, :, :] 46 | elif mode == 'human': 47 | self._get_viewer().render() 48 | 49 | return super(FetchHookEnv, self).render(*args, **kwargs) 50 | 51 | def _sample_goal(self): 52 | goal_pos = self._goal_pos.copy() 53 | goal_pos[:2] += self.np_random.uniform(-0.05, 0.05) 54 | return goal_pos 55 | 56 | def _viewer_setup(self): 57 | body_id = self.sim.model.body_name2id('robot0:gripper_link') 58 | lookat = self.sim.data.body_xpos[body_id] 59 | for idx, value in enumerate(lookat): 60 | self.viewer.cam.lookat[idx] = value 61 | self.viewer.cam.distance = 2.5 62 | self.viewer.cam.azimuth = 180. 63 | self.viewer.cam.elevation = -24. 
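    # _reset_sim below rejection-samples the block's start position: its XY is
    # redrawn around (1.8, 0.75) until the squared distance to the nominal goal
    # at (1.65, 0.75) is at least 0.01 (i.e. 0.1 m apart), so an episode never
    # starts with the block already on the goal.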
64 | 65 | def _reset_sim(self): 66 | self.sim.set_state(self.initial_state) 67 | 68 | object_xpos_x = 1.65 + self.np_random.uniform(-0.05, 0.05) 69 | while True: 70 | object_xpos_x = 1.8 + self.np_random.uniform(-0.05, 0.10) 71 | object_xpos_y = 0.75 + self.np_random.uniform(-0.05, 0.05) 72 | if (object_xpos_x - self._goal_pos[0]) ** 2 + (object_xpos_y - self._goal_pos[1]) ** 2 >= 0.01: 73 | break 74 | self._object_xpos = np.array([object_xpos_x, object_xpos_y]) 75 | 76 | object_qpos = self.sim.data.get_joint_qpos('object0:joint') 77 | assert object_qpos.shape == (7,) 78 | object_qpos[:2] = self._object_xpos 79 | self.sim.data.set_joint_qpos('object0:joint', object_qpos) 80 | 81 | self.sim.forward() 82 | return True 83 | 84 | def _get_obs(self): 85 | obs = fetch_env.FetchEnv._get_obs(self) 86 | 87 | grip_pos = self.sim.data.get_site_xpos('robot0:grip') 88 | dt = self.sim.nsubsteps * self.sim.model.opt.timestep 89 | grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt 90 | 91 | hook_pos = self.sim.data.get_site_xpos('hook') 92 | # rotations 93 | hook_rot = rotations.mat2euler(self.sim.data.get_site_xmat('hook')) 94 | # velocities 95 | hook_velp = self.sim.data.get_site_xvelp('hook') * dt 96 | hook_velr = self.sim.data.get_site_xvelr('hook') * dt 97 | # gripper state 98 | hook_rel_pos = hook_pos - grip_pos 99 | hook_velp -= grip_velp 100 | 101 | hook_observation = np.concatenate([hook_pos, hook_rot, hook_velp, hook_velr, hook_rel_pos]) 102 | 103 | obs['observation'] = np.concatenate([obs['observation'], hook_observation]) 104 | 105 | return obs 106 | 107 | def _noisify_obs(self, obs, noise=1.): 108 | return obs + np.random.normal(0, noise, size=obs.shape) 109 | 110 | 111 | class ResidualFetchHookEnv(FetchHookEnv): 112 | 113 | def step(self, residual_action): 114 | residual_action = 2. 
* residual_action 115 | 116 | action = np.add(residual_action, get_hook_control(self._last_observation)) 117 | action = np.clip(action, -1, 1) 118 | observation, reward, done, debug_info = FetchHookEnv.step(self, action) 119 | 120 | self._last_observation = observation 121 | 122 | return observation, reward, done, debug_info 123 | 124 | def compute_reward(self, *args, **kwargs): 125 | return FetchHookEnv.compute_reward(self, *args, **kwargs) 126 | 127 | def reset(self): 128 | observation = FetchHookEnv.reset(self) 129 | self._last_observation = observation 130 | 131 | return observation 132 | 133 | 134 | class NoisyFetchHookEnv(FetchHookEnv): 135 | 136 | def _get_obs(self): 137 | obs = fetch_env.FetchEnv._get_obs(self) 138 | 139 | grip_pos = self.sim.data.get_site_xpos('robot0:grip') 140 | dt = self.sim.nsubsteps * self.sim.model.opt.timestep 141 | grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt 142 | 143 | hook_pos = self.sim.data.get_site_xpos('hook') 144 | hook_pos = self._noisify_obs(hook_pos, noise=0.025) 145 | # rotations 146 | hook_rot = rotations.mat2euler(self.sim.data.get_site_xmat('hook')) 147 | hook_rot = self._noisify_obs(hook_rot, noise=0.025) 148 | # velocities 149 | hook_velp = self.sim.data.get_site_xvelp('hook') * dt 150 | hook_velr = self.sim.data.get_site_xvelr('hook') * dt 151 | # gripper state 152 | hook_rel_pos = hook_pos - grip_pos 153 | hook_velp -= grip_velp 154 | 155 | hook_observation = np.concatenate([hook_pos, hook_rot, hook_velp, hook_velr, hook_rel_pos]) 156 | 157 | obs['observation'] = np.concatenate([obs['observation'], hook_observation]) 158 | obs['observation'][3:5] = self._noisify_obs(obs['observation'][3:5], noise=0.025) 159 | obs['observation'][6:9] = obs['observation'][3:6] - obs['observation'][:3] # object_pos - grip_pos 160 | obs['observation'][12:15] = self._noisify_obs(obs['observation'][6:9], noise=0.025) 161 | return obs 162 | 163 | def _noisify_obs(self, obs, noise=1.): 164 | return obs + np.random.normal(0, noise, size=obs.shape) 165 | 166 | def compute_reward(self, *args, **kwargs): 167 | return FetchHookEnv.compute_reward(self, *args, **kwargs) 168 | 169 | def reset(self): 170 | observation = FetchHookEnv.reset(self) 171 | self._last_observation = observation 172 | 173 | return observation 174 | 175 | 176 | class TwoFrameResidualHookNoisyEnv(FetchHookEnv): 177 | def __init__(self, xml_file=None): 178 | super(TwoFrameResidualHookNoisyEnv, self).__init__() 179 | self.observation_space.spaces['observation'] = spaces.Box(low=np.hstack( 180 | (self.observation_space.spaces['observation'].low, self.observation_space.spaces['observation'].low)), 181 | high=np.hstack((self.observation_space.spaces[ 182 | 'observation'].high, 183 | self.observation_space.spaces[ 184 | 'observation'].high)), 185 | dtype=np.float32) 186 | 187 | def _get_obs(self): 188 | obs = fetch_env.FetchEnv._get_obs(self) 189 | 190 | grip_pos = self.sim.data.get_site_xpos('robot0:grip') 191 | dt = self.sim.nsubsteps * self.sim.model.opt.timestep 192 | grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt 193 | 194 | hook_pos = self.sim.data.get_site_xpos('hook') 195 | hook_pos = self._noisify_obs(hook_pos, noise=0.025) 196 | # rotations 197 | hook_rot = rotations.mat2euler(self.sim.data.get_site_xmat('hook')) 198 | hook_rot = self._noisify_obs(hook_rot, noise=0.025) 199 | # velocities 200 | hook_velp = self.sim.data.get_site_xvelp('hook') * dt 201 | hook_velr = self.sim.data.get_site_xvelr('hook') * dt 202 | # gripper state 203 | hook_rel_pos = hook_pos - 
grip_pos 204 | hook_velp -= grip_velp 205 | 206 | hook_observation = np.concatenate([hook_pos, hook_rot, hook_velp, hook_velr, hook_rel_pos]) 207 | 208 | obs['observation'] = np.concatenate([obs['observation'], hook_observation]) 209 | obs['observation'][3:5] = self._noisify_obs(obs['observation'][3:5], noise=0.025) 210 | obs['observation'][6:9] = obs['observation'][3:6] - obs['observation'][:3] # object_pos - grip_pos 211 | obs['observation'][12:15] = self._noisify_obs(obs['observation'][6:9], noise=0.025) 212 | return obs 213 | 214 | def step(self, residual_action): 215 | residual_action = 2. * residual_action 216 | 217 | action = np.add(residual_action, get_hook_control(self._last_observation)) 218 | action = np.clip(action, -1, 1) 219 | observation, reward, done, debug_info = FetchHookEnv.step(self, action) 220 | 221 | obs_out = observation.copy() 222 | obs_out['observation'] = np.hstack((self._last_observation['observation'], observation['observation'])) 223 | self._last_observation = observation 224 | 225 | return obs_out, reward, done, debug_info 226 | 227 | def reset(self): 228 | observation = FetchHookEnv.reset(self) 229 | self._last_observation = observation.copy() 230 | observation['observation'] = np.hstack((self._last_observation['observation'], observation['observation'])) 231 | return observation 232 | 233 | def _noisify_obs(self, obs, noise=1.): 234 | return obs + np.random.normal(0, noise, size=obs.shape) 235 | 236 | 237 | class TwoFrameHookNoisyEnv(FetchHookEnv): 238 | def __init__(self, xml_file=None): 239 | super(TwoFrameHookNoisyEnv, self).__init__() 240 | self.observation_space.spaces['observation'] = spaces.Box(low=np.hstack( 241 | (self.observation_space.spaces['observation'].low, self.observation_space.spaces['observation'].low)), 242 | high=np.hstack((self.observation_space.spaces[ 243 | 'observation'].high, 244 | self.observation_space.spaces[ 245 | 'observation'].high)), 246 | dtype=np.float32) 247 | 248 | def _get_obs(self): 249 | obs = fetch_env.FetchEnv._get_obs(self) 250 | 251 | grip_pos = self.sim.data.get_site_xpos('robot0:grip') 252 | dt = self.sim.nsubsteps * self.sim.model.opt.timestep 253 | grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt 254 | 255 | hook_pos = self.sim.data.get_site_xpos('hook') 256 | hook_pos = self._noisify_obs(hook_pos, noise=0.025) 257 | # rotations 258 | hook_rot = rotations.mat2euler(self.sim.data.get_site_xmat('hook')) 259 | hook_rot = self._noisify_obs(hook_rot, noise=0.025) 260 | # velocities 261 | hook_velp = self.sim.data.get_site_xvelp('hook') * dt 262 | hook_velr = self.sim.data.get_site_xvelr('hook') * dt 263 | # gripper state 264 | hook_rel_pos = hook_pos - grip_pos 265 | hook_velp -= grip_velp 266 | 267 | hook_observation = np.concatenate([hook_pos, hook_rot, hook_velp, hook_velr, hook_rel_pos]) 268 | 269 | obs['observation'] = np.concatenate([obs['observation'], hook_observation]) 270 | obs['observation'][3:5] = self._noisify_obs(obs['observation'][3:5], noise=0.025) 271 | obs['observation'][6:9] = obs['observation'][3:6] - obs['observation'][:3] # object_pos - grip_pos 272 | obs['observation'][12:15] = self._noisify_obs(obs['observation'][6:9], noise=0.025) 273 | return obs 274 | 275 | def step(self, action): 276 | observation, reward, done, debug_info = FetchHookEnv.step(self, action) 277 | 278 | obs_out = observation.copy() 279 | obs_out['observation'] = np.hstack((self._last_observation['observation'], observation['observation'])) 280 | self._last_observation = observation 281 | 282 | return obs_out, 
reward, done, debug_info 283 | 284 | def reset(self): 285 | observation = FetchHookEnv.reset(self) 286 | self._last_observation = observation.copy() 287 | observation['observation'] = np.hstack((self._last_observation['observation'], observation['observation'])) 288 | return observation 289 | 290 | def _noisify_obs(self, obs, noise=1.): 291 | return obs + np.random.normal(0, noise, size=obs.shape) 292 | 293 | 294 | class NoisyResidualFetchHookEnv(FetchHookEnv): 295 | 296 | def step(self, residual_action): 297 | residual_action = 2. * residual_action 298 | 299 | action = np.add(residual_action, get_hook_control(self._last_observation)) 300 | action = np.clip(action, -1, 1) 301 | observation, reward, done, debug_info = FetchHookEnv.step(self, action) 302 | 303 | self._last_observation = observation 304 | 305 | return observation, reward, done, debug_info 306 | 307 | def _get_obs(self): 308 | obs = fetch_env.FetchEnv._get_obs(self) 309 | 310 | grip_pos = self.sim.data.get_site_xpos('robot0:grip') 311 | dt = self.sim.nsubsteps * self.sim.model.opt.timestep 312 | grip_velp = self.sim.data.get_site_xvelp('robot0:grip') * dt 313 | 314 | hook_pos = self.sim.data.get_site_xpos('hook') 315 | hook_pos = self._noisify_obs(hook_pos, noise=0.025) 316 | # rotations 317 | hook_rot = rotations.mat2euler(self.sim.data.get_site_xmat('hook')) 318 | hook_rot = self._noisify_obs(hook_rot, noise=0.025) 319 | # velocities 320 | hook_velp = self.sim.data.get_site_xvelp('hook') * dt 321 | hook_velr = self.sim.data.get_site_xvelr('hook') * dt 322 | # gripper state 323 | hook_rel_pos = hook_pos - grip_pos 324 | hook_velp -= grip_velp 325 | 326 | hook_observation = np.concatenate([hook_pos, hook_rot, hook_velp, hook_velr, hook_rel_pos]) 327 | 328 | obs['observation'] = np.concatenate([obs['observation'], hook_observation]) 329 | obs['observation'][3:5] = self._noisify_obs(obs['observation'][3:5], noise=0.025) 330 | obs['observation'][6:9] = obs['observation'][3:6] - obs['observation'][:3] # object_pos - grip_pos 331 | obs['observation'][12:15] = self._noisify_obs(obs['observation'][6:9], noise=0.025) 332 | return obs 333 | 334 | def _noisify_obs(self, obs, noise=1.): 335 | return obs + np.random.normal(0, noise, size=obs.shape) 336 | 337 | def compute_reward(self, *args, **kwargs): 338 | return FetchHookEnv.compute_reward(self, *args, **kwargs) 339 | 340 | def reset(self): 341 | observation = FetchHookEnv.reset(self) 342 | self._last_observation = observation 343 | 344 | return observation 345 | --------------------------------------------------------------------------------
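
The residual classes in this file (ResidualFetchHookEnv, TwoFrameResidualHookNoisyEnv, NoisyResidualFetchHookEnv) all build the executed action the same way: scale the learned residual by 2, add the scripted get_hook_control output, and clip to the Fetch action range [-1, 1]. The standalone sketch below isolates that composition; `compose_residual_action` and the stand-in controller values are illustrative names, not part of the repo.

```python
import numpy as np


def compose_residual_action(residual_action, controller_action, scale=2.0):
    """Combine a learned residual with a scripted controller action.

    Mirrors ResidualFetchHookEnv.step: the residual is scaled, added to the
    hand-designed controller output, and the sum is clipped to [-1, 1].
    """
    action = scale * np.asarray(residual_action, dtype=np.float64)
    action = action + np.asarray(controller_action, dtype=np.float64)
    return np.clip(action, -1.0, 1.0)


# Made-up numbers: a zero residual leaves the scripted action unchanged,
# while a large residual saturates at the edge of the valid range.
scripted = np.array([0.3, -0.2, 0.1, 0.0])  # stand-in for get_hook_control(obs)
print(compose_residual_action(np.zeros(4), scripted))                     # [ 0.3 -0.2  0.1  0. ]
print(compose_residual_action(np.array([0.5, 0.0, 0.0, 0.0]), scripted))  # 0.5*2 + 0.3 = 1.3 -> 1.0
```

Clipping after the addition means the residual can at most push the scripted action to the boundary of the valid range, which is what makes a zero-initialized residual policy a reasonable starting point.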
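
TwoFrameHookNoisyEnv and TwoFrameResidualHookNoisyEnv widen the 'observation' entry of the observation space in `__init__` and concatenate the previous and current observation vectors in `step()`/`reset()`. A minimal sketch of that pattern follows, with illustrative helper names (`double_observation_space`, `stack_two_frames`) and a made-up 3-dimensional space; it is not code from the repo.

```python
import numpy as np
from gym import spaces


def double_observation_space(obs_space):
    """Return a Box twice as wide, matching how the TwoFrame* envs widen
    observation_space.spaces['observation']."""
    return spaces.Box(low=np.hstack((obs_space.low, obs_space.low)),
                      high=np.hstack((obs_space.high, obs_space.high)),
                      dtype=np.float32)


def stack_two_frames(last_obs, current_obs):
    """Concatenate the previous and current 'observation' vectors, as the
    TwoFrame* step()/reset() methods do (goal entries are left untouched)."""
    stacked = dict(current_obs)
    stacked['observation'] = np.hstack((last_obs['observation'],
                                        current_obs['observation']))
    return stacked


# Example with a hypothetical 3-dimensional observation.
space = spaces.Box(low=-np.ones(3, dtype=np.float32),
                   high=np.ones(3, dtype=np.float32), dtype=np.float32)
print(double_observation_space(space).shape)        # (6,)
prev = {'observation': np.zeros(3), 'desired_goal': np.zeros(3)}
curr = {'observation': np.ones(3), 'desired_goal': np.zeros(3)}
print(stack_two_frames(prev, curr)['observation'])  # [0. 0. 0. 1. 1. 1.]
```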
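
A hedged smoke test for the environments in this file, not part of the repo: it assumes mujoco-py and the gym robotics assets are installed and that the package is importable as `common.randomizer.fetch_hook_env`; if `hook.xml` does not resolve at the default `assets_residual` path used in `__init__`, pass `xml_file` explicitly. With a zero residual, ResidualFetchHookEnv reduces to the scripted hook controller.

```python
import numpy as np

# Hypothetical smoke test; import path assumes the package layout above.
from common.randomizer.fetch_hook_env import ResidualFetchHookEnv

env = ResidualFetchHookEnv()  # pass xml_file=... if hook.xml lives elsewhere
obs = env.reset()
for _ in range(50):
    # Zero residual: the executed action is exactly get_hook_control(obs).
    obs, reward, done, info = env.step(np.zeros(env.action_space.shape))
    if done:
        obs = env.reset()
env.close()
```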