├── LICENSE
├── ReadMe.md
├── requirements.txt
├── robosuite-extra
│   ├── robosuite_extra
│   │   ├── __init__.py
│   │   ├── assets
│   │   │   ├── ReadMe.md
│   │   │   ├── grippers
│   │   │   │   ├── meshes
│   │   │   │   │   ├── electric_gripper_base.STL
│   │   │   │   │   ├── paddle_tip.STL
│   │   │   │   │   └── standard_narrow.STL
│   │   │   │   └── slide_panel_gripper.xml
│   │   │   ├── robot
│   │   │   │   └── sawyer
│   │   │   │       ├── meshes
│   │   │   │       │   ├── base.stl
│   │   │   │       │   ├── head.stl
│   │   │   │       │   ├── l0.stl
│   │   │   │       │   ├── l1.stl
│   │   │   │       │   ├── l2.stl
│   │   │   │       │   ├── l3.stl
│   │   │   │       │   ├── l4.stl
│   │   │   │       │   ├── l5.stl
│   │   │   │       │   ├── l6.stl
│   │   │   │       │   └── pedestal.stl
│   │   │   │       ├── robot.xml
│   │   │   │       └── robot_torque.xml
│   │   │   └── urdf
│   │   │       └── sawyer_intera.urdf
│   │   ├── controllers
│   │   │   ├── __init__.py
│   │   │   └── sawyer_eef_velocity_controller.py
│   │   ├── env_base
│   │   │   ├── __init__.py
│   │   │   ├── base.py
│   │   │   └── sawyer.py
│   │   ├── models
│   │   │   ├── __init__.py
│   │   │   ├── generated_objects.py
│   │   │   ├── grippers
│   │   │   │   ├── __init__.py
│   │   │   │   ├── gripper_factory.py
│   │   │   │   ├── pushing_gripper.py
│   │   │   │   └── slide_panel_gripper.py
│   │   │   ├── robot
│   │   │   │   ├── __init__.py
│   │   │   │   └── sawyer_robot.py
│   │   │   └── tasks
│   │   │       ├── __init__.py
│   │   │       └── placement_sampler.py
│   │   ├── push_env
│   │   │   ├── __init__.py
│   │   │   ├── demo.py
│   │   │   ├── push_task.py
│   │   │   └── sawyer_push.py
│   │   ├── reach_env
│   │   │   ├── __init__.py
│   │   │   ├── demo.py
│   │   │   ├── reach_task.py
│   │   │   └── sawyer_reach.py
│   │   ├── slide_env
│   │   │   ├── __init__.py
│   │   │   ├── demo.py
│   │   │   ├── sawyer_slide.py
│   │   │   └── slide_task.py
│   │   ├── utils
│   │   │   ├── __init__.py
│   │   │   ├── domain_randomisation_utils.py
│   │   │   └── transform_utils.py
│   │   └── wrappers
│   │       ├── __init__.py
│   │       ├── eef_velocy_control_wrapper.py
│   │       └── gym_wrapper.py
│   └── setup.py
├── sim2real-calibration-characterisation
│   ├── __init__.py
│   ├── setup.py
│   └── sim2real_calibration_characterisation
│       ├── __init__.py
│       ├── sim2real_characterisation
│       │   ├── __init__.py
│       │   ├── pushing_characterisation.py
│       │   ├── reaching_characterisation.py
│       │   └── sliding_characterisation.py
│       ├── simulation_calibration
│       │   ├── __init__.py
│       │   └── calibrate_internal_dynamics.py
│       └── utils
│           ├── __init__.py
│           └── logger.py
└── sim2real-policies
    ├── setup.py
    └── sim2real_policies
        ├── __init__.py
        ├── after_training_tests
        │   ├── epi_policy_ablation.py
        │   ├── epi_utils.py
        │   ├── network_loading.py
        │   ├── pushing_test.py
        │   ├── reaching_test.py
        │   ├── run_all_tests.py
        │   ├── sliding_test.py
        │   ├── tsne_for_epi.py
        │   ├── tsne_for_epi_pushing.py
        │   └── uposi_policy_ablation.py
        ├── lstm_td3
        │   ├── lstm_td3_multiprocess.py
        │   └── plot.py
        ├── ppo
        │   ├── default_params.py
        │   └── ppo_multiprocess.py
        ├── sys_id
        │   ├── common
        │   │   ├── nets.py
        │   │   ├── operations.py
        │   │   └── utils.py
        │   ├── env_probing_interaction
        │   │   ├── dataset_collection.py
        │   │   ├── epi.py
        │   │   └── understand_separation_loss.ipynb
        │   └── universal_policy_online_system_identification
        │       ├── evaluate.py
        │       ├── osi.py
        │       ├── osi_class.py
        │       └── uposi_separate_td3_multiprocess.py
        ├── td3
        │   ├── default_params.py
        │   └── td3_multiprocess.py
        └── utils
            ├── __init__.py
            ├── buffers.py
            ├── choose_env.py
            ├── envs.py
            ├── evaluate.py
            ├── initialize.py
            ├── load_params.py
            ├── logger.py
            ├── optimizers.py
            ├── policy_networks.py
            ├── rl_utils.py
            └── value_networks.py
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 The Robot Learning Lab at Imperial College London
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/ReadMe.md:
--------------------------------------------------------------------------------
1 | # Crossing the Gap: A Deep Dive into Zero-Shot Sim-to-Real Transfer for Dynamics
2 |
3 | This repository contains the code used for training the manipulation policies in simulation,
4 | before deploying them to the real world using the ROS package found here: https://github.com/eugval/sim2real_dynamics_robot
5 |
6 | ### Dependencies
7 | In order to run this project, the following dependencies need to be satisfied.
8 |
9 | Mujoco : http://www.mujoco.org/ \
10 | Openai gym : https://github.com/openai/gym \
11 | Openai mujoco-py : https://github.com/openai/mujoco-py \
12 | urdf_parser_py : https://github.com/ros/urdf_parser_py \
13 | PyKDL : If the pip version does not work, it will need to be compiled from scratch by following the instructions
14 | at https://github.com/orocos/orocos_kinematics_dynamics/issues/115 \
15 | Robosuite (version 0.1.0): https://github.com/StanfordVL/robosuite
16 |
17 |
18 | The following forked repositories are also required:
19 | kdl_parser, the branch fixing_setup_py from https://github.com/eugval/kdl_parser/tree/fixing_setup_py
20 | simple-pid, the branch allow_arrays from https://github.com/eugval/simple-pid/tree/allow_arrays
21 |
22 | Simpler dependencies (which pip can install automatically) are listed in requirements.txt.
23 |
24 | ### Project Structure
25 | Each of the following needs to be installed independently by navigating to the corresponding folder and running ```pip install -e .```.
26 |
27 | ``robosuite-extra`` implements extra environments and functionalities for the robosuite framework
28 |
29 | ``sim2real-policies`` implements the methods for training and evaluating the different sim2real policies.
30 |
31 | ``sim2real-calibration-characterisation`` implements code to identify and characterise the different sources of error
32 | when transferring from simulation to the real world for the given tasks, as well as scripts to optimise simulator
33 | parameters.
34 |
35 |
36 | ### Acknowledgements
37 |
38 | Our environments are created using Robosuite (https://github.com/ARISE-Initiative/robosuite), parts of which we modified in robosuite-extra to suit our project.
39 |
40 | If you are using this code in your work, please also consider citing our paper:
41 |
42 | ```
43 | @inproceedings{
44 |
45 | valassakis2020crossing,
46 |
47 | title={Crossing the Gap: A Deep Dive into Zero-Shot Sim-to-Real Transfer for Dynamics},
48 |
49 | author={Valassakis, Eugene and Ding, Zihan and Johns, Edward},
50 |
51 | booktitle={International Conference on Intelligent Robots and Systems (IROS)},
52 |
53 | year={2020}
54 |
55 | }
56 | ```
57 |
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
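The demo scripts included further down (e.g. robosuite_extra/reach_env/demo.py) show how the installed packages fit together. Below is a minimal sketch of the same pattern, assuming robosuite 0.1.0 and the packages above are installed; all parameter values here are illustrative, not prescribed defaults.

```python
# Minimal sketch (not part of the repository): build one of the provided
# environments the same way the demo scripts do.
import numpy as np
from robosuite_extra.env_base import make
from robosuite_extra.wrappers import EEFXVelocityControl, GymWrapper, FlattenWrapper

env = make(
    'SawyerReach',
    gripper_type="PushingGripper",
    parameters_to_randomise=[],         # empty list: no dynamics randomisation
    randomise_initial_conditions=False,
    use_camera_obs=False,
    use_object_obs=True,
    reward_shaping=True,
    has_renderer=False,
    has_offscreen_renderer=False,
    control_freq=10,
    horizon=40,
    pid=True,
)

# Gym-style interface with 3-DoF end-effector velocity actions.
env = FlattenWrapper(GymWrapper(EEFXVelocityControl(env, dof=3)))

env.reset()
obs, reward, done, _ = env.step(np.zeros(3))
```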
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | torch
3 | matplotlib
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/__init__.py
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/ReadMe.md:
--------------------------------------------------------------------------------
1 | ### Disclaimer
2 |
3 | Most assets have been taken/modified from Robosuite (version 0.1.0 , https://github.com/ARISE-Initiative/robosuite)
4 | and the sawyer simulator from Rethink Robotics (https://github.com/RethinkRobotics/sawyer_simulator).
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/grippers/meshes/electric_gripper_base.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/grippers/meshes/electric_gripper_base.STL
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/grippers/meshes/paddle_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/grippers/meshes/paddle_tip.STL
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/grippers/meshes/standard_narrow.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/grippers/meshes/standard_narrow.STL
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/grippers/slide_panel_gripper.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/base.stl
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/head.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/head.stl
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l0.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l0.stl
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l1.stl
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l2.stl
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l3.stl
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l4.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l4.stl
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l5.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l5.stl
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l6.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l6.stl
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/pedestal.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/pedestal.stl
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/robot/sawyer/robot.xml:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/controllers/__init__.py:
--------------------------------------------------------------------------------
1 | from robosuite_extra.controllers.sawyer_eef_velocity_controller import SawyerEEFVelocityController
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/controllers/sawyer_eef_velocity_controller.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sys
3 | import os
4 |
5 | if '/opt/ros/melodic/lib/python2.7/dist-packages' in sys.path:
6 | sys.path.remove('/opt/ros/melodic/lib/python2.7/dist-packages')
7 | sys.path.append('/opt/ros/melodic/lib/python2.7/dist-packages')
8 |
9 | import PyKDL
10 | from urdf_parser_py.urdf import URDF
11 | from kdl_parser_py.urdf import treeFromFile, treeFromUrdfModel
12 | import robosuite_extra.utils.transform_utils as T
13 |
14 |
15 | class SawyerEEFVelocityController(object):
16 |
17 | def __init__(self):
18 | self.num_joints = 7
19 | # Get the URDF
20 | urdf_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), '../assets/urdf/sawyer_intera.urdf')
21 |
22 | self.urdf_model = URDF.from_xml_string(open(urdf_path).read())
23 |
24 | # Set the base link, and the tip link.
25 | self.base_link = self.urdf_model.get_root()
26 | self.tip_link = "right_hand"
27 | # # Create a KDL tree from the URDF model. This is an object defining the kinematics of the entire robot.
28 | self.kdl_tree = treeFromUrdfModel(self.urdf_model)
29 | # Create a KDL chain from the tree. This is one specific chain within the overall kinematics model.
30 | self.arm_chain = self.kdl_tree[1].getChain(self.base_link, self.tip_link)
31 | # Create a solver which will be used to compute the forward kinematics
32 | self.forward_kinematics_solver = PyKDL.ChainFkSolverPos_recursive(self.arm_chain)
33 | # Create a solver which will be used to compute the Jacobian
34 | self.jacobian_solver = PyKDL.ChainJntToJacSolver(self.arm_chain)
35 |
36 | #Velocity inverse kinematics solver
37 | self.IK_v_solver = PyKDL.ChainIkSolverVel_pinv(self.arm_chain)
38 |
39 |         # Create a solver to retrieve the joint angles from the eef position
40 | self.IK_solver = PyKDL.ChainIkSolverPos_NR(self.arm_chain, self.forward_kinematics_solver, self.IK_v_solver)
41 |
42 |
43 | def compute_joint_angles_for_endpoint_pose(self, target_endpoint_pose_in_base_frame, q_guess):
44 | pos_kdl = PyKDL.Vector(*target_endpoint_pose_in_base_frame[:3,-1])
45 |
46 | rot_kdl = PyKDL.Rotation(target_endpoint_pose_in_base_frame[0,0],target_endpoint_pose_in_base_frame[0,1], target_endpoint_pose_in_base_frame[0,2],
47 | target_endpoint_pose_in_base_frame[1, 0], target_endpoint_pose_in_base_frame[1,1],target_endpoint_pose_in_base_frame[1,2],
48 | target_endpoint_pose_in_base_frame[2, 0], target_endpoint_pose_in_base_frame[2,1], target_endpoint_pose_in_base_frame[2,2])
49 |
50 | frame_kdl = PyKDL.Frame(rot_kdl, pos_kdl)
51 | q_guess_kdl = self.convert_joint_angles_array_to_kdl_array(q_guess)
52 |
53 | kdl_jnt_array = PyKDL.JntArray(self.num_joints)
54 | self.IK_solver.CartToJnt(q_guess_kdl,frame_kdl,kdl_jnt_array)
55 |
56 | return self.kdl_1Darray_to_numpy(kdl_jnt_array)
57 |
58 |
59 | # Function to compute the robot's joint velocities for a desired Cartesian endpoint (robot gripper) velocity
60 | def compute_joint_velocities_for_endpoint_velocity(self, endpoint_velocity_in_base_frame, joint_angles_array):
61 |
62 | ###
63 | ### Compute the Jacobian inverse at the current set of joint angles
64 | ###
65 | # Create a KDL array of the joint angles
66 | joint_angles_kdl_array = self.convert_joint_angles_array_to_kdl_array(joint_angles_array)
67 | # Compute the Jacobian at the current joint angles
68 | jacobian = self.compute_jacobian(joint_angles_kdl_array)
69 | # Compute the pseudo-inverse
70 | jacobian_inverse = np.linalg.pinv(jacobian)
71 | ###
72 | ### Then, use the Jacobian inverse to compute the required joint velocities
73 | ###
74 | # Multiply the Jacobian inverse by the Cartesian velocity
75 | joint_velocities = jacobian_inverse * np.concatenate([endpoint_velocity_in_base_frame[:3,-1].reshape(3,1),
76 | T.mat2euler(endpoint_velocity_in_base_frame[:3,:3],axes = 'sxyz').reshape(3,1)])
77 | # Return the velocities
78 | return joint_velocities
79 |
80 |
81 |     # Function to convert from a NumPy array of joint angles to a KDL array of joint angles (it's the same data, just a different type of container)
82 | def convert_joint_angles_array_to_kdl_array(self, joint_angles_array):
83 | num_joints = len(joint_angles_array)
84 | kdl_array = PyKDL.JntArray(num_joints)
85 | for i in range(num_joints):
86 | kdl_array[i] = joint_angles_array[i]
87 | return kdl_array
88 |
89 | # Function to compute the jacobian directly with a numpy array
90 | def get_jacobian(self, joint_angles_array):
91 | joint_angles_kdl_array = self.convert_joint_angles_array_to_kdl_array(joint_angles_array)
92 | # Compute the Jacobian at the current joint angles
93 | jacobian = self.compute_jacobian(joint_angles_kdl_array)
94 | return jacobian
95 |
96 | # Function to compute the Jacobian at a particular set of joint angles
97 | def compute_jacobian(self, joint_angles_kdl_array):
98 | jacobian = PyKDL.Jacobian(self.num_joints)
99 | self.jacobian_solver.JntToJac(joint_angles_kdl_array, jacobian)
100 | jacobian_matrix = self.kdl_array_to_numpy_mat(jacobian)
101 | return jacobian_matrix
102 |
103 | # Function to get the endpoint pose in the base frame. It returns a 4x4 NumPy matrix homogeneous transformation
104 | def get_endpoint_pose_matrix(self, joint_angles_array):
105 | # Get the joint angles
106 | joint_angles_kdl_array = self.convert_joint_angles_array_to_kdl_array(joint_angles_array)
107 | # Do the forward kinematics
108 | endpoint_frame = PyKDL.Frame()
109 | self.forward_kinematics_solver.JntToCart(joint_angles_kdl_array, endpoint_frame)
110 | endpoint_frame = self.kdl_frame_to_numpy_mat(endpoint_frame)
111 | return endpoint_frame
112 |
113 |     # Function to convert a KDL array to a NumPy matrix (it's the same data, just a different type of container)
114 | @staticmethod
115 | def kdl_array_to_numpy_mat(kdl_array):
116 | mat = np.mat(np.zeros((kdl_array.rows(), kdl_array.columns())))
117 | for i in range(kdl_array.rows()):
118 | for j in range(kdl_array.columns()):
119 | mat[i, j] = kdl_array[i, j]
120 | return mat
121 |
122 |     # Function to convert a KDL 1D array to a NumPy array (it's the same data, just a different type of container)
123 | @staticmethod
124 | def kdl_1Darray_to_numpy(kdl_array):
125 | array = np.zeros((kdl_array.rows()))
126 | for i in range(kdl_array.rows()):
127 | array[i]=kdl_array[i]
128 |
129 | return array
130 |
131 | @staticmethod
132 | # Function to convert a KDL transformation frame to a NumPy matrix. Code taken from KDL pose math library.
133 | def kdl_frame_to_numpy_mat(kdl_frame):
134 | mat = np.mat(np.zeros([4, 4]))
135 | for i in range(3):
136 | for j in range(3):
137 | mat[i, j] = kdl_frame.M[i, j]
138 | for i in range(3):
139 | mat[i, 3] = kdl_frame.p[i]
140 | mat[3] = [0, 0, 0, 1]
141 | return mat
142 |
143 |
144 | if __name__ =='__main__':
145 |     controller = SawyerEEFVelocityController()
146 | 
147 |     print(controller.get_endpoint_pose_matrix(np.zeros(controller.num_joints)))
148 |
--------------------------------------------------------------------------------
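A usage sketch for the controller above (not part of the repository): `compute_joint_velocities_for_endpoint_velocity` reads the linear velocity from the last column of a 4x4 matrix and the angular part from its rotation block, so a purely translational command can be packed into an identity-rotation homogeneous matrix. The joint configuration below is the Sawyer neutral pose used elsewhere in this repo; PyKDL, urdf_parser_py and the forked kdl_parser must be installed.

```python
# Sketch only: map a desired Cartesian end-effector velocity to joint velocities.
import numpy as np
from robosuite_extra.controllers import SawyerEEFVelocityController

controller = SawyerEEFVelocityController()

# A 7-DoF joint configuration (Sawyer neutral pose).
q = np.array([0.0, -1.18, 0.0, 2.18, 0.0, 0.57, 3.3161])

# Desired end-effector velocity: 5 cm/s along x, no angular component.
# The controller expects a 4x4 matrix: rotation block = angular part,
# last column = linear part.
eef_vel = np.eye(4)
eef_vel[:3, -1] = np.array([0.05, 0.0, 0.0])

joint_vels = controller.compute_joint_velocities_for_endpoint_velocity(eef_vel, q)
print(np.asarray(joint_vels).ravel())          # 7 joint velocities

# Forward kinematics at the same configuration (4x4 homogeneous transform).
print(controller.get_endpoint_pose_matrix(q))
```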
/robosuite-extra/robosuite_extra/env_base/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import *
2 | from .sawyer import *
3 |
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/models/__init__.py
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/models/generated_objects.py:
--------------------------------------------------------------------------------
1 | '''
2 | Taken and modified from the original robosuite repository (version 0.1.0)
3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite
4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite
5 |
6 | '''
7 |
8 | from robosuite.models.objects import MujocoGeneratedObject
9 | from robosuite.models.objects.generated_objects import _get_size , _get_randomized_range, DEFAULT_DENSITY_RANGE
10 | import numpy as np
11 |
12 | class FullyFrictionalBoxObject(MujocoGeneratedObject):
13 | """ Generates a mujoco object than can have different values for the sliding and torsional frictions"""
14 | def __init__(
15 | self,
16 | size=None,
17 | size_max=None,
18 | size_min=None,
19 | density=None,
20 | density_range=None,
21 | friction=None,
22 | rgba="random",
23 | ):
24 |
25 |
26 | size = _get_size(size,
27 | size_max,
28 | size_min,
29 | [0.07, 0.07, 0.07],
30 | [0.03, 0.03, 0.03])
31 | density_range = _get_randomized_range(density,
32 | density_range,
33 | DEFAULT_DENSITY_RANGE)
34 |
35 | super().__init__(
36 | size=size,
37 | rgba=rgba,
38 | density_range=density_range,
39 | friction=friction,
40 | )
41 |
42 | def get_collision_attrib_template(self):
43 | super_template = super().get_collision_attrib_template()
44 | super_template['condim'] = '4'
45 | return super_template
46 |
47 | def sanity_check(self):
48 | assert len(self.size) == 3, "box size should have length 3"
49 |
50 | def get_bottom_offset(self):
51 | return np.array([0, 0, -1 * self.size[2]])
52 |
53 | def get_top_offset(self):
54 | return np.array([0, 0, self.size[2]])
55 |
56 | def get_horizontal_radius(self):
57 | return np.linalg.norm(self.size[0:2], 2)
58 |
59 | # returns a copy, Returns xml body node
60 | def get_collision(self, name=None, site=False):
61 | return self._get_collision(name=name, site=site, ob_type="box")
62 |
63 | # returns a copy, Returns xml body node
64 | def get_visual(self, name=None, site=False):
65 | return self._get_visual(name=name, site=site, ob_type="box")
--------------------------------------------------------------------------------
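A brief, hedged sketch of how FullyFrictionalBoxObject might be instantiated (not from the repository): the three-element friction list is assumed to follow MuJoCo's geom convention (sliding, torsional, rolling), which is what the condim=4 override above exists to exploit.

```python
# Sketch only: a 4 cm cube with an explicit sliding/torsional/rolling friction spec.
from robosuite_extra.models.generated_objects import FullyFrictionalBoxObject

cube = FullyFrictionalBoxObject(
    size=[0.04, 0.04, 0.04],
    density=500,
    friction=[0.5, 5e-3, 1e-4],    # assumed MuJoCo geom friction ordering
    rgba=[1, 0, 0, 1],
)

body = cube.get_collision(name="push_object", site=True)   # XML element for an MJCF task
print(cube.get_horizontal_radius(), cube.get_bottom_offset())
```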
/robosuite-extra/robosuite_extra/models/grippers/__init__.py:
--------------------------------------------------------------------------------
1 | from .gripper_factory import gripper_factory
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/models/grippers/gripper_factory.py:
--------------------------------------------------------------------------------
1 | '''
2 | Taken and modified from the original robosuite repository (version 0.1.0)
3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite
4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite
5 |
6 | Overrides the gripper factory of robosuite in order to add the new grippers for our tasks
7 | '''
8 |
9 |
10 | from robosuite.models.grippers import gripper_factory as robosuite_gripper_factory
11 | from .pushing_gripper import PushingGripper
12 | from .slide_panel_gripper import SlidePanelGripper
13 |
14 | def gripper_factory(name):
15 | if name == "PushingGripper":
16 | # Closed two finger gripper
17 | return PushingGripper()
18 | if name == "SlidePanelGripper":
19 | return SlidePanelGripper()
20 | else:
21 | return robosuite_gripper_factory(name)
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
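The factory resolves the two new gripper names itself and defers anything else to robosuite's original gripper_factory, so existing gripper names keep working. A small sketch:

```python
# Sketch only: resolving the new gripper names through the extended factory.
from robosuite_extra.models.grippers import gripper_factory

pushing = gripper_factory("PushingGripper")     # always-closed two-finger gripper
panel = gripper_factory("SlidePanelGripper")    # fixed slide panel, zero DoF
print(pushing.dof, panel.dof)                   # 0 0
```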
/robosuite-extra/robosuite_extra/models/grippers/pushing_gripper.py:
--------------------------------------------------------------------------------
1 | '''
2 | Taken and modified from the original robosuite repository (version 0.1.0)
3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite
4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite
5 |
6 | A version of TwoFingerGripper but always closed.
7 | '''
8 |
9 |
10 | import numpy as np
11 | from robosuite.models.grippers.two_finger_gripper import TwoFingerGripper
12 |
13 |
14 | class PushingGripper(TwoFingerGripper):
15 | """
16 | Same as TwoFingerGripper, but always closed
17 | """
18 | @property
19 | def init_qpos(self):
20 | # Fingers always closed
21 | return np.array([-0.020833, 0.020833])
22 |
23 | def format_action(self, action):
24 | return np.array([1, -1])
25 |
26 | @property
27 | def dof(self):
28 | return 0
29 |
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/models/grippers/slide_panel_gripper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import os
3 | from robosuite.models.grippers.gripper import Gripper
4 |
5 | class SlidePanelGripper(Gripper):
6 | """
7 |     A gripper carrying a fixed slide panel, with no controllable degrees of freedom.
8 | """
9 |
10 | def __init__(self):
11 | file_path = os.path.dirname(os.path.realpath(__file__))
12 | super().__init__(os.path.join(os.path.join(file_path , "../../assets/grippers/slide_panel_gripper.xml")))
13 |
14 | def format_action(self, action):
15 | return np.array([])
16 |
17 | @property
18 | def init_qpos(self):
19 | return np.array([])
20 |
21 | @property
22 | def joints(self):
23 | return []
24 |
25 | @property
26 | def dof(self):
27 | return 0
28 |
29 | @property
30 | def visualization_sites(self):
31 | return ["grip_site", "slide_panel_centre", "grip_site_cylinder" ]
32 |
33 | def contact_geoms(self):
34 | return [
35 | "r_finger_g0",
36 | "r_finger_g1",
37 | "l_finger_g0",
38 | "l_finger_g1",
39 | "r_fingertip_g0",
40 | "l_fingertip_g0",
41 | "slide_panel_g"
42 | ]
43 |
44 | @property
45 | def contact_geom2body(self):
46 | return {
47 | "r_finger_g0": "r_gripper_r_finger",
48 | "r_finger_g1": "r_gripper_r_finger",
49 | "l_finger_g0": "r_gripper_l_finger",
50 | "l_finger_g1": "r_gripper_l_finger",
51 | "r_fingertip_g0": "r_gripper_r_finger_tip",
52 | "l_fingertip_g0": "r_gripper_l_finger_tip",
53 | "slide_panel_g": "slide_panel"
54 | }
55 |
56 | def get_contact_body_from_geom(self, geom):
57 | return self.contact_geom2body[geom]
58 |
59 | @property
60 | def left_finger_geoms(self):
61 | return ["l_finger_g0", "l_finger_g1", "l_fingertip_g0"]
62 |
63 | @property
64 | def right_finger_geoms(self):
65 | return ["r_finger_g0", "r_finger_g1", "r_fingertip_g0"]
66 |
67 |
68 |
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/models/robot/__init__.py:
--------------------------------------------------------------------------------
1 | from .sawyer_robot import Sawyer
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/models/robot/sawyer_robot.py:
--------------------------------------------------------------------------------
1 | '''
2 | Taken and modified from the original robosuite repository (version 0.1.0)
3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite
4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite
5 |
6 | '''
7 |
8 | import numpy as np
9 | from robosuite.models.robots.robot import Robot
10 | from robosuite.utils.mjcf_utils import array_to_string
11 | import os
12 |
13 | class Sawyer(Robot):
14 | """Sawyer is a witty single-arm robot designed by Rethink Robotics."""
15 |
16 | def __init__(self, torque=False):
17 | self.torque = torque
18 | file_path = os.path.dirname(os.path.realpath(__file__))
19 | if (torque):
20 | super().__init__(os.path.join(os.path.join(file_path , "../../assets/robot/sawyer/robot_torque.xml")))
21 | else:
22 | super().__init__(os.path.join(os.path.join(file_path , "../../assets/robot/sawyer/robot.xml")))
23 |
24 | self.bottom_offset = np.array([0, 0, -0.913])
25 |
26 | def set_base_xpos(self, pos):
27 | """Places the robot on position @pos."""
28 | node = self.worldbody.find("./body[@name='base']")
29 | node.set("pos", array_to_string(pos - self.bottom_offset))
30 |
31 | @property
32 | def dof(self):
33 | return 7
34 |
35 | @property
36 | def joints(self):
37 | return ["right_j{}".format(x) for x in range(7)]
38 |
39 | @property
40 | def links(self):
41 | return ["right_l{}".format(x) for x in range(7)]
42 |
43 | @property
44 | def actuators(self):
45 | if (self.torque):
46 | return ["torq_right_j{}".format(x) for x in range(7)]
47 | else:
48 | return ["vel_right_j{}".format(x) for x in range(7)]
49 |
50 | @property
51 | def init_qpos(self):
52 | return np.array([0, -1.18, 0.00, 2.18, 0.00, 0.57, 3.3161])
53 |
54 | @property
55 | def joint_velocity_limits(self):
56 | return np.array([[-1.74, 1.74],
57 | [-1.328, 1.328],
58 | [-1.957, 1.957],
59 | [-1.957, 1.957],
60 | [-3.485, 3.485],
61 | [-3.485, 3.485],
62 | [-4.545, 4.545]])
63 |
64 | @property
65 | def velocity_pid_gains(self):
66 | return {'right_j0': {'p': 8.55378248e+01, 'i': 4.58211002e+00, 'd': 9.80858595e-02},
67 | 'right_j1': {'p': 7.72400386e+01, 'i': 2.57669899e+00, 'd': 4.23858218e-02},
68 | 'right_j2': {'p':5.35079700e+01, 'i':4.30959305e+00, 'd': 0.00000000e+00},
69 | 'right_j3': {'p': 4.88937010e+01, 'i':2.91438891e+00, 'd': 4.52197212e-02},
70 | 'right_j4': {'p': 3.57707442e+01, 'i': 3.14960177e+00, 'd': 2.03120628e-01},
71 | 'right_j5': {'p': 2.82424289e+01, 'i':2.18658812e+00, 'd': 2.03629986e-01},
72 | 'right_j6': {'p': 1.62834673e+01, 'i': 1.71284099e+00, 'd': 8.44274724e-02},
73 | }
74 |
75 |
76 | @property
77 | def position_pid_gains(self):
78 |
79 | # array([77.39238042, 85.93730043, 53.35490038, 57.10317523, 27.80825017,
80 | # 3.17180638, 2.90868948, 4.57497262, 3.56435536, 2.47908628,
81 | # 0.45245024, 0.31105989, 0.83930337, 0.76906677, 0.42219411])
82 |
83 | return {'right_j0': {'p': 77.39238042, 'i': 3.17180638, 'd': 0.45245024},
84 | 'right_j1': {'p': 85.93730043, 'i': 2.90868948, 'd': 0.31105989},
85 | 'right_j2': {'p': 53.35490038, 'i':4.57497262, 'd': 0.83930337},
86 | 'right_j3': {'p': 57.10317523, 'i':3.56435536, 'd': 0.76906677},
87 | 'right_j4': {'p': 27.808250171, 'i': 2.47908628, 'd': 0.42219411},
88 | 'right_j5': {'p': 2.82424289e+01, 'i':2.18658812e+00, 'd': 2.03629986e-01},
89 | 'right_j6': {'p': 1.62834673e+01, 'i': 1.71284099e+00, 'd': 8.44274724e-02},
90 | }
91 |
92 | # return {'right_j0': {'p': 8.55378248e+01, 'i': 4.58211002e+00, 'd': 9.80858595e-02},
93 | # 'right_j1': {'p': 7.72400386e+01, 'i': 2.57669899e+00, 'd': 4.23858218e-02},
94 | # 'right_j2': {'p': 5.35079700e+01, 'i': 4.30959305e+00, 'd': 0.00000000e+00},
95 | # 'right_j3': {'p': 4.88937010e+01, 'i': 2.91438891e+00, 'd': 4.52197212e-02},
96 | # 'right_j4': {'p': 3.57707442e+01, 'i': 3.14960177e+00, 'd': 2.03120628e-01},
97 | # 'right_j5': {'p': 2.82424289e+01, 'i': 2.18658812e+00, 'd': 2.03629986e-01},
98 | # 'right_j6': {'p': 1.62834673e+01, 'i': 1.71284099e+00, 'd': 8.44274724e-02},
99 | # }
100 |
101 |
102 |
--------------------------------------------------------------------------------
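A sketch of querying the model above (not part of the repository): `torque=True` loads the torque-actuated MJCF, the default loads the velocity-actuated one, and the PID gain properties expose the calibrated controller gains listed in the class.

```python
# Sketch only: instantiate the Sawyer MJCF model and inspect its actuation metadata.
from robosuite_extra.models.robot import Sawyer

robot = Sawyer(torque=False)                  # velocity-actuated variant (robot.xml)
print(robot.dof)                              # 7
print(robot.joints)                           # ['right_j0', ..., 'right_j6']
print(robot.actuators)                        # ['vel_right_j0', ...]
print(robot.joint_velocity_limits)            # (7, 2) array of joint velocity limits
print(robot.velocity_pid_gains['right_j0'])   # calibrated PID gains for joint 0
```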
/robosuite-extra/robosuite_extra/models/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | from .placement_sampler import ObjectPositionSampler, UniformRandomSampler, UniformSelectiveSampler
2 |
3 |
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/models/tasks/placement_sampler.py:
--------------------------------------------------------------------------------
1 | '''
2 | Taken and modified from the original robosuite repository (version 0.1.0)
3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite
4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite
5 |
6 | '''
7 |
8 | from robosuite.models.tasks.placement_sampler import ObjectPositionSampler, UniformRandomSampler
9 | import collections.abc
10 | import numpy as np
11 |
12 |
13 | def base_sample_new(self, **kwargs):
14 | """
15 | Args:
16 | object_index: index of the current object being sampled
17 | Returns:
18 | xpos((float * 3) * n_obj): x,y,z position of the objects in world frame
19 | xquat((float * 4) * n_obj): quaternion of the objects
20 | """
21 | raise NotImplementedError
22 |
23 | def sample_quat_new(self):
24 | if self.z_rotation is None or self.z_rotation is True:
25 | rot_angle = np.random.uniform(high=2 * np.pi, low=0)
26 |     elif isinstance(self.z_rotation, collections.abc.Iterable):
27 | rot_angle = np.random.uniform(
28 | high=max(self.z_rotation), low=min(self.z_rotation)
29 | )
30 | else:
31 | rot_angle = self.z_rotation
32 |
33 | return [np.cos(rot_angle / 2), 0, 0, np.sin(rot_angle / 2)]
34 |
35 | UniformRandomSampler.sample_quat = sample_quat_new
36 | ObjectPositionSampler.sample = base_sample_new
37 |
38 |
39 |
40 | class UniformSelectiveSampler(ObjectPositionSampler):
41 | """Places all objects within the table uniformly random."""
42 |
43 | def __init__(
44 | self,
45 | x_range=None,
46 | y_range=None,
47 | ensure_object_boundary_in_range=True,
48 | z_rotation=False,
49 | np_random=None
50 | ):
51 | """
52 | Args:
53 | x_range(float * 2): override the x_range used to uniformly place objects
54 | if None, default to x-range of table
55 | y_range(float * 2): override the y_range used to uniformly place objects
56 | if None default to y-range of table
57 | x_range and y_range are both with respect to (0,0) = center of table.
58 | ensure_object_boundary_in_range:
59 |                 True: The centre of the object is at position:
60 |                     [uniform(min x_range + radius, max x_range - radius)], [uniform(min y_range + radius, max y_range - radius)]
61 |                 False:
62 |                     [uniform(min x_range, max x_range)], [uniform(min y_range, max y_range)]
63 |             z_rotation:
64 |                 None: Add a uniform random z-rotation
65 |                 iterable (a,b): Uniformly randomise the rotation angle between a and b (in radians)
66 |                 value: Add a fixed-angle z-rotation
67 | """
68 | self.x_range = x_range
69 | self.y_range = y_range
70 | self.ensure_object_boundary_in_range = ensure_object_boundary_in_range
71 | self.z_rotation = z_rotation
72 | self.np_random = np_random if np_random is not None else np.random
73 |
74 |
75 | def set_random_number_generator(self, np_random):
76 | self.np_random = np_random
77 |
78 | def set_ranges(self, x_range=None, y_range = None, z_rotation_range = None):
79 | if(x_range is not None):
80 | self.x_range = x_range
81 | if(y_range is not None):
82 | self.y_range = y_range
83 |
84 | if(z_rotation_range is not None):
85 | self.z_rotation = z_rotation_range
86 |
87 | def sample_obj_idx(self):
88 | return self.np_random.choice(self.n_obj)
89 |
90 | def sample_x(self, object_horizontal_radius):
91 | x_range = self.x_range
92 | if x_range is None:
93 | x_range = [-self.table_size[0] / 2, self.table_size[0] / 2]
94 | minimum = min(x_range)
95 | maximum = max(x_range)
96 | if self.ensure_object_boundary_in_range:
97 | minimum += object_horizontal_radius
98 | maximum -= object_horizontal_radius
99 | return self.np_random.uniform(high=maximum, low=minimum)
100 |
101 | def sample_y(self, object_horizontal_radius):
102 | y_range = self.y_range
103 | if y_range is None:
104 |             y_range = [-self.table_size[1] / 2, self.table_size[1] / 2]
105 | minimum = min(y_range)
106 | maximum = max(y_range)
107 | if self.ensure_object_boundary_in_range:
108 | minimum += object_horizontal_radius
109 | maximum -= object_horizontal_radius
110 | return self.np_random.uniform(high=maximum, low=minimum)
111 |
112 | def sample_quat(self):
113 | if self.z_rotation is None or self.z_rotation is True:
114 | rot_angle = self.np_random.uniform(high=2 * np.pi, low=0)
115 |         elif isinstance(self.z_rotation, collections.abc.Iterable):
116 | rot_angle = self.np_random.uniform(
117 | high=max(self.z_rotation), low=min(self.z_rotation)
118 | )
119 | elif (isinstance(self.z_rotation, float) or isinstance(self.z_rotation, int)):
120 | rot_angle = self.z_rotation
121 | else:
122 | return [1, 0, 0, 0]
123 |
124 | return [np.cos(rot_angle / 2), 0, 0, np.sin(rot_angle / 2)]
125 |
126 | def sample(self, push_object_index):
127 | pos_arr = []
128 | quat_arr = []
129 | placed_objects = []
130 | for index, obj_mjcf in enumerate(self.mujoco_objects):
131 | horizontal_radius = obj_mjcf.get_horizontal_radius()
132 | bottom_offset = obj_mjcf.get_bottom_offset()
133 |
134 | if index != push_object_index:
135 | pos_arr.append(np.array([0.0, 0.0, 0.0]))
136 | quat_arr.append([1, 0, 0, 0])
137 |
138 | else:
139 | object_x = self.sample_x(horizontal_radius)
140 | object_y = self.sample_y(horizontal_radius)
141 |
142 | pos = (
143 | self.table_top_offset
144 | - bottom_offset
145 | + np.array([object_x, object_y, 0])
146 | )
147 | placed_objects.append((object_x, object_y, horizontal_radius))
148 | # random z-rotation
149 |
150 | quat = self.sample_quat()
151 |
152 | quat_arr.append(quat)
153 | pos_arr.append(pos)
154 |
155 | return pos_arr, quat_arr
156 |
--------------------------------------------------------------------------------
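A stand-alone sketch of the sampler (not from the repository), using robosuite's BoxObject as a stand-in for the task's generated objects. The `setup(mujoco_objects, table_top_offset, table_size)` call is inherited from robosuite's ObjectPositionSampler and is invoked this way by PushTask below; the table size matches the push demo, while the table-top offset, ranges and object sizes are illustrative.

```python
# Sketch only: sample a pose for one selected object, leaving the others at the origin.
import numpy as np
from robosuite.models.objects import BoxObject
from robosuite_extra.models.tasks import UniformSelectiveSampler

objects = [BoxObject(size=[0.03, 0.03, 0.03]), BoxObject(size=[0.05, 0.05, 0.05])]

sampler = UniformSelectiveSampler(z_rotation=False,
                                  np_random=np.random.RandomState(0))
# Inherited setup: (mujoco_objects, table_top_offset, table_size).
sampler.setup(objects, np.array([0.16, 0.0, 0.82]), (0.8, 1.6, 0.719))
sampler.set_ranges(x_range=[-0.1, 0.1], y_range=[-0.2, 0.2])

# Only the object at push_object_index gets a random pose.
pos_arr, quat_arr = sampler.sample(push_object_index=0)
print(pos_arr[0], quat_arr[0])
```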
/robosuite-extra/robosuite_extra/push_env/__init__.py:
--------------------------------------------------------------------------------
1 | from robosuite_extra.push_env.push_task import PushTask
2 | from robosuite_extra.push_env.sawyer_push import SawyerPush
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/push_env/demo.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from robosuite_extra.env_base import make
3 | from robosuite_extra.push_env.sawyer_push import SawyerPush
4 | import time
5 | from robosuite_extra.wrappers import EEFXVelocityControl, GymWrapper, FlattenWrapper
6 |
7 |
8 | start_pos = np.array([0.,-16.75e-2])
9 | goal1 = np.array([3e-3,15.15e-2])
10 | goal2 = np.array([9.5e-2,11.01e-2])
11 | goal3 = np.array([-11.5e-2,17.2e-2])
12 |
13 | if __name__ == "__main__":
14 | env=make(
15 | 'SawyerPush',
16 | gripper_type="PushingGripper",
17 | parameters_to_randomise = [],
18 | randomise_initial_conditions = False,
19 | table_full_size=(0.8, 1.6, 0.719),
20 | table_friction=(0.001, 5e-3, 1e-4),
21 | use_camera_obs=False,
22 | use_object_obs=True,
23 | reward_shaping=True,
24 | placement_initializer=None,
25 | gripper_visualization=True,
26 | use_indicator_object=False,
27 | has_renderer=True,
28 | has_offscreen_renderer=False,
29 | render_collision_mesh=False,
30 | render_visual_mesh=True,
31 | control_freq=10.,
32 | horizon=80,
33 | ignore_done=False,
34 | camera_name="frontview",
35 | camera_height=256,
36 | camera_width=256,
37 | camera_depth=False,
38 | pid = True,
39 | )
40 |
41 | env = FlattenWrapper( GymWrapper(EEFXVelocityControl(env, dof=2)))
42 |
43 | env._set_gripper_neutral_offset(*start_pos)
44 | env._set_goal_neutral_offset(*goal1)
45 | env.reset()
46 |
47 | env.viewer.set_camera(camera_id=0)
48 | env.render()
49 |
50 | start_time = time.time()
51 | for i in range(0,120):
52 | action = np.zeros(2)
53 | action[0]=1.0
54 |
55 | obs, reward, done, _ = env.step(action)
56 | env.render()
57 |
58 | if(done):
59 | reset_time1 = time.time()
60 | env.reset()
61 | env.viewer.set_camera(camera_id=0)
62 | env.render()
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/push_env/push_task.py:
--------------------------------------------------------------------------------
1 | '''
2 | Taken and modified from the original robosuite repository (version 0.1.0)
3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite
4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite
5 |
6 | '''
7 |
8 | from robosuite.models.tasks import Task
9 | from robosuite_extra.models.tasks import UniformSelectiveSampler
10 | from robosuite.utils.mjcf_utils import new_joint, array_to_string
11 | import numpy as np
12 |
13 | class PushTask(Task):
14 | """
15 | Creates MJCF model of a push task.
16 |
17 | A tabletop task consists of one robot pushing an object to a visual goal on a
18 | tabletop. This class combines the robot, the table
19 |     arena, and the objects into a single MJCF model.
20 | """
21 |
22 | def __init__(self, mujoco_arena, mujoco_robot, mujoco_goal, mujoco_objects, initializer):
23 | """
24 | Args:
25 | mujoco_arena: MJCF model of robot workspace
26 | mujoco_robot: MJCF model of robot model
27 | mujoco_objects: MJCF model of the objects to be pushed
28 | mujoco_goal: MJCF model of the goal
29 | initializer: placement sampler to initialize object positions.
30 | """
31 | super().__init__()
32 |
33 | self.merge_arena(mujoco_arena)
34 | self.merge_robot(mujoco_robot)
35 | self.merge_objects(mujoco_objects)
36 | self.merge_visual(mujoco_goal)
37 | if initializer is None:
38 | initializer = UniformSelectiveSampler()
39 |
40 |
41 | self.initializer = initializer
42 | self.initializer.setup(self.mujoco_objects, self.table_top_offset, self.table_size)
43 | self.change_push_object()
44 |
45 | def merge_robot(self, mujoco_robot):
46 | """Adds robot model to the MJCF model."""
47 | self.robot = mujoco_robot
48 | self.merge(mujoco_robot)
49 |
50 | def merge_arena(self, mujoco_arena):
51 | """Adds arena model to the MJCF model."""
52 | self.arena = mujoco_arena
53 | self.table_top_offset = mujoco_arena.table_top_abs
54 | self.table_size = mujoco_arena.table_full_size
55 | self.merge(mujoco_arena)
56 |
57 | def merge_objects(self, mujoco_objects ):
58 | """Adds a physical object to the MJCF model."""
59 | self.xml_objects = []
60 | self.mujoco_objects = []
61 | self.object_names = []
62 |
63 | self.push_object_idx = 0
64 | self.max_horizontal_radius = 0
65 |
66 | for obj_name, obj_mjcf in mujoco_objects.items():
67 | self.mujoco_objects.append(obj_mjcf)
68 | self.object_names.append(obj_name)
69 |
70 | self.merge_asset(obj_mjcf)
71 | # Load object
72 | obj = obj_mjcf.get_collision(name=obj_name, site=True)
73 | obj.append(new_joint(name=obj_name, type="free"))
74 | self.xml_objects.append(obj)
75 | self.worldbody.append(obj)
76 | self.max_horizontal_radius = max(
77 | self.max_horizontal_radius, obj_mjcf.get_horizontal_radius()
78 | )
79 |
80 |
81 | def merge_visual(self, mujoco_goal):
82 | """Adds the visual goal to the MJCF model."""
83 | self.mujoco_goal = mujoco_goal
84 | self.merge_asset(mujoco_goal)
85 | # Load goal
86 | goal_visual = mujoco_goal.get_visual(name='goal', site=True)
87 |
88 | self.xml_goal = goal_visual
89 |
90 | self.worldbody.append(goal_visual)
91 |
92 |
93 | def change_push_object(self, idx = None):
94 | if(idx is None):
95 | idx = np.random.choice(len(self.mujoco_objects))
96 |
97 | assert idx < len(self.mujoco_objects), "idx out of range for selecting pushing object"
98 |
99 | self.push_object_idx = idx
100 |
101 |
102 | def place_objects(self):
103 | """Places objects randomly until no collisions or max iterations hit."""
104 |
105 | pos_arr, quat_arr = self.initializer.sample(self.push_object_idx)
106 | for i in range(len(self.xml_objects)):
107 | self.xml_objects[i].set("pos", array_to_string(pos_arr[i]))
108 | self.xml_objects[i].set("quat", array_to_string(quat_arr[i]))
109 |
110 |
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/reach_env/__init__.py:
--------------------------------------------------------------------------------
1 | from robosuite_extra.reach_env.reach_task import ReachTask
2 | from robosuite_extra.reach_env.sawyer_reach import SawyerReach
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/reach_env/demo.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from robosuite_extra.env_base import make
3 | from robosuite_extra.reach_env.sawyer_reach import SawyerReach
4 | import time
5 | from robosuite_extra.wrappers import EEFXVelocityControl, GymWrapper, FlattenWrapper
6 |
7 |
8 | goal1 = np.array([0.,0.])
9 | goal2 = np.array([-4.465e-2,5.85e-2])
10 | goal3 = np.array([8.37e-2,-5.78e-2])
11 |
12 | if __name__ == "__main__":
13 | env=make(
14 | 'SawyerReach',
15 | gripper_type="PushingGripper",
16 | parameters_to_randomise = [],
17 | randomise_initial_conditions=False,
18 | table_full_size=(0.8, 1.6, 0.719),
19 | use_camera_obs=False,
20 | use_object_obs=True,
21 | reward_shaping=True,
22 | use_indicator_object=False,
23 | has_renderer=False,
24 | has_offscreen_renderer=False,
25 | render_collision_mesh=False,
26 | render_visual_mesh=True,
27 | control_freq=10,
28 | horizon=40,
29 | ignore_done=False,
30 | camera_name="frontview",
31 | camera_height=256,
32 | camera_width=256,
33 | camera_depth=False,
34 | pid=True,
35 | success_radius=0.01
36 | )
37 |
38 | env = FlattenWrapper( GymWrapper(EEFXVelocityControl(env, dof=3,)))
39 | env.renderer_on()
40 | env._set_goal_neutral_offset(*goal2)
41 | env.reset()
42 |
43 | if (env.has_renderer):
44 | env.viewer.set_camera(camera_id=0)
45 | env.render()
46 |
47 | start_time = time.time()
48 | for i in range(1,120):
49 |
50 | action = np.zeros((3,))*0.5
51 | action[1]=1.0
52 |
53 | obs, reward, done, _ = env.step(action)
54 |
55 | if(env.has_renderer):
56 | env.render()
57 |
58 | if(done):
59 | time.sleep(2)
60 | env.reset()
61 | if (env.has_renderer):
62 | env.viewer.set_camera(camera_id=0)
63 | env.render()
64 |
65 |
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/reach_env/reach_task.py:
--------------------------------------------------------------------------------
1 | '''
2 | Taken and modified from the original robosuite repository (version 0.1.0)
3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite
4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite
5 |
6 | '''
7 |
8 | from robosuite.models.tasks import Task
9 | class ReachTask(Task):
10 | """
11 |     Creates the MJCF model of a reach task.
12 | 
13 |     A tabletop task consisting of one robot reaching towards a visual goal on a
14 |     tabletop. This class combines the robot, the table
15 |     arena, and the goal into a single MJCF model.
16 | """
17 |
18 | def __init__(self, mujoco_arena, mujoco_robot, mujoco_goal):
19 | """
20 | Args:
21 | mujoco_arena: MJCF model of robot workspace
22 | mujoco_robot: MJCF model of robot model
23 | mujoco_goal: MJCF model of the goal
24 | """
25 | super().__init__()
26 |
27 | self.merge_arena(mujoco_arena)
28 | self.merge_robot(mujoco_robot)
29 | self.merge_visual(mujoco_goal)
30 |
31 |
32 | def merge_robot(self, mujoco_robot):
33 | """Adds robot model to the MJCF model."""
34 | self.robot = mujoco_robot
35 | self.merge(mujoco_robot)
36 |
37 | def merge_arena(self, mujoco_arena):
38 | """Adds arena model to the MJCF model."""
39 | self.arena = mujoco_arena
40 | self.table_top_offset = mujoco_arena.table_top_abs
41 | self.table_size = mujoco_arena.table_full_size
42 | self.merge(mujoco_arena)
43 |
44 | def merge_visual(self, mujoco_goal):
45 | """Adds the visual goal to the MJCF model."""
46 | self.mujoco_goal = mujoco_goal
47 | self.merge_asset(mujoco_goal)
48 | # Load goal
49 | goal_visual = mujoco_goal.get_visual(name='goal', site=True)
50 |
51 | self.xml_goal = goal_visual
52 |
53 | self.worldbody.append(goal_visual)
54 |
55 |
56 |
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/slide_env/__init__.py:
--------------------------------------------------------------------------------
1 | from robosuite_extra.slide_env.slide_task import SlideTask
2 | from robosuite_extra.slide_env.sawyer_slide import SawyerSlide
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/slide_env/demo.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from robosuite_extra.env_base import make
3 | from robosuite_extra.slide_env.sawyer_slide import SawyerSlide
4 | import time
5 |
6 | if __name__ == "__main__":
7 | env=make(
8 | 'SawyerSlide',
9 | gripper_type="SlidePanelGripper",
10 | parameters_to_randomise = [],
11 | randomise_initial_conditions = True,
12 | use_camera_obs=False,
13 | use_object_obs=True,
14 | reward_shaping=True,
15 | use_indicator_object=False,
16 | has_renderer=True,
17 | has_offscreen_renderer=True,
18 | render_collision_mesh=False,
19 | render_visual_mesh=True,
20 | control_freq=10,
21 | horizon=30,
22 | ignore_done=False,
23 | camera_name="frontview",
24 | camera_height=256,
25 | camera_width=256,
26 | camera_depth=False,
27 | pid=True,
28 | )
29 |
30 | env.reset()
31 | env.viewer.set_camera(camera_id=0)
32 |
33 | env.render()
34 | for i in range(1,20000):
35 |
36 | action = np.zeros(2)
37 | action[0] = 1.
38 | obs, reward, done, _ = env.step(action)
39 | env.render()
40 |
41 | if(done):
42 | time.sleep(2)
43 | env.reset()
44 | env.viewer.set_camera(camera_id=0)
45 | env.render()
46 |
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/slide_env/slide_task.py:
--------------------------------------------------------------------------------
1 | from robosuite.models.tasks import Task
2 | from robosuite.utils.mjcf_utils import new_joint, array_to_string
3 | import numpy as np
4 |
5 | class SlideTask(Task):
6 | """
7 |     Creates the MJCF model of a slide task.
8 | """
9 |
10 | def __init__(self, mujoco_arena, mujoco_robot, slide_object):
11 | """
12 | Args:
13 | mujoco_arena: MJCF model of robot workspace
14 | mujoco_robot: MJCF model of robot model
15 |             slide_object: MJCF model of the object to be slid
16 | """
17 | super().__init__()
18 |
19 | self.merge_arena(mujoco_arena)
20 | self.merge_robot(mujoco_robot)
21 | self.merge_objects(slide_object)
22 |
23 | def merge_robot(self, mujoco_robot):
24 | """Adds robot model to the MJCF model."""
25 | self.robot = mujoco_robot
26 | self.merge(mujoco_robot)
27 |
28 | def merge_arena(self, mujoco_arena):
29 | """Adds arena model to the MJCF model."""
30 | self.arena = mujoco_arena
31 | self.merge(mujoco_arena)
32 |
33 | def merge_objects(self, slide_object):
34 | """Adds a physical object to the MJCF model."""
35 | self.push_object_idx = 0
36 | self.max_horizontal_radius = 0
37 |
38 | self.slide_object = slide_object
39 |
40 |
41 | self.merge_asset(self.slide_object)
42 |
43 | # Add object to xml
44 | obj = self.slide_object.get_collision(name="slide_object", site=True)
45 | obj.append(new_joint(name="slide_object", type="free"))
46 | self.xml_object = obj
47 | self.worldbody.append(obj)
48 | self.max_horizontal_radius = self.slide_object.get_horizontal_radius()
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/utils/__init__.py
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/utils/domain_randomisation_utils.py:
--------------------------------------------------------------------------------
1 | def _scale_xml_float_attribute(node, attribute, scaling_factor):
2 | cur_value = float(node.get(attribute))
3 | new_value = cur_value * scaling_factor
4 | node.set(attribute, str(new_value))
--------------------------------------------------------------------------------
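A quick sketch of the helper in use on a plain ElementTree node, e.g. scaling a joint's damping attribute in an MJCF snippet (the XML here is illustrative only):

```python
# Sketch only: scale a single float XML attribute in place.
import xml.etree.ElementTree as ET
from robosuite_extra.utils.domain_randomisation_utils import _scale_xml_float_attribute

root = ET.fromstring('<mujoco><joint name="right_j0" damping="0.1"/></mujoco>')
joint = root.find("./joint[@name='right_j0']")

_scale_xml_float_attribute(joint, "damping", scaling_factor=1.5)
print(joint.get("damping"))   # ~0.15, stored back as a string
```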
/robosuite-extra/robosuite_extra/utils/transform_utils.py:
--------------------------------------------------------------------------------
1 | from robosuite.utils.transform_utils import *
2 | import numpy as np
3 |
4 |
5 |
6 | def eulerx2mat(angle):
7 | cos = np.cos(angle)
8 | sin = np.sin(angle)
9 | return np.array([[1.,0.,0.],
10 | [0.,cos ,-sin],
11 | [0.,sin, cos]])
12 |
13 | def eulery2mat(angle):
14 | cos = np.cos(angle)
15 | sin = np.sin(angle)
16 | return np.array([[cos, 0., sin],
17 | [0.,1.,0.],
18 | [-sin, 0., cos]])
19 |
20 | def eulerz2mat(angle):
21 | cos = np.cos(angle)
22 | sin = np.sin(angle)
23 | return np.array([[cos,-sin, 0.],
24 | [sin, cos, 0.],
25 | [0.,0.,1.]])
26 |
27 |
28 | def euler2mat(euler_x,euler_y, euler_z):
29 | r_x = eulerx2mat(euler_x)
30 | r_y = eulery2mat(euler_y)
31 |     r_z = eulerz2mat(euler_z)
32 | return r_z.dot(r_y.dot(r_x))
33 |
34 |
--------------------------------------------------------------------------------
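A small check of the composition convention used by euler2mat above (extrinsic x-y-z, i.e. R = Rz · Ry · Rx); robosuite must be installed, since the module re-exports its transform utilities.

```python
# Sketch only: euler2mat composes the single-axis rotations as R = Rz @ Ry @ Rx.
import numpy as np
from robosuite_extra.utils.transform_utils import (euler2mat, eulerx2mat,
                                                   eulery2mat, eulerz2mat)

ax, ay, az = 0.3, -0.2, 1.1
R = euler2mat(ax, ay, az)
R_manual = eulerz2mat(az).dot(eulery2mat(ay)).dot(eulerx2mat(ax))

print(np.allclose(R, R_manual))              # True
print(np.allclose(R.T.dot(R), np.eye(3)))    # rotation matrices are orthonormal
```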
/robosuite-extra/robosuite_extra/wrappers/__init__.py:
--------------------------------------------------------------------------------
1 | from robosuite_extra.wrappers.gym_wrapper import GymWrapper, FlattenWrapper
2 | from robosuite_extra.wrappers.eef_velocy_control_wrapper import EEFXVelocityControl
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/wrappers/eef_velocy_control_wrapper.py:
--------------------------------------------------------------------------------
1 | """
2 |
3 | Taken and modified from the original robosuite repository (version 0.1.0)
4 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite
5 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite
6 |
7 |
8 |
9 |
10 | This module implements a wrapper for controlling the robot through end effector
11 | movements instead of joint velocities. This is useful in learning pipelines
12 | that want to output actions in end effector space instead of joint space.
13 | """
14 |
15 | import numpy as np
16 | from robosuite.wrappers import Wrapper
17 | from robosuite_extra.controllers import SawyerEEFVelocityController
18 |
19 |
20 | class EEFXVelocityControl(Wrapper):
21 | env = None
22 |
23 | def __init__(self, env, dof = 2, max_action = 0.1, normalised_actions = True):
24 | """
25 | End effector cartesian velocity control wrapper.
26 |
27 | This wrapper allows for controlling the robot by sending end-effector cartesian velocity commands
28 |         instead of joint velocities. This includes velocity commands in-plane (v_x, v_y) or in 3D (v_x, v_y, v_z).
29 | Does not implement angular velocity commands.
30 |
31 | Args:
32 | env (MujocoEnv instance): The environment to wrap.
33 |             dof (int, 2 or 3): Whether to move the end-effector in-plane (x-y) or in 3D (x-y-z).
34 |             max_action : The maximum speed that can be commanded in each dimension (in m/sec).
35 |             normalised_actions (bool): If True, actions are expected in [-1, 1] and rescaled to [-max_action, max_action].
36 | """
37 | super().__init__(env)
38 | self.controller = SawyerEEFVelocityController()
39 | assert dof == 2 or dof == 3 , "Can only send x-y or x-y-z velocity commands"
40 |
41 | self.wrapper_dof = dof
42 | self.action_range = [-max_action, max_action]
43 |
44 | self.normalised_actions = normalised_actions
45 |
46 |
47 |         # Disable action normalisation in the wrapped environment; this wrapper does its own rescaling
48 | self.env.normalised_actions = False
49 |
50 |
51 | @property
52 | def action_spec(self):
53 | if(self.normalised_actions):
54 | low = np.ones(self.wrapper_dof) * -1
55 | high = np.ones(self.wrapper_dof)
56 | else:
57 | low = np.array([self.action_range[0]]*self.wrapper_dof )
58 | high = np.array([self.action_range[1]]*self.wrapper_dof )
59 | return low, high
60 |
61 | def _robot_jpos_getter(self):
62 | """
63 | Helper function to pass to the ik controller for access to the
64 | current robot joint positions.
65 | """
66 | return np.array(self.env._joint_positions)
67 |
68 |
69 | def step(self, action):
70 | if self.env.done:
71 | raise ValueError("Executing action in terminated episode")
72 |
73 | low, high = self.action_spec
74 |
75 | # Domain randomisation for the action space
76 | action_range = high-low
77 | additive_noise = action_range*self.env.dynamics_parameters['action_additive_noise'] * self.env.np_random.uniform(-1, 1, action.shape)
78 |         additive_systematic_noise = action_range*self.env.dynamics_parameters['action_systematic_noise']
79 |         multiplicative_noise = 1.0 + (
80 |             self.env.dynamics_parameters['action_multiplicative_noise'] * self.env.np_random.uniform(-1, 1,
81 |                                                                                                       action.shape))
82 |
83 |
84 | action = action*multiplicative_noise +additive_noise+additive_systematic_noise
85 |
86 | action = np.clip(action, low, high)
87 |
88 | if (self.normalised_actions):
89 | # Rescale action to -max_action, max_action
90 |
91 | # rescale normalized action to control ranges
92 | bias = 0.5 * (self.action_range[1] + self.action_range[0])
93 | weight = 0.5 * (self.action_range[1] - self.action_range[0])
94 | action = bias + weight * action
95 |
96 | current_right_hand_pos_base = self.env._right_hand_pos
97 | current_right_hand_pos_eef = self.env.init_right_hand_orn.dot(current_right_hand_pos_base)
98 |
99 |
100 | if(self.wrapper_dof == 2):
101 | #This only works for the pushing task
102 | # Correct for deviation in the z-position
103 | # Compensate for the fact that right_hand_pos is in the base frame by a minus sign (current - initial)
104 |
105 | # action = self.compensate_for_external_forces(action, current_right_hand_pos_eef)
106 | z_vel = current_right_hand_pos_base[2] - self.env.init_right_hand_pos[2]
107 | xyz_vel = np.concatenate([action, [2*z_vel]])
108 |
109 | elif(self.wrapper_dof ==3):
110 | xyz_vel = action
111 | else:
112 | raise NotImplementedError('Dof can only be 2 or 3!')
113 |
114 | self.target_next_state = current_right_hand_pos_eef + xyz_vel* self.env.control_timestep
115 |
116 | current_right_hand_orn = self.env._right_hand_orn
117 | reference_right_hand_orn = self.env.init_right_hand_orn
118 |
119 | orn_diff = reference_right_hand_orn.dot(current_right_hand_orn.T)
120 |         orn_diff_twice = orn_diff.dot(orn_diff).dot(orn_diff)  # applies the orientation correction three times (despite the name) to pull the hand back towards its reference orientation
121 |
122 |
123 |
124 | # Convert xyz_vel from the end-effector frame to the base_frame of the robot
125 | base_frame_in_eef = reference_right_hand_orn.T
126 | xyz_vel_base = base_frame_in_eef.dot(xyz_vel)
127 |
128 |
129 | pose_matrix = np.zeros((4,4))
130 | pose_matrix[:3,:3] = orn_diff_twice
131 | pose_matrix[:3,3] = xyz_vel_base
132 | pose_matrix[3,3] =1
133 | current_joint_angles = self._robot_jpos_getter()
134 |
135 |
136 |
137 | joint_velocities = self.controller.compute_joint_velocities_for_endpoint_velocity(pose_matrix,current_joint_angles)
138 | final_action = np.concatenate([np.asarray(joint_velocities).squeeze()])
139 |
140 | obs, reward, done, info = self.env.step(final_action)
141 | info['joint_velocities'] = final_action
142 | return obs,reward,done,info
143 |
144 |
145 | def reset(self):
146 | ret = self.env.reset()
147 | eff_frame_in_base = self.env.init_right_hand_orn
148 | self.target_next_state = eff_frame_in_base.dot(self.env._right_hand_pos)
149 | return ret
150 |
151 |
152 |
153 |
154 |
--------------------------------------------------------------------------------
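A minimal usage sketch for EEFXVelocityControl, mirroring how the characterisation scripts later in this repository use it; the keyword arguments to make() are an assumed, reduced subset of the full list in pushing_characterisation.py, and the commanded velocity is a placeholder.

    import numpy as np
    from robosuite_extra.env_base import make
    from robosuite_extra.wrappers import EEFXVelocityControl

    env = make('SawyerPush', gripper_type="PushingGripper", use_camera_obs=False,
               use_object_obs=True, has_renderer=False, control_freq=10., horizon=80)
    env = EEFXVelocityControl(env, dof=2, max_action=0.1, normalised_actions=False)

    obs = env.reset()
    for _ in range(10):
        # Command 0.05 m/s along x and 0 along y in the end-effector frame.
        obs, reward, done, info = env.step(np.array([0.05, 0.0]))
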
/robosuite-extra/robosuite_extra/wrappers/gym_wrapper.py:
--------------------------------------------------------------------------------
1 | '''
2 | Taken and modified from the original robosuite repository (version 0.1.0)
3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite
4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite
5 |
6 | '''
7 |
8 | import numpy as np
9 | from gym import spaces
10 | from robosuite.wrappers import Wrapper
11 |
12 | from gym import Wrapper as OpenAIGymWrapper
13 |
14 |
15 |
16 | class FlattenWrapper(OpenAIGymWrapper):
17 |
18 | def __init__(self, env, keys=None, add_info=False):
19 |
20 | super().__init__(env)
21 |
22 | if keys is None:
23 | assert self.env.use_object_obs, "Object observations need to be enabled."
24 | keys = ["robot-state", "task-state", "target_pos",]
25 | self.keys = keys
26 |
27 | # set up observation and action spaces
28 | flat_ob = self._flatten_obs(self.env.reset(), verbose=True)
29 | self.obs_dim = flat_ob.size
30 | high = np.inf * np.ones(self.obs_dim)
31 | low = -high
32 | self.observation_space = spaces.Box(low=low, high=high)
33 | low, high = self.env.action_spec
34 | self.action_space = spaces.Box(low=low, high=high)
35 |
36 | self.add_info = add_info
37 |
38 | def _flatten_obs(self, obs_dict, verbose=False):
39 | """
40 |         Keeps only the keys of interest and concatenates their values.
41 | Args:
42 | obs_dict: ordered dictionary of observations
43 | """
44 | ob_lst = []
45 | for key in obs_dict:
46 | if key in self.keys:
47 | if verbose:
48 | print("adding key: {}".format(key))
49 | ob_lst.append(obs_dict[key])
50 | return np.concatenate(ob_lst)
51 |
52 | def reset(self):
53 | ob_dict = self.env.reset()
54 | return self._flatten_obs(ob_dict)
55 |
56 | def step(self, action):
57 | ob_dict, reward, done, info = self.env.step(action)
58 |
59 | if(self.add_info):
60 | info.update(ob_dict)
61 |
62 | return self._flatten_obs(ob_dict), reward, done, info
63 |
64 |
65 | def __getattr__(self, attr):
66 | # using getattr ensures that both __getattribute__ and __getattr__ (fallback) get called
67 | # (see https://stackoverflow.com/questions/3278077/difference-between-getattr-vs-getattribute)
68 |         return getattr(self.env, attr)
69 |
70 |
71 |
72 |
73 |
74 | class GymWrapper(Wrapper):
75 | env = None
76 |
77 | # Set this in SOME subclasses
78 |     metadata = {'render.modes': ['human', 'rgb_array']}
79 | reward_range = (-float('inf'), float('inf'))
80 | spec = None
81 |
82 | def __init__(self, env):
83 | """
84 | Initializes the Gym wrapper.
85 | Args:
86 | env (MujocoEnv instance): The environment to wrap.
87 |             Observations are exposed unflattened, as a gym spaces.Dict built from
88 |             the wrapped environment's observation_spec(); use FlattenWrapper to
89 |             concatenate selected keys into a single flat vector.
90 | """
91 | self.env = env
92 | # set up observation space
93 | high = np.inf
94 | low = -high
95 |
96 | obs_spec = env.observation_spec()
97 |
98 | space_spec = {}
99 |
100 | for k,v in obs_spec.items():
101 | space_spec[k]=spaces.Box(low=low,high=high, shape=v)
102 |
103 |
104 | self.observation_space = spaces.Dict(space_spec)
105 |
106 | # setup action space
107 | low, high = self.env.action_spec
108 | self.action_space = spaces.Box(low=low, high=high)
109 |
110 | self.reward_range = self.env.reward_range
111 |
112 | def step(self, action):
113 | return self.env.step(action)
114 |
115 | def reset(self):
116 | ob_dict = self.env.reset()
117 | return ob_dict
118 |
119 |     def render(self, mode='human'):
120 | self.env.render()
121 |
122 |
123 | def close(self):
124 | self.env.close()
125 |
126 | def seed(self, seed):
127 | self.env.seed(seed)
128 |
129 | @property
130 | def action_spec(self):
131 | return self.env.action_spec
132 |
133 | @property
134 | def unwrapped(self):
135 | return self
136 |
137 | def __str__(self):
138 | if self.spec is None:
139 | return '<{} instance>'.format(type(self).__name__)
140 | else:
141 | return '<{}<{}>>'.format(type(self).__name__, self.spec.id)
142 |
143 | def __enter__(self):
144 | return self
145 |
146 | def __exit__(self, *args):
147 | self.close()
148 | # propagate exception
149 | return False
150 |
--------------------------------------------------------------------------------
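GymWrapper exposes the robosuite environment through gym-style spaces while keeping the observation dictionary intact, and FlattenWrapper then concatenates selected keys into a flat vector. A sketch of the intended stacking, mirroring the usage in tsne_for_epi_pushing.py below; the environment construction is abbreviated and assumed.

    from robosuite_extra.env_base import make
    from robosuite_extra.wrappers import EEFXVelocityControl, GymWrapper, FlattenWrapper

    env = make('SawyerReach', use_camera_obs=False, use_object_obs=True, has_renderer=False)
    env = EEFXVelocityControl(env, dof=3, max_action=0.1)
    env = GymWrapper(env)                               # dict observations, gym spaces
    env = FlattenWrapper(env, keys=['task-state'])      # flat numpy observations

    obs = env.reset()
    print(env.observation_space.shape, env.action_space.shape)
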
/robosuite-extra/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | setup(name='robosuite_extra',
4 | version='0.0.1',
5 | packages= find_packages()
6 | )
--------------------------------------------------------------------------------
/sim2real-calibration-characterisation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/sim2real-calibration-characterisation/__init__.py
--------------------------------------------------------------------------------
/sim2real-calibration-characterisation/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | setup(name='sim2real_calibration_characterisation',
4 | version='0.0.1',
5 | packages= find_packages()
6 | )
--------------------------------------------------------------------------------
/sim2real-calibration-characterisation/sim2real_calibration_characterisation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/sim2real-calibration-characterisation/sim2real_calibration_characterisation/__init__.py
--------------------------------------------------------------------------------
/sim2real-calibration-characterisation/sim2real_calibration_characterisation/sim2real_characterisation/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/sim2real-calibration-characterisation/sim2real_calibration_characterisation/sim2real_characterisation/__init__.py
--------------------------------------------------------------------------------
/sim2real-calibration-characterisation/sim2real_calibration_characterisation/sim2real_characterisation/pushing_characterisation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from robosuite_extra.env_base import make
3 | import robosuite_extra.utils.transform_utils as T
4 | from robosuite_extra.wrappers import EEFXVelocityControl
5 | from sim2real_calibration_characterisation.utils.logger import Logger
6 | from sim2real_policies.utils.choose_env import push_force_noise_randomisation, push_force_randomisation, \
7 | push_full_randomisation,push_no_randomisation
8 |
9 | def grab_data(info, world_pose_in_base):
10 | # Grab the data
11 | goal_pos_in_world = np.concatenate([info['goal_pos_in_world'], [1.0]])
12 |
13 | goal_pos_in_base = world_pose_in_base.dot(goal_pos_in_world)
14 | goal_pos_in_base = goal_pos_in_base / goal_pos_in_base[3]
15 |
16 | gripper_pos_in_world = np.concatenate([info['eef_pos_in_world'], [1.0]])
17 | gripper_vel_in_world = info['eef_vel_in_world']
18 |
19 | gripper_pos_in_base = world_pose_in_base.dot(gripper_pos_in_world)
20 | gripper_pos_in_base = gripper_pos_in_base / gripper_pos_in_base[3]
21 | gripper_vel_in_base = world_pose_in_base[:3, :3].dot(gripper_vel_in_world)
22 |
23 | object_pos_in_world = np.concatenate([info['object_pos_in_world'], [1.0]])
24 | object_vel_in_world = info['object_vel_in_world']
25 | z_angle = info['z_angle']
26 |
27 | object_pos_in_base = world_pose_in_base.dot(object_pos_in_world)
28 | object_pos_in_base = object_pos_in_base / object_pos_in_base[3]
29 | object_vel_in_base = world_pose_in_base[:3, :3].dot(object_vel_in_world)
30 |
31 | return goal_pos_in_base, gripper_pos_in_base, gripper_vel_in_base, object_pos_in_base, object_vel_in_base, z_angle,
32 |
33 |
34 | def main():
35 | ### Script parameters ###
36 | render = False
37 | log_dir = './push'
38 | ####
39 |
40 |
41 | action_ref = np.array([0.1, 0.0])
42 | action = action_ref
43 |
44 | for repeat in range(10):
45 | # Create the environment
46 | env = make(
47 | 'SawyerPush',
48 | gripper_type="PushingGripper",
49 | parameters_to_randomise=push_no_randomisation,
50 | randomise_initial_conditions=False,
51 | table_full_size=(0.8, 1.6, 0.719),
52 | table_friction=(0.001, 5e-3, 1e-4),
53 | use_camera_obs=False,
54 | use_object_obs=True,
55 | reward_shaping=True,
56 | placement_initializer=None,
57 | gripper_visualization=True,
58 | use_indicator_object=False,
59 | has_renderer=False,
60 | has_offscreen_renderer=False,
61 | render_collision_mesh=False,
62 | render_visual_mesh=True,
63 | control_freq=10.,
64 | horizon=80,
65 | ignore_done=False,
66 | camera_name="frontview",
67 | camera_height=256,
68 | camera_width=256,
69 | camera_depth=False,
70 | )
71 |
72 | env = EEFXVelocityControl(env, dof=2, normalised_actions=False)
73 |
74 | if (render):
75 | env.renderer_on()
76 |
77 | obs = env.reset()
78 | if (render):
79 | env.viewer.set_camera(camera_id=0)
80 | env.render()
81 |
82 | ## Setup the logger
83 | # Base frame rotation in the end effector to convert data between those two frames
84 | base_rot_in_eef = env.init_right_hand_orn.T
85 |
86 | base_pos_in_world = env.sim.data.get_body_xpos("base")
87 | base_rot_in_world = env.sim.data.get_body_xmat("base").reshape((3, 3))
88 | base_pose_in_world = T.make_pose(base_pos_in_world, base_rot_in_world)
89 | world_pose_in_base = T.pose_inv(base_pose_in_world)
90 |
91 |
92 | log_list = ["step", "time",
93 | "cmd_eef_vx", "cmd_eef_vy",
94 | "goal_x", "goal_y", "goal_z",
95 | "eef_x", "eef_y", "eef_z",
96 | "eef_vx", "eef_vy", "eef_vz",
97 | "object_x", "object_y", "object_z",
98 | "object_vx", "object_vy", "object_vz",
99 | "z_angle", "obs_0", "obs_1", "obs_2", "obs_3", "obs_4", "obs_5", "obs_6", "obs_7", "obs_8", "obs_9" ]
100 |
101 | log_path = '{}/trajectory_{}.csv'.format(log_dir, repeat)
102 |
103 | logger = Logger(log_list, log_path,verbatim=render)
104 |
105 | ## Run the trajectory
106 | i = 0
107 | mujoco_start_time = env.sim.data.time
108 | while (True):
109 | mujoco_elapsed = env.sim.data.time - mujoco_start_time
110 |
111 | next_obs, reward, done, _ = env.step(action)
112 |
113 | goal_pos_in_base, eef_pos_in_base, eef_vel_in_base, \
114 | object_pos_in_base, object_vel_in_base, z_angle, = grab_data(obs, world_pose_in_base)
115 |
116 | action_3d = np.concatenate([action, [0.0]])
117 | action_3d_in_base = base_rot_in_eef.dot(action_3d)
118 |
119 | logger.log(i, mujoco_elapsed,
120 | action_3d_in_base[0], action_3d_in_base[1],
121 | goal_pos_in_base[0], goal_pos_in_base[1], goal_pos_in_base[2],
122 | eef_pos_in_base[0], eef_pos_in_base[1], eef_pos_in_base[2],
123 | eef_vel_in_base[0], eef_vel_in_base[1], eef_vel_in_base[2],
124 | object_pos_in_base[0], object_pos_in_base[1], object_pos_in_base[2],
125 | object_vel_in_base[0], object_vel_in_base[1], object_vel_in_base[2],
126 | z_angle[0],
127 | obs['task-state'][0], obs['task-state'][1], obs['task-state'][2],
128 | obs['task-state'][3], obs['task-state'][4], obs['task-state'][5],
129 | obs['task-state'][6], obs['task-state'][7], obs['task-state'][8], obs['task-state'][9],
130 | )
131 | obs = next_obs
132 |
133 | if(render):
134 | env.render()
135 | i += 1
136 | if (mujoco_elapsed >2.5):
137 | action =np.array([0.0, 0.0])
138 |
139 | if(mujoco_elapsed >3.0):
140 | action = action_ref
141 | env.close()
142 | break
143 |
144 |
145 | if __name__ == "__main__":
146 | main()
147 |
--------------------------------------------------------------------------------
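grab_data above maps world-frame positions into the robot base frame with homogeneous coordinates. A standalone numpy restatement of that conversion, with made-up pose values; T.pose_inv is replaced here by the explicit rigid-transform inverse.

    import numpy as np

    # Hypothetical base pose in the world frame.
    base_rot_in_world = np.eye(3)
    base_pos_in_world = np.array([0.0, 0.0, 0.9])

    # Inverse of the rigid transform, i.e. what T.pose_inv computes.
    world_pose_in_base = np.eye(4)
    world_pose_in_base[:3, :3] = base_rot_in_world.T
    world_pose_in_base[:3, 3] = -base_rot_in_world.T.dot(base_pos_in_world)

    point_in_world = np.array([0.5, 0.1, 1.0, 1.0])    # homogeneous coordinates
    point_in_base = world_pose_in_base.dot(point_in_world)
    point_in_base = point_in_base / point_in_base[3]   # normalise the homogeneous part
    print(point_in_base[:3])                           # -> [0.5 0.1 0.1]
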
/sim2real-calibration-characterisation/sim2real_calibration_characterisation/sim2real_characterisation/reaching_characterisation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from robosuite_extra.env_base import make
3 | import robosuite_extra.utils.transform_utils as T
4 | from robosuite_extra.wrappers import EEFXVelocityControl
5 | from sim2real_calibration_characterisation.utils.logger import Logger
6 | from sim2real_policies.utils.choose_env import reach_full_randomisation,reach_force_noise_randomisation, \
7 | reach_force_randomisation,reach_no_randomisation
8 |
9 |
10 | def grab_data(info, world_pose_in_base):
11 | # Grab the data
12 | eef_pos_in_world = info['eef_pos_in_world']
13 | eef_vel_in_world = info['eef_vel_in_world']
14 | goal_pos_in_world = info['goal_pos_in_world']
15 |
16 | eef_pos_in_world = np.concatenate([eef_pos_in_world, [1.0]])
17 | goal_pos_in_world = np.concatenate([goal_pos_in_world, [1.0]])
18 |
19 | eef_pos_in_base = world_pose_in_base.dot(eef_pos_in_world)
20 | eef_pos_in_base = eef_pos_in_base / eef_pos_in_base[3]
21 | eef_vel_in_base = world_pose_in_base[:3, :3].dot(eef_vel_in_world)
22 |
23 | goal_pos_in_base = world_pose_in_base.dot(goal_pos_in_world)
24 | goal_pos_in_base = goal_pos_in_base / goal_pos_in_base[3]
25 |
26 |
27 | return eef_pos_in_base, eef_vel_in_base, goal_pos_in_base
28 |
29 |
30 | def main():
31 | ### Script parameters ###
32 | log_dir = './reach'
33 | render = False
34 | ###
35 |
36 | for repeat in range(50):
37 | # Create the environment
38 | env = make(
39 | 'SawyerReach',
40 | gripper_type="PushingGripper",
41 | parameters_to_randomise=reach_no_randomisation,
42 | randomise_initial_conditions=False,
43 | table_full_size=(0.8, 1.6, 0.719),
44 | use_camera_obs=False,
45 | use_object_obs=True,
46 | reward_shaping=True,
47 | use_indicator_object=False,
48 | has_renderer=False,
49 | has_offscreen_renderer=False,
50 | render_collision_mesh=False,
51 | render_visual_mesh=True,
52 | control_freq=10,
53 | horizon=40,
54 | ignore_done=False,
55 | camera_name="frontview",
56 | camera_height=256,
57 | camera_width=256,
58 | camera_depth=False,
59 | pid=True,
60 | success_radius=0.01
61 | )
62 |
63 | env = EEFXVelocityControl(env, dof=3, normalised_actions=True)
64 |
65 | if (render):
66 | env.renderer_on()
67 |
68 | obs = env.reset()
69 | print(env.get_dynamics_parameters())
70 |
71 | if(render):
72 | env.viewer.set_camera(camera_id=0)
73 | env.render()
74 |
75 | # Setup the logger
76 | # Base frame rotation in the end effector to convert data between those two frames
77 | base_rot_in_eef = env.init_right_hand_orn.T
78 |
79 | base_pos_in_world = env.sim.data.get_body_xpos("base")
80 | base_rot_in_world = env.sim.data.get_body_xmat("base").reshape((3, 3))
81 | base_pose_in_world = T.make_pose(base_pos_in_world, base_rot_in_world)
82 | world_pose_in_base = T.pose_inv(base_pose_in_world)
83 |
84 |
85 |
86 | log_list = ["step", "time",
87 | "cmd_eef_vx", "cmd_eef_vy", "cmd_eef_vz",
88 | "eef_x", "eef_y", "eef_z",
89 | "eef_vx", "eef_vy", "eef_vz",
90 | "goal_x", "goal_y", "goal_z",
91 | "obs_0", "obs_1", "obs_2",
92 | "obs_3", "obs_4", "obs_5"
93 | ]
94 |
95 | log_path = '{}/trajectory{}.csv'.format(log_dir,repeat)
96 | logger = Logger(log_list, log_path, verbatim=render)
97 |
98 | i = 0
99 | mujoco_start_time = env.sim.data.time
100 | # Run the trajectories
101 | while (True):
102 | mujoco_elapsed = env.sim.data.time - mujoco_start_time
103 |
104 |             action = np.array([0.5, 0.5, 0.5])  # constant normalised velocity command in the end-effector frame
105 |
106 | next_obs, reward, done, _ = env.step(action)
107 |
108 | eef_pos_in_base, eef_vel_in_base, goal_pos_in_base = grab_data(obs, world_pose_in_base)
109 |
110 | action_in_base = base_rot_in_eef.dot(action)
111 |
112 | logger.log(i, mujoco_elapsed,
113 | action_in_base[0], action_in_base[1], action_in_base[2],
114 | eef_pos_in_base[0], eef_pos_in_base[1], eef_pos_in_base[2],
115 | eef_vel_in_base[0], eef_vel_in_base[1], eef_vel_in_base[2],
116 | goal_pos_in_base[0], goal_pos_in_base[1], goal_pos_in_base[2],
117 | obs['task-state'][0], obs['task-state'][1], obs['task-state'][2],
118 | obs['task-state'][3], obs['task-state'][4], obs['task-state'][5],
119 | )
120 |
121 | obs = next_obs
122 |
123 | if(render):
124 | env.render()
125 | i += 1
126 | if (mujoco_elapsed > 2.5):
127 | env.close()
128 | break
129 |
130 |
131 | if __name__ == "__main__":
132 | main()
133 |
--------------------------------------------------------------------------------
/sim2real-calibration-characterisation/sim2real_calibration_characterisation/sim2real_characterisation/sliding_characterisation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from robosuite_extra.env_base import make
3 | from sim2real_calibration_characterisation.utils.logger import Logger
4 | from sim2real_policies.utils.choose_env import slide_force_randomisation, slide_force_noise_randomisation,\
5 | slide_full_randomisation,slide_no_randomisation
6 |
7 | def main():
8 | ### Script parameters ###
9 | render = False
10 | log_dir = './slide'
11 | ####
12 |
13 | action_ref = np.array([0.1, 0.1])
14 | action = action_ref
15 |
16 | for repeat in range(10):
17 | # Create the environment
18 | env = make(
19 | 'SawyerSlide',
20 | gripper_type="SlidePanelGripper",
21 | parameters_to_randomise=slide_no_randomisation,
22 | randomise_initial_conditions=False,
23 | use_camera_obs=False,
24 | use_object_obs=True,
25 | reward_shaping=True,
26 | use_indicator_object=False,
27 | has_renderer=False,
28 | has_offscreen_renderer=False,
29 | render_collision_mesh=False,
30 | render_visual_mesh=True,
31 | control_freq=10,
32 | horizon=60,
33 | ignore_done=False,
34 | camera_name="frontview",
35 | camera_height=256,
36 | camera_width=256,
37 | camera_depth=False,
38 | pid=True,
39 | )
40 |
41 | env.normalised_actions = False
42 |
43 | if (render):
44 | env.renderer_on()
45 |
46 | obs = env.reset()
47 |
48 | if (render):
49 | env.viewer.set_camera(camera_id=0)
50 | env.render()
51 |
52 | # Setup the logger
53 | log_list =["step", "time",
54 | "cmd_j5", "cmd_j6",
55 | "obj_x", "obj_y", "obj_z",
56 | "sin_z", "cos_z",
57 | "obj_vx", "obj_vy", "obj_vz",
58 | "a_j5", "a_j6",
59 | "v_j5", "v_j6",
60 | ]
61 | log_path = '{}/trajectory_{}.csv'.format(log_dir, repeat)
62 | logger = Logger(log_list, log_path,verbatim=render)
63 |
64 | i = 0
65 | mujoco_start_time = env.sim.data.time
66 | # Run the trajectories
67 | while (True):
68 | mujoco_elapsed = env.sim.data.time - mujoco_start_time
69 |
70 | next_obs, reward, done, info = env.step(action)
71 | assert len( obs['task-state']) ==12
72 |
73 | logger.log(i, mujoco_elapsed,
74 | action[0], action[1],
75 | obs['task-state'][0], obs['task-state'][1], obs['task-state'][2],
76 | obs['task-state'][3], obs['task-state'][4], obs['task-state'][5], obs['task-state'][6],
77 | obs['task-state'][7], obs['task-state'][8], obs['task-state'][9],
78 | obs['task-state'][10], obs['task-state'][11],
79 | )
80 | obs = next_obs
81 |
82 | if(render):
83 | env.render()
84 | i += 1
85 |             if (mujoco_elapsed > 2.):
86 |                 env.close()
87 |                 break
88 |
89 | if __name__ == "__main__":
90 | main()
91 |
--------------------------------------------------------------------------------
/sim2real-calibration-characterisation/sim2real_calibration_characterisation/simulation_calibration/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/sim2real-calibration-characterisation/sim2real_calibration_characterisation/simulation_calibration/__init__.py
--------------------------------------------------------------------------------
/sim2real-calibration-characterisation/sim2real_calibration_characterisation/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/sim2real-calibration-characterisation/sim2real_calibration_characterisation/utils/__init__.py
--------------------------------------------------------------------------------
/sim2real-calibration-characterisation/sim2real_calibration_characterisation/utils/logger.py:
--------------------------------------------------------------------------------
1 | import os
2 | class Logger:
3 | """Logger to record values to a CSV file."""
4 |
5 | def __init__(self, labels, path, errase=True, verbatim=True):
6 | self.n_labels = len(labels)
7 | self.path = path
8 | self.verbatim = verbatim
9 |         assert errase or not os.path.exists(path), self.path  # refuse to overwrite an existing log unless errase is True
10 | print('[Logger] logging {} to {}'.format(labels, self.path))
11 | if not os.path.exists(os.path.dirname(path)):
12 | os.makedirs(os.path.dirname(path))
13 | with open(self.path, 'w') as csv_file:
14 | csv_file.write(','.join(labels) + '\n')
15 |
16 | def log(self, *values):
17 | assert len(values) == self.n_labels
18 | text = ','.join([str(v) for v in values])
19 | if self.verbatim:
20 | print('[Logger] {}'.format(text))
21 | with open(self.path, 'a') as csv_file:
22 | csv_file.write(text + '\n')
23 |
--------------------------------------------------------------------------------
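A minimal usage sketch for the Logger above; the labels, path and values are illustrative.

    from sim2real_calibration_characterisation.utils.logger import Logger

    logger = Logger(['step', 'time', 'eef_x'], './example_logs/trajectory_0.csv', verbatim=False)
    logger.log(0, 0.0, 0.1234)   # one CSV row per call, in the same order as the labels
    logger.log(1, 0.1, 0.1302)
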
/sim2real-policies/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 |
3 | setup(name='sim2real_policies',
4 | version='0.0.1',
5 | packages= find_packages()
6 | )
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/sim2real-policies/sim2real_policies/__init__.py
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/after_training_tests/epi_utils.py:
--------------------------------------------------------------------------------
1 | from mujoco_py import MujocoException
2 | import numpy as np
3 | from robosuite_extra.utils import transform_utils as T
4 |
5 |
6 | def log_trajectory_point (env, obs, action,i, mujoco_elapsed, info, logger, data_grabber):
7 |
8 | if(env._name == 'SawyerReach'):
9 | base_pos_in_world = env.sim.data.get_body_xpos("base")
10 | base_rot_in_world = env.sim.data.get_body_xmat("base").reshape((3, 3))
11 | base_pose_in_world = T.make_pose(base_pos_in_world, base_rot_in_world)
12 | world_pose_in_base = T.pose_inv(base_pose_in_world)
13 | base_rot_in_eef = env.init_right_hand_orn.T
14 |
15 | eef_pos_in_base, eef_vel_in_base, goal_pos_in_base = data_grabber(info, world_pose_in_base)
16 | action_in_base = base_rot_in_eef.dot(action)
17 |
18 | logger.log(i, mujoco_elapsed,
19 | action_in_base[0], action_in_base[1], action_in_base[2],
20 | eef_pos_in_base[0], eef_pos_in_base[1], eef_pos_in_base[2],
21 | eef_vel_in_base[0], eef_vel_in_base[1], eef_vel_in_base[2],
22 | goal_pos_in_base[0], goal_pos_in_base[1], goal_pos_in_base[2],
23 | obs[0], obs[1], obs[2],
24 | obs[3], obs[4], obs[5],
25 | )
26 |
27 | elif(env._name == 'SawyerPush'):
28 | base_pos_in_world = env.sim.data.get_body_xpos("base")
29 | base_rot_in_world = env.sim.data.get_body_xmat("base").reshape((3, 3))
30 | base_pose_in_world = T.make_pose(base_pos_in_world, base_rot_in_world)
31 | world_pose_in_base = T.pose_inv(base_pose_in_world)
32 | base_rot_in_eef = env.init_right_hand_orn.T
33 |
34 |
35 | goal_pos_in_base, eef_pos_in_base, eef_vel_in_base, \
36 | object_pos_in_base, object_vel_in_base, z_angle, = data_grabber(info, world_pose_in_base)
37 |
38 | action_3d = np.concatenate([action, [0.0]])
39 | action_3d_in_base = base_rot_in_eef.dot(action_3d)
40 |
41 | logger.log(i, mujoco_elapsed,
42 | action_3d_in_base[0], action_3d_in_base[1],
43 | goal_pos_in_base[0], goal_pos_in_base[1], goal_pos_in_base[2],
44 | eef_pos_in_base[0], eef_pos_in_base[1], eef_pos_in_base[2],
45 | eef_vel_in_base[0], eef_vel_in_base[1], eef_vel_in_base[2],
46 | object_pos_in_base[0], object_pos_in_base[1], object_pos_in_base[2],
47 | object_vel_in_base[0], object_vel_in_base[1], object_vel_in_base[2],
48 | z_angle[0],
49 | obs[0], obs[1], obs[2],
50 | obs[3], obs[4], obs[5],
51 | obs[6], obs[7], obs[8],
52 | obs[9],
53 | )
54 |
55 |     elif(env._name == 'SawyerSlide'):
56 |
57 | logger.log(i, mujoco_elapsed,
58 | action[0], action[1],
59 | obs[0], obs[1], obs[2],
60 | obs[3], obs[4], obs[5], obs[6],
61 | obs[7], obs[8], obs[9],
62 | obs[10], obs[11],
63 | )
64 |
65 |
66 |
67 | def EPIpolicy_rollout(env, epi_policy,s, mujoco_start_time , logger= None,data_grabber= None, max_steps=30, params=None):
68 | """
69 |     Roll out one trajectory of at most max_steps steps.
70 | return:
71 | traj: shape of (max_steps, state_dim+action_dim+reward_dim)
72 | s_: next observation
73 | env.get_state(): next underlying state
74 | """
75 | if params is not None:
76 | env.set_dynamics_parameters(params)
77 | # env.renderer_on()
78 | for epi_iter in range(10): # max iteration for getting a single rollout
79 | #s = env.reset()
80 | traj=[]
81 | for _ in range(max_steps):
82 | a = epi_policy.get_action(s)
83 | a = np.clip(a, -epi_policy.action_range, epi_policy.action_range)
84 | # env.render()
85 | try:
86 | mujoco_elapsed = env.sim.data.time - mujoco_start_time
87 | s_, r, done, info = env.step(a)
88 | except MujocoException:
89 | print('EPI Rollout: MujocoException')
90 | break
91 |
92 | if(logger is not None):
93 | log_trajectory_point(env, s_,a,epi_iter, mujoco_elapsed ,info,logger,data_grabber)
94 |
95 | s_a_r = np.concatenate((s,a, [r])) # state, action, reward
96 | traj.append(s_a_r)
97 | s=s_
98 | if len(traj) == max_steps:
99 | break
100 |
181 |             if (i >= horizon):
182 | break
183 |
184 | z_values = np.array(z_values)
185 | env.close()
186 |
187 | if(not os.path.exists(save_path)):
188 |         os.makedirs(save_path)
189 | pickle.dump(z_values, open(os.path.join(save_path, 'z_values_array.pckl'), 'wb'))
190 |
191 | return z_values
192 |
193 |
194 |
195 |
196 | def plot_tsne(z_values,save_path):
197 |
198 | tsne_calculator = TSNE(n_iter = 3000, n_iter_without_progress= 600, perplexity =5.)
199 |
200 | fig = plt.figure()
201 |
202 | colors = plt.get_cmap('rainbow')(np.linspace(0.2, 1, 3))
203 |
204 | for param_set_idx in range(z_values.shape[0]):
205 | zs = copy.deepcopy(z_values[param_set_idx,:,:])
206 | zs_2d = tsne_calculator.fit_transform(zs)
207 |
208 | zs_2d_x = [zs_2d[i][0] for i in range(zs_2d.shape[0])]
209 | zs_2d_y = [zs_2d[i][1] for i in range(zs_2d.shape[0])]
210 |
211 | plt.scatter(zs_2d_x, zs_2d_y, c = [colors[param_set_idx]]*len(zs_2d_y), label='params set {}'.format(param_set_idx+1))
212 |
213 | plt.legend()
214 | plt.axis('off')
215 | plt.savefig(save_path+'tsne_plot.pdf')
216 | plt.savefig(save_path+'tsne_plot.svg')
217 |
218 | plt.show()
219 | plt.close()
220 |
221 |
222 |
223 |
224 | if __name__ == "__main__":
225 |
226 |
227 | save_path = '../../../../data/reaching/epi_tsne_results/'
228 |
229 | save_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), save_path)
230 | #z_values = find_zvalues(save_path)
231 | #
232 |
233 |
234 | z_values = pickle.load(open(os.path.join(save_path, 'z_values_array.pckl'), 'rb'))
235 | plot_tsne(copy.deepcopy(z_values),save_path)
236 |
237 |
238 |
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/after_training_tests/tsne_for_epi_pushing.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from robosuite_extra.env_base import make
3 | from robosuite_extra.wrappers import EEFXVelocityControl, GymWrapper, FlattenWrapper
4 | from sim2real_policies.after_training_tests.network_loading import load, load_model
5 | import os
6 | from sim2real_policies.utils.choose_env import reach_force_noise_randomisation,reach_force_randomisation,reach_full_randomisation, push_full_randomisation
7 | from sim2real_policies.after_training_tests.epi_utils import EPIpolicy_rollout
8 | import copy
9 |
10 |
11 | PUSH_GOALS = [np.array([3e-3,15.15e-2]),np.array([9.5e-2,11.01e-2]), np.array([-11.5e-2,17.2e-2])]
12 |
13 | import pickle
14 | from sklearn.manifold import TSNE
15 | import matplotlib.pyplot as plt
16 |
17 | def grab_data(info, world_pose_in_base):
18 | # Grab the data
19 | eef_pos_in_world = info['eef_pos_in_world']
20 | eef_vel_in_world = info['eef_vel_in_world']
21 | goal_pos_in_world = info['goal_pos_in_world']
22 |
23 | eef_pos_in_world = np.concatenate([eef_pos_in_world, [1.0]])
24 | goal_pos_in_world = np.concatenate([goal_pos_in_world, [1.0]])
25 |
26 | eef_pos_in_base = world_pose_in_base.dot(eef_pos_in_world)
27 | eef_pos_in_base = eef_pos_in_base / eef_pos_in_base[3]
28 | eef_vel_in_base = world_pose_in_base[:3, :3].dot(eef_vel_in_world)
29 |
30 | goal_pos_in_base = world_pose_in_base.dot(goal_pos_in_world)
31 | goal_pos_in_base = goal_pos_in_base / goal_pos_in_base[3]
32 |
33 |
34 | return eef_pos_in_base, eef_vel_in_base, goal_pos_in_base
35 |
36 |
37 | def find_zvalues(save_path):
38 | render = False
39 |
40 | # Parameters
41 | horizon = 80
42 | total_repeats =50
43 |
44 |
45 | env_name = 'SawyerPush'
46 |
47 | # ### Prepare Environment #####
48 | env = make(
49 | 'SawyerPush',
50 | gripper_type="PushingGripper",
51 | parameters_to_randomise=push_full_randomisation,
52 | randomise_initial_conditions=True,
53 | table_full_size=(0.8, 1.6, 0.719),
54 | table_friction=(0.001, 5e-3, 1e-4),
55 | use_camera_obs=False,
56 | use_object_obs=True,
57 | reward_shaping=True,
58 | placement_initializer=None,
59 | gripper_visualization=True,
60 | use_indicator_object=False,
61 | has_renderer=render,
62 | has_offscreen_renderer=False,
63 | render_collision_mesh=False,
64 | render_visual_mesh=True,
65 | control_freq=10.,
66 | horizon=200,
67 | ignore_done=False,
68 | camera_name="frontview",
69 | camera_height=256,
70 | camera_width=256,
71 | camera_depth=False,
72 | pid=True,
73 | )
74 |
75 |     env = FlattenWrapper(GymWrapper(EEFXVelocityControl(env, dof=2, max_action=0.1)), keys=['task-state'], add_info=True)
76 | env._name = env_name
77 | env.reset()
78 | z_values = []
79 |
80 | for param_iteration in range(3):
81 | state_dim = 10
82 | action_dim = 2
83 | z_values.append([])
84 |
85 | ### setting up the env with different but fixed parameters###
86 | if(param_iteration == 1):
87 | parameter_ranges = env.parameter_sampling_ranges
88 | max_parameters = dict([(k,v[-1]*env.factors_for_param_randomisation[k]) for k,v in parameter_ranges.items()])
89 | env.set_dynamics_parameters(max_parameters)
90 | elif(param_iteration == 2):
91 | parameter_ranges = env.parameter_sampling_ranges
92 | min_parameters = dict([(k,v[0]*env.factors_for_param_randomisation[k]) for k, v in parameter_ranges.items()])
93 | env.set_dynamics_parameters(min_parameters)
94 |
95 | env.reset()
96 |
97 | if (render):
98 | env.viewer.set_camera(camera_id=0)
99 | env.render()
100 |
101 | ###############
102 |
103 | ################# SETTING UP THE POLICY #################
104 | method = 'EPI'
105 | alg_name = 'epi_td3'
106 | embed_dim = 10
107 | traj_l = 10
108 | NO_RESET = True
109 | embed_input_dim = traj_l*(state_dim+action_dim)
110 | ori_state_dim = state_dim
111 | state_dim += embed_dim
112 |
113 | # choose which randomisation is applied
114 | number_random_params = 23
115 | folder_path = '../../../../sawyer/src/sim2real_dynamics_sawyer/assets/rl/'+method +'/' + alg_name + '/model/'
116 | path = folder_path + env_name + str(
117 | number_random_params) + '_' + alg_name
118 |
119 | embed_model = load_model(model_name='embedding', path=path, input_dim = embed_input_dim, output_dim = embed_dim )
120 | embed_model.cuda()
121 | epi_policy_path = folder_path + env_name + str(number_random_params) + '_' + 'epi_ppo_epi_policy'
122 | epi_policy = load(path=epi_policy_path, alg='ppo', state_dim=ori_state_dim, action_dim=action_dim )
123 |
124 | policy = load(path=path, alg=alg_name, state_dim=state_dim,
125 | action_dim=action_dim)
126 | #########################################################
127 |
128 | for repeat in range(total_repeats):
129 | #Reset environment
130 | obs = env.reset()
131 | i=0
132 |
133 | mujoco_start_time = env.sim.data.time
134 | if NO_RESET:
135 | i = traj_l - 1
136 | traj, [last_obs, last_state] = EPIpolicy_rollout(env, epi_policy, obs,
137 | mujoco_start_time=mujoco_start_time,
138 | logger=None, data_grabber=None,
139 | max_steps=traj_l,
140 | params=None) # only one traj; pass in params to ensure it's not changed
141 | state_action_in_traj = np.array(traj)[:, :-1] # remove the rewards
142 | embedding = embed_model(state_action_in_traj.reshape(-1))
143 | embedding = embedding.detach().cpu().numpy()
144 |
145 | obs = last_obs # last observation
146 | env.set_state(last_state) # last underlying state
147 | else:
148 |
149 | traj, [last_obs, last_state] = EPIpolicy_rollout(env, epi_policy, obs,
150 | mujoco_start_time=mujoco_start_time,
151 | logger=None, data_grabber=None,
152 | max_steps=traj_l,
153 | params=None) # only one traj; pass in params to ensure it's not changed
154 | state_action_in_traj = np.array(traj)[:, :-1] # remove the rewards
155 | embedding = embed_model(state_action_in_traj.reshape(-1))
156 | embedding = embedding.detach().cpu().numpy()
157 |
158 |
159 | params = env.get_dynamics_parameters()
160 | env.randomisation_off()
161 | env.set_dynamics_parameters(params) # same as the rollout env
162 | obs = env.reset()
163 | env.randomisation_on()
164 |
165 | z_values[param_iteration].append(embedding)
166 |
167 | # z is embedding of initial trajectory for each episode, so no need to run task-policy rollout below
168 |
169 | while (True):
170 |
171 | ############# CHOOSING THE ACTION ##############
172 | obs = np.concatenate((obs, embedding))
173 |
174 | action = policy.get_action(obs)
175 |
176 | ################################################
177 |
178 | next_obs, reward, done, info = env.step(action)
179 |
180 | obs = next_obs
181 |
182 | if(render):
183 | env.render()
184 |
185 | i += 1
186 | if (i >= horizon):
187 | break
188 |
189 | z_values = np.array(z_values)
190 | env.close()
191 |
192 | if(not os.path.exists(save_path)):
193 |         os.makedirs(save_path)
194 | pickle.dump(z_values, open(os.path.join(save_path, 'z_values_array.pckl'), 'wb'))
195 |
196 | return z_values
197 |
198 |
199 |
200 |
201 | def plot_tsne(z_values,save_path):
202 |
203 | tsne_calculator = TSNE(n_iter = 3000, n_iter_without_progress= 600, perplexity =5.)
204 |
205 | fig = plt.figure()
206 |
207 | colors = plt.get_cmap('rainbow')(np.linspace(0.2, 1, 3))
208 |
209 | for param_set_idx in range(z_values.shape[0]):
210 | zs = copy.deepcopy(z_values[param_set_idx,:,:])
211 | zs_2d = tsne_calculator.fit_transform(zs)
212 |
213 | zs_2d_x = [zs_2d[i][0] for i in range(zs_2d.shape[0])]
214 | zs_2d_y = [zs_2d[i][1] for i in range(zs_2d.shape[0])]
215 |
216 | plt.scatter(zs_2d_x, zs_2d_y, c = [colors[param_set_idx]]*len(zs_2d_y), label='params set {}'.format(param_set_idx+1))
217 |
218 | plt.legend()
219 | plt.axis('off')
220 | plt.savefig(save_path+'tsne_plot.pdf')
221 | plt.savefig(save_path+'tsne_plot.svg')
222 |
223 | plt.show()
224 | plt.close()
225 |
226 |
227 |
228 |
229 | if __name__ == "__main__":
230 |
231 |
232 | save_path = '../../../../data/pushing/epi_tsne_results/'
233 |
234 | save_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), save_path)
235 | z_values = find_zvalues(save_path)
236 | #
237 |
238 |
239 | z_values = pickle.load(open(os.path.join(save_path, 'z_values_array.pckl'), 'rb'))
240 | plot_tsne(copy.deepcopy(z_values),save_path)
241 |
242 |
243 |
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/lstm_td3/plot.py:
--------------------------------------------------------------------------------
1 | # high-dimensional reacher
2 | import numpy as np
3 | import matplotlib.pyplot as plt
4 |
5 | def smooth(y, radius=2, mode='two_sided'):
6 | if len(y) < 2*radius+1:
7 | return np.ones_like(y) * y.mean()
8 | elif mode == 'two_sided':
9 | convkernel = np.ones(2 * radius+1)
10 | return np.convolve(y, convkernel, mode='same') / \
11 | np.convolve(np.ones_like(y), convkernel, mode='same')
12 | elif mode == 'causal':
13 | convkernel = np.ones(radius)
14 | out = np.convolve(y, convkernel,mode='full') / \
15 | np.convolve(np.ones_like(y), convkernel, mode='full')
16 | return out[:-radius+1]
17 |
18 | def moving_sum(y, window=2):  # despite the name, this returns a moving average (windowed sum / window)
19 | c = y.cumsum()
20 | c[window:] = c[window:] - c[:-window]
21 | return c/float(window)
22 |
23 |
24 |
25 | def plot(x, data, color, label):
26 | y_m=np.mean(data, axis=0)
27 | y_std=np.std(data, axis=0)
28 | y_upper=y_m+y_std
29 | y_lower=y_m-y_std
30 | plt.fill_between(
31 | x, list(y_lower), list(y_upper), interpolate=True, facecolor=color, linewidth=0.0, alpha=0.3
32 | )
33 | plt.plot(x, list(y_m), color=color, label=label)
34 |
35 |
36 | file_pre = './'
37 | y=np.load(file_pre+'eval_rewards.npy')
38 | s=np.load(file_pre+'eval_success.npy')
39 |
40 | # two stacked panels: episode reward (top) and average success rate (bottom)
41 | fig, axs = plt.subplots(2, figsize=(8, 6))
42 | x=np.arange(len(y))
43 | axs[0].plot(x, smooth(y), label = 'SawyerPush', color='b')
44 |
45 | axs[0].set_ylabel('Reward')
46 | # plt.ylim(0)
47 | axs[0].legend( loc=2)
48 | axs[0].grid()
49 | axs[1].set_xlabel('Episodes')
50 | axs[1].set_ylabel('Average Success Rate')
51 | axs[1].plot(x, moving_sum(s), label = 'SawyerPush', color='b')
52 | axs[1].grid()
53 |
54 |
55 | plt.savefig('reward.pdf')
56 | plt.show()
57 |
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/ppo/default_params.py:
--------------------------------------------------------------------------------
1 | def get_hyperparams():
2 | hyperparams_dict={
3 | 'alg_name': 'ppo',
4 | 'action_range': 1., # (-action_range, action_range)
5 | 'batch_size': 128, # update batchsize
6 | 'gamma': 0.9, # reward discount
7 | 'random_seed': 2, # random seed
8 | 'actor_update_steps': 10, # actor update iterations
9 | 'critic_update_steps': 10, # critic update iterations
10 | 'eps': 1e-8, # numerical residual
11 | 'actor_lr': 0.0001, # learning rate for actor
12 | 'critic_lr': 0.0002, # learning rate for critic
13 | 'method': [
14 | dict(name='kl_pen', kl_target=0.01, lam=0.5), # KL penalty
15 |             dict(name='clip', epsilon=0.2),  # Clipped surrogate objective; found to work better in practice
16 | ][1] # choose the method for optimization
17 | }
18 | print('Hyperparameters: ', hyperparams_dict)
19 | return hyperparams_dict
--------------------------------------------------------------------------------
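The 'clip' entry above selects PPO's clipped surrogate objective. A standalone PyTorch sketch of that loss, assuming precomputed advantages and log-probabilities from the old and new policies; this is illustrative, not the training code in ppo_multiprocess.py.

    import torch

    def clipped_surrogate_loss(new_log_probs, old_log_probs, advantages, epsilon=0.2):
        # Probability ratio between the updated policy and the behaviour policy.
        ratio = torch.exp(new_log_probs - old_log_probs)
        unclipped = ratio * advantages
        clipped = torch.clamp(ratio, 1.0 - epsilon, 1.0 + epsilon) * advantages
        # PPO maximises the pessimistic (minimum) term; negate it for gradient descent.
        return -torch.min(unclipped, clipped).mean()
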
/sim2real-policies/sim2real_policies/sys_id/common/nets.py:
--------------------------------------------------------------------------------
1 | """
2 | commonly used networks for system identification functions
3 | """
4 | import numpy as np
5 | import torch
6 | import torch.nn as nn
7 | import torch.nn.functional as F
8 | import os,sys,inspect
9 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
10 | parentdir = os.path.dirname(currentdir)
11 | sys.path.insert(0,parentdir) # add parent path
12 |
13 |
14 |
15 | class BaseNetwork(nn.Module):
16 | """ Base network class """
17 | def __init__(self, input_dim, output_dim, hidden_dim):
18 | super(BaseNetwork, self).__init__()
19 | self._input_dim = input_dim
20 | self._output_dim = output_dim
21 | self._hidden_dim = hidden_dim
22 |
23 | def forward(self,):
24 | pass
25 |
26 | def save(self, path):
27 | torch.save(self.state_dict(), path)
28 |
29 | def load(self, path):
30 | self.load_state_dict(torch.load(path))
31 | self.eval().cuda()
32 |
33 | class ProjectionNetwork(BaseNetwork):
34 | def __init__(self, input_dim, output_dim, hidden_dim=64, activation = F.relu, output_activation = F.tanh):
35 | """
36 |         Compress the dynamics parameters to a low-dimensional representation.
37 | https://arxiv.org/abs/1903.01390
38 | """
39 | super().__init__(input_dim, output_dim, hidden_dim)
40 | self.linear1 = nn.Linear(input_dim, hidden_dim)
41 | self.linear2 = nn.Linear(hidden_dim, int(hidden_dim/2))
42 | self.linear3 = nn.Linear(int(hidden_dim/2), int(hidden_dim/4))
43 | self.linear4 = nn.Linear(int(hidden_dim/4), output_dim)
44 | self.activation = activation
45 | self.output_activation = output_activation
46 |
47 | def forward(self, input):
48 | if isinstance(input, np.ndarray) and len(input.shape) < 2:
49 | input = torch.FloatTensor(np.expand_dims(input, 0)).cuda()
50 | x = self.activation(self.linear1(input))
51 | x = self.activation(self.linear2(x))
52 | x = self.activation(self.linear3(x))
53 | x = self.output_activation(self.linear4(x))
54 | return x
55 |
56 | def get_context(self, input):
57 | """
58 | for inference only, no gradient descent
59 | """
60 | context = self.forward(input)
61 | return context.squeeze(0).detach().cpu().numpy()
62 |
63 |
64 | class PredictionNetwork(BaseNetwork):
65 | """
66 | prediction model to predict next state given current state and action
67 | (plus the embedding of dynamics if conditioned on EPI)
68 | """
69 | def __init__(self, input_dim, output_dim, hidden_dim):
70 | super().__init__(input_dim, output_dim, hidden_dim)
71 | self.linear1 = nn.Linear(input_dim, hidden_dim)
72 | self.linear2 = nn.Linear(hidden_dim, hidden_dim)
73 | self.linear3 = nn.Linear(hidden_dim, hidden_dim)
74 | self.linear4 = nn.Linear(hidden_dim, output_dim)
75 |
76 | def forward(self, input, activation=F.relu, output_activation=F.tanh):
77 | if isinstance(input, np.ndarray) and len(input.shape) < 2:
78 | input = torch.FloatTensor(np.expand_dims(input, 0)).cuda()
79 | x = activation(self.linear1(input))
80 | x = activation(self.linear2(x))
81 | x = activation(self.linear3(x))
82 | x = output_activation(self.linear4(x))
83 | return x.squeeze(0)
84 |
85 | class EmbedNetwork(BaseNetwork):
86 | """
87 |     Embedding network that compresses trajectories from the EPI policy into low-dimensional embeddings.
88 | """
89 | def __init__(self, input_dim, output_dim, hidden_dim):
90 | super().__init__(input_dim, output_dim, hidden_dim)
91 | self.linear1 = nn.Linear(input_dim, hidden_dim)
92 | self.linear2 = nn.Linear(hidden_dim, hidden_dim)
93 | self.linear3 = nn.Linear(hidden_dim, output_dim)
94 |
95 | def forward(self, input, activation=F.relu, output_activation=F.tanh):
96 | if isinstance(input, np.ndarray) and len(input.shape) < 2:
97 | input = torch.FloatTensor(np.expand_dims(input, 0)).cuda()
98 | x = activation(self.linear1(input))
99 | x = activation(self.linear2(x))
100 | x = output_activation(self.linear3(x))
101 | return x.squeeze(0)
--------------------------------------------------------------------------------
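A small sketch of how EmbedNetwork might be used to compress an EPI trajectory into an embedding, assuming a trajectory length of 10 with 10-dimensional states and 2-dimensional actions (the dimensions used in the pushing t-SNE script above). A torch tensor is passed directly so the sketch also runs without CUDA, since the numpy branch of forward() moves inputs to the GPU.

    import numpy as np
    import torch
    from sim2real_policies.sys_id.common.nets import EmbedNetwork

    traj_l, state_dim, action_dim, embed_dim = 10, 10, 2, 10
    embed_model = EmbedNetwork(input_dim=traj_l * (state_dim + action_dim),
                               output_dim=embed_dim, hidden_dim=64)

    # A flattened (state, action) trajectory, e.g. an EPIpolicy_rollout output with rewards stripped.
    state_action_traj = np.random.randn(traj_l, state_dim + action_dim).astype(np.float32)
    embedding = embed_model(torch.from_numpy(state_action_traj.reshape(1, -1)))
    print(embedding.shape)  # torch.Size([10])
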
/sim2real-policies/sim2real_policies/sys_id/common/operations.py:
--------------------------------------------------------------------------------
1 | """
2 | """
3 | import numpy as np
4 | import torch
5 | import os,sys,inspect
6 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
7 | parentdir = os.path.dirname(currentdir)
8 | sys.path.insert(0,parentdir) # add parent path
9 | import matplotlib.pyplot as plt
10 |
11 |
12 | def size_splits(tensor, split_sizes, dim=0):
13 | """Splits the tensor according to chunks of split_sizes.
14 |
15 | Arguments:
16 | tensor (Tensor): tensor to split.
17 | split_sizes (list(int)): sizes of chunks
18 | dim (int): dimension along which to split the tensor.
19 | """
20 | if dim < 0:
21 | dim += tensor.dim()
22 |
23 | dim_size = tensor.size(dim)
24 | if dim_size != torch.sum(torch.Tensor(split_sizes)):
25 |         raise KeyError("Sum of split sizes does not match tensor dim")
26 |
27 | splits = torch.cumsum(torch.Tensor([0] + split_sizes), dim=0)[:-1]
28 |
29 | return tuple(tensor.narrow(int(dim), int(start), int(length))
30 | for start, length in zip(splits, split_sizes))
31 |
32 | def size_splits_in_two(tensor, first_size, dim=0):
33 | """Splits a tensor of size=first_size from the original tensor
34 |
35 | Arguments:
36 | tensor (Tensor): tensor to split.
37 | first_size (int): size of the first splitted tensor
38 | dim (int): dimension along which to split the tensor.
39 | """
40 | return size_splits(tensor, [first_size], dim)
41 |
42 |
43 | def stack_data(traj, length):
44 | traj = np.array(traj)
45 | return traj[-length:, :].reshape(-1)
46 |
47 |
48 | def plot(x, name='figure', path='./'):
49 | plt.figure(figsize=(20, 5))
50 | plt.plot(x)
51 | plt.savefig(path+name+'.png')
52 | # plt.show()
53 | plt.clf()
54 |
55 |
56 | def plot_train_test(train_y, test_y, name='figure', path='./'):
57 | """ plot both the training and testing curves during training process """
58 | plt.figure(figsize=(20, 5))
59 | plt.plot(train_y, label='train')
60 | plt.plot(test_y, label='test')
61 | plt.legend()
62 | plt.grid()
63 | plt.savefig(path+name+'.png')
64 | # plt.show()
65 | plt.clf()
66 |
67 |
68 | if __name__ == '__main__':
69 |     from sim2real_policies.utils.choose_env import choose_env
70 |     from sim2real_policies.sys_id.common.utils import query_params
71 |     env, _, _, _ = choose_env('SawyerReach')
72 |     print(query_params(env))
--------------------------------------------------------------------------------
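A quick illustration of size_splits from operations.py; the tensor and chunk sizes are arbitrary, and the chunk sizes must sum to the tensor's length along the split dimension.

    import torch
    from sim2real_policies.sys_id.common.operations import size_splits

    x = torch.arange(10.)
    a, b, c = size_splits(x, [3, 3, 4])
    print(a.tolist(), b.tolist(), c.tolist())  # [0., 1., 2.] [3., 4., 5.] [6., 7., 8., 9.]
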
/sim2real-policies/sim2real_policies/sys_id/common/utils.py:
--------------------------------------------------------------------------------
1 |
2 | import numpy as np
3 | import torch
4 | import torch.nn as nn
5 | import os,sys,inspect
6 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
7 | parentdir = os.path.dirname(currentdir)
8 | sys.path.insert(0,parentdir) # add parent path
9 | import torch.optim as optim
10 | import matplotlib.pyplot as plt
11 | from sim2real_policies.utils.choose_env import choose_env
12 | from sim2real_policies.utils.policy_networks import DPG_PolicyNetwork, RandomPolicy
13 |
14 | from mujoco_py import MujocoException
15 |
16 | def query_params(env, normalize=True, randomised_only=False, dynamics_only=False):
17 | """
18 | Query the dynamics parameters from the env,
19 | normalize it if normalize is True,
20 | only return randomised parameters if randomised_only is True,
21 | only return dynamics parameters (no noise parameters) if dynamics_only is True.
22 | Return: array of parameters
23 | """
24 | params_dict = env.get_dynamics_parameters()
25 | randomised_params_keys = env.get_randomised_parameters()
26 | params_ranges = env.get_parameter_sampling_ranges() # range
27 | params_factors = env.get_factors_for_randomisation() # multiplied factor
28 | params_value_list = []
29 | for param_key, param_value in params_dict.items():
30 | if randomised_only and (param_key in randomised_params_keys) is False:
31 | continue
32 | if dynamics_only and ("time" in param_key or "noise" in param_key):
33 | # print('Non-dynamics parameters: ', param_key)
34 | continue
35 | else:
36 | if normalize:
37 | if param_key in params_factors.keys():
38 | param_factor = params_factors[param_key]
39 | else:
40 | raise NotImplementedError
41 |
42 | if param_key in params_ranges.keys():
43 | param_range = params_ranges[param_key]
44 | else:
45 | raise NotImplementedError
46 |
47 | scale = param_range[1]-param_range[0]
48 | param_factor = param_factor + 1e-15 # for factor=0.
49 | param_value = np.clip((param_value/(param_factor) - param_range[0])/(scale), 0., 1.)
50 |
51 | if isinstance(param_value, np.ndarray):
52 | params_value_list = list(np.concatenate((params_value_list, param_value)))
53 | else: # scalar
54 | params_value_list.append(param_value)
55 | return np.array(params_value_list)
56 |
57 | def _flatten_obs(obs_dict, verbose=False): # gym observation wrapper
58 | """
59 |     Keeps only the keys of interest and concatenates their values.
60 |
61 | Args:
62 | obs_dict: ordered dictionary of observations
63 | """
64 | keys = ["robot-state", "task-state", "target_pos",]
65 | ob_lst = []
66 | for key in obs_dict:
67 | if key in keys: # hacked
68 | if verbose:
69 | print("adding key: {}".format(key))
70 | ob_lst.append(obs_dict[key])
71 | return np.concatenate(ob_lst)
72 |
73 | def offline_history_collection(env_name, itr=30, policy=None, \
74 | vectorize=True, discrete=False, vine=False, vine_sample_size=500, egreedy=0):
75 | """
76 |     Collect random simulation parameters and trajectories with the given policy.
77 | ----------------------------------------------------------------
78 | params:
79 | env_name: name of env to collect data from
80 | itr: data episodes
81 | policy: policy used for collecting data
82 |         vectorize: return the parameters as a flat list rather than a dictionary, used for system identification
83 |         discrete: use a discrete randomisation range, as in the EPI paper
84 |         vine: Vine data collection, same state and same action at the start of each trajectory, as in the EPI paper
85 |         vine_sample_size: number of state-action samples in the vine trajectory set
86 |         egreedy: epsilon for collecting data with an epsilon-greedy policy
87 | """
88 | env, environment_params, environment_wrappers, environment_wrapper_arguments = choose_env(env_name)
89 | action_space = env.action_space
90 | state_space = env.observation_space
91 | if policy is None: # load off-line policy if no policy
92 | policy=DPG_PolicyNetwork(state_space, action_space, 512).cuda()
93 | # load from somewhere
94 |
95 | history_sa=[]
96 | history_s_=[]
97 | params_list=[]
98 | if vine:
99 | vine_state_set = [] # underlying state of env, not the observation
100 | vine_action_set = [] # initial action after initial state
101 | vine_idx = 0
102 | # collect state action sets according to EPI's vine implementation
103 |         while vine_idx
65 | ]
66 | },
67 | {
68 | "data": {
69 | "text/plain": [
70 | "array([ 0.2, 1. ])"
71 | ]
72 | },
73 | "execution_count": 11,
74 | "metadata": {},
75 | "output_type": "execute_result"
76 | }
77 | ],
78 | "source": [
79 | "import numpy as np\n",
80 | "import tensorflow as tf\n",
81 | "a=[0.2, 1.]\n",
82 | "a=np.array(a).astype('float64').reshape(-1)\n",
83 | "print(type(np.array(a).astype('float64').reshape(-1)[0]))\n",
84 | "np.unique(a)\n"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 25,
90 | "metadata": {},
91 | "outputs": [
92 | {
93 | "name": "stdout",
94 | "output_type": "stream",
95 | "text": [
96 | "[[[2 0]\n",
97 | " [4 3]]\n",
98 | "\n",
99 | " [[4 2]\n",
100 | " [2 3]]]\n"
101 | ]
102 | }
103 | ],
104 | "source": [
105 | "a = np.random.randint(5,size=(2,2,2))\n",
106 | "print(a)"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 26,
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "name": "stdout",
116 | "output_type": "stream",
117 | "text": [
118 | "[[2 0]\n",
119 | " [4 3]]\n",
120 | "[ 3. 1.5]\n",
121 | "[[4 2]\n",
122 | " [2 3]]\n",
123 | "[ 3. 2.5]\n"
124 | ]
125 | }
126 | ],
127 | "source": [
128 | "mu=[]\n",
129 | "for env in a:\n",
130 | " print(env)\n",
131 | " print(np.squeeze(np.mean(env, axis=0)))\n",
132 | " mu.append(np.squeeze(np.mean(env, axis=0)))"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 27,
138 | "metadata": {},
139 | "outputs": [
140 | {
141 | "name": "stdout",
142 | "output_type": "stream",
143 | "text": [
144 | "[[2 0]\n",
145 | " [4 3]]\n",
146 | "[ 0.9 1.4]\n",
147 | "[[4 2]\n",
148 | " [2 3]]\n",
149 | "[ 0.9 0.4]\n"
150 | ]
151 | }
152 | ],
153 | "source": [
154 | "sigma=[]\n",
155 | "for env in a:\n",
156 | " print(env)\n",
157 | " print(np.std(env, axis=0)-0.1)\n",
158 | " sigma.append(np.std(env, axis=0)-0.1)"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 28,
164 | "metadata": {},
165 | "outputs": [
166 | {
167 | "name": "stdout",
168 | "output_type": "stream",
169 | "text": [
170 | "[array([ 3. , 1.5]), array([ 3. , 2.5])]\n",
171 | "[[ 3. 1.5]\n",
172 | " [ 3. 2.5]]\n"
173 | ]
174 | }
175 | ],
176 | "source": [
177 | "print(mu)\n",
178 | "mu = np.stack(mu)\n",
179 | "print(mu)"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 32,
185 | "metadata": {},
186 | "outputs": [
187 | {
188 | "name": "stdout",
189 | "output_type": "stream",
190 | "text": [
191 | "[ 11.25 15.25]\n"
192 | ]
193 | },
194 | {
195 | "data": {
196 | "text/plain": [
197 | "array([[ 11.25],\n",
198 | " [ 15.25]])"
199 | ]
200 | },
201 | "execution_count": 32,
202 | "metadata": {},
203 | "output_type": "execute_result"
204 | }
205 | ],
206 | "source": [
207 | "r=np.sum(mu*mu, axis=1) # element-wise product\n",
208 | "print(r)\n",
209 | "r=r.reshape(-1,1)\n",
210 | "r"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": 37,
216 | "metadata": {},
217 | "outputs": [
218 | {
219 | "name": "stdout",
220 | "output_type": "stream",
221 | "text": [
222 | "[[ 11.25]\n",
223 | " [ 15.25]]\n",
224 | "[[-22.5 -25.5]\n",
225 | " [-25.5 -30.5]]\n",
226 | "[[ 11.25 15.25]]\n",
227 | "[[ 0. 1.]\n",
228 | " [ 1. 0.]]\n",
229 | "[[ 0. 0.5]\n",
230 | " [ 0.5 0. ]]\n"
231 | ]
232 | }
233 | ],
234 | "source": [
235 | "D=(r-2*np.matmul(mu, mu.T)+r.T)\n",
236 | "print(r)\n",
237 | "print(-2*np.matmul(mu, mu.T))\n",
238 | "print(r.T)\n",
239 | "print(D)\n",
240 | "D=D/len(D)\n",
241 | "print(D)"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 38,
247 | "metadata": {},
248 | "outputs": [
249 | {
250 | "data": {
251 | "text/plain": [
252 | "array([[ 1. , 0.70710678],\n",
253 | " [ 0.70710678, 1. ]])"
254 | ]
255 | },
256 | "execution_count": 38,
257 | "metadata": {},
258 | "output_type": "execute_result"
259 | }
260 | ],
261 | "source": [
262 | "np.sqrt(D+np.eye(len(D)))"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": null,
268 | "metadata": {},
269 | "outputs": [],
270 | "source": []
271 | }
272 | ],
273 | "metadata": {
274 | "kernelspec": {
275 | "display_name": "Python 3",
276 | "language": "python",
277 | "name": "python3"
278 | },
279 | "language_info": {
280 | "codemirror_mode": {
281 | "name": "ipython",
282 | "version": 2
283 | },
284 | "file_extension": ".py",
285 | "mimetype": "text/x-python",
286 | "name": "python",
287 | "nbconvert_exporter": "python",
288 | "pygments_lexer": "ipython2",
289 | "version": "2.7.15"
290 | }
291 | },
292 | "nbformat": 4,
293 | "nbformat_minor": 2
294 | }
295 |
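
The cells above appear to sanity-check the pairwise squared-distance computation used when comparing per-environment means, via the identity ||mu_i - mu_j||^2 = r_i - 2 mu_i . mu_j + r_j. A minimal NumPy sketch of the same computation (the helper name pairwise_sq_distances is illustrative, not part of the repository):

import numpy as np

def pairwise_sq_distances(mu):
    # mu: (n_envs, dim) matrix of per-environment means, as built in the cells above.
    r = np.sum(mu * mu, axis=1).reshape(-1, 1)      # squared norms, shape (n_envs, 1)
    D = r - 2 * np.matmul(mu, mu.T) + r.T           # ||mu_i - mu_j||^2 for all pairs
    return D / len(D)                               # scaled by the number of envs, as in the notebook

mu = np.stack([np.array([3.0, 1.5]), np.array([3.0, 2.5])])
print(np.sqrt(pairwise_sq_distances(mu) + np.eye(len(mu))))
# -> [[1.         0.70710678]
#     [0.70710678 1.        ]]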
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/sys_id/universal_policy_online_system_identification/evaluate.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from robosuite_extra.env_base import make
3 | import time
4 | import torch
5 | from sim2real_policies.sys_id.common.utils import query_params
6 |
7 | def testing_envionments_generator_random(env, number_of_envs):
8 | rng = np.random.RandomState()
9 | rng.seed(3)
10 |
11 | def log_uniform(low=1e-10, high=1., size=None):
12 | return np.exp(rng.uniform(np.log(low), np.log(high), size))
13 |
14 | def ranged_random_choice(low, high, size=1):
15 | vals = np.arange(low,high+1)
16 | return rng.choice(vals, size)
17 |
18 | def select_appropriate_distribution(key):
19 | if (
20 | key == 'joint_forces'
21 | or key == 'acceleration_forces'
22 | or key == 'eef_forces'
23 | or key == 'obj_forces'
24 |
25 | or key == 'timestep_parameter'
26 | or key == 'pid_iteration_time'
27 | or key == 'mujoco_timestep'
28 |
29 | or key == 'action_additive_noise'
30 | or key == 'action_multiplicative_noise'
31 | or key == 'action_systematic_noise'
32 |
33 | or key == 'eef_obs_position_noise'
34 | or key == 'eef_obs_velocity_noise'
35 | or key == 'obj_obs_position_noise'
36 | or key == 'obj_obs_velocity_noise'
37 | or key == 'obj_angle_noise'
38 |
39 | or key == 'link_masses'
40 |
41 | or key == 'obj_density'
42 | or key == 'obj_sliding_friction'
43 | ):
44 | return rng.uniform
45 | elif (
46 | key == 'eef_timedelay'
47 | or key == 'obj_timedelay'
48 | ):
49 | return ranged_random_choice
50 | else:
51 | return log_uniform
52 |
53 | factors_for_randomisation = env.get_factors_for_randomisation()
54 | parameter_ranges = env.get_parameter_sampling_ranges()
55 | params_list = []
56 | for _ in range(0, number_of_envs):
57 | params = {}
58 |
59 | randomised_params = env.get_randomised_parameters()
60 | for param_key in randomised_params :
61 | parameter_range = parameter_ranges[param_key]
62 |
63 | if (parameter_range.shape[0] == 1):
64 | params[param_key]= np.asarray(parameter_range[0])
65 | elif (parameter_range.shape[0] == 2):
66 | distribution = select_appropriate_distribution(param_key)
67 | size = env.default_dynamics_parameters[param_key].shape
68 | params[param_key]=np.asarray(
69 | factors_for_randomisation[param_key] * distribution(*parameter_ranges[param_key], size=size))
70 | else:
71 | raise RuntimeError('Parameter randomisation range needs to be of shape {1,2}xN')
72 | params_list.append(params)
73 | return params_list
74 |
75 |
76 | def evaluate(env, policy, up=False, eval_eipsodes=5, Projection=False, proj_net=None, num_envs=5):
77 | '''
78 | Evaluate the policy during training time with fixed dynamics
79 | :param: eval_episodes: number of evaluation episodes for each env
80 | :param: up (bool): universal policy conditioned on dynamics parameters
81 | '''
82 | number_of_episodes = eval_eipsodes
83 | return_per_test_environment = []
84 | success_per_test_environment = []
85 | # initial_dynamics_params = env.get_dynamics_parameters()
86 | dynamics_params = testing_envionments_generator_random(env, num_envs) # before randomisation off
87 | ### SUGGESTED CHANGE ####
88 | initial_dynamics_params, _ = env.randomisation_off()
89 | ###################
90 |
91 | # do visualization
92 | # print('Starting testing')
93 | for params in dynamics_params:
94 | mean_return = 0.0
95 | success = 0
96 |
97 | # print('with parameters {} ...'.format(params))
98 | for i in range(number_of_episodes):
99 | #### SUGGESTED CHANGE ####
100 | # First set the parameters and then reset for them to take effect.
101 | env.set_dynamics_parameters(params)
102 | obs = env.reset()
103 | p = env.get_dynamics_parameters()
104 | #######
105 | # obs = env.reset()
106 | # env.set_dynamics_parameters(params) # set parameters after reset
107 |
108 | if up:
109 | params_list = query_params(env, randomised_only=True)
110 | if Projection and proj_net is not None:
111 | params_list = proj_net.get_context(params_list)
112 | obs = np.concatenate((params_list, obs))
113 |
114 | episode_return = 0.0
115 | done = False
116 | while not done:
117 | action = policy.get_action(obs)
118 | obs, reward, done, info = env.step(action)
119 | episode_return += reward
120 |
121 | if up:
122 | obs = np.concatenate((params_list, obs))
123 |
124 | if (done):
125 | mean_return += episode_return
126 | success += int(info['success'])
127 | break
128 |
129 | mean_return /= number_of_episodes
130 | success_rate = float(success) / float(number_of_episodes)
131 | # print('the mean return is {}'.format(mean_return))
132 | return_per_test_environment.append(mean_return)
133 | success_per_test_environment.append(success_rate)
134 |
135 | #### SUGGESTED CHANGE ####
136 | # Only randomisation_on needs to be called; this restores the previous parameters and the
137 | # randomisation too. Note that this only takes effect at the next reset, where the parameters
138 | # will be randomised again.
139 | env.randomisation_on()
140 | #####
141 |
142 | # env.set_dynamics_parameters(initial_dynamics_params) # for non-randomisation cases, need to set original dynamics parameters
143 | print('total_average_return_over_test_environments : {}'.format(np.mean(return_per_test_environment)))
144 | return np.mean(return_per_test_environment), np.mean(success_per_test_environment)
145 |
146 | def evaluate_epi(env, epi_policy, embed_net, task_policy, traj_length, eval_eipsodes=5, num_envs=5):
147 | """
148 | Unlike evaluate(), the EPI task policy is conditioned on an embedding of a probing trajectory
149 | :param: epi_policy: the policy used to roll out the probing trajectory
150 | :param: embed_net: embedding network for the trajectory
151 | :param: task_policy: the policy being evaluated, conditioned on the embedding
152 | :param: traj_length: length of the probing trajectory
153 | """
154 | number_of_episodes = eval_eipsodes
155 | return_per_test_environment = []
156 | success_per_test_environment = []
157 | dynamics_params = testing_envionments_generator_random(env, num_envs)
158 | # initial_dynamics_params = env.get_dynamics_parameters()
159 | initial_dynamics_params, _ = env.randomisation_off()
160 |
161 | def policy_rollout(env, policy, max_steps=30, params=None):
162 | """
163 | Roll out one trajectory of max_steps steps
164 | return: traj, shape (max_steps, state_dim + action_dim + 1), with the reward appended to each row
165 | """
166 | # s = env.reset()
167 | # if params is not None:
168 | # env.set_dynamics_parameters(params) # make sure .reset() not change env params
169 |
170 | if params is not None:
171 | env.set_dynamics_parameters(params) # make sure .reset() not change env params
172 | s = env.reset()
173 |
174 | traj=[]
175 | for _ in range(max_steps):
176 | a = policy.choose_action(s)
177 | s_, r, done, _ = env.step(a)
178 | s_a_r = np.concatenate((s,a, [r])) # state, action, reward
179 | traj.append(s_a_r)
180 | s=s_
181 |
182 | return traj
183 |
184 | # do visualization
185 | # print('Starting testing')
186 | for params in dynamics_params:
187 | mean_return = 0.0
188 | success = 0
189 |
190 | # print('with parameters {} ...'.format(params))
191 | for i in range(number_of_episodes):
192 | traj = policy_rollout(env, epi_policy, max_steps = traj_length, params=params) # only one traj
193 | state_action_in_traj = np.array(traj)[:, :-1] # remove the rewards
194 | embedding = embed_net(state_action_in_traj.reshape(-1))
195 | embedding = embedding.detach().cpu().numpy()
196 | # s = env.reset()
197 | # env.set_dynamics_parameters(params) # set parameters after reset
198 | env.set_dynamics_parameters(params)
199 | s = env.reset()
200 |
201 | ep_r = 0.0
202 | done = False
203 | while not done:
204 | s=np.concatenate((s, embedding))
205 | a = task_policy.get_action(s)
206 | s_, r, done, info = env.step(a)
207 | s = s_
208 | ep_r += r
209 |
210 | if (done):
211 | mean_return += ep_r
212 | success += int(info['success'])
213 | break
214 |
215 | mean_return /= number_of_episodes
216 | success_rate = float(success) / float(number_of_episodes)
217 | # print('the mean return is {}'.format(mean_return))
218 | return_per_test_environment.append(mean_return)
219 | success_per_test_environment.append(success_rate)
220 |
221 | env.randomisation_on()
222 |
223 | # env.set_dynamics_parameters(initial_dynamics_params) # set original dynamics parameters
224 | print('total_average_return_over_test_environments : {}'.format(np.mean(return_per_test_environment)))
225 | return np.mean(return_per_test_environment), np.mean(success_per_test_environment)
226 |
227 |
228 |
229 | def evaluate_lstm(env, policy, hidden_dim, eval_eipsodes=5, num_envs=5):
230 | """
231 | Evaluate the policy during training time with fixed dynamics
232 | :param: eval_episodes: number of evaluation episodes for each env
233 | """
234 | number_of_episodes = eval_eipsodes
235 | return_per_test_environment = []
236 | success_per_test_environment = []
237 | dynamics_params = testing_envionments_generator_random(env, num_envs)
238 | # initial_dynamics_params = env.get_dynamics_parameters()
239 | initial_dynamics_params, _ = env.randomisation_off()
240 |
241 |
242 | # do visualization
243 | # print('Starting testing')
244 | for params in dynamics_params:
245 | mean_return = 0.0
246 | success = 0
247 |
248 | # print('with parameters {} ...'.format(params))
249 | for i in range(number_of_episodes):
250 | env.set_dynamics_parameters(params)
251 | obs = env.reset()
252 |
253 | # obs = env.reset()
254 | # env.set_dynamics_parameters(params)
255 |
256 | episode_return = 0.0
257 | done = False
258 | hidden_out = (torch.zeros([1, 1, hidden_dim], dtype=torch.float).cuda(), \
259 | torch.zeros([1, 1, hidden_dim], dtype=torch.float).cuda()) # initialize hidden state for lstm, (hidden, cell), each is (layer, batch, dim)
260 | last_action = env.action_space.sample()
261 | while not done:
262 | hidden_in = hidden_out
263 | action, hidden_out= policy.get_action(obs, last_action, hidden_in)
264 | obs, reward, done, info = env.step(action)
265 | episode_return += reward
266 | last_action = action
267 | if (done):
268 | mean_return += episode_return
269 | success += int(info['success'])
270 | break
271 |
272 | mean_return /= number_of_episodes
273 | success_rate = float(success) / float(number_of_episodes)
274 | # print('the mean return is {}'.format(mean_return))
275 | return_per_test_environment.append(mean_return)
276 | success_per_test_environment.append(success_rate)
277 |
278 | env.randomisation_on()
279 |
280 | # env.set_dynamics_parameters(initial_dynamics_params) # set original dynamics parameters
281 | print('total_average_return_over_test_environments : {}'.format(np.mean(return_per_test_environment)))
282 | return np.mean(return_per_test_environment), np.mean(success_per_test_environment)
283 |
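
A minimal, hedged usage sketch of evaluate() as defined above (RandomPolicy is illustrative only, standing in for a trained policy object that exposes get_action()):

from sim2real_policies.utils.choose_env import choose_env
from sim2real_policies.sys_id.universal_policy_online_system_identification.evaluate import evaluate

class RandomPolicy:
    # Illustrative stand-in for a trained policy; only get_action() is required by evaluate().
    def __init__(self, action_space):
        self.action_space = action_space

    def get_action(self, obs):
        return self.action_space.sample()

env, _, _, _ = choose_env('SawyerReach')
mean_return, success_rate = evaluate(env, RandomPolicy(env.action_space),
                                     up=False, eval_eipsodes=2, num_envs=2)
print(mean_return, success_rate)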
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/sys_id/universal_policy_online_system_identification/osi_class.py:
--------------------------------------------------------------------------------
1 | """
2 | System Identification (SI)
3 | https://arxiv.org/abs/1702.02453
4 |
5 | Examples of two types:
6 | 1. Off-line SI: in sim2real_policies.sys_id.common.utils
7 | 2. On-line SI
8 | """
9 |
10 | from sim2real_policies.sys_id.common.operations import *
11 | from sim2real_policies.sys_id.common.utils import *
12 | from sim2real_policies.utils.rl_utils import load, load_model
13 | from sim2real_policies.utils.choose_env import choose_env
14 |
15 |
16 | class OSI(object):
17 | """
18 | Online system identification (OSI)
19 | Args:
20 | Projection (bool): whether a projection module is used to reduce the dimension of the predicted parameters
21 | CAT_INTERNAL (bool): whether to concatenate the internal state to the external observation
22 | context_dim (int): the compressed (context) dimension output by the projection module
23 | """
24 | def __init__(self, env_name='SawyerReach', length=3, context_dim=3, Projection=True, CAT_INTERNAL=False):
25 | self.cat_internal = CAT_INTERNAL
26 | env, environment_params, environment_wrappers, environment_wrapper_arguments = choose_env(env_name)
27 | state_dim = env.observation_space.shape[0]
28 | action_dim = env.action_space.shape[0]
29 | print('Env name: ', env_name)
30 | print('Dimension of env state: ', state_dim)
31 | print('Dimension of env action: ', action_dim)
32 | self.params_dim = env.get_randomised_parameter_dimensions()
33 | print('Dimension of randomised parameters: ', self.params_dim)
34 | data_dim = length*(state_dim+action_dim)
35 | if CAT_INTERNAL:
36 | internal_state_dim = env.get_internal_state_dimension()
37 | print('Dimension of internal state: ', internal_state_dim)
38 | data_dim = length*(state_dim+action_dim+internal_state_dim)
39 | else:
40 | data_dim = length*(state_dim+action_dim)
41 | self.osi_model = OSINetork(input_dim = data_dim, output_dim = self.params_dim)
42 | self.env_name = env_name
43 | self.length = length # trajectory length for prediction
44 |
45 | if Projection:
46 | self.proj_net = load_model(path = '../../../../data/pup_td3/model/pup_td3_projection', input_dim=self.params_dim, output_dim=context_dim)
47 | self.policy=load(path = '../../../../data/pup_td3/model/pup_td3', alg='TD3', state_dim = state_dim+context_dim, action_dim = action_dim)
48 | self.save_path = '../../../../../data/pup_td3/model/osi'
49 |
50 | else:
51 | self.proj_net = None
52 | self.policy=load(path = '../../../../data/up_td3/model/up_td3', alg='TD3', state_dim = state_dim+self.params_dim, action_dim = action_dim)
53 | self.save_path = '../../../../../data/up_td3/model/osi'
54 |
55 | def predict(self, traj):
56 | traj_input = stack_data(traj, self.length)
57 | print(traj_input)
58 | output = self.osi_model(traj_input).detach().numpy()
59 | print('out: ', output)
60 | return output
61 |
62 | def load_model(self):
63 | self.osi_model.load_state_dict(torch.load(self.save_path, map_location='cuda:0'))
64 | self.osi_model.eval()
65 |
66 | def osi_train(self, itr = 20):
67 | # update with true dynamics parameters from simulator
68 | print('Started OSI training stage I.'+'\n'+'--------------------------------------------------')
69 | params, raw_history = self.online_history_collection(itr=10, PRED_PARAM=False, CAT_INTERNAL=self.cat_internal)
70 | label, data = self.generate_data(params, raw_history)
71 | self.osi_update(data, label, epoch=5)
72 | print('Finished OSI training stage I.')
73 | print('Started OSI training stage II.'+'\n'+'--------------------------------------------------')
74 | # update with predicted dynamics parameters from simulator
75 | losses = []
76 | for _ in range(itr): # while not converge
77 | params, raw_history = self.online_history_collection(PRED_PARAM=True, CAT_INTERNAL = self.cat_internal)
78 | label, data = self.generate_data(params, raw_history)
79 | loss = self.osi_update(data, label, epoch=5)
80 | losses.append(loss)
81 | plot(losses, name='osi_train')
82 | print('Finished OSI training stage II.')
83 |
84 |
85 |
86 | def generate_data(self, params, raw_history):
87 | """
88 | Generate a training dataset from raw history trajectories;
89 | self.length is the window size: each training sample is a flattened window of self.length consecutive (state, action) pairs
90 | """
91 | assert len(params) == len(raw_history)
92 | label=[]
93 | data=[]
94 | for param, epi in zip(params, raw_history):
95 | for i in range(0, len(epi)-self.length):
96 | data.append(epi[i:i+self.length].reshape(-1)) # [s,a,s,a] for length=2
97 | label.append(param)
98 | assert len(label)==len(data)
99 |
100 | return label, data
101 |
102 |
103 | def online_history_collection(self, itr=30, max_steps=30, PRED_PARAM=False, CAT_INTERNAL=False):
104 | """ collect random simulation parameters and trajetories with universal policy
105 | https://arxiv.org/abs/1702.02453 (Preparing for the Unknown: Learning a Universal Policy with Online System Identification)
106 | """
107 | env, environment_params, environment_wrappers, environment_wrapper_arguments = choose_env(self.env_name)
108 | action_space = env.action_space
109 | ini_state_space = env.observation_space
110 | state_space = spaces.Box(-np.inf, np.inf, shape=(ini_state_space.shape[0]+self.params_dim, )) # add the dynamics param dim
111 |
112 | # a random policy
113 | data_collection_policy=DPG_PolicyNetwork(state_space, action_space, hidden_dim=512).cuda()
114 |
115 | params_list=[]
116 | history=[]
117 | for eps in range(itr): # K
118 | state = env.reset()
119 | params = query_params(env, randomised_only=True)
120 | epi_traj = []
121 | params_list.append(params)
122 |
123 | # N is 1 in this implementation, as each env.reset() will have a different parameter set
124 |
125 | for step in range(max_steps): # T
126 | if CAT_INTERNAL:
127 | internal_state = env.get_internal_state()
128 | full_state = np.concatenate([state, internal_state])
129 | else:
130 | full_state = state
131 | if len(epi_traj)>=self.length and PRED_PARAM:
132 | osi_input = stack_data(epi_traj, self.length) # stack (s,a) to have same length as in the model input
133 | pre_params = self.osi_model(osi_input).detach().numpy()
134 | else:
135 | pre_params = params
136 |
137 | if self.proj_net is not None: # projected to low dimensions
138 | pre_params = self.proj_net.get_context(pre_params)
139 | else:
140 | pass
141 | # print('No projection network!')
142 | params_state = np.concatenate((pre_params, state)) # use predicted parameters instead of true values for training, according to the paper
143 | action = data_collection_policy.get_action(params_state)
144 | epi_traj.append(np.concatenate((full_state, action)))
145 |
146 | next_state, _, _, _ = env.step(action)
147 | state = next_state
148 | history.append(np.array(epi_traj))
149 | print("Finished collecting data of {} trajectories.".format(itr))
150 | return params_list, history
151 |
152 |
153 | def osi_update(self, input, label, epoch=1, lr=1e-1):
154 | """ Update the system identification (SI) with online data collection """
155 | criterion = nn.MSELoss()
156 | optimizer = optim.Adam(self.osi_model.parameters(), lr)
157 | scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99) # gamma: decay for each step
158 | input = torch.Tensor(input)
159 | label = torch.Tensor(label)
160 |
161 | for i in range(epoch):
162 | predict = self.osi_model(input)
163 | loss = criterion(predict, label)
164 | optimizer.zero_grad()
165 | loss.backward()
166 | print('Train the SI model, Epoch: {} | Loss: {}'.format(i, loss))
167 | optimizer.step()
168 | scheduler.step()
169 |
170 | torch.save(self.osi_model.state_dict(), self.save_path)
171 |
172 | return loss.detach().cpu().numpy()
173 |
174 | class OSINetork(nn.Module):
175 | def __init__(self, input_dim, output_dim, hidden_dim=512, dropout=0.1):
176 | """ same OSI network structure as: https://arxiv.org/abs/1702.02453 """
177 | super(OSINetork, self).__init__()
178 | self.linear1 = nn.Linear(input_dim, hidden_dim)
179 | self.dropout1 = nn.Dropout(dropout)
180 | self.linear2 = nn.Linear(hidden_dim, int(hidden_dim/2))
181 | self.dropout2 = nn.Dropout(dropout)
182 | self.linear3 = nn.Linear(int(hidden_dim/2), int(hidden_dim/4))
183 | self.dropout3 = nn.Dropout(dropout)
184 | self.linear4 = nn.Linear(int(hidden_dim/4), output_dim)
185 |
186 | def forward(self, input):
187 | if len(input.shape) < 2:
188 | input = torch.FloatTensor(np.expand_dims(input, 0))
189 | x = F.tanh(self.linear1(input))
190 | x = self.dropout1(x)
191 | x = F.tanh(self.linear2(x))
192 | x = self.dropout2(x)
193 | x = F.tanh(self.linear3(x))
194 | x = self.dropout3(x)
195 | x = self.linear4(x)
196 | return x.squeeze(0)
197 |
198 | def stack_data(traj, length):
199 | traj = np.array(traj)
200 | return traj[-length:, :].reshape(-1)
201 |
202 | if __name__ == '__main__':
203 | ENV_NAME =['SawyerReach', 'SawyerPush', 'SawyerSlide'][0]
204 |
205 | osi = OSI(env_name = ENV_NAME, length=3, context_dim=3, Projection=False, CAT_INTERNAL=True)
206 | osi.osi_train()
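
To make the sliding window in generate_data() concrete, a small standalone sketch of the same slicing (the array contents are arbitrary illustrative values):

import numpy as np

# With length=2, an episode of 4 (state, action) rows yields 4 - 2 = 2 training samples,
# each a flattened window of 2 consecutive rows, all labelled with that episode's parameters.
epi = np.arange(4 * 3).reshape(4, 3)   # 4 steps, each row = concatenated (state, action)
length = 2
data = [epi[i:i + length].reshape(-1) for i in range(0, len(epi) - length)]
print(len(data))   # 2
print(data[0])     # [0 1 2 3 4 5]  -> rows 0 and 1 flattened, i.e. [s, a, s, a]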
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/td3/default_params.py:
--------------------------------------------------------------------------------
1 | def get_hyperparams():
2 | hyperparams_dict={
3 | 'alg_name': 'td3',
4 | 'action_range': 1.,
5 | 'batch_size': 640,
6 | 'explore_steps': 0,
7 | 'update_itr': 1, # iterative update
8 | 'explore_noise_scale': 0.3,
9 | 'eval_noise_scale': 0.5, # noisy evaluation trick
10 | 'reward_scale': 1., # reward normalization
11 | 'hidden_dim': 512,
12 | 'noise_decay': 0.9999, # decaying exploration noise
13 | 'policy_target_update_interval': 3, # delayed update
14 | 'q_lr': 3e-4,
15 | 'policy_lr': 3e-4,
16 | 'replay_buffer_size': 1e6,
17 | 'deterministic': True
18 | }
19 | print('Hyperparameters: ', hyperparams_dict)
20 | return hyperparams_dict
21 |
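
A brief, hedged usage sketch; the real consumer is td3_multiprocess.py, which reads these values out of the returned dictionary:

from sim2real_policies.td3.default_params import get_hyperparams

hyper = get_hyperparams()
batch_size = hyper['batch_size']                     # 640
replay_capacity = int(hyper['replay_buffer_size'])   # stored as the float 1e6; the int cast here is illustrative
print(batch_size, replay_capacity)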
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from .logger import Logger
2 | from .policy_networks import SAC_PolicyNetwork, DPG_PolicyNetwork, DPG_PolicyNetworkLSTM
3 |
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/utils/buffers.py:
--------------------------------------------------------------------------------
1 | import random
2 | import numpy as np
3 | import torch
4 |
5 |
6 |
7 | class ReplayBuffer:
8 | def __init__(self, capacity):
9 | self.capacity = capacity
10 | self.buffer = []
11 | self.position = 0
12 |
13 | def push(self, state, action, reward, next_state, done):
14 | if len(self.buffer) < self.capacity:
15 | self.buffer.append(None)
16 | self.buffer[self.position] = (state, action, reward, next_state, done)
17 | self.position = int((self.position + 1) % self.capacity) # as a ring buffer
18 |
19 | def sample(self, batch_size):
20 | batch = random.sample(self.buffer, batch_size)
21 | state, action, reward, next_state, done = map(np.stack,
22 | zip(*batch)) # stack for each element
23 | '''
24 | the * serves as unpack: sum(a,b) <=> batch=(a,b), sum(*batch) ;
25 | zip: a=[1,2], b=[2,3], zip(a,b) => [(1, 2), (2, 3)] ;
26 | the map serves as mapping the function on each list element: map(square, [2,3]) => [4,9] ;
27 | np.stack((1,2)) => array([1, 2])
28 | '''
29 | return state, action, reward, next_state, done
30 |
31 | def __len__(
32 | self): # cannot work in multiprocessing case, len(replay_buffer) is not available in proxy of manager!
33 | return len(self.buffer)
34 |
35 | def get_length(self):
36 | return len(self.buffer)
37 |
38 |
39 | class ReplayBufferPUP:
40 | """
41 | ReplayBuffer for Projected Universal Policy
42 | """
43 | def __init__(self, capacity):
44 | self.capacity = capacity
45 | self.buffer = []
46 | self.position = 0
47 |
48 | def push(self, state, action, reward, next_state, done, params):
49 | if len(self.buffer) < self.capacity:
50 | self.buffer.append(None)
51 | self.buffer[self.position] = (state, action, reward, next_state, done, params)
52 | self.position = int((self.position + 1) % self.capacity) # as a ring buffer
53 |
54 | def sample(self, batch_size):
55 | batch = random.sample(self.buffer, batch_size)
56 | state, action, reward, next_state, done, params = map(np.stack,
57 | zip(*batch)) # stack for each element
58 | '''
59 | the * serves as unpack: sum(a,b) <=> batch=(a,b), sum(*batch) ;
60 | zip: a=[1,2], b=[2,3], zip(a,b) => [(1, 2), (2, 3)] ;
61 | the map serves as mapping the function on each list element: map(square, [2,3]) => [4,9] ;
62 | np.stack((1,2)) => array([1, 2])
63 | '''
64 | return state, action, reward, next_state, done, params
65 |
66 | def __len__(
67 | self): # cannot work in multiprocessing case, len(replay_buffer) is not available in proxy of manager!
68 | return len(self.buffer)
69 |
70 | def get_length(self):
71 | return len(self.buffer)
72 |
73 |
74 |
75 | class ReplayBufferLSTM:
76 | """
77 | Replay buffer for an agent with an LSTM network that additionally uses the previous action; it can be used
78 | when the hidden states are not stored (the LSTM is arbitrarily initialised for training).
79 | Each sample contains a whole episode instead of a single step.
80 | """
81 | def __init__(self, capacity):
82 | self.capacity = capacity
83 | self.buffer = []
84 | self.position = 0
85 |
86 | def push(self, state, action, last_action, reward, next_state, done):
87 | if len(self.buffer) < self.capacity:
88 | self.buffer.append(None)
89 | self.buffer[self.position] = (state, action, last_action, reward, next_state, done)
90 | self.position = int((self.position + 1) % self.capacity) # as a ring buffer
91 |
92 | def sample(self, batch_size):
93 | batch = random.sample(self.buffer, batch_size)
94 | state, action, last_action, reward, next_state, done = map(np.stack,
95 | zip(*batch)) # stack for each element
96 | '''
97 | the * serves as unpack: sum(a,b) <=> batch=(a,b), sum(*batch) ;
98 | zip: a=[1,2], b=[2,3], zip(a,b) => [(1, 2), (2, 3)] ;
99 | the map serves as mapping the function on each list element: map(square, [2,3]) => [4,9] ;
100 | np.stack((1,2)) => array([1, 2])
101 | '''
102 | return state, action, last_action, reward, next_state, done
103 |
104 | def __len__(
105 | self): # cannot work in multiprocessing case, len(replay_buffer) is not available in proxy of manager!
106 | return len(self.buffer)
107 |
108 | def get_length(self):
109 | return len(self.buffer)
110 |
111 |
112 |
113 |
114 | class ReplayBufferLSTM2:
115 | """
116 | Replay buffer for an agent with an LSTM network, additionally storing the previous action,
117 | the initial input hidden state and the output hidden state of the LSTM.
118 | Each sample contains a whole episode instead of a single step.
119 | 'hidden_in' and 'hidden_out' are only the initial hidden states of each episode, used for LSTM initialisation.
120 |
121 | """
122 | def __init__(self, capacity):
123 | self.capacity = capacity
124 | self.buffer = []
125 | self.position = 0
126 |
127 | def push(self, hidden_in, hidden_out, state, action, last_action, reward, next_state, done):
128 | if len(self.buffer) < self.capacity:
129 | self.buffer.append(None)
130 | self.buffer[self.position] = (hidden_in, hidden_out, state, action, last_action, reward, next_state, done)
131 | self.position = int((self.position + 1) % self.capacity) # as a ring buffer
132 |
133 | def sample(self, batch_size):
134 | s_lst, a_lst, la_lst, r_lst, ns_lst, hi_lst, ci_lst, ho_lst, co_lst, d_lst=[],[],[],[],[],[],[],[],[],[]
135 | batch = random.sample(self.buffer, batch_size)
136 | for sample in batch:
137 | (h_in, c_in), (h_out, c_out), state, action, last_action, reward, next_state, done = sample
138 | s_lst.append(state)
139 | a_lst.append(action)
140 | la_lst.append(last_action)
141 | r_lst.append(reward)
142 | ns_lst.append(next_state)
143 | d_lst.append(done)
144 | hi_lst.append(h_in) # h_in: (1, batch_size=1, hidden_size)
145 | ci_lst.append(c_in)
146 | ho_lst.append(h_out)
147 | co_lst.append(c_out)
148 | hi_lst = torch.cat(hi_lst, dim=-2).detach() # cat along the batch dim
149 | ho_lst = torch.cat(ho_lst, dim=-2).detach()
150 | ci_lst = torch.cat(ci_lst, dim=-2).detach()
151 | co_lst = torch.cat(co_lst, dim=-2).detach()
152 |
153 | hidden_in = (hi_lst, ci_lst)
154 | hidden_out = (ho_lst, co_lst)
155 | return hidden_in, hidden_out, s_lst, a_lst, la_lst, r_lst, ns_lst, d_lst
156 |
157 | def __len__(
158 | self): # cannot work in multiprocessing case, len(replay_buffer) is not available in proxy of manager!
159 | return len(self.buffer)
160 |
161 | def get_length(self):
162 | return len(self.buffer)
163 |
164 |
165 |
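
A minimal, hedged usage sketch of the basic ReplayBuffer above (the dimensions are arbitrary examples):

import numpy as np
from sim2real_policies.utils.buffers import ReplayBuffer

buffer = ReplayBuffer(capacity=100)
for t in range(5):
    state = np.full(3, float(t))
    buffer.push(state, np.array([0.1]), 1.0, state + 1.0, False)   # (s, a, r, s', done)

states, actions, rewards, next_states, dones = buffer.sample(batch_size=4)
print(states.shape, actions.shape, rewards.shape)   # (4, 3) (4, 1) (4,)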
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/utils/choose_env.py:
--------------------------------------------------------------------------------
1 | from robosuite_extra.wrappers import EEFXVelocityControl, GymWrapper, FlattenWrapper
2 | from sim2real_policies.utils.envs import make_env
3 |
4 | push_no_randomisation = []
5 |
6 | push_full_randomisation = ['eef_timedelay', 'obj_timedelay', 'timestep_parameter', 'pid_iteration_time',
7 | 'action_additive_noise', 'action_systematic_noise', 'action_multiplicative_noise',
8 | 'eef_obs_position_noise', 'eef_obs_velocity_noise',
9 | 'obj_obs_position_noise', 'obj_obs_velocity_noise', 'obj_angle_noise',
10 | 'obj_density', 'obj_sliding_friction', 'obj_torsional_friction', 'obj_size',
11 | 'link_masses', 'joint_dampings', 'armatures', 'joint_frictions',
12 | 'kps', 'kis', 'kds', ]
13 |
14 | push_force_randomisation = ['joint_forces', 'obj_forces']
15 |
16 | push_force_noise_randomisation = ['eef_timedelay', 'obj_timedelay',
17 | 'eef_obs_position_noise', 'eef_obs_velocity_noise',
18 | 'obj_obs_position_noise', 'obj_obs_velocity_noise', 'obj_angle_noise',
19 | 'joint_forces', 'obj_forces']
20 |
21 |
22 | reach_no_randomisation = []
23 |
24 | reach_full_randomisation = ['action_additive_noise', 'action_systematic_noise', 'action_multiplicative_noise',
25 | 'eef_obs_position_noise', 'eef_obs_velocity_noise',
26 | 'timestep_parameter', 'pid_iteration_time',
27 | 'link_masses', 'joint_dampings', 'armatures', 'joint_frictions',
28 | 'kps', 'kis', 'kds']
29 |
30 | reach_force_randomisation = ['joint_forces']
31 |
32 | reach_force_noise_randomisation = ['eef_obs_position_noise', 'eef_obs_velocity_noise', 'joint_forces', ]
33 |
34 |
35 | slide_no_randomisation = []
36 |
37 | slide_full_randomisation = ['obj_timedelay', 'timestep_parameter', 'pid_iteration_time',
38 | 'action_additive_noise', 'action_multiplicative_noise', 'action_systematic_noise',
39 | 'joint_obs_position_noise', 'joint_obs_velocity_noise', 'obs_position_noise',
40 | 'obs_velocity_noise', 'obs_angle_noise',
41 | 'obj_density', 'obj_size', 'obj_sliding_friction', 'obj_torsional_friction',
42 | 'link_masses', 'joint_dampings', 'armatures', 'joint_frictions',
43 | 'kps', 'kis', 'kds']
44 |
45 | slide_force_randomisation = ['joint_forces', 'obj_forces']
46 |
47 | slide_force_noise_randomisation = ['joint_obs_position_noise', 'joint_obs_velocity_noise', 'obs_position_noise',
48 | 'obs_velocity_noise', 'obs_angle_noise',
49 | 'obj_timedelay',
50 | 'joint_forces', 'obj_forces'
51 | ]
52 |
53 |
54 | def choose_env(env_name, randomisation_configuration=None, env_params_dict=None):
55 | if env_name == 'SawyerPush':
56 | ## Set the environment parameters
57 | environment_params = {
58 | "gripper_type": "PushingGripper",
59 | "parameters_to_randomise": push_force_noise_randomisation,
60 | "randomise_initial_conditions": True,
61 | "table_full_size": (0.8, 1.6, 0.719),
62 | "table_friction": (1e-4, 5e-3, 1e-4),
63 | "use_camera_obs": False,
64 | "use_object_obs": True,
65 | "reward_shaping": True,
66 | "placement_initializer": None,
67 | "gripper_visualization": False,
68 | "use_indicator_object": False,
69 | "has_renderer": False, # <--------
70 | "has_offscreen_renderer": False,
71 | "render_collision_mesh": False,
72 | "render_visual_mesh": True,
73 | "control_freq": 10, # <--------
74 | "horizon": 80, # <--------
75 | "ignore_done": False,
76 | "camera_name": "frontview",
77 | "camera_height": 256,
78 | "camera_width": 256,
79 | "camera_depth": False,
80 | "pid": True
81 |
82 | }
83 |
84 | environment_wrappers = [EEFXVelocityControl, GymWrapper, FlattenWrapper]
85 | environment_wrapper_arguments = [{'max_action': 0.1, 'dof': 2},
86 | {},
87 | {'keys': 'task-state'}]
88 |
89 |
90 | elif env_name == 'SawyerReach':
91 | ## Set the environment parameters
92 | environment_params = {
93 | "gripper_type": "PushingGripper",
94 | "parameters_to_randomise": reach_full_randomisation,
95 | "randomise_initial_conditions": True,
96 | "table_full_size": (0.8, 1.6, 0.719),
97 | "use_camera_obs": False,
98 | "use_object_obs": True,
99 | "reward_shaping": True,
100 | "use_indicator_object": False,
101 | "has_renderer": False,
102 | "has_offscreen_renderer": False,
103 | "render_collision_mesh": False,
104 | "render_visual_mesh": True,
105 | "control_freq": 10.,
106 | "horizon": 50,
107 | "ignore_done": False,
108 | "camera_name": " frontview",
109 | "camera_height": 256,
110 | "camera_width": 256,
111 | "camera_depth": False,
112 | "pid": True,
113 | "success_radius": 0.01
114 | }
115 |
116 | environment_wrappers = [EEFXVelocityControl, GymWrapper, FlattenWrapper]
117 | environment_wrapper_arguments = [{'max_action': 0.1, 'dof': 3},
118 | {},
119 | {'keys': 'task-state'}]
120 |
121 | elif env_name == 'SawyerSlide':
122 | environment_params = {
123 | 'gripper_type': "SlidePanelGripper",
124 | 'parameters_to_randomise': slide_full_randomisation,
125 | 'randomise_initial_conditions': True,
126 | 'use_camera_obs': False,
127 | 'use_object_obs': True,
128 | 'reward_shaping': True,
129 | 'use_indicator_object': False,
130 | 'has_renderer': False,
131 | 'has_offscreen_renderer': False,
132 | 'render_collision_mesh': False,
133 | 'render_visual_mesh': True,
134 | 'control_freq': 10.,
135 | 'horizon': 60,
136 | 'ignore_done': False,
137 | 'camera_name': "frontview",
138 | 'camera_height': 256,
139 | 'camera_width': 256,
140 | 'camera_depth': False,
141 | 'pid': True,
142 | }
143 |
144 | environment_wrappers = [ GymWrapper, FlattenWrapper]
145 | environment_wrapper_arguments = [{},
146 | {'keys': 'task-state'}]
147 | if randomisation_configuration is not None:
148 | environment_params['parameters_to_randomise'] = eval(randomisation_configuration)
149 |
150 | if env_params_dict is not None:
151 | for key, value in env_params_dict.items():
152 | try:
153 | environment_params[key] = value
154 | except Exception as error:
155 | print(error)
156 |
157 |
158 | env = make_env('robosuite.'+env_name, 1, 0, environment_params, environment_wrappers,
159 | environment_wrapper_arguments)()
160 |
161 | print('Randomised Parameters: {}'.format(environment_params["parameters_to_randomise"]))
162 | return env, environment_params, environment_wrappers, environment_wrapper_arguments
163 |
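
A hedged usage sketch: choose_env() builds and wraps an environment by name, and the optional randomisation_configuration string names one of the module-level lists defined above:

from sim2real_policies.utils.choose_env import choose_env

env, env_params, wrappers, wrapper_args = choose_env(
    'SawyerReach', randomisation_configuration='reach_force_randomisation')
print(env_params['parameters_to_randomise'])   # ['joint_forces']
obs = env.reset()
print(obs.shape)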
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/utils/envs.py:
--------------------------------------------------------------------------------
1 | from robosuite_extra.env_base import make
2 |
3 | from robosuite_extra.push_env.sawyer_push import SawyerPush
4 | from robosuite_extra.reach_env.sawyer_reach import SawyerReach
5 | from robosuite_extra.slide_env.sawyer_slide import SawyerSlide
6 |
7 |
8 | def make_env(env_id, seed, rank, environment_arguments = {}, wrappers = [], wrapper_arguments = []):
9 | assert len(wrappers) == len(wrapper_arguments) , 'There needs to be one argument dict per wrapper'
10 |
11 | def _thunk():
12 | if env_id.startswith("gym"):
13 | raise NotImplementedError('Gym environments not implemented')
14 | elif env_id.startswith('robosuite'):
15 | _, robosuite_name = env_id.split('.')
16 |
17 | env = make(robosuite_name, **environment_arguments)
18 | else:
19 | raise NotImplementedError('Only robosuite environments are compatible.')
20 |
21 |
22 | for i, wrapper in enumerate(wrappers):
23 | env = wrapper(env, **wrapper_arguments[i])
24 |
25 | env.seed(seed + rank)
26 |
27 | return env
28 |
29 | return _thunk
30 |
31 |
--------------------------------------------------------------------------------
/sim2real-policies/sim2real_policies/utils/evaluate.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from sim2real_policies.sys_id.common.utils import query_params
4 | from mujoco_py import MujocoException
5 |
6 | def testing_envionments_generator_random(env, number_of_envs):
7 | rng = np.random.RandomState()
8 | rng.seed(3)
9 |
10 | def log_uniform(low=1e-10, high=1., size=None):
11 | return np.exp(rng.uniform(np.log(low), np.log(high), size))
12 |
13 | def ranged_random_choice(low, high, size=1):
14 | vals = np.arange(low,high+1)
15 | return rng.choice(vals, size)
16 |
17 | def select_appropriate_distribution(key):
18 | if (
19 | key == 'joint_forces'
20 | or key == 'acceleration_forces'
21 | or key == 'eef_forces'
22 | or key == 'obj_forces'
23 |
24 | or key == 'timestep_parameter'
25 | or key == 'pid_iteration_time'
26 | or key == 'mujoco_timestep'
27 |
28 | or key == 'action_additive_noise'
29 | or key == 'action_multiplicative_noise'
30 | or key == 'action_systematic_noise'
31 |
32 | or key == 'eef_obs_position_noise'
33 | or key == 'eef_obs_velocity_noise'
34 | or key == 'obj_obs_position_noise'
35 | or key == 'obj_obs_velocity_noise'
36 | or key == 'obj_angle_noise'
37 |
38 | or key == 'link_masses'
39 |
40 | or key == 'obj_density'
41 | or key == 'obj_sliding_friction'
42 | ):
43 | return rng.uniform
44 | elif (
45 | key == 'eef_timedelay'
46 | or key == 'obj_timedelay'
47 | ):
48 | return ranged_random_choice
49 | else:
50 | return log_uniform
51 |
52 | factors_for_randomisation = env.get_factors_for_randomisation()
53 | parameter_ranges = env.get_parameter_sampling_ranges()
54 | params_list = []
55 | for _ in range(0, number_of_envs):
56 | params = {}
57 |
58 | randomised_params = env.get_randomised_parameters()
59 | for param_key in randomised_params :
60 | parameter_range = parameter_ranges[param_key]
61 |
62 | if (parameter_range.shape[0] == 1):
63 | params[param_key]= np.asarray(parameter_range[0])
64 | elif (parameter_range.shape[0] == 2):
65 | distribution = select_appropriate_distribution(param_key)
66 | size = env.default_dynamics_parameters[param_key].shape
67 | params[param_key]=np.asarray(
68 | factors_for_randomisation[param_key] * distribution(*parameter_ranges[param_key], size=size))
69 | else:
70 | raise RuntimeError('Parameter randomisation range needs to be of shape {1,2}xN')
71 | params_list.append(params)
72 | return params_list
73 |
74 |
75 | def evaluate(env, policy, up=False, eval_eipsodes=5, Projection=False, proj_net=None, num_envs=5, randomised_only=False, dynamics_only=False):
76 | '''
77 | Evaluate the policy during training time with fixed dynamics
78 | :param: eval_episodes: number of evaluation episodes for each env
79 | :param: up (bool): universal policy conditioned on dynamics parameters
80 | '''
81 | number_of_episodes = eval_eipsodes
82 | return_per_test_environment = []
83 | success_per_test_environment = []
84 | # initial_dynamics_params = env.get_dynamics_parameters()
85 | dynamics_params = testing_envionments_generator_random(env, num_envs) # before randomisation off
86 | ### SUGGESTED CHANGE ####
87 | initial_dynamics_params, _ = env.randomisation_off()
88 | ###################
89 |
90 | # do visualization
91 | # print('Starting testing')
92 | for params in dynamics_params:
93 | mean_return = 0.0
94 | success = 0
95 |
96 | # print('with parameters {} ...'.format(params))
97 | for i in range(number_of_episodes):
98 | #### SUGGESTED CHANGE ####
99 | # First set the parameters and then reset for them to take effect.
100 | env.set_dynamics_parameters(params)
101 | obs = env.reset()
102 | p = env.get_dynamics_parameters()
103 | #######
104 | # obs = env.reset()
105 | # env.set_dynamics_parameters(params) # set parameters after reset
106 |
107 | if up:
108 | env.randomisation_on() # turn randomisation on so that the parameters are queried correctly
109 | params_list = query_params(env, randomised_only=randomised_only, dynamics_only=dynamics_only)
110 | env.randomisation_off()
111 | if Projection and proj_net is not None:
112 | params_list = proj_net.get_context(params_list)
113 | obs = np.concatenate((params_list, obs))
114 |
115 | episode_return = 0.0
116 | done = False
117 | while not done:
118 | action = policy.get_action(obs)
119 | obs, reward, done, info = env.step(action)
120 | episode_return += reward
121 |
122 | if up:
123 | obs = np.concatenate((params_list, obs))
124 |
125 | if (done):
126 | mean_return += episode_return
127 | success += int(info['success'])
128 | break
129 |
130 | mean_return /= number_of_episodes
131 | success_rate = float(success) / float(number_of_episodes)
132 | # print('the mean return is {}'.format(mean_return))
133 | return_per_test_environment.append(mean_return)
134 | success_per_test_environment.append(success_rate)
135 |
136 | #### SUGGESTED CHANGE ####
137 | # Only randomisation_on needs to be called; this restores the previous parameters and the
138 | # randomisation too. Note that this only takes effect at the next reset, where the parameters
139 | # will be randomised again.
140 | env.randomisation_on()
141 | #####
142 |
143 | # env.set_dynamics_parameters(initial_dynamics_params) # for non-randomisation cases, need to set original dynamics parameters
144 | print('total_average_return_over_test_environments : {}'.format(np.mean(return_per_test_environment)))
145 | return np.mean(return_per_test_environment), np.mean(success_per_test_environment)
146 |
147 | def evaluate_epi(env, epi_policy, embed_net, task_policy, traj_length, eval_eipsodes=5, num_envs=5):
148 | """
149 | Unlike evaluate(), the EPI task policy is conditioned on an embedding of a probing trajectory
150 | :param: epi_policy: the policy used to roll out the probing trajectory
151 | :param: embed_net: embedding network for the trajectory
152 | :param: task_policy: the policy being evaluated, conditioned on the embedding
153 | :param: traj_length: length of the probing trajectory
154 | """
155 | number_of_episodes = eval_eipsodes
156 | return_per_test_environment = []
157 | success_per_test_environment = []
158 | dynamics_params = testing_envionments_generator_random(env, num_envs)
159 | # initial_dynamics_params = env.get_dynamics_parameters()
160 | initial_dynamics_params, _ = env.randomisation_off()
161 |
162 | def policy_rollout(env, policy, max_steps=30, params=None):
163 | """
164 | Roll out one trajectory of at most max_steps steps
165 | return: traj, shape (max_steps, state_dim + action_dim + 1), with the reward appended to each row
166 | """
167 | # s = env.reset()
168 | # if params is not None:
169 | # env.set_dynamics_parameters(params) # make sure .reset() not change env params
170 |
171 | if params is not None:
172 | env.set_dynamics_parameters(params) # make sure .reset() not change env params
173 | for _ in range(3):
174 | s = env.reset()
175 | traj=[]
176 | for _ in range(max_steps):
177 | a = policy.choose_action(s)
178 | try:
179 | s_, r, done, _ = env.step(a)
180 | except MujocoException:
181 | print('MujocoException')
182 | break
183 | s_a_r = np.concatenate((s,a, [r])) # state, action, reward
184 | traj.append(s_a_r)
185 | s=s_
186 | if len(traj) == max_steps:
187 | break
188 |
189 | if len(traj)