├── LICENSE ├── ReadMe.md ├── requirements.txt ├── robosuite-extra ├── robosuite_extra │ ├── __init__.py │ ├── assets │ │ ├── ReadMe.md │ │ ├── grippers │ │ │ ├── meshes │ │ │ │ ├── electric_gripper_base.STL │ │ │ │ ├── paddle_tip.STL │ │ │ │ └── standard_narrow.STL │ │ │ └── slide_panel_gripper.xml │ │ ├── robot │ │ │ └── sawyer │ │ │ │ ├── meshes │ │ │ │ ├── base.stl │ │ │ │ ├── head.stl │ │ │ │ ├── l0.stl │ │ │ │ ├── l1.stl │ │ │ │ ├── l2.stl │ │ │ │ ├── l3.stl │ │ │ │ ├── l4.stl │ │ │ │ ├── l5.stl │ │ │ │ ├── l6.stl │ │ │ │ └── pedestal.stl │ │ │ │ ├── robot.xml │ │ │ │ └── robot_torque.xml │ │ └── urdf │ │ │ └── sawyer_intera.urdf │ ├── controllers │ │ ├── __init__.py │ │ └── sawyer_eef_velocity_controller.py │ ├── env_base │ │ ├── __init__.py │ │ ├── base.py │ │ └── sawyer.py │ ├── models │ │ ├── __init__.py │ │ ├── generated_objects.py │ │ ├── grippers │ │ │ ├── __init__.py │ │ │ ├── gripper_factory.py │ │ │ ├── pushing_gripper.py │ │ │ └── slide_panel_gripper.py │ │ ├── robot │ │ │ ├── __init__.py │ │ │ └── sawyer_robot.py │ │ └── tasks │ │ │ ├── __init__.py │ │ │ └── placement_sampler.py │ ├── push_env │ │ ├── __init__.py │ │ ├── demo.py │ │ ├── push_task.py │ │ └── sawyer_push.py │ ├── reach_env │ │ ├── __init__.py │ │ ├── demo.py │ │ ├── reach_task.py │ │ └── sawyer_reach.py │ ├── slide_env │ │ ├── __init__.py │ │ ├── demo.py │ │ ├── sawyer_slide.py │ │ └── slide_task.py │ ├── utils │ │ ├── __init__.py │ │ ├── domain_randomisation_utils.py │ │ └── transform_utils.py │ └── wrappers │ │ ├── __init__.py │ │ ├── eef_velocy_control_wrapper.py │ │ └── gym_wrapper.py └── setup.py ├── sim2real-calibration-characterisation ├── __init__.py ├── setup.py └── sim2real_calibration_characterisation │ ├── __init__.py │ ├── sim2real_characterisation │ ├── __init__.py │ ├── pushing_characterisation.py │ ├── reaching_characterisation.py │ └── sliding_characterisation.py │ ├── simulation_calibration │ ├── __init__.py │ └── calibrate_internal_dynamics.py │ └── utils │ ├── __init__.py │ └── logger.py └── sim2real-policies ├── setup.py └── sim2real_policies ├── __init__.py ├── after_training_tests ├── epi_policy_ablation.py ├── epi_utils.py ├── network_loading.py ├── pushing_test.py ├── reaching_test.py ├── run_all_tests.py ├── sliding_test.py ├── tsne_for_epi.py ├── tsne_for_epi_pushing.py └── uposi_policy_ablation.py ├── lstm_td3 ├── lstm_td3_multiprocess.py └── plot.py ├── ppo ├── default_params.py └── ppo_multiprocess.py ├── sys_id ├── common │ ├── nets.py │ ├── operations.py │ └── utils.py ├── env_probing_interaction │ ├── dataset_collection.py │ ├── epi.py │ └── understand_separation_loss.ipynb └── universal_policy_online_system_identification │ ├── evaluate.py │ ├── osi.py │ ├── osi_class.py │ └── uposi_separate_td3_multiprocess.py ├── td3 ├── default_params.py └── td3_multiprocess.py └── utils ├── __init__.py ├── buffers.py ├── choose_env.py ├── envs.py ├── evaluate.py ├── initialize.py ├── load_params.py ├── logger.py ├── optimizers.py ├── policy_networks.py ├── rl_utils.py └── value_networks.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 The Robot Learning Lab at Imperial College London 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or 
sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/ReadMe.md:
--------------------------------------------------------------------------------
1 | # Crossing the Gap: A Deep Dive into Zero-Shot Sim-to-Real Transfer for Dynamics
2 |
3 | This repository contains the code used for training the manipulation policies in simulation,
4 | before deploying them to the real world using the ROS package found here: https://github.com/eugval/sim2real_dynamics_robot
5 |
6 | ### Dependencies
7 | In order to run this project, the following dependencies need to be satisfied.
8 |
9 | MuJoCo: http://www.mujoco.org/ \
10 | OpenAI Gym: https://github.com/openai/gym \
11 | OpenAI mujoco-py: https://github.com/openai/mujoco-py \
12 | urdf_parser_py: https://github.com/ros/urdf_parser_py \
13 | PyKDL: If the pip version does not work, it will need to be compiled from source by following the instructions
14 | at https://github.com/orocos/orocos_kinematics_dynamics/issues/115 \
15 | Robosuite (version 0.1.0): https://github.com/StanfordVL/robosuite
16 |
17 |
18 | The following forked repositories are also required:
19 | kdl_parser, the fixing_setup_py branch from https://github.com/eugval/kdl_parser/tree/fixing_setup_py
20 | simple-pid, the allow_arrays branch from https://github.com/eugval/simple-pid/tree/allow_arrays
21 |
22 | Simpler dependencies (which can be installed recursively) are listed in requirements.txt.
23 |
24 | ### Project Structure
25 | Each of the following needs to be installed independently by navigating to the corresponding folder and running ```pip install -e .```.
26 |
27 | ``robosuite-extra`` implements extra environments and functionality for the robosuite framework.
28 |
29 | ``sim2real-policies`` implements the methods for training and evaluating the different sim2real policies.
30 |
31 | ``sim2real-calibration-characterisation`` implements code to identify and characterise the different sources of error
32 | when transferring from simulation to the real world for the given tasks, as well as scripts to optimise simulator
33 | parameters.
34 |
35 |
36 | ### Acknowledgments
37 |
38 | Our environments are created using Robosuite (https://github.com/ARISE-Initiative/robosuite), part of which we modified in robosuite-extra to suit our project.
39 |
40 | If you are using this code in your work, please also consider citing our paper:
41 |
42 | ```
43 | @inproceedings{
44 |
45 | valassakis2020crossing,
46 |
47 | title={Crossing the Gap: A Deep Dive into Zero-Shot Sim-to-Real Transfer for Dynamics},
48 |
49 | author={Valassakis, Eugene and Ding, Zihan and Johns, Edward},
50 |
51 | booktitle={International Conference on Intelligent Robots and Systems (IROS)},
52 |
53 | year={2020}
54 |
55 | }
56 | ```
57 |
58 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | torch
3 | matplotlib
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/__init__.py
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/ReadMe.md:
--------------------------------------------------------------------------------
1 | ### Disclaimer
2 |
3 | Most assets have been taken/modified from Robosuite (version 0.1.0, https://github.com/ARISE-Initiative/robosuite)
4 | and the sawyer simulator from Rethink Robotics (https://github.com/RethinkRobotics/sawyer_simulator).
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/grippers/meshes/electric_gripper_base.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/grippers/meshes/electric_gripper_base.STL
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/grippers/meshes/paddle_tip.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/grippers/meshes/paddle_tip.STL
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/grippers/meshes/standard_narrow.STL:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/grippers/meshes/standard_narrow.STL
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/grippers/slide_panel_gripper.xml:
--------------------------------------------------------------------------------
(MJCF XML content not preserved in this export)
--------------------------------------------------------------------------------
/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/base.stl:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/base.stl -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/head.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/head.stl -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l0.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l0.stl -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l1.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l1.stl -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l2.stl -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l3.stl -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l4.stl -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l5.stl -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/l6.stl 
-------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/pedestal.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/assets/robot/sawyer/meshes/pedestal.stl -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/assets/robot/sawyer/robot.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/controllers/__init__.py: -------------------------------------------------------------------------------- 1 | from robosuite_extra.controllers.sawyer_eef_velocity_controller import SawyerEEFVelocityController -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/controllers/sawyer_eef_velocity_controller.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import sys 3 | import os 4 | 5 | if '/opt/ros/melodic/lib/python2.7/dist-packages' in sys.path: 6 | sys.path.remove('/opt/ros/melodic/lib/python2.7/dist-packages') 7 | sys.path.append('/opt/ros/melodic/lib/python2.7/dist-packages') 8 | 9 | import PyKDL 10 | from urdf_parser_py.urdf import URDF 11 | from kdl_parser_py.urdf import treeFromFile, treeFromUrdfModel 12 | import robosuite_extra.utils.transform_utils as T 13 | 14 | 15 | class SawyerEEFVelocityController(object): 16 | 17 | def __init__(self): 18 | self.num_joints = 7 19 | # Get the URDF 20 | urdf_path = os.path.join( os.path.dirname(os.path.realpath(__file__)), '../assets/urdf/sawyer_intera.urdf') 21 | 22 | self.urdf_model = URDF.from_xml_string(open(urdf_path).read()) 23 | 24 | # Set the base link, and the tip link. 25 | self.base_link = self.urdf_model.get_root() 26 | self.tip_link = "right_hand" 27 | # # Create a KDL tree from the URDF model. This is an object defining the kinematics of the entire robot. 28 | self.kdl_tree = treeFromUrdfModel(self.urdf_model) 29 | # Create a KDL chain from the tree. This is one specific chain within the overall kinematics model. 
30 | self.arm_chain = self.kdl_tree[1].getChain(self.base_link, self.tip_link) 31 | # Create a solver which will be used to compute the forward kinematics 32 | self.forward_kinematics_solver = PyKDL.ChainFkSolverPos_recursive(self.arm_chain) 33 | # Create a solver which will be used to compute the Jacobian 34 | self.jacobian_solver = PyKDL.ChainJntToJacSolver(self.arm_chain) 35 | 36 | #Velocity inverse kinematics solver 37 | self.IK_v_solver = PyKDL.ChainIkSolverVel_pinv(self.arm_chain) 38 | 39 | # Create a solver to retrieve the joint angles form the eef position 40 | self.IK_solver = PyKDL.ChainIkSolverPos_NR(self.arm_chain, self.forward_kinematics_solver, self.IK_v_solver) 41 | 42 | 43 | def compute_joint_angles_for_endpoint_pose(self, target_endpoint_pose_in_base_frame, q_guess): 44 | pos_kdl = PyKDL.Vector(*target_endpoint_pose_in_base_frame[:3,-1]) 45 | 46 | rot_kdl = PyKDL.Rotation(target_endpoint_pose_in_base_frame[0,0],target_endpoint_pose_in_base_frame[0,1], target_endpoint_pose_in_base_frame[0,2], 47 | target_endpoint_pose_in_base_frame[1, 0], target_endpoint_pose_in_base_frame[1,1],target_endpoint_pose_in_base_frame[1,2], 48 | target_endpoint_pose_in_base_frame[2, 0], target_endpoint_pose_in_base_frame[2,1], target_endpoint_pose_in_base_frame[2,2]) 49 | 50 | frame_kdl = PyKDL.Frame(rot_kdl, pos_kdl) 51 | q_guess_kdl = self.convert_joint_angles_array_to_kdl_array(q_guess) 52 | 53 | kdl_jnt_array = PyKDL.JntArray(self.num_joints) 54 | self.IK_solver.CartToJnt(q_guess_kdl,frame_kdl,kdl_jnt_array) 55 | 56 | return self.kdl_1Darray_to_numpy(kdl_jnt_array) 57 | 58 | 59 | # Function to compute the robot's joint velocities for a desired Cartesian endpoint (robot gripper) velocity 60 | def compute_joint_velocities_for_endpoint_velocity(self, endpoint_velocity_in_base_frame, joint_angles_array): 61 | 62 | ### 63 | ### Compute the Jacobian inverse at the current set of joint angles 64 | ### 65 | # Create a KDL array of the joint angles 66 | joint_angles_kdl_array = self.convert_joint_angles_array_to_kdl_array(joint_angles_array) 67 | # Compute the Jacobian at the current joint angles 68 | jacobian = self.compute_jacobian(joint_angles_kdl_array) 69 | # Compute the pseudo-inverse 70 | jacobian_inverse = np.linalg.pinv(jacobian) 71 | ### 72 | ### Then, use the Jacobian inverse to compute the required joint velocities 73 | ### 74 | # Multiply the Jacobian inverse by the Cartesian velocity 75 | joint_velocities = jacobian_inverse * np.concatenate([endpoint_velocity_in_base_frame[:3,-1].reshape(3,1), 76 | T.mat2euler(endpoint_velocity_in_base_frame[:3,:3],axes = 'sxyz').reshape(3,1)]) 77 | # Return the velocities 78 | return joint_velocities 79 | 80 | 81 | # Function to convert from a NumPy array of joint angles, to a KDL array of joint angles (its the same data, just a different type of container) 82 | def convert_joint_angles_array_to_kdl_array(self, joint_angles_array): 83 | num_joints = len(joint_angles_array) 84 | kdl_array = PyKDL.JntArray(num_joints) 85 | for i in range(num_joints): 86 | kdl_array[i] = joint_angles_array[i] 87 | return kdl_array 88 | 89 | # Function to compute the jacobian directly with a numpy array 90 | def get_jacobian(self, joint_angles_array): 91 | joint_angles_kdl_array = self.convert_joint_angles_array_to_kdl_array(joint_angles_array) 92 | # Compute the Jacobian at the current joint angles 93 | jacobian = self.compute_jacobian(joint_angles_kdl_array) 94 | return jacobian 95 | 96 | # Function to compute the Jacobian at a particular set of joint angles 97 | def 
compute_jacobian(self, joint_angles_kdl_array): 98 | jacobian = PyKDL.Jacobian(self.num_joints) 99 | self.jacobian_solver.JntToJac(joint_angles_kdl_array, jacobian) 100 | jacobian_matrix = self.kdl_array_to_numpy_mat(jacobian) 101 | return jacobian_matrix 102 | 103 | # Function to get the endpoint pose in the base frame. It returns a 4x4 NumPy matrix homogeneous transformation 104 | def get_endpoint_pose_matrix(self, joint_angles_array): 105 | # Get the joint angles 106 | joint_angles_kdl_array = self.convert_joint_angles_array_to_kdl_array(joint_angles_array) 107 | # Do the forward kinematics 108 | endpoint_frame = PyKDL.Frame() 109 | self.forward_kinematics_solver.JntToCart(joint_angles_kdl_array, endpoint_frame) 110 | endpoint_frame = self.kdl_frame_to_numpy_mat(endpoint_frame) 111 | return endpoint_frame 112 | 113 | # Function to convert a KDL array to a NumPy matrix (its the same data, just a different type of container) 114 | @staticmethod 115 | def kdl_array_to_numpy_mat(kdl_array): 116 | mat = np.mat(np.zeros((kdl_array.rows(), kdl_array.columns()))) 117 | for i in range(kdl_array.rows()): 118 | for j in range(kdl_array.columns()): 119 | mat[i, j] = kdl_array[i, j] 120 | return mat 121 | 122 | # Function to convert a KDL 1D array to a NumPy array (its the same data, just a different type of container) 123 | @staticmethod 124 | def kdl_1Darray_to_numpy(kdl_array): 125 | array = np.zeros((kdl_array.rows())) 126 | for i in range(kdl_array.rows()): 127 | array[i]=kdl_array[i] 128 | 129 | return array 130 | 131 | @staticmethod 132 | # Function to convert a KDL transformation frame to a NumPy matrix. Code taken from KDL pose math library. 133 | def kdl_frame_to_numpy_mat(kdl_frame): 134 | mat = np.mat(np.zeros([4, 4])) 135 | for i in range(3): 136 | for j in range(3): 137 | mat[i, j] = kdl_frame.M[i, j] 138 | for i in range(3): 139 | mat[i, 3] = kdl_frame.p[i] 140 | mat[3] = [0, 0, 0, 1] 141 | return mat 142 | 143 | 144 | if __name__ =='__main__': 145 | sawyer = Sawyer() 146 | 147 | print(sawyer) 148 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/env_base/__init__.py: -------------------------------------------------------------------------------- 1 | from .base import * 2 | from .sawyer import * 3 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/models/__init__.py -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/models/generated_objects.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Taken and modified from the original robosuite repository (version 0.1.0) 3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite 4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite 5 | 6 | ''' 7 | 8 | from robosuite.models.objects import MujocoGeneratedObject 9 | from robosuite.models.objects.generated_objects import _get_size , _get_randomized_range, DEFAULT_DENSITY_RANGE 10 | import numpy as np 11 | 12 | class FullyFrictionalBoxObject(MujocoGeneratedObject): 13 | """ Generates a mujoco object than can have different values for the sliding and torsional frictions""" 14 | def 
__init__( 15 | self, 16 | size=None, 17 | size_max=None, 18 | size_min=None, 19 | density=None, 20 | density_range=None, 21 | friction=None, 22 | rgba="random", 23 | ): 24 | 25 | 26 | size = _get_size(size, 27 | size_max, 28 | size_min, 29 | [0.07, 0.07, 0.07], 30 | [0.03, 0.03, 0.03]) 31 | density_range = _get_randomized_range(density, 32 | density_range, 33 | DEFAULT_DENSITY_RANGE) 34 | 35 | super().__init__( 36 | size=size, 37 | rgba=rgba, 38 | density_range=density_range, 39 | friction=friction, 40 | ) 41 | 42 | def get_collision_attrib_template(self): 43 | super_template = super().get_collision_attrib_template() 44 | super_template['condim'] = '4' 45 | return super_template 46 | 47 | def sanity_check(self): 48 | assert len(self.size) == 3, "box size should have length 3" 49 | 50 | def get_bottom_offset(self): 51 | return np.array([0, 0, -1 * self.size[2]]) 52 | 53 | def get_top_offset(self): 54 | return np.array([0, 0, self.size[2]]) 55 | 56 | def get_horizontal_radius(self): 57 | return np.linalg.norm(self.size[0:2], 2) 58 | 59 | # returns a copy, Returns xml body node 60 | def get_collision(self, name=None, site=False): 61 | return self._get_collision(name=name, site=site, ob_type="box") 62 | 63 | # returns a copy, Returns xml body node 64 | def get_visual(self, name=None, site=False): 65 | return self._get_visual(name=name, site=site, ob_type="box") -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/models/grippers/__init__.py: -------------------------------------------------------------------------------- 1 | from .gripper_factory import gripper_factory -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/models/grippers/gripper_factory.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Taken and modified from the original robosuite repository (version 0.1.0) 3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite 4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite 5 | 6 | Overrides the girpper factory of robosuite in order to add the new grippers for our tasks 7 | ''' 8 | 9 | 10 | from robosuite.models.grippers import gripper_factory as robosuite_gripper_factory 11 | from .pushing_gripper import PushingGripper 12 | from .slide_panel_gripper import SlidePanelGripper 13 | 14 | def gripper_factory(name): 15 | if name == "PushingGripper": 16 | # Closed two finger gripper 17 | return PushingGripper() 18 | if name == "SlidePanelGripper": 19 | return SlidePanelGripper() 20 | else: 21 | return robosuite_gripper_factory(name) 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/models/grippers/pushing_gripper.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Taken and modified from the original robosuite repository (version 0.1.0) 3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite 4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite 5 | 6 | A version of TwoFingerGripper but always closed. 
7 | ''' 8 | 9 | 10 | import numpy as np 11 | from robosuite.models.grippers.two_finger_gripper import TwoFingerGripper 12 | 13 | 14 | class PushingGripper(TwoFingerGripper): 15 | """ 16 | Same as TwoFingerGripper, but always closed 17 | """ 18 | @property 19 | def init_qpos(self): 20 | # Fingers always closed 21 | return np.array([-0.020833, 0.020833]) 22 | 23 | def format_action(self, action): 24 | return np.array([1, -1]) 25 | 26 | @property 27 | def dof(self): 28 | return 0 29 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/models/grippers/slide_panel_gripper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from robosuite.models.grippers.gripper import Gripper 4 | 5 | class SlidePanelGripper(Gripper): 6 | """ 7 | Same as TwoFingerGripper, but always closed 8 | """ 9 | 10 | def __init__(self): 11 | file_path = os.path.dirname(os.path.realpath(__file__)) 12 | super().__init__(os.path.join(os.path.join(file_path , "../../assets/grippers/slide_panel_gripper.xml"))) 13 | 14 | def format_action(self, action): 15 | return np.array([]) 16 | 17 | @property 18 | def init_qpos(self): 19 | return np.array([]) 20 | 21 | @property 22 | def joints(self): 23 | return [] 24 | 25 | @property 26 | def dof(self): 27 | return 0 28 | 29 | @property 30 | def visualization_sites(self): 31 | return ["grip_site", "slide_panel_centre", "grip_site_cylinder" ] 32 | 33 | def contact_geoms(self): 34 | return [ 35 | "r_finger_g0", 36 | "r_finger_g1", 37 | "l_finger_g0", 38 | "l_finger_g1", 39 | "r_fingertip_g0", 40 | "l_fingertip_g0", 41 | "slide_panel_g" 42 | ] 43 | 44 | @property 45 | def contact_geom2body(self): 46 | return { 47 | "r_finger_g0": "r_gripper_r_finger", 48 | "r_finger_g1": "r_gripper_r_finger", 49 | "l_finger_g0": "r_gripper_l_finger", 50 | "l_finger_g1": "r_gripper_l_finger", 51 | "r_fingertip_g0": "r_gripper_r_finger_tip", 52 | "l_fingertip_g0": "r_gripper_l_finger_tip", 53 | "slide_panel_g": "slide_panel" 54 | } 55 | 56 | def get_contact_body_from_geom(self, geom): 57 | return self.contact_geom2body[geom] 58 | 59 | @property 60 | def left_finger_geoms(self): 61 | return ["l_finger_g0", "l_finger_g1", "l_fingertip_g0"] 62 | 63 | @property 64 | def right_finger_geoms(self): 65 | return ["r_finger_g0", "r_finger_g1", "r_fingertip_g0"] 66 | 67 | 68 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/models/robot/__init__.py: -------------------------------------------------------------------------------- 1 | from .sawyer_robot import Sawyer -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/models/robot/sawyer_robot.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Taken and modified from the original robosuite repository (version 0.1.0) 3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite 4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite 5 | 6 | ''' 7 | 8 | import numpy as np 9 | from robosuite.models.robots.robot import Robot 10 | from robosuite.utils.mjcf_utils import array_to_string 11 | import os 12 | 13 | class Sawyer(Robot): 14 | """Sawyer is a witty single-arm robot designed by Rethink Robotics.""" 15 | 16 | def __init__(self, torque=False): 17 | self.torque = torque 18 | file_path = os.path.dirname(os.path.realpath(__file__)) 19 | 
if (torque): 20 | super().__init__(os.path.join(os.path.join(file_path , "../../assets/robot/sawyer/robot_torque.xml"))) 21 | else: 22 | super().__init__(os.path.join(os.path.join(file_path , "../../assets/robot/sawyer/robot.xml"))) 23 | 24 | self.bottom_offset = np.array([0, 0, -0.913]) 25 | 26 | def set_base_xpos(self, pos): 27 | """Places the robot on position @pos.""" 28 | node = self.worldbody.find("./body[@name='base']") 29 | node.set("pos", array_to_string(pos - self.bottom_offset)) 30 | 31 | @property 32 | def dof(self): 33 | return 7 34 | 35 | @property 36 | def joints(self): 37 | return ["right_j{}".format(x) for x in range(7)] 38 | 39 | @property 40 | def links(self): 41 | return ["right_l{}".format(x) for x in range(7)] 42 | 43 | @property 44 | def actuators(self): 45 | if (self.torque): 46 | return ["torq_right_j{}".format(x) for x in range(7)] 47 | else: 48 | return ["vel_right_j{}".format(x) for x in range(7)] 49 | 50 | @property 51 | def init_qpos(self): 52 | return np.array([0, -1.18, 0.00, 2.18, 0.00, 0.57, 3.3161]) 53 | 54 | @property 55 | def joint_velocity_limits(self): 56 | return np.array([[-1.74, 1.74], 57 | [-1.328, 1.328], 58 | [-1.957, 1.957], 59 | [-1.957, 1.957], 60 | [-3.485, 3.485], 61 | [-3.485, 3.485], 62 | [-4.545, 4.545]]) 63 | 64 | @property 65 | def velocity_pid_gains(self): 66 | return {'right_j0': {'p': 8.55378248e+01, 'i': 4.58211002e+00, 'd': 9.80858595e-02}, 67 | 'right_j1': {'p': 7.72400386e+01, 'i': 2.57669899e+00, 'd': 4.23858218e-02}, 68 | 'right_j2': {'p':5.35079700e+01, 'i':4.30959305e+00, 'd': 0.00000000e+00}, 69 | 'right_j3': {'p': 4.88937010e+01, 'i':2.91438891e+00, 'd': 4.52197212e-02}, 70 | 'right_j4': {'p': 3.57707442e+01, 'i': 3.14960177e+00, 'd': 2.03120628e-01}, 71 | 'right_j5': {'p': 2.82424289e+01, 'i':2.18658812e+00, 'd': 2.03629986e-01}, 72 | 'right_j6': {'p': 1.62834673e+01, 'i': 1.71284099e+00, 'd': 8.44274724e-02}, 73 | } 74 | 75 | 76 | @property 77 | def position_pid_gains(self): 78 | 79 | # array([77.39238042, 85.93730043, 53.35490038, 57.10317523, 27.80825017, 80 | # 3.17180638, 2.90868948, 4.57497262, 3.56435536, 2.47908628, 81 | # 0.45245024, 0.31105989, 0.83930337, 0.76906677, 0.42219411]) 82 | 83 | return {'right_j0': {'p': 77.39238042, 'i': 3.17180638, 'd': 0.45245024}, 84 | 'right_j1': {'p': 85.93730043, 'i': 2.90868948, 'd': 0.31105989}, 85 | 'right_j2': {'p': 53.35490038, 'i':4.57497262, 'd': 0.83930337}, 86 | 'right_j3': {'p': 57.10317523, 'i':3.56435536, 'd': 0.76906677}, 87 | 'right_j4': {'p': 27.808250171, 'i': 2.47908628, 'd': 0.42219411}, 88 | 'right_j5': {'p': 2.82424289e+01, 'i':2.18658812e+00, 'd': 2.03629986e-01}, 89 | 'right_j6': {'p': 1.62834673e+01, 'i': 1.71284099e+00, 'd': 8.44274724e-02}, 90 | } 91 | 92 | # return {'right_j0': {'p': 8.55378248e+01, 'i': 4.58211002e+00, 'd': 9.80858595e-02}, 93 | # 'right_j1': {'p': 7.72400386e+01, 'i': 2.57669899e+00, 'd': 4.23858218e-02}, 94 | # 'right_j2': {'p': 5.35079700e+01, 'i': 4.30959305e+00, 'd': 0.00000000e+00}, 95 | # 'right_j3': {'p': 4.88937010e+01, 'i': 2.91438891e+00, 'd': 4.52197212e-02}, 96 | # 'right_j4': {'p': 3.57707442e+01, 'i': 3.14960177e+00, 'd': 2.03120628e-01}, 97 | # 'right_j5': {'p': 2.82424289e+01, 'i': 2.18658812e+00, 'd': 2.03629986e-01}, 98 | # 'right_j6': {'p': 1.62834673e+01, 'i': 1.71284099e+00, 'd': 8.44274724e-02}, 99 | # } 100 | 101 | 102 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/models/tasks/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .placement_sampler import ObjectPositionSampler, UniformRandomSampler, UniformSelectiveSampler 2 | 3 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/models/tasks/placement_sampler.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Taken and modified from the original robosuite repository (version 0.1.0) 3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite 4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite 5 | 6 | ''' 7 | 8 | from robosuite.models.tasks.placement_sampler import ObjectPositionSampler, UniformRandomSampler 9 | import collections 10 | import numpy as np 11 | 12 | 13 | def base_sample_new(self, **kwargs): 14 | """ 15 | Args: 16 | object_index: index of the current object being sampled 17 | Returns: 18 | xpos((float * 3) * n_obj): x,y,z position of the objects in world frame 19 | xquat((float * 4) * n_obj): quaternion of the objects 20 | """ 21 | raise NotImplementedError 22 | 23 | def sample_quat_new(self): 24 | if self.z_rotation is None or self.z_rotation is True: 25 | rot_angle = np.random.uniform(high=2 * np.pi, low=0) 26 | elif isinstance(self.z_rotation, collections.Iterable): 27 | rot_angle = np.random.uniform( 28 | high=max(self.z_rotation), low=min(self.z_rotation) 29 | ) 30 | else: 31 | rot_angle = self.z_rotation 32 | 33 | return [np.cos(rot_angle / 2), 0, 0, np.sin(rot_angle / 2)] 34 | 35 | UniformRandomSampler.sample_quat = sample_quat_new 36 | ObjectPositionSampler.sample = base_sample_new 37 | 38 | 39 | 40 | class UniformSelectiveSampler(ObjectPositionSampler): 41 | """Places all objects within the table uniformly random.""" 42 | 43 | def __init__( 44 | self, 45 | x_range=None, 46 | y_range=None, 47 | ensure_object_boundary_in_range=True, 48 | z_rotation=False, 49 | np_random=None 50 | ): 51 | """ 52 | Args: 53 | x_range(float * 2): override the x_range used to uniformly place objects 54 | if None, default to x-range of table 55 | y_range(float * 2): override the y_range used to uniformly place objects 56 | if None default to y-range of table 57 | x_range and y_range are both with respect to (0,0) = center of table. 
58 | ensure_object_boundary_in_range: 59 | True: The center of object is at position: 60 | [uniform(min x_range + radius, max x_range - radius)], [uniform(min x_range + radius, max x_range - radius)] 61 | False: 62 | [uniform(min x_range, max x_range)], [uniform(min x_range, max x_range)] 63 | z_rotation: 64 | None: Add uniform random random z-rotation 65 | iterable (a,b): Uniformly randomize rotation angle between a and b (in radians) 66 | value: Add fixed angle z-rotation 67 | """ 68 | self.x_range = x_range 69 | self.y_range = y_range 70 | self.ensure_object_boundary_in_range = ensure_object_boundary_in_range 71 | self.z_rotation = z_rotation 72 | self.np_random = np_random if np_random is not None else np.random 73 | 74 | 75 | def set_random_number_generator(self, np_random): 76 | self.np_random = np_random 77 | 78 | def set_ranges(self, x_range=None, y_range = None, z_rotation_range = None): 79 | if(x_range is not None): 80 | self.x_range = x_range 81 | if(y_range is not None): 82 | self.y_range = y_range 83 | 84 | if(z_rotation_range is not None): 85 | self.z_rotation = z_rotation_range 86 | 87 | def sample_obj_idx(self): 88 | return self.np_random.choice(self.n_obj) 89 | 90 | def sample_x(self, object_horizontal_radius): 91 | x_range = self.x_range 92 | if x_range is None: 93 | x_range = [-self.table_size[0] / 2, self.table_size[0] / 2] 94 | minimum = min(x_range) 95 | maximum = max(x_range) 96 | if self.ensure_object_boundary_in_range: 97 | minimum += object_horizontal_radius 98 | maximum -= object_horizontal_radius 99 | return self.np_random.uniform(high=maximum, low=minimum) 100 | 101 | def sample_y(self, object_horizontal_radius): 102 | y_range = self.y_range 103 | if y_range is None: 104 | y_range = [-self.table_size[0] / 2, self.table_size[0] / 2] 105 | minimum = min(y_range) 106 | maximum = max(y_range) 107 | if self.ensure_object_boundary_in_range: 108 | minimum += object_horizontal_radius 109 | maximum -= object_horizontal_radius 110 | return self.np_random.uniform(high=maximum, low=minimum) 111 | 112 | def sample_quat(self): 113 | if self.z_rotation is None or self.z_rotation is True: 114 | rot_angle = self.np_random.uniform(high=2 * np.pi, low=0) 115 | elif isinstance(self.z_rotation, collections.Iterable): 116 | rot_angle = self.np_random.uniform( 117 | high=max(self.z_rotation), low=min(self.z_rotation) 118 | ) 119 | elif (isinstance(self.z_rotation, float) or isinstance(self.z_rotation, int)): 120 | rot_angle = self.z_rotation 121 | else: 122 | return [1, 0, 0, 0] 123 | 124 | return [np.cos(rot_angle / 2), 0, 0, np.sin(rot_angle / 2)] 125 | 126 | def sample(self, push_object_index): 127 | pos_arr = [] 128 | quat_arr = [] 129 | placed_objects = [] 130 | for index, obj_mjcf in enumerate(self.mujoco_objects): 131 | horizontal_radius = obj_mjcf.get_horizontal_radius() 132 | bottom_offset = obj_mjcf.get_bottom_offset() 133 | 134 | if index != push_object_index: 135 | pos_arr.append(np.array([0.0, 0.0, 0.0])) 136 | quat_arr.append([1, 0, 0, 0]) 137 | 138 | else: 139 | object_x = self.sample_x(horizontal_radius) 140 | object_y = self.sample_y(horizontal_radius) 141 | 142 | pos = ( 143 | self.table_top_offset 144 | - bottom_offset 145 | + np.array([object_x, object_y, 0]) 146 | ) 147 | placed_objects.append((object_x, object_y, horizontal_radius)) 148 | # random z-rotation 149 | 150 | quat = self.sample_quat() 151 | 152 | quat_arr.append(quat) 153 | pos_arr.append(pos) 154 | 155 | return pos_arr, quat_arr 156 | 
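For orientation, the sketch below is not a file from the repository: it shows how `UniformSelectiveSampler` is typically constructed and used, with illustrative ranges, rotation interval, and seed. PushTask in `push_env/push_task.py` performs the equivalent wiring in its constructor; the `setup()`/`sample()` calls are left as comments because they need the MuJoCo object models and table geometry that the environment normally supplies.

```python
import numpy as np
from robosuite_extra.models.tasks import UniformSelectiveSampler

# Sample object poses in a 20 cm x 20 cm patch around the table centre,
# with a uniform z-rotation between 0 and 90 degrees (illustrative values).
sampler = UniformSelectiveSampler(
    x_range=[-0.1, 0.1],
    y_range=[-0.1, 0.1],
    ensure_object_boundary_in_range=True,
    z_rotation=(0.0, np.pi / 2),
    np_random=np.random.RandomState(0),
)

# setup() is inherited from robosuite's ObjectPositionSampler and is called by
# PushTask with the task's objects and table geometry before any sampling:
#   sampler.setup(mujoco_objects, table_top_offset, table_size)
# sample() then returns one position and one quaternion per object, randomising
# only the object selected by push_object_index:
#   positions, quaternions = sampler.sample(push_object_index=0)
```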
-------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/push_env/__init__.py: -------------------------------------------------------------------------------- 1 | from robosuite_extra.push_env.push_task import PushTask 2 | from robosuite_extra.push_env.sawyer_push import SawyerPush -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/push_env/demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from robosuite_extra.env_base import make 3 | from robosuite_extra.push_env.sawyer_push import SawyerPush 4 | import time 5 | from robosuite_extra.wrappers import EEFXVelocityControl, GymWrapper, FlattenWrapper 6 | 7 | 8 | start_pos = np.array([0.,-16.75e-2]) 9 | goal1 = np.array([3e-3,15.15e-2]) 10 | goal2 = np.array([9.5e-2,11.01e-2]) 11 | goal3 = np.array([-11.5e-2,17.2e-2]) 12 | 13 | if __name__ == "__main__": 14 | env=make( 15 | 'SawyerPush', 16 | gripper_type="PushingGripper", 17 | parameters_to_randomise = [], 18 | randomise_initial_conditions = False, 19 | table_full_size=(0.8, 1.6, 0.719), 20 | table_friction=(0.001, 5e-3, 1e-4), 21 | use_camera_obs=False, 22 | use_object_obs=True, 23 | reward_shaping=True, 24 | placement_initializer=None, 25 | gripper_visualization=True, 26 | use_indicator_object=False, 27 | has_renderer=True, 28 | has_offscreen_renderer=False, 29 | render_collision_mesh=False, 30 | render_visual_mesh=True, 31 | control_freq=10., 32 | horizon=80, 33 | ignore_done=False, 34 | camera_name="frontview", 35 | camera_height=256, 36 | camera_width=256, 37 | camera_depth=False, 38 | pid = True, 39 | ) 40 | 41 | env = FlattenWrapper( GymWrapper(EEFXVelocityControl(env, dof=2))) 42 | 43 | env._set_gripper_neutral_offset(*start_pos) 44 | env._set_goal_neutral_offset(*goal1) 45 | env.reset() 46 | 47 | env.viewer.set_camera(camera_id=0) 48 | env.render() 49 | 50 | start_time = time.time() 51 | for i in range(0,120): 52 | action = np.zeros(2) 53 | action[0]=1.0 54 | 55 | obs, reward, done, _ = env.step(action) 56 | env.render() 57 | 58 | if(done): 59 | reset_time1 = time.time() 60 | env.reset() 61 | env.viewer.set_camera(camera_id=0) 62 | env.render() -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/push_env/push_task.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Taken and modified from the original robosuite repository (version 0.1.0) 3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite 4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite 5 | 6 | ''' 7 | 8 | from robosuite.models.tasks import Task 9 | from robosuite_extra.models.tasks import UniformSelectiveSampler 10 | from robosuite.utils.mjcf_utils import new_joint, array_to_string 11 | import numpy as np 12 | 13 | class PushTask(Task): 14 | """ 15 | Creates MJCF model of a push task. 16 | 17 | A tabletop task consists of one robot pushing an object to a visual goal on a 18 | tabletop. This class combines the robot, the table 19 | arena, and the objetcts into a single MJCF model. 
20 | """ 21 | 22 | def __init__(self, mujoco_arena, mujoco_robot, mujoco_goal, mujoco_objects, initializer): 23 | """ 24 | Args: 25 | mujoco_arena: MJCF model of robot workspace 26 | mujoco_robot: MJCF model of robot model 27 | mujoco_objects: MJCF model of the objects to be pushed 28 | mujoco_goal: MJCF model of the goal 29 | initializer: placement sampler to initialize object positions. 30 | """ 31 | super().__init__() 32 | 33 | self.merge_arena(mujoco_arena) 34 | self.merge_robot(mujoco_robot) 35 | self.merge_objects(mujoco_objects) 36 | self.merge_visual(mujoco_goal) 37 | if initializer is None: 38 | initializer = UniformSelectiveSampler() 39 | 40 | 41 | self.initializer = initializer 42 | self.initializer.setup(self.mujoco_objects, self.table_top_offset, self.table_size) 43 | self.change_push_object() 44 | 45 | def merge_robot(self, mujoco_robot): 46 | """Adds robot model to the MJCF model.""" 47 | self.robot = mujoco_robot 48 | self.merge(mujoco_robot) 49 | 50 | def merge_arena(self, mujoco_arena): 51 | """Adds arena model to the MJCF model.""" 52 | self.arena = mujoco_arena 53 | self.table_top_offset = mujoco_arena.table_top_abs 54 | self.table_size = mujoco_arena.table_full_size 55 | self.merge(mujoco_arena) 56 | 57 | def merge_objects(self, mujoco_objects ): 58 | """Adds a physical object to the MJCF model.""" 59 | self.xml_objects = [] 60 | self.mujoco_objects = [] 61 | self.object_names = [] 62 | 63 | self.push_object_idx = 0 64 | self.max_horizontal_radius = 0 65 | 66 | for obj_name, obj_mjcf in mujoco_objects.items(): 67 | self.mujoco_objects.append(obj_mjcf) 68 | self.object_names.append(obj_name) 69 | 70 | self.merge_asset(obj_mjcf) 71 | # Load object 72 | obj = obj_mjcf.get_collision(name=obj_name, site=True) 73 | obj.append(new_joint(name=obj_name, type="free")) 74 | self.xml_objects.append(obj) 75 | self.worldbody.append(obj) 76 | self.max_horizontal_radius = max( 77 | self.max_horizontal_radius, obj_mjcf.get_horizontal_radius() 78 | ) 79 | 80 | 81 | def merge_visual(self, mujoco_goal): 82 | """Adds the visual goal to the MJCF model.""" 83 | self.mujoco_goal = mujoco_goal 84 | self.merge_asset(mujoco_goal) 85 | # Load goal 86 | goal_visual = mujoco_goal.get_visual(name='goal', site=True) 87 | 88 | self.xml_goal = goal_visual 89 | 90 | self.worldbody.append(goal_visual) 91 | 92 | 93 | def change_push_object(self, idx = None): 94 | if(idx is None): 95 | idx = np.random.choice(len(self.mujoco_objects)) 96 | 97 | assert idx < len(self.mujoco_objects), "idx out of range for selecting pushing object" 98 | 99 | self.push_object_idx = idx 100 | 101 | 102 | def place_objects(self): 103 | """Places objects randomly until no collisions or max iterations hit.""" 104 | 105 | pos_arr, quat_arr = self.initializer.sample(self.push_object_idx) 106 | for i in range(len(self.xml_objects)): 107 | self.xml_objects[i].set("pos", array_to_string(pos_arr[i])) 108 | self.xml_objects[i].set("quat", array_to_string(quat_arr[i])) 109 | 110 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/reach_env/__init__.py: -------------------------------------------------------------------------------- 1 | from robosuite_extra.reach_env.reach_task import ReachTask 2 | from robosuite_extra.reach_env.sawyer_reach import SawyerReach -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/reach_env/demo.py: -------------------------------------------------------------------------------- 1 | 
import numpy as np 2 | from robosuite_extra.env_base import make 3 | from robosuite_extra.reach_env.sawyer_reach import SawyerReach 4 | import time 5 | from robosuite_extra.wrappers import EEFXVelocityControl, GymWrapper, FlattenWrapper 6 | 7 | 8 | goal1 = np.array([0.,0.]) 9 | goal2 = np.array([-4.465e-2,5.85e-2]) 10 | goal3 = np.array([8.37e-2,-5.78e-2]) 11 | 12 | if __name__ == "__main__": 13 | env=make( 14 | 'SawyerReach', 15 | gripper_type="PushingGripper", 16 | parameters_to_randomise = [], 17 | randomise_initial_conditions=False, 18 | table_full_size=(0.8, 1.6, 0.719), 19 | use_camera_obs=False, 20 | use_object_obs=True, 21 | reward_shaping=True, 22 | use_indicator_object=False, 23 | has_renderer=False, 24 | has_offscreen_renderer=False, 25 | render_collision_mesh=False, 26 | render_visual_mesh=True, 27 | control_freq=10, 28 | horizon=40, 29 | ignore_done=False, 30 | camera_name="frontview", 31 | camera_height=256, 32 | camera_width=256, 33 | camera_depth=False, 34 | pid=True, 35 | success_radius=0.01 36 | ) 37 | 38 | env = FlattenWrapper( GymWrapper(EEFXVelocityControl(env, dof=3,))) 39 | env.renderer_on() 40 | env._set_goal_neutral_offset(*goal2) 41 | env.reset() 42 | 43 | if (env.has_renderer): 44 | env.viewer.set_camera(camera_id=0) 45 | env.render() 46 | 47 | start_time = time.time() 48 | for i in range(1,120): 49 | 50 | action = np.zeros((3,))*0.5 51 | action[1]=1.0 52 | 53 | obs, reward, done, _ = env.step(action) 54 | 55 | if(env.has_renderer): 56 | env.render() 57 | 58 | if(done): 59 | time.sleep(2) 60 | env.reset() 61 | if (env.has_renderer): 62 | env.viewer.set_camera(camera_id=0) 63 | env.render() 64 | 65 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/reach_env/reach_task.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Taken and modified from the original robosuite repository (version 0.1.0) 3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite 4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite 5 | 6 | ''' 7 | 8 | from robosuite.models.tasks import Task 9 | class ReachTask(Task): 10 | """ 11 | Creates MJCF model of a push task. 12 | 13 | A tabletop task consists of one robot pushing an object to a visual goal on a 14 | tabletop. This class combines the robot, the table 15 | arena, and the objetcts into a single MJCF model. 
16 | """ 17 | 18 | def __init__(self, mujoco_arena, mujoco_robot, mujoco_goal): 19 | """ 20 | Args: 21 | mujoco_arena: MJCF model of robot workspace 22 | mujoco_robot: MJCF model of robot model 23 | mujoco_goal: MJCF model of the goal 24 | """ 25 | super().__init__() 26 | 27 | self.merge_arena(mujoco_arena) 28 | self.merge_robot(mujoco_robot) 29 | self.merge_visual(mujoco_goal) 30 | 31 | 32 | def merge_robot(self, mujoco_robot): 33 | """Adds robot model to the MJCF model.""" 34 | self.robot = mujoco_robot 35 | self.merge(mujoco_robot) 36 | 37 | def merge_arena(self, mujoco_arena): 38 | """Adds arena model to the MJCF model.""" 39 | self.arena = mujoco_arena 40 | self.table_top_offset = mujoco_arena.table_top_abs 41 | self.table_size = mujoco_arena.table_full_size 42 | self.merge(mujoco_arena) 43 | 44 | def merge_visual(self, mujoco_goal): 45 | """Adds the visual goal to the MJCF model.""" 46 | self.mujoco_goal = mujoco_goal 47 | self.merge_asset(mujoco_goal) 48 | # Load goal 49 | goal_visual = mujoco_goal.get_visual(name='goal', site=True) 50 | 51 | self.xml_goal = goal_visual 52 | 53 | self.worldbody.append(goal_visual) 54 | 55 | 56 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/slide_env/__init__.py: -------------------------------------------------------------------------------- 1 | from robosuite_extra.slide_env.slide_task import SlideTask 2 | from robosuite_extra.slide_env.sawyer_slide import SawyerSlide -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/slide_env/demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from robosuite_extra.env_base import make 3 | from robosuite_extra.slide_env.sawyer_slide import SawyerSlide 4 | import time 5 | 6 | if __name__ == "__main__": 7 | env=make( 8 | 'SawyerSlide', 9 | gripper_type="SlidePanelGripper", 10 | parameters_to_randomise = [], 11 | randomise_initial_conditions = True, 12 | use_camera_obs=False, 13 | use_object_obs=True, 14 | reward_shaping=True, 15 | use_indicator_object=False, 16 | has_renderer=True, 17 | has_offscreen_renderer=True, 18 | render_collision_mesh=False, 19 | render_visual_mesh=True, 20 | control_freq=10, 21 | horizon=30, 22 | ignore_done=False, 23 | camera_name="frontview", 24 | camera_height=256, 25 | camera_width=256, 26 | camera_depth=False, 27 | pid=True, 28 | ) 29 | 30 | env.reset() 31 | env.viewer.set_camera(camera_id=0) 32 | 33 | env.render() 34 | for i in range(1,20000): 35 | 36 | action = np.zeros(2) 37 | action[0] = 1. 38 | obs, reward, done, _ = env.step(action) 39 | env.render() 40 | 41 | if(done): 42 | time.sleep(2) 43 | env.reset() 44 | env.viewer.set_camera(camera_id=0) 45 | env.render() 46 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/slide_env/slide_task.py: -------------------------------------------------------------------------------- 1 | from robosuite.models.tasks import Task 2 | from robosuite.utils.mjcf_utils import new_joint, array_to_string 3 | import numpy as np 4 | 5 | class SlideTask(Task): 6 | """ 7 | Creates MJCF model of a slide t task. 
8 | """ 9 | 10 | def __init__(self, mujoco_arena, mujoco_robot, slide_object): 11 | """ 12 | Args: 13 | mujoco_arena: MJCF model of robot workspace 14 | mujoco_robot: MJCF model of robot model 15 | slide_object: MJCF model of the object to be slided 16 | """ 17 | super().__init__() 18 | 19 | self.merge_arena(mujoco_arena) 20 | self.merge_robot(mujoco_robot) 21 | self.merge_objects(slide_object) 22 | 23 | def merge_robot(self, mujoco_robot): 24 | """Adds robot model to the MJCF model.""" 25 | self.robot = mujoco_robot 26 | self.merge(mujoco_robot) 27 | 28 | def merge_arena(self, mujoco_arena): 29 | """Adds arena model to the MJCF model.""" 30 | self.arena = mujoco_arena 31 | self.merge(mujoco_arena) 32 | 33 | def merge_objects(self, slide_object): 34 | """Adds a physical object to the MJCF model.""" 35 | self.push_object_idx = 0 36 | self.max_horizontal_radius = 0 37 | 38 | self.slide_object = slide_object 39 | 40 | 41 | self.merge_asset(self.slide_object) 42 | 43 | # Add object to xml 44 | obj = self.slide_object.get_collision(name="slide_object", site=True) 45 | obj.append(new_joint(name="slide_object", type="free")) 46 | self.xml_object = obj 47 | self.worldbody.append(obj) 48 | self.max_horizontal_radius = self.slide_object.get_horizontal_radius() -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/robosuite-extra/robosuite_extra/utils/__init__.py -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/utils/domain_randomisation_utils.py: -------------------------------------------------------------------------------- 1 | def _scale_xml_float_attribute(node, attribute, scaling_factor): 2 | cur_value = float(node.get(attribute)) 3 | new_value = cur_value * scaling_factor 4 | node.set(attribute, str(new_value)) -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/utils/transform_utils.py: -------------------------------------------------------------------------------- 1 | from robosuite.utils.transform_utils import * 2 | import numpy as np 3 | 4 | 5 | 6 | def eulerx2mat(angle): 7 | cos = np.cos(angle) 8 | sin = np.sin(angle) 9 | return np.array([[1.,0.,0.], 10 | [0.,cos ,-sin], 11 | [0.,sin, cos]]) 12 | 13 | def eulery2mat(angle): 14 | cos = np.cos(angle) 15 | sin = np.sin(angle) 16 | return np.array([[cos, 0., sin], 17 | [0.,1.,0.], 18 | [-sin, 0., cos]]) 19 | 20 | def eulerz2mat(angle): 21 | cos = np.cos(angle) 22 | sin = np.sin(angle) 23 | return np.array([[cos,-sin, 0.], 24 | [sin, cos, 0.], 25 | [0.,0.,1.]]) 26 | 27 | 28 | def euler2mat(euler_x,euler_y, euler_z): 29 | r_x = eulerx2mat(euler_x) 30 | r_y = eulery2mat(euler_y) 31 | r_z = eulerx2mat(euler_z) 32 | return r_z.dot(r_y.dot(r_x)) 33 | 34 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from robosuite_extra.wrappers.gym_wrapper import GymWrapper, FlattenWrapper 2 | from robosuite_extra.wrappers.eef_velocy_control_wrapper import EEFXVelocityControl -------------------------------------------------------------------------------- 
/robosuite-extra/robosuite_extra/wrappers/eef_velocy_control_wrapper.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Taken and modified from the original robosuite repository (version 0.1.0) 4 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite 5 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite 6 | 7 | 8 | 9 | 10 | This module implements a wrapper for controlling the robot through end effector 11 | movements instead of joint velocities. This is useful in learning pipelines 12 | that want to output actions in end effector space instead of joint space. 13 | """ 14 | 15 | import numpy as np 16 | from robosuite.wrappers import Wrapper 17 | from robosuite_extra.controllers import SawyerEEFVelocityController 18 | 19 | 20 | class EEFXVelocityControl(Wrapper): 21 | env = None 22 | 23 | def __init__(self, env, dof = 2, max_action = 0.1, normalised_actions = True): 24 | """ 25 | End effector cartesian velocity control wrapper. 26 | 27 | This wrapper allows for controlling the robot by sending end-effector cartesian velocity commands 28 | instead of joint velocities. This includes velocity commands in-plance (v_x,v_y) or in 3D (v_x,v_y,v_z). 29 | Does not implement angular velocity commands. 30 | 31 | Args: 32 | env (MujocoEnv instance): The environment to wrap. 33 | dof (int 2, or 3): Whether to move the end-effector in-plane or in 3d 34 | max_action : The maximum speed that can be achieved in each dimension (in m/sec). 35 | 36 | """ 37 | super().__init__(env) 38 | self.controller = SawyerEEFVelocityController() 39 | assert dof == 2 or dof == 3 , "Can only send x-y or x-y-z velocity commands" 40 | 41 | self.wrapper_dof = dof 42 | self.action_range = [-max_action, max_action] 43 | 44 | self.normalised_actions = normalised_actions 45 | 46 | 47 | #Desable the action spec 48 | self.env.normalised_actions = False 49 | 50 | 51 | @property 52 | def action_spec(self): 53 | if(self.normalised_actions): 54 | low = np.ones(self.wrapper_dof) * -1 55 | high = np.ones(self.wrapper_dof) 56 | else: 57 | low = np.array([self.action_range[0]]*self.wrapper_dof ) 58 | high = np.array([self.action_range[1]]*self.wrapper_dof ) 59 | return low, high 60 | 61 | def _robot_jpos_getter(self): 62 | """ 63 | Helper function to pass to the ik controller for access to the 64 | current robot joint positions. 
65 | """ 66 | return np.array(self.env._joint_positions) 67 | 68 | 69 | def step(self, action): 70 | if self.env.done: 71 | raise ValueError("Executing action in terminated episode") 72 | 73 | low, high = self.action_spec 74 | 75 | # Domain randomisation for the action space 76 | action_range = high-low 77 | additive_noise = action_range*self.env.dynamics_parameters['action_additive_noise'] * self.env.np_random.uniform(-1, 1, action.shape) 78 | additive_systematic_noise = action_range*self.dynamics_parameters['action_systematic_noise'] 79 | multiplicative_noise = 1.0 + ( 80 | self.dynamics_parameters['action_multiplicative_noise'] * self.env.np_random.uniform(-1, 1, 81 | action.shape)) 82 | 83 | 84 | action = action*multiplicative_noise +additive_noise+additive_systematic_noise 85 | 86 | action = np.clip(action, low, high) 87 | 88 | if (self.normalised_actions): 89 | # Rescale action to -max_action, max_action 90 | 91 | # rescale normalized action to control ranges 92 | bias = 0.5 * (self.action_range[1] + self.action_range[0]) 93 | weight = 0.5 * (self.action_range[1] - self.action_range[0]) 94 | action = bias + weight * action 95 | 96 | current_right_hand_pos_base = self.env._right_hand_pos 97 | current_right_hand_pos_eef = self.env.init_right_hand_orn.dot(current_right_hand_pos_base) 98 | 99 | 100 | if(self.wrapper_dof == 2): 101 | #This only works for the pushing task 102 | # Correct for deviation in the z-position 103 | # Compensate for the fact that right_hand_pos is in the base frame by a minus sign (current - initial) 104 | 105 | # action = self.compensate_for_external_forces(action, current_right_hand_pos_eef) 106 | z_vel = current_right_hand_pos_base[2] - self.env.init_right_hand_pos[2] 107 | xyz_vel = np.concatenate([action, [2*z_vel]]) 108 | 109 | elif(self.wrapper_dof ==3): 110 | xyz_vel = action 111 | else: 112 | raise NotImplementedError('Dof can only be 2 or 3!') 113 | 114 | self.target_next_state = current_right_hand_pos_eef + xyz_vel* self.env.control_timestep 115 | 116 | current_right_hand_orn = self.env._right_hand_orn 117 | reference_right_hand_orn = self.env.init_right_hand_orn 118 | 119 | orn_diff = reference_right_hand_orn.dot(current_right_hand_orn.T) 120 | orn_diff_twice = orn_diff.dot(orn_diff).dot(orn_diff) 121 | 122 | 123 | 124 | # Convert xyz_vel from the end-effector frame to the base_frame of the robot 125 | base_frame_in_eef = reference_right_hand_orn.T 126 | xyz_vel_base = base_frame_in_eef.dot(xyz_vel) 127 | 128 | 129 | pose_matrix = np.zeros((4,4)) 130 | pose_matrix[:3,:3] = orn_diff_twice 131 | pose_matrix[:3,3] = xyz_vel_base 132 | pose_matrix[3,3] =1 133 | current_joint_angles = self._robot_jpos_getter() 134 | 135 | 136 | 137 | joint_velocities = self.controller.compute_joint_velocities_for_endpoint_velocity(pose_matrix,current_joint_angles) 138 | final_action = np.concatenate([np.asarray(joint_velocities).squeeze()]) 139 | 140 | obs, reward, done, info = self.env.step(final_action) 141 | info['joint_velocities'] = final_action 142 | return obs,reward,done,info 143 | 144 | 145 | def reset(self): 146 | ret = self.env.reset() 147 | eff_frame_in_base = self.env.init_right_hand_orn 148 | self.target_next_state = eff_frame_in_base.dot(self.env._right_hand_pos) 149 | return ret 150 | 151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /robosuite-extra/robosuite_extra/wrappers/gym_wrapper.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Taken and modified 
from the original robosuite repository (version 0.1.0) 3 | Our fork with version 0.1.0 : https://github.com/eugval/robosuite 4 | Official Robosuite Repository : https://github.com/ARISE-Initiative/robosuite 5 | 6 | ''' 7 | 8 | import numpy as np 9 | from gym import spaces 10 | from robosuite.wrappers import Wrapper 11 | 12 | from gym import Wrapper as OpenAIGymWrapper 13 | 14 | 15 | 16 | class FlattenWrapper(OpenAIGymWrapper): 17 | 18 | def __init__(self, env, keys=None, add_info=False): 19 | 20 | super().__init__(env) 21 | 22 | if keys is None: 23 | assert self.env.use_object_obs, "Object observations need to be enabled." 24 | keys = ["robot-state", "task-state", "target_pos",] 25 | self.keys = keys 26 | 27 | # set up observation and action spaces 28 | flat_ob = self._flatten_obs(self.env.reset(), verbose=True) 29 | self.obs_dim = flat_ob.size 30 | high = np.inf * np.ones(self.obs_dim) 31 | low = -high 32 | self.observation_space = spaces.Box(low=low, high=high) 33 | low, high = self.env.action_spec 34 | self.action_space = spaces.Box(low=low, high=high) 35 | 36 | self.add_info = add_info 37 | 38 | def _flatten_obs(self, obs_dict, verbose=False): 39 | """ 40 | Filters out the keys of interest and concatenates the information. 41 | Args: 42 | obs_dict: ordered dictionary of observations 43 | """ 44 | ob_lst = [] 45 | for key in obs_dict: 46 | if key in self.keys: 47 | if verbose: 48 | print("adding key: {}".format(key)) 49 | ob_lst.append(obs_dict[key]) 50 | return np.concatenate(ob_lst) 51 | 52 | def reset(self): 53 | ob_dict = self.env.reset() 54 | return self._flatten_obs(ob_dict) 55 | 56 | def step(self, action): 57 | ob_dict, reward, done, info = self.env.step(action) 58 | 59 | if(self.add_info): 60 | info.update(ob_dict) 61 | 62 | return self._flatten_obs(ob_dict), reward, done, info 63 | 64 | 65 | def __getattr__(self, attr): 66 | # using getattr ensures that both __getattribute__ and __getattr__ (fallback) get called 67 | # (see https://stackoverflow.com/questions/3278077/difference-between-getattr-vs-getattribute) 68 | return self.env.__getattr__(attr) 69 | 70 | 71 | 72 | 73 | 74 | class GymWrapper(Wrapper): 75 | env = None 76 | 77 | # Set this in SOME subclasses 78 | metadata = {'render.modes': ['human', 'rgb_array']} 79 | reward_range = (-float('inf'), float('inf')) 80 | spec = None 81 | 82 | def __init__(self, env): 83 | """ 84 | Initializes the Gym wrapper. 85 | Args: 86 | env (MujocoEnv instance): The environment to wrap. 87 | 88 | Observations are exposed as a gym.spaces.Dict built from the wrapped 89 | environment's observation_spec().
90 | """ 91 | self.env = env 92 | # set up observation space 93 | high = np.inf 94 | low = -high 95 | 96 | obs_spec = env.observation_spec() 97 | 98 | space_spec = {} 99 | 100 | for k,v in obs_spec.items(): 101 | space_spec[k]=spaces.Box(low=low,high=high, shape=v) 102 | 103 | 104 | self.observation_space = spaces.Dict(space_spec) 105 | 106 | # setup action space 107 | low, high = self.env.action_spec 108 | self.action_space = spaces.Box(low=low, high=high) 109 | 110 | self.reward_range = self.env.reward_range 111 | 112 | def step(self, action): 113 | return self.env.step(action) 114 | 115 | def reset(self): 116 | ob_dict = self.env.reset() 117 | return ob_dict 118 | 119 | def render(self, mode): 120 | self.env.render() 121 | 122 | 123 | def close(self): 124 | self.env.close() 125 | 126 | def seed(self, seed): 127 | self.env.seed(seed) 128 | 129 | @property 130 | def action_spec(self): 131 | return self.env.action_spec 132 | 133 | @property 134 | def unwrapped(self): 135 | return self 136 | 137 | def __str__(self): 138 | if self.spec is None: 139 | return '<{} instance>'.format(type(self).__name__) 140 | else: 141 | return '<{}<{}>>'.format(type(self).__name__, self.spec.id) 142 | 143 | def __enter__(self): 144 | return self 145 | 146 | def __exit__(self, *args): 147 | self.close() 148 | # propagate exception 149 | return False 150 | -------------------------------------------------------------------------------- /robosuite-extra/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='robosuite_extra', 4 | version='0.0.1', 5 | packages= find_packages() 6 | ) -------------------------------------------------------------------------------- /sim2real-calibration-characterisation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/sim2real-calibration-characterisation/__init__.py -------------------------------------------------------------------------------- /sim2real-calibration-characterisation/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='sim2real_calibration_characterisationsim2real_calibration_characterisation', 4 | version='0.0.1', 5 | packages= find_packages() 6 | ) -------------------------------------------------------------------------------- /sim2real-calibration-characterisation/sim2real_calibration_characterisation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/sim2real-calibration-characterisation/sim2real_calibration_characterisation/__init__.py -------------------------------------------------------------------------------- /sim2real-calibration-characterisation/sim2real_calibration_characterisation/sim2real_characterisation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/sim2real-calibration-characterisation/sim2real_calibration_characterisation/sim2real_characterisation/__init__.py -------------------------------------------------------------------------------- 
/sim2real-calibration-characterisation/sim2real_calibration_characterisation/sim2real_characterisation/pushing_characterisation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from robosuite_extra.env_base import make 3 | import robosuite_extra.utils.transform_utils as T 4 | from robosuite_extra.wrappers import EEFXVelocityControl 5 | from sim2real_calibration_characterisation.utils.logger import Logger 6 | from sim2real_policies.utils.choose_env import push_force_noise_randomisation, push_force_randomisation, \ 7 | push_full_randomisation,push_no_randomisation 8 | 9 | def grab_data(info, world_pose_in_base): 10 | # Grab the data 11 | goal_pos_in_world = np.concatenate([info['goal_pos_in_world'], [1.0]]) 12 | 13 | goal_pos_in_base = world_pose_in_base.dot(goal_pos_in_world) 14 | goal_pos_in_base = goal_pos_in_base / goal_pos_in_base[3] 15 | 16 | gripper_pos_in_world = np.concatenate([info['eef_pos_in_world'], [1.0]]) 17 | gripper_vel_in_world = info['eef_vel_in_world'] 18 | 19 | gripper_pos_in_base = world_pose_in_base.dot(gripper_pos_in_world) 20 | gripper_pos_in_base = gripper_pos_in_base / gripper_pos_in_base[3] 21 | gripper_vel_in_base = world_pose_in_base[:3, :3].dot(gripper_vel_in_world) 22 | 23 | object_pos_in_world = np.concatenate([info['object_pos_in_world'], [1.0]]) 24 | object_vel_in_world = info['object_vel_in_world'] 25 | z_angle = info['z_angle'] 26 | 27 | object_pos_in_base = world_pose_in_base.dot(object_pos_in_world) 28 | object_pos_in_base = object_pos_in_base / object_pos_in_base[3] 29 | object_vel_in_base = world_pose_in_base[:3, :3].dot(object_vel_in_world) 30 | 31 | return goal_pos_in_base, gripper_pos_in_base, gripper_vel_in_base, object_pos_in_base, object_vel_in_base, z_angle, 32 | 33 | 34 | def main(): 35 | ### Script parameters ### 36 | render = False 37 | log_dir = './push' 38 | #### 39 | 40 | 41 | action_ref = np.array([0.1, 0.0]) 42 | action = action_ref 43 | 44 | for repeat in range(10): 45 | # Create the environment 46 | env = make( 47 | 'SawyerPush', 48 | gripper_type="PushingGripper", 49 | parameters_to_randomise=push_no_randomisation, 50 | randomise_initial_conditions=False, 51 | table_full_size=(0.8, 1.6, 0.719), 52 | table_friction=(0.001, 5e-3, 1e-4), 53 | use_camera_obs=False, 54 | use_object_obs=True, 55 | reward_shaping=True, 56 | placement_initializer=None, 57 | gripper_visualization=True, 58 | use_indicator_object=False, 59 | has_renderer=False, 60 | has_offscreen_renderer=False, 61 | render_collision_mesh=False, 62 | render_visual_mesh=True, 63 | control_freq=10., 64 | horizon=80, 65 | ignore_done=False, 66 | camera_name="frontview", 67 | camera_height=256, 68 | camera_width=256, 69 | camera_depth=False, 70 | ) 71 | 72 | env = EEFXVelocityControl(env, dof=2, normalised_actions=False) 73 | 74 | if (render): 75 | env.renderer_on() 76 | 77 | obs = env.reset() 78 | if (render): 79 | env.viewer.set_camera(camera_id=0) 80 | env.render() 81 | 82 | ## Setup the logger 83 | # Base frame rotation in the end effector to convert data between those two frames 84 | base_rot_in_eef = env.init_right_hand_orn.T 85 | 86 | base_pos_in_world = env.sim.data.get_body_xpos("base") 87 | base_rot_in_world = env.sim.data.get_body_xmat("base").reshape((3, 3)) 88 | base_pose_in_world = T.make_pose(base_pos_in_world, base_rot_in_world) 89 | world_pose_in_base = T.pose_inv(base_pose_in_world) 90 | 91 | # world_pose_in_base maps world-frame quantities into the robot base frame 92 | log_list = ["step", "time", 93 | "cmd_eef_vx",
"cmd_eef_vy", 94 | "goal_x", "goal_y", "goal_z", 95 | "eef_x", "eef_y", "eef_z", 96 | "eef_vx", "eef_vy", "eef_vz", 97 | "object_x", "object_y", "object_z", 98 | "object_vx", "object_vy", "object_vz", 99 | "z_angle", "obs_0", "obs_1", "obs_2", "obs_3", "obs_4", "obs_5", "obs_6", "obs_7", "obs_8", "obs_9" ] 100 | 101 | log_path = '{}/trajectory_{}.csv'.format(log_dir, repeat) 102 | 103 | logger = Logger(log_list, log_path,verbatim=render) 104 | 105 | ## Run the trajectory 106 | i = 0 107 | mujoco_start_time = env.sim.data.time 108 | while (True): 109 | mujoco_elapsed = env.sim.data.time - mujoco_start_time 110 | 111 | next_obs, reward, done, _ = env.step(action) 112 | 113 | goal_pos_in_base, eef_pos_in_base, eef_vel_in_base, \ 114 | object_pos_in_base, object_vel_in_base, z_angle, = grab_data(obs, world_pose_in_base) 115 | 116 | action_3d = np.concatenate([action, [0.0]]) 117 | action_3d_in_base = base_rot_in_eef.dot(action_3d) 118 | 119 | logger.log(i, mujoco_elapsed, 120 | action_3d_in_base[0], action_3d_in_base[1], 121 | goal_pos_in_base[0], goal_pos_in_base[1], goal_pos_in_base[2], 122 | eef_pos_in_base[0], eef_pos_in_base[1], eef_pos_in_base[2], 123 | eef_vel_in_base[0], eef_vel_in_base[1], eef_vel_in_base[2], 124 | object_pos_in_base[0], object_pos_in_base[1], object_pos_in_base[2], 125 | object_vel_in_base[0], object_vel_in_base[1], object_vel_in_base[2], 126 | z_angle[0], 127 | obs['task-state'][0], obs['task-state'][1], obs['task-state'][2], 128 | obs['task-state'][3], obs['task-state'][4], obs['task-state'][5], 129 | obs['task-state'][6], obs['task-state'][7], obs['task-state'][8], obs['task-state'][9], 130 | ) 131 | obs = next_obs 132 | 133 | if(render): 134 | env.render() 135 | i += 1 136 | if (mujoco_elapsed >2.5): 137 | action =np.array([0.0, 0.0]) 138 | 139 | if(mujoco_elapsed >3.0): 140 | action = action_ref 141 | env.close() 142 | break 143 | 144 | 145 | if __name__ == "__main__": 146 | main() 147 | -------------------------------------------------------------------------------- /sim2real-calibration-characterisation/sim2real_calibration_characterisation/sim2real_characterisation/reaching_characterisation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from robosuite_extra.env_base import make 3 | import robosuite_extra.utils.transform_utils as T 4 | from robosuite_extra.wrappers import EEFXVelocityControl 5 | from sim2real_calibration_characterisation.utils.logger import Logger 6 | from sim2real_policies.utils.choose_env import reach_full_randomisation,reach_force_noise_randomisation, \ 7 | reach_force_randomisation,reach_no_randomisation 8 | 9 | 10 | def grab_data(info, world_pose_in_base): 11 | # Grab the data 12 | eef_pos_in_world = info['eef_pos_in_world'] 13 | eef_vel_in_world = info['eef_vel_in_world'] 14 | goal_pos_in_world = info['goal_pos_in_world'] 15 | 16 | eef_pos_in_world = np.concatenate([eef_pos_in_world, [1.0]]) 17 | goal_pos_in_world = np.concatenate([goal_pos_in_world, [1.0]]) 18 | 19 | eef_pos_in_base = world_pose_in_base.dot(eef_pos_in_world) 20 | eef_pos_in_base = eef_pos_in_base / eef_pos_in_base[3] 21 | eef_vel_in_base = world_pose_in_base[:3, :3].dot(eef_vel_in_world) 22 | 23 | goal_pos_in_base = world_pose_in_base.dot(goal_pos_in_world) 24 | goal_pos_in_base = goal_pos_in_base / goal_pos_in_base[3] 25 | 26 | 27 | return eef_pos_in_base, eef_vel_in_base, goal_pos_in_base 28 | 29 | 30 | def main(): 31 | ### Script parameters ### 32 | log_dir = './reach' 33 | render = False 34 | ### 35 | 
36 | for repeat in range(50): 37 | # Create the environment 38 | env = make( 39 | 'SawyerReach', 40 | gripper_type="PushingGripper", 41 | parameters_to_randomise=reach_no_randomisation, 42 | randomise_initial_conditions=False, 43 | table_full_size=(0.8, 1.6, 0.719), 44 | use_camera_obs=False, 45 | use_object_obs=True, 46 | reward_shaping=True, 47 | use_indicator_object=False, 48 | has_renderer=False, 49 | has_offscreen_renderer=False, 50 | render_collision_mesh=False, 51 | render_visual_mesh=True, 52 | control_freq=10, 53 | horizon=40, 54 | ignore_done=False, 55 | camera_name="frontview", 56 | camera_height=256, 57 | camera_width=256, 58 | camera_depth=False, 59 | pid=True, 60 | success_radius=0.01 61 | ) 62 | 63 | env = EEFXVelocityControl(env, dof=3, normalised_actions=True) 64 | 65 | if (render): 66 | env.renderer_on() 67 | 68 | obs = env.reset() 69 | print(env.get_dynamics_parameters()) 70 | 71 | if(render): 72 | env.viewer.set_camera(camera_id=0) 73 | env.render() 74 | 75 | # Setup the logger 76 | # Base frame rotation in the end effector to convert data between those two frames 77 | base_rot_in_eef = env.init_right_hand_orn.T 78 | 79 | base_pos_in_world = env.sim.data.get_body_xpos("base") 80 | base_rot_in_world = env.sim.data.get_body_xmat("base").reshape((3, 3)) 81 | base_pose_in_world = T.make_pose(base_pos_in_world, base_rot_in_world) 82 | world_pose_in_base = T.pose_inv(base_pose_in_world) 83 | 84 | 85 | 86 | log_list = ["step", "time", 87 | "cmd_eef_vx", "cmd_eef_vy", "cmd_eef_vz", 88 | "eef_x", "eef_y", "eef_z", 89 | "eef_vx", "eef_vy", "eef_vz", 90 | "goal_x", "goal_y", "goal_z", 91 | "obs_0", "obs_1", "obs_2", 92 | "obs_3", "obs_4", "obs_5" 93 | ] 94 | 95 | log_path = '{}/trajectory{}.csv'.format(log_dir,repeat) 96 | logger = Logger(log_list, log_path, verbatim=render) 97 | 98 | i = 0 99 | mujoco_start_time = env.sim.data.time 100 | # Run the trajectories 101 | while (True): 102 | mujoco_elapsed = env.sim.data.time - mujoco_start_time 103 | 104 | action = np.array([0.5, 0.5, 0.5]) # policy.cos_wave(mujoco_elapsed),0.0]) 105 | 106 | next_obs, reward, done, _ = env.step(action) 107 | 108 | eef_pos_in_base, eef_vel_in_base, goal_pos_in_base = grab_data(obs, world_pose_in_base) 109 | 110 | action_in_base = base_rot_in_eef.dot(action) 111 | 112 | logger.log(i, mujoco_elapsed, 113 | action_in_base[0], action_in_base[1], action_in_base[2], 114 | eef_pos_in_base[0], eef_pos_in_base[1], eef_pos_in_base[2], 115 | eef_vel_in_base[0], eef_vel_in_base[1], eef_vel_in_base[2], 116 | goal_pos_in_base[0], goal_pos_in_base[1], goal_pos_in_base[2], 117 | obs['task-state'][0], obs['task-state'][1], obs['task-state'][2], 118 | obs['task-state'][3], obs['task-state'][4], obs['task-state'][5], 119 | ) 120 | 121 | obs = next_obs 122 | 123 | if(render): 124 | env.render() 125 | i += 1 126 | if (mujoco_elapsed > 2.5): 127 | env.close() 128 | break 129 | 130 | 131 | if __name__ == "__main__": 132 | main() 133 | -------------------------------------------------------------------------------- /sim2real-calibration-characterisation/sim2real_calibration_characterisation/sim2real_characterisation/sliding_characterisation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from robosuite_extra.env_base import make 3 | from sim2real_calibration_characterisation.utils.logger import Logger 4 | from sim2real_policies.utils.choose_env import slide_force_randomisation, slide_force_noise_randomisation,\ 5 | slide_full_randomisation,slide_no_randomisation 
6 | 7 | def main(): 8 | ### Script parameters ### 9 | render = False 10 | log_dir = './slide' 11 | #### 12 | 13 | action_ref = np.array([0.1, 0.1]) 14 | action = action_ref 15 | 16 | for repeat in range(10): 17 | # Create the environment 18 | env = make( 19 | 'SawyerSlide', 20 | gripper_type="SlidePanelGripper", 21 | parameters_to_randomise=slide_no_randomisation, 22 | randomise_initial_conditions=False, 23 | use_camera_obs=False, 24 | use_object_obs=True, 25 | reward_shaping=True, 26 | use_indicator_object=False, 27 | has_renderer=False, 28 | has_offscreen_renderer=False, 29 | render_collision_mesh=False, 30 | render_visual_mesh=True, 31 | control_freq=10, 32 | horizon=60, 33 | ignore_done=False, 34 | camera_name="frontview", 35 | camera_height=256, 36 | camera_width=256, 37 | camera_depth=False, 38 | pid=True, 39 | ) 40 | 41 | env.normalised_actions = False 42 | 43 | if (render): 44 | env.renderer_on() 45 | 46 | obs = env.reset() 47 | 48 | if (render): 49 | env.viewer.set_camera(camera_id=0) 50 | env.render() 51 | 52 | # Setup the logger 53 | log_list =["step", "time", 54 | "cmd_j5", "cmd_j6", 55 | "obj_x", "obj_y", "obj_z", 56 | "sin_z", "cos_z", 57 | "obj_vx", "obj_vy", "obj_vz", 58 | "a_j5", "a_j6", 59 | "v_j5", "v_j6", 60 | ] 61 | log_path = '{}/trajectory_{}.csv'.format(log_dir, repeat) 62 | logger = Logger(log_list, log_path,verbatim=render) 63 | 64 | i = 0 65 | mujoco_start_time = env.sim.data.time 66 | # Run the trajectories 67 | while (True): 68 | mujoco_elapsed = env.sim.data.time - mujoco_start_time 69 | 70 | next_obs, reward, done, info = env.step(action) 71 | assert len( obs['task-state']) ==12 72 | 73 | logger.log(i, mujoco_elapsed, 74 | action[0], action[1], 75 | obs['task-state'][0], obs['task-state'][1], obs['task-state'][2], 76 | obs['task-state'][3], obs['task-state'][4], obs['task-state'][5], obs['task-state'][6], 77 | obs['task-state'][7], obs['task-state'][8], obs['task-state'][9], 78 | obs['task-state'][10], obs['task-state'][11], 79 | ) 80 | obs = next_obs 81 | 82 | if(render): 83 | env.render() 84 | i += 1 85 | if (mujoco_elapsed >2.): 86 | break 87 | 88 | 89 | if __name__ == "__main__": 90 | main() 91 | -------------------------------------------------------------------------------- /sim2real-calibration-characterisation/sim2real_calibration_characterisation/simulation_calibration/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/sim2real-calibration-characterisation/sim2real_calibration_characterisation/simulation_calibration/__init__.py -------------------------------------------------------------------------------- /sim2real-calibration-characterisation/sim2real_calibration_characterisation/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/sim2real-calibration-characterisation/sim2real_calibration_characterisation/utils/__init__.py -------------------------------------------------------------------------------- /sim2real-calibration-characterisation/sim2real_calibration_characterisation/utils/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | class Logger: 3 | """Logger to record values to a CSV file.""" 4 | 5 | def __init__(self, labels, path, errase=True, verbatim=True): 6 | self.n_labels 
= len(labels) 7 | self.path = path 8 | self.verbatim = verbatim 9 | assert errase or not os.path.exists(os.path.dirname(path)), self.path 10 | print('[Logger] logging {} to {}'.format(labels, self.path)) 11 | if not os.path.exists(os.path.dirname(path)): 12 | os.makedirs(os.path.dirname(path)) 13 | with open(self.path, 'w') as csv_file: 14 | csv_file.write(','.join(labels) + '\n') 15 | 16 | def log(self, *values): 17 | assert len(values) == self.n_labels 18 | text = ','.join([str(v) for v in values]) 19 | if self.verbatim: 20 | print('[Logger] {}'.format(text)) 21 | with open(self.path, 'a') as csv_file: 22 | csv_file.write(text + '\n') 23 | -------------------------------------------------------------------------------- /sim2real-policies/setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup(name='sim2real_policies', 4 | version='0.0.1', 5 | packages= find_packages() 6 | ) -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eugval/sim2real_dynamics_simulation/2ed175803faa38792f6becc2dc91f44ae71ed9c2/sim2real-policies/sim2real_policies/__init__.py -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/after_training_tests/epi_utils.py: -------------------------------------------------------------------------------- 1 | from mujoco_py import MujocoException 2 | import numpy as np 3 | from robosuite_extra.utils import transform_utils as T 4 | 5 | 6 | def log_trajectory_point (env, obs, action,i, mujoco_elapsed, info, logger, data_grabber): 7 | 8 | if(env._name == 'SawyerReach'): 9 | base_pos_in_world = env.sim.data.get_body_xpos("base") 10 | base_rot_in_world = env.sim.data.get_body_xmat("base").reshape((3, 3)) 11 | base_pose_in_world = T.make_pose(base_pos_in_world, base_rot_in_world) 12 | world_pose_in_base = T.pose_inv(base_pose_in_world) 13 | base_rot_in_eef = env.init_right_hand_orn.T 14 | 15 | eef_pos_in_base, eef_vel_in_base, goal_pos_in_base = data_grabber(info, world_pose_in_base) 16 | action_in_base = base_rot_in_eef.dot(action) 17 | 18 | logger.log(i, mujoco_elapsed, 19 | action_in_base[0], action_in_base[1], action_in_base[2], 20 | eef_pos_in_base[0], eef_pos_in_base[1], eef_pos_in_base[2], 21 | eef_vel_in_base[0], eef_vel_in_base[1], eef_vel_in_base[2], 22 | goal_pos_in_base[0], goal_pos_in_base[1], goal_pos_in_base[2], 23 | obs[0], obs[1], obs[2], 24 | obs[3], obs[4], obs[5], 25 | ) 26 | 27 | elif(env._name == 'SawyerPush'): 28 | base_pos_in_world = env.sim.data.get_body_xpos("base") 29 | base_rot_in_world = env.sim.data.get_body_xmat("base").reshape((3, 3)) 30 | base_pose_in_world = T.make_pose(base_pos_in_world, base_rot_in_world) 31 | world_pose_in_base = T.pose_inv(base_pose_in_world) 32 | base_rot_in_eef = env.init_right_hand_orn.T 33 | 34 | 35 | goal_pos_in_base, eef_pos_in_base, eef_vel_in_base, \ 36 | object_pos_in_base, object_vel_in_base, z_angle, = data_grabber(info, world_pose_in_base) 37 | 38 | action_3d = np.concatenate([action, [0.0]]) 39 | action_3d_in_base = base_rot_in_eef.dot(action_3d) 40 | 41 | logger.log(i, mujoco_elapsed, 42 | action_3d_in_base[0], action_3d_in_base[1], 43 | goal_pos_in_base[0], goal_pos_in_base[1], goal_pos_in_base[2], 44 | eef_pos_in_base[0], eef_pos_in_base[1], eef_pos_in_base[2], 45 | 
eef_vel_in_base[0], eef_vel_in_base[1], eef_vel_in_base[2], 46 | object_pos_in_base[0], object_pos_in_base[1], object_pos_in_base[2], 47 | object_vel_in_base[0], object_vel_in_base[1], object_vel_in_base[2], 48 | z_angle[0], 49 | obs[0], obs[1], obs[2], 50 | obs[3], obs[4], obs[5], 51 | obs[6], obs[7], obs[8], 52 | obs[9], 53 | ) 54 | 55 | elif(env._name == 'SawyerSlide'): 56 | 57 | logger.log(i, mujoco_elapsed, 58 | action[0], action[1], 59 | obs[0], obs[1], obs[2], 60 | obs[3], obs[4], obs[5], obs[6], 61 | obs[7], obs[8], obs[9], 62 | obs[10], obs[11], 63 | ) 64 | 65 | 66 | 67 | def EPIpolicy_rollout(env, epi_policy,s, mujoco_start_time , logger= None,data_grabber= None, max_steps=30, params=None): 68 | """ 69 | Roll one trajectory with max_steps 70 | return: 71 | traj: shape of (max_steps, state_dim+action_dim+reward_dim) 72 | s_: next observation 73 | env.get_state(): next underlying state 74 | """ 75 | if params is not None: 76 | env.set_dynamics_parameters(params) 77 | # env.renderer_on() 78 | for epi_iter in range(10): # max iteration for getting a single rollout 79 | #s = env.reset() 80 | traj=[] 81 | for _ in range(max_steps): 82 | a = epi_policy.get_action(s) 83 | a = np.clip(a, -epi_policy.action_range, epi_policy.action_range) 84 | # env.render() 85 | try: 86 | mujoco_elapsed = env.sim.data.time - mujoco_start_time 87 | s_, r, done, info = env.step(a) 88 | except MujocoException: 89 | print('EPI Rollout: MujocoException') 90 | break 91 | 92 | if(logger is not None): 93 | log_trajectory_point(env, s_,a,epi_iter, mujoco_elapsed ,info,logger,data_grabber) 94 | 95 | s_a_r = np.concatenate((s,a, [r])) # state, action, reward 96 | traj.append(s_a_r) 97 | s=s_ 98 | if len(traj) == max_steps: 99 | break 100 | 101 | if len(traj)= horizon): 182 | break 183 | 184 | z_values = np.array(z_values) 185 | env.close() 186 | 187 | if(not os.path.exists(save_path)): 188 | os.mkdir(save_path) 189 | pickle.dump(z_values, open(os.path.join(save_path, 'z_values_array.pckl'), 'wb')) 190 | 191 | return z_values 192 | 193 | 194 | 195 | 196 | def plot_tsne(z_values,save_path): 197 | 198 | tsne_calculator = TSNE(n_iter = 3000, n_iter_without_progress= 600, perplexity =5.)
199 | 200 | fig = plt.figure() 201 | 202 | colors = plt.get_cmap('rainbow')(np.linspace(0.2, 1, 3)) 203 | 204 | for param_set_idx in range(z_values.shape[0]): 205 | zs = copy.deepcopy(z_values[param_set_idx,:,:]) 206 | zs_2d = tsne_calculator.fit_transform(zs) 207 | 208 | zs_2d_x = [zs_2d[i][0] for i in range(zs_2d.shape[0])] 209 | zs_2d_y = [zs_2d[i][1] for i in range(zs_2d.shape[0])] 210 | 211 | plt.scatter(zs_2d_x, zs_2d_y, c = [colors[param_set_idx]]*len(zs_2d_y), label='params set {}'.format(param_set_idx+1)) 212 | 213 | plt.legend() 214 | plt.axis('off') 215 | plt.savefig(save_path+'tsne_plot.pdf') 216 | plt.savefig(save_path+'tsne_plot.svg') 217 | 218 | plt.show() 219 | plt.close() 220 | 221 | 222 | 223 | 224 | if __name__ == "__main__": 225 | 226 | 227 | save_path = '../../../../data/reaching/epi_tsne_results/' 228 | 229 | save_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), save_path) 230 | #z_values = find_zvalues(save_path) 231 | # 232 | 233 | 234 | z_values = pickle.load(open(os.path.join(save_path, 'z_values_array.pckl'), 'rb')) 235 | plot_tsne(copy.deepcopy(z_values),save_path) 236 | 237 | 238 | -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/after_training_tests/tsne_for_epi_pushing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from robosuite_extra.env_base import make 3 | from robosuite_extra.wrappers import EEFXVelocityControl, GymWrapper, FlattenWrapper 4 | from sim2real_policies.final_policy_testing.network_loading import load, load_model 5 | import os 6 | from sim2real_policies.utils.choose_env import reach_force_noise_randomisation,reach_force_randomisation,reach_full_randomisation, push_full_randomisation 7 | from sim2real_policies.final_policy_testing.epi_utils import EPIpolicy_rollout 8 | import copy 9 | 10 | 11 | PUSH_GOALS = [np.array([3e-3,15.15e-2]),np.array([9.5e-2,11.01e-2]), np.array([-11.5e-2,17.2e-2])] 12 | 13 | import pickle 14 | from sklearn.manifold import TSNE 15 | import matplotlib.pyplot as plt 16 | 17 | def grab_data(info, world_pose_in_base): 18 | # Grab the data 19 | eef_pos_in_world = info['eef_pos_in_world'] 20 | eef_vel_in_world = info['eef_vel_in_world'] 21 | goal_pos_in_world = info['goal_pos_in_world'] 22 | 23 | eef_pos_in_world = np.concatenate([eef_pos_in_world, [1.0]]) 24 | goal_pos_in_world = np.concatenate([goal_pos_in_world, [1.0]]) 25 | 26 | eef_pos_in_base = world_pose_in_base.dot(eef_pos_in_world) 27 | eef_pos_in_base = eef_pos_in_base / eef_pos_in_base[3] 28 | eef_vel_in_base = world_pose_in_base[:3, :3].dot(eef_vel_in_world) 29 | 30 | goal_pos_in_base = world_pose_in_base.dot(goal_pos_in_world) 31 | goal_pos_in_base = goal_pos_in_base / goal_pos_in_base[3] 32 | 33 | 34 | return eef_pos_in_base, eef_vel_in_base, goal_pos_in_base 35 | 36 | 37 | def find_zvalues(save_path): 38 | render = False 39 | 40 | # Parameters 41 | horizon = 80 42 | total_repeats =50 43 | 44 | 45 | env_name = 'SawyerPush' 46 | 47 | # ### Prepare Environment ##### 48 | env = make( 49 | 'SawyerPush', 50 | gripper_type="PushingGripper", 51 | parameters_to_randomise=push_full_randomisation, 52 | randomise_initial_conditions=True, 53 | table_full_size=(0.8, 1.6, 0.719), 54 | table_friction=(0.001, 5e-3, 1e-4), 55 | use_camera_obs=False, 56 | use_object_obs=True, 57 | reward_shaping=True, 58 | placement_initializer=None, 59 | gripper_visualization=True, 60 | use_indicator_object=False, 61 | has_renderer=render, 62 | 
has_offscreen_renderer=False, 63 | render_collision_mesh=False, 64 | render_visual_mesh=True, 65 | control_freq=10., 66 | horizon=200, 67 | ignore_done=False, 68 | camera_name="frontview", 69 | camera_height=256, 70 | camera_width=256, 71 | camera_depth=False, 72 | pid=True, 73 | ) 74 | 75 | env = FlattenWrapper(GymWrapper(EEFXVelocityControl(env, dof=2,max_action=0.1, ),),keys='task-state',add_info=True) 76 | env._name = env_name 77 | env.reset() 78 | z_values = [] 79 | 80 | for param_iteration in range(3): 81 | state_dim = 10 82 | action_dim = 2 83 | z_values.append([]) 84 | 85 | ### setting up the env with different but fixed parameters### 86 | if(param_iteration == 1): 87 | parameter_ranges = env.parameter_sampling_ranges 88 | max_parameters = dict([(k,v[-1]*env.factors_for_param_randomisation[k]) for k,v in parameter_ranges.items()]) 89 | env.set_dynamics_parameters(max_parameters) 90 | elif(param_iteration == 2): 91 | parameter_ranges = env.parameter_sampling_ranges 92 | min_parameters = dict([(k,v[0]*env.factors_for_param_randomisation[k]) for k, v in parameter_ranges.items()]) 93 | env.set_dynamics_parameters(min_parameters) 94 | 95 | env.reset() 96 | 97 | if (render): 98 | env.viewer.set_camera(camera_id=0) 99 | env.render() 100 | 101 | ############### 102 | 103 | ################# SETTING UP THE POLICY ################# 104 | method = 'EPI' 105 | alg_name = 'epi_td3' 106 | embed_dim = 10 107 | traj_l = 10 108 | NO_RESET = True 109 | embed_input_dim = traj_l*(state_dim+action_dim) 110 | ori_state_dim = state_dim 111 | state_dim += embed_dim 112 | 113 | # choose which randomisation is applied 114 | number_random_params = 23 115 | folder_path = '../../../../sawyer/src/sim2real_dynamics_sawyer/assets/rl/'+method +'/' + alg_name + '/model/' 116 | path = folder_path + env_name + str( 117 | number_random_params) + '_' + alg_name 118 | 119 | embed_model = load_model(model_name='embedding', path=path, input_dim = embed_input_dim, output_dim = embed_dim ) 120 | embed_model.cuda() 121 | epi_policy_path = folder_path + env_name + str(number_random_params) + '_' + 'epi_ppo_epi_policy' 122 | epi_policy = load(path=epi_policy_path, alg='ppo', state_dim=ori_state_dim, action_dim=action_dim ) 123 | 124 | policy = load(path=path, alg=alg_name, state_dim=state_dim, 125 | action_dim=action_dim) 126 | ######################################################### 127 | 128 | for repeat in range(total_repeats): 129 | #Reset environment 130 | obs = env.reset() 131 | i=0 132 | 133 | mujoco_start_time = env.sim.data.time 134 | if NO_RESET: 135 | i = traj_l - 1 136 | traj, [last_obs, last_state] = EPIpolicy_rollout(env, epi_policy, obs, 137 | mujoco_start_time=mujoco_start_time, 138 | logger=None, data_grabber=None, 139 | max_steps=traj_l, 140 | params=None) # only one traj; pass in params to ensure it's not changed 141 | state_action_in_traj = np.array(traj)[:, :-1] # remove the rewards 142 | embedding = embed_model(state_action_in_traj.reshape(-1)) 143 | embedding = embedding.detach().cpu().numpy() 144 | 145 | obs = last_obs # last observation 146 | env.set_state(last_state) # last underlying state 147 | else: 148 | 149 | traj, [last_obs, last_state] = EPIpolicy_rollout(env, epi_policy, obs, 150 | mujoco_start_time=mujoco_start_time, 151 | logger=None, data_grabber=None, 152 | max_steps=traj_l, 153 | params=None) # only one traj; pass in params to ensure it's not changed 154 | state_action_in_traj = np.array(traj)[:, :-1] # remove the rewards 155 | embedding = embed_model(state_action_in_traj.reshape(-1)) 
156 | embedding = embedding.detach().cpu().numpy() 157 | 158 | 159 | params = env.get_dynamics_parameters() 160 | env.randomisation_off() 161 | env.set_dynamics_parameters(params) # same as the rollout env 162 | obs = env.reset() 163 | env.randomisation_on() 164 | 165 | z_values[param_iteration].append(embedding) 166 | 167 | # z is embedding of initial trajectory for each episode, so no need to run task-policy rollout below 168 | 169 | while (True): 170 | 171 | ############# CHOOSING THE ACTION ############## 172 | obs = np.concatenate((obs, embedding)) 173 | 174 | action = policy.get_action(obs) 175 | 176 | ################################################ 177 | 178 | next_obs, reward, done, info = env.step(action) 179 | 180 | obs = next_obs 181 | 182 | if(render): 183 | env.render() 184 | 185 | i += 1 186 | if (i >= horizon): 187 | break 188 | 189 | z_values = np.array(z_values) 190 | env.close() 191 | 192 | if(not os.path.exists(save_path)): 193 | os.mkdir(save_path) 194 | pickle.dump(z_values, open(os.path.join(save_path, 'z_values_array.pckl'), 'wb')) 195 | 196 | return z_values 197 | 198 | 199 | 200 | 201 | def plot_tsne(z_values,save_path): 202 | 203 | tsne_calculator = TSNE(n_iter = 3000, n_iter_without_progress= 600, perplexity =5.) 204 | 205 | fig = plt.figure() 206 | 207 | colors = plt.get_cmap('rainbow')(np.linspace(0.2, 1, 3)) 208 | 209 | for param_set_idx in range(z_values.shape[0]): 210 | zs = copy.deepcopy(z_values[param_set_idx,:,:]) 211 | zs_2d = tsne_calculator.fit_transform(zs) 212 | 213 | zs_2d_x = [zs_2d[i][0] for i in range(zs_2d.shape[0])] 214 | zs_2d_y = [zs_2d[i][1] for i in range(zs_2d.shape[0])] 215 | 216 | plt.scatter(zs_2d_x, zs_2d_y, c = [colors[param_set_idx]]*len(zs_2d_y), label='params set {}'.format(param_set_idx+1)) 217 | 218 | plt.legend() 219 | plt.axis('off') 220 | plt.savefig(save_path+'tsne_plot.pdf') 221 | plt.savefig(save_path+'tsne_plot.svg') 222 | 223 | plt.show() 224 | plt.close() 225 | 226 | 227 | 228 | 229 | if __name__ == "__main__": 230 | 231 | 232 | save_path = '../../../../data/pushing/epi_tsne_results/' 233 | 234 | save_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), save_path) 235 | z_values = find_zvalues(save_path) 236 | # 237 | 238 | 239 | z_values = pickle.load(open(os.path.join(save_path, 'z_values_array.pckl'), 'rb')) 240 | plot_tsne(copy.deepcopy(z_values),save_path) 241 | 242 | 243 | -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/lstm_td3/plot.py: -------------------------------------------------------------------------------- 1 | # high-dimensional reacher 2 | import numpy as np 3 | import matplotlib.pyplot as plt 4 | 5 | def smooth(y, radius=2, mode='two_sided'): 6 | if len(y) < 2*radius+1: 7 | return np.ones_like(y) * y.mean() 8 | elif mode == 'two_sided': 9 | convkernel = np.ones(2 * radius+1) 10 | return np.convolve(y, convkernel, mode='same') / \ 11 | np.convolve(np.ones_like(y), convkernel, mode='same') 12 | elif mode == 'causal': 13 | convkernel = np.ones(radius) 14 | out = np.convolve(y, convkernel,mode='full') / \ 15 | np.convolve(np.ones_like(y), convkernel, mode='full') 16 | return out[:-radius+1] 17 | 18 | def moving_sum(y, window=2): 19 | c = y.cumsum() 20 | c[window:] = c[window:] - c[:-window] 21 | return c/float(window) 22 | 23 | 24 | 25 | def plot(x, data, color, label): 26 | y_m=np.mean(data, axis=0) 27 | y_std=np.std(data, axis=0) 28 | y_upper=y_m+y_std 29 | y_lower=y_m-y_std 30 | plt.fill_between( 31 | x, list(y_lower), 
list(y_upper), interpolate=True, facecolor=color, linewidth=0.0, alpha=0.3 32 | ) 33 | plt.plot(x, list(y_m), color=color, label=label) 34 | 35 | 36 | file_pre = './' 37 | y=np.load(file_pre+'eval_rewards.npy') 38 | s=np.load(file_pre+'eval_success.npy') 39 | 40 | plt.figure(figsize=(8,6)) 41 | fig, axs = plt.subplots(2) 42 | x=np.arange(len(y)) 43 | axs[0].plot(x, smooth(y), label = 'SawyerPush', color='b') 44 | 45 | axs[0].set_ylabel('Reward') 46 | # plt.ylim(0) 47 | axs[0].legend( loc=2) 48 | axs[0].grid() 49 | axs[1].set_xlabel('Episodes') 50 | axs[1].set_ylabel('Average Success Rate') 51 | axs[1].plot(x, moving_sum(s), label = 'SawyerPush', color='b') 52 | axs[1].grid() 53 | 54 | 55 | plt.savefig('reward.pdf') 56 | plt.show() 57 | -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/ppo/default_params.py: -------------------------------------------------------------------------------- 1 | def get_hyperparams(): 2 | hyperparams_dict={ 3 | 'alg_name': 'ppo', 4 | 'action_range': 1., # (-action_range, action_range) 5 | 'batch_size': 128, # update batchsize 6 | 'gamma': 0.9, # reward discount 7 | 'random_seed': 2, # random seed 8 | 'actor_update_steps': 10, # actor update iterations 9 | 'critic_update_steps': 10, # critic update iterations 10 | 'eps': 1e-8, # numerical residual 11 | 'actor_lr': 0.0001, # learning rate for actor 12 | 'critic_lr': 0.0002, # learning rate for critic 13 | 'method': [ 14 | dict(name='kl_pen', kl_target=0.01, lam=0.5), # KL penalty 15 | dict(name='clip', epsilon=0.2), # Clipped surrogate objective, find this is better 16 | ][1] # choose the method for optimization 17 | } 18 | print('Hyperparameters: ', hyperparams_dict) 19 | return hyperparams_dict -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/sys_id/common/nets.py: -------------------------------------------------------------------------------- 1 | """ 2 | commonly used networks for system identification functions 3 | """ 4 | import numpy as np 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | import os,sys,inspect 9 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 10 | parentdir = os.path.dirname(currentdir) 11 | sys.path.insert(0,parentdir) # add parent path 12 | 13 | 14 | 15 | class BaseNetwork(nn.Module): 16 | """ Base network class """ 17 | def __init__(self, input_dim, output_dim, hidden_dim): 18 | super(BaseNetwork, self).__init__() 19 | self._input_dim = input_dim 20 | self._output_dim = output_dim 21 | self._hidden_dim = hidden_dim 22 | 23 | def forward(self,): 24 | pass 25 | 26 | def save(self, path): 27 | torch.save(self.state_dict(), path) 28 | 29 | def load(self, path): 30 | self.load_state_dict(torch.load(path)) 31 | self.eval().cuda() 32 | 33 | class ProjectionNetwork(BaseNetwork): 34 | def __init__(self, input_dim, output_dim, hidden_dim=64, activation = F.relu, output_activation = F.tanh): 35 | """ 36 | Compress the dynamics parameters to low-dimensional 37 | https://arxiv.org/abs/1903.01390 38 | """ 39 | super().__init__(input_dim, output_dim, hidden_dim) 40 | self.linear1 = nn.Linear(input_dim, hidden_dim) 41 | self.linear2 = nn.Linear(hidden_dim, int(hidden_dim/2)) 42 | self.linear3 = nn.Linear(int(hidden_dim/2), int(hidden_dim/4)) 43 | self.linear4 = nn.Linear(int(hidden_dim/4), output_dim) 44 | self.activation = activation 45 | self.output_activation = output_activation 46 | 47 | def 
forward(self, input): 48 | if isinstance(input, np.ndarray) and len(input.shape) < 2: 49 | input = torch.FloatTensor(np.expand_dims(input, 0)).cuda() 50 | x = self.activation(self.linear1(input)) 51 | x = self.activation(self.linear2(x)) 52 | x = self.activation(self.linear3(x)) 53 | x = self.output_activation(self.linear4(x)) 54 | return x 55 | 56 | def get_context(self, input): 57 | """ 58 | for inference only, no gradient descent 59 | """ 60 | context = self.forward(input) 61 | return context.squeeze(0).detach().cpu().numpy() 62 | 63 | 64 | class PredictionNetwork(BaseNetwork): 65 | """ 66 | prediction model to predict next state given current state and action 67 | (plus the embedding of dynamics if conditioned on EPI) 68 | """ 69 | def __init__(self, input_dim, output_dim, hidden_dim): 70 | super().__init__(input_dim, output_dim, hidden_dim) 71 | self.linear1 = nn.Linear(input_dim, hidden_dim) 72 | self.linear2 = nn.Linear(hidden_dim, hidden_dim) 73 | self.linear3 = nn.Linear(hidden_dim, hidden_dim) 74 | self.linear4 = nn.Linear(hidden_dim, output_dim) 75 | 76 | def forward(self, input, activation=F.relu, output_activation=F.tanh): 77 | if isinstance(input, np.ndarray) and len(input.shape) < 2: 78 | input = torch.FloatTensor(np.expand_dims(input, 0)).cuda() 79 | x = activation(self.linear1(input)) 80 | x = activation(self.linear2(x)) 81 | x = activation(self.linear3(x)) 82 | x = output_activation(self.linear4(x)) 83 | return x.squeeze(0) 84 | 85 | class EmbedNetwork(BaseNetwork): 86 | """ 87 | Embedding network for compress trajectories from EPI policy to low-dimensional embeddings 88 | """ 89 | def __init__(self, input_dim, output_dim, hidden_dim): 90 | super().__init__(input_dim, output_dim, hidden_dim) 91 | self.linear1 = nn.Linear(input_dim, hidden_dim) 92 | self.linear2 = nn.Linear(hidden_dim, hidden_dim) 93 | self.linear3 = nn.Linear(hidden_dim, output_dim) 94 | 95 | def forward(self, input, activation=F.relu, output_activation=F.tanh): 96 | if isinstance(input, np.ndarray) and len(input.shape) < 2: 97 | input = torch.FloatTensor(np.expand_dims(input, 0)).cuda() 98 | x = activation(self.linear1(input)) 99 | x = activation(self.linear2(x)) 100 | x = output_activation(self.linear3(x)) 101 | return x.squeeze(0) -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/sys_id/common/operations.py: -------------------------------------------------------------------------------- 1 | """ 2 | """ 3 | import numpy as np 4 | import torch 5 | import os,sys,inspect 6 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 7 | parentdir = os.path.dirname(currentdir) 8 | sys.path.insert(0,parentdir) # add parent path 9 | import matplotlib.pyplot as plt 10 | 11 | 12 | def size_splits(tensor, split_sizes, dim=0): 13 | """Splits the tensor according to chunks of split_sizes. 14 | 15 | Arguments: 16 | tensor (Tensor): tensor to split. 17 | split_sizes (list(int)): sizes of chunks 18 | dim (int): dimension along which to split the tensor. 
19 | """ 20 | if dim < 0: 21 | dim += tensor.dim() 22 | 23 | dim_size = tensor.size(dim) 24 | if dim_size != torch.sum(torch.Tensor(split_sizes)): 25 | raise KeyError("Sum of split sizes exceeds tensor dim") 26 | 27 | splits = torch.cumsum(torch.Tensor([0] + split_sizes), dim=0)[:-1] 28 | 29 | return tuple(tensor.narrow(int(dim), int(start), int(length)) 30 | for start, length in zip(splits, split_sizes)) 31 | 32 | def size_splits_in_two(tensor, first_size, dim=0): 33 | """Splits a tensor of size=first_size from the original tensor 34 | 35 | Arguments: 36 | tensor (Tensor): tensor to split. 37 | first_size (int): size of the first splitted tensor 38 | dim (int): dimension along which to split the tensor. 39 | """ 40 | return size_splits(tensor, [first_size], dim) 41 | 42 | 43 | def stack_data(traj, length): 44 | traj = np.array(traj) 45 | return traj[-length:, :].reshape(-1) 46 | 47 | 48 | def plot(x, name='figure', path='./'): 49 | plt.figure(figsize=(20, 5)) 50 | plt.plot(x) 51 | plt.savefig(path+name+'.png') 52 | # plt.show() 53 | plt.clf() 54 | 55 | 56 | def plot_train_test(train_y, test_y, name='figure', path='./'): 57 | """ plot both the training and testing curves during training process """ 58 | plt.figure(figsize=(20, 5)) 59 | plt.plot(train_y, label='train') 60 | plt.plot(test_y, label='test') 61 | plt.legend() 62 | plt.grid() 63 | plt.savefig(path+name+'.png') 64 | # plt.show() 65 | plt.clf() 66 | 67 | 68 | if __name__ == '__main__': 69 | env, _, _, _ = choose_env('SawyerReach') 70 | query_key_params(env) -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/sys_id/common/utils.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import torch 4 | import torch.nn as nn 5 | import os,sys,inspect 6 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 7 | parentdir = os.path.dirname(currentdir) 8 | sys.path.insert(0,parentdir) # add parent path 9 | import torch.optim as optim 10 | import matplotlib.pyplot as plt 11 | from sim2real_policies.utils.choose_env import choose_env 12 | from sim2real_policies.utils.policy_networks import DPG_PolicyNetwork, RandomPolicy 13 | 14 | from mujoco_py import MujocoException 15 | 16 | def query_params(env, normalize=True, randomised_only=False, dynamics_only=False): 17 | """ 18 | Query the dynamics parameters from the env, 19 | normalize it if normalize is True, 20 | only return randomised parameters if randomised_only is True, 21 | only return dynamics parameters (no noise parameters) if dynamics_only is True. 
22 | Return: array of parameters 23 | """ 24 | params_dict = env.get_dynamics_parameters() 25 | randomised_params_keys = env.get_randomised_parameters() 26 | params_ranges = env.get_parameter_sampling_ranges() # range 27 | params_factors = env.get_factors_for_randomisation() # multiplied factor 28 | params_value_list = [] 29 | for param_key, param_value in params_dict.items(): 30 | if randomised_only and (param_key in randomised_params_keys) is False: 31 | continue 32 | if dynamics_only and ("time" in param_key or "noise" in param_key): 33 | # print('Non-dynamics parameters: ', param_key) 34 | continue 35 | else: 36 | if normalize: 37 | if param_key in params_factors.keys(): 38 | param_factor = params_factors[param_key] 39 | else: 40 | raise NotImplementedError 41 | 42 | if param_key in params_ranges.keys(): 43 | param_range = params_ranges[param_key] 44 | else: 45 | raise NotImplementedError 46 | 47 | scale = param_range[1]-param_range[0] 48 | param_factor = param_factor + 1e-15 # for factor=0. 49 | param_value = np.clip((param_value/(param_factor) - param_range[0])/(scale), 0., 1.) 50 | 51 | if isinstance(param_value, np.ndarray): 52 | params_value_list = list(np.concatenate((params_value_list, param_value))) 53 | else: # scalar 54 | params_value_list.append(param_value) 55 | return np.array(params_value_list) 56 | 57 | def _flatten_obs(obs_dict, verbose=False): # gym observation wrapper 58 | """ 59 | Filters keys of interest out and concatenate the information. 60 | 61 | Args: 62 | obs_dict: ordered dictionary of observations 63 | """ 64 | keys = ["robot-state", "task-state", "target_pos",] 65 | ob_lst = [] 66 | for key in obs_dict: 67 | if key in keys: # hacked 68 | if verbose: 69 | print("adding key: {}".format(key)) 70 | ob_lst.append(obs_dict[key]) 71 | return np.concatenate(ob_lst) 72 | 73 | def offline_history_collection(env_name, itr=30, policy=None, \ 74 | vectorize=True, discrete=False, vine=False, vine_sample_size=500, egreedy=0): 75 | """ 76 | Collect random simulation parameters and trajetories with given policy. 77 | ---------------------------------------------------------------- 78 | params: 79 | env_name: name of env to collect data from 80 | itr: data episodes 81 | policy: policy used for collecting data 82 | vectorize: vectorized parameters into a list rather than a dictionary, used for system identification 83 | discrete: discrete randomisation range, as in EPI paper 84 | vine: Vine data collection, same state and same action at the initial of trajectory, as in EPI paper 85 | vine_sample_size: number of state action samples in vine trajectory set 86 | egreedy: the factor for collecting data with epsilon-greedy policy 87 | """ 88 | env, environment_params, environment_wrappers, environment_wrapper_arguments = choose_env(env_name) 89 | action_space = env.action_space 90 | state_space = env.observation_space 91 | if policy is None: # load off-line policy if no policy 92 | policy=DPG_PolicyNetwork(state_space, action_space, 512).cuda() 93 | # load from somewhere 94 | 95 | history_sa=[] 96 | history_s_=[] 97 | params_list=[] 98 | if vine: 99 | vine_state_set = [] # underlying state of env, not the observation 100 | vine_action_set = [] # initial action after initial state 101 | vine_idx = 0 102 | # collect state action sets according to EPI's vine implementation 103 | while vine_idx\n" 65 | ] 66 | }, 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "array([ 0.2, 1. 
])" 71 | ] 72 | }, 73 | "execution_count": 11, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "import numpy as np\n", 80 | "import tensorflow as tf\n", 81 | "a=[0.2, 1.]\n", 82 | "a=np.array(a).astype('float64').reshape(-1)\n", 83 | "print(type(np.array(a).astype('float64').reshape(-1)[0]))\n", 84 | "np.unique(a)\n" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 25, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "name": "stdout", 94 | "output_type": "stream", 95 | "text": [ 96 | "[[[2 0]\n", 97 | " [4 3]]\n", 98 | "\n", 99 | " [[4 2]\n", 100 | " [2 3]]]\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "a = np.random.randint(5,size=(2,2,2))\n", 106 | "print(a)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 26, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | "name": "stdout", 116 | "output_type": "stream", 117 | "text": [ 118 | "[[2 0]\n", 119 | " [4 3]]\n", 120 | "[ 3. 1.5]\n", 121 | "[[4 2]\n", 122 | " [2 3]]\n", 123 | "[ 3. 2.5]\n" 124 | ] 125 | } 126 | ], 127 | "source": [ 128 | "mu=[]\n", 129 | "for env in a:\n", 130 | " print(env)\n", 131 | " print(np.squeeze(np.mean(env, axis=0)))\n", 132 | " mu.append(np.squeeze(np.mean(env, axis=0)))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 27, 138 | "metadata": {}, 139 | "outputs": [ 140 | { 141 | "name": "stdout", 142 | "output_type": "stream", 143 | "text": [ 144 | "[[2 0]\n", 145 | " [4 3]]\n", 146 | "[ 0.9 1.4]\n", 147 | "[[4 2]\n", 148 | " [2 3]]\n", 149 | "[ 0.9 0.4]\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "sigma=[]\n", 155 | "for env in a:\n", 156 | " print(env)\n", 157 | " print(np.std(env, axis=0)-0.1)\n", 158 | " sigma.append(np.std(env, axis=0)-0.1)" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 28, 164 | "metadata": {}, 165 | "outputs": [ 166 | { 167 | "name": "stdout", 168 | "output_type": "stream", 169 | "text": [ 170 | "[array([ 3. , 1.5]), array([ 3. , 2.5])]\n", 171 | "[[ 3. 1.5]\n", 172 | " [ 3. 2.5]]\n" 173 | ] 174 | } 175 | ], 176 | "source": [ 177 | "print(mu)\n", 178 | "mu = np.stack(mu)\n", 179 | "print(mu)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 32, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "name": "stdout", 189 | "output_type": "stream", 190 | "text": [ 191 | "[ 11.25 15.25]\n" 192 | ] 193 | }, 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "array([[ 11.25],\n", 198 | " [ 15.25]])" 199 | ] 200 | }, 201 | "execution_count": 32, 202 | "metadata": {}, 203 | "output_type": "execute_result" 204 | } 205 | ], 206 | "source": [ 207 | "r=np.sum(mu*mu, axis=1) # element-wise product\n", 208 | "print(r)\n", 209 | "r=r.reshape(-1,1)\n", 210 | "r" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 37, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "name": "stdout", 220 | "output_type": "stream", 221 | "text": [ 222 | "[[ 11.25]\n", 223 | " [ 15.25]]\n", 224 | "[[-22.5 -25.5]\n", 225 | " [-25.5 -30.5]]\n", 226 | "[[ 11.25 15.25]]\n", 227 | "[[ 0. 1.]\n", 228 | " [ 1. 0.]]\n", 229 | "[[ 0. 0.5]\n", 230 | " [ 0.5 0. 
]]\n" 231 | ] 232 | } 233 | ], 234 | "source": [ 235 | "D=(r-2*np.matmul(mu, mu.T)+r.T)\n", 236 | "print(r)\n", 237 | "print(-2*np.matmul(mu, mu.T))\n", 238 | "print(r.T)\n", 239 | "print(D)\n", 240 | "D=D/len(D)\n", 241 | "print(D)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 38, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "array([[ 1. , 0.70710678],\n", 253 | " [ 0.70710678, 1. ]])" 254 | ] 255 | }, 256 | "execution_count": 38, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "np.sqrt(D+np.eye(len(D)))" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [] 271 | } 272 | ], 273 | "metadata": { 274 | "kernelspec": { 275 | "display_name": "Python 3", 276 | "language": "python", 277 | "name": "python3" 278 | }, 279 | "language_info": { 280 | "codemirror_mode": { 281 | "name": "ipython", 282 | "version": 2 283 | }, 284 | "file_extension": ".py", 285 | "mimetype": "text/x-python", 286 | "name": "python", 287 | "nbconvert_exporter": "python", 288 | "pygments_lexer": "ipython2", 289 | "version": "2.7.15" 290 | } 291 | }, 292 | "nbformat": 4, 293 | "nbformat_minor": 2 294 | } 295 | -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/sys_id/universal_policy_online_system_identification/evaluate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from robosuite_extra.env_base import make 3 | import time 4 | import torch 5 | from sim2real_policies.sys_id.common.utils import query_params 6 | 7 | def testing_envionments_generator_random(env, number_of_envs): 8 | rng = np.random.RandomState() 9 | rng.seed(3) 10 | 11 | def log_uniform(low=1e-10, high=1., size=None): 12 | return np.exp(rng.uniform(np.log(low), np.log(high), size)) 13 | 14 | def ranged_random_choice(low, high, size=1): 15 | vals = np.arange(low,high+1) 16 | return rng.choice(vals, size) 17 | 18 | def select_appropriate_distribution(key): 19 | if ( 20 | key == 'joint_forces' 21 | or key == 'acceleration_forces' 22 | or key == 'eef_forces' 23 | or key == 'obj_forces' 24 | 25 | or key == 'timestep_parameter' 26 | or key == 'pid_iteration_time' 27 | or key == 'mujoco_timestep' 28 | 29 | or key == 'action_additive_noise' 30 | or key == 'action_multiplicative_noise' 31 | or key == 'action_systematic_noise' 32 | 33 | or key == 'eef_obs_position_noise' 34 | or key == 'eef_obs_velocity_noise' 35 | or key == 'obj_obs_position_noise' 36 | or key == 'obj_obs_velocity_noise' 37 | or key == 'obj_angle_noise' 38 | 39 | or key == 'link_masses' 40 | 41 | or key == 'obj_density' 42 | or key == 'obj_sliding_friction' 43 | ): 44 | return rng.uniform 45 | elif ( 46 | key == 'eef_timedelay' 47 | or key == 'obj_timedelay' 48 | ): 49 | return ranged_random_choice 50 | else: 51 | return log_uniform 52 | 53 | factors_for_randomisation = env.get_factors_for_randomisation() 54 | parameter_ranges = env.get_parameter_sampling_ranges() 55 | params_list = [] 56 | for _ in range(0, number_of_envs): 57 | params = {} 58 | 59 | randomised_params = env.get_randomised_parameters() 60 | for param_key in randomised_params : 61 | parameter_range = parameter_ranges[param_key] 62 | 63 | if (parameter_range.shape[0] == 1): 64 | params[param_key]= np.asarray(parameter_range[0]) 65 | elif (parameter_range.shape[0] == 2): 66 | distribution 
= select_appropriate_distribution(param_key) 67 | size = env.default_dynamics_parameters[param_key].shape 68 | params[param_key]=np.asarray( 69 | factors_for_randomisation[param_key] * distribution(*parameter_ranges[param_key], size=size)) 70 | else: 71 | raise RuntimeError('Parameter radomisation range needs to be of shape {1,2}xN') 72 | params_list.append(params) 73 | return params_list 74 | 75 | 76 | def evaluate(env, policy, up=False, eval_eipsodes=5, Projection=False, proj_net=None, num_envs=5): 77 | ''' 78 | Evaluate the policy during training time with fixed dynamics 79 | :param: eval_episodes: evaluating episodes for each env 80 | :param: up (bool): universal policy conditioned on dynamics parameters 81 | ''' 82 | number_of_episodes = eval_eipsodes 83 | return_per_test_environment = [] 84 | success_per_test_environment = [] 85 | # initial_dynamics_params = env.get_dynamics_parameters() 86 | dynamics_params = testing_envionments_generator_random(env, num_envs) # before randomisation off 87 | ### SUGGESTED CHANGE #### 88 | initial_dynamics_params, _ = env.randomisation_off() 89 | ################### 90 | 91 | # do visualization 92 | # print('Starting testing') 93 | for params in dynamics_params: 94 | mean_return = 0.0 95 | success = 0 96 | 97 | # print('with parameters {} ...'.format(params)) 98 | for i in range(number_of_episodes): 99 | #### SUGGESTED CHANGE #### 100 | # First set the parameters and then reset for them to take effect. 101 | env.set_dynamics_parameters(params) 102 | obs = env.reset() 103 | p = env.get_dynamics_parameters() 104 | ####### 105 | # obs = env.reset() 106 | # env.set_dynamics_parameters(params) # set parameters after reset 107 | 108 | if up: 109 | params_list = query_params(env, randomised_only=True) 110 | if Projection and proj_net is not None: 111 | params_list = proj_net.get_context(params_list) 112 | obs = np.concatenate((params_list, obs)) 113 | 114 | episode_return = 0.0 115 | done = False 116 | while not done: 117 | action = policy.get_action(obs) 118 | obs, reward, done, info = env.step(action) 119 | episode_return += reward 120 | 121 | if up: 122 | obs = np.concatenate((params_list, obs)) 123 | 124 | if (done): 125 | mean_return += episode_return 126 | success += int(info['success']) 127 | break 128 | 129 | mean_return /= number_of_episodes 130 | success_rate = float(success) / float(number_of_episodes) 131 | # print('the mean return is {}'.format(mean_return)) 132 | return_per_test_environment.append(mean_return) 133 | success_per_test_environment.append(success_rate) 134 | 135 | #### SUGGESTED CHANGE #### 136 | # Only need to call randomisation_on, this will restore the previous parameters and the 137 | # randoisation too. 
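# A minimal usage sketch of the set-then-reset protocol used above (hedged: it
# assumes a trained `policy` object exposing get_action(), and an env built with
# choose_env() from sim2real_policies.utils.choose_env):
#
#     env, _, _, _ = choose_env('SawyerReach')
#     avg_return, avg_success = evaluate(env, policy, up=False, eval_eipsodes=5, num_envs=5)
#
# Inside the loop, env.set_dynamics_parameters(params) only takes effect at the
# following env.reset(), which is why the parameters are set before resetting.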
Note that this will only take effect in the next reset, where parameters 138 | # are going to be randomised again 139 | env.randomisation_on() 140 | ##### 141 | 142 | # env.set_dynamics_parameters(initial_dynamics_params) # for non-randomisation cases, need to set original dynamics parameters 143 | print('total_average_return_over_test_environments : {}'.format(np.mean(return_per_test_environment))) 144 | return np.mean(return_per_test_environment), np.mean(success_per_test_environment) 145 | 146 | def evaluate_epi(env, epi_policy, embed_net, task_policy, traj_length, eval_eipsodes=5, num_envs=5): 147 | """ 148 | Apart from evaluate(), epi policy conditions on the embedding of trajectory 149 | :param: epi_policy: the policy for rollout a trajectory 150 | :param: embed_net: embedding network of the trajectory 151 | :param: task_policy: the policy for evaluation, conditioned on the embedding 152 | :param: traj_length: length of trajectory 153 | """ 154 | number_of_episodes = eval_eipsodes 155 | return_per_test_environment = [] 156 | success_per_test_environment = [] 157 | dynamics_params = testing_envionments_generator_random(env, num_envs) 158 | # initial_dynamics_params = env.get_dynamics_parameters() 159 | initial_dynamics_params, _ = env.randomisation_off() 160 | 161 | def policy_rollout(env, policy, max_steps=30, params=None): 162 | """ 163 | Roll one trajectory with max_steps 164 | return: traj, shape of (max_steps, state_dim+action_dim+reward_dim) 165 | """ 166 | # s = env.reset() 167 | # if params is not None: 168 | # env.set_dynamics_parameters(params) # make sure .reset() not change env params 169 | 170 | if params is not None: 171 | env.set_dynamics_parameters(params) # make sure .reset() not change env params 172 | s = env.reset() 173 | 174 | traj=[] 175 | for _ in range(max_steps): 176 | a = policy.choose_action(s) 177 | s_, r, done, _ = env.step(a) 178 | s_a_r = np.concatenate((s,a, [r])) # state, action, reward 179 | traj.append(s_a_r) 180 | s=s_ 181 | 182 | return traj 183 | 184 | # do visualization 185 | # print('Starting testing') 186 | for params in dynamics_params: 187 | mean_return = 0.0 188 | success = 0 189 | 190 | # print('with parameters {} ...'.format(params)) 191 | for i in range(number_of_episodes): 192 | traj = policy_rollout(env, epi_policy, max_steps = traj_length, params=params) # only one traj 193 | state_action_in_traj = np.array(traj)[:, :-1] # remove the rewards 194 | embedding = embed_net(state_action_in_traj.reshape(-1)) 195 | embedding = embedding.detach().cpu().numpy() 196 | # s = env.reset() 197 | # env.set_dynamics_parameters(params) # set parameters after reset 198 | env.set_dynamics_parameters(params) 199 | s = env.reset() 200 | 201 | ep_r = 0.0 202 | done = False 203 | while not done: 204 | s=np.concatenate((s, embedding)) 205 | a = task_policy.get_action(s) 206 | s_, r, done, info = env.step(a) 207 | s = s_ 208 | ep_r += r 209 | 210 | if (done): 211 | mean_return += ep_r 212 | success += int(info['success']) 213 | break 214 | 215 | mean_return /= number_of_episodes 216 | success_rate = float(success) / float(number_of_episodes) 217 | # print('the mean return is {}'.format(mean_return)) 218 | return_per_test_environment.append(mean_return) 219 | success_per_test_environment.append(success_rate) 220 | 221 | env.randomisation_on() 222 | 223 | # env.set_dynamics_parameters(initial_dynamics_params) # set original dynamics parameters 224 | print('total_average_return_over_test_environments : {}'.format(np.mean(return_per_test_environment))) 225 | 
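# Both values returned below are first averaged over episodes within each sampled
# test environment and then over the test environments. A hedged call sketch,
# assuming an EPI rollout policy with choose_action(), an embedding network that
# maps a flattened (state, action) trajectory to a context vector, and a task
# policy with get_action():
#
#     avg_return, avg_success = evaluate_epi(env, epi_policy, embed_net,
#                                            task_policy, traj_length=10)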
return np.mean(return_per_test_environment), np.mean(success_per_test_environment) 226 | 227 | 228 | 229 | def evaluate_lstm(env, policy, hidden_dim, eval_eipsodes=5, num_envs=5): 230 | """ 231 | Evaluate the policy during training time with fixed dynamics 232 | :param: eval_episodes: evaluating episodes for each env 233 | """ 234 | number_of_episodes = eval_eipsodes 235 | return_per_test_environment = [] 236 | success_per_test_environment = [] 237 | dynamics_params = testing_envionments_generator_random(env, num_envs) 238 | # initial_dynamics_params = env.get_dynamics_parameters() 239 | initial_dynamics_params, _ = env.randomisation_off() 240 | 241 | 242 | # do visualization 243 | # print('Starting testing') 244 | for params in dynamics_params: 245 | mean_return = 0.0 246 | success = 0 247 | 248 | # print('with parameters {} ...'.format(params)) 249 | for i in range(number_of_episodes): 250 | env.set_dynamics_parameters(params) 251 | obs = env.reset() 252 | 253 | # obs = env.reset() 254 | # env.set_dynamics_parameters(params) 255 | 256 | episode_return = 0.0 257 | done = False 258 | hidden_out = (torch.zeros([1, 1, hidden_dim], dtype=torch.float).cuda(), \ 259 | torch.zeros([1, 1, hidden_dim], dtype=torch.float).cuda()) # initialize hidden state for lstm, (hidden, cell), each is (layer, batch, dim) 260 | last_action = env.action_space.sample() 261 | while not done: 262 | hidden_in = hidden_out 263 | action, hidden_out= policy.get_action(obs, last_action, hidden_in) 264 | obs, reward, done, info = env.step(action) 265 | episode_return += reward 266 | last_action = action 267 | if (done): 268 | mean_return += episode_return 269 | success += int(info['success']) 270 | break 271 | 272 | mean_return /= number_of_episodes 273 | success_rate = float(success) / float(number_of_episodes) 274 | # print('the mean return is {}'.format(mean_return)) 275 | return_per_test_environment.append(mean_return) 276 | success_per_test_environment.append(success_rate) 277 | 278 | env.randomisation_on() 279 | 280 | # env.set_dynamics_parameters(initial_dynamics_params) # set original dynamics parameters 281 | print('total_average_return_over_test_environments : {}'.format(np.mean(return_per_test_environment))) 282 | return np.mean(return_per_test_environment), np.mean(success_per_test_environment) 283 | -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/sys_id/universal_policy_online_system_identification/osi_class.py: -------------------------------------------------------------------------------- 1 | """ 2 | System Identification (SI) 3 | https://arxiv.org/abs/1702.02453 4 | 5 | Examples of two types: 6 | 1. Off-line SI: in sim2real_policies.sys_id.common.utils 7 | 2. 
On-line SI 8 | """ 9 | 10 | from sim2real_policies.sys_id.common.operations import * 11 | from sim2real_policies.sys_id.common.utils import * 12 | from sim2real_policies.utils.rl_utils import load, load_model 13 | from sim2real_policies.utils.choose_env import choose_env 14 | 15 | 16 | class OSI(object): 17 | """ 18 | The class of online system identification 19 | Args: 20 | Projection (bool): whether exists a projection module for reducing the dimension of state 21 | CAT_INTERNAL (bool): whether concatenate the interal state to the external observation 22 | context_dim (int): the integral compressed dimension for the projcection module 23 | """ 24 | def __init__(self, env_name='SawyerReach', length=3, context_dim=3, Projection=True, CAT_INTERNAL=False): 25 | self.cat_internal = CAT_INTERNAL 26 | env, environment_params, environment_wrappers, environment_wrapper_arguments = choose_env(env_name) 27 | state_dim = env.observation_space.shape[0] 28 | action_dim = env.action_space.shape[0] 29 | print('Env name: ', env_name) 30 | print('Dimension of env state: ', state_dim) 31 | print('Dimension of env action: ', action_dim) 32 | self.params_dim = env.get_randomised_parameter_dimensions() 33 | print('Dimension of randomised parameters: ', self.params_dim) 34 | data_dim = length*(state_dim+action_dim) 35 | if CAT_INTERNAL: 36 | internal_state_dim = env.get_internal_state_dimension() 37 | print('Dimension of internal state: ', internal_state_dim) 38 | data_dim = length*(state_dim+action_dim+internal_state_dim) 39 | else: 40 | data_dim = length*(state_dim+action_dim) 41 | self.osi_model = OSINetork(input_dim = data_dim, output_dim = self.params_dim) 42 | self.env_name = env_name 43 | self.length = length # trajectory length for prediction 44 | 45 | if Projection: 46 | self.proj_net = load_model(path = '../../../../data/pup_td3/model/pup_td3_projection', input_dim=self.params_dim, output_dim=context_dim) 47 | self.policy=load(path = '../../../../data/pup_td3/model/pup_td3', alg='TD3', state_dim = state_dim+context_dim, action_dim = action_dim) 48 | self.save_path = '../../../../../data/pup_td3/model/osi' 49 | 50 | else: 51 | self.proj_net = None 52 | self.policy=load(path = '../../../../data/up_td3/model/up_td3', alg='TD3', state_dim = state_dim+self.params_dim, action_dim = action_dim) 53 | self.save_path = '../../../../../data/up_td3/model/osi' 54 | 55 | def predict(self, traj): 56 | traj_input = stack_data(traj, self.length) 57 | print(traj_input) 58 | output = self.osi_model(traj_input).detach().numpy() 59 | print('out: ', output) 60 | return output 61 | 62 | def load_model(self): 63 | self.osi_model.load_state_dict(torch.load(self.save_path, map_location='cuda:0')) 64 | self.osi_model.eval() 65 | 66 | def osi_train(self, itr = 20): 67 | # update with true dynamics parameters from simulator 68 | print('Started OSI training stage I.'+'\n'+'--------------------------------------------------') 69 | params, raw_history = self.online_history_collection(itr=10, PRED_PARAM=False, CAT_INTERNAL=self.cat_internal) 70 | label, data = self.generate_data(params, raw_history) 71 | self.osi_update(data, label, epoch=5) 72 | print('Finished OSI training stage I.') 73 | print('Started OSI training stage II.'+'\n'+'--------------------------------------------------') 74 | # update with predicted dynamics parameters from simulator 75 | losses = [] 76 | for _ in range(itr): # while not converge 77 | params, raw_history = self.online_history_collection(PRED_PARAM=True, CAT_INTERNAL = self.cat_internal) 78 | label, 
data = self.generate_data(params, raw_history) 79 | loss = self.osi_update(data, label, epoch=5) 80 | losses.append(loss) 81 | plot(losses, name='osi_train') 82 | print('Finished OSI training stage II.') 83 | 84 | 85 | 86 | def generate_data(self, params, raw_history): 87 | """ 88 | generate training dataset with raw history trajectories; 89 | length is the number of (state, action, next_state) pairs, there are l state-action pairs in length l sequence 90 | """ 91 | assert len(params) == len(raw_history) 92 | label=[] 93 | data=[] 94 | for param, epi in zip(params, raw_history): 95 | for i in range(0, len(epi)-self.length): 96 | data.append(epi[i:i+self.length].reshape(-1)) # [s,a,s,a] for length=2 97 | label.append(param) 98 | assert len(label)==len(data) 99 | 100 | return label, data 101 | 102 | 103 | def online_history_collection(self, itr=30, max_steps=30, PRED_PARAM=False, CAT_INTERNAL=False): 104 | """ collect random simulation parameters and trajetories with universal policy 105 | https://arxiv.org/abs/1702.02453 (Preparing for the Unknown: Learning a Universal Policy with Online System Identification) 106 | """ 107 | env, environment_params, environment_wrappers, environment_wrapper_arguments = choose_env(self.env_name) 108 | action_space = env.action_space 109 | ini_state_space = env.observation_space 110 | state_space = spaces.Box(-np.inf, np.inf, shape=(ini_state_space.shape[0]+self.params_dim, )) # add the dynamics param dim 111 | 112 | # a random policy 113 | data_collection_policy=DPG_PolicyNetwork(state_space, action_space, hidden_dim=512).cuda() 114 | 115 | params_list=[] 116 | history=[] 117 | for eps in range(itr): # K 118 | state = env.reset() 119 | params = query_params(env, randomised_only=True) 120 | epi_traj = [] 121 | params_list.append(params) 122 | 123 | # N is 1 in this implementation, as each env.reset() will have different parameter set 124 | 125 | for step in range(max_steps): # T 126 | if CAT_INTERNAL: 127 | internal_state = env.get_internal_state() 128 | full_state = np.concatenate([state, internal_state]) 129 | else: 130 | full_state = state 131 | if len(epi_traj)>=self.length and PRED_PARAM: 132 | osi_input = stack_data(epi_traj, self.length) # stack (s,a) to have same length as in the model input 133 | pre_params = self.osi_model(osi_input).detach().numpy() 134 | else: 135 | pre_params = params 136 | 137 | if self.proj_net is not None: # projected to low dimensions 138 | pre_params = self.proj_net.get_context(pre_params) 139 | else: 140 | pass 141 | # print('No projection network!') 142 | params_state = np.concatenate((pre_params, state)) # use predicted parameters instead of true values for training, according to the paper 143 | action = data_collection_policy.get_action(params_state) 144 | epi_traj.append(np.concatenate((full_state, action))) 145 | 146 | next_state, _, _, _ = env.step(action) 147 | state = next_state 148 | history.append(np.array(epi_traj)) 149 | print("Finished collecting data of {} trajectories.".format(itr)) 150 | return params_list, history 151 | 152 | 153 | def osi_update(self, input, label, epoch=1, lr=1e-1): 154 | """ Update the system identification (SI) with online data collection """ 155 | criterion = nn.MSELoss() 156 | optimizer = optim.Adam(self.osi_model.parameters(), lr) 157 | scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.99) # gamma: decay for each step 158 | input = torch.Tensor(input) 159 | label = torch.Tensor(label) 160 | 161 | for i in range(epoch): 162 | predict = self.osi_model(input) 163 | loss = 
criterion(predict, label) 164 | optimizer.zero_grad() 165 | loss.backward() 166 | print('Train the SI model, Epoch: {} | Loss: {}'.format(i, loss)) 167 | optimizer.step() 168 | scheduler.step() 169 | 170 | torch.save(self.osi_model.state_dict(), self.save_path) 171 | 172 | return loss.detach().cpu().numpy() 173 | 174 | class OSINetork(nn.Module): 175 | def __init__(self, input_dim, output_dim, hidden_dim=512, dropout=0.1): 176 | """ same OSI network structure as: https://arxiv.org/abs/1702.02453 """ 177 | super(OSINetork, self).__init__() 178 | self.linear1 = nn.Linear(input_dim, hidden_dim) 179 | self.dropout1 = nn.Dropout(dropout) 180 | self.linear2 = nn.Linear(hidden_dim, int(hidden_dim/2)) 181 | self.dropout2 = nn.Dropout(dropout) 182 | self.linear3 = nn.Linear(int(hidden_dim/2), int(hidden_dim/4)) 183 | self.dropout3 = nn.Dropout(dropout) 184 | self.linear4 = nn.Linear(int(hidden_dim/4), output_dim) 185 | 186 | def forward(self, input): 187 | if len(input.shape) < 2: 188 | input = torch.FloatTensor(np.expand_dims(input, 0)) 189 | x = F.tanh(self.linear1(input)) 190 | x = self.dropout1(x) 191 | x = F.tanh(self.linear2(x)) 192 | x = self.dropout2(x) 193 | x = F.tanh(self.linear3(x)) 194 | x = self.dropout3(x) 195 | x = self.linear4(x) 196 | return x.squeeze(0) 197 | 198 | def stack_data(traj, length): 199 | traj = np.array(traj) 200 | return traj[-length:, :].reshape(-1) 201 | 202 | if __name__ == '__main__': 203 | ENV_NAME =['SawyerReach', 'SawyerPush', 'SawyerSlide'][0] 204 | 205 | osi = OSI(env_name = ENV_NAME, length=3, context_dim=3, Projection=False, CAT_INTERNAL=True) 206 | osi.osi_train() -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/td3/default_params.py: -------------------------------------------------------------------------------- 1 | def get_hyperparams(): 2 | hyperparams_dict={ 3 | 'alg_name': 'td3', 4 | 'action_range': 1., 5 | 'batch_size': 640, 6 | 'explore_steps': 0, 7 | 'update_itr': 1, # iterative update 8 | 'explore_noise_scale': 0.3, 9 | 'eval_noise_scale': 0.5, # noisy evaluation trick 10 | 'reward_scale': 1., # reward normalization 11 | 'hidden_dim': 512, 12 | 'noise_decay': 0.9999, # decaying exploration noise 13 | 'policy_target_update_interval': 3, # delayed update 14 | 'q_lr': 3e-4, 15 | 'policy_lr': 3e-4, 16 | 'replay_buffer_size': 1e6, 17 | 'deterministic': True 18 | } 19 | print('Hyperparameters: ', hyperparams_dict) 20 | return hyperparams_dict 21 | -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .logger import Logger 2 | from .policy_networks import SAC_PolicyNetwork, DPG_PolicyNetwork, DPG_PolicyNetworkLSTM 3 | -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/utils/buffers.py: -------------------------------------------------------------------------------- 1 | import random 2 | import numpy as np 3 | import torch 4 | 5 | 6 | 7 | class ReplayBuffer: 8 | def __init__(self, capacity): 9 | self.capacity = capacity 10 | self.buffer = [] 11 | self.position = 0 12 | 13 | def push(self, state, action, reward, next_state, done): 14 | if len(self.buffer) < self.capacity: 15 | self.buffer.append(None) 16 | self.buffer[self.position] = (state, action, reward, next_state, done) 17 | self.position = int((self.position + 1) % self.capacity) # as a ring 
buffer 18 | 19 | def sample(self, batch_size): 20 | batch = random.sample(self.buffer, batch_size) 21 | state, action, reward, next_state, done = map(np.stack, 22 | zip(*batch)) # stack for each element 23 | ''' 24 | the * serves as unpack: sum(a,b) <=> batch=(a,b), sum(*batch) ; 25 | zip: a=[1,2], b=[2,3], zip(a,b) => [(1, 2), (2, 3)] ; 26 | the map serves as mapping the function on each list element: map(square, [2,3]) => [4,9] ; 27 | np.stack((1,2)) => array([1, 2]) 28 | ''' 29 | return state, action, reward, next_state, done 30 | 31 | def __len__( 32 | self): # cannot work in multiprocessing case, len(replay_buffer) is not available in proxy of manager! 33 | return len(self.buffer) 34 | 35 | def get_length(self): 36 | return len(self.buffer) 37 | 38 | 39 | class ReplayBufferPUP: 40 | """ 41 | ReplayBuffer for Projected Universal Policy 42 | """ 43 | def __init__(self, capacity): 44 | self.capacity = capacity 45 | self.buffer = [] 46 | self.position = 0 47 | 48 | def push(self, state, action, reward, next_state, done, params): 49 | if len(self.buffer) < self.capacity: 50 | self.buffer.append(None) 51 | self.buffer[self.position] = (state, action, reward, next_state, done, params) 52 | self.position = int((self.position + 1) % self.capacity) # as a ring buffer 53 | 54 | def sample(self, batch_size): 55 | batch = random.sample(self.buffer, batch_size) 56 | state, action, reward, next_state, done, params = map(np.stack, 57 | zip(*batch)) # stack for each element 58 | ''' 59 | the * serves as unpack: sum(a,b) <=> batch=(a,b), sum(*batch) ; 60 | zip: a=[1,2], b=[2,3], zip(a,b) => [(1, 2), (2, 3)] ; 61 | the map serves as mapping the function on each list element: map(square, [2,3]) => [4,9] ; 62 | np.stack((1,2)) => array([1, 2]) 63 | ''' 64 | return state, action, reward, next_state, done, params 65 | 66 | def __len__( 67 | self): # cannot work in multiprocessing case, len(replay_buffer) is not available in proxy of manager! 68 | return len(self.buffer) 69 | 70 | def get_length(self): 71 | return len(self.buffer) 72 | 73 | 74 | 75 | class ReplayBufferLSTM: 76 | """ 77 | Replay buffer for agent with LSTM network additionally using previous action, can be used 78 | if the hidden states are not stored (arbitrary initialization of lstm for training). 79 | And each sample contains the whole episode instead of a single step. 80 | """ 81 | def __init__(self, capacity): 82 | self.capacity = capacity 83 | self.buffer = [] 84 | self.position = 0 85 | 86 | def push(self, state, action, last_action, reward, next_state, done): 87 | if len(self.buffer) < self.capacity: 88 | self.buffer.append(None) 89 | self.buffer[self.position] = (state, action, last_action, reward, next_state, done) 90 | self.position = int((self.position + 1) % self.capacity) # as a ring buffer 91 | 92 | def sample(self, batch_size): 93 | batch = random.sample(self.buffer, batch_size) 94 | state, action, last_action, reward, next_state, done = map(np.stack, 95 | zip(*batch)) # stack for each element 96 | ''' 97 | the * serves as unpack: sum(a,b) <=> batch=(a,b), sum(*batch) ; 98 | zip: a=[1,2], b=[2,3], zip(a,b) => [(1, 2), (2, 3)] ; 99 | the map serves as mapping the function on each list element: map(square, [2,3]) => [4,9] ; 100 | np.stack((1,2)) => array([1, 2]) 101 | ''' 102 | return state, action, last_action, reward, next_state, done 103 | 104 | def __len__( 105 | self): # cannot work in multiprocessing case, len(replay_buffer) is not available in proxy of manager! 
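# __len__ is kept for local use only: when the buffer is shared across worker
# processes through a multiprocessing manager proxy, dunder methods are not exposed
# on the proxy, so callers use get_length() instead. Hedged sketch of that pattern:
#
#     from multiprocessing.managers import BaseManager
#     BaseManager.register('ReplayBufferLSTM', ReplayBufferLSTM)
#     manager = BaseManager()
#     manager.start()
#     shared_buffer = manager.ReplayBufferLSTM(int(1e6))
#     size = shared_buffer.get_length()   # len(shared_buffer) would raise here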
106 | return len(self.buffer) 107 | 108 | def get_length(self): 109 | return len(self.buffer) 110 | 111 | 112 | 113 | 114 | class ReplayBufferLSTM2: 115 | """ 116 | Replay buffer for agent with LSTM network additionally storing previous action, 117 | initial input hidden state and output hidden state of LSTM. 118 | And each sample contains the whole episode instead of a single step. 119 | 'hidden_in' and 'hidden_out' are only the initial hidden state for each episode, for LSTM initialization. 120 | 121 | """ 122 | def __init__(self, capacity): 123 | self.capacity = capacity 124 | self.buffer = [] 125 | self.position = 0 126 | 127 | def push(self, hidden_in, hidden_out, state, action, last_action, reward, next_state, done): 128 | if len(self.buffer) < self.capacity: 129 | self.buffer.append(None) 130 | self.buffer[self.position] = (hidden_in, hidden_out, state, action, last_action, reward, next_state, done) 131 | self.position = int((self.position + 1) % self.capacity) # as a ring buffer 132 | 133 | def sample(self, batch_size): 134 | s_lst, a_lst, la_lst, r_lst, ns_lst, hi_lst, ci_lst, ho_lst, co_lst, d_lst=[],[],[],[],[],[],[],[],[],[] 135 | batch = random.sample(self.buffer, batch_size) 136 | for sample in batch: 137 | (h_in, c_in), (h_out, c_out), state, action, last_action, reward, next_state, done = sample 138 | s_lst.append(state) 139 | a_lst.append(action) 140 | la_lst.append(last_action) 141 | r_lst.append(reward) 142 | ns_lst.append(next_state) 143 | d_lst.append(done) 144 | hi_lst.append(h_in) # h_in: (1, batch_size=1, hidden_size) 145 | ci_lst.append(c_in) 146 | ho_lst.append(h_out) 147 | co_lst.append(c_out) 148 | hi_lst = torch.cat(hi_lst, dim=-2).detach() # cat along the batch dim 149 | ho_lst = torch.cat(ho_lst, dim=-2).detach() 150 | ci_lst = torch.cat(ci_lst, dim=-2).detach() 151 | co_lst = torch.cat(co_lst, dim=-2).detach() 152 | 153 | hidden_in = (hi_lst, ci_lst) 154 | hidden_out = (ho_lst, co_lst) 155 | return hidden_in, hidden_out, s_lst, a_lst, la_lst, r_lst, ns_lst, d_lst 156 | 157 | def __len__( 158 | self): # cannot work in multiprocessing case, len(replay_buffer) is not available in proxy of manager! 
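# Shape note for sample() above: each stored h_in/c_in/h_out/c_out tensor has shape
# (num_layers=1, batch=1, hidden_dim); concatenating a batch of B sampled episodes
# along dim=-2 gives (1, B, hidden_dim), which is the (layer, batch, dim) layout an
# nn.LSTM expects for its initial hidden and cell states.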
159 | return len(self.buffer) 160 | 161 | def get_length(self): 162 | return len(self.buffer) 163 | 164 | 165 | -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/utils/choose_env.py: -------------------------------------------------------------------------------- 1 | from robosuite_extra.wrappers import EEFXVelocityControl, GymWrapper, FlattenWrapper 2 | from sim2real_policies.utils.envs import make_env 3 | 4 | push_no_randomisation = [] 5 | 6 | push_full_randomisation = ['eef_timedelay', 'obj_timedelay', 'timestep_parameter', 'pid_iteration_time', 7 | 'action_additive_noise', 'action_systematic_noise', 'action_multiplicative_noise', 8 | 'eef_obs_position_noise', 'eef_obs_velocity_noise', 9 | 'obj_obs_position_noise', 'obj_obs_velocity_noise', 'obj_angle_noise', 10 | 'obj_density', 'obj_sliding_friction', 'obj_torsional_friction', 'obj_size', 11 | 'link_masses', 'joint_dampings', 'armatures', 'joint_frictions', 12 | 'kps', 'kis', 'kds', ] 13 | 14 | push_force_randomisation = ['joint_forces', 'obj_forces'] 15 | 16 | push_force_noise_randomisation = ['eef_timedelay', 'obj_timedelay', 17 | 'eef_obs_position_noise', 'eef_obs_velocity_noise', 18 | 'obj_obs_position_noise', 'obj_obs_velocity_noise', 'obj_angle_noise', 19 | 'joint_forces', 'obj_forces'] 20 | 21 | 22 | reach_no_randomisation = [] 23 | 24 | reach_full_randomisation = ['action_additive_noise', 'action_systematic_noise', 'action_multiplicative_noise', 25 | 'eef_obs_position_noise', 'eef_obs_velocity_noise', 26 | 'timestep_parameter', 'pid_iteration_time', 27 | 'link_masses', 'joint_dampings', 'armatures', 'joint_frictions', 28 | 'kps', 'kis', 'kds'] 29 | 30 | reach_force_randomisation = ['joint_forces'] 31 | 32 | reach_force_noise_randomisation = ['eef_obs_position_noise', 'eef_obs_velocity_noise', 'joint_forces', ] 33 | 34 | 35 | slide_no_randomisation = [] 36 | 37 | slide_full_randomisation = ['obj_timedelay', 'timestep_parameter', 'pid_iteration_time', 38 | 'action_additive_noise', 'action_multiplicative_noise', 'action_systematic_noise', 39 | 'joint_obs_position_noise', 'joint_obs_velocity_noise', 'obs_position_noise', 40 | 'obs_velocity_noise', 'obs_angle_noise', 41 | 'obj_density', 'obj_size', 'obj_sliding_friction', 'obj_torsional_friction', 42 | 'link_masses', 'joint_dampings', 'armatures', 'joint_frictions', 43 | 'kps', 'kis', 'kds'] 44 | 45 | slide_force_randomisation = ['joint_forces', 'obj_forces'] 46 | 47 | slide_force_noise_randomisation = ['joint_obs_position_noise', 'joint_obs_velocity_noise', 'obs_position_noise', 48 | 'obs_velocity_noise', 'obs_angle_noise', 49 | 'obj_timedelay', 50 | 'joint_forces', 'obj_forces' 51 | ] 52 | 53 | 54 | def choose_env(env_name, randomisation_configuration=None, env_params_dict=None): 55 | if env_name == 'SawyerPush': 56 | ## Set the environment parameters 57 | environment_params = { 58 | "gripper_type": "PushingGripper", 59 | "parameters_to_randomise": push_force_noise_randomisation, 60 | "randomise_initial_conditions": True, 61 | "table_full_size": (0.8, 1.6, 0.719), 62 | "table_friction": (1e-4, 5e-3, 1e-4), 63 | "use_camera_obs": False, 64 | "use_object_obs": True, 65 | "reward_shaping": True, 66 | "placement_initializer": None, 67 | "gripper_visualization": False, 68 | "use_indicator_object": False, 69 | "has_renderer": False, # <-------- 70 | "has_offscreen_renderer": False, 71 | "render_collision_mesh": False, 72 | "render_visual_mesh": True, 73 | "control_freq": 10, # <-------- 74 | "horizon": 80, # <-------- 
75 | "ignore_done": False, 76 | "camera_name": "frontview", 77 | "camera_height": 256, 78 | "camera_width": 256, 79 | "camera_depth": False, 80 | "pid": True 81 | 82 | } 83 | 84 | environment_wrappers = [EEFXVelocityControl, GymWrapper, FlattenWrapper] 85 | environment_wrapper_arguments = [{'max_action': 0.1, 'dof': 2}, 86 | {}, 87 | {'keys': 'task-state'}] 88 | 89 | 90 | elif env_name == 'SawyerReach': 91 | ## Set the environment parameters 92 | environment_params = { 93 | "gripper_type": "PushingGripper", 94 | "parameters_to_randomise": reach_full_randomisation, 95 | "randomise_initial_conditions": True, 96 | "table_full_size": (0.8, 1.6, 0.719), 97 | "use_camera_obs": False, 98 | "use_object_obs": True, 99 | "reward_shaping": True, 100 | "use_indicator_object": False, 101 | "has_renderer": False, 102 | "has_offscreen_renderer": False, 103 | "render_collision_mesh": False, 104 | "render_visual_mesh": True, 105 | "control_freq": 10., 106 | "horizon": 50, 107 | "ignore_done": False, 108 | "camera_name": " frontview", 109 | "camera_height": 256, 110 | "camera_width": 256, 111 | "camera_depth": False, 112 | "pid": True, 113 | "success_radius": 0.01 114 | } 115 | 116 | environment_wrappers = [EEFXVelocityControl, GymWrapper, FlattenWrapper] 117 | environment_wrapper_arguments = [{'max_action': 0.1, 'dof': 3}, 118 | {}, 119 | {'keys': 'task-state'}] 120 | 121 | elif env_name == 'SawyerSlide': 122 | environment_params = { 123 | 'gripper_type': "SlidePanelGripper", 124 | 'parameters_to_randomise': slide_full_randomisation, 125 | 'randomise_initial_conditions': True, 126 | 'use_camera_obs': False, 127 | 'use_object_obs': True, 128 | 'reward_shaping': True, 129 | 'use_indicator_object': False, 130 | 'has_renderer': False, 131 | 'has_offscreen_renderer': False, 132 | 'render_collision_mesh': False, 133 | 'render_visual_mesh': True, 134 | 'control_freq': 10., 135 | 'horizon': 60, 136 | 'ignore_done': False, 137 | 'camera_name': "frontview", 138 | 'camera_height': 256, 139 | 'camera_width': 256, 140 | 'camera_depth': False, 141 | 'pid': True, 142 | } 143 | 144 | environment_wrappers = [ GymWrapper, FlattenWrapper] 145 | environment_wrapper_arguments = [{}, 146 | {'keys': 'task-state'}] 147 | if randomisation_configuration is not None: 148 | environment_params['parameters_to_randomise'] = eval(randomisation_configuration) 149 | 150 | if env_params_dict is not None: 151 | for key, value in env_params_dict.items(): 152 | try: 153 | environment_params[key] = value 154 | except Exception as error: 155 | print(error) 156 | 157 | 158 | env = make_env('robosuite.'+env_name, 1, 0, environment_params, environment_wrappers, 159 | environment_wrapper_arguments)() 160 | 161 | print('Randomised Parameters: {}'.format(environment_params["parameters_to_randomise"])) 162 | return env, environment_params, environment_wrappers, environment_wrapper_arguments 163 | -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/utils/envs.py: -------------------------------------------------------------------------------- 1 | from robosuite_extra.env_base import make 2 | 3 | from robosuite_extra.push_env.sawyer_push import SawyerPush 4 | from robosuite_extra.reach_env.sawyer_reach import SawyerReach 5 | from robosuite_extra.slide_env.sawyer_slide import SawyerSlide 6 | 7 | 8 | def make_env(env_id, seed, rank, environment_arguments = {}, wrappers = [], wrapper_arguments = []): 9 | assert len(wrappers) == len(wrapper_arguments) , 'There needs to be one argument dict per 
wrapper' 10 | 11 | def _thunk(): 12 | if env_id.startswith("gym"): 13 | raise NotImplementedError('Gym environments not implemented') 14 | elif env_id.startswith('robosuite'): 15 | _, robosuite_name = env_id.split('.') 16 | 17 | env = make(robosuite_name, **environment_arguments) 18 | else: 19 | raise NotImplementedError('Only robosuite environments are compatible.') 20 | 21 | 22 | for i, wrapper in enumerate(wrappers): 23 | env = wrapper(env, **wrapper_arguments[i]) 24 | 25 | env.seed(seed + rank) 26 | 27 | return env 28 | 29 | return _thunk 30 | 31 | -------------------------------------------------------------------------------- /sim2real-policies/sim2real_policies/utils/evaluate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | from sim2real_policies.sys_id.common.utils import query_params 4 | from mujoco_py import MujocoException 5 | 6 | def testing_envionments_generator_random(env, number_of_envs): 7 | rng = np.random.RandomState() 8 | rng.seed(3) 9 | 10 | def log_uniform(low=1e-10, high=1., size=None): 11 | return np.exp(rng.uniform(np.log(low), np.log(high), size)) 12 | 13 | def ranged_random_choice(low, high, size=1): 14 | vals = np.arange(low,high+1) 15 | return rng.choice(vals, size) 16 | 17 | def select_appropriate_distribution(key): 18 | if ( 19 | key == 'joint_forces' 20 | or key == 'acceleration_forces' 21 | or key == 'eef_forces' 22 | or key == 'obj_forces' 23 | 24 | or key == 'timestep_parameter' 25 | or key == 'pid_iteration_time' 26 | or key == 'mujoco_timestep' 27 | 28 | or key == 'action_additive_noise' 29 | or key == 'action_multiplicative_noise' 30 | or key == 'action_systematic_noise' 31 | 32 | or key == 'eef_obs_position_noise' 33 | or key == 'eef_obs_velocity_noise' 34 | or key == 'obj_obs_position_noise' 35 | or key == 'obj_obs_velocity_noise' 36 | or key == 'obj_angle_noise' 37 | 38 | or key == 'link_masses' 39 | 40 | or key == 'obj_density' 41 | or key == 'obj_sliding_friction' 42 | ): 43 | return rng.uniform 44 | elif ( 45 | key == 'eef_timedelay' 46 | or key == 'obj_timedelay' 47 | ): 48 | return ranged_random_choice 49 | else: 50 | return log_uniform 51 | 52 | factors_for_randomisation = env.get_factors_for_randomisation() 53 | parameter_ranges = env.get_parameter_sampling_ranges() 54 | params_list = [] 55 | for _ in range(0, number_of_envs): 56 | params = {} 57 | 58 | randomised_params = env.get_randomised_parameters() 59 | for param_key in randomised_params : 60 | parameter_range = parameter_ranges[param_key] 61 | 62 | if (parameter_range.shape[0] == 1): 63 | params[param_key]= np.asarray(parameter_range[0]) 64 | elif (parameter_range.shape[0] == 2): 65 | distribution = select_appropriate_distribution(param_key) 66 | size = env.default_dynamics_parameters[param_key].shape 67 | params[param_key]=np.asarray( 68 | factors_for_randomisation[param_key] * distribution(*parameter_ranges[param_key], size=size)) 69 | else: 70 | raise RuntimeError('Parameter radomisation range needs to be of shape {1,2}xN') 71 | params_list.append(params) 72 | return params_list 73 | 74 | 75 | def evaluate(env, policy, up=False, eval_eipsodes=5, Projection=False, proj_net=None, num_envs=5, randomised_only=False, dynamics_only=False): 76 | ''' 77 | Evaluate the policy during training time with fixed dynamics 78 | :param: eval_episodes: evaluating episodes for each env 79 | :param: up (bool): universal policy conditioned on dynamics parameters 80 | ''' 81 | number_of_episodes = eval_eipsodes 82 | 
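# Hedged usage sketch for the universal-policy path of this function, assuming a
# TD3-style policy whose observation is [dynamics context, task state]:
#
#     avg_return, avg_success = evaluate(env, up_policy, up=True, Projection=True,
#                                        proj_net=proj_net, randomised_only=True)
#
# With up=True, the parameters queried from the env (optionally compressed by
# proj_net.get_context()) are concatenated in front of every observation before it
# is passed to policy.get_action().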
return_per_test_environment = [] 83 | success_per_test_environment = [] 84 | # initial_dynamics_params = env.get_dynamics_parameters() 85 | dynamics_params = testing_envionments_generator_random(env, num_envs) # before randomisation off 86 | ### SUGGESTED CHANGE #### 87 | initial_dynamics_params, _ = env.randomisation_off() 88 | ################### 89 | 90 | # do visualization 91 | # print('Starting testing') 92 | for params in dynamics_params: 93 | mean_return = 0.0 94 | success = 0 95 | 96 | # print('with parameters {} ...'.format(params)) 97 | for i in range(number_of_episodes): 98 | #### SUGGESTED CHANGE #### 99 | # First set the parameters and then reset for them to take effect. 100 | env.set_dynamics_parameters(params) 101 | obs = env.reset() 102 | p = env.get_dynamics_parameters() 103 | ####### 104 | # obs = env.reset() 105 | # env.set_dynamics_parameters(params) # set parameters after reset 106 | 107 | if up: 108 | env.randomisation_on() # open randosimation to query params correctly 109 | params_list = query_params(env, randomised_only=randomised_only, dynamics_only=dynamics_only) 110 | env.randomisation_off() 111 | if Projection and proj_net is not None: 112 | params_list = proj_net.get_context(params_list) 113 | obs = np.concatenate((params_list, obs)) 114 | 115 | episode_return = 0.0 116 | done = False 117 | while not done: 118 | action = policy.get_action(obs) 119 | obs, reward, done, info = env.step(action) 120 | episode_return += reward 121 | 122 | if up: 123 | obs = np.concatenate((params_list, obs)) 124 | 125 | if (done): 126 | mean_return += episode_return 127 | success += int(info['success']) 128 | break 129 | 130 | mean_return /= number_of_episodes 131 | success_rate = float(success) / float(number_of_episodes) 132 | # print('the mean return is {}'.format(mean_return)) 133 | return_per_test_environment.append(mean_return) 134 | success_per_test_environment.append(success_rate) 135 | 136 | #### SUGGESTED CHANGE #### 137 | # Only need to call randomisation_on, this will restore the previous parameters and the 138 | # randoisation too. 
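# Because testing_envionments_generator_random() seeds its own RandomState with a
# fixed value (3), the same set of test environments is drawn on every call, so the
# averages returned below stay comparable across evaluations during training.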
Note that this will only take effect in the next reset, where parameters 139 | # are going to be randomised again 140 | env.randomisation_on() 141 | ##### 142 | 143 | # env.set_dynamics_parameters(initial_dynamics_params) # for non-randomisation cases, need to set original dynamics parameters 144 | print('total_average_return_over_test_environments : {}'.format(np.mean(return_per_test_environment))) 145 | return np.mean(return_per_test_environment), np.mean(success_per_test_environment) 146 | 147 | def evaluate_epi(env, epi_policy, embed_net, task_policy, traj_length, eval_eipsodes=5, num_envs=5): 148 | """ 149 | Apart from evaluate(), epi policy conditions on the embedding of trajectory 150 | :param: epi_policy: the policy for rollout a trajectory 151 | :param: embed_net: embedding network of the trajectory 152 | :param: task_policy: the policy for evaluation, conditioned on the embedding 153 | :param: traj_length: length of trajectory 154 | """ 155 | number_of_episodes = eval_eipsodes 156 | return_per_test_environment = [] 157 | success_per_test_environment = [] 158 | dynamics_params = testing_envionments_generator_random(env, num_envs) 159 | # initial_dynamics_params = env.get_dynamics_parameters() 160 | initial_dynamics_params, _ = env.randomisation_off() 161 | 162 | def policy_rollout(env, policy, max_steps=30, params=None): 163 | """ 164 | Roll one trajectory with max_steps 165 | return: traj, shape of (max_steps, state_dim+action_dim+reward_dim) 166 | """ 167 | # s = env.reset() 168 | # if params is not None: 169 | # env.set_dynamics_parameters(params) # make sure .reset() not change env params 170 | 171 | if params is not None: 172 | env.set_dynamics_parameters(params) # make sure .reset() not change env params 173 | for _ in range(3): 174 | s = env.reset() 175 | traj=[] 176 | for _ in range(max_steps): 177 | a = policy.choose_action(s) 178 | try: 179 | s_, r, done, _ = env.step(a) 180 | except MujocoException: 181 | print('MujocoException') 182 | break 183 | s_a_r = np.concatenate((s,a, [r])) # state, action, reward 184 | traj.append(s_a_r) 185 | s=s_ 186 | if len(traj) == max_steps: 187 | break 188 | 189 | if len(traj)