├── cfg ├── pbt │ └── no_pbt.yaml ├── train │ ├── SoloParkourDDPG_demos_generate.yaml │ ├── SoloParkourDDPG_demos_rnn_vision.yaml │ └── SoloParkourPPO.yaml ├── config.yaml └── task │ └── SoloParkour.yaml ├── assets └── teaser.jpeg ├── solo12 ├── meshes │ ├── checker_blue.png │ ├── stl │ │ ├── solo12 │ │ │ ├── solo_12_base.stl │ │ │ ├── solo12_hip_fe_fl.stl │ │ │ ├── solo12_hip_fe_fr.stl │ │ │ ├── solo12_hip_fe_hl.stl │ │ │ ├── solo12_hip_fe_hr.stl │ │ │ ├── solo12_coordinate_systems.png │ │ │ ├── _solo12_visualisation_model.PDF │ │ │ ├── solo12_coordinate_systems_2.png │ │ │ ├── solo12_coordinate_systems_3.png │ │ │ ├── solo12_coordinate_systems_4.png │ │ │ ├── solo12_lower_leg_left_side.stl │ │ │ ├── solo12_lower_leg_right_side.stl │ │ │ ├── solo12_upper_leg_left_side.stl │ │ │ └── solo12_upper_leg_right_side.stl │ │ ├── with_foot │ │ │ ├── solo_body.stl │ │ │ ├── solo_foot.stl │ │ │ ├── solo_lower_leg_left_side.stl │ │ │ ├── solo_upper_leg_left_side.stl │ │ │ ├── solo_lower_leg_right_side.stl │ │ │ └── solo_upper_leg_right_side.stl │ │ └── without_foot │ │ │ ├── solo_body.stl │ │ │ ├── solo_lower_leg_left_side.stl │ │ │ ├── solo_upper_leg_left_side.stl │ │ │ ├── solo_lower_leg_right_side.stl │ │ │ ├── solo_upper_leg_right_side.stl │ │ │ ├── solo_lower_leg_v2_left_side.stl │ │ │ └── solo_lower_leg_v2_right_side.stl │ ├── obj │ │ ├── solo12 │ │ │ └── _solo12_visualisation_model.PDF │ │ ├── with_foot │ │ │ ├── solo_body.mtl │ │ │ ├── solo_foot.mtl │ │ │ ├── solo_lower_leg_left_side.mtl │ │ │ ├── solo_lower_leg_right_side.mtl │ │ │ ├── solo_upper_leg_left_side.mtl │ │ │ └── solo_upper_leg_right_side.mtl │ │ └── without_foot │ │ │ ├── solo_body.mtl │ │ │ ├── solo_lower_leg_left_side.mtl │ │ │ ├── solo_upper_leg_left_side.mtl │ │ │ ├── solo_lower_leg_right_side.mtl │ │ │ └── solo_upper_leg_right_side.mtl │ ├── plane.mtl │ ├── plane.obj │ ├── cube.obj │ ├── sphere.obj │ └── cylinder.obj ├── srdf │ └── solo.srdf ├── solo12_mpi_scaled.urdf └── solo12_mpi.urdf ├── LICENSE.txt ├── README.md ├── train.py ├── utils ├── constraint_manager.py └── isaacgymenvs_make.py ├── algos ├── PPO.py ├── DDPG_demos_generate.py └── DDPG_demos_rnn_vision.py └── tasks └── terrainParkour.py /cfg/pbt/no_pbt.yaml: -------------------------------------------------------------------------------- 1 | enabled: False -------------------------------------------------------------------------------- /assets/teaser.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/assets/teaser.jpeg -------------------------------------------------------------------------------- /solo12/meshes/checker_blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/checker_blue.png -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo_12_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo_12_base.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/with_foot/solo_body.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_body.stl 
-------------------------------------------------------------------------------- /solo12/meshes/stl/with_foot/solo_foot.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_foot.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_body.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_body.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_hip_fe_fl.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_hip_fe_fl.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_hip_fe_fr.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_hip_fe_fr.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_hip_fe_hl.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_hip_fe_hl.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_hip_fe_hr.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_hip_fe_hr.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_coordinate_systems.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_coordinate_systems.png -------------------------------------------------------------------------------- /solo12/meshes/obj/solo12/_solo12_visualisation_model.PDF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/obj/solo12/_solo12_visualisation_model.PDF -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/_solo12_visualisation_model.PDF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/_solo12_visualisation_model.PDF -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_coordinate_systems_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_coordinate_systems_2.png -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_coordinate_systems_3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_coordinate_systems_3.png -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_coordinate_systems_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_coordinate_systems_4.png -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_lower_leg_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_lower_leg_left_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_lower_leg_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_lower_leg_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_upper_leg_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_upper_leg_left_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_upper_leg_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_upper_leg_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/with_foot/solo_lower_leg_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_lower_leg_left_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/with_foot/solo_upper_leg_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_upper_leg_left_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/with_foot/solo_lower_leg_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_lower_leg_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/with_foot/solo_upper_leg_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_upper_leg_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_lower_leg_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_lower_leg_left_side.stl 
-------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_upper_leg_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_upper_leg_left_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_lower_leg_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_lower_leg_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_upper_leg_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_upper_leg_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_lower_leg_v2_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_lower_leg_v2_left_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_lower_leg_v2_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_lower_leg_v2_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/plane.mtl: -------------------------------------------------------------------------------- 1 | newmtl Material 2 | Ns 10.0000 3 | Ni 1.5000 4 | d 1.0000 5 | Tr 0.0000 6 | Tf 1.0000 1.0000 1.0000 7 | illum 2 8 | Ka 0.0000 0.0000 0.0000 9 | Kd 0.5880 0.5880 0.5880 10 | Ks 0.0000 0.0000 0.0000 11 | Ke 0.0000 0.0000 0.0000 12 | map_Ka cube.tga 13 | map_Kd checker_blue.png 14 | 15 | 16 | -------------------------------------------------------------------------------- /solo12/meshes/obj/with_foot/solo_body.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/with_foot/solo_foot.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- 
/solo12/meshes/obj/without_foot/solo_body.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/plane.obj: -------------------------------------------------------------------------------- 1 | # Blender v2.66 (sub 1) OBJ File: '' 2 | # www.blender.org 3 | mtllib plane.mtl 4 | o Plane 5 | v 15.000000 -15.000000 0.000000 6 | v 15.000000 15.000000 0.000000 7 | v -15.000000 15.000000 0.000000 8 | v -15.000000 -15.000000 0.000000 9 | 10 | vt 15.000000 0.000000 11 | vt 15.000000 15.000000 12 | vt 0.000000 15.000000 13 | vt 0.000000 0.000000 14 | 15 | usemtl Material 16 | s off 17 | f 1/1 2/2 3/3 18 | f 1/1 3/3 4/4 19 | -------------------------------------------------------------------------------- /solo12/meshes/obj/with_foot/solo_lower_leg_left_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/with_foot/solo_lower_leg_right_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/with_foot/solo_upper_leg_left_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/with_foot/solo_upper_leg_right_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | 
illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/without_foot/solo_lower_leg_left_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/without_foot/solo_upper_leg_left_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/without_foot/solo_lower_leg_right_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/without_foot/solo_upper_leg_right_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /cfg/train/SoloParkourDDPG_demos_generate.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | 4 | algo: 5 | name: cat_ddpg_demos_generate_continuous 6 | 7 | config: 8 | name: ${resolve_default:SoloParkour,${....experiment}} 9 | full_experiment_name: ${.name} 10 | env_name: rlgpu 11 | total_timesteps: 2e8 12 | buffer_size: 4999680 # % (5 * 256) = 0 13 | batch_size: 512 14 | target_policy_path: "path/to/cleanrl_model.pt" 15 | demos_buffer_size: 1999360 # % (5 * 256) = 0 16 | horizon_length: 24 # 24 for t = 0.02 (decimation = 4) 17 | 18 | max_epochs: ${resolve_default:1000,${....max_iterations}} 19 | -------------------------------------------------------------------------------- 
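The comments in the config above ("% (5 * 256) = 0" and "24 for t = 0.02 (decimation = 4)") encode a bit of arithmetic that is easy to mis-read. Here is a minimal sketch that only re-checks those numbers, assuming the 256 is the `task.env.numEnvs=256` used for demo generation in the README and that the factor 5 is related to the depth camera `update_interval: 5` in `cfg/task/SoloParkour.yaml` (both are assumptions, not something the config states explicitly):

```python
# Sanity check of the arithmetic behind the config comments (illustrative sketch only).
sim_dt = 0.005        # sim.dt in cfg/task/SoloParkour.yaml
decimation = 4        # env.control.decimation
policy_dt = sim_dt * decimation            # one policy step = 0.02 s
assert abs(policy_dt - 0.02) < 1e-9        # matches "24 for t = 0.02 (decimation = 4)"
rollout_seconds = 24 * policy_dt           # horizon_length 24 -> 0.48 s per rollout

num_envs = 256        # task.env.numEnvs used for demo generation (README); assumed meaning of the "256"
chunk = 5 * num_envs  # the "5 * 256" from the buffer size comments; the factor 5 is an assumption
assert 4999680 % chunk == 0                # buffer_size
assert 1999360 % chunk == 0                # demos_buffer_size
print(policy_dt, rollout_seconds, 4999680 // chunk, 1999360 // chunk)
```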
/cfg/train/SoloParkourDDPG_demos_rnn_vision.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | 4 | algo: 5 | name: cat_ddpg_demos_rnn_vision_continuous 6 | 7 | config: 8 | name: ${resolve_default:SoloParkour,${....experiment}} 9 | full_experiment_name: ${.name} 10 | env_name: rlgpu 11 | gamma: 0.99 12 | tau: 0.05 13 | critic_learning_rate: 3.e-4 14 | actor_learning_rate: 5.e-4 15 | total_timesteps: 2e8 16 | buffer_size: 4999680 # % (5 * 256) = 0 17 | learning_starts: 25e3 18 | policy_frequency: 2 19 | batch_size: 128 20 | seq_len: 50 21 | policy_noise: 0.8 22 | noise_clip: 0.2 23 | demos_rb_path: "path/to/privileged/experience" 24 | horizon_length: 24 # 24 for t = 0.02 (decimation = 4) 25 | 26 | max_epochs: 5000 27 | -------------------------------------------------------------------------------- /solo12/meshes/cube.obj: -------------------------------------------------------------------------------- 1 | o Box 2 | v 0.5 0.5 0.5 3 | v 0.5 0.5 -0.5 4 | v 0.5 -0.5 0.5 5 | v 0.5 -0.5 -0.5 6 | v -0.5 0.5 -0.5 7 | v -0.5 0.5 0.5 8 | v -0.5 -0.5 -0.5 9 | v -0.5 -0.5 0.5 10 | v -0.5 0.5 -0.5 11 | v 0.5 0.5 -0.5 12 | v -0.5 0.5 0.5 13 | v 0.5 0.5 0.5 14 | v -0.5 -0.5 0.5 15 | v 0.5 -0.5 0.5 16 | v -0.5 -0.5 -0.5 17 | v 0.5 -0.5 -0.5 18 | v -0.5 0.5 0.5 19 | v 0.5 0.5 0.5 20 | v -0.5 -0.5 0.5 21 | v 0.5 -0.5 0.5 22 | v 0.5 0.5 -0.5 23 | v -0.5 0.5 -0.5 24 | v 0.5 -0.5 -0.5 25 | v -0.5 -0.5 -0.5 26 | vt 0 1 27 | vt 1 1 28 | vt 0 0 29 | vt 1 0 30 | vt 0 1 31 | vt 1 1 32 | vt 0 0 33 | vt 1 0 34 | vt 0 1 35 | vt 1 1 36 | vt 0 0 37 | vt 1 0 38 | vt 0 1 39 | vt 1 1 40 | vt 0 0 41 | vt 1 0 42 | vt 0 1 43 | vt 1 1 44 | vt 0 0 45 | vt 1 0 46 | vt 0 1 47 | vt 1 1 48 | vt 0 0 49 | vt 1 0 50 | vn 1 0 0 51 | vn 1 0 0 52 | vn 1 0 0 53 | vn 1 0 0 54 | vn -1 0 0 55 | vn -1 0 0 56 | vn -1 0 0 57 | vn -1 0 0 58 | vn 0 1 0 59 | vn 0 1 0 60 | vn 0 1 0 61 | vn 0 1 0 62 | vn 0 -1 0 63 | vn 0 -1 0 64 | vn 0 -1 0 65 | vn 0 -1 0 66 | vn 0 0 1 67 | vn 0 0 1 68 | vn 0 0 1 69 | vn 0 0 1 70 | vn 0 0 -1 71 | vn 0 0 -1 72 | vn 0 0 -1 73 | vn 0 0 -1 74 | f 1/1/1 3/3/3 2/2/2 75 | f 3/3/3 4/4/4 2/2/2 76 | f 5/5/5 7/7/7 6/6/6 77 | f 7/7/7 8/8/8 6/6/6 78 | f 9/9/9 11/11/11 10/10/10 79 | f 11/11/11 12/12/12 10/10/10 80 | f 13/13/13 15/15/15 14/14/14 81 | f 15/15/15 16/16/16 14/14/14 82 | f 17/17/17 19/19/19 18/18/18 83 | f 19/19/19 20/20/20 18/18/18 84 | f 21/21/21 23/23/23 22/22/22 85 | f 23/23/23 24/24/24 22/22/22 86 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2024, LAAS-CNRS 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | 26 | Files with a specific license in their header (like train.py) abide by their 27 | respective license. 28 | 29 | -------------------------------------------------------------------------------- /cfg/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | # Task name - used to pick the class to load 3 | task_name: ${task.name} 4 | # experiment name. defaults to name of training config 5 | experiment: '' 6 | 7 | # if set to positive integer, overrides the default number of environments 8 | num_envs: '' 9 | 10 | # seed - set to -1 to choose random seed 11 | seed: 42 12 | # set to True for deterministic performance 13 | torch_deterministic: False 14 | 15 | # set the maximum number of learning iterations to train for. overrides default per-environment setting 16 | max_iterations: '' 17 | 18 | ## Device config 19 | # 'physx' or 'flex' 20 | physics_engine: 'physx' 21 | # whether to use cpu or gpu pipeline 22 | pipeline: 'gpu' 23 | # device for running physics simulation 24 | sim_device: 'cuda:0' 25 | # device to run RL 26 | rl_device: 'cuda:0' 27 | graphics_device_id: 0 28 | 29 | ## PhysX arguments 30 | num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only. 31 | solver_type: 1 # 0: pgs, 1: tgs 32 | num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread 33 | 34 | # RLGames Arguments 35 | # test - if set, run policy in inference mode (requires setting checkpoint to load) 36 | test: False 37 | # used to set checkpoint path 38 | checkpoint: '' 39 | # set sigma when restoring network 40 | sigma: '' 41 | # set to True to use multi-gpu training 42 | multi_gpu: False 43 | 44 | wandb_activate: False 45 | wandb_group: '' 46 | wandb_name: ${train.params.config.name} 47 | wandb_entity: '' 48 | wandb_project: 'isaacgymenvs' 49 | wandb_tags: [] 50 | wandb_logcode_dir: '' 51 | 52 | capture_video: False 53 | capture_video_freq: 1464 54 | capture_video_len: 100 55 | force_render: True 56 | 57 | # disables rendering 58 | headless: False 59 | 60 | # set default task and default training config based on task 61 | defaults: 62 | - task: Ant 63 | - train: ${task}PPO 64 | - hydra/job_logging: disabled 65 | - pbt: no_pbt 66 | 67 | # set the directory where the output files get saved 68 | hydra: 69 | output_subdir: null 70 | run: 71 | dir: . 72 | 73 | -------------------------------------------------------------------------------- /cfg/train/SoloParkourPPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | 4 | algo: 5 | name: cat_a2c_continuous 6 | 7 | model: 8 | name: continuous_a2c_logstd 9 | 10 | network: 11 | name: actor_critic 12 | separate: True 13 | 14 | space: 15 | continuous: 16 | mu_activation: None 17 | sigma_activation: None 18 | mu_init: 19 | name: default 20 | sigma_init: 21 | name: const_initializer 22 | val: 0. # std = 1. 
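# note: with the continuous_a2c_logstd model above, sigma is a log-std, so an initial value of 0 corresponds to std = exp(0) = 1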
23 | fixed_sigma: True 24 | 25 | mlp: 26 | units: [512, 256, 128] 27 | activation: elu 28 | d2rl: False 29 | 30 | initializer: 31 | name: default 32 | regularizer: 33 | name: None 34 | # rnn: 35 | # name: lstm 36 | # units: 128 37 | # layers: 1 38 | # before_mlp: True 39 | # concat_input: True 40 | # layer_norm: False 41 | 42 | 43 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 44 | load_path: ${...checkpoint} # path to the checkpoint to load 45 | 46 | config: 47 | name: ${resolve_default:SoloParkour,${....experiment}} 48 | full_experiment_name: ${.name} 49 | env_name: rlgpu 50 | ppo: True 51 | multi_gpu: ${....multi_gpu} 52 | mixed_precision: True 53 | normalize_input: True 54 | normalize_value: True 55 | normalize_advantage: True 56 | value_bootstrap: True 57 | clip_actions: False 58 | num_actors: ${....task.env.numEnvs} 59 | reward_shaper: 60 | scale_value: 1.0 61 | gamma: 0.99 62 | tau: 0.95 63 | e_clip: 0.2 64 | entropy_coef: 0.001 65 | learning_rate: 3.e-4 # overwritten by adaptive lr_schedule 66 | lr_schedule: adaptive 67 | kl_threshold: 0.008 # target kl for adaptive lr 68 | truncate_grads: True 69 | grad_norm: 1. 70 | horizon_length: 24 # 24 for t = 0.02 (decimation = 4) 71 | minibatch_size: 16384 # 16384 for horizon_length 24 72 | mini_epochs: 5 73 | critic_coef: 2 74 | clip_value: True 75 | seq_len: 4 # only for rnn 76 | bounds_loss_coef: 0. 77 | kappa: 1.0 78 | 79 | max_epochs: ${resolve_default:20000,${....max_iterations}} 80 | save_best_after: 1000 81 | score_to_win: 2000000000 82 | save_frequency: 1000 83 | print_stats: False 84 | 85 | player: 86 | deterministic: True 87 | use_vecenv: True 88 | games_num: 2000 89 | print_stats: False 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SoloParkour: Constrained Reinforcement Learning for Visual Locomotion from Privileged Experience 2 | 3 | ![Figure 1 from paper](assets/teaser.jpeg) 4 | 5 | > [SoloParkour: Constrained Reinforcement Learning for Visual Locomotion from Privileged Experience](https://gepetto.github.io/SoloParkour/) 6 | > Elliot Chane-Sane*, Joseph Amigo*, Thomas Flayols, Ludovic Righetti, Nicolas Mansard, 7 | > **CoRL 2024** 8 | 9 | 10 | 11 | 12 | 13 | ## Usage 14 | 15 | 1. Train the privileged (Stage 1) policy: 16 | ```bash 17 | python train.py task=SoloParkour headless=True task.env.control.damping_curriculum.enable=True 18 | ``` 19 | 20 | 2. Generate the dataset of privileged experience: 21 | ```bash 22 | # replace with the correct path to the privileged policy weights 23 | policy_path=path/to/cleanrl_model.pt 24 | 25 | python train.py task=SoloParkour train=SoloParkourDDPG_demos_generate headless=True task.env.numEnvs=256 task.env.enableCameraSensors=True task.env.depth.use_depth=True task.env.terrain.maxInitMapLevel=9 train.params.config.target_policy_path=${policy_path} 26 | ``` 27 | 28 | 3. 
Train the visual (Stage 2) policy from privileged experience: 29 | ```bash 30 | # replace with the correct path to the privileged experience folder 31 | demo_path=path/to/folder/ # folder which contains rb_demos.pkl 32 | 33 | python train.py task=SoloParkour train=SoloParkourDDPG_demos_rnn_vision headless=True task.env.numEnvs=256 task.env.enableCameraSensors=True task.env.depth.use_depth=True train.params.config.demos_rb_path=${demo_path} 34 | ``` 35 | 36 | ## Installation 37 | 38 | ```bash 39 | conda create -n IsaacGymEnvs python=3.8 40 | conda activate IsaacGymEnvs 41 | 42 | conda install pip 43 | conda install -c conda-forge ninja 44 | 45 | # Download Isaac Gym: https://developer.nvidia.com/isaac-gym 46 | cd path/to/isaacgym/python && pip install --user --no-cache-dir -e . 47 | 48 | # Clone IsaacGymEnvs: https://github.com/isaac-sim/IsaacGymEnvs 49 | cd path/to/IsaacGymEnvs/ && pip install --user --no-cache-dir -e . 50 | 51 | pip uninstall torch torchvision 52 | pip install --user --no-cache-dir torch==2.2.0 torchvision==0.17.0 53 | # Fix compatibility problem with numpy version 54 | pip install --user --no-cache-dir "numpy<1.24" 55 | pip install --user --no-cache-dir texttable av 56 | 57 | # Add correct LD_LIBRARY_PATH on env start 58 | conda env config vars set LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib --name $CONDA_DEFAULT_ENV 59 | ``` 60 | 61 | ### Acknowledgements 62 | 63 | This code was based on the following repositories: 64 | - https://github.com/vwxyzjn/cleanrl 65 | - https://github.com/isaac-sim/IsaacGymEnvs 66 | - https://github.com/gepetto/constraints-as-terminations 67 | 68 | ### Citation 69 | 70 | ``` 71 | @inproceedings{chanesane2024solo, 72 | title = {SoloParkour: Constrained Reinforcement Learning for Visual Locomotion from Privileged Experience}, 73 | author = {Elliot Chane-Sane and Joseph Amigo and Thomas Flayols and Ludovic Righetti and Nicolas Mansard}, 74 | booktitle = {Conference on Robot Learning (CoRL)}, 75 | year = {2024}, 76 | } 77 | ``` 78 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # train.py 2 | # Script to train policies in Isaac Gym 3 | # 4 | # Copyright (c) 2018-2023, NVIDIA Corporation 5 | # All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | # 13 | # 2. Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | # 17 | # 3. Neither the name of the copyright holder nor the names of its 18 | # contributors may be used to endorse or promote products derived from 19 | # this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | import os 32 | import random 33 | 34 | import numpy as np 35 | 36 | import isaacgym 37 | from isaacgymenvs.pbt.pbt import PbtAlgoObserver, initial_pbt_check 38 | from omegaconf import DictConfig, OmegaConf 39 | import hydra 40 | from utils.isaacgymenvs_make import isaacgym_task_map, make 41 | from isaacgymenvs.utils.reformat import omegaconf_to_dict, print_dict 42 | from isaacgymenvs.utils.utils import set_np_formatting, set_seed 43 | 44 | import torch 45 | from algos.PPO import PPO, eval_PPO 46 | from algos.DDPG_demos_rnn_vision import DDPG_demos_rnn_vision, eval_DDPG_demos_rnn_vision 47 | from algos.DDPG_demos_generate import DDPG_demos_generate 48 | 49 | 50 | @hydra.main(config_name="config", config_path="./cfg") 51 | def launch_rlg_hydra(cfg: DictConfig): 52 | if cfg.pbt.enabled: 53 | initial_pbt_check(cfg) 54 | 55 | cfg_dict = omegaconf_to_dict(cfg) 56 | print_dict(cfg_dict) 57 | 58 | # set numpy formatting for printing only 59 | set_np_formatting() 60 | 61 | # global rank of the GPU 62 | global_rank = int(os.getenv("RANK", "0")) 63 | 64 | # sets seed. if seed is -1 will pick a random one 65 | cfg.seed = set_seed( 66 | cfg.seed, torch_deterministic=cfg.torch_deterministic, rank=global_rank 67 | ) 68 | 69 | # TRY NOT TO MODIFY: seeding 70 | random.seed(cfg.seed) 71 | np.random.seed(cfg.seed) 72 | torch.manual_seed(cfg.seed) 73 | torch.backends.cudnn.deterministic = True 74 | 75 | algo = cfg["train"]["params"]["algo"]["name"] 76 | 77 | envs = make( 78 | cfg.seed, 79 | cfg.task_name, 80 | cfg.task.env.numEnvs, 81 | cfg.sim_device, 82 | cfg.rl_device, 83 | cfg.graphics_device_id, 84 | cfg.headless, 85 | cfg.multi_gpu, 86 | cfg.capture_video, 87 | cfg.force_render, 88 | cfg 89 | ) 90 | 91 | if cfg["test"]: 92 | if algo == "cat_a2c_continuous": 93 | eval_PPO(cfg, envs) 94 | elif algo == "cat_ddpg_demos_rnn_vision_continuous": 95 | eval_DDPG_demos_rnn_vision(cfg, envs) 96 | else: 97 | raise NotImplementedError 98 | else: 99 | if algo == "cat_a2c_continuous": 100 | PPO(cfg, envs) 101 | elif algo == "cat_ddpg_demos_rnn_vision_continuous": 102 | DDPG_demos_rnn_vision(cfg, envs) 103 | elif algo == "cat_ddpg_demos_generate_continuous": 104 | DDPG_demos_generate(cfg, envs) 105 | else: 106 | raise NotImplementedError 107 | 108 | if __name__ == "__main__": 109 | launch_rlg_hydra() 110 | -------------------------------------------------------------------------------- /solo12/srdf/solo.srdf: --------------------------------------------------------------------------------
-------------------------------------------------------------------------------- /utils/constraint_manager.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class ConstraintManager: 4 | """Handle the computation of termination probabilities based on constraints 5 | violations (Constraints as Terminations). 6 | 7 | Args: 8 | tau (float): discount factor 9 | min_p (float): minimum termination probability 10 | """ 11 | 12 | def __init__(self, tau=0.95, min_p=0.0): 13 | self.running_maxes = {} # Polyak average of the maximum constraint violation 14 | self.running_mins = {} # Polyak average of the minimum constraint violation 15 | self.probs = {} # Termination probabilities for each constraint 16 | self.max_p = {} 17 | self.raw_constraints = {} 18 | self.tau = tau # Discount factor 19 | self.min_p = min_p # Minimum termination probability 20 | 21 | def reset(self): 22 | """Reset the termination probabilities of the constraint manager.""" 23 | self.probs = {} 24 | self.raw_constraints = {} 25 | 26 | def add(self, name, constraint, max_p=0.1): 27 | """Add a constraint violation to the constraint manager and compute the 28 | associated termination probability. 29 | 30 | Args: 31 | name (string): name of the constraint 32 | constraint (float tensor): value of constraint violations for this constraint 33 | max_p (float): maximum termination probability 34 | """ 35 | 36 | # First, put constraint in the form Torch.FloatTensor((num_envs, n_constraints)) 37 | # Convert constraints violation to float if they are not 38 | if not torch.is_floating_point(constraint): 39 | constraint = constraint.float() 40 | 41 | # Ensure constraint is 2-dimensional even with a single element 42 | if len(constraint.size()) == 1: 43 | constraint = constraint.unsqueeze(1) 44 | 45 | # Get the maximum constraint violation for the current step 46 | constraint_max = constraint.max(dim=0, keepdim=True)[0].clamp(min=1e-6) 47 | 48 | # Compute polyak average of the maximum constraint violation for this constraint 49 | if name not in self.running_maxes: 50 | self.running_maxes[name] = constraint_max 51 | else: 52 | self.running_maxes[name] = ( 53 | self.tau * self.running_maxes[name] + (1.0 - self.tau) * constraint_max 54 | ) 55 | 56 | self.raw_constraints[name] = constraint 57 | 58 | # Get samples for which there is a constraint violation 59 | mask = constraint > 0.0 60 | 61 | # Compute the termination probability which scales between min_p and max_p with 62 | # increasing constraint violation. Remains at 0 when there is no violation.
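# i.e. for each violated sample: p = min_p + clamp(violation / running_max, 0, 1) * (max_p - min_p)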
63 | probs = torch.zeros_like(constraint) 64 | probs[mask] = self.min_p + torch.clamp( 65 | constraint[mask] 66 | / (self.running_maxes[name].expand(constraint.size())[mask]), 67 | min=0.0, 68 | max=1.0, 69 | ) * (max_p - self.min_p) 70 | self.probs[name] = probs 71 | self.max_p[name] = torch.tensor(max_p, device = constraint.device).repeat(constraint.shape[1]) 72 | 73 | def get_probs(self): 74 | """Returns the termination probabilities due to constraint violations.""" 75 | probs = torch.cat(list(self.probs.values()), dim=1) 76 | probs = probs.max(1).values 77 | return probs 78 | 79 | def get_raw_constraints(self): 80 | return torch.cat(list(self.raw_constraints.values()), dim=1) 81 | 82 | def get_running_maxes(self): 83 | return torch.cat(list(self.running_maxes.values()), dim = 1) 84 | 85 | def get_max_p(self): 86 | return torch.cat(list(self.max_p.values())) 87 | 88 | def get_str(self, names=None): 89 | """Get a debug string with constraints names and their average termination probabilities""" 90 | if names is None: 91 | names = list(self.probs.keys()) 92 | txt = "" 93 | for name in names: 94 | txt += " {}: {}".format( 95 | name, 96 | str( 97 | 100.0 * self.probs[name].max(1).values.gt(0.0).float().mean().item() 98 | )[:4], 99 | ) 100 | # txt += " {}: {}".format(name, str(100.0*self.probs[name].max(1).values.float().mean().item())[:4]) 101 | 102 | return txt[1:] 103 | 104 | def log_all(self, episode_sums): 105 | """Log terminations probabilities in episode_sums with cstr_NAME key.""" 106 | for name in list(self.probs.keys()): 107 | values = self.probs[name].max(1).values.gt(0.0).float() 108 | if "cstr_" + name not in episode_sums: 109 | episode_sums["cstr_" + name] = torch.zeros_like(values) 110 | episode_sums["cstr_" + name] += values 111 | 112 | def get_names(self): 113 | """Return a list of all constraint names.""" 114 | return list(self.probs.keys()) 115 | 116 | def get_vals(self): 117 | """Return a list of all constraint termination probabilities.""" 118 | res = [] 119 | for key in self.probs.keys(): 120 | res += [100.0 * self.probs[key].max(1).values.gt(0.0).float().mean().item()] 121 | return res 122 | -------------------------------------------------------------------------------- /cfg/task/SoloParkour.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: SoloParkour 3 | 4 | physics_engine: "physx" 5 | 6 | env: 7 | numEnvs: ${resolve_default:4096,${...num_envs}} 8 | numActions: 12 9 | numLatent: 0 # (8, 3, 3) 10 | numHistorySamples: 1 # t-1, t-4, t-7 11 | numHistoryStep: 0 # t-1 -> t-4 -> t-7 12 | envSpacing: 3. # [m] 13 | enableDebugVis: False 14 | enableDebugPlots: False 15 | enableJoystick: False 16 | onlyForwards: False 17 | onlyForwardsVelocity: 1.0 18 | startAtLevel: -1 19 | 20 | terrain: 21 | terrainType: trimesh # none, plane, or trimesh 22 | staticFriction: 1.0 # [-] 23 | dynamicFriction: 1.0 # [-] 24 | restitution: 0. # [-] 25 | # rough terrain only: 26 | curriculum: true 27 | maxInitMapLevel: 0 28 | minInitMapLevel: 0 29 | mapLength: 8. 30 | mapWidth: 4. 
31 | numLevels: 10 32 | numTerrains: 20 33 | 34 | # tri mesh only: 35 | slopeTreshold: 0.2 36 | 37 | terrainProportions: # Proportions for each kind of terrain 38 | gap_parkour: 0.55 39 | jump_parkour: 0.3 40 | stairs_parkour: 0.0 41 | hurdle_parkour: 0.1 42 | crawl_parkour: 0.05 43 | random_uniform: 0.0 44 | 45 | baseInitState: 46 | pos: [0.0, 0.0, 0.3] # x,y,z [m] 47 | rot: [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] 48 | vLinear: [0.0, 0.0, 0.0] # x,y,z [m/s] 49 | vAngular: [0.0, 0.0, 0.0] # x,y,z [rad/s] 50 | 51 | randomCommandVelocityRanges: 52 | # train 53 | linear_x: [0.0, 1.5] # min max [m/s] # -0.3 1.0 54 | linear_y: [-0.5, 0.5] # min max [m/s] 55 | yaw: [-0.78, 0.78] # min max [rad/s] 56 | 57 | control: 58 | # PD Drive parameters: 59 | stiffness: 4.0 # [N*m/rad] 60 | damping: 0.2 # [N*m*s/rad] 61 | # action scale: target angle = actionScale * action + defaultAngle 62 | actionScale: 0.5 63 | # decimation: Number of control action updates @ sim DT per policy DT 64 | decimation: 4 65 | damping_curriculum: 66 | enable: False 67 | num_steps: 400000 68 | init_damping: 0.05 69 | 70 | defaultJointAngles: # = target angles when action = 0.0 71 | FL_HAA: 0.05 # [rad] 72 | HL_HAA: 0.05 # [rad] 73 | FR_HAA: -0.05 # [rad] 74 | HR_HAA: -0.05 # [rad] 75 | 76 | FL_HFE: 0.6 # [rad] 77 | HL_HFE: 0.6 # [rad] 78 | FR_HFE: 0.6 # [rad] 79 | HR_HFE: 0.6 # [rad] 80 | 81 | FL_KFE: -1.2 # [rad] 82 | HL_KFE: -1.2 # [rad] 83 | FR_KFE: -1.2 # [rad] 84 | HR_KFE: -1.2 # [rad] 85 | 86 | urdfAsset: 87 | file: "solo12/solo12_mpi.urdf" 88 | baseName: base_link 89 | footName: FOOT 90 | shinName: LOWER 91 | kneeName: UPPER 92 | collapseFixedJoints: True 93 | fixBaseLink: false 94 | defaultDofDriveMode: 4 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 4 effort) 95 | flip_visual_attachments: False 96 | armature: 0.00036207 # i.e motor inertia at joint level 97 | 98 | learn: 99 | allowShinContacts: True 100 | allowKneeContacts: false 101 | 102 | # Scales of rewards 103 | rewMult: 1.0 104 | survivalBonus: 0.5 105 | 106 | # Misc quantities for rewards 107 | linearVelocityXYRewardDelta: 0.25 108 | angularVelocityZRewardDelta: 0.25 109 | feetAirTimeRewardTarget: 0.25 110 | 111 | # Constraints 112 | enableConstraints: "cat" # "none", "cat" 113 | constraints_CaT: 114 | feetAirTimeConstraint: 0.2 115 | tauConstraint: 0.95 116 | minPConstraint: 0.0 117 | softPConstraint: 0.1 118 | useSoftPCurriculum: True 119 | trackingConstraint: 1000.0 120 | 121 | limits: 122 | torque: 3.0 123 | acc: 800.0 124 | vel: 16.0 125 | action_rate: 80.0 126 | base_orientation: 0.1 127 | foot_contact_force: 80.0 128 | HFE: 1.9 129 | HFE_min: -0.2 130 | HAA: 0.2 131 | min_base_height: 0.06 132 | heading: 0.1 133 | 134 | # Other 135 | flatTerrainThreshold: 0.001 136 | vel_deadzone: 0.2 137 | base_height_target: 0.245 138 | gait_period: 0.4 139 | 140 | # observations 141 | observe: 142 | base_lin_vel: false 143 | base_ang_vel: true 144 | commands: true 145 | misc: true 146 | heights: true 147 | ceilings: true 148 | phases: false 149 | imu: false 150 | measured_points_step: 0.08 151 | measured_points_x: [-3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 152 | measured_points_y: [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5] 153 | phases_freq: 2.0 154 | 155 | # normalization 156 | linearVelocityScale: 2.0 157 | angularVelocityScale: 0.25 158 | dofPositionScale: 1.0 159 | dofVelocityScale: 0.05 160 | heightMeasurementScale: 5.0 161 | imuAccelerationScale: 0.1 162 | 163 | # noise 164 | addNoise: true 165 | noiseLevel: 1.0 # scales other values 166 | 
dofPositionNoise: 0.01 167 | dofVelocityNoise: 0.2 168 | linearVelocityNoise: 0.0 169 | angularVelocityNoise: 0.001 170 | gravityNoise: 0.05 171 | heightMeasurementNoise: 0.01 172 | 173 | # friction randomization 174 | randomizeFriction: true 175 | frictionRange: [0.5, 1.25] 176 | 177 | # random pushes during training 178 | pushRobots: true 179 | pushInterval_s: 8 180 | 181 | # episode length in seconds 182 | episodeLength_s: 15 183 | 184 | # viewer cam: 185 | viewer: 186 | refEnv: 0 187 | pos: [0, 0, 5] # [m] 188 | lookat: [1., 1, 4.5] # [m] 189 | 190 | # set to True if you use camera sensors in the environment 191 | enableCameraSensors: False 192 | depth: 193 | use_depth: False 194 | position: [0.24, 0, 0.03] # [0.21, 0, 0.03] 195 | angle: [-5, 5] 196 | update_interval: 5 197 | image_size: [48, 85] # [58, 87] 198 | depth_clip_max: 1.0 199 | depth_clip_min: 0.04 200 | horizontal_fov: 87 201 | 202 | sim: 203 | dt: 0.005 204 | substeps: 1 205 | up_axis: "z" 206 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 207 | gravity: [0.0, 0.0, -9.81] 208 | physx: 209 | num_threads: ${....num_threads} 210 | solver_type: 0 # ${....solver_type} 211 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 212 | num_position_iterations: 4 213 | num_velocity_iterations: 1 214 | contact_offset: 0.02 215 | rest_offset: 0.0 216 | bounce_threshold_velocity: 0.2 217 | max_depenetration_velocity: 100.0 218 | default_buffer_size_multiplier: 5.0 219 | max_gpu_contact_pairs: 33554432 # 8388608 = 8*1024*1024 220 | num_subscenes: ${....num_subscenes} 221 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 222 | 223 | task: 224 | randomize: False 225 | -------------------------------------------------------------------------------- /utils/isaacgymenvs_make.py: -------------------------------------------------------------------------------- 1 | # train.py 2 | # Script to train policies in Isaac Gym 3 | # 4 | # Copyright (c) 2018-2023, NVIDIA Corporation 5 | # All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | # 13 | # 2. Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | # 17 | # 3. Neither the name of the copyright holder nor the names of its 18 | # contributors may be used to endorse or promote products derived from 19 | # this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | 32 | 33 | import os 34 | from typing import Callable, Dict, Tuple, Any 35 | import hydra 36 | from hydra import compose, initialize 37 | from hydra.core.hydra_config import HydraConfig 38 | from omegaconf import DictConfig 39 | from isaacgymenvs.utils.reformat import omegaconf_to_dict 40 | from tasks.solo_parkour import SoloParkour 41 | 42 | isaacgym_task_map = { 43 | "SoloParkour": SoloParkour, 44 | } 45 | 46 | def make( 47 | seed: int, 48 | task: str, 49 | num_envs: int, 50 | sim_device: str, 51 | rl_device: str, 52 | graphics_device_id: int = -1, 53 | headless: bool = False, 54 | multi_gpu: bool = False, 55 | virtual_screen_capture: bool = False, 56 | force_render: bool = True, 57 | cfg: DictConfig = None 58 | ): 59 | # from isaacgymenvs.utils.rlgames_utils import get_rlgames_env_creator 60 | # create hydra config if no config passed in 61 | if cfg is None: 62 | # reset current hydra config if already parsed (but not passed in here) 63 | if HydraConfig.initialized(): 64 | task = HydraConfig.get().runtime.choices['task'] 65 | hydra.core.global_hydra.GlobalHydra.instance().clear() 66 | 67 | with initialize(config_path="./cfg"): 68 | cfg = compose(config_name="config", overrides=[f"task={task}"]) 69 | cfg_dict = omegaconf_to_dict(cfg.task) 70 | cfg_dict['env']['numEnvs'] = num_envs 71 | # reuse existing config 72 | else: 73 | cfg_dict = omegaconf_to_dict(cfg.task) 74 | 75 | # Adding custom info to access within the env 76 | cfg_dict['test'] = cfg.test 77 | cfg_dict['horizon_length'] = cfg.train.params.config.horizon_length 78 | cfg_dict['max_epochs'] = cfg.train.params.config.max_epochs 79 | 80 | create_rlgpu_env = get_rlgames_env_creator( 81 | seed=seed, 82 | task_config=cfg_dict, 83 | task_name=cfg_dict["name"], 84 | sim_device=sim_device, 85 | rl_device=rl_device, 86 | graphics_device_id=graphics_device_id, 87 | headless=headless, 88 | multi_gpu=multi_gpu, 89 | virtual_screen_capture=virtual_screen_capture, 90 | force_render=force_render, 91 | ) 92 | return create_rlgpu_env() 93 | 94 | def get_rlgames_env_creator( 95 | # used to create the vec task 96 | seed: int, 97 | task_config: dict, 98 | task_name: str, 99 | sim_device: str, 100 | rl_device: str, 101 | graphics_device_id: int, 102 | headless: bool, 103 | # used to handle multi-gpu case 104 | multi_gpu: bool = False, 105 | post_create_hook: Callable = None, 106 | virtual_screen_capture: bool = False, 107 | force_render: bool = False, 108 | ): 109 | """Parses the configuration parameters for the environment task and creates a VecTask 110 | 111 | Args: 112 | task_config: environment configuration. 113 | task_name: Name of the task, used to evaluate based on the imported name (eg 'Trifinger') 114 | sim_device: The type of env device, eg 'cuda:0' 115 | rl_device: Device that RL will be done on, eg 'cuda:0' 116 | graphics_device_id: Graphics device ID. 117 | headless: Whether to run in headless mode. 
118 | multi_gpu: Whether to use multi gpu 119 | post_create_hook: Hooks to be called after environment creation. 120 | [Needed to setup WandB only for one of the RL Games instances when doing multiple GPUs] 121 | virtual_screen_capture: Set to True to allow the users get captured screen in RGB array via `env.render(mode='rgb_array')`. 122 | force_render: Set to True to always force rendering in the steps (if the `control_freq_inv` is greater than 1 we suggest stting this arg to True) 123 | Returns: 124 | A VecTaskPython object. 125 | """ 126 | def create_rlgpu_env(): 127 | """ 128 | Creates the task from configurations and wraps it using RL-games wrappers if required. 129 | """ 130 | if multi_gpu: 131 | 132 | local_rank = int(os.getenv("LOCAL_RANK", "0")) 133 | global_rank = int(os.getenv("RANK", "0")) 134 | 135 | # local rank of the GPU in a node 136 | local_rank = int(os.getenv("LOCAL_RANK", "0")) 137 | # global rank of the GPU 138 | global_rank = int(os.getenv("RANK", "0")) 139 | # total number of GPUs across all nodes 140 | world_size = int(os.getenv("WORLD_SIZE", "1")) 141 | 142 | print(f"global_rank = {global_rank} local_rank = {local_rank} world_size = {world_size}") 143 | 144 | _sim_device = f'cuda:{local_rank}' 145 | _rl_device = f'cuda:{local_rank}' 146 | 147 | task_config['rank'] = local_rank 148 | task_config['rl_device'] = _rl_device 149 | else: 150 | _sim_device = sim_device 151 | _rl_device = rl_device 152 | 153 | # create native task and pass custom config 154 | env = isaacgym_task_map[task_name]( 155 | cfg=task_config, 156 | rl_device=_rl_device, 157 | sim_device=_sim_device, 158 | graphics_device_id=graphics_device_id, 159 | headless=headless, 160 | virtual_screen_capture=virtual_screen_capture, 161 | force_render=force_render, 162 | ) 163 | 164 | if post_create_hook is not None: 165 | post_create_hook() 166 | 167 | return env 168 | return create_rlgpu_env 169 | -------------------------------------------------------------------------------- /solo12/meshes/sphere.obj: -------------------------------------------------------------------------------- 1 | o Sphere 2 | v 0 1 0 3 | v 0 1 0 4 | v 0 1 0 5 | v 0 1 0 6 | v 0 1 0 7 | v 0 1 0 8 | v 0 1 0 9 | v 0 1 0 10 | v 0 1 0 11 | v -0.5 0.8660253882408142 0 12 | v -0.3535533845424652 0.8660253882408142 0.3535533845424652 13 | v -3.0616171314629196e-17 0.8660253882408142 0.5 14 | v 0.3535533845424652 0.8660253882408142 0.3535533845424652 15 | v 0.5 0.8660253882408142 6.123234262925839e-17 16 | v 0.3535533845424652 0.8660253882408142 -0.3535533845424652 17 | v 9.184850732644269e-17 0.8660253882408142 -0.5 18 | v -0.3535533845424652 0.8660253882408142 -0.3535533845424652 19 | v -0.5 0.8660253882408142 -1.2246468525851679e-16 20 | v -0.8660253882408142 0.5 0 21 | v -0.6123724579811096 0.5 0.6123724579811096 22 | v -5.302876236065149e-17 0.5 0.8660253882408142 23 | v 0.6123724579811096 0.5 0.6123724579811096 24 | v 0.8660253882408142 0.5 1.0605752472130298e-16 25 | v 0.6123724579811096 0.5 -0.6123724579811096 26 | v 1.5908628708195447e-16 0.5 -0.8660253882408142 27 | v -0.6123724579811096 0.5 -0.6123724579811096 28 | v -0.8660253882408142 0.5 -2.1211504944260596e-16 29 | v -1 6.123234262925839e-17 0 30 | v -0.7071067690849304 6.123234262925839e-17 0.7071067690849304 31 | v -6.123234262925839e-17 6.123234262925839e-17 1 32 | v 0.7071067690849304 6.123234262925839e-17 0.7071067690849304 33 | v 1 6.123234262925839e-17 1.2246468525851679e-16 34 | v 0.7071067690849304 6.123234262925839e-17 -0.7071067690849304 35 | v 1.8369701465288538e-16 
6.123234262925839e-17 -1 36 | v -0.7071067690849304 6.123234262925839e-17 -0.7071067690849304 37 | v -1 6.123234262925839e-17 -2.4492937051703357e-16 38 | v -0.8660253882408142 -0.5 0 39 | v -0.6123724579811096 -0.5 0.6123724579811096 40 | v -5.302876236065149e-17 -0.5 0.8660253882408142 41 | v 0.6123724579811096 -0.5 0.6123724579811096 42 | v 0.8660253882408142 -0.5 1.0605752472130298e-16 43 | v 0.6123724579811096 -0.5 -0.6123724579811096 44 | v 1.5908628708195447e-16 -0.5 -0.8660253882408142 45 | v -0.6123724579811096 -0.5 -0.6123724579811096 46 | v -0.8660253882408142 -0.5 -2.1211504944260596e-16 47 | v -0.5 -0.8660253882408142 0 48 | v -0.3535533845424652 -0.8660253882408142 0.3535533845424652 49 | v -3.0616171314629196e-17 -0.8660253882408142 0.5 50 | v 0.3535533845424652 -0.8660253882408142 0.3535533845424652 51 | v 0.5 -0.8660253882408142 6.123234262925839e-17 52 | v 0.3535533845424652 -0.8660253882408142 -0.3535533845424652 53 | v 9.184850732644269e-17 -0.8660253882408142 -0.5 54 | v -0.3535533845424652 -0.8660253882408142 -0.3535533845424652 55 | v -0.5 -0.8660253882408142 -1.2246468525851679e-16 56 | v -1.2246468525851679e-16 -1 0 57 | v -8.659560603426554e-17 -1 8.659560603426554e-17 58 | v -7.498798786105971e-33 -1 1.2246468525851679e-16 59 | v 8.659560603426554e-17 -1 8.659560603426554e-17 60 | v 1.2246468525851679e-16 -1 1.4997597572211942e-32 61 | v 8.659560603426554e-17 -1 -8.659560603426554e-17 62 | v 2.2496396358317913e-32 -1 -1.2246468525851679e-16 63 | v -8.659560603426554e-17 -1 -8.659560603426554e-17 64 | v -1.2246468525851679e-16 -1 -2.9995195144423884e-32 65 | vt 0 1 66 | vt 0.125 1 67 | vt 0.25 1 68 | vt 0.375 1 69 | vt 0.5 1 70 | vt 0.625 1 71 | vt 0.75 1 72 | vt 0.875 1 73 | vt 1 1 74 | vt 0 0.8333333134651184 75 | vt 0.125 0.8333333134651184 76 | vt 0.25 0.8333333134651184 77 | vt 0.375 0.8333333134651184 78 | vt 0.5 0.8333333134651184 79 | vt 0.625 0.8333333134651184 80 | vt 0.75 0.8333333134651184 81 | vt 0.875 0.8333333134651184 82 | vt 1 0.8333333134651184 83 | vt 0 0.6666666865348816 84 | vt 0.125 0.6666666865348816 85 | vt 0.25 0.6666666865348816 86 | vt 0.375 0.6666666865348816 87 | vt 0.5 0.6666666865348816 88 | vt 0.625 0.6666666865348816 89 | vt 0.75 0.6666666865348816 90 | vt 0.875 0.6666666865348816 91 | vt 1 0.6666666865348816 92 | vt 0 0.5 93 | vt 0.125 0.5 94 | vt 0.25 0.5 95 | vt 0.375 0.5 96 | vt 0.5 0.5 97 | vt 0.625 0.5 98 | vt 0.75 0.5 99 | vt 0.875 0.5 100 | vt 1 0.5 101 | vt 0 0.3333333432674408 102 | vt 0.125 0.3333333432674408 103 | vt 0.25 0.3333333432674408 104 | vt 0.375 0.3333333432674408 105 | vt 0.5 0.3333333432674408 106 | vt 0.625 0.3333333432674408 107 | vt 0.75 0.3333333432674408 108 | vt 0.875 0.3333333432674408 109 | vt 1 0.3333333432674408 110 | vt 0 0.1666666716337204 111 | vt 0.125 0.1666666716337204 112 | vt 0.25 0.1666666716337204 113 | vt 0.375 0.1666666716337204 114 | vt 0.5 0.1666666716337204 115 | vt 0.625 0.1666666716337204 116 | vt 0.75 0.1666666716337204 117 | vt 0.875 0.1666666716337204 118 | vt 1 0.1666666716337204 119 | vt 0 0 120 | vt 0.125 0 121 | vt 0.25 0 122 | vt 0.375 0 123 | vt 0.5 0 124 | vt 0.625 0 125 | vt 0.75 0 126 | vt 0.875 0 127 | vt 1 0 128 | vn 0 1 0 129 | vn 0 1 0 130 | vn 0 1 0 131 | vn 0 1 0 132 | vn 0 1 0 133 | vn 0 1 0 134 | vn 0 1 0 135 | vn 0 1 0 136 | vn 0 1 0 137 | vn -0.5 0.8660253882408142 0 138 | vn -0.3535533845424652 0.8660253882408142 0.3535533845424652 139 | vn -3.0616171314629196e-17 0.8660253882408142 0.5 140 | vn 0.3535533845424652 0.8660253882408142 0.3535533845424652 141 | 
vn 0.5 0.8660253882408142 6.123234262925839e-17 142 | vn 0.3535533845424652 0.8660253882408142 -0.3535533845424652 143 | vn 9.184850732644269e-17 0.8660253882408142 -0.5 144 | vn -0.3535533845424652 0.8660253882408142 -0.3535533845424652 145 | vn -0.5 0.8660253882408142 -1.2246468525851679e-16 146 | vn -0.8660253882408142 0.5 0 147 | vn -0.6123724579811096 0.5 0.6123724579811096 148 | vn -5.302876236065149e-17 0.5 0.8660253882408142 149 | vn 0.6123724579811096 0.5 0.6123724579811096 150 | vn 0.8660253882408142 0.5 1.0605752472130298e-16 151 | vn 0.6123724579811096 0.5 -0.6123724579811096 152 | vn 1.5908628708195447e-16 0.5 -0.8660253882408142 153 | vn -0.6123724579811096 0.5 -0.6123724579811096 154 | vn -0.8660253882408142 0.5 -2.1211504944260596e-16 155 | vn -1 6.123234262925839e-17 0 156 | vn -0.7071067690849304 6.123234262925839e-17 0.7071067690849304 157 | vn -6.123234262925839e-17 6.123234262925839e-17 1 158 | vn 0.7071067690849304 6.123234262925839e-17 0.7071067690849304 159 | vn 1 6.123234262925839e-17 1.2246468525851679e-16 160 | vn 0.7071067690849304 6.123234262925839e-17 -0.7071067690849304 161 | vn 1.8369701465288538e-16 6.123234262925839e-17 -1 162 | vn -0.7071067690849304 6.123234262925839e-17 -0.7071067690849304 163 | vn -1 6.123234262925839e-17 -2.4492937051703357e-16 164 | vn -0.8660253882408142 -0.5 0 165 | vn -0.6123724579811096 -0.5 0.6123724579811096 166 | vn -5.302876236065149e-17 -0.5 0.8660253882408142 167 | vn 0.6123724579811096 -0.5 0.6123724579811096 168 | vn 0.8660253882408142 -0.5 1.0605752472130298e-16 169 | vn 0.6123724579811096 -0.5 -0.6123724579811096 170 | vn 1.5908628708195447e-16 -0.5 -0.8660253882408142 171 | vn -0.6123724579811096 -0.5 -0.6123724579811096 172 | vn -0.8660253882408142 -0.5 -2.1211504944260596e-16 173 | vn -0.5 -0.8660253882408142 0 174 | vn -0.3535533845424652 -0.8660253882408142 0.3535533845424652 175 | vn -3.0616171314629196e-17 -0.8660253882408142 0.5 176 | vn 0.3535533845424652 -0.8660253882408142 0.3535533845424652 177 | vn 0.5 -0.8660253882408142 6.123234262925839e-17 178 | vn 0.3535533845424652 -0.8660253882408142 -0.3535533845424652 179 | vn 9.184850732644269e-17 -0.8660253882408142 -0.5 180 | vn -0.3535533845424652 -0.8660253882408142 -0.3535533845424652 181 | vn -0.5 -0.8660253882408142 -1.2246468525851679e-16 182 | vn -1.2246468525851679e-16 -1 0 183 | vn -8.659560603426554e-17 -1 8.659560603426554e-17 184 | vn -7.498798786105971e-33 -1 1.2246468525851679e-16 185 | vn 8.659560603426554e-17 -1 8.659560603426554e-17 186 | vn 1.2246468525851679e-16 -1 1.4997597572211942e-32 187 | vn 8.659560603426554e-17 -1 -8.659560603426554e-17 188 | vn 2.2496396358317913e-32 -1 -1.2246468525851679e-16 189 | vn -8.659560603426554e-17 -1 -8.659560603426554e-17 190 | vn -1.2246468525851679e-16 -1 -2.9995195144423884e-32 191 | f 1/1/1 10/10/10 11/11/11 192 | f 2/2/2 11/11/11 12/12/12 193 | f 3/3/3 12/12/12 13/13/13 194 | f 4/4/4 13/13/13 14/14/14 195 | f 5/5/5 14/14/14 15/15/15 196 | f 6/6/6 15/15/15 16/16/16 197 | f 7/7/7 16/16/16 17/17/17 198 | f 8/8/8 17/17/17 18/18/18 199 | f 11/11/11 10/10/10 20/20/20 200 | f 10/10/10 19/19/19 20/20/20 201 | f 12/12/12 11/11/11 21/21/21 202 | f 11/11/11 20/20/20 21/21/21 203 | f 13/13/13 12/12/12 22/22/22 204 | f 12/12/12 21/21/21 22/22/22 205 | f 14/14/14 13/13/13 23/23/23 206 | f 13/13/13 22/22/22 23/23/23 207 | f 15/15/15 14/14/14 24/24/24 208 | f 14/14/14 23/23/23 24/24/24 209 | f 16/16/16 15/15/15 25/25/25 210 | f 15/15/15 24/24/24 25/25/25 211 | f 17/17/17 16/16/16 26/26/26 212 | f 16/16/16 25/25/25 
26/26/26 213 | f 18/18/18 17/17/17 27/27/27 214 | f 17/17/17 26/26/26 27/27/27 215 | f 20/20/20 19/19/19 29/29/29 216 | f 19/19/19 28/28/28 29/29/29 217 | f 21/21/21 20/20/20 30/30/30 218 | f 20/20/20 29/29/29 30/30/30 219 | f 22/22/22 21/21/21 31/31/31 220 | f 21/21/21 30/30/30 31/31/31 221 | f 23/23/23 22/22/22 32/32/32 222 | f 22/22/22 31/31/31 32/32/32 223 | f 24/24/24 23/23/23 33/33/33 224 | f 23/23/23 32/32/32 33/33/33 225 | f 25/25/25 24/24/24 34/34/34 226 | f 24/24/24 33/33/33 34/34/34 227 | f 26/26/26 25/25/25 35/35/35 228 | f 25/25/25 34/34/34 35/35/35 229 | f 27/27/27 26/26/26 36/36/36 230 | f 26/26/26 35/35/35 36/36/36 231 | f 29/29/29 28/28/28 38/38/38 232 | f 28/28/28 37/37/37 38/38/38 233 | f 30/30/30 29/29/29 39/39/39 234 | f 29/29/29 38/38/38 39/39/39 235 | f 31/31/31 30/30/30 40/40/40 236 | f 30/30/30 39/39/39 40/40/40 237 | f 32/32/32 31/31/31 41/41/41 238 | f 31/31/31 40/40/40 41/41/41 239 | f 33/33/33 32/32/32 42/42/42 240 | f 32/32/32 41/41/41 42/42/42 241 | f 34/34/34 33/33/33 43/43/43 242 | f 33/33/33 42/42/42 43/43/43 243 | f 35/35/35 34/34/34 44/44/44 244 | f 34/34/34 43/43/43 44/44/44 245 | f 36/36/36 35/35/35 45/45/45 246 | f 35/35/35 44/44/44 45/45/45 247 | f 38/38/38 37/37/37 47/47/47 248 | f 37/37/37 46/46/46 47/47/47 249 | f 39/39/39 38/38/38 48/48/48 250 | f 38/38/38 47/47/47 48/48/48 251 | f 40/40/40 39/39/39 49/49/49 252 | f 39/39/39 48/48/48 49/49/49 253 | f 41/41/41 40/40/40 50/50/50 254 | f 40/40/40 49/49/49 50/50/50 255 | f 42/42/42 41/41/41 51/51/51 256 | f 41/41/41 50/50/50 51/51/51 257 | f 43/43/43 42/42/42 52/52/52 258 | f 42/42/42 51/51/51 52/52/52 259 | f 44/44/44 43/43/43 53/53/53 260 | f 43/43/43 52/52/52 53/53/53 261 | f 45/45/45 44/44/44 54/54/54 262 | f 44/44/44 53/53/53 54/54/54 263 | f 47/47/47 46/46/46 56/56/56 264 | f 48/48/48 47/47/47 57/57/57 265 | f 49/49/49 48/48/48 58/58/58 266 | f 50/50/50 49/49/49 59/59/59 267 | f 51/51/51 50/50/50 60/60/60 268 | f 52/52/52 51/51/51 61/61/61 269 | f 53/53/53 52/52/52 62/62/62 270 | f 54/54/54 53/53/53 63/63/63 271 | -------------------------------------------------------------------------------- /algos/PPO.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | import os 4 | import random 5 | import time 6 | from datetime import datetime 7 | 8 | from omegaconf import DictConfig, OmegaConf 9 | 10 | import numpy as np 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.optim as optim 15 | from torch.distributions.normal import Normal 16 | from torch.utils.tensorboard import SummaryWriter 17 | 18 | class RunningMeanStd(nn.Module): 19 | def __init__(self, shape = (), epsilon=1e-08): 20 | super(RunningMeanStd, self).__init__() 21 | self.register_buffer("running_mean", torch.zeros(shape)) 22 | self.register_buffer("running_var", torch.ones(shape)) 23 | self.register_buffer("count", torch.ones(())) 24 | 25 | self.epsilon = epsilon 26 | 27 | def forward(self, obs, update = True): 28 | if update: 29 | self.update(obs) 30 | 31 | return (obs - self.running_mean) / torch.sqrt(self.running_var + self.epsilon) 32 | 33 | def update(self, x): 34 | """Updates the mean, var and count from a batch of samples.""" 35 | batch_mean = torch.mean(x, dim=0) 36 | batch_var = torch.var(x, correction=0, dim=0) 37 | batch_count = x.shape[0] 38 | self.update_from_moments(batch_mean, batch_var, batch_count) 39 | 40 | def update_from_moments(self, batch_mean, batch_var, batch_count): 41 | """Updates from batch mean, variance and count moments.""" 42 | 
self.running_mean, self.running_var, self.count = update_mean_var_count_from_moments( 43 | self.running_mean, self.running_var, self.count, batch_mean, batch_var, batch_count 44 | ) 45 | 46 | def update_mean_var_count_from_moments( 47 | mean, var, count, batch_mean, batch_var, batch_count 48 | ): 49 | """Updates the mean, var and count using the previous mean, var, count and batch values.""" 50 | delta = batch_mean - mean 51 | tot_count = count + batch_count 52 | 53 | new_mean = mean + delta * batch_count / tot_count 54 | m_a = var * count 55 | m_b = batch_var * batch_count 56 | M2 = m_a + m_b + torch.square(delta) * count * batch_count / tot_count 57 | new_var = M2 / tot_count 58 | new_count = tot_count 59 | 60 | return new_mean, new_var, new_count 61 | 62 | def layer_init(layer, std=np.sqrt(2), bias_const=0.0): 63 | torch.nn.init.orthogonal_(layer.weight, std) 64 | torch.nn.init.constant_(layer.bias, bias_const) 65 | return layer 66 | 67 | class Agent(nn.Module): 68 | def __init__(self, envs): 69 | super().__init__() 70 | self.critic = nn.Sequential( 71 | layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 512)), 72 | nn.ELU(), 73 | layer_init(nn.Linear(512, 256)), 74 | nn.ELU(), 75 | layer_init(nn.Linear(256, 128)), 76 | nn.ELU(), 77 | layer_init(nn.Linear(128, 1), std=1.0), 78 | ) 79 | self.actor_mean = nn.Sequential( 80 | layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 512)), 81 | nn.ELU(), 82 | layer_init(nn.Linear(512, 256)), 83 | nn.ELU(), 84 | layer_init(nn.Linear(256, 128)), 85 | nn.ELU(), 86 | layer_init(nn.Linear(128, np.prod(envs.single_action_space.shape)), std=0.01), 87 | ) 88 | self.actor_logstd = nn.Parameter(torch.zeros(1, np.prod(envs.single_action_space.shape))) 89 | 90 | self.obs_rms = RunningMeanStd(shape = envs.single_observation_space.shape) 91 | self.value_rms = RunningMeanStd(shape = ()) 92 | 93 | def get_value(self, x): 94 | return self.critic(x) 95 | 96 | def get_action_and_value(self, x, action=None): 97 | action_mean = self.actor_mean(x) 98 | action_logstd = self.actor_logstd.expand_as(action_mean) 99 | action_std = torch.exp(action_logstd) 100 | probs = Normal(action_mean, action_std) 101 | if action is None: 102 | action = probs.sample() 103 | return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(x) 104 | 105 | class ExtractObsWrapper(gym.ObservationWrapper): 106 | def observation(self, obs): 107 | return obs["obs"] 108 | 109 | def PPO(cfg: DictConfig, envs): 110 | run_path = f"runs/{cfg['train']['params']['config']['name']}_{datetime.now().strftime('%d-%H-%M-%S')}" 111 | 112 | writer = SummaryWriter(run_path) 113 | if not os.path.exists(run_path): 114 | os.makedirs(run_path) 115 | OmegaConf.save(config = cfg, f = f"{run_path}/config.yaml") 116 | 117 | LEARNING_RATE = cfg["train"]["params"]["config"]["learning_rate"] 118 | NUM_STEPS = cfg["train"]["params"]["config"]["horizon_length"] 119 | NUM_ITERATIONS = cfg["train"]["params"]["config"]["max_epochs"] 120 | GAMMA = cfg["train"]["params"]["config"]["gamma"] 121 | GAE_LAMBDA = cfg["train"]["params"]["config"]["tau"] 122 | UPDATES_EPOCHS = cfg["train"]["params"]["config"]["mini_epochs"] 123 | MINIBATCH_SIZE = cfg["train"]["params"]["config"]["minibatch_size"] 124 | CLIP_COEF = cfg["train"]["params"]["config"]["e_clip"] 125 | ENT_COEF = cfg["train"]["params"]["config"]["entropy_coef"] 126 | VF_COEF = cfg["train"]["params"]["config"]["critic_coef"] 127 | MAX_GRAD_NORM = cfg["train"]["params"]["config"]["grad_norm"] 128 | NORM_ADV = 
cfg["train"]["params"]["config"]["normalize_advantage"] 129 | CLIP_VLOSS = cfg["train"]["params"]["config"]["clip_value"] 130 | ANNEAL_LR = True 131 | 132 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 133 | 134 | envs.single_action_space = envs.action_space 135 | envs.single_observation_space = envs.observation_space 136 | 137 | envs = ExtractObsWrapper(envs) 138 | 139 | BATCH_SIZE = int(envs.num_envs * NUM_STEPS) 140 | 141 | agent = Agent(envs).to(device) 142 | optimizer = optim.Adam(agent.parameters(), lr=LEARNING_RATE, eps=1e-5) 143 | 144 | obs = torch.zeros((NUM_STEPS, envs.num_envs) + envs.single_observation_space.shape, dtype=torch.float).to(device) 145 | actions = torch.zeros((NUM_STEPS, envs.num_envs) + envs.single_action_space.shape, dtype=torch.float).to(device) 146 | logprobs = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device) 147 | rewards = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device) 148 | dones = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device) 149 | true_dones = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device) 150 | values = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device) 151 | advantages = torch.zeros_like(rewards, dtype=torch.float).to(device) 152 | 153 | # TRY NOT TO MODIFY: start the game 154 | global_step = 0 155 | start_time = time.time() 156 | next_obs = envs.reset() 157 | next_obs = agent.obs_rms(next_obs) 158 | next_done = torch.zeros(envs.num_envs, dtype=torch.float).to(device) 159 | next_true_done = torch.zeros(envs.num_envs, dtype=torch.float).to(device) 160 | 161 | for iteration in range(1, NUM_ITERATIONS + 1): 162 | if ANNEAL_LR: 163 | frac = 1.0 - (iteration - 1.0) / NUM_ITERATIONS 164 | lrnow = frac * LEARNING_RATE 165 | optimizer.param_groups[0]["lr"] = lrnow 166 | 167 | for step in range(0, NUM_STEPS): 168 | global_step += envs.num_envs 169 | obs[step] = next_obs 170 | dones[step] = next_done 171 | true_dones[step] = next_true_done 172 | 173 | # ALGO LOGIC: action logic 174 | with torch.no_grad(): 175 | action, logprob, _, value = agent.get_action_and_value(next_obs) 176 | values[step] = value.flatten() 177 | actions[step] = action 178 | logprobs[step] = logprob 179 | 180 | # TRY NOT TO MODIFY: execute the game and log data. 
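# Sketch of the vectorized step contract assumed by the call below (illustrative only,
# inferred from how the surrounding code uses the returned values):
#   next_obs  -> torch.Tensor of shape (num_envs, *single_observation_space.shape)
#   rewards   -> torch.Tensor of shape (num_envs,)
#   next_done -> torch.Tensor of shape (num_envs,); in this task the CaT machinery appears
#                to fold constraint-triggered terminations into these flags
#   info      -> dict carrying at least "true_dones" (actual episode ends; together with
#                the dones above they mask value bootstrapping in the GAE pass further down)
#                and optionally "time_outs"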
181 | next_obs, rewards[step], next_done, info = envs.step(action) 182 | next_obs = agent.obs_rms(next_obs) 183 | next_true_done = info["true_dones"].float() 184 | if "time_outs" in info: 185 | if info["time_outs"].any(): 186 | print("time outs", info["time_outs"].sum()) 187 | exit(0) 188 | 189 | for el, v in zip(list(envs.episode_sums.keys())[:envs.numRewards], (torch.mean(envs.rew_mean_reset, dim=0)).tolist()): 190 | writer.add_scalar(f"reward/{el}", v, iteration) 191 | writer.add_scalar(f"reward/cum_rew", (torch.sum(torch.mean(envs.rew_cum_reset, dim=0))).item(), iteration) 192 | writer.add_scalar(f"reward/avg_rew", envs.terrain_levels.float().mean().item(), iteration) 193 | for el, v in zip(envs.cstr_manager.get_names(), (100.0 * torch.mean(envs.cstr_mean_reset, dim=0)).tolist()): 194 | writer.add_scalar(f"cstr/{el}", v, iteration) 195 | 196 | # CaT: must compute the CaT quantity 197 | not_dones = 1.0 - dones 198 | rewards *= not_dones 199 | 200 | # bootstrap value if not done 201 | with torch.no_grad(): 202 | next_value = agent.get_value(next_obs).reshape(1, -1) 203 | advantages = torch.zeros_like(rewards).to(device) 204 | lastgaelam = 0 205 | for t in reversed(range(NUM_STEPS)): 206 | if t == NUM_STEPS - 1: 207 | nextnonterminal = 1.0 - next_done 208 | true_nextnonterminal = 1 - next_true_done 209 | nextvalues = next_value 210 | else: 211 | nextnonterminal = 1.0 - dones[t + 1] 212 | true_nextnonterminal = 1 - true_dones[t + 1] 213 | nextvalues = values[t + 1] 214 | delta = rewards[t] + GAMMA * nextvalues * nextnonterminal * true_nextnonterminal - values[t] 215 | advantages[t] = lastgaelam = delta + GAMMA * GAE_LAMBDA * nextnonterminal * true_nextnonterminal * lastgaelam 216 | returns = advantages + values 217 | 218 | # flatten the batch 219 | b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) 220 | b_logprobs = logprobs.reshape(-1) 221 | b_actions = actions.reshape((-1,) + envs.single_action_space.shape) 222 | b_advantages = advantages.reshape(-1) 223 | b_returns = returns.reshape(-1) 224 | b_values = values.reshape(-1) 225 | 226 | b_values = agent.value_rms(b_values) 227 | b_returns = agent.value_rms(b_returns) 228 | 229 | # Optimizing the policy and value network 230 | clipfracs = [] 231 | for epoch in range(UPDATES_EPOCHS): 232 | b_inds = torch.randperm(BATCH_SIZE, device=device) 233 | for start in range(0, BATCH_SIZE, MINIBATCH_SIZE): 234 | end = start + MINIBATCH_SIZE 235 | mb_inds = b_inds[start:end] 236 | 237 | _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions[mb_inds]) 238 | logratio = newlogprob - b_logprobs[mb_inds] 239 | ratio = logratio.exp() 240 | 241 | with torch.no_grad(): 242 | # calculate approx_kl http://joschu.net/blog/kl-approx.html 243 | old_approx_kl = (-logratio).mean() 244 | approx_kl = ((ratio - 1) - logratio).mean() 245 | clipfracs += [((ratio - 1.0).abs() > CLIP_COEF).float().mean().item()] 246 | 247 | mb_advantages = b_advantages[mb_inds] 248 | if NORM_ADV: 249 | mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) 250 | 251 | # Policy loss 252 | pg_loss1 = -mb_advantages * ratio 253 | pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - CLIP_COEF, 1 + CLIP_COEF) 254 | pg_loss = torch.max(pg_loss1, pg_loss2).mean() 255 | 256 | # Value loss 257 | newvalue = newvalue.view(-1) 258 | newvalue = agent.value_rms(newvalue, update = False) 259 | if CLIP_VLOSS: 260 | v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2 261 | v_clipped = b_values[mb_inds] + torch.clamp( 262 | 
newvalue - b_values[mb_inds], 263 | -CLIP_COEF, 264 | CLIP_COEF, 265 | ) 266 | v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 267 | v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) 268 | v_loss = 0.5 * v_loss_max.mean() 269 | else: 270 | v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() 271 | 272 | entropy_loss = entropy.mean() 273 | loss = pg_loss - ENT_COEF * entropy_loss + v_loss * VF_COEF 274 | 275 | optimizer.zero_grad() 276 | loss.backward() 277 | nn.utils.clip_grad_norm_(agent.parameters(), MAX_GRAD_NORM) 278 | optimizer.step() 279 | 280 | if (iteration + 1) % 24 == 0: 281 | model_path = f"{run_path}/cleanrl_model.pt" 282 | torch.save(agent.state_dict(), model_path) 283 | print("Saved model") 284 | 285 | def eval_PPO(cfg: DictConfig, envs): 286 | checkpoint = cfg["checkpoint"] 287 | actor_sd = torch.load(checkpoint) 288 | 289 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 290 | 291 | envs.single_action_space = envs.action_space 292 | envs.single_observation_space = envs.observation_space 293 | 294 | envs = ExtractObsWrapper(envs) 295 | 296 | actor = Agent(envs).to(device) 297 | actor.load_state_dict(actor_sd) 298 | 299 | obs = envs.reset() 300 | 301 | for _ in range(2000): 302 | with torch.no_grad(): 303 | actions, _, _, _ = actor.get_action_and_value(actor.obs_rms(obs, update = False)) 304 | 305 | next_obs, rewards, terminations, infos = envs.step(actions) 306 | obs = next_obs 307 | -------------------------------------------------------------------------------- /solo12/meshes/cylinder.obj: -------------------------------------------------------------------------------- 1 | o Cylinder 2 | v 0 -0.998248581950179 0.5034876052441187 3 | v 0 -1.0017392333654023 -0.4965063024136719 4 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156 5 | v 0 -1.0017392333654023 -0.4965063024136719 6 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749 7 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156 8 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156 9 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749 10 | v 1 0.00174532570761165 0.4999969538288953 11 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749 12 | v 1 -0.0017453257076117724 -0.4999969538288953 13 | v 1 0.00174532570761165 0.4999969538288953 14 | v 1 0.00174532570761165 0.4999969538288953 15 | v 1 -0.0017453257076117724 -0.4999969538288953 16 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749 17 | v 1 -0.0017453257076117724 -0.4999969538288953 18 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156 19 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749 20 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749 21 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156 22 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719 23 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156 24 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187 25 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719 26 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719 27 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187 28 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749 29 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187 30 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156 31 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749 32 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749 33 
| v -0.7071067690849304 0.705357135440903 -0.5024652170731156 34 | v -1 0.001745325707611895 0.4999969538288953 35 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156 36 | v -1 -0.0017453257076115276 -0.4999969538288953 37 | v -1 0.001745325707611895 0.4999969538288953 38 | v -1 0.001745325707611895 0.4999969538288953 39 | v -1 -0.0017453257076115276 -0.4999969538288953 40 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156 41 | v -1 -0.0017453257076115276 -0.4999969538288953 42 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749 43 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156 44 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156 45 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749 46 | v -2.4492937051703357e-16 -0.998248581950179 0.5034876052441187 47 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749 48 | v -2.4492937051703357e-16 -1.0017392333654023 -0.4965063024136719 49 | v -2.4492937051703357e-16 -0.998248581950179 0.5034876052441187 50 | v 0 -0.998248581950179 0.5034876052441187 51 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156 52 | v 0 0.0017453257076117112 0.4999969538288953 53 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156 54 | v 1 0.00174532570761165 0.4999969538288953 55 | v 0 0.0017453257076117112 0.4999969538288953 56 | v 1 0.00174532570761165 0.4999969538288953 57 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749 58 | v 0 0.0017453257076117112 0.4999969538288953 59 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749 60 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719 61 | v 0 0.0017453257076117112 0.4999969538288953 62 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719 63 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749 64 | v 0 0.0017453257076117112 0.4999969538288953 65 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749 66 | v -1 0.001745325707611895 0.4999969538288953 67 | v 0 0.0017453257076117112 0.4999969538288953 68 | v -1 0.001745325707611895 0.4999969538288953 69 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156 70 | v 0 0.0017453257076117112 0.4999969538288953 71 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156 72 | v -2.4492937051703357e-16 -0.998248581950179 0.5034876052441187 73 | v 0 0.0017453257076117112 0.4999969538288953 74 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749 75 | v 0 -1.0017392333654023 -0.4965063024136719 76 | v 0 -0.0017453257076117112 -0.4999969538288953 77 | v 1 -0.0017453257076117724 -0.4999969538288953 78 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749 79 | v 0 -0.0017453257076117112 -0.4999969538288953 80 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156 81 | v 1 -0.0017453257076117724 -0.4999969538288953 82 | v 0 -0.0017453257076117112 -0.4999969538288953 83 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187 84 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156 85 | v 0 -0.0017453257076117112 -0.4999969538288953 86 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156 87 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187 88 | v 0 -0.0017453257076117112 -0.4999969538288953 89 | v -1 -0.0017453257076115276 -0.4999969538288953 90 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156 91 | v 0 -0.0017453257076117112 -0.4999969538288953 92 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749 93 | v -1 -0.0017453257076115276 
-0.4999969538288953 94 | v 0 -0.0017453257076117112 -0.4999969538288953 95 | v -2.4492937051703357e-16 -1.0017392333654023 -0.4965063024136719 96 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749 97 | v 0 -0.0017453257076117112 -0.4999969538288953 98 | vt 0 1 99 | vt 0 0 100 | vt 0.125 1 101 | vt 0 0 102 | vt 0.125 0 103 | vt 0.125 1 104 | vt 0.125 1 105 | vt 0.125 0 106 | vt 0.25 1 107 | vt 0.125 0 108 | vt 0.25 0 109 | vt 0.25 1 110 | vt 0.25 1 111 | vt 0.25 0 112 | vt 0.375 1 113 | vt 0.25 0 114 | vt 0.375 0 115 | vt 0.375 1 116 | vt 0.375 1 117 | vt 0.375 0 118 | vt 0.5 1 119 | vt 0.375 0 120 | vt 0.5 0 121 | vt 0.5 1 122 | vt 0.5 1 123 | vt 0.5 0 124 | vt 0.625 1 125 | vt 0.5 0 126 | vt 0.625 0 127 | vt 0.625 1 128 | vt 0.625 1 129 | vt 0.625 0 130 | vt 0.75 1 131 | vt 0.625 0 132 | vt 0.75 0 133 | vt 0.75 1 134 | vt 0.75 1 135 | vt 0.75 0 136 | vt 0.875 1 137 | vt 0.75 0 138 | vt 0.875 0 139 | vt 0.875 1 140 | vt 0.875 1 141 | vt 0.875 0 142 | vt 1 1 143 | vt 0.875 0 144 | vt 1 0 145 | vt 1 1 146 | vt 1 0.5 147 | vt 0.8535534143447876 0.8535534143447876 148 | vt 0.5 0.5 149 | vt 0.8535534143447876 0.8535534143447876 150 | vt 0.5 1 151 | vt 0.5 0.5 152 | vt 0.5 1 153 | vt 0.1464466154575348 0.8535534143447876 154 | vt 0.5 0.5 155 | vt 0.1464466154575348 0.8535534143447876 156 | vt 0 0.5 157 | vt 0.5 0.5 158 | vt 0 0.5 159 | vt 0.1464466154575348 0.1464466154575348 160 | vt 0.5 0.5 161 | vt 0.1464466154575348 0.1464466154575348 162 | vt 0.5 0 163 | vt 0.5 0.5 164 | vt 0.5 0 165 | vt 0.8535534143447876 0.1464466154575348 166 | vt 0.5 0.5 167 | vt 0.8535534143447876 0.1464466154575348 168 | vt 1 0.5 169 | vt 0.5 0.5 170 | vt 0.8535534143447876 0.1464466154575348 171 | vt 1 0.5 172 | vt 0.5 0.5 173 | vt 0.5 0 174 | vt 0.8535534143447876 0.1464466154575348 175 | vt 0.5 0.5 176 | vt 0.1464466154575348 0.1464466154575348 177 | vt 0.5 0 178 | vt 0.5 0.5 179 | vt 0 0.5 180 | vt 0.1464466154575348 0.1464466154575348 181 | vt 0.5 0.5 182 | vt 0.1464466154575348 0.8535534143447876 183 | vt 0 0.5 184 | vt 0.5 0.5 185 | vt 0.5 1 186 | vt 0.1464466154575348 0.8535534143447876 187 | vt 0.5 0.5 188 | vt 0.8535534143447876 0.8535534143447876 189 | vt 0.5 1 190 | vt 0.5 0.5 191 | vt 1 0.5 192 | vt 0.8535534143447876 0.8535534143447876 193 | vt 0.5 0.5 194 | vn 0 -0.9999939076577902 0.0034906514152234207 195 | vn 0 -0.9999939076577902 0.0034906514152234207 196 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373 197 | vn 0 -0.9999939076577902 0.0034906514152234207 198 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373 199 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373 200 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373 201 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373 202 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19 203 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373 204 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19 205 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19 206 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19 207 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19 208 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373 209 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19 210 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373 211 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373 212 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373 213 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373 
214 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207 215 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373 216 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207 217 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207 218 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207 219 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207 220 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373 221 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207 222 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373 223 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373 224 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373 225 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373 226 | vn -1 1.836958955078092e-16 -6.412222441704118e-19 227 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373 228 | vn -1 1.836958955078092e-16 -6.412222441704118e-19 229 | vn -1 1.836958955078092e-16 -6.412222441704118e-19 230 | vn -1 1.836958955078092e-16 -6.412222441704118e-19 231 | vn -1 1.836958955078092e-16 -6.412222441704118e-19 232 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373 233 | vn -1 1.836958955078092e-16 -6.412222441704118e-19 234 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373 235 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373 236 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373 237 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373 238 | vn -2.4492937051703357e-16 -0.9999939076577902 0.0034906514152234207 239 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373 240 | vn -2.4492937051703357e-16 -0.9999939076577902 0.0034906514152234207 241 | vn -2.4492937051703357e-16 -0.9999939076577902 0.0034906514152234207 242 | vn 0 0.0034906514152234207 0.9999939076577902 243 | vn 0 0.0034906514152234207 0.9999939076577902 244 | vn 0 0.0034906514152234207 0.9999939076577902 245 | vn 0 0.0034906514152234207 0.9999939076577902 246 | vn 0 0.0034906514152234207 0.9999939076577902 247 | vn 0 0.0034906514152234207 0.9999939076577902 248 | vn 0 0.0034906514152234207 0.9999939076577902 249 | vn 0 0.0034906514152234207 0.9999939076577902 250 | vn 0 0.0034906514152234207 0.9999939076577902 251 | vn 0 0.0034906514152234207 0.9999939076577902 252 | vn 0 0.0034906514152234207 0.9999939076577902 253 | vn 0 0.0034906514152234207 0.9999939076577902 254 | vn 0 0.0034906514152234207 0.9999939076577902 255 | vn 0 0.0034906514152234207 0.9999939076577902 256 | vn 0 0.0034906514152234207 0.9999939076577902 257 | vn 0 0.0034906514152234207 0.9999939076577902 258 | vn 0 0.0034906514152234207 0.9999939076577902 259 | vn 0 0.0034906514152234207 0.9999939076577902 260 | vn 0 0.0034906514152234207 0.9999939076577902 261 | vn 0 0.0034906514152234207 0.9999939076577902 262 | vn 0 0.0034906514152234207 0.9999939076577902 263 | vn 0 0.0034906514152234207 0.9999939076577902 264 | vn 0 0.0034906514152234207 0.9999939076577902 265 | vn 0 0.0034906514152234207 0.9999939076577902 266 | vn 0 -0.0034906514152234207 -0.9999939076577902 267 | vn 0 -0.0034906514152234207 -0.9999939076577902 268 | vn 0 -0.0034906514152234207 -0.9999939076577902 269 | vn 0 -0.0034906514152234207 -0.9999939076577902 270 | vn 0 -0.0034906514152234207 -0.9999939076577902 271 | vn 0 -0.0034906514152234207 -0.9999939076577902 272 | vn 0 -0.0034906514152234207 -0.9999939076577902 273 | vn 
0 -0.0034906514152234207 -0.9999939076577902 274 | vn 0 -0.0034906514152234207 -0.9999939076577902 275 | vn 0 -0.0034906514152234207 -0.9999939076577902 276 | vn 0 -0.0034906514152234207 -0.9999939076577902 277 | vn 0 -0.0034906514152234207 -0.9999939076577902 278 | vn 0 -0.0034906514152234207 -0.9999939076577902 279 | vn 0 -0.0034906514152234207 -0.9999939076577902 280 | vn 0 -0.0034906514152234207 -0.9999939076577902 281 | vn 0 -0.0034906514152234207 -0.9999939076577902 282 | vn 0 -0.0034906514152234207 -0.9999939076577902 283 | vn 0 -0.0034906514152234207 -0.9999939076577902 284 | vn 0 -0.0034906514152234207 -0.9999939076577902 285 | vn 0 -0.0034906514152234207 -0.9999939076577902 286 | vn 0 -0.0034906514152234207 -0.9999939076577902 287 | vn 0 -0.0034906514152234207 -0.9999939076577902 288 | vn 0 -0.0034906514152234207 -0.9999939076577902 289 | vn 0 -0.0034906514152234207 -0.9999939076577902 290 | f 1/1/1 2/2/2 3/3/3 291 | f 4/4/4 5/5/5 6/6/6 292 | f 7/7/7 8/8/8 9/9/9 293 | f 10/10/10 11/11/11 12/12/12 294 | f 13/13/13 14/14/14 15/15/15 295 | f 16/16/16 17/17/17 18/18/18 296 | f 19/19/19 20/20/20 21/21/21 297 | f 22/22/22 23/23/23 24/24/24 298 | f 25/25/25 26/26/26 27/27/27 299 | f 28/28/28 29/29/29 30/30/30 300 | f 31/31/31 32/32/32 33/33/33 301 | f 34/34/34 35/35/35 36/36/36 302 | f 37/37/37 38/38/38 39/39/39 303 | f 40/40/40 41/41/41 42/42/42 304 | f 43/43/43 44/44/44 45/45/45 305 | f 46/46/46 47/47/47 48/48/48 306 | f 49/49/49 50/50/50 51/51/51 307 | f 52/52/52 53/53/53 54/54/54 308 | f 55/55/55 56/56/56 57/57/57 309 | f 58/58/58 59/59/59 60/60/60 310 | f 61/61/61 62/62/62 63/63/63 311 | f 64/64/64 65/65/65 66/66/66 312 | f 67/67/67 68/68/68 69/69/69 313 | f 70/70/70 71/71/71 72/72/72 314 | f 73/73/73 74/74/74 75/75/75 315 | f 76/76/76 77/77/77 78/78/78 316 | f 79/79/79 80/80/80 81/81/81 317 | f 82/82/82 83/83/83 84/84/84 318 | f 85/85/85 86/86/86 87/87/87 319 | f 88/88/88 89/89/89 90/90/90 320 | f 91/91/91 92/92/92 93/93/93 321 | f 94/94/94 95/95/95 96/96/96 322 | -------------------------------------------------------------------------------- /tasks/terrainParkour.py: -------------------------------------------------------------------------------- 1 | #################### 2 | # Terrain generator 3 | #################### 4 | import numpy as np 5 | 6 | from isaacgym.terrain_utils import * 7 | 8 | class Terrain: 9 | """Class to handle terrain creation, usually a bunch of subterrains surrounding by a flat border area. 10 | 11 | Subterrains are spread on a given number of columns and rows. Each column might correspond to a given 12 | type of subterrains (slope, stairs, ...) and each row to a given difficulty (from easiest to hardest). 
13 | """ 14 | 15 | def __init__(self, cfg, num_robots) -> None: 16 | 17 | self.type = cfg["terrainType"] 18 | if self.type in ["none", 'plane']: 19 | return 20 | 21 | # Create subterrains on the fly based on Isaac subterrain primitives 22 | 23 | # Retrieving proportions of each kind of subterrains 24 | keys = list(cfg["terrainProportions"].keys()) 25 | vals = list(cfg["terrainProportions"].values()) 26 | self.terrain_keys = [] 27 | self.terrain_proportions = [] 28 | sum = 0.0 29 | for key, val in zip(keys, vals): 30 | if val != 0.0: 31 | sum += float(val) 32 | self.terrain_keys.append(key) 33 | self.terrain_proportions.append(np.round(sum, 2)) 34 | 35 | self.horizontal_scale = 0.05 # Resolution of the terrain height map 36 | self.border_size = 8.0 # Size of the flat border area all around the terrains 37 | self.env_length = cfg["mapLength"] # Length of subterrains 38 | self.env_width = cfg["mapWidth"] # Width of subterrains 39 | self.env_rows = cfg["numLevels"] 40 | self.env_cols = cfg["numTerrains"] 41 | self.num_maps = self.env_rows * self.env_cols 42 | self.num_per_env = int(num_robots / self.num_maps) 43 | self.env_origins = np.zeros((self.env_rows, self.env_cols, 3)) 44 | 45 | # Number of height map cells for each subterrain in width and length 46 | self.width_per_env_pixels = int(self.env_width / self.horizontal_scale) 47 | self.length_per_env_pixels = int(self.env_length / self.horizontal_scale) 48 | 49 | self.border = int(self.border_size / self.horizontal_scale) 50 | self.tot_cols = int(self.env_cols * self.width_per_env_pixels) + 2 * self.border 51 | self.tot_rows = int(self.env_rows * self.length_per_env_pixels) + 2 * self.border 52 | self.height_field_raw = np.zeros((self.tot_rows , self.tot_cols), dtype=np.float32) 53 | self.ceilings = np.zeros((self.tot_rows , self.tot_cols), dtype=np.float32) 54 | 55 | self.boxes = [] 56 | self.vertical_scale = 0.005 57 | if cfg["curriculum"]: 58 | self.curiculum(num_robots, num_terrains=self.env_cols, num_levels=self.env_rows) 59 | else: 60 | self.curiculum(num_robots, num_terrains=self.env_cols, num_levels=self.env_rows) 61 | #self.randomized_terrain() 62 | self.vertices, self.triangles = convert_heightfield_to_trimesh(self.height_field_raw, self.horizontal_scale, self.vertical_scale, cfg["slopeTreshold"]) 63 | if len(self.boxes) > 0: 64 | for box in self.boxes: 65 | self.vertices, self.triangles = combine_trimeshes((self.vertices, self.triangles), box) 66 | self.heightsamples = self.height_field_raw 67 | 68 | def randomized_terrain(self): 69 | """Spawn random subterrain without ordering them by type or difficulty 70 | according to their rows and columns""" 71 | 72 | for k in range(self.num_maps): 73 | # Env coordinates in the world 74 | (i, j) = np.unravel_index(k, (self.env_rows, self.env_cols)) 75 | 76 | # Heightfield coordinate system from now on 77 | start_x = self.border + i * self.length_per_env_pixels 78 | end_x = self.border + (i + 1) * self.length_per_env_pixels 79 | start_y = self.border + j * self.width_per_env_pixels 80 | end_y = self.border + (j + 1) * self.width_per_env_pixels 81 | 82 | terrain = SubTerrain("terrain", 83 | width=self.width_per_env_pixels, 84 | length=self.width_per_env_pixels, 85 | vertical_scale=self.vertical_scale, 86 | horizontal_scale=self.horizontal_scale) 87 | choice = np.random.uniform(0, 1) 88 | if choice < 0.1: 89 | if np.random.choice([0, 1]): 90 | pyramid_sloped_terrain(terrain, np.random.choice([-0.3, -0.2, 0, 0.2, 0.3])) 91 | random_uniform_terrain(terrain, min_height=-0.1, max_height=0.1, 
step=0.05, downsampled_scale=0.2) 92 | else: 93 | pyramid_sloped_terrain(terrain, np.random.choice([-0.3, -0.2, 0, 0.2, 0.3])) 94 | elif choice < 0.6: 95 | # step_height = np.random.choice([-0.18, -0.15, -0.1, -0.05, 0.05, 0.1, 0.15, 0.18]) 96 | step_height = np.random.choice([-0.15, 0.15]) 97 | pyramid_stairs_terrain(terrain, step_width=0.31, step_height=step_height, platform_size=3.) 98 | elif choice < 1.: 99 | discrete_obstacles_terrain(terrain, 0.15, 1., 2., 40, platform_size=3.) 100 | 101 | self.height_field_raw[start_x: end_x, start_y:end_y] = terrain.height_field_raw 102 | 103 | env_origin_x = (i + 0.5) * self.env_length 104 | env_origin_y = (j + 0.5) * self.env_width 105 | x1 = int((self.env_length / 2. - 1) / self.horizontal_scale) 106 | x2 = int((self.env_length / 2. + 1) / self.horizontal_scale) 107 | y1 = int((self.env_width / 2. - 1) / self.horizontal_scale) 108 | y2 = int((self.env_width / 2. + 1) / self.horizontal_scale) 109 | env_origin_z = np.max(terrain.height_field_raw[x1:x2, y1:y2])*self.vertical_scale 110 | self.env_origins[i, j] = [env_origin_x, env_origin_y, env_origin_z] 111 | 112 | def curiculum(self, num_robots, num_terrains, num_levels): 113 | """Spawn subterrain ordering them by type (one type per column) 114 | and by difficulty (first row is easiest, last one is hardest) 115 | """ 116 | 117 | num_robots_per_map = int(num_robots / num_terrains) 118 | left_over = num_robots % num_terrains 119 | idx = 0 120 | for j in range(num_terrains): 121 | for i in range(num_levels): 122 | terrain = SubTerrain("terrain", 123 | length=self.length_per_env_pixels, # seems to be inverted 124 | width=self.width_per_env_pixels, 125 | vertical_scale=self.vertical_scale, 126 | horizontal_scale=self.horizontal_scale) 127 | difficulty = i / (num_levels-1.0) 128 | choice = j / num_terrains 129 | lava_depth=-np.random.uniform(0.7, 1.3) 130 | boxes=None 131 | ceiling = 0.4 132 | 133 | k = 0 134 | while k < len(self.terrain_proportions) and choice >= self.terrain_proportions[k]: 135 | k += 1 136 | if k == len(self.terrain_proportions): 137 | # The sum of terrain proportions is not >= 1 138 | # Defaulting to flat ground 139 | continue 140 | 141 | if self.terrain_keys[k] == "gap_parkour": 142 | gap_length = 0.15 + i*0.05 143 | gap_length = np.round(gap_length, 2) 144 | 145 | gap_parkour( 146 | terrain, 147 | lava_depth=lava_depth, 148 | gap_length=gap_length, 149 | gap_platform_height=0.1 150 | ) 151 | add_roughness(terrain, np.random.uniform(0.01, 0.03)) 152 | elif self.terrain_keys[k] == "jump_parkour": 153 | height = 0.05 + 0.37*difficulty 154 | jump_parkour( 155 | terrain, 156 | lava_depth=lava_depth, 157 | height=height, 158 | ) 159 | add_roughness(terrain, np.random.uniform(0.01, 0.03)) 160 | 161 | elif self.terrain_keys[k] == "stairs_parkour": 162 | stairs_parkour( 163 | terrain, 164 | lava_depth=lava_depth, 165 | height=0.02 + 0.18*difficulty, 166 | 167 | ) 168 | add_roughness(terrain, np.random.uniform(0.01, 0.03)) 169 | 170 | elif self.terrain_keys[k] == "hurdle_parkour": 171 | hurdle_parkour( 172 | terrain, 173 | lava_depth=lava_depth, 174 | height=0.05 + 0.3*difficulty, 175 | ) 176 | add_roughness(terrain, np.random.uniform(0.01, 0.03)) 177 | elif self.terrain_keys[k] == "crawl_parkour": 178 | ceiling = 0.34 - 0.08*difficulty 179 | boxes = crawl_parkour( 180 | terrain, 181 | lava_depth=lava_depth, 182 | height=ceiling 183 | ) 184 | add_roughness(terrain, np.random.uniform(0.01, 0.03)) 185 | elif self.terrain_keys[k] == "random_uniform": 186 | add_roughness(terrain, 
np.random.uniform(0.01, 0.03)) 187 | 188 | else: 189 | # Flat ground 190 | pass 191 | 192 | # Heightfield coordinate system 193 | start_x = self.border + i * self.length_per_env_pixels 194 | end_x = self.border + (i + 1) * self.length_per_env_pixels 195 | start_y = self.border + j * self.width_per_env_pixels 196 | end_y = self.border + (j + 1) * self.width_per_env_pixels 197 | self.height_field_raw[start_x: end_x, start_y:end_y] = terrain.height_field_raw.transpose() # Seems that width and length are inverter in SubTerrain 198 | 199 | if boxes is not None: 200 | for box in boxes: 201 | box = move_trimesh(box, np.array([[ 202 | self.border_size+i*self.env_length, self.border_size + (j+0.5) * self.env_width, 0.0 203 | ]])) 204 | self.boxes.append(box) 205 | 206 | robots_in_map = num_robots_per_map 207 | if j < left_over: 208 | robots_in_map +=1 209 | 210 | env_origin_x = (i + 0.0) * self.env_length 211 | env_origin_y = (j + 0.5) * self.env_width 212 | env_origin_z = np.random.uniform(0.0, 0.4) 213 | self.env_origins[i, j] = [env_origin_x, env_origin_y, env_origin_z] 214 | self.ceilings[i, j] = ceiling 215 | 216 | def add_roughness(terrain, noise_magnitude=0.02): 217 | random_uniform_terrain( 218 | terrain, 219 | min_height=-noise_magnitude, 220 | max_height=noise_magnitude, 221 | step=0.005, 222 | downsampled_scale=0.075, 223 | ) 224 | 225 | def gap_parkour(terrain, platform_length=1., lava_width=0.5, lava_depth=-1.0, gap_length=0.5, gap_platform_length_min=1.25, gap_platform_length_max=1.5, gap_platform_height=0.05): 226 | platform_length = int(platform_length / terrain.horizontal_scale) 227 | lava_width = int(lava_width / terrain.horizontal_scale) 228 | lava_depth = int(lava_depth / terrain.vertical_scale) 229 | 230 | gap_platform_length_min = int(gap_platform_length_min / terrain.horizontal_scale) 231 | gap_platform_length_max = int(gap_platform_length_max / terrain.horizontal_scale) 232 | 233 | gap_length = int(gap_length / terrain.horizontal_scale) 234 | gap_platform_height = int(gap_platform_height / terrain.vertical_scale) 235 | 236 | # add gap 237 | start_gap = platform_length 238 | while start_gap + gap_length <= terrain.length - platform_length//2: 239 | gap_platform_length = np.random.randint(gap_platform_length_min, gap_platform_length_max) 240 | terrain.height_field_raw[:, start_gap:start_gap+gap_length] = lava_depth 241 | # randomize gap platform height 242 | if start_gap + gap_length + gap_platform_length <= terrain.length - platform_length //2: 243 | #terrain.height_field_raw[:, start_gap+gap_length:start_gap+gap_length+gap_platform_length] = np.random.randint(-gap_platform_height, gap_platform_height) 244 | terrain.height_field_raw[:, start_gap+gap_length:start_gap+gap_length+gap_platform_length] = -gap_platform_height 245 | 246 | start_gap += gap_length + gap_platform_length 247 | 248 | # the floor is lava 249 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth 250 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth 251 | 252 | def jump_parkour(terrain, platform_length=1.25, lava_width=0.5, lava_depth=-1.0, height=0.5, height_platform_length=1.5): 253 | platform_length = int(platform_length / terrain.horizontal_scale) 254 | lava_width = int(lava_width / terrain.horizontal_scale) 255 | lava_depth = int(lava_depth / terrain.vertical_scale) 256 | 257 | height_platform_length = int(height_platform_length / terrain.horizontal_scale) 258 | height = int(height / terrain.vertical_scale) 259 | 260 | # Version with 2 jumps 261 | 
#terrain.height_field_raw[:, platform_length:platform_length+3*height_platform_length] = height 262 | #terrain.height_field_raw[:, platform_length+height_platform_length:platform_length+2*height_platform_length] = 2*height 263 | 264 | # Version with 3 jumps 265 | terrain.height_field_raw[:, 1*platform_length:6*platform_length] = 1*height 266 | terrain.height_field_raw[:, 2*platform_length:5*platform_length] = 2*height 267 | terrain.height_field_raw[:, 3*platform_length:4*platform_length] = 3*height 268 | 269 | # the floor is lava 270 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth 271 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth 272 | 273 | def stairs_parkour(terrain, platform_length=1., lava_width=0.5, lava_depth=-1.0, height=0.18, width=0.3, stairs_platform_length=1.25): 274 | platform_length = int(platform_length / terrain.horizontal_scale) 275 | lava_width = int(lava_width / terrain.horizontal_scale) 276 | lava_depth = int(lava_depth / terrain.vertical_scale) 277 | 278 | stairs_platform_length = int(stairs_platform_length / terrain.horizontal_scale) 279 | height = int(height / terrain.vertical_scale) 280 | width = int(width / terrain.horizontal_scale) 281 | 282 | start = platform_length 283 | stop = terrain.length - platform_length//2 284 | curr_height = height 285 | while stop - start > platform_length: 286 | terrain.height_field_raw[:, start:stop] = curr_height 287 | curr_height += height 288 | start += width 289 | stop -= width 290 | 291 | # the floor is lava 292 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth 293 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth 294 | 295 | 296 | def hurdle_parkour(terrain, platform_length=1.5, lava_width=0.5, lava_depth=-1.0, height=0.2, width_min=0.3, width_max=0.5): 297 | platform_length = int(platform_length / terrain.horizontal_scale) 298 | lava_width = int(lava_width / terrain.horizontal_scale) 299 | lava_depth = int(lava_depth / terrain.vertical_scale) 300 | 301 | height = int(height / terrain.vertical_scale) 302 | width_min = int(width_min / terrain.horizontal_scale) 303 | width_max = int(width_max / terrain.horizontal_scale) 304 | 305 | start = platform_length 306 | width = np.random.randint(width_min, width_max) 307 | while start + platform_length + width <= terrain.length - platform_length//2: 308 | terrain.height_field_raw[:, start:start+width] = height 309 | start += platform_length + width 310 | width = np.random.randint(width_min, width_max) 311 | 312 | 313 | # the floor is lava 314 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth 315 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth 316 | 317 | def crawl_parkour(terrain, platform_length=2.0, lava_width=0.5, lava_depth=-1.0, height=0.2, depth=1.0, width=3.0, height_step=0.15): 318 | # First put the barriers 319 | boxes = [] 320 | boxes += box_trimesh(np.array([depth, width, 0.5]), np.array([2.5, 0.0, height+0.25])), 321 | boxes += box_trimesh(np.array([depth, width, 0.5]), np.array([6.5, 0.0, height+0.25+height_step])), 322 | 323 | # Then create the heightmap 324 | platform_length = int(platform_length / terrain.horizontal_scale) 325 | lava_width = int(lava_width / terrain.horizontal_scale) 326 | lava_depth = int(lava_depth / terrain.vertical_scale) 327 | 328 | height = int(height / terrain.vertical_scale) 329 | height_step = int(height_step / terrain.vertical_scale) 330 | depth = int(depth / terrain.horizontal_scale) 331 | 332 | 
terrain.height_field_raw[:, int(6.0/terrain.horizontal_scale):int(7.0/terrain.horizontal_scale)] = 1*height_step 333 | 334 | # the floor is lava 335 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth 336 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth 337 | 338 | return boxes 339 | 340 | 341 | 342 | 343 | def box_trimesh( 344 | size, # float [3] for x, y, z axis length (in meter) under box frame 345 | center_position, # float [3] position (in meter) in world frame 346 | ): 347 | 348 | vertices = np.empty((8, 3), dtype= np.float32) 349 | vertices[:] = center_position 350 | vertices[[0, 4, 2, 6], 0] -= size[0] / 2 351 | vertices[[1, 5, 3, 7], 0] += size[0] / 2 352 | vertices[[0, 1, 2, 3], 1] -= size[1] / 2 353 | vertices[[4, 5, 6, 7], 1] += size[1] / 2 354 | vertices[[2, 3, 6, 7], 2] -= size[2] / 2 355 | vertices[[0, 1, 4, 5], 2] += size[2] / 2 356 | 357 | triangles = -np.ones((12, 3), dtype= np.uint32) 358 | triangles[0] = [0, 2, 1] # 359 | triangles[1] = [1, 2, 3] 360 | triangles[2] = [0, 4, 2] # 361 | triangles[3] = [2, 4, 6] 362 | triangles[4] = [4, 5, 6] # 363 | triangles[5] = [5, 7, 6] 364 | triangles[6] = [1, 3, 5] # 365 | triangles[7] = [3, 7, 5] 366 | triangles[8] = [0, 1, 4] # 367 | triangles[9] = [1, 5, 4] 368 | triangles[10]= [2, 6, 3] # 369 | triangles[11]= [3, 6, 7] 370 | 371 | return vertices, triangles 372 | 373 | def combine_trimeshes(*trimeshes): 374 | if len(trimeshes) > 2: 375 | return combine_trimeshes( 376 | trimeshes[0], 377 | combine_trimeshes(trimeshes[1:]) 378 | ) 379 | 380 | # only two trimesh to combine 381 | trimesh_0, trimesh_1 = trimeshes 382 | if trimesh_0[1].shape[0] < trimesh_1[1].shape[0]: 383 | trimesh_0, trimesh_1 = trimesh_1, trimesh_0 384 | 385 | trimesh_1 = (trimesh_1[0], trimesh_1[1] + trimesh_0[0].shape[0]) 386 | vertices = np.concatenate((trimesh_0[0], trimesh_1[0]), axis= 0) 387 | triangles = np.concatenate((trimesh_0[1], trimesh_1[1]), axis= 0) 388 | 389 | return vertices, triangles 390 | 391 | def move_trimesh(trimesh, move: np.ndarray): 392 | trimesh = list(trimesh) 393 | trimesh[0] += move 394 | return tuple(trimesh) 395 | -------------------------------------------------------------------------------- /algos/DDPG_demos_generate.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | import os 4 | import random 5 | import time 6 | from tqdm.auto import tqdm 7 | import pickle as pkl 8 | 9 | from typing import NamedTuple 10 | 11 | from omegaconf import DictConfig, OmegaConf 12 | 13 | import numpy as np 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | import torch.optim as optim 19 | from torch.distributions.normal import Normal 20 | from torch.utils.tensorboard import SummaryWriter 21 | 22 | class RunningMeanStd(nn.Module): 23 | def __init__(self, shape = (), epsilon=1e-08): 24 | super(RunningMeanStd, self).__init__() 25 | self.register_buffer("running_mean", torch.zeros(shape)) 26 | self.register_buffer("running_var", torch.ones(shape)) 27 | self.register_buffer("count", torch.ones(())) 28 | 29 | self.epsilon = epsilon 30 | 31 | def forward(self, obs, update = True): 32 | if update: 33 | self.update(obs) 34 | 35 | return (obs - self.running_mean) / torch.sqrt(self.running_var + self.epsilon) 36 | 37 | def update(self, x): 38 | """Updates the mean, var and count from a batch of samples.""" 39 | batch_mean = torch.mean(x, dim=0) 40 | batch_var = torch.var(x, correction=0, dim=0) 41 | batch_count = x.shape[0] 
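# Worked example of the moment merge done by update_from_moments below
# (illustrative numbers; the buffers start at mean = 0, var = 1, count = 1):
#   batch x = [1, 3]  ->  batch_mean = 2, batch_var = 1 (population variance), batch_count = 2
#   delta    = 2 - 0 = 2,   tot_count = 1 + 2 = 3
#   new_mean = 0 + 2 * 2 / 3                   = 1.333...
#   M2       = 1*1 + 1*2 + 2**2 * (1 * 2 / 3)  = 5.666...
#   new_var  = M2 / 3                          = 1.888...
# i.e. the standard parallel variance merge: running statistics absorb each batch
# without storing past observations.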
42 | self.update_from_moments(batch_mean, batch_var, batch_count) 43 | 44 | def update_from_moments(self, batch_mean, batch_var, batch_count): 45 | """Updates from batch mean, variance and count moments.""" 46 | self.running_mean, self.running_var, self.count = update_mean_var_count_from_moments( 47 | self.running_mean, self.running_var, self.count, batch_mean, batch_var, batch_count 48 | ) 49 | 50 | def update_mean_var_count_from_moments( 51 | mean, var, count, batch_mean, batch_var, batch_count 52 | ): 53 | """Updates the mean, var and count using the previous mean, var, count and batch values.""" 54 | delta = batch_mean - mean 55 | tot_count = count + batch_count 56 | 57 | new_mean = mean + delta * batch_count / tot_count 58 | m_a = var * count 59 | m_b = batch_var * batch_count 60 | M2 = m_a + m_b + torch.square(delta) * count * batch_count / tot_count 61 | new_var = M2 / tot_count 62 | new_count = tot_count 63 | 64 | return new_mean, new_var, new_count 65 | 66 | def layer_init(layer, std=np.sqrt(2), bias_const=0.0): 67 | torch.nn.init.orthogonal_(layer.weight, std) 68 | torch.nn.init.constant_(layer.bias, bias_const) 69 | return layer 70 | 71 | class Agent(nn.Module): 72 | def __init__(self, envs): 73 | super().__init__() 74 | self.critic = nn.Sequential( 75 | layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 512)), 76 | nn.ELU(), 77 | layer_init(nn.Linear(512, 256)), 78 | nn.ELU(), 79 | layer_init(nn.Linear(256, 128)), 80 | nn.ELU(), 81 | layer_init(nn.Linear(128, 1), std=1.0), 82 | ) 83 | self.actor_mean = nn.Sequential( 84 | layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 512)), 85 | nn.ELU(), 86 | layer_init(nn.Linear(512, 256)), 87 | nn.ELU(), 88 | layer_init(nn.Linear(256, 128)), 89 | nn.ELU(), 90 | layer_init(nn.Linear(128, np.prod(envs.single_action_space.shape)), std=0.01), 91 | ) 92 | self.actor_logstd = nn.Parameter(torch.zeros(1, np.prod(envs.single_action_space.shape))) 93 | 94 | self.obs_rms = RunningMeanStd(shape = envs.single_observation_space.shape) 95 | self.value_rms = RunningMeanStd(shape = ()) 96 | 97 | def get_value(self, x): 98 | return self.critic(x) 99 | 100 | def get_action_and_value(self, x, action=None): 101 | action_mean = self.actor_mean(x) 102 | return action_mean, None, None, None 103 | 104 | class SeqReplayBufferSamples(NamedTuple): 105 | observations: torch.Tensor 106 | privileged_observations: torch.Tensor 107 | vision_observations: torch.Tensor 108 | actions: torch.Tensor 109 | next_observations: torch.Tensor 110 | privileged_next_observations: torch.Tensor 111 | vision_next_observations: torch.Tensor 112 | cat_dones: torch.Tensor 113 | raw_constraints: torch.Tensor 114 | dones: torch.Tensor 115 | rewards: torch.Tensor 116 | p_ini_hidden_in: torch.Tensor 117 | p_ini_hidden_out: torch.Tensor 118 | mask: torch.Tensor 119 | 120 | class SeqReplayBuffer(): 121 | def __init__( 122 | self, 123 | buffer_size, 124 | observation_space, 125 | privileged_observation_space, 126 | vision_space, 127 | action_space, 128 | num_constraints, 129 | max_episode_length, 130 | seq_len, 131 | num_envs, 132 | critic_rnn = False, 133 | storing_device = "cpu", 134 | training_device = "cpu", 135 | handle_timeout_termination = True, 136 | ): 137 | self.buffer_size = buffer_size 138 | self.max_episode_length = max_episode_length 139 | self.seq_len = seq_len 140 | self.observation_space = observation_space 141 | self.privileged_observation_space = privileged_observation_space.shape 142 | self.vision_space = vision_space 143 | 
self.action_space = action_space 144 | self.num_envs = num_envs 145 | 146 | self.num_constraints = num_constraints 147 | 148 | self.action_dim = int(np.prod(action_space.shape)) 149 | self.pos = 0 150 | self.full = False 151 | self.critic_rnn = critic_rnn 152 | self.storing_device = storing_device 153 | self.training_device = training_device 154 | 155 | self.observations = torch.zeros((self.buffer_size, *self.observation_space), dtype=torch.float32, device = storing_device) 156 | self.next_observations = torch.zeros((self.buffer_size, *self.observation_space), dtype=torch.float32, device = storing_device) 157 | self.privileged_observations = torch.zeros((self.buffer_size, *self.privileged_observation_space), dtype=torch.float32, device = storing_device) 158 | self.privileged_next_observations = torch.zeros((self.buffer_size, *self.privileged_observation_space), dtype=torch.float32, device = storing_device) 159 | 160 | self.vision_observations = torch.zeros((self.buffer_size // 5, *self.vision_space), dtype = torch.uint8) 161 | self.next_vision_observations = torch.zeros_like(self.vision_observations) 162 | 163 | self.actions = torch.zeros((self.buffer_size, self.action_dim), dtype=torch.float32, device = storing_device) 164 | self.rewards = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device) 165 | self.cat_dones = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device) 166 | self.raw_constraints = torch.zeros((self.buffer_size, self.num_constraints), dtype=torch.float32, device = storing_device) 167 | self.dones = torch.zeros((self.buffer_size,), dtype=torch.bool, device = storing_device) 168 | self.p_ini_hidden_in = torch.zeros((self.buffer_size, 1, 256), dtype=torch.float32, device = storing_device) 169 | 170 | # For the current episodes that started being added to the replay buffer 171 | # but aren't done yet. We want to still sample from them, however the masking 172 | # needs a termination point to not overlap to the next episode when full or even to the empty 173 | # part of the buffer when not full. 
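# Note: `markers` flags the slots written by the most recent call to add(), one per
# environment. _get_samples() later combines them with `dones` when building the
# sequence mask, so a sampled window of seq_len transitions is cut off as soon as it
# reaches either an episode end or the current write head, and never bleeds into
# stale or still-empty parts of the buffer.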
174 | self.markers = torch.zeros((self.buffer_size,), dtype=torch.bool, device = storing_device) 175 | self.started_adding = False 176 | 177 | # Handle timeouts termination properly if needed 178 | # see https://github.com/DLR-RM/stable-baselines3/issues/284 179 | self.handle_timeout_termination = handle_timeout_termination 180 | self.timeouts = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device) 181 | 182 | def add( 183 | self, 184 | obs, 185 | privi_obs, 186 | vobs, 187 | next_obs, 188 | privi_next_obs, 189 | next_vobs, 190 | action, 191 | reward, 192 | cat_done, 193 | raw_constraints, 194 | done, 195 | p_ini_hidden_in, 196 | truncateds = None 197 | ): 198 | start_idx = self.pos 199 | stop_idx = min(self.pos + obs.shape[0], self.buffer_size) 200 | b_max_idx = stop_idx - start_idx 201 | 202 | overflow = False 203 | overflow_size = 0 204 | if self.pos + obs.shape[0] > self.buffer_size: 205 | overflow = True 206 | overflow_size = self.pos + obs.shape[0] - self.buffer_size 207 | 208 | assert start_idx % self.num_envs == 0, f"start_idx is not a multiple of {self.num_envs}" 209 | assert stop_idx % self.num_envs == 0, f"stop_idx is not a multiple of {self.num_envs}" 210 | assert b_max_idx == 0 or b_max_idx == self.num_envs, f"b_max_idx is not either 0 or {self.num_envs}" 211 | 212 | # Copy to avoid modification by reference 213 | self.observations[start_idx : stop_idx] = obs[: b_max_idx].clone().to(self.storing_device) 214 | self.vision_observations[(start_idx // (self.num_envs * 5)) * self.num_envs : (start_idx // (self.num_envs * 5)) * self.num_envs + b_max_idx] = \ 215 | vobs[: b_max_idx].clone().to(self.storing_device) 216 | 217 | self.next_observations[start_idx : stop_idx] = next_obs[: b_max_idx].clone().to(self.storing_device) 218 | self.next_vision_observations[(start_idx // (self.num_envs * 5)) * self.num_envs : (start_idx // (self.num_envs * 5)) * self.num_envs + b_max_idx] = \ 219 | next_vobs[: b_max_idx].clone().to(self.storing_device) 220 | 221 | self.privileged_observations[start_idx : stop_idx] = privi_obs[: b_max_idx].clone().to(self.storing_device) 222 | self.privileged_next_observations[start_idx : stop_idx] = privi_next_obs[: b_max_idx].clone().to(self.storing_device) 223 | self.actions[start_idx : stop_idx] = action[: b_max_idx].clone().to(self.storing_device) 224 | self.rewards[start_idx : stop_idx] = reward[: b_max_idx].clone().to(self.storing_device) 225 | self.cat_dones[start_idx : stop_idx] = cat_done[: b_max_idx].clone().to(self.storing_device) 226 | self.raw_constraints[start_idx : stop_idx] = raw_constraints[: b_max_idx].clone().to(self.storing_device) 227 | self.dones[start_idx : stop_idx] = done[: b_max_idx].clone().to(self.storing_device) 228 | self.p_ini_hidden_in[start_idx : stop_idx] = p_ini_hidden_in.swapaxes(0, 1)[: b_max_idx].clone().to(self.storing_device) 229 | 230 | # Current episodes last transition marker 231 | self.markers[start_idx : stop_idx] = 1 232 | # We need to unmark previous transitions as last 233 | # but only if it is not the first add to the replay buffer 234 | if self.started_adding: 235 | self.markers[self.prev_start_idx : self.prev_stop_idx] = 0 236 | if self.prev_overflow: 237 | self.markers[: self.prev_overflow_size] = 0 238 | self.started_adding = True 239 | self.prev_start_idx = start_idx 240 | self.prev_stop_idx = stop_idx 241 | self.prev_overflow = overflow 242 | self.prev_overflow_size = overflow_size 243 | 244 | if self.handle_timeout_termination: 245 | self.timeouts[start_idx : stop_idx] = truncateds[: 
b_max_idx].clone().to(self.storing_device) 246 | 247 | assert overflow_size == 0 or overflow_size == self.num_envs, f"overflow_size is not either 0 or {self.num_envs}" 248 | if overflow: 249 | self.full = True 250 | self.observations[: overflow_size] = obs[b_max_idx :].clone().to(self.storing_device) 251 | self.vision_observations[: overflow_size] = vobs[b_max_idx :].clone().to(self.storing_device) 252 | 253 | self.next_observations[: overflow_size] = next_obs[b_max_idx :].clone().to(self.storing_device) 254 | self.next_vision_observations[: overflow_size] = next_vobs[b_max_idx :].clone().to(self.storing_device) 255 | 256 | self.privileged_observations[: overflow_size] = privi_obs[b_max_idx :].clone().to(self.storing_device) 257 | self.privileged_next_observations[: overflow_size] = privi_next_obs[b_max_idx :].clone().to(self.storing_device) 258 | self.actions[: overflow_size] = action[b_max_idx :].clone().to(self.storing_device) 259 | self.rewards[: overflow_size] = reward[b_max_idx :].clone().to(self.storing_device) 260 | self.cat_dones[: overflow_size] = cat_done[b_max_idx :].clone().to(self.storing_device) 261 | self.raw_constraints[: overflow_size] = raw_constraints[b_max_idx :].clone().to(self.storing_device) 262 | self.dones[: overflow_size] = done[b_max_idx :].clone().to(self.storing_device) 263 | self.p_ini_hidden_in[: overflow_size] = p_ini_hidden_in.swapaxes(0, 1)[b_max_idx :].clone().to(self.storing_device) 264 | 265 | # Current episodes last transition marker 266 | self.markers[: overflow_size] = 1 267 | if self.handle_timeout_termination: 268 | self.timeouts[: overflow_size] = truncateds[b_max_idx :].clone().to(self.storing_device) 269 | self.pos = overflow_size 270 | else: 271 | self.pos += obs.shape[0] 272 | 273 | def sample(self, batch_size) -> SeqReplayBufferSamples: 274 | upper_bound = self.buffer_size if self.full else self.pos 275 | batch_inds = torch.randint(0, upper_bound, size = (batch_size,), device = self.storing_device) 276 | return self._get_samples(batch_inds) 277 | 278 | def _get_samples(self, batch_inds) -> SeqReplayBufferSamples: 279 | # Using modular arithmetic we get the indices of all the transitions of the episode starting from batch_inds 280 | # we get "episodes" of length self.seq_len, but their true length may be less, they can have ended before that 281 | # we'll deal with that using a mask 282 | # Using flat indexing we can actually slice through a tensor using 283 | # different starting points for each dimension of an axis 284 | # as long as the slice size remains constant 285 | # [1, 2, 3].repeat_interleave(3) -> [1, 1, 1, 2, 2, 2, 3, 3, 3] 286 | # [1, 2, 3].repeat(3) -> [1, 2, 3, 1, 2, 3, 1, 2, 3] 287 | all_indices_flat = (batch_inds.repeat_interleave(self.seq_len) + torch.arange(self.seq_len, device = self.storing_device).repeat(batch_inds.shape[0]) * self.num_envs) % self.buffer_size 288 | #all_indices_next_flat = (batch_inds.repeat_interleave(self.seq_len) + torch.arange(1, self.seq_len + 1, device = self.device).repeat(batch_inds.shape[0]) * self.num_envs) % self.buffer_size 289 | gathered_obs = self.observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.observations.shape[1:])) 290 | gathered_next_obs = self.next_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.observations.shape[1:])) 291 | 292 | gathered_privi_obs = self.privileged_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.privileged_observations.shape[1:])) 293 | gathered_privi_next_obs = 
self.privileged_next_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.privileged_observations.shape[1:])) 294 | 295 | all_indices_flat_vision = ( (((batch_inds // self.num_envs) // 5) * self.num_envs + batch_inds % self.num_envs).repeat_interleave((self.seq_len // 5)) + \ 296 | torch.arange(start = 0, end = (self.seq_len // 5), device = self.storing_device).repeat(batch_inds.shape[0]) * self.num_envs) % (self.buffer_size // 5) 297 | gathered_vobs = self.vision_observations[all_indices_flat_vision].reshape((batch_inds.shape[0], (self.seq_len // 5), *self.vision_observations.shape[1:])) 298 | gathered_next_vobs = self.next_vision_observations[all_indices_flat_vision].reshape((batch_inds.shape[0], (self.seq_len // 5), *self.vision_observations.shape[1:])) 299 | 300 | gathered_actions = self.actions[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.actions.shape[1:])) 301 | gathered_cat_dones = self.cat_dones[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 302 | gathered_raw_constraints = self.raw_constraints[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, self.num_constraints)) 303 | gathered_dones = self.dones[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 304 | gathered_truncateds = self.timeouts[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 305 | gathered_rewards = self.rewards[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 306 | 307 | gathered_p_ini_hidden_in = self.p_ini_hidden_in[batch_inds].swapaxes(0, 1) 308 | gathered_p_ini_hidden_out = self.p_ini_hidden_in[(batch_inds + self.num_envs) % self.buffer_size].swapaxes(0, 1) 309 | 310 | gathered_markers = self.markers[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 311 | mask = torch.cat([ 312 | torch.ones((batch_inds.shape[0], 1), device = self.storing_device), 313 | (1 - (gathered_dones | gathered_markers).float()).cumprod(dim = 1)[:, 1:] 314 | ], dim = 1) 315 | data = ( 316 | gathered_obs.to(self.training_device), 317 | gathered_privi_obs.to(self.training_device), 318 | gathered_vobs.to(self.training_device), 319 | gathered_actions.to(self.training_device), 320 | gathered_next_obs.to(self.training_device), 321 | gathered_privi_next_obs.to(self.training_device), 322 | gathered_next_vobs.to(self.training_device), 323 | gathered_cat_dones.to(self.training_device), 324 | gathered_raw_constraints.to(self.training_device), 325 | # Only use dones that are not due to timeouts 326 | # deactivated by default (timeouts is initialized as an array of False) 327 | (gathered_dones.float() * (1 - gathered_truncateds)).to(self.training_device), 328 | gathered_rewards.to(self.training_device), 329 | gathered_p_ini_hidden_in.to(self.training_device), 330 | gathered_p_ini_hidden_out.to(self.training_device), 331 | mask.to(self.training_device), 332 | ) 333 | return SeqReplayBufferSamples(*data) 334 | 335 | class ExtractObsWrapper(gym.ObservationWrapper): 336 | def observation(self, obs): 337 | return obs["obs"] 338 | 339 | def DDPG_demos_generate(cfg: DictConfig, envs): 340 | TARGET_POLICY_PATH = cfg["train"]["params"]["config"]["target_policy_path"] 341 | DEMOS_BUFFER_SIZE = int(cfg["train"]["params"]["config"]["demos_buffer_size"]) 342 | MAX_EPISODE_LENGTH = 500 343 | SEQ_LEN = 5 344 | NUM_CONSTRAINTS = 107 345 | RNN_CRITIC = True 346 | vis_h = 48 347 | vis_w = 48 348 | 349 | assert SEQ_LEN % 5 == 0, "SEQ_LEN must be a multiple of 5" 350 | assert DEMOS_BUFFER_SIZE % (5 * envs.num_envs) == 0, "DEMO_BUFFER_SIZE 
must be a multiple of 5 * num_envs" 351 | 352 | actor_sd = torch.load(TARGET_POLICY_PATH) 353 | 354 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 355 | 356 | run_path = f"runs/{cfg['task']['name']}_DDPG_demos_generate_D405_parkour40_CORR_CONTROL_{int(time.time())}" 357 | writer = SummaryWriter(run_path) 358 | 359 | if not os.path.exists(run_path): 360 | os.makedirs(run_path) 361 | 362 | OmegaConf.save(config = cfg, f = f"{run_path}/config.yaml") 363 | 364 | envs.single_action_space = envs.action_space 365 | envs.single_observation_space = envs.observation_space 366 | 367 | envs = ExtractObsWrapper(envs) 368 | 369 | actor_demos = Agent(envs).to(device) 370 | actor_demos.load_state_dict(actor_sd) 371 | print("Target actor for demonstrations loaded successfully.") 372 | envs.single_observation_space.dtype = np.float32 373 | rb_demos = SeqReplayBuffer( 374 | DEMOS_BUFFER_SIZE, 375 | (45,), 376 | envs.single_observation_space, 377 | (vis_h, vis_w), 378 | envs.single_action_space, 379 | NUM_CONSTRAINTS, 380 | MAX_EPISODE_LENGTH, 381 | SEQ_LEN, 382 | envs.num_envs, 383 | critic_rnn = RNN_CRITIC, 384 | storing_device = "cpu", 385 | training_device = device, 386 | handle_timeout_termination=True, 387 | ) 388 | 389 | filling_iterations = DEMOS_BUFFER_SIZE // envs.num_envs 390 | obs_privi = envs.reset() 391 | obs = obs_privi.clone()[:, : 45] 392 | vobs = torch.zeros((envs.num_envs, vis_h, vis_w), dtype = torch.uint8, device = device) 393 | next_vobs = vobs.clone() 394 | with torch.no_grad(): 395 | dummy_actions, _, _, _ = actor_demos.get_action_and_value(actor_demos.obs_rms(obs_privi, update = False)) 396 | actions_min, _ = dummy_actions.min(dim = 0) 397 | actions_max, _ = dummy_actions.max(dim = 0) 398 | 399 | dummy_hiddens = torch.zeros((1, envs.num_envs, 256), device = device) 400 | 401 | for fi in range(filling_iterations): 402 | with torch.no_grad(): 403 | actions, _, _, _ = actor_demos.get_action_and_value(actor_demos.obs_rms(obs_privi, update = False)) 404 | cur_min, _ = actions.min(dim = 0) 405 | cur_max, _ = actions.max(dim = 0) 406 | actions_min = torch.min(actions_min, cur_min) 407 | actions_max = torch.max(actions_max, cur_max) 408 | 409 | next_obs_privi, rewards, terminations, infos = envs.step(actions) 410 | true_dones = infos["true_dones"].float() 411 | truncateds = infos["truncateds"].float() 412 | raw_constraints = infos["raw_constraints"] 413 | 414 | next_obs = next_obs_privi.clone()[:, : 45] 415 | 416 | real_next_obs_privi = next_obs_privi.clone() 417 | real_next_obs = next_obs.clone() 418 | 419 | if "depth" in infos: 420 | next_vobs = (infos["depth"].clone()[..., 19:-18] * 255).to(torch.uint8) 421 | 422 | rb_demos.add(obs, obs_privi, vobs, real_next_obs, real_next_obs_privi, next_vobs, actions, rewards, terminations, raw_constraints, true_dones, dummy_hiddens, truncateds) 423 | 424 | # TRY NOT TO MODIFY: CRUCIAL step easy to overlook 425 | obs_privi = next_obs_privi 426 | obs = next_obs 427 | vobs = next_vobs 428 | 429 | torch.save((actions_min.cpu(), actions_max.cpu()), f"{run_path}/ppo_actions_minmax.pt") 430 | pkl.dump(rb_demos, open(f"{run_path}/rb_demos.pkl", "wb")) 431 | print(f"Demo replay buffer filled with {DEMOS_BUFFER_SIZE} expert demonstrations") 432 |
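The generation script above ends by serializing two artifacts, `rb_demos.pkl` and `ppo_actions_minmax.pt`, which the vision student in `DDPG_demos_rnn_vision.py` reloads. As a rough illustration (not part of the repository), they can be reloaded and inspected as sketched below, assuming a hypothetical `run_path` pointing at a finished generation run and that the `SeqReplayBuffer` class from this file is importable for unpickling:

import pickle as pkl
import torch

run_path = "runs/SoloParkour_DDPG_demos_generate_..."  # hypothetical run directory

rb_demos = pkl.load(open(f"{run_path}/rb_demos.pkl", "rb"))   # pickled SeqReplayBuffer
print(rb_demos.pos, rb_demos.full)                            # write head and whether the buffer wrapped

actions_min, actions_max = torch.load(f"{run_path}/ppo_actions_minmax.pt")
print(actions_min.shape, actions_max.shape)                   # per-dimension teacher action range

batch = rb_demos.sample(4)                                    # SeqReplayBufferSamples of [4, seq_len, ...] tensors
print(batch.observations.shape, batch.vision_observations.shape)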
-------------------------------------------------------------------------------- /solo12/solo12_mpi_scaled.urdf: --------------------------------------------------------------------------------
-------------------------------------------------------------------------------- /solo12/solo12_mpi.urdf: --------------------------------------------------------------------------------
| 717 | 718 | 719 | 720 | 721 | 722 | 723 | 724 | 725 | 726 | 727 | 728 | 729 | 730 | -------------------------------------------------------------------------------- /algos/DDPG_demos_rnn_vision.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | import os 4 | import random 5 | import time 6 | from tqdm.auto import tqdm 7 | import pickle as pkl 8 | import itertools 9 | 10 | from typing import NamedTuple 11 | 12 | from omegaconf import DictConfig, OmegaConf 13 | 14 | import numpy as np 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | import torch.optim as optim 20 | from torch.distributions.normal import Normal 21 | from torch.utils.tensorboard import SummaryWriter 22 | 23 | class SeqReplayBufferSamples(NamedTuple): 24 | observations: torch.Tensor 25 | privileged_observations: torch.Tensor 26 | vision_observations: torch.Tensor 27 | actions: torch.Tensor 28 | next_observations: torch.Tensor 29 | privileged_next_observations: torch.Tensor 30 | vision_next_observations: torch.Tensor 31 | cat_dones: torch.Tensor 32 | raw_constraints: torch.Tensor 33 | dones: torch.Tensor 34 | rewards: torch.Tensor 35 | p_ini_hidden_in: torch.Tensor 36 | p_ini_hidden_out: torch.Tensor 37 | mask: torch.Tensor 38 | 39 | class SeqReplayBuffer(): 40 | def __init__( 41 | self, 42 | buffer_size, 43 | observation_space, 44 | privileged_observation_space, 45 | vision_space, 46 | action_space, 47 | num_constraints, 48 | max_episode_length, 49 | seq_len, 50 | num_envs, 51 | storing_device = "cpu", 52 | training_device = "cpu", 53 | handle_timeout_termination = True, 54 | ): 55 | self.buffer_size = buffer_size 56 | self.max_episode_length = max_episode_length 57 | self.seq_len = seq_len 58 | self.observation_space = observation_space 59 | self.privileged_observation_space = privileged_observation_space.shape 60 | self.vision_space = vision_space 61 | self.action_space = action_space 62 | self.num_envs = num_envs 63 | 64 | self.num_constraints = num_constraints 65 | 66 | self.action_dim = int(np.prod(action_space.shape)) 67 | self.pos = 0 68 | self.full = False 69 | self.storing_device = storing_device 70 | self.training_device = training_device 71 | 72 | self.observations = torch.zeros((self.buffer_size, *self.observation_space), dtype=torch.float32, device = storing_device) 73 | self.next_observations = torch.zeros((self.buffer_size, *self.observation_space), dtype=torch.float32, device = storing_device) 74 | self.privileged_observations = torch.zeros((self.buffer_size, *self.privileged_observation_space), dtype=torch.float32, device = storing_device) 75 | self.privileged_next_observations = torch.zeros((self.buffer_size, *self.privileged_observation_space), dtype=torch.float32, device = storing_device) 76 | 77 | self.vision_observations = torch.zeros((self.buffer_size // 5, *self.vision_space), dtype = torch.uint8) 78 | self.next_vision_observations = torch.zeros_like(self.vision_observations) 79 | 80 | self.actions = torch.zeros((self.buffer_size, self.action_dim), dtype=torch.float32, device = storing_device) 81 | self.rewards = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device) 82 | self.cat_dones = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device) 83 | self.raw_constraints = torch.zeros((self.buffer_size, self.num_constraints), dtype=torch.float32, device = storing_device) 84 | self.dones = torch.zeros((self.buffer_size,), dtype=torch.bool, device = 
storing_device) 85 | self.p_ini_hidden_in = torch.zeros((self.buffer_size, 1, 256), dtype=torch.float32, device = storing_device) 86 | 87 | # For the current episodes that started being added to the replay buffer 88 | # but aren't done yet. We want to still sample from them, however the masking 89 | # needs a termination point to not overlap to the next episode when full or even to the empty 90 | # part of the buffer when not full. 91 | self.markers = torch.zeros((self.buffer_size,), dtype=torch.bool, device = storing_device) 92 | self.started_adding = False 93 | 94 | # Handle timeouts termination properly if needed 95 | # see https://github.com/DLR-RM/stable-baselines3/issues/284 96 | self.handle_timeout_termination = handle_timeout_termination 97 | self.timeouts = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device) 98 | 99 | def add( 100 | self, 101 | obs, 102 | privi_obs, 103 | vobs, 104 | next_obs, 105 | privi_next_obs, 106 | next_vobs, 107 | action, 108 | reward, 109 | cat_done, 110 | raw_constraints, 111 | done, 112 | p_ini_hidden_in, 113 | truncateds = None 114 | ): 115 | start_idx = self.pos 116 | stop_idx = min(self.pos + obs.shape[0], self.buffer_size) 117 | b_max_idx = stop_idx - start_idx 118 | 119 | overflow = False 120 | overflow_size = 0 121 | if self.pos + obs.shape[0] > self.buffer_size: 122 | overflow = True 123 | overflow_size = self.pos + obs.shape[0] - self.buffer_size 124 | 125 | assert start_idx % self.num_envs == 0, f"start_idx is not a multiple of {self.num_envs}" 126 | assert stop_idx % self.num_envs == 0, f"stop_idx is not a multiple of {self.num_envs}" 127 | assert b_max_idx == 0 or b_max_idx == self.num_envs, f"b_max_idx is not either 0 or {self.num_envs}" 128 | 129 | # Copy to avoid modification by reference 130 | self.observations[start_idx : stop_idx] = obs[: b_max_idx].clone().to(self.storing_device) 131 | self.vision_observations[(start_idx // (self.num_envs * 5)) * self.num_envs : (start_idx // (self.num_envs * 5)) * self.num_envs + b_max_idx] = \ 132 | vobs[: b_max_idx].clone().to(self.storing_device) 133 | 134 | self.next_observations[start_idx : stop_idx] = next_obs[: b_max_idx].clone().to(self.storing_device) 135 | self.next_vision_observations[(start_idx // (self.num_envs * 5)) * self.num_envs : (start_idx // (self.num_envs * 5)) * self.num_envs + b_max_idx] = \ 136 | next_vobs[: b_max_idx].clone().to(self.storing_device) 137 | 138 | self.privileged_observations[start_idx : stop_idx] = privi_obs[: b_max_idx].clone().to(self.storing_device) 139 | self.privileged_next_observations[start_idx : stop_idx] = privi_next_obs[: b_max_idx].clone().to(self.storing_device) 140 | self.actions[start_idx : stop_idx] = action[: b_max_idx].clone().to(self.storing_device) 141 | self.rewards[start_idx : stop_idx] = reward[: b_max_idx].clone().to(self.storing_device) 142 | self.cat_dones[start_idx : stop_idx] = cat_done[: b_max_idx].clone().to(self.storing_device) 143 | self.raw_constraints[start_idx : stop_idx] = raw_constraints[: b_max_idx].clone().to(self.storing_device) 144 | self.dones[start_idx : stop_idx] = done[: b_max_idx].clone().to(self.storing_device) 145 | self.p_ini_hidden_in[start_idx : stop_idx] = p_ini_hidden_in.swapaxes(0, 1)[: b_max_idx].clone().to(self.storing_device) 146 | 147 | # Current episodes last transition marker 148 | self.markers[start_idx : stop_idx] = 1 149 | # We need to unmark previous transitions as last 150 | # but only if it is not the first add to the replay buffer 151 | if self.started_adding: 152 | 
self.markers[self.prev_start_idx : self.prev_stop_idx] = 0 153 | if self.prev_overflow: 154 | self.markers[: self.prev_overflow_size] = 0 155 | self.started_adding = True 156 | self.prev_start_idx = start_idx 157 | self.prev_stop_idx = stop_idx 158 | self.prev_overflow = overflow 159 | self.prev_overflow_size = overflow_size 160 | 161 | if self.handle_timeout_termination: 162 | self.timeouts[start_idx : stop_idx] = truncateds[: b_max_idx].clone().to(self.storing_device) 163 | 164 | assert overflow_size == 0 or overflow_size == self.num_envs, f"overflow_size is not either 0 or {self.num_envs}" 165 | if overflow: 166 | self.full = True 167 | self.observations[: overflow_size] = obs[b_max_idx :].clone().to(self.storing_device) 168 | self.vision_observations[: overflow_size] = vobs[b_max_idx :].clone().to(self.storing_device) 169 | 170 | self.next_observations[: overflow_size] = next_obs[b_max_idx :].clone().to(self.storing_device) 171 | self.next_vision_observations[: overflow_size] = next_vobs[b_max_idx :].clone().to(self.storing_device) 172 | 173 | self.privileged_observations[: overflow_size] = privi_obs[b_max_idx :].clone().to(self.storing_device) 174 | self.privileged_next_observations[: overflow_size] = privi_next_obs[b_max_idx :].clone().to(self.storing_device) 175 | self.actions[: overflow_size] = action[b_max_idx :].clone().to(self.storing_device) 176 | self.rewards[: overflow_size] = reward[b_max_idx :].clone().to(self.storing_device) 177 | self.cat_dones[: overflow_size] = cat_done[b_max_idx :].clone().to(self.storing_device) 178 | self.raw_constraints[: overflow_size] = raw_constraints[b_max_idx :].clone().to(self.storing_device) 179 | self.dones[: overflow_size] = done[b_max_idx :].clone().to(self.storing_device) 180 | self.p_ini_hidden_in[: overflow_size] = p_ini_hidden_in.swapaxes(0, 1)[b_max_idx :].clone().to(self.storing_device) 181 | 182 | # Current episodes last transition marker 183 | self.markers[: overflow_size] = 1 184 | if self.handle_timeout_termination: 185 | self.timeouts[: overflow_size] = truncateds[b_max_idx :].clone().to(self.storing_device) 186 | self.pos = overflow_size 187 | else: 188 | self.pos += obs.shape[0] 189 | 190 | def sample(self, batch_size) -> SeqReplayBufferSamples: 191 | upper_bound = self.buffer_size if self.full else self.pos 192 | batch_inds = torch.randint(0, upper_bound, size = (batch_size,), device = self.storing_device) 193 | return self._get_samples(batch_inds) 194 | 195 | def _get_samples(self, batch_inds) -> SeqReplayBufferSamples: 196 | # Using modular arithmetic we get the indices of all the transitions of the episode starting from batch_inds 197 | # we get "episodes" of length self.seq_len, but their true length may be less, they can have ended before that 198 | # we'll deal with that using a mask 199 | # Using flat indexing we can actually slice through a tensor using 200 | # different starting points for each dimension of an axis 201 | # as long as the slice size remains constant 202 | # [1, 2, 3].repeat_interleave(3) -> [1, 1, 1, 2, 2, 2, 3, 3, 3] 203 | # [1, 2, 3].repeat(3) -> [1, 2, 3, 1, 2, 3, 1, 2, 3] 204 | all_indices_flat = (batch_inds.repeat_interleave(self.seq_len) + torch.arange(self.seq_len, device = self.storing_device).repeat(batch_inds.shape[0]) * self.num_envs) % self.buffer_size 205 | #all_indices_next_flat = (batch_inds.repeat_interleave(self.seq_len) + torch.arange(1, self.seq_len + 1, device = self.device).repeat(batch_inds.shape[0]) * self.num_envs) % self.buffer_size 206 | gathered_obs = 
self.observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.observations.shape[1:])) 207 | gathered_next_obs = self.next_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.observations.shape[1:])) 208 | 209 | gathered_privi_obs = self.privileged_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.privileged_observations.shape[1:])) 210 | gathered_privi_next_obs = self.privileged_next_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.privileged_observations.shape[1:])) 211 | 212 | all_indices_flat_vision = ( (((batch_inds // self.num_envs) // 5) * self.num_envs + batch_inds % self.num_envs).repeat_interleave((self.seq_len // 5)) + \ 213 | torch.arange(start = 0, end = (self.seq_len // 5), device = self.storing_device).repeat(batch_inds.shape[0]) * self.num_envs) % (self.buffer_size // 5) 214 | gathered_vobs = self.vision_observations[all_indices_flat_vision].reshape((batch_inds.shape[0], (self.seq_len // 5), *self.vision_observations.shape[1:])) 215 | gathered_next_vobs = self.next_vision_observations[all_indices_flat_vision].reshape((batch_inds.shape[0], (self.seq_len // 5), *self.vision_observations.shape[1:])) 216 | 217 | gathered_actions = self.actions[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.actions.shape[1:])) 218 | gathered_cat_dones = self.cat_dones[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 219 | gathered_raw_constraints = self.raw_constraints[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, self.num_constraints)) 220 | gathered_dones = self.dones[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 221 | gathered_truncateds = self.timeouts[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 222 | gathered_rewards = self.rewards[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 223 | 224 | gathered_p_ini_hidden_in = self.p_ini_hidden_in[batch_inds].swapaxes(0, 1) 225 | gathered_p_ini_hidden_out = self.p_ini_hidden_in[(batch_inds + self.num_envs) % self.buffer_size].swapaxes(0, 1) 226 | 227 | gathered_markers = self.markers[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 228 | mask = torch.cat([ 229 | torch.ones((batch_inds.shape[0], 1), device = self.storing_device), 230 | (1 - (gathered_dones | gathered_markers).float()).cumprod(dim = 1)[:, 1:] 231 | ], dim = 1) 232 | data = ( 233 | gathered_obs.to(self.training_device), 234 | gathered_privi_obs.to(self.training_device), 235 | gathered_vobs.to(self.training_device), 236 | gathered_actions.to(self.training_device), 237 | gathered_next_obs.to(self.training_device), 238 | gathered_privi_next_obs.to(self.training_device), 239 | gathered_next_vobs.to(self.training_device), 240 | gathered_cat_dones.to(self.training_device), 241 | gathered_raw_constraints.to(self.training_device), 242 | # Only use dones that are not due to timeouts 243 | # deactivated by default (timeouts is initialized as an array of False) 244 | (gathered_dones.float() * (1 - gathered_truncateds)).to(self.training_device), 245 | gathered_rewards.to(self.training_device), 246 | gathered_p_ini_hidden_in.to(self.training_device), 247 | gathered_p_ini_hidden_out.to(self.training_device), 248 | mask.to(self.training_device), 249 | ) 250 | return SeqReplayBufferSamples(*data) 251 | 252 | def random_translate(imgs, pad = 4): 253 | n, c, h, w = imgs.size() 254 | imgs = torch.nn.functional.pad(imgs, (pad, pad, pad, pad)) #, mode = "replicate") 255 | w1 = 
torch.randint(0, 2*pad + 1, (n,)) 256 | h1 = torch.randint(0, 2*pad + 1, (n,)) 257 | cropped = torch.empty((n, c, h, w), dtype=imgs.dtype, device=imgs.device) 258 | for i, (img, w11, h11) in enumerate(zip(imgs, w1, h1)): 259 | cropped[i][:] = img[:, h11:h11 + h, w11:w11 + w] 260 | return cropped 261 | 262 | def random_noise(images, noise_std=0.02): 263 | N, C, H, W = images.size() 264 | noise = noise_std*torch.randn_like(images) 265 | noise *= torch.bernoulli(0.5*torch.ones(N, 1, 1, 1).to(images)) # Only applied on half of the images 266 | return images + noise 267 | 268 | def random_shift(images, padding=4): 269 | N, C, H, W = images.size() 270 | padded_images = torch.nn.functional.pad(images, (padding, padding, padding, padding), mode='replicate') 271 | crop_x = torch.randint(0, 2 * padding, (N,)) 272 | crop_y = torch.randint(0, 2 * padding, (N,)) 273 | shifted_images = torch.zeros_like(images) 274 | for i in range(N): 275 | shifted_images[i] = padded_images[i, :, crop_y[i]:crop_y[i] + H, crop_x[i]:crop_x[i] + W] 276 | return shifted_images 277 | 278 | def random_cutout(images, min_size=2, max_size=24): 279 | N, C, H, W = images.size() 280 | for i in range(N): 281 | size_h = random.randint(min_size, max_size) 282 | size_w = random.randint(min_size, max_size) 283 | top = random.randint(0, H - size_h) 284 | left = random.randint(0, W - size_w) 285 | coin_flip = random.random() 286 | if coin_flip < 0.2: 287 | fill_value = 0.0 288 | images[i, :, top:top + size_h, left:left + size_w] = fill_value 289 | elif coin_flip < 0.4: 290 | fill_value = 1.0 291 | images[i, :, top:top + size_h, left:left + size_w] = fill_value 292 | elif coin_flip < 0.6: 293 | fill_value = torch.rand((C, size_h, size_w), device=images.device) 294 | images[i, :, top:top + size_h, left:left + size_w] = fill_value 295 | return images 296 | 297 | class DepthOnlyFCBackbone58x87(nn.Module): 298 | def __init__(self, scandots_output_dim, seq_len, batch_size): 299 | super().__init__() 300 | 301 | activation = nn.ELU() 302 | self.image_compression = nn.Sequential( 303 | nn.Conv2d(1, 16, 5), nn.LeakyReLU(), nn.MaxPool2d(2), 304 | nn.Conv2d(16, 32, 4), nn.LeakyReLU(), nn.MaxPool2d(2), 305 | nn.Conv2d(32, 32, 3), nn.LeakyReLU(), 306 | nn.Flatten(), 307 | nn.Linear(1568, scandots_output_dim), # 48x48 308 | nn.LeakyReLU(), 309 | nn.Linear(128, scandots_output_dim) 310 | ) 311 | 312 | self.output_activation = activation 313 | 314 | def forward(self, vobs, hist = True, augment = False): 315 | bs, seql, w, h = vobs.size() 316 | 317 | vobs = vobs.view(-1, 1, w, h) 318 | if augment: 319 | vobs = random_cutout(random_noise(random_shift(vobs))) 320 | 321 | vision_latent = self.output_activation(self.image_compression(vobs)) 322 | vision_latent = vision_latent.view(bs, seql, 128) 323 | 324 | if hist: 325 | vision_latent = vision_latent.repeat_interleave(5, axis = 1) 326 | 327 | return vision_latent 328 | 329 | class ExtraCorpusQMemory(nn.Module): 330 | def __init__(self, seq_len, batch_size): 331 | super().__init__() 332 | self.vision = DepthOnlyFCBackbone58x87(128, seq_len, batch_size) 333 | self.memory = nn.GRU(128, hidden_size = 256, batch_first = True) 334 | 335 | def forward(self, x, hidden_in): 336 | if hidden_in is None: 337 | raise NotImplementedError 338 | 339 | vision_latent = self.vision(x) 340 | time_latent, hidden_out = self.memory(vision_latent, hidden_in) 341 | return time_latent, hidden_out 342 | 343 | class QNetworkVanilla(nn.Module): 344 | def __init__(self, env, device): 345 | super().__init__() 346 | self.memory = 
nn.GRU(np.array(env.single_observation_space.shape).prod() + np.prod(env.single_action_space.shape), hidden_size = 256, batch_first = True) # dummy memory for compatibility 347 | self.fc1 = nn.Linear(np.array(env.single_observation_space.shape).prod() + np.prod(env.single_action_space.shape), 512) 348 | self.ln1 = nn.LayerNorm(512) 349 | self.fc2 = nn.Linear(512, 256) 350 | self.ln2 = nn.LayerNorm(256) 351 | self.fc3 = nn.Linear(256, 128) 352 | self.ln3 = nn.LayerNorm(128) 353 | self.fc4 = nn.Linear(128, 1) 354 | 355 | def forward(self, x, a, latent): 356 | x = torch.cat([x, a], -1) 357 | x = F.elu(self.ln1(self.fc1(x))) 358 | x = F.elu(self.ln2(self.fc2(x))) 359 | x = F.elu(self.ln3(self.fc3(x))) 360 | x = self.fc4(x) 361 | return x, None 362 | 363 | class Actor(nn.Module): 364 | def __init__(self, env): 365 | super().__init__() 366 | self.memory = nn.GRU(128 + 45, hidden_size = 256, batch_first = True) 367 | self.fc1 = nn.Linear(256, 512) 368 | self.fc2 = nn.Linear(512, 256) 369 | self.fc3 = nn.Linear(256, 128) 370 | self.fc_mu = nn.Linear(128, np.prod(env.single_action_space.shape)) 371 | 372 | # action rescaling 373 | self.register_buffer( 374 | "action_scale", torch.tensor((env.action_space.high - env.action_space.low) / 2.0, dtype=torch.float32) 375 | ) 376 | self.register_buffer( 377 | "action_bias", torch.tensor((env.action_space.high + env.action_space.low) / 2.0, dtype=torch.float32) 378 | ) 379 | 380 | def forward(self, x, vision_latent, hidden_in): 381 | if hidden_in is None: 382 | raise NotImplementedError 383 | 384 | x = torch.cat([x, vision_latent], -1) 385 | time_latent, hidden_out = self.memory(x, hidden_in) 386 | x = F.elu(self.fc1(time_latent)) 387 | x = F.elu(self.fc2(x)) 388 | x = F.elu(self.fc3(x)) 389 | x = torch.tanh(self.fc_mu(x)) 390 | 391 | return x * self.action_scale + self.action_bias, hidden_out, None 392 | 393 | class ExtractObsWrapper(gym.ObservationWrapper): 394 | def observation(self, obs): 395 | return obs["obs"] 396 | 397 | def DDPG_demos_rnn_vision(cfg: DictConfig, envs): 398 | TOTAL_TIMESTEPS = int(cfg["train"]["params"]["config"]["total_timesteps"]) 399 | CRITIC_LEARNING_RATE = cfg["train"]["params"]["config"]["critic_learning_rate"] 400 | ACTOR_LEARNING_RATE = cfg["train"]["params"]["config"]["actor_learning_rate"] 401 | BUFFER_SIZE = int(cfg["train"]["params"]["config"]["buffer_size"]) 402 | LEARNING_STARTS = cfg["train"]["params"]["config"]["learning_starts"] 403 | GAMMA = cfg["train"]["params"]["config"]["gamma"] 404 | POLICY_FREQUENCY = cfg["train"]["params"]["config"]["policy_frequency"] 405 | TAU = cfg["train"]["params"]["config"]["tau"] 406 | BATCH_SIZE = cfg["train"]["params"]["config"]["batch_size"] 407 | POLICY_NOISE = cfg["train"]["params"]["config"]["policy_noise"] 408 | NOISE_CLIP = cfg["train"]["params"]["config"]["noise_clip"] 409 | DEMOS_RB_PATH = cfg["train"]["params"]["config"]["demos_rb_path"] 410 | MAX_EPISODE_LENGTH = 500 411 | SEQ_LEN = cfg["train"]["params"]["config"]["seq_len"] 412 | EVAL_AT_END = True 413 | NUM_CONSTRAINTS = 107 414 | CRITIC_NB = 10 415 | num_atoms = 101 416 | vis_h = 48 417 | vis_w = 48 418 | 419 | assert SEQ_LEN % 5 == 0, "SEQ_LEN must be a multiple of 5" 420 | assert BUFFER_SIZE % (5 * envs.num_envs) == 0, "BUFFER_SIZE must be a multiple of 5 * num_envs" 421 | 422 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 423 | 424 | run_path = 
f"runs/{cfg['task']['name']}_DDPG_demos_rnn_redq_D405_z0.03_x0.24_parkour40_CORR_AUG_48x48_bigconv_seqlen{SEQ_LEN}_bs{BATCH_SIZE}_seed{cfg.seed}_{int(time.time())}" 425 | writer = SummaryWriter(run_path) 426 | 427 | if not os.path.exists(run_path): 428 | os.makedirs(run_path) 429 | 430 | OmegaConf.save(config = cfg, f = f"{run_path}/config.yaml") 431 | 432 | envs.single_action_space = envs.action_space 433 | envs.single_observation_space = envs.observation_space 434 | 435 | envs = ExtractObsWrapper(envs) 436 | 437 | rb_demos = pkl.load(open(f"{DEMOS_RB_PATH}/rb_demos.pkl", "rb")) 438 | ###### 439 | rb_demos.seq_len = SEQ_LEN 440 | ###### 441 | actions_min, actions_max = torch.load(f"{DEMOS_RB_PATH}/ppo_actions_minmax.pt") 442 | actions_min, actions_max = actions_min.to(device), actions_max.to(device) 443 | envs.action_space.low = actions_min.cpu().numpy() 444 | envs.action_space.high = actions_max.cpu().numpy() 445 | envs.single_action_space = envs.action_space 446 | envs.single_observation_space = envs.observation_space 447 | 448 | vision_nn = DepthOnlyFCBackbone58x87(128, SEQ_LEN, BATCH_SIZE).to(device) 449 | actor = Actor(envs).to(device) 450 | 451 | QNetwork = QNetworkVanilla 452 | qfs = [QNetwork(envs, device = device).to(device) for _ in range(CRITIC_NB)] 453 | qf_targets = [QNetwork(envs, device = device).to(device) for _ in range(CRITIC_NB)] 454 | for i in range(CRITIC_NB): 455 | qf_targets[i].load_state_dict(qfs[i].state_dict()) 456 | 457 | q_optimizer = optim.Adam(itertools.chain(*([q.parameters() for q in qfs])), lr=CRITIC_LEARNING_RATE) 458 | qf1 = QNetwork(envs, device = device).to(device) 459 | qf2 = QNetwork(envs, device = device).to(device) 460 | actor_optimizer = optim.Adam(list(actor.parameters()) + list(vision_nn.parameters()), lr=ACTOR_LEARNING_RATE) 461 | 462 | envs.single_observation_space.dtype = np.float32 463 | rb = SeqReplayBuffer( 464 | BUFFER_SIZE, 465 | (45,), 466 | envs.single_observation_space, 467 | (vis_h, vis_w), 468 | envs.single_action_space, 469 | NUM_CONSTRAINTS, 470 | MAX_EPISODE_LENGTH, 471 | SEQ_LEN, 472 | envs.num_envs, 473 | storing_device = "cpu", 474 | training_device = device, 475 | handle_timeout_termination=True, 476 | ) 477 | start_time = time.time() 478 | 479 | # TRY NOT TO MODIFY: start the game 480 | obs_privi = envs.reset() 481 | obs = obs_privi.clone()[:, : 45] 482 | vobs = torch.zeros((envs.num_envs, vis_h, vis_w), device = device) 483 | next_vobs = vobs.clone() 484 | vision_latent = None 485 | 486 | isaac_env_steps = TOTAL_TIMESTEPS // envs.num_envs 487 | print(f"Starting training for {isaac_env_steps} isaac env steps") 488 | 489 | gru_p_hidden_in = torch.zeros((actor.memory.num_layers, envs.num_envs, actor.memory.hidden_size), device = device) # p for policy 490 | gru_p_hidden_out = gru_p_hidden_in.clone() 491 | 492 | true_steps_num = 0 493 | actor_training_step = 0 494 | for global_step in range(isaac_env_steps): 495 | # ALGO LOGIC: put action logic here 496 | with torch.no_grad(): 497 | if global_step % 5 == 0: 498 | vision_latent = vision_nn(vobs.unsqueeze(1), hist = False) 499 | if true_steps_num < LEARNING_STARTS: 500 | actions = torch.zeros((envs.num_envs, rb.action_dim), device = device).uniform_(-1, 1) 501 | else: 502 | with torch.no_grad(): 503 | actions, gru_p_hidden_out, _ = actor(obs.unsqueeze(1), vision_latent, gru_p_hidden_in) 504 | actions = actions.squeeze(1) 505 | 506 | true_steps_num += envs.num_envs 507 | 508 | # TRY NOT TO MODIFY: execute the game and log data. 
509 | next_obs_privi, rewards, terminations, infos = envs.step(actions) 510 | true_dones = infos["true_dones"] 511 | truncateds = infos["truncateds"].float() 512 | raw_constraints = infos["raw_constraints"] 513 | 514 | next_obs = next_obs_privi.clone()[:, : 45] 515 | 516 | real_next_obs_privi = next_obs_privi.clone() 517 | real_next_obs = next_obs.clone() 518 | if(true_dones.any()): 519 | true_dones_idx = torch.argwhere(true_dones).squeeze() 520 | gru_p_hidden_out[:, true_dones_idx] = 0 521 | 522 | if "depth" in infos: 523 | next_vobs = infos["depth"].clone()[..., 19:-18] 524 | 525 | rb.add(obs, obs_privi, (vobs * 255).to(torch.uint8), real_next_obs, real_next_obs_privi, (next_vobs * 255).to(torch.uint8), \ 526 | actions, rewards, terminations, raw_constraints, true_dones, gru_p_hidden_in, truncateds) 527 | gru_p_hidden_in = gru_p_hidden_out 528 | 529 | if (global_step + 1) % 24 == 0: 530 | for el, v in zip(list(envs.episode_sums.keys())[:envs.numRewards], (torch.mean(envs.rew_mean_reset, dim=0)).tolist()): 531 | writer.add_scalar(f"reward/{el}", v, (global_step + 1) // 24) 532 | writer.add_scalar(f"reward/cum_rew", (torch.sum(torch.mean(envs.rew_cum_reset, dim=0))).item(), (global_step + 1) // 24) 533 | writer.add_scalar(f"reward/avg_rew", envs.terrain_levels.float().mean().item(), (global_step + 1) // 24) 534 | for el, v in zip(envs.cstr_manager.get_names(), (100.0 * torch.mean(envs.cstr_mean_reset, dim=0)).tolist()): 535 | writer.add_scalar(f"cstr/{el}", v, (global_step + 1) // 24) 536 | 537 | # TRY NOT TO MODIFY: CRUCIAL step easy to overlook 538 | obs_privi = next_obs_privi 539 | obs = next_obs 540 | vobs = next_vobs 541 | 542 | # ALGO LOGIC: training. 543 | if true_steps_num > LEARNING_STARTS: 544 | for local_steps in range(8): 545 | data = rb.sample(BATCH_SIZE // 2) 546 | data_expert = rb_demos.sample(BATCH_SIZE // 2) 547 | data = SeqReplayBufferSamples( 548 | observations=torch.cat([data.observations, data_expert.observations], dim=0), 549 | privileged_observations=torch.cat([data.privileged_observations, data_expert.privileged_observations], dim=0), 550 | vision_observations=torch.cat([data.vision_observations, data_expert.vision_observations], dim=0).float() / 255, 551 | actions=torch.cat([data.actions, data_expert.actions], dim=0), 552 | next_observations=torch.cat([data.next_observations, data_expert.next_observations], dim=0), 553 | privileged_next_observations=torch.cat([data.privileged_next_observations, data_expert.privileged_next_observations], dim=0), 554 | vision_next_observations=torch.cat([data.vision_next_observations, data_expert.vision_next_observations], dim=0).float() / 255, 555 | cat_dones=torch.cat([data.cat_dones, data_expert.cat_dones], dim=0), 556 | raw_constraints=torch.cat([data.raw_constraints, data_expert.raw_constraints], dim=0), 557 | dones=torch.cat([data.dones, data_expert.dones], dim=0), 558 | rewards=torch.cat([data.rewards, data_expert.rewards], dim=0), 559 | p_ini_hidden_in=torch.cat([data.p_ini_hidden_in, data_expert.p_ini_hidden_in], dim=1), 560 | p_ini_hidden_out=torch.cat([data.p_ini_hidden_out, data_expert.p_ini_hidden_out], dim=1), 561 | mask=torch.cat([data.mask, data_expert.mask], dim = 0), 562 | ) 563 | 564 | norm_const = data.raw_constraints / envs.cstr_manager.get_running_maxes() 565 | recomputed_cat_dones = (envs.cstr_manager.min_p + torch.clamp( 566 | norm_const, 567 | min=0.0, 568 | max=1.0 569 | ) * (envs.cstr_manager.get_max_p() - envs.cstr_manager.min_p)).max(-1).values 570 | 571 | with torch.no_grad(): 572 | clipped_noise = 
(torch.randn_like(data.actions, device=device) * POLICY_NOISE).clamp( 573 | -NOISE_CLIP, NOISE_CLIP 574 | ) #* actor.action_scale 575 | 576 | vlatent = vision_nn(data.vision_next_observations) 577 | qvlatent = None 578 | next_state_actions, _, _ = actor(data.next_observations, vlatent, data.p_ini_hidden_out) 579 | next_state_actions = (next_state_actions + clipped_noise).clamp( 580 | actions_min, actions_max 581 | ) 582 | targets_selected = torch.randperm(CRITIC_NB)[:2] 583 | qf_next_targets = torch.stack([qf_targets[i](data.privileged_next_observations, next_state_actions, qvlatent)[0] for i in targets_selected]) 584 | min_qf_next_target = qf_next_targets.min(dim = 0).values 585 | next_q_value = (1 - recomputed_cat_dones.flatten()) * data.rewards.flatten() + (1 - recomputed_cat_dones.flatten()) * (1 - data.dones.flatten()) * GAMMA * (min_qf_next_target).view(-1) 586 | 587 | true_samples_nb = data.mask.sum() 588 | qvlatent = None 589 | qf_a_values = torch.stack([qf(data.privileged_observations, data.actions, qvlatent)[0].view(-1) for qf in qfs]) 590 | qf_loss = (((qf_a_values - next_q_value.unsqueeze(0)) ** 2) * data.mask.view(-1).unsqueeze(0)).sum() / (true_samples_nb * CRITIC_NB) 591 | 592 | # optimize the model 593 | q_optimizer.zero_grad() 594 | qf_loss.backward() 595 | nn.utils.clip_grad_norm_(qf1.parameters(), 0.5) 596 | q_optimizer.step() 597 | 598 | if local_steps % POLICY_FREQUENCY == 0: 599 | for i in range(CRITIC_NB): 600 | for param, target_param in zip(qfs[i].parameters(), qf_targets[i].parameters()): 601 | target_param.data.copy_(TAU * param.data + (1 - TAU) * target_param.data) 602 | 603 | if local_steps == 7: 604 | actor_training_step += 1 605 | vlatent = vision_nn(data.vision_observations, augment = True) 606 | qvlatent = None 607 | diff_actions, _, recons_height_maps = actor(data.observations, vlatent, data.p_ini_hidden_in) 608 | 609 | qs = torch.stack([qf(data.privileged_observations, diff_actions, qvlatent)[0] for qf in qfs]) 610 | actor_loss = -(qs.squeeze(-1) * data.mask.unsqueeze(0)).sum() / (true_samples_nb * CRITIC_NB) 611 | final_loss = actor_loss 612 | 613 | actor_optimizer.zero_grad() 614 | final_loss.backward() 615 | nn.utils.clip_grad_norm_(actor.parameters(), 0.5) 616 | actor_optimizer.step() 617 | 618 | writer.add_scalar(f"loss/actor_loss", actor_loss.item(), actor_training_step) 619 | writer.add_scalar(f"infos/Q_max", qs.max().item(), actor_training_step) 620 | 621 | if (global_step + 1) % (500 * 24) == 0: 622 | model_path = f"{run_path}/cleanrl_model_e{(global_step + 1) // 24}.pt" 623 | torch.save((actor.state_dict(), vision_nn.state_dict()), model_path) 624 | print("Saved model checkpoint") 625 | 626 | if (global_step + 1) % (5 * 24) == 0: 627 | model_path = f"{run_path}/cleanrl_model.pt" 628 | torch.save((actor.state_dict(), vision_nn.state_dict()), model_path) 629 | print("Saved model") 630 | 631 | def eval_DDPG_demos_rnn_vision(cfg: DictConfig, envs): 632 | import cv2 633 | vis_h = 48 634 | vis_w = 48 635 | is_video_gen = "video_save_path" in cfg["task"] 636 | 637 | if is_video_gen: 638 | video_save_path = cfg["task"]["video_save_path"] 639 | position = video_save_path.rfind(".mp4") 640 | output_file = video_save_path[:position] + "_depth" + video_save_path[position:] 641 | fps = 10 642 | 643 | checkpoint = cfg["checkpoint"] 644 | actor_sd, vision_sd = torch.load(checkpoint) 645 | 646 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 647 | 648 | envs.single_action_space = envs.action_space 649 | envs.single_observation_space = 
envs.observation_space 650 | 651 | envs = ExtractObsWrapper(envs) 652 | 653 | vision_nn = DepthOnlyFCBackbone58x87(128, 0, 0).to(device) 654 | vision_nn.load_state_dict(vision_sd) 655 | actor = Actor(envs).to(device) 656 | actor.load_state_dict(actor_sd) 657 | 658 | obs_privi = envs.reset() 659 | obs = obs_privi.clone()[:, : 45] 660 | vobs = torch.zeros((envs.num_envs, vis_h, vis_w), device = device) 661 | next_vobs = vobs.clone() 662 | vision_latent = None 663 | gru_p_hidden_in = torch.zeros((actor.memory.num_layers, envs.num_envs, actor.memory.hidden_size), device = device) # p for policy 664 | 665 | if is_video_gen: 666 | depth_images = [] 667 | 668 | for global_step in range(2000): 669 | with torch.no_grad(): 670 | if global_step % 5 == 0: 671 | vision_latent = vision_nn(vobs.unsqueeze(1), hist = False) 672 | 673 | with torch.no_grad(): 674 | actions, gru_p_hidden_out, _ = actor(obs.unsqueeze(1), vision_latent, gru_p_hidden_in) 675 | actions = actions.squeeze(1) 676 | 677 | next_obs_privi, rewards, terminations, infos = envs.step(actions) 678 | next_obs = next_obs_privi.clone()[:, : 45] 679 | obs = next_obs 680 | if "depth" in infos: 681 | next_vobs = infos["depth"].clone()[..., 19:-18] 682 | vobs = next_vobs 683 | gru_p_hidden_in = gru_p_hidden_out 684 | 685 | if is_video_gen: 686 | depth_images.append((vobs * 255).to(torch.uint8).squeeze().cpu().numpy()) 687 | if global_step == 200: 688 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 689 | out = cv2.VideoWriter(output_file, fourcc, fps, (vis_w, vis_h), isColor=True) 690 | out.set(cv2.VIDEOWRITER_PROP_QUALITY, 1) # 1 is the highest quality setting 691 | for i in range(len(depth_images)): 692 | # Get the i-th frame 693 | frame = depth_images[i] 694 | 695 | if frame.shape != (vis_h, vis_w): 696 | raise ValueError(f"Frame {i} has incorrect shape: {frame.shape}") 697 | 698 | # Write the frame to the video (as a single-channel image) 699 | rgb_image = cv2.applyColorMap(frame, cv2.COLORMAP_MAGMA) 700 | out.write(rgb_image) 701 | 702 | # Release the VideoWriter object 703 | out.release() 704 | print(f"Video saved to {output_file}") 705 | --------------------------------------------------------------------------------
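For completeness, here is a minimal sketch (not part of the repository) of reloading a checkpoint saved by the training loop above and querying it for a single action. The checkpoint path and the `envs` handle are placeholders, and the shapes follow the conventions used in `eval_DDPG_demos_rnn_vision` (45-dimensional proprioceptive slice, 48x48 depth patch, 256-unit GRU hidden state, vision latent refreshed every 5 control steps):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hypothetical checkpoint written by DDPG_demos_rnn_vision (a tuple of state dicts).
actor_sd, vision_sd = torch.load("runs/<run_name>/cleanrl_model.pt", map_location=device)

vision_nn = DepthOnlyFCBackbone58x87(128, 0, 0).to(device)
vision_nn.load_state_dict(vision_sd)
actor = Actor(envs).to(device)        # `envs` must expose the same spaces as during training
actor.load_state_dict(actor_sd)

obs = envs.reset()[:, :45]                                     # proprioceptive slice
vobs = torch.zeros((envs.num_envs, 48, 48), device=device)     # latest depth patch
hidden = torch.zeros((1, envs.num_envs, 256), device=device)   # policy GRU state

with torch.no_grad():
    latent = vision_nn(vobs.unsqueeze(1), hist=False)          # recompute only every 5 env steps
    actions, hidden, _ = actor(obs.unsqueeze(1), latent, hidden)
    actions = actions.squeeze(1)                               # one action per environment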