├── cfg ├── pbt │ └── no_pbt.yaml ├── train │ ├── SoloParkourDDPG_demos_generate.yaml │ ├── SoloParkourDDPG_demos_rnn_vision.yaml │ └── SoloParkourPPO.yaml ├── config.yaml └── task │ └── SoloParkour.yaml ├── assets └── teaser.jpeg ├── solo12 ├── meshes │ ├── checker_blue.png │ ├── stl │ │ ├── solo12 │ │ │ ├── solo_12_base.stl │ │ │ ├── solo12_hip_fe_fl.stl │ │ │ ├── solo12_hip_fe_fr.stl │ │ │ ├── solo12_hip_fe_hl.stl │ │ │ ├── solo12_hip_fe_hr.stl │ │ │ ├── solo12_coordinate_systems.png │ │ │ ├── _solo12_visualisation_model.PDF │ │ │ ├── solo12_coordinate_systems_2.png │ │ │ ├── solo12_coordinate_systems_3.png │ │ │ ├── solo12_coordinate_systems_4.png │ │ │ ├── solo12_lower_leg_left_side.stl │ │ │ ├── solo12_lower_leg_right_side.stl │ │ │ ├── solo12_upper_leg_left_side.stl │ │ │ └── solo12_upper_leg_right_side.stl │ │ ├── with_foot │ │ │ ├── solo_body.stl │ │ │ ├── solo_foot.stl │ │ │ ├── solo_lower_leg_left_side.stl │ │ │ ├── solo_upper_leg_left_side.stl │ │ │ ├── solo_lower_leg_right_side.stl │ │ │ └── solo_upper_leg_right_side.stl │ │ └── without_foot │ │ │ ├── solo_body.stl │ │ │ ├── solo_lower_leg_left_side.stl │ │ │ ├── solo_upper_leg_left_side.stl │ │ │ ├── solo_lower_leg_right_side.stl │ │ │ ├── solo_upper_leg_right_side.stl │ │ │ ├── solo_lower_leg_v2_left_side.stl │ │ │ └── solo_lower_leg_v2_right_side.stl │ ├── obj │ │ ├── solo12 │ │ │ └── _solo12_visualisation_model.PDF │ │ ├── with_foot │ │ │ ├── solo_body.mtl │ │ │ ├── solo_foot.mtl │ │ │ ├── solo_lower_leg_left_side.mtl │ │ │ ├── solo_lower_leg_right_side.mtl │ │ │ ├── solo_upper_leg_left_side.mtl │ │ │ └── solo_upper_leg_right_side.mtl │ │ └── without_foot │ │ │ ├── solo_body.mtl │ │ │ ├── solo_lower_leg_left_side.mtl │ │ │ ├── solo_upper_leg_left_side.mtl │ │ │ ├── solo_lower_leg_right_side.mtl │ │ │ └── solo_upper_leg_right_side.mtl │ ├── plane.mtl │ ├── plane.obj │ ├── cube.obj │ ├── sphere.obj │ └── cylinder.obj ├── srdf │ └── solo.srdf ├── solo12_mpi_scaled.urdf └── solo12_mpi.urdf ├── LICENSE.txt ├── README.md ├── train.py ├── utils ├── constraint_manager.py └── isaacgymenvs_make.py ├── algos ├── PPO.py ├── DDPG_demos_generate.py └── DDPG_demos_rnn_vision.py └── tasks └── terrainParkour.py /cfg/pbt/no_pbt.yaml: -------------------------------------------------------------------------------- 1 | enabled: False -------------------------------------------------------------------------------- /assets/teaser.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/assets/teaser.jpeg -------------------------------------------------------------------------------- /solo12/meshes/checker_blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/checker_blue.png -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo_12_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo_12_base.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/with_foot/solo_body.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_body.stl 
-------------------------------------------------------------------------------- /solo12/meshes/stl/with_foot/solo_foot.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_foot.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_body.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_body.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_hip_fe_fl.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_hip_fe_fl.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_hip_fe_fr.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_hip_fe_fr.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_hip_fe_hl.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_hip_fe_hl.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_hip_fe_hr.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_hip_fe_hr.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_coordinate_systems.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_coordinate_systems.png -------------------------------------------------------------------------------- /solo12/meshes/obj/solo12/_solo12_visualisation_model.PDF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/obj/solo12/_solo12_visualisation_model.PDF -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/_solo12_visualisation_model.PDF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/_solo12_visualisation_model.PDF -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_coordinate_systems_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_coordinate_systems_2.png -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_coordinate_systems_3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_coordinate_systems_3.png -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_coordinate_systems_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_coordinate_systems_4.png -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_lower_leg_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_lower_leg_left_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_lower_leg_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_lower_leg_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_upper_leg_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_upper_leg_left_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/solo12/solo12_upper_leg_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_upper_leg_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/with_foot/solo_lower_leg_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_lower_leg_left_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/with_foot/solo_upper_leg_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_upper_leg_left_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/with_foot/solo_lower_leg_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_lower_leg_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/with_foot/solo_upper_leg_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_upper_leg_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_lower_leg_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_lower_leg_left_side.stl 
-------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_upper_leg_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_upper_leg_left_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_lower_leg_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_lower_leg_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_upper_leg_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_upper_leg_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_lower_leg_v2_left_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_lower_leg_v2_left_side.stl -------------------------------------------------------------------------------- /solo12/meshes/stl/without_foot/solo_lower_leg_v2_right_side.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_lower_leg_v2_right_side.stl -------------------------------------------------------------------------------- /solo12/meshes/plane.mtl: -------------------------------------------------------------------------------- 1 | newmtl Material 2 | Ns 10.0000 3 | Ni 1.5000 4 | d 1.0000 5 | Tr 0.0000 6 | Tf 1.0000 1.0000 1.0000 7 | illum 2 8 | Ka 0.0000 0.0000 0.0000 9 | Kd 0.5880 0.5880 0.5880 10 | Ks 0.0000 0.0000 0.0000 11 | Ke 0.0000 0.0000 0.0000 12 | map_Ka cube.tga 13 | map_Kd checker_blue.png 14 | 15 | 16 | -------------------------------------------------------------------------------- /solo12/meshes/obj/with_foot/solo_body.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/with_foot/solo_foot.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- 
/solo12/meshes/obj/without_foot/solo_body.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/plane.obj: -------------------------------------------------------------------------------- 1 | # Blender v2.66 (sub 1) OBJ File: '' 2 | # www.blender.org 3 | mtllib plane.mtl 4 | o Plane 5 | v 15.000000 -15.000000 0.000000 6 | v 15.000000 15.000000 0.000000 7 | v -15.000000 15.000000 0.000000 8 | v -15.000000 -15.000000 0.000000 9 | 10 | vt 15.000000 0.000000 11 | vt 15.000000 15.000000 12 | vt 0.000000 15.000000 13 | vt 0.000000 0.000000 14 | 15 | usemtl Material 16 | s off 17 | f 1/1 2/2 3/3 18 | f 1/1 3/3 4/4 19 | -------------------------------------------------------------------------------- /solo12/meshes/obj/with_foot/solo_lower_leg_left_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/with_foot/solo_lower_leg_right_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/with_foot/solo_upper_leg_left_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/with_foot/solo_upper_leg_right_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | 
illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/without_foot/solo_lower_leg_left_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/without_foot/solo_upper_leg_left_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/without_foot/solo_lower_leg_right_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /solo12/meshes/obj/without_foot/solo_upper_leg_right_side.mtl: -------------------------------------------------------------------------------- 1 | # Blender MTL File: 'None' 2 | # Material Count: 2 3 | 4 | newmtl Material 5 | Ns 96.078431 6 | Ka 1.000000 1.000000 1.000000 7 | Kd 0.640000 0.640000 0.640000 8 | Ks 0.500000 0.500000 0.500000 9 | Ke 0.000000 0.000000 0.000000 10 | Ni 1.000000 11 | d 1.000000 12 | illum 2 13 | 14 | newmtl None 15 | Ns 0 16 | Ka 0.000000 0.000000 0.000000 17 | Kd 0.8 0.8 0.8 18 | Ks 0.8 0.8 0.8 19 | d 1 20 | illum 2 21 | -------------------------------------------------------------------------------- /cfg/train/SoloParkourDDPG_demos_generate.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | 4 | algo: 5 | name: cat_ddpg_demos_generate_continuous 6 | 7 | config: 8 | name: ${resolve_default:SoloParkour,${....experiment}} 9 | full_experiment_name: ${.name} 10 | env_name: rlgpu 11 | total_timesteps: 2e8 12 | buffer_size: 4999680 # % (5 * 256) = 0 13 | batch_size: 512 14 | target_policy_path: "path/to/cleanrl_model.pt" 15 | demos_buffer_size: 1999360 # % (5 * 256) = 0 16 | horizon_length: 24 # 24 for t = 0.02 (decimation = 4) 17 | 18 | max_epochs: ${resolve_default:1000,${....max_iterations}} 19 | -------------------------------------------------------------------------------- 
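The comments in the config above ("% (5 * 256) = 0" and "24 for t = 0.02 (decimation = 4)") encode a bit of arithmetic that is easy to mis-read. Here is a minimal sketch that only re-checks those numbers, assuming the 256 is the `task.env.numEnvs=256` used for demo generation in the README and that the factor 5 is related to the depth camera `update_interval: 5` in `cfg/task/SoloParkour.yaml` (both are assumptions, not something the config states explicitly):

```python
# Sanity check of the arithmetic behind the config comments (illustrative sketch only).
sim_dt = 0.005        # sim.dt in cfg/task/SoloParkour.yaml
decimation = 4        # env.control.decimation
policy_dt = sim_dt * decimation            # one policy step = 0.02 s
assert abs(policy_dt - 0.02) < 1e-9        # matches "24 for t = 0.02 (decimation = 4)"
rollout_seconds = 24 * policy_dt           # horizon_length 24 -> 0.48 s per rollout

num_envs = 256        # task.env.numEnvs used for demo generation (README); assumed meaning of the "256"
chunk = 5 * num_envs  # the "5 * 256" from the buffer size comments; the factor 5 is an assumption
assert 4999680 % chunk == 0                # buffer_size
assert 1999360 % chunk == 0                # demos_buffer_size
print(policy_dt, rollout_seconds, 4999680 // chunk, 1999360 // chunk)
```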
/cfg/train/SoloParkourDDPG_demos_rnn_vision.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | 4 | algo: 5 | name: cat_ddpg_demos_rnn_vision_continuous 6 | 7 | config: 8 | name: ${resolve_default:SoloParkour,${....experiment}} 9 | full_experiment_name: ${.name} 10 | env_name: rlgpu 11 | gamma: 0.99 12 | tau: 0.05 13 | critic_learning_rate: 3.e-4 14 | actor_learning_rate: 5.e-4 15 | total_timesteps: 2e8 16 | buffer_size: 4999680 # % (5 * 256) = 0 17 | learning_starts: 25e3 18 | policy_frequency: 2 19 | batch_size: 128 20 | seq_len: 50 21 | policy_noise: 0.8 22 | noise_clip: 0.2 23 | demos_rb_path: "path/to/privileged/experience" 24 | horizon_length: 24 # 24 for t = 0.02 (decimation = 4) 25 | 26 | max_epochs: 5000 27 | -------------------------------------------------------------------------------- /solo12/meshes/cube.obj: -------------------------------------------------------------------------------- 1 | o Box 2 | v 0.5 0.5 0.5 3 | v 0.5 0.5 -0.5 4 | v 0.5 -0.5 0.5 5 | v 0.5 -0.5 -0.5 6 | v -0.5 0.5 -0.5 7 | v -0.5 0.5 0.5 8 | v -0.5 -0.5 -0.5 9 | v -0.5 -0.5 0.5 10 | v -0.5 0.5 -0.5 11 | v 0.5 0.5 -0.5 12 | v -0.5 0.5 0.5 13 | v 0.5 0.5 0.5 14 | v -0.5 -0.5 0.5 15 | v 0.5 -0.5 0.5 16 | v -0.5 -0.5 -0.5 17 | v 0.5 -0.5 -0.5 18 | v -0.5 0.5 0.5 19 | v 0.5 0.5 0.5 20 | v -0.5 -0.5 0.5 21 | v 0.5 -0.5 0.5 22 | v 0.5 0.5 -0.5 23 | v -0.5 0.5 -0.5 24 | v 0.5 -0.5 -0.5 25 | v -0.5 -0.5 -0.5 26 | vt 0 1 27 | vt 1 1 28 | vt 0 0 29 | vt 1 0 30 | vt 0 1 31 | vt 1 1 32 | vt 0 0 33 | vt 1 0 34 | vt 0 1 35 | vt 1 1 36 | vt 0 0 37 | vt 1 0 38 | vt 0 1 39 | vt 1 1 40 | vt 0 0 41 | vt 1 0 42 | vt 0 1 43 | vt 1 1 44 | vt 0 0 45 | vt 1 0 46 | vt 0 1 47 | vt 1 1 48 | vt 0 0 49 | vt 1 0 50 | vn 1 0 0 51 | vn 1 0 0 52 | vn 1 0 0 53 | vn 1 0 0 54 | vn -1 0 0 55 | vn -1 0 0 56 | vn -1 0 0 57 | vn -1 0 0 58 | vn 0 1 0 59 | vn 0 1 0 60 | vn 0 1 0 61 | vn 0 1 0 62 | vn 0 -1 0 63 | vn 0 -1 0 64 | vn 0 -1 0 65 | vn 0 -1 0 66 | vn 0 0 1 67 | vn 0 0 1 68 | vn 0 0 1 69 | vn 0 0 1 70 | vn 0 0 -1 71 | vn 0 0 -1 72 | vn 0 0 -1 73 | vn 0 0 -1 74 | f 1/1/1 3/3/3 2/2/2 75 | f 3/3/3 4/4/4 2/2/2 76 | f 5/5/5 7/7/7 6/6/6 77 | f 7/7/7 8/8/8 6/6/6 78 | f 9/9/9 11/11/11 10/10/10 79 | f 11/11/11 12/12/12 10/10/10 80 | f 13/13/13 15/15/15 14/14/14 81 | f 15/15/15 16/16/16 14/14/14 82 | f 17/17/17 19/19/19 18/18/18 83 | f 19/19/19 20/20/20 18/18/18 84 | f 21/21/21 23/23/23 22/22/22 85 | f 23/23/23 24/24/24 22/22/22 86 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 2-Clause License 2 | 3 | Copyright (c) 2024, LAAS-CNRS 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | 1. Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | 2. Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 | 26 | Files with a specific license in their header (like train.py) abide by their 27 | respective license. 28 | 29 | -------------------------------------------------------------------------------- /cfg/config.yaml: -------------------------------------------------------------------------------- 1 | 2 | # Task name - used to pick the class to load 3 | task_name: ${task.name} 4 | # experiment name. defaults to name of training config 5 | experiment: '' 6 | 7 | # if set to positive integer, overrides the default number of environments 8 | num_envs: '' 9 | 10 | # seed - set to -1 to choose random seed 11 | seed: 42 12 | # set to True for deterministic performance 13 | torch_deterministic: False 14 | 15 | # set the maximum number of learning iterations to train for. overrides default per-environment setting 16 | max_iterations: '' 17 | 18 | ## Device config 19 | # 'physx' or 'flex' 20 | physics_engine: 'physx' 21 | # whether to use cpu or gpu pipeline 22 | pipeline: 'gpu' 23 | # device for running physics simulation 24 | sim_device: 'cuda:0' 25 | # device to run RL 26 | rl_device: 'cuda:0' 27 | graphics_device_id: 0 28 | 29 | ## PhysX arguments 30 | num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only. 31 | solver_type: 1 # 0: pgs, 1: tgs 32 | num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread 33 | 34 | # RLGames Arguments 35 | # test - if set, run policy in inference mode (requires setting checkpoint to load) 36 | test: False 37 | # used to set checkpoint path 38 | checkpoint: '' 39 | # set sigma when restoring network 40 | sigma: '' 41 | # set to True to use multi-gpu training 42 | multi_gpu: False 43 | 44 | wandb_activate: False 45 | wandb_group: '' 46 | wandb_name: ${train.params.config.name} 47 | wandb_entity: '' 48 | wandb_project: 'isaacgymenvs' 49 | wandb_tags: [] 50 | wandb_logcode_dir: '' 51 | 52 | capture_video: False 53 | capture_video_freq: 1464 54 | capture_video_len: 100 55 | force_render: True 56 | 57 | # disables rendering 58 | headless: False 59 | 60 | # set default task and default training config based on task 61 | defaults: 62 | - task: Ant 63 | - train: ${task}PPO 64 | - hydra/job_logging: disabled 65 | - pbt: no_pbt 66 | 67 | # set the directory where the output files get saved 68 | hydra: 69 | output_subdir: null 70 | run: 71 | dir: . 72 | 73 | -------------------------------------------------------------------------------- /cfg/train/SoloParkourPPO.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: ${...seed} 3 | 4 | algo: 5 | name: cat_a2c_continuous 6 | 7 | model: 8 | name: continuous_a2c_logstd 9 | 10 | network: 11 | name: actor_critic 12 | separate: True 13 | 14 | space: 15 | continuous: 16 | mu_activation: None 17 | sigma_activation: None 18 | mu_init: 19 | name: default 20 | sigma_init: 21 | name: const_initializer 22 | val: 0. # std = 1. 
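# note: with the continuous_a2c_logstd model above, sigma is a log-std, so an initial value of 0 corresponds to std = exp(0) = 1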
23 | fixed_sigma: True 24 | 25 | mlp: 26 | units: [512, 256, 128] 27 | activation: elu 28 | d2rl: False 29 | 30 | initializer: 31 | name: default 32 | regularizer: 33 | name: None 34 | # rnn: 35 | # name: lstm 36 | # units: 128 37 | # layers: 1 38 | # before_mlp: True 39 | # concat_input: True 40 | # layer_norm: False 41 | 42 | 43 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint 44 | load_path: ${...checkpoint} # path to the checkpoint to load 45 | 46 | config: 47 | name: ${resolve_default:SoloParkour,${....experiment}} 48 | full_experiment_name: ${.name} 49 | env_name: rlgpu 50 | ppo: True 51 | multi_gpu: ${....multi_gpu} 52 | mixed_precision: True 53 | normalize_input: True 54 | normalize_value: True 55 | normalize_advantage: True 56 | value_bootstrap: True 57 | clip_actions: False 58 | num_actors: ${....task.env.numEnvs} 59 | reward_shaper: 60 | scale_value: 1.0 61 | gamma: 0.99 62 | tau: 0.95 63 | e_clip: 0.2 64 | entropy_coef: 0.001 65 | learning_rate: 3.e-4 # overwritten by adaptive lr_schedule 66 | lr_schedule: adaptive 67 | kl_threshold: 0.008 # target kl for adaptive lr 68 | truncate_grads: True 69 | grad_norm: 1. 70 | horizon_length: 24 # 24 for t = 0.02 (decimation = 4) 71 | minibatch_size: 16384 # 16384 for horizon_length 24 72 | mini_epochs: 5 73 | critic_coef: 2 74 | clip_value: True 75 | seq_len: 4 # only for rnn 76 | bounds_loss_coef: 0. 77 | kappa: 1.0 78 | 79 | max_epochs: ${resolve_default:20000,${....max_iterations}} 80 | save_best_after: 1000 81 | score_to_win: 2000000000 82 | save_frequency: 1000 83 | print_stats: False 84 | 85 | player: 86 | deterministic: True 87 | use_vecenv: True 88 | games_num: 2000 89 | print_stats: False 90 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SoloParkour: Constrained Reinforcement Learning for Visual Locomotion from Privileged Experience 2 | 3 | ![Figure 1 from paper](assets/teaser.jpeg) 4 | 5 | > [SoloParkour: Constrained Reinforcement Learning for Visual Locomotion from Privileged Experience](https://gepetto.github.io/SoloParkour/) 6 | > Elliot Chane-Sane*, Joseph Amigo*, Thomas Flayols, Ludovic Righetti, Nicolas Mansard, 7 | > **CoRL 2024** 8 | 9 | 10 | 11 | 12 | 13 | ## Usage 14 | 15 | 1. Train the privileged (Stage 1) policy: 16 | ```bash 17 | python train.py task=SoloParkour headless=True task.env.control.damping_curriculum.enable=True 18 | ``` 19 | 20 | 2. Generate the dataset of privileged experience: 21 | ```bash 22 | # replace with the correct path to the privileged policy weights 23 | policy_path=path/to/cleanrl_model.pt 24 | 25 | python train.py task=SoloParkour train=SoloParkourDDPG_demos_generate headless=True task.env.numEnvs=256 task.env.enableCameraSensors=True task.env.depth.use_depth=True task.env.terrain.maxInitMapLevel=9 train.params.config.target_policy_path=${policy_path} 26 | ``` 27 | 28 | 3. 
Train the visual (Stage 2) policy from privileged experience: 29 | ```bash 30 | # replace with the correct path to the privileged experience folder 31 | demo_path=path/to/folder/ # folder which contains rb_demos.pkl 32 | 33 | python train.py task=SoloParkour train=SoloParkourDDPG_demos_rnn_vision headless=True task.env.numEnvs=256 task.env.enableCameraSensors=True task.env.depth.use_depth=True train.params.config.demos_rb_path=${demo_path} 34 | ``` 35 | 36 | ## Installation 37 | 38 | ```bash 39 | conda create -n IsaacGymEnvs python=3.8 40 | conda activate IsaacGymEnvs 41 | 42 | conda install pip 43 | conda install -c conda-forge ninja 44 | 45 | # Download Isaac Gym: https://developer.nvidia.com/isaac-gym 46 | cd path/to/isaacgym/python && pip install --user --no-cache-dir -e . 47 | 48 | # Clone IsaacGymEnvs: https://github.com/isaac-sim/IsaacGymEnvs 49 | cd path/to/IsaacGymEnvs/ && pip install --user --no-cache-dir -e . 50 | 51 | pip uninstall torch torchvision 52 | pip install --user --no-cache-dir torch==2.2.0 torchvision==0.17.0 53 | # Fix compatibility problem with numpy version 54 | pip install --user --no-cache-dir "numpy<1.24" 55 | pip install --user --no-cache-dir texttable av 56 | 57 | # Add correct LD_LIBRARY_PATH on env start 58 | conda env config vars set LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib --name $CONDA_DEFAULT_ENV 59 | ``` 60 | 61 | ### Acknowledgements 62 | 63 | This code was based on the following repositories: 64 | - https://github.com/vwxyzjn/cleanrl 65 | - https://github.com/isaac-sim/IsaacGymEnvs 66 | - https://github.com/gepetto/constraints-as-terminations 67 | 68 | ### Citation 69 | 70 | ``` 71 | @inproceedings{chanesane2024solo, 72 | title = {SoloParkour: Constrained Reinforcement Learning for Visual Locomotion from Privileged Experience}, 73 | author = {Elliot Chane-Sane and Joseph Amigo and Thomas Flayols and Ludovic Righetti and Nicolas Mansard}, 74 | booktitle = {Conference on Robot Learning (CoRL)}, 75 | year = {2024}, 76 | } 77 | ``` 78 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # train.py 2 | # Script to train policies in Isaac Gym 3 | # 4 | # Copyright (c) 2018-2023, NVIDIA Corporation 5 | # All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | # 13 | # 2. Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | # 17 | # 3. Neither the name of the copyright holder nor the names of its 18 | # contributors may be used to endorse or promote products derived from 19 | # this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | import os 32 | import random 33 | 34 | import numpy as np 35 | 36 | import isaacgym 37 | from isaacgymenvs.pbt.pbt import PbtAlgoObserver, initial_pbt_check 38 | from omegaconf import DictConfig, OmegaConf 39 | import hydra 40 | from utils.isaacgymenvs_make import isaacgym_task_map, make 41 | from isaacgymenvs.utils.reformat import omegaconf_to_dict, print_dict 42 | from isaacgymenvs.utils.utils import set_np_formatting, set_seed 43 | 44 | import torch 45 | from algos.PPO import PPO, eval_PPO 46 | from algos.DDPG_demos_rnn_vision import DDPG_demos_rnn_vision, eval_DDPG_demos_rnn_vision 47 | from algos.DDPG_demos_generate import DDPG_demos_generate 48 | 49 | 50 | @hydra.main(config_name="config", config_path="./cfg") 51 | def launch_rlg_hydra(cfg: DictConfig): 52 | if cfg.pbt.enabled: 53 | initial_pbt_check(cfg) 54 | 55 | cfg_dict = omegaconf_to_dict(cfg) 56 | print_dict(cfg_dict) 57 | 58 | # set numpy formatting for printing only 59 | set_np_formatting() 60 | 61 | # global rank of the GPU 62 | global_rank = int(os.getenv("RANK", "0")) 63 | 64 | # sets seed. if seed is -1 will pick a random one 65 | cfg.seed = set_seed( 66 | cfg.seed, torch_deterministic=cfg.torch_deterministic, rank=global_rank 67 | ) 68 | 69 | # TRY NOT TO MODIFY: seeding 70 | random.seed(cfg.seed) 71 | np.random.seed(cfg.seed) 72 | torch.manual_seed(cfg.seed) 73 | torch.backends.cudnn.deterministic = True 74 | 75 | algo = cfg["train"]["params"]["algo"]["name"] 76 | 77 | envs = make( 78 | cfg.seed, 79 | cfg.task_name, 80 | cfg.task.env.numEnvs, 81 | cfg.sim_device, 82 | cfg.rl_device, 83 | cfg.graphics_device_id, 84 | cfg.headless, 85 | cfg.multi_gpu, 86 | cfg.capture_video, 87 | cfg.force_render, 88 | cfg 89 | ) 90 | 91 | if cfg["test"]: 92 | if algo == "cat_a2c_continuous": 93 | eval_PPO(cfg, envs) 94 | elif algo == "cat_ddpg_demos_rnn_vision_continuous": 95 | eval_DDPG_demos_rnn_vision(cfg, envs) 96 | else: 97 | raise NotImplementedError 98 | else: 99 | if algo == "cat_a2c_continuous": 100 | PPO(cfg, envs) 101 | elif algo == "cat_ddpg_demos_rnn_vision_continuous": 102 | DDPG_demos_rnn_vision(cfg, envs) 103 | elif algo == "cat_ddpg_demos_generate_continuous": 104 | DDPG_demos_generate(cfg, envs) 105 | else: 106 | raise NotImplementedError 107 | 108 | if __name__ == "__main__": 109 | launch_rlg_hydra() 110 | -------------------------------------------------------------------------------- /solo12/srdf/solo.srdf: --------------------------------------------------------------------------------
-------------------------------------------------------------------------------- /utils/constraint_manager.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class ConstraintManager: 4 | """Handle the computation of termination probabilities based on constraints 5 | violations (Constraints as Terminations). 6 | 7 | Args: 8 | tau (float): discount factor 9 | min_p (float): minimum termination probability 10 | """ 11 | 12 | def __init__(self, tau=0.95, min_p=0.0): 13 | self.running_maxes = {} # Polyak average of the maximum constraint violation 14 | self.running_mins = {} # Polyak average of the minimum constraint violation 15 | self.probs = {} # Termination probabilities for each constraint 16 | self.max_p = {} 17 | self.raw_constraints = {} 18 | self.tau = tau # Discount factor 19 | self.min_p = min_p # Minimum termination probability 20 | 21 | def reset(self): 22 | """Reset the termination probabilities of the constraint manager.""" 23 | self.probs = {} 24 | self.raw_constraints = {} 25 | 26 | def add(self, name, constraint, max_p=0.1): 27 | """Add a constraint violation to the constraint manager and compute the 28 | associated termination probability. 29 | 30 | Args: 31 | name (string): name of the constraint 32 | constraint (float tensor): value of constraint violations for this constraint 33 | max_p (float): maximum termination probability 34 | """ 35 | 36 | # First, put constraint in the form Torch.FloatTensor((num_envs, n_constraints)) 37 | # Convert constraints violation to float if they are not 38 | if not torch.is_floating_point(constraint): 39 | constraint = constraint.float() 40 | 41 | # Ensure constraint is 2-dimensional even with a single element 42 | if len(constraint.size()) == 1: 43 | constraint = constraint.unsqueeze(1) 44 | 45 | # Get the maximum constraint violation for the current step 46 | constraint_max = constraint.max(dim=0, keepdim=True)[0].clamp(min=1e-6) 47 | 48 | # Compute polyak average of the maximum constraint violation for this constraint 49 | if name not in self.running_maxes: 50 | self.running_maxes[name] = constraint_max 51 | else: 52 | self.running_maxes[name] = ( 53 | self.tau * self.running_maxes[name] + (1.0 - self.tau) * constraint_max 54 | ) 55 | 56 | self.raw_constraints[name] = constraint 57 | 58 | # Get samples for which there is a constraint violation 59 | mask = constraint > 0.0 60 | 61 | # Compute the termination probability which scales between min_p and max_p with 62 | # increasing constraint violation. Remains at 0 when there is no violation.
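# i.e. for each violated sample: p = min_p + clamp(violation / running_max, 0, 1) * (max_p - min_p)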
63 | probs = torch.zeros_like(constraint) 64 | probs[mask] = self.min_p + torch.clamp( 65 | constraint[mask] 66 | / (self.running_maxes[name].expand(constraint.size())[mask]), 67 | min=0.0, 68 | max=1.0, 69 | ) * (max_p - self.min_p) 70 | self.probs[name] = probs 71 | self.max_p[name] = torch.tensor(max_p, device = constraint.device).repeat(constraint.shape[1]) 72 | 73 | def get_probs(self): 74 | """Returns the termination probabilities due to constraint violations.""" 75 | probs = torch.cat(list(self.probs.values()), dim=1) 76 | probs = probs.max(1).values 77 | return probs 78 | 79 | def get_raw_constraints(self): 80 | return torch.cat(list(self.raw_constraints.values()), dim=1) 81 | 82 | def get_running_maxes(self): 83 | return torch.cat(list(self.running_maxes.values()), dim = 1) 84 | 85 | def get_max_p(self): 86 | return torch.cat(list(self.max_p.values())) 87 | 88 | def get_str(self, names=None): 89 | """Get a debug string with constraints names and their average termination probabilities""" 90 | if names is None: 91 | names = list(self.probs.keys()) 92 | txt = "" 93 | for name in names: 94 | txt += " {}: {}".format( 95 | name, 96 | str( 97 | 100.0 * self.probs[name].max(1).values.gt(0.0).float().mean().item() 98 | )[:4], 99 | ) 100 | # txt += " {}: {}".format(name, str(100.0*self.probs[name].max(1).values.float().mean().item())[:4]) 101 | 102 | return txt[1:] 103 | 104 | def log_all(self, episode_sums): 105 | """Log terminations probabilities in episode_sums with cstr_NAME key.""" 106 | for name in list(self.probs.keys()): 107 | values = self.probs[name].max(1).values.gt(0.0).float() 108 | if "cstr_" + name not in episode_sums: 109 | episode_sums["cstr_" + name] = torch.zeros_like(values) 110 | episode_sums["cstr_" + name] += values 111 | 112 | def get_names(self): 113 | """Return a list of all constraint names.""" 114 | return list(self.probs.keys()) 115 | 116 | def get_vals(self): 117 | """Return a list of all constraint termination probabilities.""" 118 | res = [] 119 | for key in self.probs.keys(): 120 | res += [100.0 * self.probs[key].max(1).values.gt(0.0).float().mean().item()] 121 | return res 122 | -------------------------------------------------------------------------------- /cfg/task/SoloParkour.yaml: -------------------------------------------------------------------------------- 1 | # used to create the object 2 | name: SoloParkour 3 | 4 | physics_engine: "physx" 5 | 6 | env: 7 | numEnvs: ${resolve_default:4096,${...num_envs}} 8 | numActions: 12 9 | numLatent: 0 # (8, 3, 3) 10 | numHistorySamples: 1 # t-1, t-4, t-7 11 | numHistoryStep: 0 # t-1 -> t-4 -> t-7 12 | envSpacing: 3. # [m] 13 | enableDebugVis: False 14 | enableDebugPlots: False 15 | enableJoystick: False 16 | onlyForwards: False 17 | onlyForwardsVelocity: 1.0 18 | startAtLevel: -1 19 | 20 | terrain: 21 | terrainType: trimesh # none, plane, or trimesh 22 | staticFriction: 1.0 # [-] 23 | dynamicFriction: 1.0 # [-] 24 | restitution: 0. # [-] 25 | # rough terrain only: 26 | curriculum: true 27 | maxInitMapLevel: 0 28 | minInitMapLevel: 0 29 | mapLength: 8. 30 | mapWidth: 4. 
31 | numLevels: 10 32 | numTerrains: 20 33 | 34 | # tri mesh only: 35 | slopeTreshold: 0.2 36 | 37 | terrainProportions: # Proportions for each kind of terrain 38 | gap_parkour: 0.55 39 | jump_parkour: 0.3 40 | stairs_parkour: 0.0 41 | hurdle_parkour: 0.1 42 | crawl_parkour: 0.05 43 | random_uniform: 0.0 44 | 45 | baseInitState: 46 | pos: [0.0, 0.0, 0.3] # x,y,z [m] 47 | rot: [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat] 48 | vLinear: [0.0, 0.0, 0.0] # x,y,z [m/s] 49 | vAngular: [0.0, 0.0, 0.0] # x,y,z [rad/s] 50 | 51 | randomCommandVelocityRanges: 52 | # train 53 | linear_x: [0.0, 1.5] # min max [m/s] # -0.3 1.0 54 | linear_y: [-0.5, 0.5] # min max [m/s] 55 | yaw: [-0.78, 0.78] # min max [rad/s] 56 | 57 | control: 58 | # PD Drive parameters: 59 | stiffness: 4.0 # [N*m/rad] 60 | damping: 0.2 # [N*m*s/rad] 61 | # action scale: target angle = actionScale * action + defaultAngle 62 | actionScale: 0.5 63 | # decimation: Number of control action updates @ sim DT per policy DT 64 | decimation: 4 65 | damping_curriculum: 66 | enable: False 67 | num_steps: 400000 68 | init_damping: 0.05 69 | 70 | defaultJointAngles: # = target angles when action = 0.0 71 | FL_HAA: 0.05 # [rad] 72 | HL_HAA: 0.05 # [rad] 73 | FR_HAA: -0.05 # [rad] 74 | HR_HAA: -0.05 # [rad] 75 | 76 | FL_HFE: 0.6 # [rad] 77 | HL_HFE: 0.6 # [rad] 78 | FR_HFE: 0.6 # [rad] 79 | HR_HFE: 0.6 # [rad] 80 | 81 | FL_KFE: -1.2 # [rad] 82 | HL_KFE: -1.2 # [rad] 83 | FR_KFE: -1.2 # [rad] 84 | HR_KFE: -1.2 # [rad] 85 | 86 | urdfAsset: 87 | file: "solo12/solo12_mpi.urdf" 88 | baseName: base_link 89 | footName: FOOT 90 | shinName: LOWER 91 | kneeName: UPPER 92 | collapseFixedJoints: True 93 | fixBaseLink: false 94 | defaultDofDriveMode: 4 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 4 effort) 95 | flip_visual_attachments: False 96 | armature: 0.00036207 # i.e motor inertia at joint level 97 | 98 | learn: 99 | allowShinContacts: True 100 | allowKneeContacts: false 101 | 102 | # Scales of rewards 103 | rewMult: 1.0 104 | survivalBonus: 0.5 105 | 106 | # Misc quantities for rewards 107 | linearVelocityXYRewardDelta: 0.25 108 | angularVelocityZRewardDelta: 0.25 109 | feetAirTimeRewardTarget: 0.25 110 | 111 | # Constraints 112 | enableConstraints: "cat" # "none", "cat" 113 | constraints_CaT: 114 | feetAirTimeConstraint: 0.2 115 | tauConstraint: 0.95 116 | minPConstraint: 0.0 117 | softPConstraint: 0.1 118 | useSoftPCurriculum: True 119 | trackingConstraint: 1000.0 120 | 121 | limits: 122 | torque: 3.0 123 | acc: 800.0 124 | vel: 16.0 125 | action_rate: 80.0 126 | base_orientation: 0.1 127 | foot_contact_force: 80.0 128 | HFE: 1.9 129 | HFE_min: -0.2 130 | HAA: 0.2 131 | min_base_height: 0.06 132 | heading: 0.1 133 | 134 | # Other 135 | flatTerrainThreshold: 0.001 136 | vel_deadzone: 0.2 137 | base_height_target: 0.245 138 | gait_period: 0.4 139 | 140 | # observations 141 | observe: 142 | base_lin_vel: false 143 | base_ang_vel: true 144 | commands: true 145 | misc: true 146 | heights: true 147 | ceilings: true 148 | phases: false 149 | imu: false 150 | measured_points_step: 0.08 151 | measured_points_x: [-3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] 152 | measured_points_y: [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5] 153 | phases_freq: 2.0 154 | 155 | # normalization 156 | linearVelocityScale: 2.0 157 | angularVelocityScale: 0.25 158 | dofPositionScale: 1.0 159 | dofVelocityScale: 0.05 160 | heightMeasurementScale: 5.0 161 | imuAccelerationScale: 0.1 162 | 163 | # noise 164 | addNoise: true 165 | noiseLevel: 1.0 # scales other values 166 | 
dofPositionNoise: 0.01 167 | dofVelocityNoise: 0.2 168 | linearVelocityNoise: 0.0 169 | angularVelocityNoise: 0.001 170 | gravityNoise: 0.05 171 | heightMeasurementNoise: 0.01 172 | 173 | # friction randomization 174 | randomizeFriction: true 175 | frictionRange: [0.5, 1.25] 176 | 177 | # random pushes during training 178 | pushRobots: true 179 | pushInterval_s: 8 180 | 181 | # episode length in seconds 182 | episodeLength_s: 15 183 | 184 | # viewer cam: 185 | viewer: 186 | refEnv: 0 187 | pos: [0, 0, 5] # [m] 188 | lookat: [1., 1, 4.5] # [m] 189 | 190 | # set to True if you use camera sensors in the environment 191 | enableCameraSensors: False 192 | depth: 193 | use_depth: False 194 | position: [0.24, 0, 0.03] # [0.21, 0, 0.03] 195 | angle: [-5, 5] 196 | update_interval: 5 197 | image_size: [48, 85] # [58, 87] 198 | depth_clip_max: 1.0 199 | depth_clip_min: 0.04 200 | horizontal_fov: 87 201 | 202 | sim: 203 | dt: 0.005 204 | substeps: 1 205 | up_axis: "z" 206 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"} 207 | gravity: [0.0, 0.0, -9.81] 208 | physx: 209 | num_threads: ${....num_threads} 210 | solver_type: 0 # ${....solver_type} 211 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU 212 | num_position_iterations: 4 213 | num_velocity_iterations: 1 214 | contact_offset: 0.02 215 | rest_offset: 0.0 216 | bounce_threshold_velocity: 0.2 217 | max_depenetration_velocity: 100.0 218 | default_buffer_size_multiplier: 5.0 219 | max_gpu_contact_pairs: 33554432 # 8388608 = 8*1024*1024 220 | num_subscenes: ${....num_subscenes} 221 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!) 222 | 223 | task: 224 | randomize: False 225 | -------------------------------------------------------------------------------- /utils/isaacgymenvs_make.py: -------------------------------------------------------------------------------- 1 | # train.py 2 | # Script to train policies in Isaac Gym 3 | # 4 | # Copyright (c) 2018-2023, NVIDIA Corporation 5 | # All rights reserved. 6 | # 7 | # Redistribution and use in source and binary forms, with or without 8 | # modification, are permitted provided that the following conditions are met: 9 | # 10 | # 1. Redistributions of source code must retain the above copyright notice, this 11 | # list of conditions and the following disclaimer. 12 | # 13 | # 2. Redistributions in binary form must reproduce the above copyright notice, 14 | # this list of conditions and the following disclaimer in the documentation 15 | # and/or other materials provided with the distribution. 16 | # 17 | # 3. Neither the name of the copyright holder nor the names of its 18 | # contributors may be used to endorse or promote products derived from 19 | # this software without specific prior written permission. 20 | # 21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | # DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | 32 | 33 | import os 34 | from typing import Callable, Dict, Tuple, Any 35 | import hydra 36 | from hydra import compose, initialize 37 | from hydra.core.hydra_config import HydraConfig 38 | from omegaconf import DictConfig 39 | from isaacgymenvs.utils.reformat import omegaconf_to_dict 40 | from tasks.solo_parkour import SoloParkour 41 | 42 | isaacgym_task_map = { 43 | "SoloParkour": SoloParkour, 44 | } 45 | 46 | def make( 47 | seed: int, 48 | task: str, 49 | num_envs: int, 50 | sim_device: str, 51 | rl_device: str, 52 | graphics_device_id: int = -1, 53 | headless: bool = False, 54 | multi_gpu: bool = False, 55 | virtual_screen_capture: bool = False, 56 | force_render: bool = True, 57 | cfg: DictConfig = None 58 | ): 59 | # from isaacgymenvs.utils.rlgames_utils import get_rlgames_env_creator 60 | # create hydra config if no config passed in 61 | if cfg is None: 62 | # reset current hydra config if already parsed (but not passed in here) 63 | if HydraConfig.initialized(): 64 | task = HydraConfig.get().runtime.choices['task'] 65 | hydra.core.global_hydra.GlobalHydra.instance().clear() 66 | 67 | with initialize(config_path="./cfg"): 68 | cfg = compose(config_name="config", overrides=[f"task={task}"]) 69 | cfg_dict = omegaconf_to_dict(cfg.task) 70 | cfg_dict['env']['numEnvs'] = num_envs 71 | # reuse existing config 72 | else: 73 | cfg_dict = omegaconf_to_dict(cfg.task) 74 | 75 | # Adding custom info to access within the env 76 | cfg_dict['test'] = cfg.test 77 | cfg_dict['horizon_length'] = cfg.train.params.config.horizon_length 78 | cfg_dict['max_epochs'] = cfg.train.params.config.max_epochs 79 | 80 | create_rlgpu_env = get_rlgames_env_creator( 81 | seed=seed, 82 | task_config=cfg_dict, 83 | task_name=cfg_dict["name"], 84 | sim_device=sim_device, 85 | rl_device=rl_device, 86 | graphics_device_id=graphics_device_id, 87 | headless=headless, 88 | multi_gpu=multi_gpu, 89 | virtual_screen_capture=virtual_screen_capture, 90 | force_render=force_render, 91 | ) 92 | return create_rlgpu_env() 93 | 94 | def get_rlgames_env_creator( 95 | # used to create the vec task 96 | seed: int, 97 | task_config: dict, 98 | task_name: str, 99 | sim_device: str, 100 | rl_device: str, 101 | graphics_device_id: int, 102 | headless: bool, 103 | # used to handle multi-gpu case 104 | multi_gpu: bool = False, 105 | post_create_hook: Callable = None, 106 | virtual_screen_capture: bool = False, 107 | force_render: bool = False, 108 | ): 109 | """Parses the configuration parameters for the environment task and creates a VecTask 110 | 111 | Args: 112 | task_config: environment configuration. 113 | task_name: Name of the task, used to evaluate based on the imported name (eg 'Trifinger') 114 | sim_device: The type of env device, eg 'cuda:0' 115 | rl_device: Device that RL will be done on, eg 'cuda:0' 116 | graphics_device_id: Graphics device ID. 117 | headless: Whether to run in headless mode. 
118 | multi_gpu: Whether to use multi gpu 119 | post_create_hook: Hooks to be called after environment creation. 120 | [Needed to setup WandB only for one of the RL Games instances when doing multiple GPUs] 121 | virtual_screen_capture: Set to True to allow the users get captured screen in RGB array via `env.render(mode='rgb_array')`. 122 | force_render: Set to True to always force rendering in the steps (if the `control_freq_inv` is greater than 1 we suggest stting this arg to True) 123 | Returns: 124 | A VecTaskPython object. 125 | """ 126 | def create_rlgpu_env(): 127 | """ 128 | Creates the task from configurations and wraps it using RL-games wrappers if required. 129 | """ 130 | if multi_gpu: 131 | 132 | local_rank = int(os.getenv("LOCAL_RANK", "0")) 133 | global_rank = int(os.getenv("RANK", "0")) 134 | 135 | # local rank of the GPU in a node 136 | local_rank = int(os.getenv("LOCAL_RANK", "0")) 137 | # global rank of the GPU 138 | global_rank = int(os.getenv("RANK", "0")) 139 | # total number of GPUs across all nodes 140 | world_size = int(os.getenv("WORLD_SIZE", "1")) 141 | 142 | print(f"global_rank = {global_rank} local_rank = {local_rank} world_size = {world_size}") 143 | 144 | _sim_device = f'cuda:{local_rank}' 145 | _rl_device = f'cuda:{local_rank}' 146 | 147 | task_config['rank'] = local_rank 148 | task_config['rl_device'] = _rl_device 149 | else: 150 | _sim_device = sim_device 151 | _rl_device = rl_device 152 | 153 | # create native task and pass custom config 154 | env = isaacgym_task_map[task_name]( 155 | cfg=task_config, 156 | rl_device=_rl_device, 157 | sim_device=_sim_device, 158 | graphics_device_id=graphics_device_id, 159 | headless=headless, 160 | virtual_screen_capture=virtual_screen_capture, 161 | force_render=force_render, 162 | ) 163 | 164 | if post_create_hook is not None: 165 | post_create_hook() 166 | 167 | return env 168 | return create_rlgpu_env 169 | -------------------------------------------------------------------------------- /solo12/meshes/sphere.obj: -------------------------------------------------------------------------------- 1 | o Sphere 2 | v 0 1 0 3 | v 0 1 0 4 | v 0 1 0 5 | v 0 1 0 6 | v 0 1 0 7 | v 0 1 0 8 | v 0 1 0 9 | v 0 1 0 10 | v 0 1 0 11 | v -0.5 0.8660253882408142 0 12 | v -0.3535533845424652 0.8660253882408142 0.3535533845424652 13 | v -3.0616171314629196e-17 0.8660253882408142 0.5 14 | v 0.3535533845424652 0.8660253882408142 0.3535533845424652 15 | v 0.5 0.8660253882408142 6.123234262925839e-17 16 | v 0.3535533845424652 0.8660253882408142 -0.3535533845424652 17 | v 9.184850732644269e-17 0.8660253882408142 -0.5 18 | v -0.3535533845424652 0.8660253882408142 -0.3535533845424652 19 | v -0.5 0.8660253882408142 -1.2246468525851679e-16 20 | v -0.8660253882408142 0.5 0 21 | v -0.6123724579811096 0.5 0.6123724579811096 22 | v -5.302876236065149e-17 0.5 0.8660253882408142 23 | v 0.6123724579811096 0.5 0.6123724579811096 24 | v 0.8660253882408142 0.5 1.0605752472130298e-16 25 | v 0.6123724579811096 0.5 -0.6123724579811096 26 | v 1.5908628708195447e-16 0.5 -0.8660253882408142 27 | v -0.6123724579811096 0.5 -0.6123724579811096 28 | v -0.8660253882408142 0.5 -2.1211504944260596e-16 29 | v -1 6.123234262925839e-17 0 30 | v -0.7071067690849304 6.123234262925839e-17 0.7071067690849304 31 | v -6.123234262925839e-17 6.123234262925839e-17 1 32 | v 0.7071067690849304 6.123234262925839e-17 0.7071067690849304 33 | v 1 6.123234262925839e-17 1.2246468525851679e-16 34 | v 0.7071067690849304 6.123234262925839e-17 -0.7071067690849304 35 | v 1.8369701465288538e-16 
6.123234262925839e-17 -1 36 | v -0.7071067690849304 6.123234262925839e-17 -0.7071067690849304 37 | v -1 6.123234262925839e-17 -2.4492937051703357e-16 38 | v -0.8660253882408142 -0.5 0 39 | v -0.6123724579811096 -0.5 0.6123724579811096 40 | v -5.302876236065149e-17 -0.5 0.8660253882408142 41 | v 0.6123724579811096 -0.5 0.6123724579811096 42 | v 0.8660253882408142 -0.5 1.0605752472130298e-16 43 | v 0.6123724579811096 -0.5 -0.6123724579811096 44 | v 1.5908628708195447e-16 -0.5 -0.8660253882408142 45 | v -0.6123724579811096 -0.5 -0.6123724579811096 46 | v -0.8660253882408142 -0.5 -2.1211504944260596e-16 47 | v -0.5 -0.8660253882408142 0 48 | v -0.3535533845424652 -0.8660253882408142 0.3535533845424652 49 | v -3.0616171314629196e-17 -0.8660253882408142 0.5 50 | v 0.3535533845424652 -0.8660253882408142 0.3535533845424652 51 | v 0.5 -0.8660253882408142 6.123234262925839e-17 52 | v 0.3535533845424652 -0.8660253882408142 -0.3535533845424652 53 | v 9.184850732644269e-17 -0.8660253882408142 -0.5 54 | v -0.3535533845424652 -0.8660253882408142 -0.3535533845424652 55 | v -0.5 -0.8660253882408142 -1.2246468525851679e-16 56 | v -1.2246468525851679e-16 -1 0 57 | v -8.659560603426554e-17 -1 8.659560603426554e-17 58 | v -7.498798786105971e-33 -1 1.2246468525851679e-16 59 | v 8.659560603426554e-17 -1 8.659560603426554e-17 60 | v 1.2246468525851679e-16 -1 1.4997597572211942e-32 61 | v 8.659560603426554e-17 -1 -8.659560603426554e-17 62 | v 2.2496396358317913e-32 -1 -1.2246468525851679e-16 63 | v -8.659560603426554e-17 -1 -8.659560603426554e-17 64 | v -1.2246468525851679e-16 -1 -2.9995195144423884e-32 65 | vt 0 1 66 | vt 0.125 1 67 | vt 0.25 1 68 | vt 0.375 1 69 | vt 0.5 1 70 | vt 0.625 1 71 | vt 0.75 1 72 | vt 0.875 1 73 | vt 1 1 74 | vt 0 0.8333333134651184 75 | vt 0.125 0.8333333134651184 76 | vt 0.25 0.8333333134651184 77 | vt 0.375 0.8333333134651184 78 | vt 0.5 0.8333333134651184 79 | vt 0.625 0.8333333134651184 80 | vt 0.75 0.8333333134651184 81 | vt 0.875 0.8333333134651184 82 | vt 1 0.8333333134651184 83 | vt 0 0.6666666865348816 84 | vt 0.125 0.6666666865348816 85 | vt 0.25 0.6666666865348816 86 | vt 0.375 0.6666666865348816 87 | vt 0.5 0.6666666865348816 88 | vt 0.625 0.6666666865348816 89 | vt 0.75 0.6666666865348816 90 | vt 0.875 0.6666666865348816 91 | vt 1 0.6666666865348816 92 | vt 0 0.5 93 | vt 0.125 0.5 94 | vt 0.25 0.5 95 | vt 0.375 0.5 96 | vt 0.5 0.5 97 | vt 0.625 0.5 98 | vt 0.75 0.5 99 | vt 0.875 0.5 100 | vt 1 0.5 101 | vt 0 0.3333333432674408 102 | vt 0.125 0.3333333432674408 103 | vt 0.25 0.3333333432674408 104 | vt 0.375 0.3333333432674408 105 | vt 0.5 0.3333333432674408 106 | vt 0.625 0.3333333432674408 107 | vt 0.75 0.3333333432674408 108 | vt 0.875 0.3333333432674408 109 | vt 1 0.3333333432674408 110 | vt 0 0.1666666716337204 111 | vt 0.125 0.1666666716337204 112 | vt 0.25 0.1666666716337204 113 | vt 0.375 0.1666666716337204 114 | vt 0.5 0.1666666716337204 115 | vt 0.625 0.1666666716337204 116 | vt 0.75 0.1666666716337204 117 | vt 0.875 0.1666666716337204 118 | vt 1 0.1666666716337204 119 | vt 0 0 120 | vt 0.125 0 121 | vt 0.25 0 122 | vt 0.375 0 123 | vt 0.5 0 124 | vt 0.625 0 125 | vt 0.75 0 126 | vt 0.875 0 127 | vt 1 0 128 | vn 0 1 0 129 | vn 0 1 0 130 | vn 0 1 0 131 | vn 0 1 0 132 | vn 0 1 0 133 | vn 0 1 0 134 | vn 0 1 0 135 | vn 0 1 0 136 | vn 0 1 0 137 | vn -0.5 0.8660253882408142 0 138 | vn -0.3535533845424652 0.8660253882408142 0.3535533845424652 139 | vn -3.0616171314629196e-17 0.8660253882408142 0.5 140 | vn 0.3535533845424652 0.8660253882408142 0.3535533845424652 141 | 
vn 0.5 0.8660253882408142 6.123234262925839e-17 142 | vn 0.3535533845424652 0.8660253882408142 -0.3535533845424652 143 | vn 9.184850732644269e-17 0.8660253882408142 -0.5 144 | vn -0.3535533845424652 0.8660253882408142 -0.3535533845424652 145 | vn -0.5 0.8660253882408142 -1.2246468525851679e-16 146 | vn -0.8660253882408142 0.5 0 147 | vn -0.6123724579811096 0.5 0.6123724579811096 148 | vn -5.302876236065149e-17 0.5 0.8660253882408142 149 | vn 0.6123724579811096 0.5 0.6123724579811096 150 | vn 0.8660253882408142 0.5 1.0605752472130298e-16 151 | vn 0.6123724579811096 0.5 -0.6123724579811096 152 | vn 1.5908628708195447e-16 0.5 -0.8660253882408142 153 | vn -0.6123724579811096 0.5 -0.6123724579811096 154 | vn -0.8660253882408142 0.5 -2.1211504944260596e-16 155 | vn -1 6.123234262925839e-17 0 156 | vn -0.7071067690849304 6.123234262925839e-17 0.7071067690849304 157 | vn -6.123234262925839e-17 6.123234262925839e-17 1 158 | vn 0.7071067690849304 6.123234262925839e-17 0.7071067690849304 159 | vn 1 6.123234262925839e-17 1.2246468525851679e-16 160 | vn 0.7071067690849304 6.123234262925839e-17 -0.7071067690849304 161 | vn 1.8369701465288538e-16 6.123234262925839e-17 -1 162 | vn -0.7071067690849304 6.123234262925839e-17 -0.7071067690849304 163 | vn -1 6.123234262925839e-17 -2.4492937051703357e-16 164 | vn -0.8660253882408142 -0.5 0 165 | vn -0.6123724579811096 -0.5 0.6123724579811096 166 | vn -5.302876236065149e-17 -0.5 0.8660253882408142 167 | vn 0.6123724579811096 -0.5 0.6123724579811096 168 | vn 0.8660253882408142 -0.5 1.0605752472130298e-16 169 | vn 0.6123724579811096 -0.5 -0.6123724579811096 170 | vn 1.5908628708195447e-16 -0.5 -0.8660253882408142 171 | vn -0.6123724579811096 -0.5 -0.6123724579811096 172 | vn -0.8660253882408142 -0.5 -2.1211504944260596e-16 173 | vn -0.5 -0.8660253882408142 0 174 | vn -0.3535533845424652 -0.8660253882408142 0.3535533845424652 175 | vn -3.0616171314629196e-17 -0.8660253882408142 0.5 176 | vn 0.3535533845424652 -0.8660253882408142 0.3535533845424652 177 | vn 0.5 -0.8660253882408142 6.123234262925839e-17 178 | vn 0.3535533845424652 -0.8660253882408142 -0.3535533845424652 179 | vn 9.184850732644269e-17 -0.8660253882408142 -0.5 180 | vn -0.3535533845424652 -0.8660253882408142 -0.3535533845424652 181 | vn -0.5 -0.8660253882408142 -1.2246468525851679e-16 182 | vn -1.2246468525851679e-16 -1 0 183 | vn -8.659560603426554e-17 -1 8.659560603426554e-17 184 | vn -7.498798786105971e-33 -1 1.2246468525851679e-16 185 | vn 8.659560603426554e-17 -1 8.659560603426554e-17 186 | vn 1.2246468525851679e-16 -1 1.4997597572211942e-32 187 | vn 8.659560603426554e-17 -1 -8.659560603426554e-17 188 | vn 2.2496396358317913e-32 -1 -1.2246468525851679e-16 189 | vn -8.659560603426554e-17 -1 -8.659560603426554e-17 190 | vn -1.2246468525851679e-16 -1 -2.9995195144423884e-32 191 | f 1/1/1 10/10/10 11/11/11 192 | f 2/2/2 11/11/11 12/12/12 193 | f 3/3/3 12/12/12 13/13/13 194 | f 4/4/4 13/13/13 14/14/14 195 | f 5/5/5 14/14/14 15/15/15 196 | f 6/6/6 15/15/15 16/16/16 197 | f 7/7/7 16/16/16 17/17/17 198 | f 8/8/8 17/17/17 18/18/18 199 | f 11/11/11 10/10/10 20/20/20 200 | f 10/10/10 19/19/19 20/20/20 201 | f 12/12/12 11/11/11 21/21/21 202 | f 11/11/11 20/20/20 21/21/21 203 | f 13/13/13 12/12/12 22/22/22 204 | f 12/12/12 21/21/21 22/22/22 205 | f 14/14/14 13/13/13 23/23/23 206 | f 13/13/13 22/22/22 23/23/23 207 | f 15/15/15 14/14/14 24/24/24 208 | f 14/14/14 23/23/23 24/24/24 209 | f 16/16/16 15/15/15 25/25/25 210 | f 15/15/15 24/24/24 25/25/25 211 | f 17/17/17 16/16/16 26/26/26 212 | f 16/16/16 25/25/25 
26/26/26 213 | f 18/18/18 17/17/17 27/27/27 214 | f 17/17/17 26/26/26 27/27/27 215 | f 20/20/20 19/19/19 29/29/29 216 | f 19/19/19 28/28/28 29/29/29 217 | f 21/21/21 20/20/20 30/30/30 218 | f 20/20/20 29/29/29 30/30/30 219 | f 22/22/22 21/21/21 31/31/31 220 | f 21/21/21 30/30/30 31/31/31 221 | f 23/23/23 22/22/22 32/32/32 222 | f 22/22/22 31/31/31 32/32/32 223 | f 24/24/24 23/23/23 33/33/33 224 | f 23/23/23 32/32/32 33/33/33 225 | f 25/25/25 24/24/24 34/34/34 226 | f 24/24/24 33/33/33 34/34/34 227 | f 26/26/26 25/25/25 35/35/35 228 | f 25/25/25 34/34/34 35/35/35 229 | f 27/27/27 26/26/26 36/36/36 230 | f 26/26/26 35/35/35 36/36/36 231 | f 29/29/29 28/28/28 38/38/38 232 | f 28/28/28 37/37/37 38/38/38 233 | f 30/30/30 29/29/29 39/39/39 234 | f 29/29/29 38/38/38 39/39/39 235 | f 31/31/31 30/30/30 40/40/40 236 | f 30/30/30 39/39/39 40/40/40 237 | f 32/32/32 31/31/31 41/41/41 238 | f 31/31/31 40/40/40 41/41/41 239 | f 33/33/33 32/32/32 42/42/42 240 | f 32/32/32 41/41/41 42/42/42 241 | f 34/34/34 33/33/33 43/43/43 242 | f 33/33/33 42/42/42 43/43/43 243 | f 35/35/35 34/34/34 44/44/44 244 | f 34/34/34 43/43/43 44/44/44 245 | f 36/36/36 35/35/35 45/45/45 246 | f 35/35/35 44/44/44 45/45/45 247 | f 38/38/38 37/37/37 47/47/47 248 | f 37/37/37 46/46/46 47/47/47 249 | f 39/39/39 38/38/38 48/48/48 250 | f 38/38/38 47/47/47 48/48/48 251 | f 40/40/40 39/39/39 49/49/49 252 | f 39/39/39 48/48/48 49/49/49 253 | f 41/41/41 40/40/40 50/50/50 254 | f 40/40/40 49/49/49 50/50/50 255 | f 42/42/42 41/41/41 51/51/51 256 | f 41/41/41 50/50/50 51/51/51 257 | f 43/43/43 42/42/42 52/52/52 258 | f 42/42/42 51/51/51 52/52/52 259 | f 44/44/44 43/43/43 53/53/53 260 | f 43/43/43 52/52/52 53/53/53 261 | f 45/45/45 44/44/44 54/54/54 262 | f 44/44/44 53/53/53 54/54/54 263 | f 47/47/47 46/46/46 56/56/56 264 | f 48/48/48 47/47/47 57/57/57 265 | f 49/49/49 48/48/48 58/58/58 266 | f 50/50/50 49/49/49 59/59/59 267 | f 51/51/51 50/50/50 60/60/60 268 | f 52/52/52 51/51/51 61/61/61 269 | f 53/53/53 52/52/52 62/62/62 270 | f 54/54/54 53/53/53 63/63/63 271 | -------------------------------------------------------------------------------- /algos/PPO.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | import os 4 | import random 5 | import time 6 | from datetime import datetime 7 | 8 | from omegaconf import DictConfig, OmegaConf 9 | 10 | import numpy as np 11 | 12 | import torch 13 | import torch.nn as nn 14 | import torch.optim as optim 15 | from torch.distributions.normal import Normal 16 | from torch.utils.tensorboard import SummaryWriter 17 | 18 | class RunningMeanStd(nn.Module): 19 | def __init__(self, shape = (), epsilon=1e-08): 20 | super(RunningMeanStd, self).__init__() 21 | self.register_buffer("running_mean", torch.zeros(shape)) 22 | self.register_buffer("running_var", torch.ones(shape)) 23 | self.register_buffer("count", torch.ones(())) 24 | 25 | self.epsilon = epsilon 26 | 27 | def forward(self, obs, update = True): 28 | if update: 29 | self.update(obs) 30 | 31 | return (obs - self.running_mean) / torch.sqrt(self.running_var + self.epsilon) 32 | 33 | def update(self, x): 34 | """Updates the mean, var and count from a batch of samples.""" 35 | batch_mean = torch.mean(x, dim=0) 36 | batch_var = torch.var(x, correction=0, dim=0) 37 | batch_count = x.shape[0] 38 | self.update_from_moments(batch_mean, batch_var, batch_count) 39 | 40 | def update_from_moments(self, batch_mean, batch_var, batch_count): 41 | """Updates from batch mean, variance and count moments.""" 42 | 
self.running_mean, self.running_var, self.count = update_mean_var_count_from_moments( 43 | self.running_mean, self.running_var, self.count, batch_mean, batch_var, batch_count 44 | ) 45 | 46 | def update_mean_var_count_from_moments( 47 | mean, var, count, batch_mean, batch_var, batch_count 48 | ): 49 | """Updates the mean, var and count using the previous mean, var, count and batch values.""" 50 | delta = batch_mean - mean 51 | tot_count = count + batch_count 52 | 53 | new_mean = mean + delta * batch_count / tot_count 54 | m_a = var * count 55 | m_b = batch_var * batch_count 56 | M2 = m_a + m_b + torch.square(delta) * count * batch_count / tot_count 57 | new_var = M2 / tot_count 58 | new_count = tot_count 59 | 60 | return new_mean, new_var, new_count 61 | 62 | def layer_init(layer, std=np.sqrt(2), bias_const=0.0): 63 | torch.nn.init.orthogonal_(layer.weight, std) 64 | torch.nn.init.constant_(layer.bias, bias_const) 65 | return layer 66 | 67 | class Agent(nn.Module): 68 | def __init__(self, envs): 69 | super().__init__() 70 | self.critic = nn.Sequential( 71 | layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 512)), 72 | nn.ELU(), 73 | layer_init(nn.Linear(512, 256)), 74 | nn.ELU(), 75 | layer_init(nn.Linear(256, 128)), 76 | nn.ELU(), 77 | layer_init(nn.Linear(128, 1), std=1.0), 78 | ) 79 | self.actor_mean = nn.Sequential( 80 | layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 512)), 81 | nn.ELU(), 82 | layer_init(nn.Linear(512, 256)), 83 | nn.ELU(), 84 | layer_init(nn.Linear(256, 128)), 85 | nn.ELU(), 86 | layer_init(nn.Linear(128, np.prod(envs.single_action_space.shape)), std=0.01), 87 | ) 88 | self.actor_logstd = nn.Parameter(torch.zeros(1, np.prod(envs.single_action_space.shape))) 89 | 90 | self.obs_rms = RunningMeanStd(shape = envs.single_observation_space.shape) 91 | self.value_rms = RunningMeanStd(shape = ()) 92 | 93 | def get_value(self, x): 94 | return self.critic(x) 95 | 96 | def get_action_and_value(self, x, action=None): 97 | action_mean = self.actor_mean(x) 98 | action_logstd = self.actor_logstd.expand_as(action_mean) 99 | action_std = torch.exp(action_logstd) 100 | probs = Normal(action_mean, action_std) 101 | if action is None: 102 | action = probs.sample() 103 | return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(x) 104 | 105 | class ExtractObsWrapper(gym.ObservationWrapper): 106 | def observation(self, obs): 107 | return obs["obs"] 108 | 109 | def PPO(cfg: DictConfig, envs): 110 | run_path = f"runs/{cfg['train']['params']['config']['name']}_{datetime.now().strftime('%d-%H-%M-%S')}" 111 | 112 | writer = SummaryWriter(run_path) 113 | if not os.path.exists(run_path): 114 | os.makedirs(run_path) 115 | OmegaConf.save(config = cfg, f = f"{run_path}/config.yaml") 116 | 117 | LEARNING_RATE = cfg["train"]["params"]["config"]["learning_rate"] 118 | NUM_STEPS = cfg["train"]["params"]["config"]["horizon_length"] 119 | NUM_ITERATIONS = cfg["train"]["params"]["config"]["max_epochs"] 120 | GAMMA = cfg["train"]["params"]["config"]["gamma"] 121 | GAE_LAMBDA = cfg["train"]["params"]["config"]["tau"] 122 | UPDATES_EPOCHS = cfg["train"]["params"]["config"]["mini_epochs"] 123 | MINIBATCH_SIZE = cfg["train"]["params"]["config"]["minibatch_size"] 124 | CLIP_COEF = cfg["train"]["params"]["config"]["e_clip"] 125 | ENT_COEF = cfg["train"]["params"]["config"]["entropy_coef"] 126 | VF_COEF = cfg["train"]["params"]["config"]["critic_coef"] 127 | MAX_GRAD_NORM = cfg["train"]["params"]["config"]["grad_norm"] 128 | NORM_ADV = 
cfg["train"]["params"]["config"]["normalize_advantage"] 129 | CLIP_VLOSS = cfg["train"]["params"]["config"]["clip_value"] 130 | ANNEAL_LR = True 131 | 132 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 133 | 134 | envs.single_action_space = envs.action_space 135 | envs.single_observation_space = envs.observation_space 136 | 137 | envs = ExtractObsWrapper(envs) 138 | 139 | BATCH_SIZE = int(envs.num_envs * NUM_STEPS) 140 | 141 | agent = Agent(envs).to(device) 142 | optimizer = optim.Adam(agent.parameters(), lr=LEARNING_RATE, eps=1e-5) 143 | 144 | obs = torch.zeros((NUM_STEPS, envs.num_envs) + envs.single_observation_space.shape, dtype=torch.float).to(device) 145 | actions = torch.zeros((NUM_STEPS, envs.num_envs) + envs.single_action_space.shape, dtype=torch.float).to(device) 146 | logprobs = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device) 147 | rewards = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device) 148 | dones = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device) 149 | true_dones = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device) 150 | values = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device) 151 | advantages = torch.zeros_like(rewards, dtype=torch.float).to(device) 152 | 153 | # TRY NOT TO MODIFY: start the game 154 | global_step = 0 155 | start_time = time.time() 156 | next_obs = envs.reset() 157 | next_obs = agent.obs_rms(next_obs) 158 | next_done = torch.zeros(envs.num_envs, dtype=torch.float).to(device) 159 | next_true_done = torch.zeros(envs.num_envs, dtype=torch.float).to(device) 160 | 161 | for iteration in range(1, NUM_ITERATIONS + 1): 162 | if ANNEAL_LR: 163 | frac = 1.0 - (iteration - 1.0) / NUM_ITERATIONS 164 | lrnow = frac * LEARNING_RATE 165 | optimizer.param_groups[0]["lr"] = lrnow 166 | 167 | for step in range(0, NUM_STEPS): 168 | global_step += envs.num_envs 169 | obs[step] = next_obs 170 | dones[step] = next_done 171 | true_dones[step] = next_true_done 172 | 173 | # ALGO LOGIC: action logic 174 | with torch.no_grad(): 175 | action, logprob, _, value = agent.get_action_and_value(next_obs) 176 | values[step] = value.flatten() 177 | actions[step] = action 178 | logprobs[step] = logprob 179 | 180 | # TRY NOT TO MODIFY: execute the game and log data. 
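# Sketch of the vectorized step contract assumed by the call below (illustrative only,
# inferred from how the surrounding code uses the returned values):
#   next_obs  -> torch.Tensor of shape (num_envs, *single_observation_space.shape)
#   rewards   -> torch.Tensor of shape (num_envs,)
#   next_done -> torch.Tensor of shape (num_envs,); in this task the CaT machinery appears
#                to fold constraint-triggered terminations into these flags
#   info      -> dict carrying at least "true_dones" (actual episode ends; together with
#                the dones above they mask value bootstrapping in the GAE pass further down)
#                and optionally "time_outs"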
181 | next_obs, rewards[step], next_done, info = envs.step(action) 182 | next_obs = agent.obs_rms(next_obs) 183 | next_true_done = info["true_dones"].float() 184 | if "time_outs" in info: 185 | if info["time_outs"].any(): 186 | print("time outs", info["time_outs"].sum()) 187 | exit(0) 188 | 189 | for el, v in zip(list(envs.episode_sums.keys())[:envs.numRewards], (torch.mean(envs.rew_mean_reset, dim=0)).tolist()): 190 | writer.add_scalar(f"reward/{el}", v, iteration) 191 | writer.add_scalar(f"reward/cum_rew", (torch.sum(torch.mean(envs.rew_cum_reset, dim=0))).item(), iteration) 192 | writer.add_scalar(f"reward/avg_rew", envs.terrain_levels.float().mean().item(), iteration) 193 | for el, v in zip(envs.cstr_manager.get_names(), (100.0 * torch.mean(envs.cstr_mean_reset, dim=0)).tolist()): 194 | writer.add_scalar(f"cstr/{el}", v, iteration) 195 | 196 | # CaT: must compute the CaT quantity 197 | not_dones = 1.0 - dones 198 | rewards *= not_dones 199 | 200 | # bootstrap value if not done 201 | with torch.no_grad(): 202 | next_value = agent.get_value(next_obs).reshape(1, -1) 203 | advantages = torch.zeros_like(rewards).to(device) 204 | lastgaelam = 0 205 | for t in reversed(range(NUM_STEPS)): 206 | if t == NUM_STEPS - 1: 207 | nextnonterminal = 1.0 - next_done 208 | true_nextnonterminal = 1 - next_true_done 209 | nextvalues = next_value 210 | else: 211 | nextnonterminal = 1.0 - dones[t + 1] 212 | true_nextnonterminal = 1 - true_dones[t + 1] 213 | nextvalues = values[t + 1] 214 | delta = rewards[t] + GAMMA * nextvalues * nextnonterminal * true_nextnonterminal - values[t] 215 | advantages[t] = lastgaelam = delta + GAMMA * GAE_LAMBDA * nextnonterminal * true_nextnonterminal * lastgaelam 216 | returns = advantages + values 217 | 218 | # flatten the batch 219 | b_obs = obs.reshape((-1,) + envs.single_observation_space.shape) 220 | b_logprobs = logprobs.reshape(-1) 221 | b_actions = actions.reshape((-1,) + envs.single_action_space.shape) 222 | b_advantages = advantages.reshape(-1) 223 | b_returns = returns.reshape(-1) 224 | b_values = values.reshape(-1) 225 | 226 | b_values = agent.value_rms(b_values) 227 | b_returns = agent.value_rms(b_returns) 228 | 229 | # Optimizing the policy and value network 230 | clipfracs = [] 231 | for epoch in range(UPDATES_EPOCHS): 232 | b_inds = torch.randperm(BATCH_SIZE, device=device) 233 | for start in range(0, BATCH_SIZE, MINIBATCH_SIZE): 234 | end = start + MINIBATCH_SIZE 235 | mb_inds = b_inds[start:end] 236 | 237 | _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions[mb_inds]) 238 | logratio = newlogprob - b_logprobs[mb_inds] 239 | ratio = logratio.exp() 240 | 241 | with torch.no_grad(): 242 | # calculate approx_kl http://joschu.net/blog/kl-approx.html 243 | old_approx_kl = (-logratio).mean() 244 | approx_kl = ((ratio - 1) - logratio).mean() 245 | clipfracs += [((ratio - 1.0).abs() > CLIP_COEF).float().mean().item()] 246 | 247 | mb_advantages = b_advantages[mb_inds] 248 | if NORM_ADV: 249 | mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) 250 | 251 | # Policy loss 252 | pg_loss1 = -mb_advantages * ratio 253 | pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - CLIP_COEF, 1 + CLIP_COEF) 254 | pg_loss = torch.max(pg_loss1, pg_loss2).mean() 255 | 256 | # Value loss 257 | newvalue = newvalue.view(-1) 258 | newvalue = agent.value_rms(newvalue, update = False) 259 | if CLIP_VLOSS: 260 | v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2 261 | v_clipped = b_values[mb_inds] + torch.clamp( 262 | 
newvalue - b_values[mb_inds], 263 | -CLIP_COEF, 264 | CLIP_COEF, 265 | ) 266 | v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 267 | v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) 268 | v_loss = 0.5 * v_loss_max.mean() 269 | else: 270 | v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() 271 | 272 | entropy_loss = entropy.mean() 273 | loss = pg_loss - ENT_COEF * entropy_loss + v_loss * VF_COEF 274 | 275 | optimizer.zero_grad() 276 | loss.backward() 277 | nn.utils.clip_grad_norm_(agent.parameters(), MAX_GRAD_NORM) 278 | optimizer.step() 279 | 280 | if (iteration + 1) % 24 == 0: 281 | model_path = f"{run_path}/cleanrl_model.pt" 282 | torch.save(agent.state_dict(), model_path) 283 | print("Saved model") 284 | 285 | def eval_PPO(cfg: DictConfig, envs): 286 | checkpoint = cfg["checkpoint"] 287 | actor_sd = torch.load(checkpoint) 288 | 289 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 290 | 291 | envs.single_action_space = envs.action_space 292 | envs.single_observation_space = envs.observation_space 293 | 294 | envs = ExtractObsWrapper(envs) 295 | 296 | actor = Agent(envs).to(device) 297 | actor.load_state_dict(actor_sd) 298 | 299 | obs = envs.reset() 300 | 301 | for _ in range(2000): 302 | with torch.no_grad(): 303 | actions, _, _, _ = actor.get_action_and_value(actor.obs_rms(obs, update = False)) 304 | 305 | next_obs, rewards, terminations, infos = envs.step(actions) 306 | obs = next_obs 307 | -------------------------------------------------------------------------------- /solo12/meshes/cylinder.obj: -------------------------------------------------------------------------------- 1 | o Cylinder 2 | v 0 -0.998248581950179 0.5034876052441187 3 | v 0 -1.0017392333654023 -0.4965063024136719 4 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156 5 | v 0 -1.0017392333654023 -0.4965063024136719 6 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749 7 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156 8 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156 9 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749 10 | v 1 0.00174532570761165 0.4999969538288953 11 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749 12 | v 1 -0.0017453257076117724 -0.4999969538288953 13 | v 1 0.00174532570761165 0.4999969538288953 14 | v 1 0.00174532570761165 0.4999969538288953 15 | v 1 -0.0017453257076117724 -0.4999969538288953 16 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749 17 | v 1 -0.0017453257076117724 -0.4999969538288953 18 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156 19 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749 20 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749 21 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156 22 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719 23 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156 24 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187 25 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719 26 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719 27 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187 28 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749 29 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187 30 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156 31 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749 32 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749 33 
| v -0.7071067690849304 0.705357135440903 -0.5024652170731156 34 | v -1 0.001745325707611895 0.4999969538288953 35 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156 36 | v -1 -0.0017453257076115276 -0.4999969538288953 37 | v -1 0.001745325707611895 0.4999969538288953 38 | v -1 0.001745325707611895 0.4999969538288953 39 | v -1 -0.0017453257076115276 -0.4999969538288953 40 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156 41 | v -1 -0.0017453257076115276 -0.4999969538288953 42 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749 43 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156 44 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156 45 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749 46 | v -2.4492937051703357e-16 -0.998248581950179 0.5034876052441187 47 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749 48 | v -2.4492937051703357e-16 -1.0017392333654023 -0.4965063024136719 49 | v -2.4492937051703357e-16 -0.998248581950179 0.5034876052441187 50 | v 0 -0.998248581950179 0.5034876052441187 51 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156 52 | v 0 0.0017453257076117112 0.4999969538288953 53 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156 54 | v 1 0.00174532570761165 0.4999969538288953 55 | v 0 0.0017453257076117112 0.4999969538288953 56 | v 1 0.00174532570761165 0.4999969538288953 57 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749 58 | v 0 0.0017453257076117112 0.4999969538288953 59 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749 60 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719 61 | v 0 0.0017453257076117112 0.4999969538288953 62 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719 63 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749 64 | v 0 0.0017453257076117112 0.4999969538288953 65 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749 66 | v -1 0.001745325707611895 0.4999969538288953 67 | v 0 0.0017453257076117112 0.4999969538288953 68 | v -1 0.001745325707611895 0.4999969538288953 69 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156 70 | v 0 0.0017453257076117112 0.4999969538288953 71 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156 72 | v -2.4492937051703357e-16 -0.998248581950179 0.5034876052441187 73 | v 0 0.0017453257076117112 0.4999969538288953 74 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749 75 | v 0 -1.0017392333654023 -0.4965063024136719 76 | v 0 -0.0017453257076117112 -0.4999969538288953 77 | v 1 -0.0017453257076117724 -0.4999969538288953 78 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749 79 | v 0 -0.0017453257076117112 -0.4999969538288953 80 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156 81 | v 1 -0.0017453257076117724 -0.4999969538288953 82 | v 0 -0.0017453257076117112 -0.4999969538288953 83 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187 84 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156 85 | v 0 -0.0017453257076117112 -0.4999969538288953 86 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156 87 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187 88 | v 0 -0.0017453257076117112 -0.4999969538288953 89 | v -1 -0.0017453257076115276 -0.4999969538288953 90 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156 91 | v 0 -0.0017453257076117112 -0.4999969538288953 92 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749 93 | v -1 -0.0017453257076115276 
-0.4999969538288953 94 | v 0 -0.0017453257076117112 -0.4999969538288953 95 | v -2.4492937051703357e-16 -1.0017392333654023 -0.4965063024136719 96 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749 97 | v 0 -0.0017453257076117112 -0.4999969538288953 98 | vt 0 1 99 | vt 0 0 100 | vt 0.125 1 101 | vt 0 0 102 | vt 0.125 0 103 | vt 0.125 1 104 | vt 0.125 1 105 | vt 0.125 0 106 | vt 0.25 1 107 | vt 0.125 0 108 | vt 0.25 0 109 | vt 0.25 1 110 | vt 0.25 1 111 | vt 0.25 0 112 | vt 0.375 1 113 | vt 0.25 0 114 | vt 0.375 0 115 | vt 0.375 1 116 | vt 0.375 1 117 | vt 0.375 0 118 | vt 0.5 1 119 | vt 0.375 0 120 | vt 0.5 0 121 | vt 0.5 1 122 | vt 0.5 1 123 | vt 0.5 0 124 | vt 0.625 1 125 | vt 0.5 0 126 | vt 0.625 0 127 | vt 0.625 1 128 | vt 0.625 1 129 | vt 0.625 0 130 | vt 0.75 1 131 | vt 0.625 0 132 | vt 0.75 0 133 | vt 0.75 1 134 | vt 0.75 1 135 | vt 0.75 0 136 | vt 0.875 1 137 | vt 0.75 0 138 | vt 0.875 0 139 | vt 0.875 1 140 | vt 0.875 1 141 | vt 0.875 0 142 | vt 1 1 143 | vt 0.875 0 144 | vt 1 0 145 | vt 1 1 146 | vt 1 0.5 147 | vt 0.8535534143447876 0.8535534143447876 148 | vt 0.5 0.5 149 | vt 0.8535534143447876 0.8535534143447876 150 | vt 0.5 1 151 | vt 0.5 0.5 152 | vt 0.5 1 153 | vt 0.1464466154575348 0.8535534143447876 154 | vt 0.5 0.5 155 | vt 0.1464466154575348 0.8535534143447876 156 | vt 0 0.5 157 | vt 0.5 0.5 158 | vt 0 0.5 159 | vt 0.1464466154575348 0.1464466154575348 160 | vt 0.5 0.5 161 | vt 0.1464466154575348 0.1464466154575348 162 | vt 0.5 0 163 | vt 0.5 0.5 164 | vt 0.5 0 165 | vt 0.8535534143447876 0.1464466154575348 166 | vt 0.5 0.5 167 | vt 0.8535534143447876 0.1464466154575348 168 | vt 1 0.5 169 | vt 0.5 0.5 170 | vt 0.8535534143447876 0.1464466154575348 171 | vt 1 0.5 172 | vt 0.5 0.5 173 | vt 0.5 0 174 | vt 0.8535534143447876 0.1464466154575348 175 | vt 0.5 0.5 176 | vt 0.1464466154575348 0.1464466154575348 177 | vt 0.5 0 178 | vt 0.5 0.5 179 | vt 0 0.5 180 | vt 0.1464466154575348 0.1464466154575348 181 | vt 0.5 0.5 182 | vt 0.1464466154575348 0.8535534143447876 183 | vt 0 0.5 184 | vt 0.5 0.5 185 | vt 0.5 1 186 | vt 0.1464466154575348 0.8535534143447876 187 | vt 0.5 0.5 188 | vt 0.8535534143447876 0.8535534143447876 189 | vt 0.5 1 190 | vt 0.5 0.5 191 | vt 1 0.5 192 | vt 0.8535534143447876 0.8535534143447876 193 | vt 0.5 0.5 194 | vn 0 -0.9999939076577902 0.0034906514152234207 195 | vn 0 -0.9999939076577902 0.0034906514152234207 196 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373 197 | vn 0 -0.9999939076577902 0.0034906514152234207 198 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373 199 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373 200 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373 201 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373 202 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19 203 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373 204 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19 205 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19 206 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19 207 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19 208 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373 209 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19 210 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373 211 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373 212 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373 213 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373 
214 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207 215 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373 216 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207 217 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207 218 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207 219 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207 220 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373 221 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207 222 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373 223 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373 224 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373 225 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373 226 | vn -1 1.836958955078092e-16 -6.412222441704118e-19 227 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373 228 | vn -1 1.836958955078092e-16 -6.412222441704118e-19 229 | vn -1 1.836958955078092e-16 -6.412222441704118e-19 230 | vn -1 1.836958955078092e-16 -6.412222441704118e-19 231 | vn -1 1.836958955078092e-16 -6.412222441704118e-19 232 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373 233 | vn -1 1.836958955078092e-16 -6.412222441704118e-19 234 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373 235 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373 236 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373 237 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373 238 | vn -2.4492937051703357e-16 -0.9999939076577902 0.0034906514152234207 239 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373 240 | vn -2.4492937051703357e-16 -0.9999939076577902 0.0034906514152234207 241 | vn -2.4492937051703357e-16 -0.9999939076577902 0.0034906514152234207 242 | vn 0 0.0034906514152234207 0.9999939076577902 243 | vn 0 0.0034906514152234207 0.9999939076577902 244 | vn 0 0.0034906514152234207 0.9999939076577902 245 | vn 0 0.0034906514152234207 0.9999939076577902 246 | vn 0 0.0034906514152234207 0.9999939076577902 247 | vn 0 0.0034906514152234207 0.9999939076577902 248 | vn 0 0.0034906514152234207 0.9999939076577902 249 | vn 0 0.0034906514152234207 0.9999939076577902 250 | vn 0 0.0034906514152234207 0.9999939076577902 251 | vn 0 0.0034906514152234207 0.9999939076577902 252 | vn 0 0.0034906514152234207 0.9999939076577902 253 | vn 0 0.0034906514152234207 0.9999939076577902 254 | vn 0 0.0034906514152234207 0.9999939076577902 255 | vn 0 0.0034906514152234207 0.9999939076577902 256 | vn 0 0.0034906514152234207 0.9999939076577902 257 | vn 0 0.0034906514152234207 0.9999939076577902 258 | vn 0 0.0034906514152234207 0.9999939076577902 259 | vn 0 0.0034906514152234207 0.9999939076577902 260 | vn 0 0.0034906514152234207 0.9999939076577902 261 | vn 0 0.0034906514152234207 0.9999939076577902 262 | vn 0 0.0034906514152234207 0.9999939076577902 263 | vn 0 0.0034906514152234207 0.9999939076577902 264 | vn 0 0.0034906514152234207 0.9999939076577902 265 | vn 0 0.0034906514152234207 0.9999939076577902 266 | vn 0 -0.0034906514152234207 -0.9999939076577902 267 | vn 0 -0.0034906514152234207 -0.9999939076577902 268 | vn 0 -0.0034906514152234207 -0.9999939076577902 269 | vn 0 -0.0034906514152234207 -0.9999939076577902 270 | vn 0 -0.0034906514152234207 -0.9999939076577902 271 | vn 0 -0.0034906514152234207 -0.9999939076577902 272 | vn 0 -0.0034906514152234207 -0.9999939076577902 273 | vn 
0 -0.0034906514152234207 -0.9999939076577902 274 | vn 0 -0.0034906514152234207 -0.9999939076577902 275 | vn 0 -0.0034906514152234207 -0.9999939076577902 276 | vn 0 -0.0034906514152234207 -0.9999939076577902 277 | vn 0 -0.0034906514152234207 -0.9999939076577902 278 | vn 0 -0.0034906514152234207 -0.9999939076577902 279 | vn 0 -0.0034906514152234207 -0.9999939076577902 280 | vn 0 -0.0034906514152234207 -0.9999939076577902 281 | vn 0 -0.0034906514152234207 -0.9999939076577902 282 | vn 0 -0.0034906514152234207 -0.9999939076577902 283 | vn 0 -0.0034906514152234207 -0.9999939076577902 284 | vn 0 -0.0034906514152234207 -0.9999939076577902 285 | vn 0 -0.0034906514152234207 -0.9999939076577902 286 | vn 0 -0.0034906514152234207 -0.9999939076577902 287 | vn 0 -0.0034906514152234207 -0.9999939076577902 288 | vn 0 -0.0034906514152234207 -0.9999939076577902 289 | vn 0 -0.0034906514152234207 -0.9999939076577902 290 | f 1/1/1 2/2/2 3/3/3 291 | f 4/4/4 5/5/5 6/6/6 292 | f 7/7/7 8/8/8 9/9/9 293 | f 10/10/10 11/11/11 12/12/12 294 | f 13/13/13 14/14/14 15/15/15 295 | f 16/16/16 17/17/17 18/18/18 296 | f 19/19/19 20/20/20 21/21/21 297 | f 22/22/22 23/23/23 24/24/24 298 | f 25/25/25 26/26/26 27/27/27 299 | f 28/28/28 29/29/29 30/30/30 300 | f 31/31/31 32/32/32 33/33/33 301 | f 34/34/34 35/35/35 36/36/36 302 | f 37/37/37 38/38/38 39/39/39 303 | f 40/40/40 41/41/41 42/42/42 304 | f 43/43/43 44/44/44 45/45/45 305 | f 46/46/46 47/47/47 48/48/48 306 | f 49/49/49 50/50/50 51/51/51 307 | f 52/52/52 53/53/53 54/54/54 308 | f 55/55/55 56/56/56 57/57/57 309 | f 58/58/58 59/59/59 60/60/60 310 | f 61/61/61 62/62/62 63/63/63 311 | f 64/64/64 65/65/65 66/66/66 312 | f 67/67/67 68/68/68 69/69/69 313 | f 70/70/70 71/71/71 72/72/72 314 | f 73/73/73 74/74/74 75/75/75 315 | f 76/76/76 77/77/77 78/78/78 316 | f 79/79/79 80/80/80 81/81/81 317 | f 82/82/82 83/83/83 84/84/84 318 | f 85/85/85 86/86/86 87/87/87 319 | f 88/88/88 89/89/89 90/90/90 320 | f 91/91/91 92/92/92 93/93/93 321 | f 94/94/94 95/95/95 96/96/96 322 | -------------------------------------------------------------------------------- /tasks/terrainParkour.py: -------------------------------------------------------------------------------- 1 | #################### 2 | # Terrain generator 3 | #################### 4 | import numpy as np 5 | 6 | from isaacgym.terrain_utils import * 7 | 8 | class Terrain: 9 | """Class to handle terrain creation, usually a bunch of subterrains surrounding by a flat border area. 10 | 11 | Subterrains are spread on a given number of columns and rows. Each column might correspond to a given 12 | type of subterrains (slope, stairs, ...) and each row to a given difficulty (from easiest to hardest). 
13 | """ 14 | 15 | def __init__(self, cfg, num_robots) -> None: 16 | 17 | self.type = cfg["terrainType"] 18 | if self.type in ["none", 'plane']: 19 | return 20 | 21 | # Create subterrains on the fly based on Isaac subterrain primitives 22 | 23 | # Retrieving proportions of each kind of subterrains 24 | keys = list(cfg["terrainProportions"].keys()) 25 | vals = list(cfg["terrainProportions"].values()) 26 | self.terrain_keys = [] 27 | self.terrain_proportions = [] 28 | sum = 0.0 29 | for key, val in zip(keys, vals): 30 | if val != 0.0: 31 | sum += float(val) 32 | self.terrain_keys.append(key) 33 | self.terrain_proportions.append(np.round(sum, 2)) 34 | 35 | self.horizontal_scale = 0.05 # Resolution of the terrain height map 36 | self.border_size = 8.0 # Size of the flat border area all around the terrains 37 | self.env_length = cfg["mapLength"] # Length of subterrains 38 | self.env_width = cfg["mapWidth"] # Width of subterrains 39 | self.env_rows = cfg["numLevels"] 40 | self.env_cols = cfg["numTerrains"] 41 | self.num_maps = self.env_rows * self.env_cols 42 | self.num_per_env = int(num_robots / self.num_maps) 43 | self.env_origins = np.zeros((self.env_rows, self.env_cols, 3)) 44 | 45 | # Number of height map cells for each subterrain in width and length 46 | self.width_per_env_pixels = int(self.env_width / self.horizontal_scale) 47 | self.length_per_env_pixels = int(self.env_length / self.horizontal_scale) 48 | 49 | self.border = int(self.border_size / self.horizontal_scale) 50 | self.tot_cols = int(self.env_cols * self.width_per_env_pixels) + 2 * self.border 51 | self.tot_rows = int(self.env_rows * self.length_per_env_pixels) + 2 * self.border 52 | self.height_field_raw = np.zeros((self.tot_rows , self.tot_cols), dtype=np.float32) 53 | self.ceilings = np.zeros((self.tot_rows , self.tot_cols), dtype=np.float32) 54 | 55 | self.boxes = [] 56 | self.vertical_scale = 0.005 57 | if cfg["curriculum"]: 58 | self.curiculum(num_robots, num_terrains=self.env_cols, num_levels=self.env_rows) 59 | else: 60 | self.curiculum(num_robots, num_terrains=self.env_cols, num_levels=self.env_rows) 61 | #self.randomized_terrain() 62 | self.vertices, self.triangles = convert_heightfield_to_trimesh(self.height_field_raw, self.horizontal_scale, self.vertical_scale, cfg["slopeTreshold"]) 63 | if len(self.boxes) > 0: 64 | for box in self.boxes: 65 | self.vertices, self.triangles = combine_trimeshes((self.vertices, self.triangles), box) 66 | self.heightsamples = self.height_field_raw 67 | 68 | def randomized_terrain(self): 69 | """Spawn random subterrain without ordering them by type or difficulty 70 | according to their rows and columns""" 71 | 72 | for k in range(self.num_maps): 73 | # Env coordinates in the world 74 | (i, j) = np.unravel_index(k, (self.env_rows, self.env_cols)) 75 | 76 | # Heightfield coordinate system from now on 77 | start_x = self.border + i * self.length_per_env_pixels 78 | end_x = self.border + (i + 1) * self.length_per_env_pixels 79 | start_y = self.border + j * self.width_per_env_pixels 80 | end_y = self.border + (j + 1) * self.width_per_env_pixels 81 | 82 | terrain = SubTerrain("terrain", 83 | width=self.width_per_env_pixels, 84 | length=self.width_per_env_pixels, 85 | vertical_scale=self.vertical_scale, 86 | horizontal_scale=self.horizontal_scale) 87 | choice = np.random.uniform(0, 1) 88 | if choice < 0.1: 89 | if np.random.choice([0, 1]): 90 | pyramid_sloped_terrain(terrain, np.random.choice([-0.3, -0.2, 0, 0.2, 0.3])) 91 | random_uniform_terrain(terrain, min_height=-0.1, max_height=0.1, 
step=0.05, downsampled_scale=0.2) 92 | else: 93 | pyramid_sloped_terrain(terrain, np.random.choice([-0.3, -0.2, 0, 0.2, 0.3])) 94 | elif choice < 0.6: 95 | # step_height = np.random.choice([-0.18, -0.15, -0.1, -0.05, 0.05, 0.1, 0.15, 0.18]) 96 | step_height = np.random.choice([-0.15, 0.15]) 97 | pyramid_stairs_terrain(terrain, step_width=0.31, step_height=step_height, platform_size=3.) 98 | elif choice < 1.: 99 | discrete_obstacles_terrain(terrain, 0.15, 1., 2., 40, platform_size=3.) 100 | 101 | self.height_field_raw[start_x: end_x, start_y:end_y] = terrain.height_field_raw 102 | 103 | env_origin_x = (i + 0.5) * self.env_length 104 | env_origin_y = (j + 0.5) * self.env_width 105 | x1 = int((self.env_length / 2. - 1) / self.horizontal_scale) 106 | x2 = int((self.env_length / 2. + 1) / self.horizontal_scale) 107 | y1 = int((self.env_width / 2. - 1) / self.horizontal_scale) 108 | y2 = int((self.env_width / 2. + 1) / self.horizontal_scale) 109 | env_origin_z = np.max(terrain.height_field_raw[x1:x2, y1:y2])*self.vertical_scale 110 | self.env_origins[i, j] = [env_origin_x, env_origin_y, env_origin_z] 111 | 112 | def curiculum(self, num_robots, num_terrains, num_levels): 113 | """Spawn subterrain ordering them by type (one type per column) 114 | and by difficulty (first row is easiest, last one is hardest) 115 | """ 116 | 117 | num_robots_per_map = int(num_robots / num_terrains) 118 | left_over = num_robots % num_terrains 119 | idx = 0 120 | for j in range(num_terrains): 121 | for i in range(num_levels): 122 | terrain = SubTerrain("terrain", 123 | length=self.length_per_env_pixels, # seems to be inverted 124 | width=self.width_per_env_pixels, 125 | vertical_scale=self.vertical_scale, 126 | horizontal_scale=self.horizontal_scale) 127 | difficulty = i / (num_levels-1.0) 128 | choice = j / num_terrains 129 | lava_depth=-np.random.uniform(0.7, 1.3) 130 | boxes=None 131 | ceiling = 0.4 132 | 133 | k = 0 134 | while k < len(self.terrain_proportions) and choice >= self.terrain_proportions[k]: 135 | k += 1 136 | if k == len(self.terrain_proportions): 137 | # The sum of terrain proportions is not >= 1 138 | # Defaulting to flat ground 139 | continue 140 | 141 | if self.terrain_keys[k] == "gap_parkour": 142 | gap_length = 0.15 + i*0.05 143 | gap_length = np.round(gap_length, 2) 144 | 145 | gap_parkour( 146 | terrain, 147 | lava_depth=lava_depth, 148 | gap_length=gap_length, 149 | gap_platform_height=0.1 150 | ) 151 | add_roughness(terrain, np.random.uniform(0.01, 0.03)) 152 | elif self.terrain_keys[k] == "jump_parkour": 153 | height = 0.05 + 0.37*difficulty 154 | jump_parkour( 155 | terrain, 156 | lava_depth=lava_depth, 157 | height=height, 158 | ) 159 | add_roughness(terrain, np.random.uniform(0.01, 0.03)) 160 | 161 | elif self.terrain_keys[k] == "stairs_parkour": 162 | stairs_parkour( 163 | terrain, 164 | lava_depth=lava_depth, 165 | height=0.02 + 0.18*difficulty, 166 | 167 | ) 168 | add_roughness(terrain, np.random.uniform(0.01, 0.03)) 169 | 170 | elif self.terrain_keys[k] == "hurdle_parkour": 171 | hurdle_parkour( 172 | terrain, 173 | lava_depth=lava_depth, 174 | height=0.05 + 0.3*difficulty, 175 | ) 176 | add_roughness(terrain, np.random.uniform(0.01, 0.03)) 177 | elif self.terrain_keys[k] == "crawl_parkour": 178 | ceiling = 0.34 - 0.08*difficulty 179 | boxes = crawl_parkour( 180 | terrain, 181 | lava_depth=lava_depth, 182 | height=ceiling 183 | ) 184 | add_roughness(terrain, np.random.uniform(0.01, 0.03)) 185 | elif self.terrain_keys[k] == "random_uniform": 186 | add_roughness(terrain, 
np.random.uniform(0.01, 0.03)) 187 | 188 | else: 189 | # Flat ground 190 | pass 191 | 192 | # Heightfield coordinate system 193 | start_x = self.border + i * self.length_per_env_pixels 194 | end_x = self.border + (i + 1) * self.length_per_env_pixels 195 | start_y = self.border + j * self.width_per_env_pixels 196 | end_y = self.border + (j + 1) * self.width_per_env_pixels 197 | self.height_field_raw[start_x: end_x, start_y:end_y] = terrain.height_field_raw.transpose() # Seems that width and length are inverter in SubTerrain 198 | 199 | if boxes is not None: 200 | for box in boxes: 201 | box = move_trimesh(box, np.array([[ 202 | self.border_size+i*self.env_length, self.border_size + (j+0.5) * self.env_width, 0.0 203 | ]])) 204 | self.boxes.append(box) 205 | 206 | robots_in_map = num_robots_per_map 207 | if j < left_over: 208 | robots_in_map +=1 209 | 210 | env_origin_x = (i + 0.0) * self.env_length 211 | env_origin_y = (j + 0.5) * self.env_width 212 | env_origin_z = np.random.uniform(0.0, 0.4) 213 | self.env_origins[i, j] = [env_origin_x, env_origin_y, env_origin_z] 214 | self.ceilings[i, j] = ceiling 215 | 216 | def add_roughness(terrain, noise_magnitude=0.02): 217 | random_uniform_terrain( 218 | terrain, 219 | min_height=-noise_magnitude, 220 | max_height=noise_magnitude, 221 | step=0.005, 222 | downsampled_scale=0.075, 223 | ) 224 | 225 | def gap_parkour(terrain, platform_length=1., lava_width=0.5, lava_depth=-1.0, gap_length=0.5, gap_platform_length_min=1.25, gap_platform_length_max=1.5, gap_platform_height=0.05): 226 | platform_length = int(platform_length / terrain.horizontal_scale) 227 | lava_width = int(lava_width / terrain.horizontal_scale) 228 | lava_depth = int(lava_depth / terrain.vertical_scale) 229 | 230 | gap_platform_length_min = int(gap_platform_length_min / terrain.horizontal_scale) 231 | gap_platform_length_max = int(gap_platform_length_max / terrain.horizontal_scale) 232 | 233 | gap_length = int(gap_length / terrain.horizontal_scale) 234 | gap_platform_height = int(gap_platform_height / terrain.vertical_scale) 235 | 236 | # add gap 237 | start_gap = platform_length 238 | while start_gap + gap_length <= terrain.length - platform_length//2: 239 | gap_platform_length = np.random.randint(gap_platform_length_min, gap_platform_length_max) 240 | terrain.height_field_raw[:, start_gap:start_gap+gap_length] = lava_depth 241 | # randomize gap platform height 242 | if start_gap + gap_length + gap_platform_length <= terrain.length - platform_length //2: 243 | #terrain.height_field_raw[:, start_gap+gap_length:start_gap+gap_length+gap_platform_length] = np.random.randint(-gap_platform_height, gap_platform_height) 244 | terrain.height_field_raw[:, start_gap+gap_length:start_gap+gap_length+gap_platform_length] = -gap_platform_height 245 | 246 | start_gap += gap_length + gap_platform_length 247 | 248 | # the floor is lava 249 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth 250 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth 251 | 252 | def jump_parkour(terrain, platform_length=1.25, lava_width=0.5, lava_depth=-1.0, height=0.5, height_platform_length=1.5): 253 | platform_length = int(platform_length / terrain.horizontal_scale) 254 | lava_width = int(lava_width / terrain.horizontal_scale) 255 | lava_depth = int(lava_depth / terrain.vertical_scale) 256 | 257 | height_platform_length = int(height_platform_length / terrain.horizontal_scale) 258 | height = int(height / terrain.vertical_scale) 259 | 260 | # Version with 2 jumps 261 | 
#terrain.height_field_raw[:, platform_length:platform_length+3*height_platform_length] = height 262 | #terrain.height_field_raw[:, platform_length+height_platform_length:platform_length+2*height_platform_length] = 2*height 263 | 264 | # Version with 3 jumps 265 | terrain.height_field_raw[:, 1*platform_length:6*platform_length] = 1*height 266 | terrain.height_field_raw[:, 2*platform_length:5*platform_length] = 2*height 267 | terrain.height_field_raw[:, 3*platform_length:4*platform_length] = 3*height 268 | 269 | # the floor is lava 270 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth 271 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth 272 | 273 | def stairs_parkour(terrain, platform_length=1., lava_width=0.5, lava_depth=-1.0, height=0.18, width=0.3, stairs_platform_length=1.25): 274 | platform_length = int(platform_length / terrain.horizontal_scale) 275 | lava_width = int(lava_width / terrain.horizontal_scale) 276 | lava_depth = int(lava_depth / terrain.vertical_scale) 277 | 278 | stairs_platform_length = int(stairs_platform_length / terrain.horizontal_scale) 279 | height = int(height / terrain.vertical_scale) 280 | width = int(width / terrain.horizontal_scale) 281 | 282 | start = platform_length 283 | stop = terrain.length - platform_length//2 284 | curr_height = height 285 | while stop - start > platform_length: 286 | terrain.height_field_raw[:, start:stop] = curr_height 287 | curr_height += height 288 | start += width 289 | stop -= width 290 | 291 | # the floor is lava 292 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth 293 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth 294 | 295 | 296 | def hurdle_parkour(terrain, platform_length=1.5, lava_width=0.5, lava_depth=-1.0, height=0.2, width_min=0.3, width_max=0.5): 297 | platform_length = int(platform_length / terrain.horizontal_scale) 298 | lava_width = int(lava_width / terrain.horizontal_scale) 299 | lava_depth = int(lava_depth / terrain.vertical_scale) 300 | 301 | height = int(height / terrain.vertical_scale) 302 | width_min = int(width_min / terrain.horizontal_scale) 303 | width_max = int(width_max / terrain.horizontal_scale) 304 | 305 | start = platform_length 306 | width = np.random.randint(width_min, width_max) 307 | while start + platform_length + width <= terrain.length - platform_length//2: 308 | terrain.height_field_raw[:, start:start+width] = height 309 | start += platform_length + width 310 | width = np.random.randint(width_min, width_max) 311 | 312 | 313 | # the floor is lava 314 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth 315 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth 316 | 317 | def crawl_parkour(terrain, platform_length=2.0, lava_width=0.5, lava_depth=-1.0, height=0.2, depth=1.0, width=3.0, height_step=0.15): 318 | # First put the barriers 319 | boxes = [] 320 | boxes += box_trimesh(np.array([depth, width, 0.5]), np.array([2.5, 0.0, height+0.25])), 321 | boxes += box_trimesh(np.array([depth, width, 0.5]), np.array([6.5, 0.0, height+0.25+height_step])), 322 | 323 | # Then create the heightmap 324 | platform_length = int(platform_length / terrain.horizontal_scale) 325 | lava_width = int(lava_width / terrain.horizontal_scale) 326 | lava_depth = int(lava_depth / terrain.vertical_scale) 327 | 328 | height = int(height / terrain.vertical_scale) 329 | height_step = int(height_step / terrain.vertical_scale) 330 | depth = int(depth / terrain.horizontal_scale) 331 | 332 | 
terrain.height_field_raw[:, int(6.0/terrain.horizontal_scale):int(7.0/terrain.horizontal_scale)] = 1*height_step 333 | 334 | # the floor is lava 335 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth 336 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth 337 | 338 | return boxes 339 | 340 | 341 | 342 | 343 | def box_trimesh( 344 | size, # float [3] for x, y, z axis length (in meter) under box frame 345 | center_position, # float [3] position (in meter) in world frame 346 | ): 347 | 348 | vertices = np.empty((8, 3), dtype= np.float32) 349 | vertices[:] = center_position 350 | vertices[[0, 4, 2, 6], 0] -= size[0] / 2 351 | vertices[[1, 5, 3, 7], 0] += size[0] / 2 352 | vertices[[0, 1, 2, 3], 1] -= size[1] / 2 353 | vertices[[4, 5, 6, 7], 1] += size[1] / 2 354 | vertices[[2, 3, 6, 7], 2] -= size[2] / 2 355 | vertices[[0, 1, 4, 5], 2] += size[2] / 2 356 | 357 | triangles = -np.ones((12, 3), dtype= np.uint32) 358 | triangles[0] = [0, 2, 1] # 359 | triangles[1] = [1, 2, 3] 360 | triangles[2] = [0, 4, 2] # 361 | triangles[3] = [2, 4, 6] 362 | triangles[4] = [4, 5, 6] # 363 | triangles[5] = [5, 7, 6] 364 | triangles[6] = [1, 3, 5] # 365 | triangles[7] = [3, 7, 5] 366 | triangles[8] = [0, 1, 4] # 367 | triangles[9] = [1, 5, 4] 368 | triangles[10]= [2, 6, 3] # 369 | triangles[11]= [3, 6, 7] 370 | 371 | return vertices, triangles 372 | 373 | def combine_trimeshes(*trimeshes): 374 | if len(trimeshes) > 2: 375 | return combine_trimeshes( 376 | trimeshes[0], 377 | combine_trimeshes(trimeshes[1:]) 378 | ) 379 | 380 | # only two trimesh to combine 381 | trimesh_0, trimesh_1 = trimeshes 382 | if trimesh_0[1].shape[0] < trimesh_1[1].shape[0]: 383 | trimesh_0, trimesh_1 = trimesh_1, trimesh_0 384 | 385 | trimesh_1 = (trimesh_1[0], trimesh_1[1] + trimesh_0[0].shape[0]) 386 | vertices = np.concatenate((trimesh_0[0], trimesh_1[0]), axis= 0) 387 | triangles = np.concatenate((trimesh_0[1], trimesh_1[1]), axis= 0) 388 | 389 | return vertices, triangles 390 | 391 | def move_trimesh(trimesh, move: np.ndarray): 392 | trimesh = list(trimesh) 393 | trimesh[0] += move 394 | return tuple(trimesh) 395 | -------------------------------------------------------------------------------- /algos/DDPG_demos_generate.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | import os 4 | import random 5 | import time 6 | from tqdm.auto import tqdm 7 | import pickle as pkl 8 | 9 | from typing import NamedTuple 10 | 11 | from omegaconf import DictConfig, OmegaConf 12 | 13 | import numpy as np 14 | 15 | import torch 16 | import torch.nn as nn 17 | import torch.nn.functional as F 18 | import torch.optim as optim 19 | from torch.distributions.normal import Normal 20 | from torch.utils.tensorboard import SummaryWriter 21 | 22 | class RunningMeanStd(nn.Module): 23 | def __init__(self, shape = (), epsilon=1e-08): 24 | super(RunningMeanStd, self).__init__() 25 | self.register_buffer("running_mean", torch.zeros(shape)) 26 | self.register_buffer("running_var", torch.ones(shape)) 27 | self.register_buffer("count", torch.ones(())) 28 | 29 | self.epsilon = epsilon 30 | 31 | def forward(self, obs, update = True): 32 | if update: 33 | self.update(obs) 34 | 35 | return (obs - self.running_mean) / torch.sqrt(self.running_var + self.epsilon) 36 | 37 | def update(self, x): 38 | """Updates the mean, var and count from a batch of samples.""" 39 | batch_mean = torch.mean(x, dim=0) 40 | batch_var = torch.var(x, correction=0, dim=0) 41 | batch_count = x.shape[0] 
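# Worked example of the moment merge done by update_from_moments below
# (illustrative numbers; the buffers start at mean = 0, var = 1, count = 1):
#   batch x = [1, 3]  ->  batch_mean = 2, batch_var = 1 (population variance), batch_count = 2
#   delta    = 2 - 0 = 2,   tot_count = 1 + 2 = 3
#   new_mean = 0 + 2 * 2 / 3                   = 1.333...
#   M2       = 1*1 + 1*2 + 2**2 * (1 * 2 / 3)  = 5.666...
#   new_var  = M2 / 3                          = 1.888...
# i.e. the standard parallel variance merge: running statistics absorb each batch
# without storing past observations.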
42 | self.update_from_moments(batch_mean, batch_var, batch_count) 43 | 44 | def update_from_moments(self, batch_mean, batch_var, batch_count): 45 | """Updates from batch mean, variance and count moments.""" 46 | self.running_mean, self.running_var, self.count = update_mean_var_count_from_moments( 47 | self.running_mean, self.running_var, self.count, batch_mean, batch_var, batch_count 48 | ) 49 | 50 | def update_mean_var_count_from_moments( 51 | mean, var, count, batch_mean, batch_var, batch_count 52 | ): 53 | """Updates the mean, var and count using the previous mean, var, count and batch values.""" 54 | delta = batch_mean - mean 55 | tot_count = count + batch_count 56 | 57 | new_mean = mean + delta * batch_count / tot_count 58 | m_a = var * count 59 | m_b = batch_var * batch_count 60 | M2 = m_a + m_b + torch.square(delta) * count * batch_count / tot_count 61 | new_var = M2 / tot_count 62 | new_count = tot_count 63 | 64 | return new_mean, new_var, new_count 65 | 66 | def layer_init(layer, std=np.sqrt(2), bias_const=0.0): 67 | torch.nn.init.orthogonal_(layer.weight, std) 68 | torch.nn.init.constant_(layer.bias, bias_const) 69 | return layer 70 | 71 | class Agent(nn.Module): 72 | def __init__(self, envs): 73 | super().__init__() 74 | self.critic = nn.Sequential( 75 | layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 512)), 76 | nn.ELU(), 77 | layer_init(nn.Linear(512, 256)), 78 | nn.ELU(), 79 | layer_init(nn.Linear(256, 128)), 80 | nn.ELU(), 81 | layer_init(nn.Linear(128, 1), std=1.0), 82 | ) 83 | self.actor_mean = nn.Sequential( 84 | layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 512)), 85 | nn.ELU(), 86 | layer_init(nn.Linear(512, 256)), 87 | nn.ELU(), 88 | layer_init(nn.Linear(256, 128)), 89 | nn.ELU(), 90 | layer_init(nn.Linear(128, np.prod(envs.single_action_space.shape)), std=0.01), 91 | ) 92 | self.actor_logstd = nn.Parameter(torch.zeros(1, np.prod(envs.single_action_space.shape))) 93 | 94 | self.obs_rms = RunningMeanStd(shape = envs.single_observation_space.shape) 95 | self.value_rms = RunningMeanStd(shape = ()) 96 | 97 | def get_value(self, x): 98 | return self.critic(x) 99 | 100 | def get_action_and_value(self, x, action=None): 101 | action_mean = self.actor_mean(x) 102 | return action_mean, None, None, None 103 | 104 | class SeqReplayBufferSamples(NamedTuple): 105 | observations: torch.Tensor 106 | privileged_observations: torch.Tensor 107 | vision_observations: torch.Tensor 108 | actions: torch.Tensor 109 | next_observations: torch.Tensor 110 | privileged_next_observations: torch.Tensor 111 | vision_next_observations: torch.Tensor 112 | cat_dones: torch.Tensor 113 | raw_constraints: torch.Tensor 114 | dones: torch.Tensor 115 | rewards: torch.Tensor 116 | p_ini_hidden_in: torch.Tensor 117 | p_ini_hidden_out: torch.Tensor 118 | mask: torch.Tensor 119 | 120 | class SeqReplayBuffer(): 121 | def __init__( 122 | self, 123 | buffer_size, 124 | observation_space, 125 | privileged_observation_space, 126 | vision_space, 127 | action_space, 128 | num_constraints, 129 | max_episode_length, 130 | seq_len, 131 | num_envs, 132 | critic_rnn = False, 133 | storing_device = "cpu", 134 | training_device = "cpu", 135 | handle_timeout_termination = True, 136 | ): 137 | self.buffer_size = buffer_size 138 | self.max_episode_length = max_episode_length 139 | self.seq_len = seq_len 140 | self.observation_space = observation_space 141 | self.privileged_observation_space = privileged_observation_space.shape 142 | self.vision_space = vision_space 143 | 
self.action_space = action_space 144 | self.num_envs = num_envs 145 | 146 | self.num_constraints = num_constraints 147 | 148 | self.action_dim = int(np.prod(action_space.shape)) 149 | self.pos = 0 150 | self.full = False 151 | self.critic_rnn = critic_rnn 152 | self.storing_device = storing_device 153 | self.training_device = training_device 154 | 155 | self.observations = torch.zeros((self.buffer_size, *self.observation_space), dtype=torch.float32, device = storing_device) 156 | self.next_observations = torch.zeros((self.buffer_size, *self.observation_space), dtype=torch.float32, device = storing_device) 157 | self.privileged_observations = torch.zeros((self.buffer_size, *self.privileged_observation_space), dtype=torch.float32, device = storing_device) 158 | self.privileged_next_observations = torch.zeros((self.buffer_size, *self.privileged_observation_space), dtype=torch.float32, device = storing_device) 159 | 160 | self.vision_observations = torch.zeros((self.buffer_size // 5, *self.vision_space), dtype = torch.uint8) 161 | self.next_vision_observations = torch.zeros_like(self.vision_observations) 162 | 163 | self.actions = torch.zeros((self.buffer_size, self.action_dim), dtype=torch.float32, device = storing_device) 164 | self.rewards = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device) 165 | self.cat_dones = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device) 166 | self.raw_constraints = torch.zeros((self.buffer_size, self.num_constraints), dtype=torch.float32, device = storing_device) 167 | self.dones = torch.zeros((self.buffer_size,), dtype=torch.bool, device = storing_device) 168 | self.p_ini_hidden_in = torch.zeros((self.buffer_size, 1, 256), dtype=torch.float32, device = storing_device) 169 | 170 | # For the current episodes that started being added to the replay buffer 171 | # but aren't done yet. We want to still sample from them, however the masking 172 | # needs a termination point to not overlap to the next episode when full or even to the empty 173 | # part of the buffer when not full. 
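# Note: `markers` flags the slots written by the most recent call to add(), one per
# environment. _get_samples() later combines them with `dones` when building the
# sequence mask, so a sampled window of seq_len transitions is cut off as soon as it
# reaches either an episode end or the current write head, and never bleeds into
# stale or still-empty parts of the buffer.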
174 | self.markers = torch.zeros((self.buffer_size,), dtype=torch.bool, device = storing_device) 175 | self.started_adding = False 176 | 177 | # Handle timeouts termination properly if needed 178 | # see https://github.com/DLR-RM/stable-baselines3/issues/284 179 | self.handle_timeout_termination = handle_timeout_termination 180 | self.timeouts = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device) 181 | 182 | def add( 183 | self, 184 | obs, 185 | privi_obs, 186 | vobs, 187 | next_obs, 188 | privi_next_obs, 189 | next_vobs, 190 | action, 191 | reward, 192 | cat_done, 193 | raw_constraints, 194 | done, 195 | p_ini_hidden_in, 196 | truncateds = None 197 | ): 198 | start_idx = self.pos 199 | stop_idx = min(self.pos + obs.shape[0], self.buffer_size) 200 | b_max_idx = stop_idx - start_idx 201 | 202 | overflow = False 203 | overflow_size = 0 204 | if self.pos + obs.shape[0] > self.buffer_size: 205 | overflow = True 206 | overflow_size = self.pos + obs.shape[0] - self.buffer_size 207 | 208 | assert start_idx % self.num_envs == 0, f"start_idx is not a multiple of {self.num_envs}" 209 | assert stop_idx % self.num_envs == 0, f"stop_idx is not a multiple of {self.num_envs}" 210 | assert b_max_idx == 0 or b_max_idx == self.num_envs, f"b_max_idx is not either 0 or {self.num_envs}" 211 | 212 | # Copy to avoid modification by reference 213 | self.observations[start_idx : stop_idx] = obs[: b_max_idx].clone().to(self.storing_device) 214 | self.vision_observations[(start_idx // (self.num_envs * 5)) * self.num_envs : (start_idx // (self.num_envs * 5)) * self.num_envs + b_max_idx] = \ 215 | vobs[: b_max_idx].clone().to(self.storing_device) 216 | 217 | self.next_observations[start_idx : stop_idx] = next_obs[: b_max_idx].clone().to(self.storing_device) 218 | self.next_vision_observations[(start_idx // (self.num_envs * 5)) * self.num_envs : (start_idx // (self.num_envs * 5)) * self.num_envs + b_max_idx] = \ 219 | next_vobs[: b_max_idx].clone().to(self.storing_device) 220 | 221 | self.privileged_observations[start_idx : stop_idx] = privi_obs[: b_max_idx].clone().to(self.storing_device) 222 | self.privileged_next_observations[start_idx : stop_idx] = privi_next_obs[: b_max_idx].clone().to(self.storing_device) 223 | self.actions[start_idx : stop_idx] = action[: b_max_idx].clone().to(self.storing_device) 224 | self.rewards[start_idx : stop_idx] = reward[: b_max_idx].clone().to(self.storing_device) 225 | self.cat_dones[start_idx : stop_idx] = cat_done[: b_max_idx].clone().to(self.storing_device) 226 | self.raw_constraints[start_idx : stop_idx] = raw_constraints[: b_max_idx].clone().to(self.storing_device) 227 | self.dones[start_idx : stop_idx] = done[: b_max_idx].clone().to(self.storing_device) 228 | self.p_ini_hidden_in[start_idx : stop_idx] = p_ini_hidden_in.swapaxes(0, 1)[: b_max_idx].clone().to(self.storing_device) 229 | 230 | # Current episodes last transition marker 231 | self.markers[start_idx : stop_idx] = 1 232 | # We need to unmark previous transitions as last 233 | # but only if it is not the first add to the replay buffer 234 | if self.started_adding: 235 | self.markers[self.prev_start_idx : self.prev_stop_idx] = 0 236 | if self.prev_overflow: 237 | self.markers[: self.prev_overflow_size] = 0 238 | self.started_adding = True 239 | self.prev_start_idx = start_idx 240 | self.prev_stop_idx = stop_idx 241 | self.prev_overflow = overflow 242 | self.prev_overflow_size = overflow_size 243 | 244 | if self.handle_timeout_termination: 245 | self.timeouts[start_idx : stop_idx] = truncateds[: 
b_max_idx].clone().to(self.storing_device) 246 | 247 | assert overflow_size == 0 or overflow_size == self.num_envs, f"overflow_size is not either 0 or {self.num_envs}" 248 | if overflow: 249 | self.full = True 250 | self.observations[: overflow_size] = obs[b_max_idx :].clone().to(self.storing_device) 251 | self.vision_observations[: overflow_size] = vobs[b_max_idx :].clone().to(self.storing_device) 252 | 253 | self.next_observations[: overflow_size] = next_obs[b_max_idx :].clone().to(self.storing_device) 254 | self.next_vision_observations[: overflow_size] = next_vobs[b_max_idx :].clone().to(self.storing_device) 255 | 256 | self.privileged_observations[: overflow_size] = privi_obs[b_max_idx :].clone().to(self.storing_device) 257 | self.privileged_next_observations[: overflow_size] = privi_next_obs[b_max_idx :].clone().to(self.storing_device) 258 | self.actions[: overflow_size] = action[b_max_idx :].clone().to(self.storing_device) 259 | self.rewards[: overflow_size] = reward[b_max_idx :].clone().to(self.storing_device) 260 | self.cat_dones[: overflow_size] = cat_done[b_max_idx :].clone().to(self.storing_device) 261 | self.raw_constraints[: overflow_size] = raw_constraints[b_max_idx :].clone().to(self.storing_device) 262 | self.dones[: overflow_size] = done[b_max_idx :].clone().to(self.storing_device) 263 | self.p_ini_hidden_in[: overflow_size] = p_ini_hidden_in.swapaxes(0, 1)[b_max_idx :].clone().to(self.storing_device) 264 | 265 | # Current episodes last transition marker 266 | self.markers[: overflow_size] = 1 267 | if self.handle_timeout_termination: 268 | self.timeouts[: overflow_size] = truncateds[b_max_idx :].clone().to(self.storing_device) 269 | self.pos = overflow_size 270 | else: 271 | self.pos += obs.shape[0] 272 | 273 | def sample(self, batch_size) -> SeqReplayBufferSamples: 274 | upper_bound = self.buffer_size if self.full else self.pos 275 | batch_inds = torch.randint(0, upper_bound, size = (batch_size,), device = self.storing_device) 276 | return self._get_samples(batch_inds) 277 | 278 | def _get_samples(self, batch_inds) -> SeqReplayBufferSamples: 279 | # Using modular arithmetic we get the indices of all the transitions of the episode starting from batch_inds 280 | # we get "episodes" of length self.seq_len, but their true length may be less, they can have ended before that 281 | # we'll deal with that using a mask 282 | # Using flat indexing we can actually slice through a tensor using 283 | # different starting points for each dimension of an axis 284 | # as long as the slice size remains constant 285 | # [1, 2, 3].repeat_interleave(3) -> [1, 1, 1, 2, 2, 2, 3, 3, 3] 286 | # [1, 2, 3].repeat(3) -> [1, 2, 3, 1, 2, 3, 1, 2, 3] 287 | all_indices_flat = (batch_inds.repeat_interleave(self.seq_len) + torch.arange(self.seq_len, device = self.storing_device).repeat(batch_inds.shape[0]) * self.num_envs) % self.buffer_size 288 | #all_indices_next_flat = (batch_inds.repeat_interleave(self.seq_len) + torch.arange(1, self.seq_len + 1, device = self.device).repeat(batch_inds.shape[0]) * self.num_envs) % self.buffer_size 289 | gathered_obs = self.observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.observations.shape[1:])) 290 | gathered_next_obs = self.next_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.observations.shape[1:])) 291 | 292 | gathered_privi_obs = self.privileged_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.privileged_observations.shape[1:])) 293 | gathered_privi_next_obs = 
self.privileged_next_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.privileged_observations.shape[1:])) 294 | 295 | all_indices_flat_vision = ( (((batch_inds // self.num_envs) // 5) * self.num_envs + batch_inds % self.num_envs).repeat_interleave((self.seq_len // 5)) + \ 296 | torch.arange(start = 0, end = (self.seq_len // 5), device = self.storing_device).repeat(batch_inds.shape[0]) * self.num_envs) % (self.buffer_size // 5) 297 | gathered_vobs = self.vision_observations[all_indices_flat_vision].reshape((batch_inds.shape[0], (self.seq_len // 5), *self.vision_observations.shape[1:])) 298 | gathered_next_vobs = self.next_vision_observations[all_indices_flat_vision].reshape((batch_inds.shape[0], (self.seq_len // 5), *self.vision_observations.shape[1:])) 299 | 300 | gathered_actions = self.actions[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.actions.shape[1:])) 301 | gathered_cat_dones = self.cat_dones[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 302 | gathered_raw_constraints = self.raw_constraints[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, self.num_constraints)) 303 | gathered_dones = self.dones[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 304 | gathered_truncateds = self.timeouts[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 305 | gathered_rewards = self.rewards[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 306 | 307 | gathered_p_ini_hidden_in = self.p_ini_hidden_in[batch_inds].swapaxes(0, 1) 308 | gathered_p_ini_hidden_out = self.p_ini_hidden_in[(batch_inds + self.num_envs) % self.buffer_size].swapaxes(0, 1) 309 | 310 | gathered_markers = self.markers[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 311 | mask = torch.cat([ 312 | torch.ones((batch_inds.shape[0], 1), device = self.storing_device), 313 | (1 - (gathered_dones | gathered_markers).float()).cumprod(dim = 1)[:, 1:] 314 | ], dim = 1) 315 | data = ( 316 | gathered_obs.to(self.training_device), 317 | gathered_privi_obs.to(self.training_device), 318 | gathered_vobs.to(self.training_device), 319 | gathered_actions.to(self.training_device), 320 | gathered_next_obs.to(self.training_device), 321 | gathered_privi_next_obs.to(self.training_device), 322 | gathered_next_vobs.to(self.training_device), 323 | gathered_cat_dones.to(self.training_device), 324 | gathered_raw_constraints.to(self.training_device), 325 | # Only use dones that are not due to timeouts 326 | # deactivated by default (timeouts is initialized as an array of False) 327 | (gathered_dones.float() * (1 - gathered_truncateds)).to(self.training_device), 328 | gathered_rewards.to(self.training_device), 329 | gathered_p_ini_hidden_in.to(self.training_device), 330 | gathered_p_ini_hidden_out.to(self.training_device), 331 | mask.to(self.training_device), 332 | ) 333 | return SeqReplayBufferSamples(*data) 334 | 335 | class ExtractObsWrapper(gym.ObservationWrapper): 336 | def observation(self, obs): 337 | return obs["obs"] 338 | 339 | def DDPG_demos_generate(cfg: DictConfig, envs): 340 | TARGET_POLICY_PATH = cfg["train"]["params"]["config"]["target_policy_path"] 341 | DEMOS_BUFFER_SIZE = int(cfg["train"]["params"]["config"]["demos_buffer_size"]) 342 | MAX_EPISODE_LENGTH = 500 343 | SEQ_LEN = 5 344 | NUM_CONSTRAINTS = 107 345 | RNN_CRITIC = True 346 | vis_h = 48 347 | vis_w = 48 348 | 349 | assert SEQ_LEN % 5 == 0, "SEQ_LEN must be a multiple of 5" 350 | assert DEMOS_BUFFER_SIZE % (5 * envs.num_envs) == 0, "DEMO_BUFFER_SIZE 
must be a multiple of 5 * num_envs" 351 | 352 | actor_sd = torch.load(TARGET_POLICY_PATH) 353 | 354 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 355 | 356 | run_path = f"runs/{cfg['task']['name']}_DDPG_demos_generate_D405_parkour40_CORR_CONTROL_{int(time.time())}" 357 | writer = SummaryWriter(run_path) 358 | 359 | if not os.path.exists(run_path): 360 | os.makedirs(run_path) 361 | 362 | OmegaConf.save(config = cfg, f = f"{run_path}/config.yaml") 363 | 364 | envs.single_action_space = envs.action_space 365 | envs.single_observation_space = envs.observation_space 366 | 367 | envs = ExtractObsWrapper(envs) 368 | 369 | actor_demos = Agent(envs).to(device) 370 | actor_demos.load_state_dict(actor_sd) 371 | print("Target actor for demonstrations loaded successfully.") 372 | envs.single_observation_space.dtype = np.float32 373 | rb_demos = SeqReplayBuffer( 374 | DEMOS_BUFFER_SIZE, 375 | (45,), 376 | envs.single_observation_space, 377 | (vis_h, vis_w), 378 | envs.single_action_space, 379 | NUM_CONSTRAINTS, 380 | MAX_EPISODE_LENGTH, 381 | SEQ_LEN, 382 | envs.num_envs, 383 | critic_rnn = RNN_CRITIC, 384 | storing_device = "cpu", 385 | training_device = device, 386 | handle_timeout_termination=True, 387 | ) 388 | 389 | filling_iterations = DEMOS_BUFFER_SIZE // envs.num_envs 390 | obs_privi = envs.reset() 391 | obs = obs_privi.clone()[:, : 45] 392 | vobs = torch.zeros((envs.num_envs, vis_h, vis_w), dtype = torch.uint8, device = device) 393 | next_vobs = vobs.clone() 394 | with torch.no_grad(): 395 | dummy_actions, _, _, _ = actor_demos.get_action_and_value(actor_demos.obs_rms(obs_privi, update = False)) 396 | actions_min, _ = dummy_actions.min(dim = 0) 397 | actions_max, _ = dummy_actions.max(dim = 0) 398 | 399 | dummy_hiddens = torch.zeros((1, envs.num_envs, 256), device = device) 400 | 401 | for fi in range(filling_iterations): 402 | with torch.no_grad(): 403 | actions, _, _, _ = actor_demos.get_action_and_value(actor_demos.obs_rms(obs_privi, update = False)) 404 | cur_min, _ = actions.min(dim = 0) 405 | cur_max, _ = actions.max(dim = 0) 406 | actions_min = torch.min(actions_min, cur_min) 407 | actions_max = torch.max(actions_max, cur_max) 408 | 409 | next_obs_privi, rewards, terminations, infos = envs.step(actions) 410 | true_dones = infos["true_dones"].float() 411 | truncateds = infos["truncateds"].float() 412 | raw_constraints = infos["raw_constraints"] 413 | 414 | next_obs = next_obs_privi.clone()[:, : 45] 415 | 416 | real_next_obs_privi = next_obs_privi.clone() 417 | real_next_obs = next_obs.clone() 418 | 419 | if "depth" in infos: 420 | next_vobs = (infos["depth"].clone()[..., 19:-18] * 255).to(torch.uint8) 421 | 422 | rb_demos.add(obs, obs_privi, vobs, real_next_obs, real_next_obs_privi, next_vobs, actions, rewards, terminations, raw_constraints, true_dones, dummy_hiddens, truncateds) 423 | 424 | # TRY NOT TO MODIFY: CRUCIAL step easy to overlook 425 | obs_privi = next_obs_privi 426 | obs = next_obs 427 | vobs = next_vobs 428 | 429 | torch.save((actions_min.cpu(), actions_max.cpu()), f"{run_path}/ppo_actions_minmax.pt") 430 | pkl.dump(rb_demos, open(f"{run_path}/rb_demos.pkl", "wb")) 431 | print(f"Demo replay buffer filled with {DEMOS_BUFFER_SIZE} expert demonstrations") 432 |
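The generation script above ends by serializing two artifacts, `rb_demos.pkl` and `ppo_actions_minmax.pt`, which the vision student in `DDPG_demos_rnn_vision.py` reloads. As a rough illustration (not part of the repository), they can be reloaded and inspected as sketched below, assuming a hypothetical `run_path` pointing at a finished generation run and that the `SeqReplayBuffer` class from this file is importable for unpickling:

import pickle as pkl
import torch

run_path = "runs/SoloParkour_DDPG_demos_generate_..."  # hypothetical run directory

rb_demos = pkl.load(open(f"{run_path}/rb_demos.pkl", "rb"))   # pickled SeqReplayBuffer
print(rb_demos.pos, rb_demos.full)                            # write head and whether the buffer wrapped

actions_min, actions_max = torch.load(f"{run_path}/ppo_actions_minmax.pt")
print(actions_min.shape, actions_max.shape)                   # per-dimension teacher action range

batch = rb_demos.sample(4)                                    # SeqReplayBufferSamples of [4, seq_len, ...] tensors
print(batch.observations.shape, batch.vision_observations.shape)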
-------------------------------------------------------------------------------- /solo12/solo12_mpi_scaled.urdf: --------------------------------------------------------------------------------
-------------------------------------------------------------------------------- /solo12/solo12_mpi.urdf: --------------------------------------------------------------------------------
| 717 | 718 | 719 | 720 | 721 | 722 | 723 | 724 | 725 | 726 | 727 | 728 | 729 | 730 | -------------------------------------------------------------------------------- /algos/DDPG_demos_rnn_vision.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | import os 4 | import random 5 | import time 6 | from tqdm.auto import tqdm 7 | import pickle as pkl 8 | import itertools 9 | 10 | from typing import NamedTuple 11 | 12 | from omegaconf import DictConfig, OmegaConf 13 | 14 | import numpy as np 15 | 16 | import torch 17 | import torch.nn as nn 18 | import torch.nn.functional as F 19 | import torch.optim as optim 20 | from torch.distributions.normal import Normal 21 | from torch.utils.tensorboard import SummaryWriter 22 | 23 | class SeqReplayBufferSamples(NamedTuple): 24 | observations: torch.Tensor 25 | privileged_observations: torch.Tensor 26 | vision_observations: torch.Tensor 27 | actions: torch.Tensor 28 | next_observations: torch.Tensor 29 | privileged_next_observations: torch.Tensor 30 | vision_next_observations: torch.Tensor 31 | cat_dones: torch.Tensor 32 | raw_constraints: torch.Tensor 33 | dones: torch.Tensor 34 | rewards: torch.Tensor 35 | p_ini_hidden_in: torch.Tensor 36 | p_ini_hidden_out: torch.Tensor 37 | mask: torch.Tensor 38 | 39 | class SeqReplayBuffer(): 40 | def __init__( 41 | self, 42 | buffer_size, 43 | observation_space, 44 | privileged_observation_space, 45 | vision_space, 46 | action_space, 47 | num_constraints, 48 | max_episode_length, 49 | seq_len, 50 | num_envs, 51 | storing_device = "cpu", 52 | training_device = "cpu", 53 | handle_timeout_termination = True, 54 | ): 55 | self.buffer_size = buffer_size 56 | self.max_episode_length = max_episode_length 57 | self.seq_len = seq_len 58 | self.observation_space = observation_space 59 | self.privileged_observation_space = privileged_observation_space.shape 60 | self.vision_space = vision_space 61 | self.action_space = action_space 62 | self.num_envs = num_envs 63 | 64 | self.num_constraints = num_constraints 65 | 66 | self.action_dim = int(np.prod(action_space.shape)) 67 | self.pos = 0 68 | self.full = False 69 | self.storing_device = storing_device 70 | self.training_device = training_device 71 | 72 | self.observations = torch.zeros((self.buffer_size, *self.observation_space), dtype=torch.float32, device = storing_device) 73 | self.next_observations = torch.zeros((self.buffer_size, *self.observation_space), dtype=torch.float32, device = storing_device) 74 | self.privileged_observations = torch.zeros((self.buffer_size, *self.privileged_observation_space), dtype=torch.float32, device = storing_device) 75 | self.privileged_next_observations = torch.zeros((self.buffer_size, *self.privileged_observation_space), dtype=torch.float32, device = storing_device) 76 | 77 | self.vision_observations = torch.zeros((self.buffer_size // 5, *self.vision_space), dtype = torch.uint8) 78 | self.next_vision_observations = torch.zeros_like(self.vision_observations) 79 | 80 | self.actions = torch.zeros((self.buffer_size, self.action_dim), dtype=torch.float32, device = storing_device) 81 | self.rewards = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device) 82 | self.cat_dones = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device) 83 | self.raw_constraints = torch.zeros((self.buffer_size, self.num_constraints), dtype=torch.float32, device = storing_device) 84 | self.dones = torch.zeros((self.buffer_size,), dtype=torch.bool, device = 
storing_device) 85 | self.p_ini_hidden_in = torch.zeros((self.buffer_size, 1, 256), dtype=torch.float32, device = storing_device) 86 | 87 | # For the current episodes that started being added to the replay buffer 88 | # but aren't done yet. We want to still sample from them, however the masking 89 | # needs a termination point to not overlap to the next episode when full or even to the empty 90 | # part of the buffer when not full. 91 | self.markers = torch.zeros((self.buffer_size,), dtype=torch.bool, device = storing_device) 92 | self.started_adding = False 93 | 94 | # Handle timeouts termination properly if needed 95 | # see https://github.com/DLR-RM/stable-baselines3/issues/284 96 | self.handle_timeout_termination = handle_timeout_termination 97 | self.timeouts = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device) 98 | 99 | def add( 100 | self, 101 | obs, 102 | privi_obs, 103 | vobs, 104 | next_obs, 105 | privi_next_obs, 106 | next_vobs, 107 | action, 108 | reward, 109 | cat_done, 110 | raw_constraints, 111 | done, 112 | p_ini_hidden_in, 113 | truncateds = None 114 | ): 115 | start_idx = self.pos 116 | stop_idx = min(self.pos + obs.shape[0], self.buffer_size) 117 | b_max_idx = stop_idx - start_idx 118 | 119 | overflow = False 120 | overflow_size = 0 121 | if self.pos + obs.shape[0] > self.buffer_size: 122 | overflow = True 123 | overflow_size = self.pos + obs.shape[0] - self.buffer_size 124 | 125 | assert start_idx % self.num_envs == 0, f"start_idx is not a multiple of {self.num_envs}" 126 | assert stop_idx % self.num_envs == 0, f"stop_idx is not a multiple of {self.num_envs}" 127 | assert b_max_idx == 0 or b_max_idx == self.num_envs, f"b_max_idx is not either 0 or {self.num_envs}" 128 | 129 | # Copy to avoid modification by reference 130 | self.observations[start_idx : stop_idx] = obs[: b_max_idx].clone().to(self.storing_device) 131 | self.vision_observations[(start_idx // (self.num_envs * 5)) * self.num_envs : (start_idx // (self.num_envs * 5)) * self.num_envs + b_max_idx] = \ 132 | vobs[: b_max_idx].clone().to(self.storing_device) 133 | 134 | self.next_observations[start_idx : stop_idx] = next_obs[: b_max_idx].clone().to(self.storing_device) 135 | self.next_vision_observations[(start_idx // (self.num_envs * 5)) * self.num_envs : (start_idx // (self.num_envs * 5)) * self.num_envs + b_max_idx] = \ 136 | next_vobs[: b_max_idx].clone().to(self.storing_device) 137 | 138 | self.privileged_observations[start_idx : stop_idx] = privi_obs[: b_max_idx].clone().to(self.storing_device) 139 | self.privileged_next_observations[start_idx : stop_idx] = privi_next_obs[: b_max_idx].clone().to(self.storing_device) 140 | self.actions[start_idx : stop_idx] = action[: b_max_idx].clone().to(self.storing_device) 141 | self.rewards[start_idx : stop_idx] = reward[: b_max_idx].clone().to(self.storing_device) 142 | self.cat_dones[start_idx : stop_idx] = cat_done[: b_max_idx].clone().to(self.storing_device) 143 | self.raw_constraints[start_idx : stop_idx] = raw_constraints[: b_max_idx].clone().to(self.storing_device) 144 | self.dones[start_idx : stop_idx] = done[: b_max_idx].clone().to(self.storing_device) 145 | self.p_ini_hidden_in[start_idx : stop_idx] = p_ini_hidden_in.swapaxes(0, 1)[: b_max_idx].clone().to(self.storing_device) 146 | 147 | # Current episodes last transition marker 148 | self.markers[start_idx : stop_idx] = 1 149 | # We need to unmark previous transitions as last 150 | # but only if it is not the first add to the replay buffer 151 | if self.started_adding: 152 | 
self.markers[self.prev_start_idx : self.prev_stop_idx] = 0 153 | if self.prev_overflow: 154 | self.markers[: self.prev_overflow_size] = 0 155 | self.started_adding = True 156 | self.prev_start_idx = start_idx 157 | self.prev_stop_idx = stop_idx 158 | self.prev_overflow = overflow 159 | self.prev_overflow_size = overflow_size 160 | 161 | if self.handle_timeout_termination: 162 | self.timeouts[start_idx : stop_idx] = truncateds[: b_max_idx].clone().to(self.storing_device) 163 | 164 | assert overflow_size == 0 or overflow_size == self.num_envs, f"overflow_size is not either 0 or {self.num_envs}" 165 | if overflow: 166 | self.full = True 167 | self.observations[: overflow_size] = obs[b_max_idx :].clone().to(self.storing_device) 168 | self.vision_observations[: overflow_size] = vobs[b_max_idx :].clone().to(self.storing_device) 169 | 170 | self.next_observations[: overflow_size] = next_obs[b_max_idx :].clone().to(self.storing_device) 171 | self.next_vision_observations[: overflow_size] = next_vobs[b_max_idx :].clone().to(self.storing_device) 172 | 173 | self.privileged_observations[: overflow_size] = privi_obs[b_max_idx :].clone().to(self.storing_device) 174 | self.privileged_next_observations[: overflow_size] = privi_next_obs[b_max_idx :].clone().to(self.storing_device) 175 | self.actions[: overflow_size] = action[b_max_idx :].clone().to(self.storing_device) 176 | self.rewards[: overflow_size] = reward[b_max_idx :].clone().to(self.storing_device) 177 | self.cat_dones[: overflow_size] = cat_done[b_max_idx :].clone().to(self.storing_device) 178 | self.raw_constraints[: overflow_size] = raw_constraints[b_max_idx :].clone().to(self.storing_device) 179 | self.dones[: overflow_size] = done[b_max_idx :].clone().to(self.storing_device) 180 | self.p_ini_hidden_in[: overflow_size] = p_ini_hidden_in.swapaxes(0, 1)[b_max_idx :].clone().to(self.storing_device) 181 | 182 | # Current episodes last transition marker 183 | self.markers[: overflow_size] = 1 184 | if self.handle_timeout_termination: 185 | self.timeouts[: overflow_size] = truncateds[b_max_idx :].clone().to(self.storing_device) 186 | self.pos = overflow_size 187 | else: 188 | self.pos += obs.shape[0] 189 | 190 | def sample(self, batch_size) -> SeqReplayBufferSamples: 191 | upper_bound = self.buffer_size if self.full else self.pos 192 | batch_inds = torch.randint(0, upper_bound, size = (batch_size,), device = self.storing_device) 193 | return self._get_samples(batch_inds) 194 | 195 | def _get_samples(self, batch_inds) -> SeqReplayBufferSamples: 196 | # Using modular arithmetic we get the indices of all the transitions of the episode starting from batch_inds 197 | # we get "episodes" of length self.seq_len, but their true length may be less, they can have ended before that 198 | # we'll deal with that using a mask 199 | # Using flat indexing we can actually slice through a tensor using 200 | # different starting points for each dimension of an axis 201 | # as long as the slice size remains constant 202 | # [1, 2, 3].repeat_interleave(3) -> [1, 1, 1, 2, 2, 2, 3, 3, 3] 203 | # [1, 2, 3].repeat(3) -> [1, 2, 3, 1, 2, 3, 1, 2, 3] 204 | all_indices_flat = (batch_inds.repeat_interleave(self.seq_len) + torch.arange(self.seq_len, device = self.storing_device).repeat(batch_inds.shape[0]) * self.num_envs) % self.buffer_size 205 | #all_indices_next_flat = (batch_inds.repeat_interleave(self.seq_len) + torch.arange(1, self.seq_len + 1, device = self.device).repeat(batch_inds.shape[0]) * self.num_envs) % self.buffer_size 206 | gathered_obs = 
self.observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.observations.shape[1:])) 207 | gathered_next_obs = self.next_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.observations.shape[1:])) 208 | 209 | gathered_privi_obs = self.privileged_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.privileged_observations.shape[1:])) 210 | gathered_privi_next_obs = self.privileged_next_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.privileged_observations.shape[1:])) 211 | 212 | all_indices_flat_vision = ( (((batch_inds // self.num_envs) // 5) * self.num_envs + batch_inds % self.num_envs).repeat_interleave((self.seq_len // 5)) + \ 213 | torch.arange(start = 0, end = (self.seq_len // 5), device = self.storing_device).repeat(batch_inds.shape[0]) * self.num_envs) % (self.buffer_size // 5) 214 | gathered_vobs = self.vision_observations[all_indices_flat_vision].reshape((batch_inds.shape[0], (self.seq_len // 5), *self.vision_observations.shape[1:])) 215 | gathered_next_vobs = self.next_vision_observations[all_indices_flat_vision].reshape((batch_inds.shape[0], (self.seq_len // 5), *self.vision_observations.shape[1:])) 216 | 217 | gathered_actions = self.actions[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.actions.shape[1:])) 218 | gathered_cat_dones = self.cat_dones[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 219 | gathered_raw_constraints = self.raw_constraints[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, self.num_constraints)) 220 | gathered_dones = self.dones[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 221 | gathered_truncateds = self.timeouts[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 222 | gathered_rewards = self.rewards[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 223 | 224 | gathered_p_ini_hidden_in = self.p_ini_hidden_in[batch_inds].swapaxes(0, 1) 225 | gathered_p_ini_hidden_out = self.p_ini_hidden_in[(batch_inds + self.num_envs) % self.buffer_size].swapaxes(0, 1) 226 | 227 | gathered_markers = self.markers[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len)) 228 | mask = torch.cat([ 229 | torch.ones((batch_inds.shape[0], 1), device = self.storing_device), 230 | (1 - (gathered_dones | gathered_markers).float()).cumprod(dim = 1)[:, 1:] 231 | ], dim = 1) 232 | data = ( 233 | gathered_obs.to(self.training_device), 234 | gathered_privi_obs.to(self.training_device), 235 | gathered_vobs.to(self.training_device), 236 | gathered_actions.to(self.training_device), 237 | gathered_next_obs.to(self.training_device), 238 | gathered_privi_next_obs.to(self.training_device), 239 | gathered_next_vobs.to(self.training_device), 240 | gathered_cat_dones.to(self.training_device), 241 | gathered_raw_constraints.to(self.training_device), 242 | # Only use dones that are not due to timeouts 243 | # deactivated by default (timeouts is initialized as an array of False) 244 | (gathered_dones.float() * (1 - gathered_truncateds)).to(self.training_device), 245 | gathered_rewards.to(self.training_device), 246 | gathered_p_ini_hidden_in.to(self.training_device), 247 | gathered_p_ini_hidden_out.to(self.training_device), 248 | mask.to(self.training_device), 249 | ) 250 | return SeqReplayBufferSamples(*data) 251 | 252 | def random_translate(imgs, pad = 4): 253 | n, c, h, w = imgs.size() 254 | imgs = torch.nn.functional.pad(imgs, (pad, pad, pad, pad)) #, mode = "replicate") 255 | w1 = 
torch.randint(0, 2*pad + 1, (n,)) 256 | h1 = torch.randint(0, 2*pad + 1, (n,)) 257 | cropped = torch.empty((n, c, h, w), dtype=imgs.dtype, device=imgs.device) 258 | for i, (img, w11, h11) in enumerate(zip(imgs, w1, h1)): 259 | cropped[i][:] = img[:, h11:h11 + h, w11:w11 + w] 260 | return cropped 261 | 262 | def random_noise(images, noise_std=0.02): 263 | N, C, H, W = images.size() 264 | noise = noise_std*torch.randn_like(images) 265 | noise *= torch.bernoulli(0.5*torch.ones(N, 1, 1, 1).to(images)) # Only applied on half of the images 266 | return images + noise 267 | 268 | def random_shift(images, padding=4): 269 | N, C, H, W = images.size() 270 | padded_images = torch.nn.functional.pad(images, (padding, padding, padding, padding), mode='replicate') 271 | crop_x = torch.randint(0, 2 * padding, (N,)) 272 | crop_y = torch.randint(0, 2 * padding, (N,)) 273 | shifted_images = torch.zeros_like(images) 274 | for i in range(N): 275 | shifted_images[i] = padded_images[i, :, crop_y[i]:crop_y[i] + H, crop_x[i]:crop_x[i] + W] 276 | return shifted_images 277 | 278 | def random_cutout(images, min_size=2, max_size=24): 279 | N, C, H, W = images.size() 280 | for i in range(N): 281 | size_h = random.randint(min_size, max_size) 282 | size_w = random.randint(min_size, max_size) 283 | top = random.randint(0, H - size_h) 284 | left = random.randint(0, W - size_w) 285 | coin_flip = random.random() 286 | if coin_flip < 0.2: 287 | fill_value = 0.0 288 | images[i, :, top:top + size_h, left:left + size_w] = fill_value 289 | elif coin_flip < 0.4: 290 | fill_value = 1.0 291 | images[i, :, top:top + size_h, left:left + size_w] = fill_value 292 | elif coin_flip < 0.6: 293 | fill_value = torch.rand((C, size_h, size_w), device=images.device) 294 | images[i, :, top:top + size_h, left:left + size_w] = fill_value 295 | return images 296 | 297 | class DepthOnlyFCBackbone58x87(nn.Module): 298 | def __init__(self, scandots_output_dim, seq_len, batch_size): 299 | super().__init__() 300 | 301 | activation = nn.ELU() 302 | self.image_compression = nn.Sequential( 303 | nn.Conv2d(1, 16, 5), nn.LeakyReLU(), nn.MaxPool2d(2), 304 | nn.Conv2d(16, 32, 4), nn.LeakyReLU(), nn.MaxPool2d(2), 305 | nn.Conv2d(32, 32, 3), nn.LeakyReLU(), 306 | nn.Flatten(), 307 | nn.Linear(1568, scandots_output_dim), # 48x48 308 | nn.LeakyReLU(), 309 | nn.Linear(128, scandots_output_dim) 310 | ) 311 | 312 | self.output_activation = activation 313 | 314 | def forward(self, vobs, hist = True, augment = False): 315 | bs, seql, w, h = vobs.size() 316 | 317 | vobs = vobs.view(-1, 1, w, h) 318 | if augment: 319 | vobs = random_cutout(random_noise(random_shift(vobs))) 320 | 321 | vision_latent = self.output_activation(self.image_compression(vobs)) 322 | vision_latent = vision_latent.view(bs, seql, 128) 323 | 324 | if hist: 325 | vision_latent = vision_latent.repeat_interleave(5, axis = 1) 326 | 327 | return vision_latent 328 | 329 | class ExtraCorpusQMemory(nn.Module): 330 | def __init__(self, seq_len, batch_size): 331 | super().__init__() 332 | self.vision = DepthOnlyFCBackbone58x87(128, seq_len, batch_size) 333 | self.memory = nn.GRU(128, hidden_size = 256, batch_first = True) 334 | 335 | def forward(self, x, hidden_in): 336 | if hidden_in is None: 337 | raise NotImplementedError 338 | 339 | vision_latent = self.vision(x) 340 | time_latent, hidden_out = self.memory(vision_latent, hidden_in) 341 | return time_latent, hidden_out 342 | 343 | class QNetworkVanilla(nn.Module): 344 | def __init__(self, env, device): 345 | super().__init__() 346 | self.memory = 
nn.GRU(np.array(env.single_observation_space.shape).prod() + np.prod(env.single_action_space.shape), hidden_size = 256, batch_first = True) # dummy memory for compatibility 347 | self.fc1 = nn.Linear(np.array(env.single_observation_space.shape).prod() + np.prod(env.single_action_space.shape), 512) 348 | self.ln1 = nn.LayerNorm(512) 349 | self.fc2 = nn.Linear(512, 256) 350 | self.ln2 = nn.LayerNorm(256) 351 | self.fc3 = nn.Linear(256, 128) 352 | self.ln3 = nn.LayerNorm(128) 353 | self.fc4 = nn.Linear(128, 1) 354 | 355 | def forward(self, x, a, latent): 356 | x = torch.cat([x, a], -1) 357 | x = F.elu(self.ln1(self.fc1(x))) 358 | x = F.elu(self.ln2(self.fc2(x))) 359 | x = F.elu(self.ln3(self.fc3(x))) 360 | x = self.fc4(x) 361 | return x, None 362 | 363 | class Actor(nn.Module): 364 | def __init__(self, env): 365 | super().__init__() 366 | self.memory = nn.GRU(128 + 45, hidden_size = 256, batch_first = True) 367 | self.fc1 = nn.Linear(256, 512) 368 | self.fc2 = nn.Linear(512, 256) 369 | self.fc3 = nn.Linear(256, 128) 370 | self.fc_mu = nn.Linear(128, np.prod(env.single_action_space.shape)) 371 | 372 | # action rescaling 373 | self.register_buffer( 374 | "action_scale", torch.tensor((env.action_space.high - env.action_space.low) / 2.0, dtype=torch.float32) 375 | ) 376 | self.register_buffer( 377 | "action_bias", torch.tensor((env.action_space.high + env.action_space.low) / 2.0, dtype=torch.float32) 378 | ) 379 | 380 | def forward(self, x, vision_latent, hidden_in): 381 | if hidden_in is None: 382 | raise NotImplementedError 383 | 384 | x = torch.cat([x, vision_latent], -1) 385 | time_latent, hidden_out = self.memory(x, hidden_in) 386 | x = F.elu(self.fc1(time_latent)) 387 | x = F.elu(self.fc2(x)) 388 | x = F.elu(self.fc3(x)) 389 | x = torch.tanh(self.fc_mu(x)) 390 | 391 | return x * self.action_scale + self.action_bias, hidden_out, None 392 | 393 | class ExtractObsWrapper(gym.ObservationWrapper): 394 | def observation(self, obs): 395 | return obs["obs"] 396 | 397 | def DDPG_demos_rnn_vision(cfg: DictConfig, envs): 398 | TOTAL_TIMESTEPS = int(cfg["train"]["params"]["config"]["total_timesteps"]) 399 | CRITIC_LEARNING_RATE = cfg["train"]["params"]["config"]["critic_learning_rate"] 400 | ACTOR_LEARNING_RATE = cfg["train"]["params"]["config"]["actor_learning_rate"] 401 | BUFFER_SIZE = int(cfg["train"]["params"]["config"]["buffer_size"]) 402 | LEARNING_STARTS = cfg["train"]["params"]["config"]["learning_starts"] 403 | GAMMA = cfg["train"]["params"]["config"]["gamma"] 404 | POLICY_FREQUENCY = cfg["train"]["params"]["config"]["policy_frequency"] 405 | TAU = cfg["train"]["params"]["config"]["tau"] 406 | BATCH_SIZE = cfg["train"]["params"]["config"]["batch_size"] 407 | POLICY_NOISE = cfg["train"]["params"]["config"]["policy_noise"] 408 | NOISE_CLIP = cfg["train"]["params"]["config"]["noise_clip"] 409 | DEMOS_RB_PATH = cfg["train"]["params"]["config"]["demos_rb_path"] 410 | MAX_EPISODE_LENGTH = 500 411 | SEQ_LEN = cfg["train"]["params"]["config"]["seq_len"] 412 | EVAL_AT_END = True 413 | NUM_CONSTRAINTS = 107 414 | CRITIC_NB = 10 415 | num_atoms = 101 416 | vis_h = 48 417 | vis_w = 48 418 | 419 | assert SEQ_LEN % 5 == 0, "SEQ_LEN must be a multiple of 5" 420 | assert BUFFER_SIZE % (5 * envs.num_envs) == 0, "BUFFER_SIZE must be a multiple of 5 * num_envs" 421 | 422 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 423 | 424 | run_path = 
f"runs/{cfg['task']['name']}_DDPG_demos_rnn_redq_D405_z0.03_x0.24_parkour40_CORR_AUG_48x48_bigconv_seqlen{SEQ_LEN}_bs{BATCH_SIZE}_seed{cfg.seed}_{int(time.time())}" 425 | writer = SummaryWriter(run_path) 426 | 427 | if not os.path.exists(run_path): 428 | os.makedirs(run_path) 429 | 430 | OmegaConf.save(config = cfg, f = f"{run_path}/config.yaml") 431 | 432 | envs.single_action_space = envs.action_space 433 | envs.single_observation_space = envs.observation_space 434 | 435 | envs = ExtractObsWrapper(envs) 436 | 437 | rb_demos = pkl.load(open(f"{DEMOS_RB_PATH}/rb_demos.pkl", "rb")) 438 | ###### 439 | rb_demos.seq_len = SEQ_LEN 440 | ###### 441 | actions_min, actions_max = torch.load(f"{DEMOS_RB_PATH}/ppo_actions_minmax.pt") 442 | actions_min, actions_max = actions_min.to(device), actions_max.to(device) 443 | envs.action_space.low = actions_min.cpu().numpy() 444 | envs.action_space.high = actions_max.cpu().numpy() 445 | envs.single_action_space = envs.action_space 446 | envs.single_observation_space = envs.observation_space 447 | 448 | vision_nn = DepthOnlyFCBackbone58x87(128, SEQ_LEN, BATCH_SIZE).to(device) 449 | actor = Actor(envs).to(device) 450 | 451 | QNetwork = QNetworkVanilla 452 | qfs = [QNetwork(envs, device = device).to(device) for _ in range(CRITIC_NB)] 453 | qf_targets = [QNetwork(envs, device = device).to(device) for _ in range(CRITIC_NB)] 454 | for i in range(CRITIC_NB): 455 | qf_targets[i].load_state_dict(qfs[i].state_dict()) 456 | 457 | q_optimizer = optim.Adam(itertools.chain(*([q.parameters() for q in qfs])), lr=CRITIC_LEARNING_RATE) 458 | qf1 = QNetwork(envs, device = device).to(device) 459 | qf2 = QNetwork(envs, device = device).to(device) 460 | actor_optimizer = optim.Adam(list(actor.parameters()) + list(vision_nn.parameters()), lr=ACTOR_LEARNING_RATE) 461 | 462 | envs.single_observation_space.dtype = np.float32 463 | rb = SeqReplayBuffer( 464 | BUFFER_SIZE, 465 | (45,), 466 | envs.single_observation_space, 467 | (vis_h, vis_w), 468 | envs.single_action_space, 469 | NUM_CONSTRAINTS, 470 | MAX_EPISODE_LENGTH, 471 | SEQ_LEN, 472 | envs.num_envs, 473 | storing_device = "cpu", 474 | training_device = device, 475 | handle_timeout_termination=True, 476 | ) 477 | start_time = time.time() 478 | 479 | # TRY NOT TO MODIFY: start the game 480 | obs_privi = envs.reset() 481 | obs = obs_privi.clone()[:, : 45] 482 | vobs = torch.zeros((envs.num_envs, vis_h, vis_w), device = device) 483 | next_vobs = vobs.clone() 484 | vision_latent = None 485 | 486 | isaac_env_steps = TOTAL_TIMESTEPS // envs.num_envs 487 | print(f"Starting training for {isaac_env_steps} isaac env steps") 488 | 489 | gru_p_hidden_in = torch.zeros((actor.memory.num_layers, envs.num_envs, actor.memory.hidden_size), device = device) # p for policy 490 | gru_p_hidden_out = gru_p_hidden_in.clone() 491 | 492 | true_steps_num = 0 493 | actor_training_step = 0 494 | for global_step in range(isaac_env_steps): 495 | # ALGO LOGIC: put action logic here 496 | with torch.no_grad(): 497 | if global_step % 5 == 0: 498 | vision_latent = vision_nn(vobs.unsqueeze(1), hist = False) 499 | if true_steps_num < LEARNING_STARTS: 500 | actions = torch.zeros((envs.num_envs, rb.action_dim), device = device).uniform_(-1, 1) 501 | else: 502 | with torch.no_grad(): 503 | actions, gru_p_hidden_out, _ = actor(obs.unsqueeze(1), vision_latent, gru_p_hidden_in) 504 | actions = actions.squeeze(1) 505 | 506 | true_steps_num += envs.num_envs 507 | 508 | # TRY NOT TO MODIFY: execute the game and log data. 
509 | next_obs_privi, rewards, terminations, infos = envs.step(actions) 510 | true_dones = infos["true_dones"] 511 | truncateds = infos["truncateds"].float() 512 | raw_constraints = infos["raw_constraints"] 513 | 514 | next_obs = next_obs_privi.clone()[:, : 45] 515 | 516 | real_next_obs_privi = next_obs_privi.clone() 517 | real_next_obs = next_obs.clone() 518 | if(true_dones.any()): 519 | true_dones_idx = torch.argwhere(true_dones).squeeze() 520 | gru_p_hidden_out[:, true_dones_idx] = 0 521 | 522 | if "depth" in infos: 523 | next_vobs = infos["depth"].clone()[..., 19:-18] 524 | 525 | rb.add(obs, obs_privi, (vobs * 255).to(torch.uint8), real_next_obs, real_next_obs_privi, (next_vobs * 255).to(torch.uint8), \ 526 | actions, rewards, terminations, raw_constraints, true_dones, gru_p_hidden_in, truncateds) 527 | gru_p_hidden_in = gru_p_hidden_out 528 | 529 | if (global_step + 1) % 24 == 0: 530 | for el, v in zip(list(envs.episode_sums.keys())[:envs.numRewards], (torch.mean(envs.rew_mean_reset, dim=0)).tolist()): 531 | writer.add_scalar(f"reward/{el}", v, (global_step + 1) // 24) 532 | writer.add_scalar(f"reward/cum_rew", (torch.sum(torch.mean(envs.rew_cum_reset, dim=0))).item(), (global_step + 1) // 24) 533 | writer.add_scalar(f"reward/avg_rew", envs.terrain_levels.float().mean().item(), (global_step + 1) // 24) 534 | for el, v in zip(envs.cstr_manager.get_names(), (100.0 * torch.mean(envs.cstr_mean_reset, dim=0)).tolist()): 535 | writer.add_scalar(f"cstr/{el}", v, (global_step + 1) // 24) 536 | 537 | # TRY NOT TO MODIFY: CRUCIAL step easy to overlook 538 | obs_privi = next_obs_privi 539 | obs = next_obs 540 | vobs = next_vobs 541 | 542 | # ALGO LOGIC: training. 543 | if true_steps_num > LEARNING_STARTS: 544 | for local_steps in range(8): 545 | data = rb.sample(BATCH_SIZE // 2) 546 | data_expert = rb_demos.sample(BATCH_SIZE // 2) 547 | data = SeqReplayBufferSamples( 548 | observations=torch.cat([data.observations, data_expert.observations], dim=0), 549 | privileged_observations=torch.cat([data.privileged_observations, data_expert.privileged_observations], dim=0), 550 | vision_observations=torch.cat([data.vision_observations, data_expert.vision_observations], dim=0).float() / 255, 551 | actions=torch.cat([data.actions, data_expert.actions], dim=0), 552 | next_observations=torch.cat([data.next_observations, data_expert.next_observations], dim=0), 553 | privileged_next_observations=torch.cat([data.privileged_next_observations, data_expert.privileged_next_observations], dim=0), 554 | vision_next_observations=torch.cat([data.vision_next_observations, data_expert.vision_next_observations], dim=0).float() / 255, 555 | cat_dones=torch.cat([data.cat_dones, data_expert.cat_dones], dim=0), 556 | raw_constraints=torch.cat([data.raw_constraints, data_expert.raw_constraints], dim=0), 557 | dones=torch.cat([data.dones, data_expert.dones], dim=0), 558 | rewards=torch.cat([data.rewards, data_expert.rewards], dim=0), 559 | p_ini_hidden_in=torch.cat([data.p_ini_hidden_in, data_expert.p_ini_hidden_in], dim=1), 560 | p_ini_hidden_out=torch.cat([data.p_ini_hidden_out, data_expert.p_ini_hidden_out], dim=1), 561 | mask=torch.cat([data.mask, data_expert.mask], dim = 0), 562 | ) 563 | 564 | norm_const = data.raw_constraints / envs.cstr_manager.get_running_maxes() 565 | recomputed_cat_dones = (envs.cstr_manager.min_p + torch.clamp( 566 | norm_const, 567 | min=0.0, 568 | max=1.0 569 | ) * (envs.cstr_manager.get_max_p() - envs.cstr_manager.min_p)).max(-1).values 570 | 571 | with torch.no_grad(): 572 | clipped_noise = 
(torch.randn_like(data.actions, device=device) * POLICY_NOISE).clamp( 573 | -NOISE_CLIP, NOISE_CLIP 574 | ) #* actor.action_scale 575 | 576 | vlatent = vision_nn(data.vision_next_observations) 577 | qvlatent = None 578 | next_state_actions, _, _ = actor(data.next_observations, vlatent, data.p_ini_hidden_out) 579 | next_state_actions = (next_state_actions + clipped_noise).clamp( 580 | actions_min, actions_max 581 | ) 582 | targets_selected = torch.randperm(CRITIC_NB)[:2] 583 | qf_next_targets = torch.stack([qf_targets[i](data.privileged_next_observations, next_state_actions, qvlatent)[0] for i in targets_selected]) 584 | min_qf_next_target = qf_next_targets.min(dim = 0).values 585 | next_q_value = (1 - recomputed_cat_dones.flatten()) * data.rewards.flatten() + (1 - recomputed_cat_dones.flatten()) * (1 - data.dones.flatten()) * GAMMA * (min_qf_next_target).view(-1) 586 | 587 | true_samples_nb = data.mask.sum() 588 | qvlatent = None 589 | qf_a_values = torch.stack([qf(data.privileged_observations, data.actions, qvlatent)[0].view(-1) for qf in qfs]) 590 | qf_loss = (((qf_a_values - next_q_value.unsqueeze(0)) ** 2) * data.mask.view(-1).unsqueeze(0)).sum() / (true_samples_nb * CRITIC_NB) 591 | 592 | # optimize the model 593 | q_optimizer.zero_grad() 594 | qf_loss.backward() 595 | nn.utils.clip_grad_norm_(qf1.parameters(), 0.5) 596 | q_optimizer.step() 597 | 598 | if local_steps % POLICY_FREQUENCY == 0: 599 | for i in range(CRITIC_NB): 600 | for param, target_param in zip(qfs[i].parameters(), qf_targets[i].parameters()): 601 | target_param.data.copy_(TAU * param.data + (1 - TAU) * target_param.data) 602 | 603 | if local_steps == 7: 604 | actor_training_step += 1 605 | vlatent = vision_nn(data.vision_observations, augment = True) 606 | qvlatent = None 607 | diff_actions, _, recons_height_maps = actor(data.observations, vlatent, data.p_ini_hidden_in) 608 | 609 | qs = torch.stack([qf(data.privileged_observations, diff_actions, qvlatent)[0] for qf in qfs]) 610 | actor_loss = -(qs.squeeze(-1) * data.mask.unsqueeze(0)).sum() / (true_samples_nb * CRITIC_NB) 611 | final_loss = actor_loss 612 | 613 | actor_optimizer.zero_grad() 614 | final_loss.backward() 615 | nn.utils.clip_grad_norm_(actor.parameters(), 0.5) 616 | actor_optimizer.step() 617 | 618 | writer.add_scalar(f"loss/actor_loss", actor_loss.item(), actor_training_step) 619 | writer.add_scalar(f"infos/Q_max", qs.max().item(), actor_training_step) 620 | 621 | if (global_step + 1) % (500 * 24) == 0: 622 | model_path = f"{run_path}/cleanrl_model_e{(global_step + 1) // 24}.pt" 623 | torch.save((actor.state_dict(), vision_nn.state_dict()), model_path) 624 | print("Saved model checkpoint") 625 | 626 | if (global_step + 1) % (5 * 24) == 0: 627 | model_path = f"{run_path}/cleanrl_model.pt" 628 | torch.save((actor.state_dict(), vision_nn.state_dict()), model_path) 629 | print("Saved model") 630 | 631 | def eval_DDPG_demos_rnn_vision(cfg: DictConfig, envs): 632 | import cv2 633 | vis_h = 48 634 | vis_w = 48 635 | is_video_gen = "video_save_path" in cfg["task"] 636 | 637 | if is_video_gen: 638 | video_save_path = cfg["task"]["video_save_path"] 639 | position = video_save_path.rfind(".mp4") 640 | output_file = video_save_path[:position] + "_depth" + video_save_path[position:] 641 | fps = 10 642 | 643 | checkpoint = cfg["checkpoint"] 644 | actor_sd, vision_sd = torch.load(checkpoint) 645 | 646 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 647 | 648 | envs.single_action_space = envs.action_space 649 | envs.single_observation_space = 
envs.observation_space 650 | 651 | envs = ExtractObsWrapper(envs) 652 | 653 | vision_nn = DepthOnlyFCBackbone58x87(128, 0, 0).to(device) 654 | vision_nn.load_state_dict(vision_sd) 655 | actor = Actor(envs).to(device) 656 | actor.load_state_dict(actor_sd) 657 | 658 | obs_privi = envs.reset() 659 | obs = obs_privi.clone()[:, : 45] 660 | vobs = torch.zeros((envs.num_envs, vis_h, vis_w), device = device) 661 | next_vobs = vobs.clone() 662 | vision_latent = None 663 | gru_p_hidden_in = torch.zeros((actor.memory.num_layers, envs.num_envs, actor.memory.hidden_size), device = device) # p for policy 664 | 665 | if is_video_gen: 666 | depth_images = [] 667 | 668 | for global_step in range(2000): 669 | with torch.no_grad(): 670 | if global_step % 5 == 0: 671 | vision_latent = vision_nn(vobs.unsqueeze(1), hist = False) 672 | 673 | with torch.no_grad(): 674 | actions, gru_p_hidden_out, _ = actor(obs.unsqueeze(1), vision_latent, gru_p_hidden_in) 675 | actions = actions.squeeze(1) 676 | 677 | next_obs_privi, rewards, terminations, infos = envs.step(actions) 678 | next_obs = next_obs_privi.clone()[:, : 45] 679 | obs = next_obs 680 | if "depth" in infos: 681 | next_vobs = infos["depth"].clone()[..., 19:-18] 682 | vobs = next_vobs 683 | gru_p_hidden_in = gru_p_hidden_out 684 | 685 | if is_video_gen: 686 | depth_images.append((vobs * 255).to(torch.uint8).squeeze().cpu().numpy()) 687 | if global_step == 200: 688 | fourcc = cv2.VideoWriter_fourcc(*'mp4v') 689 | out = cv2.VideoWriter(output_file, fourcc, fps, (vis_w, vis_h), isColor=True) 690 | out.set(cv2.VIDEOWRITER_PROP_QUALITY, 1) # 1 is the highest quality setting 691 | for i in range(len(depth_images)): 692 | # Get the i-th frame 693 | frame = depth_images[i] 694 | 695 | if frame.shape != (vis_h, vis_w): 696 | raise ValueError(f"Frame {i} has incorrect shape: {frame.shape}") 697 | 698 | # Write the frame to the video (as a single-channel image) 699 | rgb_image = cv2.applyColorMap(frame, cv2.COLORMAP_MAGMA) 700 | out.write(rgb_image) 701 | 702 | # Release the VideoWriter object 703 | out.release() 704 | print(f"Video saved to {output_file}") 705 | --------------------------------------------------------------------------------
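For completeness, here is a minimal sketch (not part of the repository) of reloading a checkpoint saved by the training loop above and querying it for a single action. The checkpoint path and the `envs` handle are placeholders, and the shapes follow the conventions used in `eval_DDPG_demos_rnn_vision` (45-dimensional proprioceptive slice, 48x48 depth patch, 256-unit GRU hidden state, vision latent refreshed every 5 control steps):

import torch

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hypothetical checkpoint written by DDPG_demos_rnn_vision (a tuple of state dicts).
actor_sd, vision_sd = torch.load("runs/<run_name>/cleanrl_model.pt", map_location=device)

vision_nn = DepthOnlyFCBackbone58x87(128, 0, 0).to(device)
vision_nn.load_state_dict(vision_sd)
actor = Actor(envs).to(device)        # `envs` must expose the same spaces as during training
actor.load_state_dict(actor_sd)

obs = envs.reset()[:, :45]                                     # proprioceptive slice
vobs = torch.zeros((envs.num_envs, 48, 48), device=device)     # latest depth patch
hidden = torch.zeros((1, envs.num_envs, 256), device=device)   # policy GRU state

with torch.no_grad():
    latent = vision_nn(vobs.unsqueeze(1), hist=False)          # recompute only every 5 env steps
    actions, hidden, _ = actor(obs.unsqueeze(1), latent, hidden)
    actions = actions.squeeze(1)                               # one action per environment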