├── cfg
│   ├── pbt
│   │   └── no_pbt.yaml
│   ├── train
│   │   ├── SoloParkourDDPG_demos_generate.yaml
│   │   ├── SoloParkourDDPG_demos_rnn_vision.yaml
│   │   └── SoloParkourPPO.yaml
│   ├── config.yaml
│   └── task
│       └── SoloParkour.yaml
├── assets
│   └── teaser.jpeg
├── solo12
│   ├── meshes
│   │   ├── checker_blue.png
│   │   ├── stl
│   │   │   ├── solo12
│   │   │   │   ├── solo_12_base.stl
│   │   │   │   ├── solo12_hip_fe_fl.stl
│   │   │   │   ├── solo12_hip_fe_fr.stl
│   │   │   │   ├── solo12_hip_fe_hl.stl
│   │   │   │   ├── solo12_hip_fe_hr.stl
│   │   │   │   ├── solo12_coordinate_systems.png
│   │   │   │   ├── _solo12_visualisation_model.PDF
│   │   │   │   ├── solo12_coordinate_systems_2.png
│   │   │   │   ├── solo12_coordinate_systems_3.png
│   │   │   │   ├── solo12_coordinate_systems_4.png
│   │   │   │   ├── solo12_lower_leg_left_side.stl
│   │   │   │   ├── solo12_lower_leg_right_side.stl
│   │   │   │   ├── solo12_upper_leg_left_side.stl
│   │   │   │   └── solo12_upper_leg_right_side.stl
│   │   │   ├── with_foot
│   │   │   │   ├── solo_body.stl
│   │   │   │   ├── solo_foot.stl
│   │   │   │   ├── solo_lower_leg_left_side.stl
│   │   │   │   ├── solo_upper_leg_left_side.stl
│   │   │   │   ├── solo_lower_leg_right_side.stl
│   │   │   │   └── solo_upper_leg_right_side.stl
│   │   │   └── without_foot
│   │   │       ├── solo_body.stl
│   │   │       ├── solo_lower_leg_left_side.stl
│   │   │       ├── solo_upper_leg_left_side.stl
│   │   │       ├── solo_lower_leg_right_side.stl
│   │   │       ├── solo_upper_leg_right_side.stl
│   │   │       ├── solo_lower_leg_v2_left_side.stl
│   │   │       └── solo_lower_leg_v2_right_side.stl
│   │   ├── obj
│   │   │   ├── solo12
│   │   │   │   └── _solo12_visualisation_model.PDF
│   │   │   ├── with_foot
│   │   │   │   ├── solo_body.mtl
│   │   │   │   ├── solo_foot.mtl
│   │   │   │   ├── solo_lower_leg_left_side.mtl
│   │   │   │   ├── solo_lower_leg_right_side.mtl
│   │   │   │   ├── solo_upper_leg_left_side.mtl
│   │   │   │   └── solo_upper_leg_right_side.mtl
│   │   │   └── without_foot
│   │   │       ├── solo_body.mtl
│   │   │       ├── solo_lower_leg_left_side.mtl
│   │   │       ├── solo_upper_leg_left_side.mtl
│   │   │       ├── solo_lower_leg_right_side.mtl
│   │   │       └── solo_upper_leg_right_side.mtl
│   │   ├── plane.mtl
│   │   ├── plane.obj
│   │   ├── cube.obj
│   │   ├── sphere.obj
│   │   └── cylinder.obj
│   ├── srdf
│   │   └── solo.srdf
│   ├── solo12_mpi_scaled.urdf
│   └── solo12_mpi.urdf
├── LICENSE.txt
├── README.md
├── train.py
├── utils
│   ├── constraint_manager.py
│   └── isaacgymenvs_make.py
├── algos
│   ├── PPO.py
│   ├── DDPG_demos_generate.py
│   └── DDPG_demos_rnn_vision.py
└── tasks
    └── terrainParkour.py
/cfg/pbt/no_pbt.yaml:
--------------------------------------------------------------------------------
1 | enabled: False
--------------------------------------------------------------------------------
/assets/teaser.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/assets/teaser.jpeg
--------------------------------------------------------------------------------
/solo12/meshes/checker_blue.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/checker_blue.png
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo_12_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo_12_base.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/with_foot/solo_body.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_body.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/with_foot/solo_foot.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_foot.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/without_foot/solo_body.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_body.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo12_hip_fe_fl.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_hip_fe_fl.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo12_hip_fe_fr.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_hip_fe_fr.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo12_hip_fe_hl.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_hip_fe_hl.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo12_hip_fe_hr.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_hip_fe_hr.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo12_coordinate_systems.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_coordinate_systems.png
--------------------------------------------------------------------------------
/solo12/meshes/obj/solo12/_solo12_visualisation_model.PDF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/obj/solo12/_solo12_visualisation_model.PDF
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/_solo12_visualisation_model.PDF:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/_solo12_visualisation_model.PDF
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo12_coordinate_systems_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_coordinate_systems_2.png
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo12_coordinate_systems_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_coordinate_systems_3.png
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo12_coordinate_systems_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_coordinate_systems_4.png
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo12_lower_leg_left_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_lower_leg_left_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo12_lower_leg_right_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_lower_leg_right_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo12_upper_leg_left_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_upper_leg_left_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/solo12/solo12_upper_leg_right_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/solo12/solo12_upper_leg_right_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/with_foot/solo_lower_leg_left_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_lower_leg_left_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/with_foot/solo_upper_leg_left_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_upper_leg_left_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/with_foot/solo_lower_leg_right_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_lower_leg_right_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/with_foot/solo_upper_leg_right_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/with_foot/solo_upper_leg_right_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/without_foot/solo_lower_leg_left_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_lower_leg_left_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/without_foot/solo_upper_leg_left_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_upper_leg_left_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/without_foot/solo_lower_leg_right_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_lower_leg_right_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/without_foot/solo_upper_leg_right_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_upper_leg_right_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/without_foot/solo_lower_leg_v2_left_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_lower_leg_v2_left_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/stl/without_foot/solo_lower_leg_v2_right_side.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Gepetto/SoloParkour/HEAD/solo12/meshes/stl/without_foot/solo_lower_leg_v2_right_side.stl
--------------------------------------------------------------------------------
/solo12/meshes/plane.mtl:
--------------------------------------------------------------------------------
1 | newmtl Material
2 | Ns 10.0000
3 | Ni 1.5000
4 | d 1.0000
5 | Tr 0.0000
6 | Tf 1.0000 1.0000 1.0000
7 | illum 2
8 | Ka 0.0000 0.0000 0.0000
9 | Kd 0.5880 0.5880 0.5880
10 | Ks 0.0000 0.0000 0.0000
11 | Ke 0.0000 0.0000 0.0000
12 | map_Ka cube.tga
13 | map_Kd checker_blue.png
14 |
15 |
16 |
--------------------------------------------------------------------------------
/solo12/meshes/obj/with_foot/solo_body.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 2
3 |
4 | newmtl Material
5 | Ns 96.078431
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.640000 0.640000 0.640000
8 | Ks 0.500000 0.500000 0.500000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl None
15 | Ns 0
16 | Ka 0.000000 0.000000 0.000000
17 | Kd 0.8 0.8 0.8
18 | Ks 0.8 0.8 0.8
19 | d 1
20 | illum 2
21 |
--------------------------------------------------------------------------------
/solo12/meshes/obj/with_foot/solo_foot.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 2
3 |
4 | newmtl Material
5 | Ns 96.078431
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.640000 0.640000 0.640000
8 | Ks 0.500000 0.500000 0.500000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl None
15 | Ns 0
16 | Ka 0.000000 0.000000 0.000000
17 | Kd 0.8 0.8 0.8
18 | Ks 0.8 0.8 0.8
19 | d 1
20 | illum 2
21 |
--------------------------------------------------------------------------------
/solo12/meshes/obj/without_foot/solo_body.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 2
3 |
4 | newmtl Material
5 | Ns 96.078431
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.640000 0.640000 0.640000
8 | Ks 0.500000 0.500000 0.500000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl None
15 | Ns 0
16 | Ka 0.000000 0.000000 0.000000
17 | Kd 0.8 0.8 0.8
18 | Ks 0.8 0.8 0.8
19 | d 1
20 | illum 2
21 |
--------------------------------------------------------------------------------
/solo12/meshes/plane.obj:
--------------------------------------------------------------------------------
1 | # Blender v2.66 (sub 1) OBJ File: ''
2 | # www.blender.org
3 | mtllib plane.mtl
4 | o Plane
5 | v 15.000000 -15.000000 0.000000
6 | v 15.000000 15.000000 0.000000
7 | v -15.000000 15.000000 0.000000
8 | v -15.000000 -15.000000 0.000000
9 |
10 | vt 15.000000 0.000000
11 | vt 15.000000 15.000000
12 | vt 0.000000 15.000000
13 | vt 0.000000 0.000000
14 |
15 | usemtl Material
16 | s off
17 | f 1/1 2/2 3/3
18 | f 1/1 3/3 4/4
19 |
--------------------------------------------------------------------------------
/solo12/meshes/obj/with_foot/solo_lower_leg_left_side.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 2
3 |
4 | newmtl Material
5 | Ns 96.078431
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.640000 0.640000 0.640000
8 | Ks 0.500000 0.500000 0.500000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl None
15 | Ns 0
16 | Ka 0.000000 0.000000 0.000000
17 | Kd 0.8 0.8 0.8
18 | Ks 0.8 0.8 0.8
19 | d 1
20 | illum 2
21 |
--------------------------------------------------------------------------------
/solo12/meshes/obj/with_foot/solo_lower_leg_right_side.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 2
3 |
4 | newmtl Material
5 | Ns 96.078431
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.640000 0.640000 0.640000
8 | Ks 0.500000 0.500000 0.500000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl None
15 | Ns 0
16 | Ka 0.000000 0.000000 0.000000
17 | Kd 0.8 0.8 0.8
18 | Ks 0.8 0.8 0.8
19 | d 1
20 | illum 2
21 |
--------------------------------------------------------------------------------
/solo12/meshes/obj/with_foot/solo_upper_leg_left_side.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 2
3 |
4 | newmtl Material
5 | Ns 96.078431
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.640000 0.640000 0.640000
8 | Ks 0.500000 0.500000 0.500000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl None
15 | Ns 0
16 | Ka 0.000000 0.000000 0.000000
17 | Kd 0.8 0.8 0.8
18 | Ks 0.8 0.8 0.8
19 | d 1
20 | illum 2
21 |
--------------------------------------------------------------------------------
/solo12/meshes/obj/with_foot/solo_upper_leg_right_side.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 2
3 |
4 | newmtl Material
5 | Ns 96.078431
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.640000 0.640000 0.640000
8 | Ks 0.500000 0.500000 0.500000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl None
15 | Ns 0
16 | Ka 0.000000 0.000000 0.000000
17 | Kd 0.8 0.8 0.8
18 | Ks 0.8 0.8 0.8
19 | d 1
20 | illum 2
21 |
--------------------------------------------------------------------------------
/solo12/meshes/obj/without_foot/solo_lower_leg_left_side.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 2
3 |
4 | newmtl Material
5 | Ns 96.078431
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.640000 0.640000 0.640000
8 | Ks 0.500000 0.500000 0.500000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl None
15 | Ns 0
16 | Ka 0.000000 0.000000 0.000000
17 | Kd 0.8 0.8 0.8
18 | Ks 0.8 0.8 0.8
19 | d 1
20 | illum 2
21 |
--------------------------------------------------------------------------------
/solo12/meshes/obj/without_foot/solo_upper_leg_left_side.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 2
3 |
4 | newmtl Material
5 | Ns 96.078431
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.640000 0.640000 0.640000
8 | Ks 0.500000 0.500000 0.500000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl None
15 | Ns 0
16 | Ka 0.000000 0.000000 0.000000
17 | Kd 0.8 0.8 0.8
18 | Ks 0.8 0.8 0.8
19 | d 1
20 | illum 2
21 |
--------------------------------------------------------------------------------
/solo12/meshes/obj/without_foot/solo_lower_leg_right_side.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 2
3 |
4 | newmtl Material
5 | Ns 96.078431
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.640000 0.640000 0.640000
8 | Ks 0.500000 0.500000 0.500000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl None
15 | Ns 0
16 | Ka 0.000000 0.000000 0.000000
17 | Kd 0.8 0.8 0.8
18 | Ks 0.8 0.8 0.8
19 | d 1
20 | illum 2
21 |
--------------------------------------------------------------------------------
/solo12/meshes/obj/without_foot/solo_upper_leg_right_side.mtl:
--------------------------------------------------------------------------------
1 | # Blender MTL File: 'None'
2 | # Material Count: 2
3 |
4 | newmtl Material
5 | Ns 96.078431
6 | Ka 1.000000 1.000000 1.000000
7 | Kd 0.640000 0.640000 0.640000
8 | Ks 0.500000 0.500000 0.500000
9 | Ke 0.000000 0.000000 0.000000
10 | Ni 1.000000
11 | d 1.000000
12 | illum 2
13 |
14 | newmtl None
15 | Ns 0
16 | Ka 0.000000 0.000000 0.000000
17 | Kd 0.8 0.8 0.8
18 | Ks 0.8 0.8 0.8
19 | d 1
20 | illum 2
21 |
--------------------------------------------------------------------------------
/cfg/train/SoloParkourDDPG_demos_generate.yaml:
--------------------------------------------------------------------------------
1 | params:
2 | seed: ${...seed}
3 |
4 | algo:
5 | name: cat_ddpg_demos_generate_continuous
6 |
7 | config:
8 | name: ${resolve_default:SoloParkour,${....experiment}}
9 | full_experiment_name: ${.name}
10 | env_name: rlgpu
11 | total_timesteps: 2e8
12 | buffer_size: 4999680 # % (5 * 256) = 0
13 | batch_size: 512
14 | target_policy_path: "path/to/cleanrl_model.pt"
15 | demos_buffer_size: 1999360 # % (5 * 256) = 0
16 | horizon_length: 24 # 24 for t = 0.02 (decimation = 4)
17 |
18 | max_epochs: ${resolve_default:1000,${....max_iterations}}
19 |
--------------------------------------------------------------------------------
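The two buffer sizes above are exact multiples of 5 * 256 = 1280, as the inline comments indicate; the 256 presumably matches the `task.env.numEnvs=256` override used in the README command for this stage. A quick sanity check (a sketch, not part of the repository):

```python
# Verify the divisibility noted in the config comments above.
# num_envs = 256 is an assumption taken from the README command for the demo-generation stage.
num_envs = 256
assert 4999680 % (5 * num_envs) == 0   # buffer_size
assert 1999360 % (5 * num_envs) == 0   # demos_buffer_size
```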
/cfg/train/SoloParkourDDPG_demos_rnn_vision.yaml:
--------------------------------------------------------------------------------
1 | params:
2 | seed: ${...seed}
3 |
4 | algo:
5 | name: cat_ddpg_demos_rnn_vision_continuous
6 |
7 | config:
8 | name: ${resolve_default:SoloParkour,${....experiment}}
9 | full_experiment_name: ${.name}
10 | env_name: rlgpu
11 | gamma: 0.99
12 | tau: 0.05
13 | critic_learning_rate: 3.e-4
14 | actor_learning_rate: 5.e-4
15 | total_timesteps: 2e8
16 | buffer_size: 4999680 # % (5 * 256) = 0
17 | learning_starts: 25e3
18 | policy_frequency: 2
19 | batch_size: 128
20 | seq_len: 50
21 | policy_noise: 0.8
22 | noise_clip: 0.2
23 | demos_rb_path: "path/to/privileged/experience"
24 | horizon_length: 24 # 24 for t = 0.02 (decimation = 4)
25 |
26 | max_epochs: 5000
27 |
--------------------------------------------------------------------------------
/solo12/meshes/cube.obj:
--------------------------------------------------------------------------------
1 | o Box
2 | v 0.5 0.5 0.5
3 | v 0.5 0.5 -0.5
4 | v 0.5 -0.5 0.5
5 | v 0.5 -0.5 -0.5
6 | v -0.5 0.5 -0.5
7 | v -0.5 0.5 0.5
8 | v -0.5 -0.5 -0.5
9 | v -0.5 -0.5 0.5
10 | v -0.5 0.5 -0.5
11 | v 0.5 0.5 -0.5
12 | v -0.5 0.5 0.5
13 | v 0.5 0.5 0.5
14 | v -0.5 -0.5 0.5
15 | v 0.5 -0.5 0.5
16 | v -0.5 -0.5 -0.5
17 | v 0.5 -0.5 -0.5
18 | v -0.5 0.5 0.5
19 | v 0.5 0.5 0.5
20 | v -0.5 -0.5 0.5
21 | v 0.5 -0.5 0.5
22 | v 0.5 0.5 -0.5
23 | v -0.5 0.5 -0.5
24 | v 0.5 -0.5 -0.5
25 | v -0.5 -0.5 -0.5
26 | vt 0 1
27 | vt 1 1
28 | vt 0 0
29 | vt 1 0
30 | vt 0 1
31 | vt 1 1
32 | vt 0 0
33 | vt 1 0
34 | vt 0 1
35 | vt 1 1
36 | vt 0 0
37 | vt 1 0
38 | vt 0 1
39 | vt 1 1
40 | vt 0 0
41 | vt 1 0
42 | vt 0 1
43 | vt 1 1
44 | vt 0 0
45 | vt 1 0
46 | vt 0 1
47 | vt 1 1
48 | vt 0 0
49 | vt 1 0
50 | vn 1 0 0
51 | vn 1 0 0
52 | vn 1 0 0
53 | vn 1 0 0
54 | vn -1 0 0
55 | vn -1 0 0
56 | vn -1 0 0
57 | vn -1 0 0
58 | vn 0 1 0
59 | vn 0 1 0
60 | vn 0 1 0
61 | vn 0 1 0
62 | vn 0 -1 0
63 | vn 0 -1 0
64 | vn 0 -1 0
65 | vn 0 -1 0
66 | vn 0 0 1
67 | vn 0 0 1
68 | vn 0 0 1
69 | vn 0 0 1
70 | vn 0 0 -1
71 | vn 0 0 -1
72 | vn 0 0 -1
73 | vn 0 0 -1
74 | f 1/1/1 3/3/3 2/2/2
75 | f 3/3/3 4/4/4 2/2/2
76 | f 5/5/5 7/7/7 6/6/6
77 | f 7/7/7 8/8/8 6/6/6
78 | f 9/9/9 11/11/11 10/10/10
79 | f 11/11/11 12/12/12 10/10/10
80 | f 13/13/13 15/15/15 14/14/14
81 | f 15/15/15 16/16/16 14/14/14
82 | f 17/17/17 19/19/19 18/18/18
83 | f 19/19/19 20/20/20 18/18/18
84 | f 21/21/21 23/23/23 22/22/22
85 | f 23/23/23 24/24/24 22/22/22
86 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | BSD 2-Clause License
2 |
3 | Copyright (c) 2024, LAAS-CNRS
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions are met:
7 |
8 | 1. Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | 2. Redistributions in binary form must reproduce the above copyright notice,
12 | this list of conditions and the following disclaimer in the documentation
13 | and/or other materials provided with the distribution.
14 |
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 |
26 | Files with a specific license in their header (like train.py) abide by their
27 | respective license.
28 |
29 |
--------------------------------------------------------------------------------
/cfg/config.yaml:
--------------------------------------------------------------------------------
1 |
2 | # Task name - used to pick the class to load
3 | task_name: ${task.name}
4 | # experiment name. defaults to name of training config
5 | experiment: ''
6 |
7 | # if set to positive integer, overrides the default number of environments
8 | num_envs: ''
9 |
10 | # seed - set to -1 to choose random seed
11 | seed: 42
12 | # set to True for deterministic performance
13 | torch_deterministic: False
14 |
15 | # set the maximum number of learning iterations to train for. overrides default per-environment setting
16 | max_iterations: ''
17 |
18 | ## Device config
19 | # 'physx' or 'flex'
20 | physics_engine: 'physx'
21 | # whether to use cpu or gpu pipeline
22 | pipeline: 'gpu'
23 | # device for running physics simulation
24 | sim_device: 'cuda:0'
25 | # device to run RL
26 | rl_device: 'cuda:0'
27 | graphics_device_id: 0
28 |
29 | ## PhysX arguments
30 | num_threads: 4 # Number of worker threads per scene used by PhysX - for CPU PhysX only.
31 | solver_type: 1 # 0: pgs, 1: tgs
32 | num_subscenes: 4 # Splits the simulation into N physics scenes and runs each one in a separate thread
33 |
34 | # RLGames Arguments
35 | # test - if set, run policy in inference mode (requires setting checkpoint to load)
36 | test: False
37 | # used to set checkpoint path
38 | checkpoint: ''
39 | # set sigma when restoring network
40 | sigma: ''
41 | # set to True to use multi-gpu training
42 | multi_gpu: False
43 |
44 | wandb_activate: False
45 | wandb_group: ''
46 | wandb_name: ${train.params.config.name}
47 | wandb_entity: ''
48 | wandb_project: 'isaacgymenvs'
49 | wandb_tags: []
50 | wandb_logcode_dir: ''
51 |
52 | capture_video: False
53 | capture_video_freq: 1464
54 | capture_video_len: 100
55 | force_render: True
56 |
57 | # disables rendering
58 | headless: False
59 |
60 | # set default task and default training config based on task
61 | defaults:
62 | - task: Ant
63 | - train: ${task}PPO
64 | - hydra/job_logging: disabled
65 | - pbt: no_pbt
66 |
67 | # set the directory where the output files get saved
68 | hydra:
69 | output_subdir: null
70 | run:
71 | dir: .
72 |
73 |
--------------------------------------------------------------------------------
/cfg/train/SoloParkourPPO.yaml:
--------------------------------------------------------------------------------
1 | params:
2 | seed: ${...seed}
3 |
4 | algo:
5 | name: cat_a2c_continuous
6 |
7 | model:
8 | name: continuous_a2c_logstd
9 |
10 | network:
11 | name: actor_critic
12 | separate: True
13 |
14 | space:
15 | continuous:
16 | mu_activation: None
17 | sigma_activation: None
18 | mu_init:
19 | name: default
20 | sigma_init:
21 | name: const_initializer
22 | val: 0. # std = 1.
23 | fixed_sigma: True
24 |
25 | mlp:
26 | units: [512, 256, 128]
27 | activation: elu
28 | d2rl: False
29 |
30 | initializer:
31 | name: default
32 | regularizer:
33 | name: None
34 | # rnn:
35 | # name: lstm
36 | # units: 128
37 | # layers: 1
38 | # before_mlp: True
39 | # concat_input: True
40 | # layer_norm: False
41 |
42 |
43 | load_checkpoint: ${if:${...checkpoint},True,False} # flag which sets whether to load the checkpoint
44 | load_path: ${...checkpoint} # path to the checkpoint to load
45 |
46 | config:
47 | name: ${resolve_default:SoloParkour,${....experiment}}
48 | full_experiment_name: ${.name}
49 | env_name: rlgpu
50 | ppo: True
51 | multi_gpu: ${....multi_gpu}
52 | mixed_precision: True
53 | normalize_input: True
54 | normalize_value: True
55 | normalize_advantage: True
56 | value_bootstrap: True
57 | clip_actions: False
58 | num_actors: ${....task.env.numEnvs}
59 | reward_shaper:
60 | scale_value: 1.0
61 | gamma: 0.99
62 | tau: 0.95
63 | e_clip: 0.2
64 | entropy_coef: 0.001
65 | learning_rate: 3.e-4 # overwritten by adaptive lr_schedule
66 | lr_schedule: adaptive
67 | kl_threshold: 0.008 # target kl for adaptive lr
68 | truncate_grads: True
69 | grad_norm: 1.
70 | horizon_length: 24 # 24 for t = 0.02 (decimation = 4)
71 | minibatch_size: 16384 # 16384 for horizon_length 24
72 | mini_epochs: 5
73 | critic_coef: 2
74 | clip_value: True
75 | seq_len: 4 # only for rnn
76 | bounds_loss_coef: 0.
77 | kappa: 1.0
78 |
79 | max_epochs: ${resolve_default:20000,${....max_iterations}}
80 | save_best_after: 1000
81 | score_to_win: 2000000000
82 | save_frequency: 1000
83 | print_stats: False
84 |
85 | player:
86 | deterministic: True
87 | use_vecenv: True
88 | games_num: 2000
89 | print_stats: False
90 |
--------------------------------------------------------------------------------
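For reference, `minibatch_size` has to divide the on-policy batch collected each epoch: with the default `numEnvs` of 4096 from `cfg/task/SoloParkour.yaml` and `horizon_length: 24`, each epoch gathers 98304 transitions, i.e. 6 minibatches of 16384. A minimal check (a sketch, not part of the repository):

```python
# Batch layout implied by the default settings above.
num_envs = 4096          # cfg/task/SoloParkour.yaml -> env.numEnvs
horizon_length = 24      # steps collected per environment between updates
minibatch_size = 16384

batch_size = num_envs * horizon_length      # 98304 transitions per epoch
assert batch_size % minibatch_size == 0     # 6 minibatches per mini-epoch
```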
/README.md:
--------------------------------------------------------------------------------
1 | # SoloParkour: Constrained Reinforcement Learning for Visual Locomotion from Privileged Experience
2 |
3 | 
4 |
5 | > [SoloParkour: Constrained Reinforcement Learning for Visual Locomotion from Privileged Experience](https://gepetto.github.io/SoloParkour/)
6 | > Elliot Chane-Sane*, Joseph Amigo*, Thomas Flayols, Ludovic Righetti, Nicolas Mansard,
7 | > **CoRL 2024**
8 |
9 |
10 |
11 |
12 |
13 | ## Usage
14 |
15 | 1. Train the privileged (Stage 1) policy:
16 | ```bash
17 | python train.py task=SoloParkour headless=True task.env.control.damping_curriculum.enable=True
18 | ```
19 |
20 | 2. Generate the dataset of privileged experience:
21 | ```bash
22 | # replace with the correct path to the privileged policy weights
23 | policy_path=path/to/cleanrl_model.pt
24 |
25 | python train.py task=SoloParkour train=SoloParkourDDPG_demos_generate headless=True task.env.numEnvs=256 task.env.enableCameraSensors=True task.env.depth.use_depth=True task.env.terrain.maxInitMapLevel=9 train.params.config.target_policy_path=${policy_path}
26 | ```
27 |
28 | 3. Train the visual (Stage 2) policy from privileged experience:
29 | ```bash
30 | # replace with the correct path to the privileged experience folder
31 | demo_path=path/to/folder/ # folder which contains rb_demos.pkl
32 |
33 | python train.py task=SoloParkour train=SoloParkourDDPG_demos_rnn_vision headless=True task.env.numEnvs=256 task.env.enableCameraSensors=True task.env.depth.use_depth=True train.params.config.demos_rb_path=${demo_path}
34 | ```
35 |
36 | ## Installation
37 |
38 | ```bash
39 | conda create -n IsaacGymEnvs python=3.8
40 | conda activate IsaacGymEnvs
41 |
42 | conda install pip
43 | conda install -c conda-forge ninja
44 |
45 | # Download Isaac Gym: https://developer.nvidia.com/isaac-gym
46 | cd path/to/isaacgym/python && pip install --user --no-cache-dir -e .
47 |
48 | # Clone IsaacGymEnvs: https://github.com/isaac-sim/IsaacGymEnvs
49 | cd path/to/IsaacGymEnvs/ && pip install --user --no-cache-dir -e .
50 |
51 | pip uninstall torch torchvision
52 | pip install --user --no-cache-dir torch==2.2.0 torchvision==0.17.0
53 | # Fix compatibility problem with numpy version
54 | pip install --user --no-cache-dir "numpy<1.24"
55 | pip install --user --no-cache-dir texttable av
56 |
57 | # Add correct LD_LIBRARY_PATH on env start
58 | conda env config vars set LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib --name $CONDA_DEFAULT_ENV
59 | ```
60 |
61 | ### Acknowledgements
62 |
63 | This code was based on the following repositories:
64 | - https://github.com/vwxyzjn/cleanrl
65 | - https://github.com/isaac-sim/IsaacGymEnvs
66 | - https://github.com/gepetto/constraints-as-terminations
67 |
68 | ### Citation
69 |
70 | ```
71 | @inproceedings{chanesane2024solo,
72 | title = {SoloParkour: Constrained Reinforcement Learning for Visual Locomotion from Privileged Experience},
73 | author = {Elliot Chane-Sane and Joseph Amigo and Thomas Flayols and Ludovic Righetti and Nicolas Mansard},
74 | booktitle = {Conference on Robot Learning (CoRL)},
75 | year = {2024},
76 | }
77 | ```
78 |
--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
1 | # train.py
2 | # Script to train policies in Isaac Gym
3 | #
4 | # Copyright (c) 2018-2023, NVIDIA Corporation
5 | # All rights reserved.
6 | #
7 | # Redistribution and use in source and binary forms, with or without
8 | # modification, are permitted provided that the following conditions are met:
9 | #
10 | # 1. Redistributions of source code must retain the above copyright notice, this
11 | # list of conditions and the following disclaimer.
12 | #
13 | # 2. Redistributions in binary form must reproduce the above copyright notice,
14 | # this list of conditions and the following disclaimer in the documentation
15 | # and/or other materials provided with the distribution.
16 | #
17 | # 3. Neither the name of the copyright holder nor the names of its
18 | # contributors may be used to endorse or promote products derived from
19 | # this software without specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 | import os
32 | import random
33 |
34 | import numpy as np
35 |
36 | import isaacgym
37 | from isaacgymenvs.pbt.pbt import PbtAlgoObserver, initial_pbt_check
38 | from omegaconf import DictConfig, OmegaConf
39 | import hydra
40 | from utils.isaacgymenvs_make import isaacgym_task_map, make
41 | from isaacgymenvs.utils.reformat import omegaconf_to_dict, print_dict
42 | from isaacgymenvs.utils.utils import set_np_formatting, set_seed
43 |
44 | import torch
45 | from algos.PPO import PPO, eval_PPO
46 | from algos.DDPG_demos_rnn_vision import DDPG_demos_rnn_vision, eval_DDPG_demos_rnn_vision
47 | from algos.DDPG_demos_generate import DDPG_demos_generate
48 |
49 |
50 | @hydra.main(config_name="config", config_path="./cfg")
51 | def launch_rlg_hydra(cfg: DictConfig):
52 | if cfg.pbt.enabled:
53 | initial_pbt_check(cfg)
54 |
55 | cfg_dict = omegaconf_to_dict(cfg)
56 | print_dict(cfg_dict)
57 |
58 | # set numpy formatting for printing only
59 | set_np_formatting()
60 |
61 | # global rank of the GPU
62 | global_rank = int(os.getenv("RANK", "0"))
63 |
64 | # sets seed. if seed is -1 will pick a random one
65 | cfg.seed = set_seed(
66 | cfg.seed, torch_deterministic=cfg.torch_deterministic, rank=global_rank
67 | )
68 |
69 | # TRY NOT TO MODIFY: seeding
70 | random.seed(cfg.seed)
71 | np.random.seed(cfg.seed)
72 | torch.manual_seed(cfg.seed)
73 | torch.backends.cudnn.deterministic = True
74 |
75 | algo = cfg["train"]["params"]["algo"]["name"]
76 |
77 | envs = make(
78 | cfg.seed,
79 | cfg.task_name,
80 | cfg.task.env.numEnvs,
81 | cfg.sim_device,
82 | cfg.rl_device,
83 | cfg.graphics_device_id,
84 | cfg.headless,
85 | cfg.multi_gpu,
86 | cfg.capture_video,
87 | cfg.force_render,
88 | cfg
89 | )
90 |
91 | if cfg["test"]:
92 | if algo == "cat_a2c_continuous":
93 | eval_PPO(cfg, envs)
94 | elif algo == "cat_ddpg_demos_rnn_vision_continuous":
95 | eval_DDPG_demos_rnn_vision(cfg, envs)
96 | else:
97 | raise NotImplementedError
98 | else:
99 | if algo == "cat_a2c_continuous":
100 | PPO(cfg, envs)
101 | elif algo == "cat_ddpg_demos_rnn_vision_continuous":
102 | DDPG_demos_rnn_vision(cfg, envs)
103 | elif algo == "cat_ddpg_demos_generate_continuous":
104 | DDPG_demos_generate(cfg, envs)
105 | else:
106 | raise NotImplementedError
107 |
108 | if __name__ == "__main__":
109 | launch_rlg_hydra()
110 |
--------------------------------------------------------------------------------
/solo12/srdf/solo.srdf:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/utils/constraint_manager.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 | class ConstraintManager:
4 | """Handle the computation of termination probabilities based on constraints
5 | violations (Constraints as Terminations).
6 |
7 | Args:
8 | tau (float): discount factor
9 | min_p (float): minimum termination probability
10 | """
11 |
12 | def __init__(self, tau=0.95, min_p=0.0):
13 | self.running_maxes = {} # Polyak average of the maximum constraint violation
14 | self.running_mins = {} # Polyak average of the minimum constraint violation
15 | self.probs = {} # Termination probabilities for each constraint
16 | self.max_p = {}
17 | self.raw_constraints = {}
18 | self.tau = tau # Discount factor
19 | self.min_p = min_p # Minimum termination probability
20 |
21 | def reset(self):
22 | """Reset the termination probabilities of the constraint manager."""
23 | self.probs = {}
24 | self.raw_constraints = {}
25 |
26 | def add(self, name, constraint, max_p=0.1):
27 | """Add a constraint violation to the constraint manager and compute the
28 | associated termination probability.
29 |
30 | Args:
31 | name (string): name of the constraint
32 | constraint (float tensor): value of constraint violations for this constraint
33 | max_p (float): maximum termination probability
34 | """
35 |
36 | # First, put the constraint in the form torch.FloatTensor of shape (num_envs, n_constraints).
37 | # Convert constraint violations to float if they are not already
38 | if not torch.is_floating_point(constraint):
39 | constraint = constraint.float()
40 |
41 | # Ensure constraint is 2-dimensional even with a single element
42 | if len(constraint.size()) == 1:
43 | constraint = constraint.unsqueeze(1)
44 |
45 | # Get the maximum constraint violation for the current step
46 | constraint_max = constraint.max(dim=0, keepdim=True)[0].clamp(min=1e-6)
47 |
48 | # Compute polyak average of the maximum constraint violation for this constraint
49 | if name not in self.running_maxes:
50 | self.running_maxes[name] = constraint_max
51 | else:
52 | self.running_maxes[name] = (
53 | self.tau * self.running_maxes[name] + (1.0 - self.tau) * constraint_max
54 | )
55 |
56 | self.raw_constraints[name] = constraint
57 |
58 | # Get samples for which there is a constraint violation
59 | mask = constraint > 0.0
60 |
61 | # Compute the termination probability which scales between min_p and max_p with
62 | # increasing constraint violation. Remains at 0 when there is no violation.
63 | probs = torch.zeros_like(constraint)
64 | probs[mask] = self.min_p + torch.clamp(
65 | constraint[mask]
66 | / (self.running_maxes[name].expand(constraint.size())[mask]),
67 | min=0.0,
68 | max=1.0,
69 | ) * (max_p - self.min_p)
70 | self.probs[name] = probs
71 | self.max_p[name] = torch.tensor(max_p, device = constraint.device).repeat(constraint.shape[1])
72 |
73 | def get_probs(self):
74 | """Returns the termination probabilities due to constraint violations."""
75 | probs = torch.cat(list(self.probs.values()), dim=1)
76 | probs = probs.max(1).values
77 | return probs
78 |
79 | def get_raw_constraints(self):
80 | return torch.cat(list(self.raw_constraints.values()), dim=1)
81 |
82 | def get_running_maxes(self):
83 | return torch.cat(list(self.running_maxes.values()), dim = 1)
84 |
85 | def get_max_p(self):
86 | return torch.cat(list(self.max_p.values()))
87 |
88 | def get_str(self, names=None):
89 | """Get a debug string with constraints names and their average termination probabilities"""
90 | if names is None:
91 | names = list(self.probs.keys())
92 | txt = ""
93 | for name in names:
94 | txt += " {}: {}".format(
95 | name,
96 | str(
97 | 100.0 * self.probs[name].max(1).values.gt(0.0).float().mean().item()
98 | )[:4],
99 | )
100 | # txt += " {}: {}".format(name, str(100.0*self.probs[name].max(1).values.float().mean().item())[:4])
101 |
102 | return txt[1:]
103 |
104 | def log_all(self, episode_sums):
105 | """Log terminations probabilities in episode_sums with cstr_NAME key."""
106 | for name in list(self.probs.keys()):
107 | values = self.probs[name].max(1).values.gt(0.0).float()
108 | if "cstr_" + name not in episode_sums:
109 | episode_sums["cstr_" + name] = torch.zeros_like(values)
110 | episode_sums["cstr_" + name] += values
111 |
112 | def get_names(self):
113 | """Return a list of all constraint names."""
114 | return list(self.probs.keys())
115 |
116 | def get_vals(self):
117 | """Return a list of all constraint termination probabilities."""
118 | res = []
119 | for key in self.probs.keys():
120 | res += [100.0 * self.probs[key].max(1).values.gt(0.0).float().mean().item()]
121 | return res
122 |
--------------------------------------------------------------------------------
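The constraint manager above can be exercised on its own to see how violations map to termination probabilities. A minimal usage sketch (not part of the repository; the tensor values and `num_envs` are hypothetical):

```python
import torch

from utils.constraint_manager import ConstraintManager

num_envs = 4  # hypothetical number of parallel environments

cm = ConstraintManager(tau=0.95, min_p=0.0)
cm.reset()

# Hypothetical per-environment torque violations; positive means the limit is exceeded.
torque_violation = torch.tensor([[0.0], [0.5], [1.2], [0.0]])
cm.add("torque", torque_violation, max_p=0.1)

# Per-environment termination probability (max over all registered constraints).
p_terminate = cm.get_probs()                   # shape: (num_envs,)
terminated = torch.rand(num_envs) < p_terminate

print(cm.get_str())  # e.g. "torque: 50.0" (percentage of envs violating the constraint)
```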
/cfg/task/SoloParkour.yaml:
--------------------------------------------------------------------------------
1 | # used to create the object
2 | name: SoloParkour
3 |
4 | physics_engine: "physx"
5 |
6 | env:
7 | numEnvs: ${resolve_default:4096,${...num_envs}}
8 | numActions: 12
9 | numLatent: 0 # (8, 3, 3)
10 | numHistorySamples: 1 # t-1, t-4, t-7
11 | numHistoryStep: 0 # t-1 -> t-4 -> t-7
12 | envSpacing: 3. # [m]
13 | enableDebugVis: False
14 | enableDebugPlots: False
15 | enableJoystick: False
16 | onlyForwards: False
17 | onlyForwardsVelocity: 1.0
18 | startAtLevel: -1
19 |
20 | terrain:
21 | terrainType: trimesh # none, plane, or trimesh
22 | staticFriction: 1.0 # [-]
23 | dynamicFriction: 1.0 # [-]
24 | restitution: 0. # [-]
25 | # rough terrain only:
26 | curriculum: true
27 | maxInitMapLevel: 0
28 | minInitMapLevel: 0
29 | mapLength: 8.
30 | mapWidth: 4.
31 | numLevels: 10
32 | numTerrains: 20
33 |
34 | # tri mesh only:
35 | slopeTreshold: 0.2
36 |
37 | terrainProportions: # Proportions for each kind of terrain
38 | gap_parkour: 0.55
39 | jump_parkour: 0.3
40 | stairs_parkour: 0.0
41 | hurdle_parkour: 0.1
42 | crawl_parkour: 0.05
43 | random_uniform: 0.0
44 |
45 | baseInitState:
46 | pos: [0.0, 0.0, 0.3] # x,y,z [m]
47 | rot: [0.0, 0.0, 0.0, 1.0] # x,y,z,w [quat]
48 | vLinear: [0.0, 0.0, 0.0] # x,y,z [m/s]
49 | vAngular: [0.0, 0.0, 0.0] # x,y,z [rad/s]
50 |
51 | randomCommandVelocityRanges:
52 | # train
53 | linear_x: [0.0, 1.5] # min max [m/s] # -0.3 1.0
54 | linear_y: [-0.5, 0.5] # min max [m/s]
55 | yaw: [-0.78, 0.78] # min max [rad/s]
56 |
57 | control:
58 | # PD Drive parameters:
59 | stiffness: 4.0 # [N*m/rad]
60 | damping: 0.2 # [N*m*s/rad]
61 | # action scale: target angle = actionScale * action + defaultAngle
62 | actionScale: 0.5
63 | # decimation: Number of control action updates @ sim DT per policy DT
64 | decimation: 4
65 | damping_curriculum:
66 | enable: False
67 | num_steps: 400000
68 | init_damping: 0.05
69 |
70 | defaultJointAngles: # = target angles when action = 0.0
71 | FL_HAA: 0.05 # [rad]
72 | HL_HAA: 0.05 # [rad]
73 | FR_HAA: -0.05 # [rad]
74 | HR_HAA: -0.05 # [rad]
75 |
76 | FL_HFE: 0.6 # [rad]
77 | HL_HFE: 0.6 # [rad]
78 | FR_HFE: 0.6 # [rad]
79 | HR_HFE: 0.6 # [rad]
80 |
81 | FL_KFE: -1.2 # [rad]
82 | HL_KFE: -1.2 # [rad]
83 | FR_KFE: -1.2 # [rad]
84 | HR_KFE: -1.2 # [rad]
85 |
86 | urdfAsset:
87 | file: "solo12/solo12_mpi.urdf"
88 | baseName: base_link
89 | footName: FOOT
90 | shinName: LOWER
91 | kneeName: UPPER
92 | collapseFixedJoints: True
93 | fixBaseLink: false
94 | defaultDofDriveMode: 4 # see GymDofDriveModeFlags (0 is none, 1 is pos tgt, 2 is vel tgt, 4 effort)
95 | flip_visual_attachments: False
96 | armature: 0.00036207 # i.e motor inertia at joint level
97 |
98 | learn:
99 | allowShinContacts: True
100 | allowKneeContacts: false
101 |
102 | # Scales of rewards
103 | rewMult: 1.0
104 | survivalBonus: 0.5
105 |
106 | # Misc quantities for rewards
107 | linearVelocityXYRewardDelta: 0.25
108 | angularVelocityZRewardDelta: 0.25
109 | feetAirTimeRewardTarget: 0.25
110 |
111 | # Constraints
112 | enableConstraints: "cat" # "none", "cat"
113 | constraints_CaT:
114 | feetAirTimeConstraint: 0.2
115 | tauConstraint: 0.95
116 | minPConstraint: 0.0
117 | softPConstraint: 0.1
118 | useSoftPCurriculum: True
119 | trackingConstraint: 1000.0
120 |
121 | limits:
122 | torque: 3.0
123 | acc: 800.0
124 | vel: 16.0
125 | action_rate: 80.0
126 | base_orientation: 0.1
127 | foot_contact_force: 80.0
128 | HFE: 1.9
129 | HFE_min: -0.2
130 | HAA: 0.2
131 | min_base_height: 0.06
132 | heading: 0.1
133 |
134 | # Other
135 | flatTerrainThreshold: 0.001
136 | vel_deadzone: 0.2
137 | base_height_target: 0.245
138 | gait_period: 0.4
139 |
140 | # observations
141 | observe:
142 | base_lin_vel: false
143 | base_ang_vel: true
144 | commands: true
145 | misc: true
146 | heights: true
147 | ceilings: true
148 | phases: false
149 | imu: false
150 | measured_points_step: 0.08
151 | measured_points_x: [-3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
152 | measured_points_y: [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5]
153 | phases_freq: 2.0
154 |
155 | # normalization
156 | linearVelocityScale: 2.0
157 | angularVelocityScale: 0.25
158 | dofPositionScale: 1.0
159 | dofVelocityScale: 0.05
160 | heightMeasurementScale: 5.0
161 | imuAccelerationScale: 0.1
162 |
163 | # noise
164 | addNoise: true
165 | noiseLevel: 1.0 # scales other values
166 | dofPositionNoise: 0.01
167 | dofVelocityNoise: 0.2
168 | linearVelocityNoise: 0.0
169 | angularVelocityNoise: 0.001
170 | gravityNoise: 0.05
171 | heightMeasurementNoise: 0.01
172 |
173 | # friction randomization
174 | randomizeFriction: true
175 | frictionRange: [0.5, 1.25]
176 |
177 | # random pushes during training
178 | pushRobots: true
179 | pushInterval_s: 8
180 |
181 | # episode length in seconds
182 | episodeLength_s: 15
183 |
184 | # viewer cam:
185 | viewer:
186 | refEnv: 0
187 | pos: [0, 0, 5] # [m]
188 | lookat: [1., 1, 4.5] # [m]
189 |
190 | # set to True if you use camera sensors in the environment
191 | enableCameraSensors: False
192 | depth:
193 | use_depth: False
194 | position: [0.24, 0, 0.03] # [0.21, 0, 0.03]
195 | angle: [-5, 5]
196 | update_interval: 5
197 | image_size: [48, 85] # [58, 87]
198 | depth_clip_max: 1.0
199 | depth_clip_min: 0.04
200 | horizontal_fov: 87
201 |
202 | sim:
203 | dt: 0.005
204 | substeps: 1
205 | up_axis: "z"
206 | use_gpu_pipeline: ${eq:${...pipeline},"gpu"}
207 | gravity: [0.0, 0.0, -9.81]
208 | physx:
209 | num_threads: ${....num_threads}
210 | solver_type: 0 # ${....solver_type}
211 | use_gpu: ${contains:"cuda",${....sim_device}} # set to False to run on CPU
212 | num_position_iterations: 4
213 | num_velocity_iterations: 1
214 | contact_offset: 0.02
215 | rest_offset: 0.0
216 | bounce_threshold_velocity: 0.2
217 | max_depenetration_velocity: 100.0
218 | default_buffer_size_multiplier: 5.0
219 | max_gpu_contact_pairs: 33554432 # 33554432 = 32*1024*1024
220 | num_subscenes: ${....num_subscenes}
221 | contact_collection: 1 # 0: CC_NEVER (don't collect contact info), 1: CC_LAST_SUBSTEP (collect only contacts on last substep), 2: CC_ALL_SUBSTEPS (broken - do not use!)
222 |
223 | task:
224 | randomize: False
225 |
--------------------------------------------------------------------------------
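To make the `control` section above concrete: the policy acts every `decimation` simulation steps (0.02 s with the values above), and its outputs become joint targets via the formula in the `actionScale` comment. A small sketch of how these numbers combine (the actual controller lives in the task implementation, which is not shown here, so the PD law below is an assumption):

```python
# Policy timestep implied by the sim/control settings above.
sim_dt = 0.005                      # sim.dt
decimation = 4                      # control.decimation
policy_dt = sim_dt * decimation     # 0.02 s, matching the "t = 0.02" comments in the train configs

# Target angle from a (hypothetical) policy action for one joint.
action_scale = 0.5                  # control.actionScale
default_angle = 0.6                 # defaultJointAngles.FL_HFE [rad]
action = 0.2                        # hypothetical policy output for that joint
target_angle = action_scale * action + default_angle    # 0.7 rad

# Standard PD torque law (assumed); stiffness/damping come from the control section.
stiffness, damping = 4.0, 0.2
q, qd = 0.65, 0.0                   # hypothetical measured joint position / velocity
torque = stiffness * (target_angle - q) - damping * qd  # 0.2 N*m, well under limits.torque = 3.0
```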
/utils/isaacgymenvs_make.py:
--------------------------------------------------------------------------------
1 | # train.py
2 | # Script to train policies in Isaac Gym
3 | #
4 | # Copyright (c) 2018-2023, NVIDIA Corporation
5 | # All rights reserved.
6 | #
7 | # Redistribution and use in source and binary forms, with or without
8 | # modification, are permitted provided that the following conditions are met:
9 | #
10 | # 1. Redistributions of source code must retain the above copyright notice, this
11 | # list of conditions and the following disclaimer.
12 | #
13 | # 2. Redistributions in binary form must reproduce the above copyright notice,
14 | # this list of conditions and the following disclaimer in the documentation
15 | # and/or other materials provided with the distribution.
16 | #
17 | # 3. Neither the name of the copyright holder nor the names of its
18 | # contributors may be used to endorse or promote products derived from
19 | # this software without specific prior written permission.
20 | #
21 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
24 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
25 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
27 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
28 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
29 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 |
32 |
33 | import os
34 | from typing import Callable, Dict, Tuple, Any
35 | import hydra
36 | from hydra import compose, initialize
37 | from hydra.core.hydra_config import HydraConfig
38 | from omegaconf import DictConfig
39 | from isaacgymenvs.utils.reformat import omegaconf_to_dict
40 | from tasks.solo_parkour import SoloParkour
41 |
42 | isaacgym_task_map = {
43 | "SoloParkour": SoloParkour,
44 | }
45 |
46 | def make(
47 | seed: int,
48 | task: str,
49 | num_envs: int,
50 | sim_device: str,
51 | rl_device: str,
52 | graphics_device_id: int = -1,
53 | headless: bool = False,
54 | multi_gpu: bool = False,
55 | virtual_screen_capture: bool = False,
56 | force_render: bool = True,
57 | cfg: DictConfig = None
58 | ):
59 | # from isaacgymenvs.utils.rlgames_utils import get_rlgames_env_creator
60 | # create hydra config if no config passed in
61 | if cfg is None:
62 | # reset current hydra config if already parsed (but not passed in here)
63 | if HydraConfig.initialized():
64 | task = HydraConfig.get().runtime.choices['task']
65 | hydra.core.global_hydra.GlobalHydra.instance().clear()
66 |
67 | with initialize(config_path="./cfg"):
68 | cfg = compose(config_name="config", overrides=[f"task={task}"])
69 | cfg_dict = omegaconf_to_dict(cfg.task)
70 | cfg_dict['env']['numEnvs'] = num_envs
71 | # reuse existing config
72 | else:
73 | cfg_dict = omegaconf_to_dict(cfg.task)
74 |
75 | # Adding custom info to access within the env
76 | cfg_dict['test'] = cfg.test
77 | cfg_dict['horizon_length'] = cfg.train.params.config.horizon_length
78 | cfg_dict['max_epochs'] = cfg.train.params.config.max_epochs
79 |
80 | create_rlgpu_env = get_rlgames_env_creator(
81 | seed=seed,
82 | task_config=cfg_dict,
83 | task_name=cfg_dict["name"],
84 | sim_device=sim_device,
85 | rl_device=rl_device,
86 | graphics_device_id=graphics_device_id,
87 | headless=headless,
88 | multi_gpu=multi_gpu,
89 | virtual_screen_capture=virtual_screen_capture,
90 | force_render=force_render,
91 | )
92 | return create_rlgpu_env()
93 |
94 | def get_rlgames_env_creator(
95 | # used to create the vec task
96 | seed: int,
97 | task_config: dict,
98 | task_name: str,
99 | sim_device: str,
100 | rl_device: str,
101 | graphics_device_id: int,
102 | headless: bool,
103 | # used to handle multi-gpu case
104 | multi_gpu: bool = False,
105 | post_create_hook: Callable = None,
106 | virtual_screen_capture: bool = False,
107 | force_render: bool = False,
108 | ):
109 | """Parses the configuration parameters for the environment task and creates a VecTask
110 |
111 | Args:
112 | task_config: environment configuration.
113 | task_name: Name of the task, used to evaluate based on the imported name (eg 'Trifinger')
114 | sim_device: The type of env device, eg 'cuda:0'
115 | rl_device: Device that RL will be done on, eg 'cuda:0'
116 | graphics_device_id: Graphics device ID.
117 | headless: Whether to run in headless mode.
118 | multi_gpu: Whether to use multi gpu
119 | post_create_hook: Hooks to be called after environment creation.
120 | [Needed to setup WandB only for one of the RL Games instances when doing multiple GPUs]
121 | virtual_screen_capture: Set to True to allow the user to get the captured screen as an RGB array via `env.render(mode='rgb_array')`.
122 | force_render: Set to True to always force rendering in the steps (if the `control_freq_inv` is greater than 1 we suggest setting this arg to True)
123 | Returns:
124 | A callable that creates and returns a VecTask environment.
125 | """
126 | def create_rlgpu_env():
127 | """
128 | Creates the task from configurations and wraps it using RL-games wrappers if required.
129 | """
130 | if multi_gpu:
131 |
132 | local_rank = int(os.getenv("LOCAL_RANK", "0"))
133 | global_rank = int(os.getenv("RANK", "0"))
134 |
135 | # local rank of the GPU in a node
136 | local_rank = int(os.getenv("LOCAL_RANK", "0"))
137 | # global rank of the GPU
138 | global_rank = int(os.getenv("RANK", "0"))
139 | # total number of GPUs across all nodes
140 | world_size = int(os.getenv("WORLD_SIZE", "1"))
141 |
142 | print(f"global_rank = {global_rank} local_rank = {local_rank} world_size = {world_size}")
143 |
144 | _sim_device = f'cuda:{local_rank}'
145 | _rl_device = f'cuda:{local_rank}'
146 |
147 | task_config['rank'] = local_rank
148 | task_config['rl_device'] = _rl_device
149 | else:
150 | _sim_device = sim_device
151 | _rl_device = rl_device
152 |
153 | # create native task and pass custom config
154 | env = isaacgym_task_map[task_name](
155 | cfg=task_config,
156 | rl_device=_rl_device,
157 | sim_device=_sim_device,
158 | graphics_device_id=graphics_device_id,
159 | headless=headless,
160 | virtual_screen_capture=virtual_screen_capture,
161 | force_render=force_render,
162 | )
163 |
164 | if post_create_hook is not None:
165 | post_create_hook()
166 |
167 | return env
168 | return create_rlgpu_env
169 |
--------------------------------------------------------------------------------
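For completeness, `make` can also be called without a pre-parsed Hydra config, in which case it composes one itself (the `cfg is None` branch above). A minimal sketch (not part of the repository; assumes it is run from the repository root with Isaac Gym installed and a CUDA device available):

```python
from utils.isaacgymenvs_make import make

# Build a small SoloParkour vectorized environment directly, letting make() compose the config.
envs = make(
    seed=42,
    task="SoloParkour",
    num_envs=64,
    sim_device="cuda:0",
    rl_device="cuda:0",
    graphics_device_id=0,
    headless=True,
)
# `envs` is the VecTask instance that the algorithms in algos/ consume (see train.py).
```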
/solo12/meshes/sphere.obj:
--------------------------------------------------------------------------------
1 | o Sphere
2 | v 0 1 0
3 | v 0 1 0
4 | v 0 1 0
5 | v 0 1 0
6 | v 0 1 0
7 | v 0 1 0
8 | v 0 1 0
9 | v 0 1 0
10 | v 0 1 0
11 | v -0.5 0.8660253882408142 0
12 | v -0.3535533845424652 0.8660253882408142 0.3535533845424652
13 | v -3.0616171314629196e-17 0.8660253882408142 0.5
14 | v 0.3535533845424652 0.8660253882408142 0.3535533845424652
15 | v 0.5 0.8660253882408142 6.123234262925839e-17
16 | v 0.3535533845424652 0.8660253882408142 -0.3535533845424652
17 | v 9.184850732644269e-17 0.8660253882408142 -0.5
18 | v -0.3535533845424652 0.8660253882408142 -0.3535533845424652
19 | v -0.5 0.8660253882408142 -1.2246468525851679e-16
20 | v -0.8660253882408142 0.5 0
21 | v -0.6123724579811096 0.5 0.6123724579811096
22 | v -5.302876236065149e-17 0.5 0.8660253882408142
23 | v 0.6123724579811096 0.5 0.6123724579811096
24 | v 0.8660253882408142 0.5 1.0605752472130298e-16
25 | v 0.6123724579811096 0.5 -0.6123724579811096
26 | v 1.5908628708195447e-16 0.5 -0.8660253882408142
27 | v -0.6123724579811096 0.5 -0.6123724579811096
28 | v -0.8660253882408142 0.5 -2.1211504944260596e-16
29 | v -1 6.123234262925839e-17 0
30 | v -0.7071067690849304 6.123234262925839e-17 0.7071067690849304
31 | v -6.123234262925839e-17 6.123234262925839e-17 1
32 | v 0.7071067690849304 6.123234262925839e-17 0.7071067690849304
33 | v 1 6.123234262925839e-17 1.2246468525851679e-16
34 | v 0.7071067690849304 6.123234262925839e-17 -0.7071067690849304
35 | v 1.8369701465288538e-16 6.123234262925839e-17 -1
36 | v -0.7071067690849304 6.123234262925839e-17 -0.7071067690849304
37 | v -1 6.123234262925839e-17 -2.4492937051703357e-16
38 | v -0.8660253882408142 -0.5 0
39 | v -0.6123724579811096 -0.5 0.6123724579811096
40 | v -5.302876236065149e-17 -0.5 0.8660253882408142
41 | v 0.6123724579811096 -0.5 0.6123724579811096
42 | v 0.8660253882408142 -0.5 1.0605752472130298e-16
43 | v 0.6123724579811096 -0.5 -0.6123724579811096
44 | v 1.5908628708195447e-16 -0.5 -0.8660253882408142
45 | v -0.6123724579811096 -0.5 -0.6123724579811096
46 | v -0.8660253882408142 -0.5 -2.1211504944260596e-16
47 | v -0.5 -0.8660253882408142 0
48 | v -0.3535533845424652 -0.8660253882408142 0.3535533845424652
49 | v -3.0616171314629196e-17 -0.8660253882408142 0.5
50 | v 0.3535533845424652 -0.8660253882408142 0.3535533845424652
51 | v 0.5 -0.8660253882408142 6.123234262925839e-17
52 | v 0.3535533845424652 -0.8660253882408142 -0.3535533845424652
53 | v 9.184850732644269e-17 -0.8660253882408142 -0.5
54 | v -0.3535533845424652 -0.8660253882408142 -0.3535533845424652
55 | v -0.5 -0.8660253882408142 -1.2246468525851679e-16
56 | v -1.2246468525851679e-16 -1 0
57 | v -8.659560603426554e-17 -1 8.659560603426554e-17
58 | v -7.498798786105971e-33 -1 1.2246468525851679e-16
59 | v 8.659560603426554e-17 -1 8.659560603426554e-17
60 | v 1.2246468525851679e-16 -1 1.4997597572211942e-32
61 | v 8.659560603426554e-17 -1 -8.659560603426554e-17
62 | v 2.2496396358317913e-32 -1 -1.2246468525851679e-16
63 | v -8.659560603426554e-17 -1 -8.659560603426554e-17
64 | v -1.2246468525851679e-16 -1 -2.9995195144423884e-32
65 | vt 0 1
66 | vt 0.125 1
67 | vt 0.25 1
68 | vt 0.375 1
69 | vt 0.5 1
70 | vt 0.625 1
71 | vt 0.75 1
72 | vt 0.875 1
73 | vt 1 1
74 | vt 0 0.8333333134651184
75 | vt 0.125 0.8333333134651184
76 | vt 0.25 0.8333333134651184
77 | vt 0.375 0.8333333134651184
78 | vt 0.5 0.8333333134651184
79 | vt 0.625 0.8333333134651184
80 | vt 0.75 0.8333333134651184
81 | vt 0.875 0.8333333134651184
82 | vt 1 0.8333333134651184
83 | vt 0 0.6666666865348816
84 | vt 0.125 0.6666666865348816
85 | vt 0.25 0.6666666865348816
86 | vt 0.375 0.6666666865348816
87 | vt 0.5 0.6666666865348816
88 | vt 0.625 0.6666666865348816
89 | vt 0.75 0.6666666865348816
90 | vt 0.875 0.6666666865348816
91 | vt 1 0.6666666865348816
92 | vt 0 0.5
93 | vt 0.125 0.5
94 | vt 0.25 0.5
95 | vt 0.375 0.5
96 | vt 0.5 0.5
97 | vt 0.625 0.5
98 | vt 0.75 0.5
99 | vt 0.875 0.5
100 | vt 1 0.5
101 | vt 0 0.3333333432674408
102 | vt 0.125 0.3333333432674408
103 | vt 0.25 0.3333333432674408
104 | vt 0.375 0.3333333432674408
105 | vt 0.5 0.3333333432674408
106 | vt 0.625 0.3333333432674408
107 | vt 0.75 0.3333333432674408
108 | vt 0.875 0.3333333432674408
109 | vt 1 0.3333333432674408
110 | vt 0 0.1666666716337204
111 | vt 0.125 0.1666666716337204
112 | vt 0.25 0.1666666716337204
113 | vt 0.375 0.1666666716337204
114 | vt 0.5 0.1666666716337204
115 | vt 0.625 0.1666666716337204
116 | vt 0.75 0.1666666716337204
117 | vt 0.875 0.1666666716337204
118 | vt 1 0.1666666716337204
119 | vt 0 0
120 | vt 0.125 0
121 | vt 0.25 0
122 | vt 0.375 0
123 | vt 0.5 0
124 | vt 0.625 0
125 | vt 0.75 0
126 | vt 0.875 0
127 | vt 1 0
128 | vn 0 1 0
129 | vn 0 1 0
130 | vn 0 1 0
131 | vn 0 1 0
132 | vn 0 1 0
133 | vn 0 1 0
134 | vn 0 1 0
135 | vn 0 1 0
136 | vn 0 1 0
137 | vn -0.5 0.8660253882408142 0
138 | vn -0.3535533845424652 0.8660253882408142 0.3535533845424652
139 | vn -3.0616171314629196e-17 0.8660253882408142 0.5
140 | vn 0.3535533845424652 0.8660253882408142 0.3535533845424652
141 | vn 0.5 0.8660253882408142 6.123234262925839e-17
142 | vn 0.3535533845424652 0.8660253882408142 -0.3535533845424652
143 | vn 9.184850732644269e-17 0.8660253882408142 -0.5
144 | vn -0.3535533845424652 0.8660253882408142 -0.3535533845424652
145 | vn -0.5 0.8660253882408142 -1.2246468525851679e-16
146 | vn -0.8660253882408142 0.5 0
147 | vn -0.6123724579811096 0.5 0.6123724579811096
148 | vn -5.302876236065149e-17 0.5 0.8660253882408142
149 | vn 0.6123724579811096 0.5 0.6123724579811096
150 | vn 0.8660253882408142 0.5 1.0605752472130298e-16
151 | vn 0.6123724579811096 0.5 -0.6123724579811096
152 | vn 1.5908628708195447e-16 0.5 -0.8660253882408142
153 | vn -0.6123724579811096 0.5 -0.6123724579811096
154 | vn -0.8660253882408142 0.5 -2.1211504944260596e-16
155 | vn -1 6.123234262925839e-17 0
156 | vn -0.7071067690849304 6.123234262925839e-17 0.7071067690849304
157 | vn -6.123234262925839e-17 6.123234262925839e-17 1
158 | vn 0.7071067690849304 6.123234262925839e-17 0.7071067690849304
159 | vn 1 6.123234262925839e-17 1.2246468525851679e-16
160 | vn 0.7071067690849304 6.123234262925839e-17 -0.7071067690849304
161 | vn 1.8369701465288538e-16 6.123234262925839e-17 -1
162 | vn -0.7071067690849304 6.123234262925839e-17 -0.7071067690849304
163 | vn -1 6.123234262925839e-17 -2.4492937051703357e-16
164 | vn -0.8660253882408142 -0.5 0
165 | vn -0.6123724579811096 -0.5 0.6123724579811096
166 | vn -5.302876236065149e-17 -0.5 0.8660253882408142
167 | vn 0.6123724579811096 -0.5 0.6123724579811096
168 | vn 0.8660253882408142 -0.5 1.0605752472130298e-16
169 | vn 0.6123724579811096 -0.5 -0.6123724579811096
170 | vn 1.5908628708195447e-16 -0.5 -0.8660253882408142
171 | vn -0.6123724579811096 -0.5 -0.6123724579811096
172 | vn -0.8660253882408142 -0.5 -2.1211504944260596e-16
173 | vn -0.5 -0.8660253882408142 0
174 | vn -0.3535533845424652 -0.8660253882408142 0.3535533845424652
175 | vn -3.0616171314629196e-17 -0.8660253882408142 0.5
176 | vn 0.3535533845424652 -0.8660253882408142 0.3535533845424652
177 | vn 0.5 -0.8660253882408142 6.123234262925839e-17
178 | vn 0.3535533845424652 -0.8660253882408142 -0.3535533845424652
179 | vn 9.184850732644269e-17 -0.8660253882408142 -0.5
180 | vn -0.3535533845424652 -0.8660253882408142 -0.3535533845424652
181 | vn -0.5 -0.8660253882408142 -1.2246468525851679e-16
182 | vn -1.2246468525851679e-16 -1 0
183 | vn -8.659560603426554e-17 -1 8.659560603426554e-17
184 | vn -7.498798786105971e-33 -1 1.2246468525851679e-16
185 | vn 8.659560603426554e-17 -1 8.659560603426554e-17
186 | vn 1.2246468525851679e-16 -1 1.4997597572211942e-32
187 | vn 8.659560603426554e-17 -1 -8.659560603426554e-17
188 | vn 2.2496396358317913e-32 -1 -1.2246468525851679e-16
189 | vn -8.659560603426554e-17 -1 -8.659560603426554e-17
190 | vn -1.2246468525851679e-16 -1 -2.9995195144423884e-32
191 | f 1/1/1 10/10/10 11/11/11
192 | f 2/2/2 11/11/11 12/12/12
193 | f 3/3/3 12/12/12 13/13/13
194 | f 4/4/4 13/13/13 14/14/14
195 | f 5/5/5 14/14/14 15/15/15
196 | f 6/6/6 15/15/15 16/16/16
197 | f 7/7/7 16/16/16 17/17/17
198 | f 8/8/8 17/17/17 18/18/18
199 | f 11/11/11 10/10/10 20/20/20
200 | f 10/10/10 19/19/19 20/20/20
201 | f 12/12/12 11/11/11 21/21/21
202 | f 11/11/11 20/20/20 21/21/21
203 | f 13/13/13 12/12/12 22/22/22
204 | f 12/12/12 21/21/21 22/22/22
205 | f 14/14/14 13/13/13 23/23/23
206 | f 13/13/13 22/22/22 23/23/23
207 | f 15/15/15 14/14/14 24/24/24
208 | f 14/14/14 23/23/23 24/24/24
209 | f 16/16/16 15/15/15 25/25/25
210 | f 15/15/15 24/24/24 25/25/25
211 | f 17/17/17 16/16/16 26/26/26
212 | f 16/16/16 25/25/25 26/26/26
213 | f 18/18/18 17/17/17 27/27/27
214 | f 17/17/17 26/26/26 27/27/27
215 | f 20/20/20 19/19/19 29/29/29
216 | f 19/19/19 28/28/28 29/29/29
217 | f 21/21/21 20/20/20 30/30/30
218 | f 20/20/20 29/29/29 30/30/30
219 | f 22/22/22 21/21/21 31/31/31
220 | f 21/21/21 30/30/30 31/31/31
221 | f 23/23/23 22/22/22 32/32/32
222 | f 22/22/22 31/31/31 32/32/32
223 | f 24/24/24 23/23/23 33/33/33
224 | f 23/23/23 32/32/32 33/33/33
225 | f 25/25/25 24/24/24 34/34/34
226 | f 24/24/24 33/33/33 34/34/34
227 | f 26/26/26 25/25/25 35/35/35
228 | f 25/25/25 34/34/34 35/35/35
229 | f 27/27/27 26/26/26 36/36/36
230 | f 26/26/26 35/35/35 36/36/36
231 | f 29/29/29 28/28/28 38/38/38
232 | f 28/28/28 37/37/37 38/38/38
233 | f 30/30/30 29/29/29 39/39/39
234 | f 29/29/29 38/38/38 39/39/39
235 | f 31/31/31 30/30/30 40/40/40
236 | f 30/30/30 39/39/39 40/40/40
237 | f 32/32/32 31/31/31 41/41/41
238 | f 31/31/31 40/40/40 41/41/41
239 | f 33/33/33 32/32/32 42/42/42
240 | f 32/32/32 41/41/41 42/42/42
241 | f 34/34/34 33/33/33 43/43/43
242 | f 33/33/33 42/42/42 43/43/43
243 | f 35/35/35 34/34/34 44/44/44
244 | f 34/34/34 43/43/43 44/44/44
245 | f 36/36/36 35/35/35 45/45/45
246 | f 35/35/35 44/44/44 45/45/45
247 | f 38/38/38 37/37/37 47/47/47
248 | f 37/37/37 46/46/46 47/47/47
249 | f 39/39/39 38/38/38 48/48/48
250 | f 38/38/38 47/47/47 48/48/48
251 | f 40/40/40 39/39/39 49/49/49
252 | f 39/39/39 48/48/48 49/49/49
253 | f 41/41/41 40/40/40 50/50/50
254 | f 40/40/40 49/49/49 50/50/50
255 | f 42/42/42 41/41/41 51/51/51
256 | f 41/41/41 50/50/50 51/51/51
257 | f 43/43/43 42/42/42 52/52/52
258 | f 42/42/42 51/51/51 52/52/52
259 | f 44/44/44 43/43/43 53/53/53
260 | f 43/43/43 52/52/52 53/53/53
261 | f 45/45/45 44/44/44 54/54/54
262 | f 44/44/44 53/53/53 54/54/54
263 | f 47/47/47 46/46/46 56/56/56
264 | f 48/48/48 47/47/47 57/57/57
265 | f 49/49/49 48/48/48 58/58/58
266 | f 50/50/50 49/49/49 59/59/59
267 | f 51/51/51 50/50/50 60/60/60
268 | f 52/52/52 51/51/51 61/61/61
269 | f 53/53/53 52/52/52 62/62/62
270 | f 54/54/54 53/53/53 63/63/63
271 |
--------------------------------------------------------------------------------
/algos/PPO.py:
--------------------------------------------------------------------------------
1 | import gym
2 |
3 | import os
4 | import random
5 | import time
6 | from datetime import datetime
7 |
8 | from omegaconf import DictConfig, OmegaConf
9 |
10 | import numpy as np
11 |
12 | import torch
13 | import torch.nn as nn
14 | import torch.optim as optim
15 | from torch.distributions.normal import Normal
16 | from torch.utils.tensorboard import SummaryWriter
17 |
18 | class RunningMeanStd(nn.Module):
19 | def __init__(self, shape = (), epsilon=1e-08):
20 | super(RunningMeanStd, self).__init__()
21 | self.register_buffer("running_mean", torch.zeros(shape))
22 | self.register_buffer("running_var", torch.ones(shape))
23 | self.register_buffer("count", torch.ones(()))
24 |
25 | self.epsilon = epsilon
26 |
27 | def forward(self, obs, update = True):
28 | if update:
29 | self.update(obs)
30 |
31 | return (obs - self.running_mean) / torch.sqrt(self.running_var + self.epsilon)
32 |
33 | def update(self, x):
34 | """Updates the mean, var and count from a batch of samples."""
35 | batch_mean = torch.mean(x, dim=0)
36 | batch_var = torch.var(x, correction=0, dim=0)
37 | batch_count = x.shape[0]
38 | self.update_from_moments(batch_mean, batch_var, batch_count)
39 |
40 | def update_from_moments(self, batch_mean, batch_var, batch_count):
41 | """Updates from batch mean, variance and count moments."""
42 | self.running_mean, self.running_var, self.count = update_mean_var_count_from_moments(
43 | self.running_mean, self.running_var, self.count, batch_mean, batch_var, batch_count
44 | )
45 |
46 | def update_mean_var_count_from_moments(
47 | mean, var, count, batch_mean, batch_var, batch_count
48 | ):
49 | """Updates the mean, var and count using the previous mean, var, count and batch values."""
50 | delta = batch_mean - mean
51 | tot_count = count + batch_count
52 |
53 | new_mean = mean + delta * batch_count / tot_count
54 | m_a = var * count
55 | m_b = batch_var * batch_count
56 | M2 = m_a + m_b + torch.square(delta) * count * batch_count / tot_count
57 | new_var = M2 / tot_count
58 | new_count = tot_count
59 |
60 | return new_mean, new_var, new_count
61 |
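# Worked example of the merge formula above (illustrative numbers): combining batch A = [1, 2, 3]
# (mean 2, var 2/3, count 3) with batch B = [5, 7] (mean 6, var 1, count 2) gives delta = 4,
# tot_count = 5, new_mean = 2 + 4 * 2/5 = 3.6, M2 = 2 + 2 + 16 * 3 * 2 / 5 = 23.2 and
# new_var = 23.2 / 5 = 4.64, which matches the population mean and variance of the
# concatenated samples [1, 2, 3, 5, 7].
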
62 | def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
63 | torch.nn.init.orthogonal_(layer.weight, std)
64 | torch.nn.init.constant_(layer.bias, bias_const)
65 | return layer
66 |
67 | class Agent(nn.Module):
68 | def __init__(self, envs):
69 | super().__init__()
70 | self.critic = nn.Sequential(
71 | layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 512)),
72 | nn.ELU(),
73 | layer_init(nn.Linear(512, 256)),
74 | nn.ELU(),
75 | layer_init(nn.Linear(256, 128)),
76 | nn.ELU(),
77 | layer_init(nn.Linear(128, 1), std=1.0),
78 | )
79 | self.actor_mean = nn.Sequential(
80 | layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 512)),
81 | nn.ELU(),
82 | layer_init(nn.Linear(512, 256)),
83 | nn.ELU(),
84 | layer_init(nn.Linear(256, 128)),
85 | nn.ELU(),
86 | layer_init(nn.Linear(128, np.prod(envs.single_action_space.shape)), std=0.01),
87 | )
88 | self.actor_logstd = nn.Parameter(torch.zeros(1, np.prod(envs.single_action_space.shape)))
89 |
90 | self.obs_rms = RunningMeanStd(shape = envs.single_observation_space.shape)
91 | self.value_rms = RunningMeanStd(shape = ())
92 |
93 | def get_value(self, x):
94 | return self.critic(x)
95 |
96 | def get_action_and_value(self, x, action=None):
97 | action_mean = self.actor_mean(x)
98 | action_logstd = self.actor_logstd.expand_as(action_mean)
99 | action_std = torch.exp(action_logstd)
100 | probs = Normal(action_mean, action_std)
101 | if action is None:
102 | action = probs.sample()
103 | return action, probs.log_prob(action).sum(1), probs.entropy().sum(1), self.critic(x)
104 |
105 | class ExtractObsWrapper(gym.ObservationWrapper):
106 | def observation(self, obs):
107 | return obs["obs"]
108 |
109 | def PPO(cfg: DictConfig, envs):
110 | run_path = f"runs/{cfg['train']['params']['config']['name']}_{datetime.now().strftime('%d-%H-%M-%S')}"
111 |
112 | writer = SummaryWriter(run_path)
113 | if not os.path.exists(run_path):
114 | os.makedirs(run_path)
115 | OmegaConf.save(config = cfg, f = f"{run_path}/config.yaml")
116 |
117 | LEARNING_RATE = cfg["train"]["params"]["config"]["learning_rate"]
118 | NUM_STEPS = cfg["train"]["params"]["config"]["horizon_length"]
119 | NUM_ITERATIONS = cfg["train"]["params"]["config"]["max_epochs"]
120 | GAMMA = cfg["train"]["params"]["config"]["gamma"]
121 | GAE_LAMBDA = cfg["train"]["params"]["config"]["tau"]
122 | UPDATES_EPOCHS = cfg["train"]["params"]["config"]["mini_epochs"]
123 | MINIBATCH_SIZE = cfg["train"]["params"]["config"]["minibatch_size"]
124 | CLIP_COEF = cfg["train"]["params"]["config"]["e_clip"]
125 | ENT_COEF = cfg["train"]["params"]["config"]["entropy_coef"]
126 | VF_COEF = cfg["train"]["params"]["config"]["critic_coef"]
127 | MAX_GRAD_NORM = cfg["train"]["params"]["config"]["grad_norm"]
128 | NORM_ADV = cfg["train"]["params"]["config"]["normalize_advantage"]
129 | CLIP_VLOSS = cfg["train"]["params"]["config"]["clip_value"]
130 | ANNEAL_LR = True
131 |
132 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
133 |
134 | envs.single_action_space = envs.action_space
135 | envs.single_observation_space = envs.observation_space
136 |
137 | envs = ExtractObsWrapper(envs)
138 |
139 | BATCH_SIZE = int(envs.num_envs * NUM_STEPS)
140 |
141 | agent = Agent(envs).to(device)
142 | optimizer = optim.Adam(agent.parameters(), lr=LEARNING_RATE, eps=1e-5)
143 |
144 | obs = torch.zeros((NUM_STEPS, envs.num_envs) + envs.single_observation_space.shape, dtype=torch.float).to(device)
145 | actions = torch.zeros((NUM_STEPS, envs.num_envs) + envs.single_action_space.shape, dtype=torch.float).to(device)
146 | logprobs = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device)
147 | rewards = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device)
148 | dones = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device)
149 | true_dones = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device)
150 | values = torch.zeros((NUM_STEPS, envs.num_envs), dtype=torch.float).to(device)
151 | advantages = torch.zeros_like(rewards, dtype=torch.float).to(device)
152 |
153 | # TRY NOT TO MODIFY: start the game
154 | global_step = 0
155 | start_time = time.time()
156 | next_obs = envs.reset()
157 | next_obs = agent.obs_rms(next_obs)
158 | next_done = torch.zeros(envs.num_envs, dtype=torch.float).to(device)
159 | next_true_done = torch.zeros(envs.num_envs, dtype=torch.float).to(device)
160 |
161 | for iteration in range(1, NUM_ITERATIONS + 1):
162 | if ANNEAL_LR:
163 | frac = 1.0 - (iteration - 1.0) / NUM_ITERATIONS
164 | lrnow = frac * LEARNING_RATE
165 | optimizer.param_groups[0]["lr"] = lrnow
166 |
167 | for step in range(0, NUM_STEPS):
168 | global_step += envs.num_envs
169 | obs[step] = next_obs
170 | dones[step] = next_done
171 | true_dones[step] = next_true_done
172 |
173 | # ALGO LOGIC: action logic
174 | with torch.no_grad():
175 | action, logprob, _, value = agent.get_action_and_value(next_obs)
176 | values[step] = value.flatten()
177 | actions[step] = action
178 | logprobs[step] = logprob
179 |
180 | # TRY NOT TO MODIFY: execute the game and log data.
181 | next_obs, rewards[step], next_done, info = envs.step(action)
182 | next_obs = agent.obs_rms(next_obs)
183 | next_true_done = info["true_dones"].float()
184 | if "time_outs" in info:
185 | if info["time_outs"].any():
186 | print("time outs", info["time_outs"].sum())
187 | exit(0)
188 |
189 | for el, v in zip(list(envs.episode_sums.keys())[:envs.numRewards], (torch.mean(envs.rew_mean_reset, dim=0)).tolist()):
190 | writer.add_scalar(f"reward/{el}", v, iteration)
191 | writer.add_scalar(f"reward/cum_rew", (torch.sum(torch.mean(envs.rew_cum_reset, dim=0))).item(), iteration)
192 |         writer.add_scalar("terrain/mean_level", envs.terrain_levels.float().mean().item(), iteration)
193 | for el, v in zip(envs.cstr_manager.get_names(), (100.0 * torch.mean(envs.cstr_mean_reset, dim=0)).tolist()):
194 | writer.add_scalar(f"cstr/{el}", v, iteration)
195 |
196 |         # CaT: attenuate rewards by (1 - done) so constraint-driven terminations directly reduce the return
197 | not_dones = 1.0 - dones
198 | rewards *= not_dones
199 |
200 | # bootstrap value if not done
201 | with torch.no_grad():
202 | next_value = agent.get_value(next_obs).reshape(1, -1)
203 | advantages = torch.zeros_like(rewards).to(device)
204 | lastgaelam = 0
205 | for t in reversed(range(NUM_STEPS)):
206 | if t == NUM_STEPS - 1:
207 | nextnonterminal = 1.0 - next_done
208 | true_nextnonterminal = 1 - next_true_done
209 | nextvalues = next_value
210 | else:
211 | nextnonterminal = 1.0 - dones[t + 1]
212 | true_nextnonterminal = 1 - true_dones[t + 1]
213 | nextvalues = values[t + 1]
214 | delta = rewards[t] + GAMMA * nextvalues * nextnonterminal * true_nextnonterminal - values[t]
215 | advantages[t] = lastgaelam = delta + GAMMA * GAE_LAMBDA * nextnonterminal * true_nextnonterminal * lastgaelam
216 | returns = advantages + values
217 |
218 | # flatten the batch
219 | b_obs = obs.reshape((-1,) + envs.single_observation_space.shape)
220 | b_logprobs = logprobs.reshape(-1)
221 | b_actions = actions.reshape((-1,) + envs.single_action_space.shape)
222 | b_advantages = advantages.reshape(-1)
223 | b_returns = returns.reshape(-1)
224 | b_values = values.reshape(-1)
225 |
226 | b_values = agent.value_rms(b_values)
227 | b_returns = agent.value_rms(b_returns)
228 |
229 | # Optimizing the policy and value network
230 | clipfracs = []
231 | for epoch in range(UPDATES_EPOCHS):
232 | b_inds = torch.randperm(BATCH_SIZE, device=device)
233 | for start in range(0, BATCH_SIZE, MINIBATCH_SIZE):
234 | end = start + MINIBATCH_SIZE
235 | mb_inds = b_inds[start:end]
236 |
237 | _, newlogprob, entropy, newvalue = agent.get_action_and_value(b_obs[mb_inds], b_actions[mb_inds])
238 | logratio = newlogprob - b_logprobs[mb_inds]
239 | ratio = logratio.exp()
240 |
241 | with torch.no_grad():
242 | # calculate approx_kl http://joschu.net/blog/kl-approx.html
243 | old_approx_kl = (-logratio).mean()
244 | approx_kl = ((ratio - 1) - logratio).mean()
245 | clipfracs += [((ratio - 1.0).abs() > CLIP_COEF).float().mean().item()]
246 |
247 | mb_advantages = b_advantages[mb_inds]
248 | if NORM_ADV:
249 | mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8)
250 |
251 | # Policy loss
252 | pg_loss1 = -mb_advantages * ratio
253 | pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - CLIP_COEF, 1 + CLIP_COEF)
254 | pg_loss = torch.max(pg_loss1, pg_loss2).mean()
255 |
256 | # Value loss
257 | newvalue = newvalue.view(-1)
258 | newvalue = agent.value_rms(newvalue, update = False)
259 | if CLIP_VLOSS:
260 | v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2
261 | v_clipped = b_values[mb_inds] + torch.clamp(
262 | newvalue - b_values[mb_inds],
263 | -CLIP_COEF,
264 | CLIP_COEF,
265 | )
266 | v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2
267 | v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped)
268 | v_loss = 0.5 * v_loss_max.mean()
269 | else:
270 | v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean()
271 |
272 | entropy_loss = entropy.mean()
273 | loss = pg_loss - ENT_COEF * entropy_loss + v_loss * VF_COEF
274 |
275 | optimizer.zero_grad()
276 | loss.backward()
277 | nn.utils.clip_grad_norm_(agent.parameters(), MAX_GRAD_NORM)
278 | optimizer.step()
279 |
280 | if (iteration + 1) % 24 == 0:
281 | model_path = f"{run_path}/cleanrl_model.pt"
282 | torch.save(agent.state_dict(), model_path)
283 | print("Saved model")
284 |
285 | def eval_PPO(cfg: DictConfig, envs):
286 | checkpoint = cfg["checkpoint"]
287 | actor_sd = torch.load(checkpoint)
288 |
289 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
290 |
291 | envs.single_action_space = envs.action_space
292 | envs.single_observation_space = envs.observation_space
293 |
294 | envs = ExtractObsWrapper(envs)
295 |
296 | actor = Agent(envs).to(device)
297 | actor.load_state_dict(actor_sd)
298 |
299 | obs = envs.reset()
300 |
301 | for _ in range(2000):
302 | with torch.no_grad():
303 | actions, _, _, _ = actor.get_action_and_value(actor.obs_rms(obs, update = False))
304 |
305 | next_obs, rewards, terminations, infos = envs.step(actions)
306 | obs = next_obs
307 |
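
For reference, a minimal sketch of the configuration layout that PPO() and eval_PPO() read above; the keys mirror the cfg["train"]["params"]["config"] accesses in the code, but the concrete values here are placeholders rather than the repository's tuned settings (those live in cfg/train/SoloParkourPPO.yaml), and the envs object must additionally expose the attributes used for logging (episode_sums, numRewards, rew_mean_reset, rew_cum_reset, terrain_levels, cstr_manager, cstr_mean_reset):

    from omegaconf import OmegaConf

    cfg = OmegaConf.create({
        "checkpoint": "runs/example/cleanrl_model.pt",  # only needed by eval_PPO
        "train": {"params": {"config": {
            "name": "SoloParkourPPO",
            "learning_rate": 3e-4,
            "horizon_length": 24,        # NUM_STEPS
            "max_epochs": 1000,          # NUM_ITERATIONS
            "gamma": 0.99,
            "tau": 0.95,                 # GAE lambda
            "mini_epochs": 5,
            "minibatch_size": 16384,
            "e_clip": 0.2,
            "entropy_coef": 0.0,
            "critic_coef": 2.0,
            "grad_norm": 1.0,
            "normalize_advantage": True,
            "clip_value": True,
        }}},
    })
    # PPO(cfg, envs) trains and periodically saves runs/<name>_<timestamp>/cleanrl_model.pt;
    # eval_PPO(cfg, envs) reloads that checkpoint and rolls the policy out for 2000 steps.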
--------------------------------------------------------------------------------
/solo12/meshes/cylinder.obj:
--------------------------------------------------------------------------------
1 | o Cylinder
2 | v 0 -0.998248581950179 0.5034876052441187
3 | v 0 -1.0017392333654023 -0.4965063024136719
4 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156
5 | v 0 -1.0017392333654023 -0.4965063024136719
6 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749
7 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156
8 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156
9 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749
10 | v 1 0.00174532570761165 0.4999969538288953
11 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749
12 | v 1 -0.0017453257076117724 -0.4999969538288953
13 | v 1 0.00174532570761165 0.4999969538288953
14 | v 1 0.00174532570761165 0.4999969538288953
15 | v 1 -0.0017453257076117724 -0.4999969538288953
16 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749
17 | v 1 -0.0017453257076117724 -0.4999969538288953
18 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156
19 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749
20 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749
21 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156
22 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719
23 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156
24 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187
25 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719
26 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719
27 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187
28 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749
29 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187
30 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156
31 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749
32 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749
33 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156
34 | v -1 0.001745325707611895 0.4999969538288953
35 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156
36 | v -1 -0.0017453257076115276 -0.4999969538288953
37 | v -1 0.001745325707611895 0.4999969538288953
38 | v -1 0.001745325707611895 0.4999969538288953
39 | v -1 -0.0017453257076115276 -0.4999969538288953
40 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156
41 | v -1 -0.0017453257076115276 -0.4999969538288953
42 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749
43 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156
44 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156
45 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749
46 | v -2.4492937051703357e-16 -0.998248581950179 0.5034876052441187
47 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749
48 | v -2.4492937051703357e-16 -1.0017392333654023 -0.4965063024136719
49 | v -2.4492937051703357e-16 -0.998248581950179 0.5034876052441187
50 | v 0 -0.998248581950179 0.5034876052441187
51 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156
52 | v 0 0.0017453257076117112 0.4999969538288953
53 | v 0.7071067690849304 -0.705357135440903 0.5024652170731156
54 | v 1 0.00174532570761165 0.4999969538288953
55 | v 0 0.0017453257076117112 0.4999969538288953
56 | v 1 0.00174532570761165 0.4999969538288953
57 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749
58 | v 0 0.0017453257076117112 0.4999969538288953
59 | v 0.7071067690849304 0.7088477868561263 0.4975286905846749
60 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719
61 | v 0 0.0017453257076117112 0.4999969538288953
62 | v 1.2246468525851679e-16 1.0017392333654023 0.4965063024136719
63 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749
64 | v 0 0.0017453257076117112 0.4999969538288953
65 | v -0.7071067690849304 0.7088477868561263 0.4975286905846749
66 | v -1 0.001745325707611895 0.4999969538288953
67 | v 0 0.0017453257076117112 0.4999969538288953
68 | v -1 0.001745325707611895 0.4999969538288953
69 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156
70 | v 0 0.0017453257076117112 0.4999969538288953
71 | v -0.7071067690849304 -0.705357135440903 0.5024652170731156
72 | v -2.4492937051703357e-16 -0.998248581950179 0.5034876052441187
73 | v 0 0.0017453257076117112 0.4999969538288953
74 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749
75 | v 0 -1.0017392333654023 -0.4965063024136719
76 | v 0 -0.0017453257076117112 -0.4999969538288953
77 | v 1 -0.0017453257076117724 -0.4999969538288953
78 | v 0.7071067690849304 -0.7088477868561263 -0.4975286905846749
79 | v 0 -0.0017453257076117112 -0.4999969538288953
80 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156
81 | v 1 -0.0017453257076117724 -0.4999969538288953
82 | v 0 -0.0017453257076117112 -0.4999969538288953
83 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187
84 | v 0.7071067690849304 0.705357135440903 -0.5024652170731156
85 | v 0 -0.0017453257076117112 -0.4999969538288953
86 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156
87 | v 1.2246468525851679e-16 0.998248581950179 -0.5034876052441187
88 | v 0 -0.0017453257076117112 -0.4999969538288953
89 | v -1 -0.0017453257076115276 -0.4999969538288953
90 | v -0.7071067690849304 0.705357135440903 -0.5024652170731156
91 | v 0 -0.0017453257076117112 -0.4999969538288953
92 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749
93 | v -1 -0.0017453257076115276 -0.4999969538288953
94 | v 0 -0.0017453257076117112 -0.4999969538288953
95 | v -2.4492937051703357e-16 -1.0017392333654023 -0.4965063024136719
96 | v -0.7071067690849304 -0.7088477868561263 -0.4975286905846749
97 | v 0 -0.0017453257076117112 -0.4999969538288953
98 | vt 0 1
99 | vt 0 0
100 | vt 0.125 1
101 | vt 0 0
102 | vt 0.125 0
103 | vt 0.125 1
104 | vt 0.125 1
105 | vt 0.125 0
106 | vt 0.25 1
107 | vt 0.125 0
108 | vt 0.25 0
109 | vt 0.25 1
110 | vt 0.25 1
111 | vt 0.25 0
112 | vt 0.375 1
113 | vt 0.25 0
114 | vt 0.375 0
115 | vt 0.375 1
116 | vt 0.375 1
117 | vt 0.375 0
118 | vt 0.5 1
119 | vt 0.375 0
120 | vt 0.5 0
121 | vt 0.5 1
122 | vt 0.5 1
123 | vt 0.5 0
124 | vt 0.625 1
125 | vt 0.5 0
126 | vt 0.625 0
127 | vt 0.625 1
128 | vt 0.625 1
129 | vt 0.625 0
130 | vt 0.75 1
131 | vt 0.625 0
132 | vt 0.75 0
133 | vt 0.75 1
134 | vt 0.75 1
135 | vt 0.75 0
136 | vt 0.875 1
137 | vt 0.75 0
138 | vt 0.875 0
139 | vt 0.875 1
140 | vt 0.875 1
141 | vt 0.875 0
142 | vt 1 1
143 | vt 0.875 0
144 | vt 1 0
145 | vt 1 1
146 | vt 1 0.5
147 | vt 0.8535534143447876 0.8535534143447876
148 | vt 0.5 0.5
149 | vt 0.8535534143447876 0.8535534143447876
150 | vt 0.5 1
151 | vt 0.5 0.5
152 | vt 0.5 1
153 | vt 0.1464466154575348 0.8535534143447876
154 | vt 0.5 0.5
155 | vt 0.1464466154575348 0.8535534143447876
156 | vt 0 0.5
157 | vt 0.5 0.5
158 | vt 0 0.5
159 | vt 0.1464466154575348 0.1464466154575348
160 | vt 0.5 0.5
161 | vt 0.1464466154575348 0.1464466154575348
162 | vt 0.5 0
163 | vt 0.5 0.5
164 | vt 0.5 0
165 | vt 0.8535534143447876 0.1464466154575348
166 | vt 0.5 0.5
167 | vt 0.8535534143447876 0.1464466154575348
168 | vt 1 0.5
169 | vt 0.5 0.5
170 | vt 0.8535534143447876 0.1464466154575348
171 | vt 1 0.5
172 | vt 0.5 0.5
173 | vt 0.5 0
174 | vt 0.8535534143447876 0.1464466154575348
175 | vt 0.5 0.5
176 | vt 0.1464466154575348 0.1464466154575348
177 | vt 0.5 0
178 | vt 0.5 0.5
179 | vt 0 0.5
180 | vt 0.1464466154575348 0.1464466154575348
181 | vt 0.5 0.5
182 | vt 0.1464466154575348 0.8535534143447876
183 | vt 0 0.5
184 | vt 0.5 0.5
185 | vt 0.5 1
186 | vt 0.1464466154575348 0.8535534143447876
187 | vt 0.5 0.5
188 | vt 0.8535534143447876 0.8535534143447876
189 | vt 0.5 1
190 | vt 0.5 0.5
191 | vt 1 0.5
192 | vt 0.8535534143447876 0.8535534143447876
193 | vt 0.5 0.5
194 | vn 0 -0.9999939076577902 0.0034906514152234207
195 | vn 0 -0.9999939076577902 0.0034906514152234207
196 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373
197 | vn 0 -0.9999939076577902 0.0034906514152234207
198 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373
199 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373
200 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373
201 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373
202 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19
203 | vn 0.7071067690849304 -0.7071024611485143 0.002468263244220373
204 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19
205 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19
206 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19
207 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19
208 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373
209 | vn 1 -6.123196958087279e-17 2.1374076345626621e-19
210 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373
211 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373
212 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373
213 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373
214 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207
215 | vn 0.7071067690849304 0.7071024611485143 -0.002468263244220373
216 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207
217 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207
218 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207
219 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207
220 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373
221 | vn 1.2246468525851679e-16 0.9999939076577902 -0.0034906514152234207
222 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373
223 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373
224 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373
225 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373
226 | vn -1 1.836958955078092e-16 -6.412222441704118e-19
227 | vn -0.7071067690849304 0.7071024611485143 -0.002468263244220373
228 | vn -1 1.836958955078092e-16 -6.412222441704118e-19
229 | vn -1 1.836958955078092e-16 -6.412222441704118e-19
230 | vn -1 1.836958955078092e-16 -6.412222441704118e-19
231 | vn -1 1.836958955078092e-16 -6.412222441704118e-19
232 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373
233 | vn -1 1.836958955078092e-16 -6.412222441704118e-19
234 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373
235 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373
236 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373
237 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373
238 | vn -2.4492937051703357e-16 -0.9999939076577902 0.0034906514152234207
239 | vn -0.7071067690849304 -0.7071024611485143 0.002468263244220373
240 | vn -2.4492937051703357e-16 -0.9999939076577902 0.0034906514152234207
241 | vn -2.4492937051703357e-16 -0.9999939076577902 0.0034906514152234207
242 | vn 0 0.0034906514152234207 0.9999939076577902
243 | vn 0 0.0034906514152234207 0.9999939076577902
244 | vn 0 0.0034906514152234207 0.9999939076577902
245 | vn 0 0.0034906514152234207 0.9999939076577902
246 | vn 0 0.0034906514152234207 0.9999939076577902
247 | vn 0 0.0034906514152234207 0.9999939076577902
248 | vn 0 0.0034906514152234207 0.9999939076577902
249 | vn 0 0.0034906514152234207 0.9999939076577902
250 | vn 0 0.0034906514152234207 0.9999939076577902
251 | vn 0 0.0034906514152234207 0.9999939076577902
252 | vn 0 0.0034906514152234207 0.9999939076577902
253 | vn 0 0.0034906514152234207 0.9999939076577902
254 | vn 0 0.0034906514152234207 0.9999939076577902
255 | vn 0 0.0034906514152234207 0.9999939076577902
256 | vn 0 0.0034906514152234207 0.9999939076577902
257 | vn 0 0.0034906514152234207 0.9999939076577902
258 | vn 0 0.0034906514152234207 0.9999939076577902
259 | vn 0 0.0034906514152234207 0.9999939076577902
260 | vn 0 0.0034906514152234207 0.9999939076577902
261 | vn 0 0.0034906514152234207 0.9999939076577902
262 | vn 0 0.0034906514152234207 0.9999939076577902
263 | vn 0 0.0034906514152234207 0.9999939076577902
264 | vn 0 0.0034906514152234207 0.9999939076577902
265 | vn 0 0.0034906514152234207 0.9999939076577902
266 | vn 0 -0.0034906514152234207 -0.9999939076577902
267 | vn 0 -0.0034906514152234207 -0.9999939076577902
268 | vn 0 -0.0034906514152234207 -0.9999939076577902
269 | vn 0 -0.0034906514152234207 -0.9999939076577902
270 | vn 0 -0.0034906514152234207 -0.9999939076577902
271 | vn 0 -0.0034906514152234207 -0.9999939076577902
272 | vn 0 -0.0034906514152234207 -0.9999939076577902
273 | vn 0 -0.0034906514152234207 -0.9999939076577902
274 | vn 0 -0.0034906514152234207 -0.9999939076577902
275 | vn 0 -0.0034906514152234207 -0.9999939076577902
276 | vn 0 -0.0034906514152234207 -0.9999939076577902
277 | vn 0 -0.0034906514152234207 -0.9999939076577902
278 | vn 0 -0.0034906514152234207 -0.9999939076577902
279 | vn 0 -0.0034906514152234207 -0.9999939076577902
280 | vn 0 -0.0034906514152234207 -0.9999939076577902
281 | vn 0 -0.0034906514152234207 -0.9999939076577902
282 | vn 0 -0.0034906514152234207 -0.9999939076577902
283 | vn 0 -0.0034906514152234207 -0.9999939076577902
284 | vn 0 -0.0034906514152234207 -0.9999939076577902
285 | vn 0 -0.0034906514152234207 -0.9999939076577902
286 | vn 0 -0.0034906514152234207 -0.9999939076577902
287 | vn 0 -0.0034906514152234207 -0.9999939076577902
288 | vn 0 -0.0034906514152234207 -0.9999939076577902
289 | vn 0 -0.0034906514152234207 -0.9999939076577902
290 | f 1/1/1 2/2/2 3/3/3
291 | f 4/4/4 5/5/5 6/6/6
292 | f 7/7/7 8/8/8 9/9/9
293 | f 10/10/10 11/11/11 12/12/12
294 | f 13/13/13 14/14/14 15/15/15
295 | f 16/16/16 17/17/17 18/18/18
296 | f 19/19/19 20/20/20 21/21/21
297 | f 22/22/22 23/23/23 24/24/24
298 | f 25/25/25 26/26/26 27/27/27
299 | f 28/28/28 29/29/29 30/30/30
300 | f 31/31/31 32/32/32 33/33/33
301 | f 34/34/34 35/35/35 36/36/36
302 | f 37/37/37 38/38/38 39/39/39
303 | f 40/40/40 41/41/41 42/42/42
304 | f 43/43/43 44/44/44 45/45/45
305 | f 46/46/46 47/47/47 48/48/48
306 | f 49/49/49 50/50/50 51/51/51
307 | f 52/52/52 53/53/53 54/54/54
308 | f 55/55/55 56/56/56 57/57/57
309 | f 58/58/58 59/59/59 60/60/60
310 | f 61/61/61 62/62/62 63/63/63
311 | f 64/64/64 65/65/65 66/66/66
312 | f 67/67/67 68/68/68 69/69/69
313 | f 70/70/70 71/71/71 72/72/72
314 | f 73/73/73 74/74/74 75/75/75
315 | f 76/76/76 77/77/77 78/78/78
316 | f 79/79/79 80/80/80 81/81/81
317 | f 82/82/82 83/83/83 84/84/84
318 | f 85/85/85 86/86/86 87/87/87
319 | f 88/88/88 89/89/89 90/90/90
320 | f 91/91/91 92/92/92 93/93/93
321 | f 94/94/94 95/95/95 96/96/96
322 |
--------------------------------------------------------------------------------
/tasks/terrainParkour.py:
--------------------------------------------------------------------------------
1 | ####################
2 | # Terrain generator
3 | ####################
4 | import numpy as np
5 |
6 | from isaacgym.terrain_utils import *
7 |
8 | class Terrain:
9 | """Class to handle terrain creation, usually a bunch of subterrains surrounding by a flat border area.
10 |
11 | Subterrains are spread on a given number of columns and rows. Each column might correspond to a given
12 | type of subterrains (slope, stairs, ...) and each row to a given difficulty (from easiest to hardest).
13 | """
14 |
15 | def __init__(self, cfg, num_robots) -> None:
16 |
17 | self.type = cfg["terrainType"]
18 | if self.type in ["none", 'plane']:
19 | return
20 |
21 | # Create subterrains on the fly based on Isaac subterrain primitives
22 |
23 | # Retrieving proportions of each kind of subterrains
24 | keys = list(cfg["terrainProportions"].keys())
25 | vals = list(cfg["terrainProportions"].values())
26 | self.terrain_keys = []
27 | self.terrain_proportions = []
28 |         cum_prop = 0.0  # cumulative proportion over terrain types
29 |         for key, val in zip(keys, vals):
30 |             if val != 0.0:
31 |                 cum_prop += float(val)
32 |                 self.terrain_keys.append(key)
33 |                 self.terrain_proportions.append(np.round(cum_prop, 2))
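        # Illustration with hypothetical proportions (not the shipped config): terrainProportions =
        # {"gap_parkour": 0.25, "jump_parkour": 0.25, "stairs_parkour": 0.5} yields
        # terrain_keys = ["gap_parkour", "jump_parkour", "stairs_parkour"] and
        # terrain_proportions = [0.25, 0.5, 1.0]; in curiculum(), a column with
        # choice = j / num_terrains then selects the first key whose cumulative threshold is
        # strictly greater than choice (e.g. choice = 0.6 picks "stairs_parkour").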
34 |
35 | self.horizontal_scale = 0.05 # Resolution of the terrain height map
36 | self.border_size = 8.0 # Size of the flat border area all around the terrains
37 | self.env_length = cfg["mapLength"] # Length of subterrains
38 | self.env_width = cfg["mapWidth"] # Width of subterrains
39 | self.env_rows = cfg["numLevels"]
40 | self.env_cols = cfg["numTerrains"]
41 | self.num_maps = self.env_rows * self.env_cols
42 | self.num_per_env = int(num_robots / self.num_maps)
43 | self.env_origins = np.zeros((self.env_rows, self.env_cols, 3))
44 |
45 | # Number of height map cells for each subterrain in width and length
46 | self.width_per_env_pixels = int(self.env_width / self.horizontal_scale)
47 | self.length_per_env_pixels = int(self.env_length / self.horizontal_scale)
48 |
49 | self.border = int(self.border_size / self.horizontal_scale)
50 | self.tot_cols = int(self.env_cols * self.width_per_env_pixels) + 2 * self.border
51 | self.tot_rows = int(self.env_rows * self.length_per_env_pixels) + 2 * self.border
52 | self.height_field_raw = np.zeros((self.tot_rows , self.tot_cols), dtype=np.float32)
53 | self.ceilings = np.zeros((self.tot_rows , self.tot_cols), dtype=np.float32)
54 |
55 | self.boxes = []
56 | self.vertical_scale = 0.005
57 | if cfg["curriculum"]:
58 | self.curiculum(num_robots, num_terrains=self.env_cols, num_levels=self.env_rows)
59 | else:
60 | self.curiculum(num_robots, num_terrains=self.env_cols, num_levels=self.env_rows)
61 | #self.randomized_terrain()
62 | self.vertices, self.triangles = convert_heightfield_to_trimesh(self.height_field_raw, self.horizontal_scale, self.vertical_scale, cfg["slopeTreshold"])
63 | if len(self.boxes) > 0:
64 | for box in self.boxes:
65 | self.vertices, self.triangles = combine_trimeshes((self.vertices, self.triangles), box)
66 | self.heightsamples = self.height_field_raw
67 |
68 | def randomized_terrain(self):
69 | """Spawn random subterrain without ordering them by type or difficulty
70 | according to their rows and columns"""
71 |
72 | for k in range(self.num_maps):
73 | # Env coordinates in the world
74 | (i, j) = np.unravel_index(k, (self.env_rows, self.env_cols))
75 |
76 | # Heightfield coordinate system from now on
77 | start_x = self.border + i * self.length_per_env_pixels
78 | end_x = self.border + (i + 1) * self.length_per_env_pixels
79 | start_y = self.border + j * self.width_per_env_pixels
80 | end_y = self.border + (j + 1) * self.width_per_env_pixels
81 |
82 | terrain = SubTerrain("terrain",
83 | width=self.width_per_env_pixels,
84 | length=self.width_per_env_pixels,
85 | vertical_scale=self.vertical_scale,
86 | horizontal_scale=self.horizontal_scale)
87 | choice = np.random.uniform(0, 1)
88 | if choice < 0.1:
89 | if np.random.choice([0, 1]):
90 | pyramid_sloped_terrain(terrain, np.random.choice([-0.3, -0.2, 0, 0.2, 0.3]))
91 | random_uniform_terrain(terrain, min_height=-0.1, max_height=0.1, step=0.05, downsampled_scale=0.2)
92 | else:
93 | pyramid_sloped_terrain(terrain, np.random.choice([-0.3, -0.2, 0, 0.2, 0.3]))
94 | elif choice < 0.6:
95 | # step_height = np.random.choice([-0.18, -0.15, -0.1, -0.05, 0.05, 0.1, 0.15, 0.18])
96 | step_height = np.random.choice([-0.15, 0.15])
97 | pyramid_stairs_terrain(terrain, step_width=0.31, step_height=step_height, platform_size=3.)
98 | elif choice < 1.:
99 | discrete_obstacles_terrain(terrain, 0.15, 1., 2., 40, platform_size=3.)
100 |
101 | self.height_field_raw[start_x: end_x, start_y:end_y] = terrain.height_field_raw
102 |
103 | env_origin_x = (i + 0.5) * self.env_length
104 | env_origin_y = (j + 0.5) * self.env_width
105 | x1 = int((self.env_length / 2. - 1) / self.horizontal_scale)
106 | x2 = int((self.env_length / 2. + 1) / self.horizontal_scale)
107 | y1 = int((self.env_width / 2. - 1) / self.horizontal_scale)
108 | y2 = int((self.env_width / 2. + 1) / self.horizontal_scale)
109 | env_origin_z = np.max(terrain.height_field_raw[x1:x2, y1:y2])*self.vertical_scale
110 | self.env_origins[i, j] = [env_origin_x, env_origin_y, env_origin_z]
111 |
112 | def curiculum(self, num_robots, num_terrains, num_levels):
113 | """Spawn subterrain ordering them by type (one type per column)
114 | and by difficulty (first row is easiest, last one is hardest)
115 | """
116 |
117 | num_robots_per_map = int(num_robots / num_terrains)
118 | left_over = num_robots % num_terrains
119 | idx = 0
120 | for j in range(num_terrains):
121 | for i in range(num_levels):
122 | terrain = SubTerrain("terrain",
123 | length=self.length_per_env_pixels, # seems to be inverted
124 | width=self.width_per_env_pixels,
125 | vertical_scale=self.vertical_scale,
126 | horizontal_scale=self.horizontal_scale)
127 | difficulty = i / (num_levels-1.0)
128 | choice = j / num_terrains
129 | lava_depth=-np.random.uniform(0.7, 1.3)
130 | boxes=None
131 | ceiling = 0.4
132 |
133 | k = 0
134 | while k < len(self.terrain_proportions) and choice >= self.terrain_proportions[k]:
135 | k += 1
136 | if k == len(self.terrain_proportions):
137 | # The sum of terrain proportions is not >= 1
138 | # Defaulting to flat ground
139 | continue
140 |
141 | if self.terrain_keys[k] == "gap_parkour":
142 | gap_length = 0.15 + i*0.05
143 | gap_length = np.round(gap_length, 2)
144 |
145 | gap_parkour(
146 | terrain,
147 | lava_depth=lava_depth,
148 | gap_length=gap_length,
149 | gap_platform_height=0.1
150 | )
151 | add_roughness(terrain, np.random.uniform(0.01, 0.03))
152 | elif self.terrain_keys[k] == "jump_parkour":
153 | height = 0.05 + 0.37*difficulty
154 | jump_parkour(
155 | terrain,
156 | lava_depth=lava_depth,
157 | height=height,
158 | )
159 | add_roughness(terrain, np.random.uniform(0.01, 0.03))
160 |
161 | elif self.terrain_keys[k] == "stairs_parkour":
162 | stairs_parkour(
163 | terrain,
164 | lava_depth=lava_depth,
165 | height=0.02 + 0.18*difficulty,
166 |
167 | )
168 | add_roughness(terrain, np.random.uniform(0.01, 0.03))
169 |
170 | elif self.terrain_keys[k] == "hurdle_parkour":
171 | hurdle_parkour(
172 | terrain,
173 | lava_depth=lava_depth,
174 | height=0.05 + 0.3*difficulty,
175 | )
176 | add_roughness(terrain, np.random.uniform(0.01, 0.03))
177 | elif self.terrain_keys[k] == "crawl_parkour":
178 | ceiling = 0.34 - 0.08*difficulty
179 | boxes = crawl_parkour(
180 | terrain,
181 | lava_depth=lava_depth,
182 | height=ceiling
183 | )
184 | add_roughness(terrain, np.random.uniform(0.01, 0.03))
185 | elif self.terrain_keys[k] == "random_uniform":
186 | add_roughness(terrain, np.random.uniform(0.01, 0.03))
187 |
188 | else:
189 | # Flat ground
190 | pass
191 |
192 | # Heightfield coordinate system
193 | start_x = self.border + i * self.length_per_env_pixels
194 | end_x = self.border + (i + 1) * self.length_per_env_pixels
195 | start_y = self.border + j * self.width_per_env_pixels
196 | end_y = self.border + (j + 1) * self.width_per_env_pixels
197 |                 self.height_field_raw[start_x: end_x, start_y:end_y] = terrain.height_field_raw.transpose() # width and length seem to be inverted in SubTerrain
198 |
199 | if boxes is not None:
200 | for box in boxes:
201 | box = move_trimesh(box, np.array([[
202 | self.border_size+i*self.env_length, self.border_size + (j+0.5) * self.env_width, 0.0
203 | ]]))
204 | self.boxes.append(box)
205 |
206 | robots_in_map = num_robots_per_map
207 | if j < left_over:
208 | robots_in_map +=1
209 |
210 | env_origin_x = (i + 0.0) * self.env_length
211 | env_origin_y = (j + 0.5) * self.env_width
212 | env_origin_z = np.random.uniform(0.0, 0.4)
213 | self.env_origins[i, j] = [env_origin_x, env_origin_y, env_origin_z]
214 | self.ceilings[i, j] = ceiling
215 |
216 | def add_roughness(terrain, noise_magnitude=0.02):
217 | random_uniform_terrain(
218 | terrain,
219 | min_height=-noise_magnitude,
220 | max_height=noise_magnitude,
221 | step=0.005,
222 | downsampled_scale=0.075,
223 | )
224 |
225 | def gap_parkour(terrain, platform_length=1., lava_width=0.5, lava_depth=-1.0, gap_length=0.5, gap_platform_length_min=1.25, gap_platform_length_max=1.5, gap_platform_height=0.05):
226 | platform_length = int(platform_length / terrain.horizontal_scale)
227 | lava_width = int(lava_width / terrain.horizontal_scale)
228 | lava_depth = int(lava_depth / terrain.vertical_scale)
229 |
230 | gap_platform_length_min = int(gap_platform_length_min / terrain.horizontal_scale)
231 | gap_platform_length_max = int(gap_platform_length_max / terrain.horizontal_scale)
232 |
233 | gap_length = int(gap_length / terrain.horizontal_scale)
234 | gap_platform_height = int(gap_platform_height / terrain.vertical_scale)
235 |
236 | # add gap
237 | start_gap = platform_length
238 | while start_gap + gap_length <= terrain.length - platform_length//2:
239 | gap_platform_length = np.random.randint(gap_platform_length_min, gap_platform_length_max)
240 | terrain.height_field_raw[:, start_gap:start_gap+gap_length] = lava_depth
241 | # randomize gap platform height
242 | if start_gap + gap_length + gap_platform_length <= terrain.length - platform_length //2:
243 | #terrain.height_field_raw[:, start_gap+gap_length:start_gap+gap_length+gap_platform_length] = np.random.randint(-gap_platform_height, gap_platform_height)
244 | terrain.height_field_raw[:, start_gap+gap_length:start_gap+gap_length+gap_platform_length] = -gap_platform_height
245 |
246 | start_gap += gap_length + gap_platform_length
247 |
248 | # the floor is lava
249 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth
250 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth
251 |
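# Scale note (illustrative): Terrain uses horizontal_scale = 0.05 m per heightfield cell and
# vertical_scale = 0.005 m per height unit, so in gap_parkour a gap_length of 0.5 m becomes
# 10 cells and a gap_platform_height of 0.1 m becomes 20 height units; the other *_parkour
# generators below apply the same metre-to-cell conversions.
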
252 | def jump_parkour(terrain, platform_length=1.25, lava_width=0.5, lava_depth=-1.0, height=0.5, height_platform_length=1.5):
253 | platform_length = int(platform_length / terrain.horizontal_scale)
254 | lava_width = int(lava_width / terrain.horizontal_scale)
255 | lava_depth = int(lava_depth / terrain.vertical_scale)
256 |
257 | height_platform_length = int(height_platform_length / terrain.horizontal_scale)
258 | height = int(height / terrain.vertical_scale)
259 |
260 | # Version with 2 jumps
261 | #terrain.height_field_raw[:, platform_length:platform_length+3*height_platform_length] = height
262 | #terrain.height_field_raw[:, platform_length+height_platform_length:platform_length+2*height_platform_length] = 2*height
263 |
264 | # Version with 3 jumps
265 | terrain.height_field_raw[:, 1*platform_length:6*platform_length] = 1*height
266 | terrain.height_field_raw[:, 2*platform_length:5*platform_length] = 2*height
267 | terrain.height_field_raw[:, 3*platform_length:4*platform_length] = 3*height
268 |
269 | # the floor is lava
270 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth
271 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth
272 |
273 | def stairs_parkour(terrain, platform_length=1., lava_width=0.5, lava_depth=-1.0, height=0.18, width=0.3, stairs_platform_length=1.25):
274 | platform_length = int(platform_length / terrain.horizontal_scale)
275 | lava_width = int(lava_width / terrain.horizontal_scale)
276 | lava_depth = int(lava_depth / terrain.vertical_scale)
277 |
278 | stairs_platform_length = int(stairs_platform_length / terrain.horizontal_scale)
279 | height = int(height / terrain.vertical_scale)
280 | width = int(width / terrain.horizontal_scale)
281 |
282 | start = platform_length
283 | stop = terrain.length - platform_length//2
284 | curr_height = height
285 | while stop - start > platform_length:
286 | terrain.height_field_raw[:, start:stop] = curr_height
287 | curr_height += height
288 | start += width
289 | stop -= width
290 |
291 | # the floor is lava
292 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth
293 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth
294 |
295 |
296 | def hurdle_parkour(terrain, platform_length=1.5, lava_width=0.5, lava_depth=-1.0, height=0.2, width_min=0.3, width_max=0.5):
297 | platform_length = int(platform_length / terrain.horizontal_scale)
298 | lava_width = int(lava_width / terrain.horizontal_scale)
299 | lava_depth = int(lava_depth / terrain.vertical_scale)
300 |
301 | height = int(height / terrain.vertical_scale)
302 | width_min = int(width_min / terrain.horizontal_scale)
303 | width_max = int(width_max / terrain.horizontal_scale)
304 |
305 | start = platform_length
306 | width = np.random.randint(width_min, width_max)
307 | while start + platform_length + width <= terrain.length - platform_length//2:
308 | terrain.height_field_raw[:, start:start+width] = height
309 | start += platform_length + width
310 | width = np.random.randint(width_min, width_max)
311 |
312 |
313 | # the floor is lava
314 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth
315 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth
316 |
317 | def crawl_parkour(terrain, platform_length=2.0, lava_width=0.5, lava_depth=-1.0, height=0.2, depth=1.0, width=3.0, height_step=0.15):
318 | # First put the barriers
319 | boxes = []
320 |     boxes.append(box_trimesh(np.array([depth, width, 0.5]), np.array([2.5, 0.0, height+0.25])))
321 |     boxes.append(box_trimesh(np.array([depth, width, 0.5]), np.array([6.5, 0.0, height+0.25+height_step])))
322 |
323 | # Then create the heightmap
324 | platform_length = int(platform_length / terrain.horizontal_scale)
325 | lava_width = int(lava_width / terrain.horizontal_scale)
326 | lava_depth = int(lava_depth / terrain.vertical_scale)
327 |
328 | height = int(height / terrain.vertical_scale)
329 | height_step = int(height_step / terrain.vertical_scale)
330 | depth = int(depth / terrain.horizontal_scale)
331 |
332 | terrain.height_field_raw[:, int(6.0/terrain.horizontal_scale):int(7.0/terrain.horizontal_scale)] = 1*height_step
333 |
334 | # the floor is lava
335 | terrain.height_field_raw[0:lava_width, 0:terrain.length] = lava_depth
336 | terrain.height_field_raw[-lava_width:, 0:terrain.length] = lava_depth
337 |
338 | return boxes
339 |
340 |
341 |
342 |
343 | def box_trimesh(
344 | size, # float [3] for x, y, z axis length (in meter) under box frame
345 | center_position, # float [3] position (in meter) in world frame
346 | ):
347 |
348 | vertices = np.empty((8, 3), dtype= np.float32)
349 | vertices[:] = center_position
350 | vertices[[0, 4, 2, 6], 0] -= size[0] / 2
351 | vertices[[1, 5, 3, 7], 0] += size[0] / 2
352 | vertices[[0, 1, 2, 3], 1] -= size[1] / 2
353 | vertices[[4, 5, 6, 7], 1] += size[1] / 2
354 | vertices[[2, 3, 6, 7], 2] -= size[2] / 2
355 | vertices[[0, 1, 4, 5], 2] += size[2] / 2
356 |
357 |     triangles = np.zeros((12, 3), dtype=np.uint32)  # all 12 faces are filled in below
358 | triangles[0] = [0, 2, 1] #
359 | triangles[1] = [1, 2, 3]
360 | triangles[2] = [0, 4, 2] #
361 | triangles[3] = [2, 4, 6]
362 | triangles[4] = [4, 5, 6] #
363 | triangles[5] = [5, 7, 6]
364 | triangles[6] = [1, 3, 5] #
365 | triangles[7] = [3, 7, 5]
366 | triangles[8] = [0, 1, 4] #
367 | triangles[9] = [1, 5, 4]
368 | triangles[10]= [2, 6, 3] #
369 | triangles[11]= [3, 6, 7]
370 |
371 | return vertices, triangles
372 |
373 | def combine_trimeshes(*trimeshes):
374 | if len(trimeshes) > 2:
375 | return combine_trimeshes(
376 | trimeshes[0],
377 |             combine_trimeshes(*trimeshes[1:])
378 | )
379 |
380 | # only two trimesh to combine
381 | trimesh_0, trimesh_1 = trimeshes
382 | if trimesh_0[1].shape[0] < trimesh_1[1].shape[0]:
383 | trimesh_0, trimesh_1 = trimesh_1, trimesh_0
384 |
385 | trimesh_1 = (trimesh_1[0], trimesh_1[1] + trimesh_0[0].shape[0])
386 | vertices = np.concatenate((trimesh_0[0], trimesh_1[0]), axis= 0)
387 | triangles = np.concatenate((trimesh_0[1], trimesh_1[1]), axis= 0)
388 |
389 | return vertices, triangles
390 |
391 | def move_trimesh(trimesh, move: np.ndarray):
392 | trimesh = list(trimesh)
393 | trimesh[0] += move
394 | return tuple(trimesh)
395 |
--------------------------------------------------------------------------------
/algos/DDPG_demos_generate.py:
--------------------------------------------------------------------------------
1 | import gym
2 |
3 | import os
4 | import random
5 | import time
6 | from tqdm.auto import tqdm
7 | import pickle as pkl
8 |
9 | from typing import NamedTuple
10 |
11 | from omegaconf import DictConfig, OmegaConf
12 |
13 | import numpy as np
14 |
15 | import torch
16 | import torch.nn as nn
17 | import torch.nn.functional as F
18 | import torch.optim as optim
19 | from torch.distributions.normal import Normal
20 | from torch.utils.tensorboard import SummaryWriter
21 |
22 | class RunningMeanStd(nn.Module):
23 | def __init__(self, shape = (), epsilon=1e-08):
24 | super(RunningMeanStd, self).__init__()
25 | self.register_buffer("running_mean", torch.zeros(shape))
26 | self.register_buffer("running_var", torch.ones(shape))
27 | self.register_buffer("count", torch.ones(()))
28 |
29 | self.epsilon = epsilon
30 |
31 | def forward(self, obs, update = True):
32 | if update:
33 | self.update(obs)
34 |
35 | return (obs - self.running_mean) / torch.sqrt(self.running_var + self.epsilon)
36 |
37 | def update(self, x):
38 | """Updates the mean, var and count from a batch of samples."""
39 | batch_mean = torch.mean(x, dim=0)
40 | batch_var = torch.var(x, correction=0, dim=0)
41 | batch_count = x.shape[0]
42 | self.update_from_moments(batch_mean, batch_var, batch_count)
43 |
44 | def update_from_moments(self, batch_mean, batch_var, batch_count):
45 | """Updates from batch mean, variance and count moments."""
46 | self.running_mean, self.running_var, self.count = update_mean_var_count_from_moments(
47 | self.running_mean, self.running_var, self.count, batch_mean, batch_var, batch_count
48 | )
49 |
50 | def update_mean_var_count_from_moments(
51 | mean, var, count, batch_mean, batch_var, batch_count
52 | ):
53 | """Updates the mean, var and count using the previous mean, var, count and batch values."""
54 | delta = batch_mean - mean
55 | tot_count = count + batch_count
56 |
57 | new_mean = mean + delta * batch_count / tot_count
58 | m_a = var * count
59 | m_b = batch_var * batch_count
60 | M2 = m_a + m_b + torch.square(delta) * count * batch_count / tot_count
61 | new_var = M2 / tot_count
62 | new_count = tot_count
63 |
64 | return new_mean, new_var, new_count
65 |
66 | def layer_init(layer, std=np.sqrt(2), bias_const=0.0):
67 | torch.nn.init.orthogonal_(layer.weight, std)
68 | torch.nn.init.constant_(layer.bias, bias_const)
69 | return layer
70 |
71 | class Agent(nn.Module):
72 | def __init__(self, envs):
73 | super().__init__()
74 | self.critic = nn.Sequential(
75 | layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 512)),
76 | nn.ELU(),
77 | layer_init(nn.Linear(512, 256)),
78 | nn.ELU(),
79 | layer_init(nn.Linear(256, 128)),
80 | nn.ELU(),
81 | layer_init(nn.Linear(128, 1), std=1.0),
82 | )
83 | self.actor_mean = nn.Sequential(
84 | layer_init(nn.Linear(np.array(envs.single_observation_space.shape).prod(), 512)),
85 | nn.ELU(),
86 | layer_init(nn.Linear(512, 256)),
87 | nn.ELU(),
88 | layer_init(nn.Linear(256, 128)),
89 | nn.ELU(),
90 | layer_init(nn.Linear(128, np.prod(envs.single_action_space.shape)), std=0.01),
91 | )
92 | self.actor_logstd = nn.Parameter(torch.zeros(1, np.prod(envs.single_action_space.shape)))
93 |
94 | self.obs_rms = RunningMeanStd(shape = envs.single_observation_space.shape)
95 | self.value_rms = RunningMeanStd(shape = ())
96 |
97 | def get_value(self, x):
98 | return self.critic(x)
99 |
100 | def get_action_and_value(self, x, action=None):
101 | action_mean = self.actor_mean(x)
102 | return action_mean, None, None, None
103 |
104 | class SeqReplayBufferSamples(NamedTuple):
105 | observations: torch.Tensor
106 | privileged_observations: torch.Tensor
107 | vision_observations: torch.Tensor
108 | actions: torch.Tensor
109 | next_observations: torch.Tensor
110 | privileged_next_observations: torch.Tensor
111 | vision_next_observations: torch.Tensor
112 | cat_dones: torch.Tensor
113 | raw_constraints: torch.Tensor
114 | dones: torch.Tensor
115 | rewards: torch.Tensor
116 | p_ini_hidden_in: torch.Tensor
117 | p_ini_hidden_out: torch.Tensor
118 | mask: torch.Tensor
119 |
120 | class SeqReplayBuffer():
121 | def __init__(
122 | self,
123 | buffer_size,
124 | observation_space,
125 | privileged_observation_space,
126 | vision_space,
127 | action_space,
128 | num_constraints,
129 | max_episode_length,
130 | seq_len,
131 | num_envs,
132 | critic_rnn = False,
133 | storing_device = "cpu",
134 | training_device = "cpu",
135 | handle_timeout_termination = True,
136 | ):
137 | self.buffer_size = buffer_size
138 | self.max_episode_length = max_episode_length
139 | self.seq_len = seq_len
140 | self.observation_space = observation_space
141 | self.privileged_observation_space = privileged_observation_space.shape
142 | self.vision_space = vision_space
143 | self.action_space = action_space
144 | self.num_envs = num_envs
145 |
146 | self.num_constraints = num_constraints
147 |
148 | self.action_dim = int(np.prod(action_space.shape))
149 | self.pos = 0
150 | self.full = False
151 | self.critic_rnn = critic_rnn
152 | self.storing_device = storing_device
153 | self.training_device = training_device
154 |
155 | self.observations = torch.zeros((self.buffer_size, *self.observation_space), dtype=torch.float32, device = storing_device)
156 | self.next_observations = torch.zeros((self.buffer_size, *self.observation_space), dtype=torch.float32, device = storing_device)
157 | self.privileged_observations = torch.zeros((self.buffer_size, *self.privileged_observation_space), dtype=torch.float32, device = storing_device)
158 | self.privileged_next_observations = torch.zeros((self.buffer_size, *self.privileged_observation_space), dtype=torch.float32, device = storing_device)
159 |
160 | self.vision_observations = torch.zeros((self.buffer_size // 5, *self.vision_space), dtype = torch.uint8)
161 | self.next_vision_observations = torch.zeros_like(self.vision_observations)
162 |
163 | self.actions = torch.zeros((self.buffer_size, self.action_dim), dtype=torch.float32, device = storing_device)
164 | self.rewards = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device)
165 | self.cat_dones = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device)
166 | self.raw_constraints = torch.zeros((self.buffer_size, self.num_constraints), dtype=torch.float32, device = storing_device)
167 | self.dones = torch.zeros((self.buffer_size,), dtype=torch.bool, device = storing_device)
168 | self.p_ini_hidden_in = torch.zeros((self.buffer_size, 1, 256), dtype=torch.float32, device = storing_device)
169 |
170 |         # Markers flag the latest transition of episodes that have started being added
171 |         # to the replay buffer but are not done yet. We still want to sample from them,
172 |         # but the masking needs a termination point so a window does not overlap into the
173 |         # next episode when the buffer is full, or into its empty part when it is not.
174 | self.markers = torch.zeros((self.buffer_size,), dtype=torch.bool, device = storing_device)
175 | self.started_adding = False
176 |
177 | # Handle timeouts termination properly if needed
178 | # see https://github.com/DLR-RM/stable-baselines3/issues/284
179 | self.handle_timeout_termination = handle_timeout_termination
180 | self.timeouts = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device)
181 |
182 | def add(
183 | self,
184 | obs,
185 | privi_obs,
186 | vobs,
187 | next_obs,
188 | privi_next_obs,
189 | next_vobs,
190 | action,
191 | reward,
192 | cat_done,
193 | raw_constraints,
194 | done,
195 | p_ini_hidden_in,
196 | truncateds = None
197 | ):
198 | start_idx = self.pos
199 | stop_idx = min(self.pos + obs.shape[0], self.buffer_size)
200 | b_max_idx = stop_idx - start_idx
201 |
202 | overflow = False
203 | overflow_size = 0
204 | if self.pos + obs.shape[0] > self.buffer_size:
205 | overflow = True
206 | overflow_size = self.pos + obs.shape[0] - self.buffer_size
207 |
208 | assert start_idx % self.num_envs == 0, f"start_idx is not a multiple of {self.num_envs}"
209 | assert stop_idx % self.num_envs == 0, f"stop_idx is not a multiple of {self.num_envs}"
210 |         assert b_max_idx == 0 or b_max_idx == self.num_envs, f"b_max_idx is neither 0 nor {self.num_envs}"
211 |
212 | # Copy to avoid modification by reference
213 | self.observations[start_idx : stop_idx] = obs[: b_max_idx].clone().to(self.storing_device)
214 | self.vision_observations[(start_idx // (self.num_envs * 5)) * self.num_envs : (start_idx // (self.num_envs * 5)) * self.num_envs + b_max_idx] = \
215 | vobs[: b_max_idx].clone().to(self.storing_device)
216 |
217 | self.next_observations[start_idx : stop_idx] = next_obs[: b_max_idx].clone().to(self.storing_device)
218 | self.next_vision_observations[(start_idx // (self.num_envs * 5)) * self.num_envs : (start_idx // (self.num_envs * 5)) * self.num_envs + b_max_idx] = \
219 | next_vobs[: b_max_idx].clone().to(self.storing_device)
220 |
221 | self.privileged_observations[start_idx : stop_idx] = privi_obs[: b_max_idx].clone().to(self.storing_device)
222 | self.privileged_next_observations[start_idx : stop_idx] = privi_next_obs[: b_max_idx].clone().to(self.storing_device)
223 | self.actions[start_idx : stop_idx] = action[: b_max_idx].clone().to(self.storing_device)
224 | self.rewards[start_idx : stop_idx] = reward[: b_max_idx].clone().to(self.storing_device)
225 | self.cat_dones[start_idx : stop_idx] = cat_done[: b_max_idx].clone().to(self.storing_device)
226 | self.raw_constraints[start_idx : stop_idx] = raw_constraints[: b_max_idx].clone().to(self.storing_device)
227 | self.dones[start_idx : stop_idx] = done[: b_max_idx].clone().to(self.storing_device)
228 | self.p_ini_hidden_in[start_idx : stop_idx] = p_ini_hidden_in.swapaxes(0, 1)[: b_max_idx].clone().to(self.storing_device)
229 |
230 | # Current episodes last transition marker
231 | self.markers[start_idx : stop_idx] = 1
232 | # We need to unmark previous transitions as last
233 | # but only if it is not the first add to the replay buffer
234 | if self.started_adding:
235 | self.markers[self.prev_start_idx : self.prev_stop_idx] = 0
236 | if self.prev_overflow:
237 | self.markers[: self.prev_overflow_size] = 0
238 | self.started_adding = True
239 | self.prev_start_idx = start_idx
240 | self.prev_stop_idx = stop_idx
241 | self.prev_overflow = overflow
242 | self.prev_overflow_size = overflow_size
243 |
244 | if self.handle_timeout_termination:
245 | self.timeouts[start_idx : stop_idx] = truncateds[: b_max_idx].clone().to(self.storing_device)
246 |
247 |         assert overflow_size == 0 or overflow_size == self.num_envs, f"overflow_size is neither 0 nor {self.num_envs}"
248 | if overflow:
249 | self.full = True
250 | self.observations[: overflow_size] = obs[b_max_idx :].clone().to(self.storing_device)
251 | self.vision_observations[: overflow_size] = vobs[b_max_idx :].clone().to(self.storing_device)
252 |
253 | self.next_observations[: overflow_size] = next_obs[b_max_idx :].clone().to(self.storing_device)
254 | self.next_vision_observations[: overflow_size] = next_vobs[b_max_idx :].clone().to(self.storing_device)
255 |
256 | self.privileged_observations[: overflow_size] = privi_obs[b_max_idx :].clone().to(self.storing_device)
257 | self.privileged_next_observations[: overflow_size] = privi_next_obs[b_max_idx :].clone().to(self.storing_device)
258 | self.actions[: overflow_size] = action[b_max_idx :].clone().to(self.storing_device)
259 | self.rewards[: overflow_size] = reward[b_max_idx :].clone().to(self.storing_device)
260 | self.cat_dones[: overflow_size] = cat_done[b_max_idx :].clone().to(self.storing_device)
261 | self.raw_constraints[: overflow_size] = raw_constraints[b_max_idx :].clone().to(self.storing_device)
262 | self.dones[: overflow_size] = done[b_max_idx :].clone().to(self.storing_device)
263 | self.p_ini_hidden_in[: overflow_size] = p_ini_hidden_in.swapaxes(0, 1)[b_max_idx :].clone().to(self.storing_device)
264 |
265 | # Current episodes last transition marker
266 | self.markers[: overflow_size] = 1
267 | if self.handle_timeout_termination:
268 | self.timeouts[: overflow_size] = truncateds[b_max_idx :].clone().to(self.storing_device)
269 | self.pos = overflow_size
270 | else:
271 | self.pos += obs.shape[0]
272 |
273 | def sample(self, batch_size) -> SeqReplayBufferSamples:
274 | upper_bound = self.buffer_size if self.full else self.pos
275 | batch_inds = torch.randint(0, upper_bound, size = (batch_size,), device = self.storing_device)
276 | return self._get_samples(batch_inds)
277 |
278 | def _get_samples(self, batch_inds) -> SeqReplayBufferSamples:
279 |         # Using modular arithmetic we get the indices of the transitions that follow each entry of batch_inds,
280 |         # i.e. "episodes" of length self.seq_len. Their true length may be shorter, since the
281 |         # episode may have ended before that; we deal with that using a mask.
282 |         # Flat indexing lets us gather fixed-size windows of a tensor that start at
283 |         # different positions, as long as every window has the same length.
284 |         # For reference:
285 | # [1, 2, 3].repeat_interleave(3) -> [1, 1, 1, 2, 2, 2, 3, 3, 3]
286 | # [1, 2, 3].repeat(3) -> [1, 2, 3, 1, 2, 3, 1, 2, 3]
287 | all_indices_flat = (batch_inds.repeat_interleave(self.seq_len) + torch.arange(self.seq_len, device = self.storing_device).repeat(batch_inds.shape[0]) * self.num_envs) % self.buffer_size
288 | #all_indices_next_flat = (batch_inds.repeat_interleave(self.seq_len) + torch.arange(1, self.seq_len + 1, device = self.device).repeat(batch_inds.shape[0]) * self.num_envs) % self.buffer_size
289 | gathered_obs = self.observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.observations.shape[1:]))
290 | gathered_next_obs = self.next_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.observations.shape[1:]))
291 |
292 | gathered_privi_obs = self.privileged_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.privileged_observations.shape[1:]))
293 | gathered_privi_next_obs = self.privileged_next_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.privileged_observations.shape[1:]))
294 |
295 | all_indices_flat_vision = ( (((batch_inds // self.num_envs) // 5) * self.num_envs + batch_inds % self.num_envs).repeat_interleave((self.seq_len // 5)) + \
296 | torch.arange(start = 0, end = (self.seq_len // 5), device = self.storing_device).repeat(batch_inds.shape[0]) * self.num_envs) % (self.buffer_size // 5)
297 | gathered_vobs = self.vision_observations[all_indices_flat_vision].reshape((batch_inds.shape[0], (self.seq_len // 5), *self.vision_observations.shape[1:]))
298 | gathered_next_vobs = self.next_vision_observations[all_indices_flat_vision].reshape((batch_inds.shape[0], (self.seq_len // 5), *self.vision_observations.shape[1:]))
299 |
300 | gathered_actions = self.actions[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.actions.shape[1:]))
301 | gathered_cat_dones = self.cat_dones[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len))
302 | gathered_raw_constraints = self.raw_constraints[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, self.num_constraints))
303 | gathered_dones = self.dones[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len))
304 | gathered_truncateds = self.timeouts[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len))
305 | gathered_rewards = self.rewards[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len))
306 |
307 | gathered_p_ini_hidden_in = self.p_ini_hidden_in[batch_inds].swapaxes(0, 1)
308 | gathered_p_ini_hidden_out = self.p_ini_hidden_in[(batch_inds + self.num_envs) % self.buffer_size].swapaxes(0, 1)
309 |
310 | gathered_markers = self.markers[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len))
311 | mask = torch.cat([
312 | torch.ones((batch_inds.shape[0], 1), device = self.storing_device),
313 | (1 - (gathered_dones | gathered_markers).float()).cumprod(dim = 1)[:, 1:]
314 | ], dim = 1)
315 | data = (
316 | gathered_obs.to(self.training_device),
317 | gathered_privi_obs.to(self.training_device),
318 | gathered_vobs.to(self.training_device),
319 | gathered_actions.to(self.training_device),
320 | gathered_next_obs.to(self.training_device),
321 | gathered_privi_next_obs.to(self.training_device),
322 | gathered_next_vobs.to(self.training_device),
323 | gathered_cat_dones.to(self.training_device),
324 | gathered_raw_constraints.to(self.training_device),
325 | # Only use dones that are not due to timeouts
326 | # deactivated by default (timeouts is initialized as an array of False)
327 | (gathered_dones.float() * (1 - gathered_truncateds)).to(self.training_device),
328 | gathered_rewards.to(self.training_device),
329 | gathered_p_ini_hidden_in.to(self.training_device),
330 | gathered_p_ini_hidden_out.to(self.training_device),
331 | mask.to(self.training_device),
332 | )
333 | return SeqReplayBufferSamples(*data)
334 |
335 | class ExtractObsWrapper(gym.ObservationWrapper):
336 | def observation(self, obs):
337 | return obs["obs"]
338 |
339 | def DDPG_demos_generate(cfg: DictConfig, envs):
340 | TARGET_POLICY_PATH = cfg["train"]["params"]["config"]["target_policy_path"]
341 | DEMOS_BUFFER_SIZE = int(cfg["train"]["params"]["config"]["demos_buffer_size"])
342 | MAX_EPISODE_LENGTH = 500
343 | SEQ_LEN = 5
344 | NUM_CONSTRAINTS = 107
345 | RNN_CRITIC = True
346 | vis_h = 48
347 | vis_w = 48
348 |
349 | assert SEQ_LEN % 5 == 0, "SEQ_LEN must be a multiple of 5"
350 |     assert DEMOS_BUFFER_SIZE % (5 * envs.num_envs) == 0, "DEMOS_BUFFER_SIZE must be a multiple of 5 * num_envs"
351 |
352 | actor_sd = torch.load(TARGET_POLICY_PATH)
353 |
354 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
355 |
356 | run_path = f"runs/{cfg['task']['name']}_DDPG_demos_generate_D405_parkour40_CORR_CONTROL_{int(time.time())}"
357 | writer = SummaryWriter(run_path)
358 |
359 | if not os.path.exists(run_path):
360 | os.makedirs(run_path)
361 |
362 | OmegaConf.save(config = cfg, f = f"{run_path}/config.yaml")
363 |
364 | envs.single_action_space = envs.action_space
365 | envs.single_observation_space = envs.observation_space
366 |
367 | envs = ExtractObsWrapper(envs)
368 |
369 | actor_demos = Agent(envs).to(device)
370 | actor_demos.load_state_dict(actor_sd)
371 |     print("Target actor for demonstrations loaded successfully.")
372 | envs.single_observation_space.dtype = np.float32
373 | rb_demos = SeqReplayBuffer(
374 | DEMOS_BUFFER_SIZE,
375 | (45,),
376 | envs.single_observation_space,
377 | (vis_h, vis_w),
378 | envs.single_action_space,
379 | NUM_CONSTRAINTS,
380 | MAX_EPISODE_LENGTH,
381 | SEQ_LEN,
382 | envs.num_envs,
383 | critic_rnn = RNN_CRITIC,
384 | storing_device = "cpu",
385 | training_device = device,
386 | handle_timeout_termination=True,
387 | )
388 |
389 | filling_iterations = DEMOS_BUFFER_SIZE // envs.num_envs
390 | obs_privi = envs.reset()
391 | obs = obs_privi.clone()[:, : 45]
392 | vobs = torch.zeros((envs.num_envs, vis_h, vis_w), dtype = torch.uint8, device = device)
393 | next_vobs = vobs.clone()
394 | with torch.no_grad():
395 | dummy_actions, _, _, _ = actor_demos.get_action_and_value(actor_demos.obs_rms(obs_privi, update = False))
396 | actions_min, _ = dummy_actions.min(dim = 0)
397 | actions_max, _ = dummy_actions.max(dim = 0)
398 |
399 |     dummy_hiddens = torch.zeros((1, envs.num_envs, 256), device = device)
400 |
401 | for fi in range(filling_iterations):
402 | with torch.no_grad():
403 | actions, _, _, _ = actor_demos.get_action_and_value(actor_demos.obs_rms(obs_privi, update = False))
404 | cur_min, _ = actions.min(dim = 0)
405 | cur_max, _ = actions.max(dim = 0)
406 | actions_min = torch.min(actions_min, cur_min)
407 | actions_max = torch.max(actions_max, cur_max)
408 |
409 | next_obs_privi, rewards, terminations, infos = envs.step(actions)
410 | true_dones = infos["true_dones"].float()
411 | truncateds = infos["truncateds"].float()
412 | raw_constraints = infos["raw_constraints"]
413 |
414 | next_obs = next_obs_privi.clone()[:, : 45]
415 |
416 | real_next_obs_privi = next_obs_privi.clone()
417 | real_next_obs = next_obs.clone()
418 |
419 | if "depth" in infos:
420 | next_vobs = (infos["depth"].clone()[..., 19:-18] * 255).to(torch.uint8)
421 |
422 |         rb_demos.add(obs, obs_privi, vobs, real_next_obs, real_next_obs_privi, next_vobs, actions, rewards, terminations, raw_constraints, true_dones, dummy_hiddens, truncateds)
423 |
424 | # TRY NOT TO MODIFY: CRUCIAL step easy to overlook
425 | obs_privi = next_obs_privi
426 | obs = next_obs
427 | vobs = next_vobs
428 |
429 | torch.save((actions_min.cpu(), actions_max.cpu()), f"{run_path}/ppo_actions_minmax.pt")
430 | pkl.dump(rb_demos, open(f"{run_path}/rb_demos.pkl", "wb"))
431 |     print(f"Demo replay buffer filled with {DEMOS_BUFFER_SIZE} expert demonstration transitions")
432 |
--------------------------------------------------------------------------------
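A note on the sequence sampling in SeqReplayBuffer._get_samples above: transitions from all environments are interleaved in the buffer (each add() writes one slot per env), so consecutive steps of the same environment sit num_envs slots apart. The repeat_interleave/repeat arithmetic gathers a fixed-length window per sampled start index, and the mask stops each window at the episode boundary. A self-contained sketch with made-up sizes (2 envs, seq_len 3, an 8-slot buffer):

    import torch

    num_envs, seq_len, buffer_size = 2, 3, 8
    batch_inds = torch.tensor([1, 4])   # one start slot per sampled window

    # Repeat each start index seq_len times, offset by 0, num_envs, 2*num_envs, ...
    # so the window walks forward through the same environment, wrapping around.
    offsets = torch.arange(seq_len).repeat(batch_inds.shape[0]) * num_envs
    all_indices_flat = (batch_inds.repeat_interleave(seq_len) + offsets) % buffer_size
    print(all_indices_flat.reshape(-1, seq_len))
    # tensor([[1, 3, 5],
    #         [4, 6, 0]])

    # Step 0 of each window is always kept; a later step survives only if no
    # done/marker flag has occurred at any step up to and including it.
    dones = torch.tensor([[0, 1, 0],
                          [0, 0, 0]], dtype=torch.bool)
    mask = torch.cat([
        torch.ones(dones.shape[0], 1),
        (1 - dones.float()).cumprod(dim=1)[:, 1:],
    ], dim=1)
    print(mask)
    # tensor([[1., 0., 0.],
    #         [1., 1., 1.]])

The vision stream is stored at one fifth of the control rate, so the file above repeats the same kind of arithmetic with buffer_size // 5 and seq_len // 5, dividing the per-env step index by 5 while keeping the env offset.
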
/solo12/solo12_mpi_scaled.urdf:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/solo12/solo12_mpi.urdf:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/algos/DDPG_demos_rnn_vision.py:
--------------------------------------------------------------------------------
1 | import gym
2 |
3 | import os
4 | import random
5 | import time
6 | from tqdm.auto import tqdm
7 | import pickle as pkl
8 | import itertools
9 |
10 | from typing import NamedTuple
11 |
12 | from omegaconf import DictConfig, OmegaConf
13 |
14 | import numpy as np
15 |
16 | import torch
17 | import torch.nn as nn
18 | import torch.nn.functional as F
19 | import torch.optim as optim
20 | from torch.distributions.normal import Normal
21 | from torch.utils.tensorboard import SummaryWriter
22 |
23 | class SeqReplayBufferSamples(NamedTuple):
24 | observations: torch.Tensor
25 | privileged_observations: torch.Tensor
26 | vision_observations: torch.Tensor
27 | actions: torch.Tensor
28 | next_observations: torch.Tensor
29 | privileged_next_observations: torch.Tensor
30 | vision_next_observations: torch.Tensor
31 | cat_dones: torch.Tensor
32 | raw_constraints: torch.Tensor
33 | dones: torch.Tensor
34 | rewards: torch.Tensor
35 | p_ini_hidden_in: torch.Tensor
36 | p_ini_hidden_out: torch.Tensor
37 | mask: torch.Tensor
38 |
39 | class SeqReplayBuffer():
40 | def __init__(
41 | self,
42 | buffer_size,
43 | observation_space,
44 | privileged_observation_space,
45 | vision_space,
46 | action_space,
47 | num_constraints,
48 | max_episode_length,
49 | seq_len,
50 | num_envs,
51 | storing_device = "cpu",
52 | training_device = "cpu",
53 | handle_timeout_termination = True,
54 | ):
55 | self.buffer_size = buffer_size
56 | self.max_episode_length = max_episode_length
57 | self.seq_len = seq_len
58 | self.observation_space = observation_space
59 | self.privileged_observation_space = privileged_observation_space.shape
60 | self.vision_space = vision_space
61 | self.action_space = action_space
62 | self.num_envs = num_envs
63 |
64 | self.num_constraints = num_constraints
65 |
66 | self.action_dim = int(np.prod(action_space.shape))
67 | self.pos = 0
68 | self.full = False
69 | self.storing_device = storing_device
70 | self.training_device = training_device
71 |
72 | self.observations = torch.zeros((self.buffer_size, *self.observation_space), dtype=torch.float32, device = storing_device)
73 | self.next_observations = torch.zeros((self.buffer_size, *self.observation_space), dtype=torch.float32, device = storing_device)
74 | self.privileged_observations = torch.zeros((self.buffer_size, *self.privileged_observation_space), dtype=torch.float32, device = storing_device)
75 | self.privileged_next_observations = torch.zeros((self.buffer_size, *self.privileged_observation_space), dtype=torch.float32, device = storing_device)
76 |
77 | self.vision_observations = torch.zeros((self.buffer_size // 5, *self.vision_space), dtype = torch.uint8)
78 | self.next_vision_observations = torch.zeros_like(self.vision_observations)
79 |
80 | self.actions = torch.zeros((self.buffer_size, self.action_dim), dtype=torch.float32, device = storing_device)
81 | self.rewards = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device)
82 | self.cat_dones = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device)
83 | self.raw_constraints = torch.zeros((self.buffer_size, self.num_constraints), dtype=torch.float32, device = storing_device)
84 | self.dones = torch.zeros((self.buffer_size,), dtype=torch.bool, device = storing_device)
85 | self.p_ini_hidden_in = torch.zeros((self.buffer_size, 1, 256), dtype=torch.float32, device = storing_device)
86 |
87 |         # Markers flag the latest transition of episodes that have started being added
88 |         # to the replay buffer but are not done yet. We still want to sample from them,
89 |         # but the masking needs a termination point so a window does not overlap into the
90 |         # next episode when the buffer is full, or into its empty part when it is not.
91 | self.markers = torch.zeros((self.buffer_size,), dtype=torch.bool, device = storing_device)
92 | self.started_adding = False
93 |
94 | # Handle timeouts termination properly if needed
95 | # see https://github.com/DLR-RM/stable-baselines3/issues/284
96 | self.handle_timeout_termination = handle_timeout_termination
97 | self.timeouts = torch.zeros((self.buffer_size,), dtype=torch.float32, device = storing_device)
98 |
99 | def add(
100 | self,
101 | obs,
102 | privi_obs,
103 | vobs,
104 | next_obs,
105 | privi_next_obs,
106 | next_vobs,
107 | action,
108 | reward,
109 | cat_done,
110 | raw_constraints,
111 | done,
112 | p_ini_hidden_in,
113 | truncateds = None
114 | ):
115 | start_idx = self.pos
116 | stop_idx = min(self.pos + obs.shape[0], self.buffer_size)
117 | b_max_idx = stop_idx - start_idx
118 |
119 | overflow = False
120 | overflow_size = 0
121 | if self.pos + obs.shape[0] > self.buffer_size:
122 | overflow = True
123 | overflow_size = self.pos + obs.shape[0] - self.buffer_size
124 |
125 | assert start_idx % self.num_envs == 0, f"start_idx is not a multiple of {self.num_envs}"
126 | assert stop_idx % self.num_envs == 0, f"stop_idx is not a multiple of {self.num_envs}"
127 |         assert b_max_idx == 0 or b_max_idx == self.num_envs, f"b_max_idx is neither 0 nor {self.num_envs}"
128 |
129 | # Copy to avoid modification by reference
130 | self.observations[start_idx : stop_idx] = obs[: b_max_idx].clone().to(self.storing_device)
131 | self.vision_observations[(start_idx // (self.num_envs * 5)) * self.num_envs : (start_idx // (self.num_envs * 5)) * self.num_envs + b_max_idx] = \
132 | vobs[: b_max_idx].clone().to(self.storing_device)
133 |
134 | self.next_observations[start_idx : stop_idx] = next_obs[: b_max_idx].clone().to(self.storing_device)
135 | self.next_vision_observations[(start_idx // (self.num_envs * 5)) * self.num_envs : (start_idx // (self.num_envs * 5)) * self.num_envs + b_max_idx] = \
136 | next_vobs[: b_max_idx].clone().to(self.storing_device)
137 |
138 | self.privileged_observations[start_idx : stop_idx] = privi_obs[: b_max_idx].clone().to(self.storing_device)
139 | self.privileged_next_observations[start_idx : stop_idx] = privi_next_obs[: b_max_idx].clone().to(self.storing_device)
140 | self.actions[start_idx : stop_idx] = action[: b_max_idx].clone().to(self.storing_device)
141 | self.rewards[start_idx : stop_idx] = reward[: b_max_idx].clone().to(self.storing_device)
142 | self.cat_dones[start_idx : stop_idx] = cat_done[: b_max_idx].clone().to(self.storing_device)
143 | self.raw_constraints[start_idx : stop_idx] = raw_constraints[: b_max_idx].clone().to(self.storing_device)
144 | self.dones[start_idx : stop_idx] = done[: b_max_idx].clone().to(self.storing_device)
145 | self.p_ini_hidden_in[start_idx : stop_idx] = p_ini_hidden_in.swapaxes(0, 1)[: b_max_idx].clone().to(self.storing_device)
146 |
147 | # Current episodes last transition marker
148 | self.markers[start_idx : stop_idx] = 1
149 | # We need to unmark previous transitions as last
150 | # but only if it is not the first add to the replay buffer
151 | if self.started_adding:
152 | self.markers[self.prev_start_idx : self.prev_stop_idx] = 0
153 | if self.prev_overflow:
154 | self.markers[: self.prev_overflow_size] = 0
155 | self.started_adding = True
156 | self.prev_start_idx = start_idx
157 | self.prev_stop_idx = stop_idx
158 | self.prev_overflow = overflow
159 | self.prev_overflow_size = overflow_size
160 |
161 | if self.handle_timeout_termination:
162 | self.timeouts[start_idx : stop_idx] = truncateds[: b_max_idx].clone().to(self.storing_device)
163 |
164 |         assert overflow_size == 0 or overflow_size == self.num_envs, f"overflow_size is neither 0 nor {self.num_envs}"
165 | if overflow:
166 | self.full = True
167 | self.observations[: overflow_size] = obs[b_max_idx :].clone().to(self.storing_device)
168 | self.vision_observations[: overflow_size] = vobs[b_max_idx :].clone().to(self.storing_device)
169 |
170 | self.next_observations[: overflow_size] = next_obs[b_max_idx :].clone().to(self.storing_device)
171 | self.next_vision_observations[: overflow_size] = next_vobs[b_max_idx :].clone().to(self.storing_device)
172 |
173 | self.privileged_observations[: overflow_size] = privi_obs[b_max_idx :].clone().to(self.storing_device)
174 | self.privileged_next_observations[: overflow_size] = privi_next_obs[b_max_idx :].clone().to(self.storing_device)
175 | self.actions[: overflow_size] = action[b_max_idx :].clone().to(self.storing_device)
176 | self.rewards[: overflow_size] = reward[b_max_idx :].clone().to(self.storing_device)
177 | self.cat_dones[: overflow_size] = cat_done[b_max_idx :].clone().to(self.storing_device)
178 | self.raw_constraints[: overflow_size] = raw_constraints[b_max_idx :].clone().to(self.storing_device)
179 | self.dones[: overflow_size] = done[b_max_idx :].clone().to(self.storing_device)
180 | self.p_ini_hidden_in[: overflow_size] = p_ini_hidden_in.swapaxes(0, 1)[b_max_idx :].clone().to(self.storing_device)
181 |
182 | # Current episodes last transition marker
183 | self.markers[: overflow_size] = 1
184 | if self.handle_timeout_termination:
185 | self.timeouts[: overflow_size] = truncateds[b_max_idx :].clone().to(self.storing_device)
186 | self.pos = overflow_size
187 | else:
188 | self.pos += obs.shape[0]
189 |
190 | def sample(self, batch_size) -> SeqReplayBufferSamples:
191 | upper_bound = self.buffer_size if self.full else self.pos
192 | batch_inds = torch.randint(0, upper_bound, size = (batch_size,), device = self.storing_device)
193 | return self._get_samples(batch_inds)
194 |
195 | def _get_samples(self, batch_inds) -> SeqReplayBufferSamples:
196 |         # Using modular arithmetic we get the indices of the transitions that follow each entry of batch_inds,
197 |         # i.e. "episodes" of length self.seq_len. Their true length may be shorter, since the
198 |         # episode may have ended before that; we deal with that using a mask.
199 |         # Flat indexing lets us gather fixed-size windows of a tensor that start at
200 |         # different positions, as long as every window has the same length.
201 |         # For reference:
202 | # [1, 2, 3].repeat_interleave(3) -> [1, 1, 1, 2, 2, 2, 3, 3, 3]
203 | # [1, 2, 3].repeat(3) -> [1, 2, 3, 1, 2, 3, 1, 2, 3]
204 | all_indices_flat = (batch_inds.repeat_interleave(self.seq_len) + torch.arange(self.seq_len, device = self.storing_device).repeat(batch_inds.shape[0]) * self.num_envs) % self.buffer_size
205 | #all_indices_next_flat = (batch_inds.repeat_interleave(self.seq_len) + torch.arange(1, self.seq_len + 1, device = self.device).repeat(batch_inds.shape[0]) * self.num_envs) % self.buffer_size
206 | gathered_obs = self.observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.observations.shape[1:]))
207 | gathered_next_obs = self.next_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.observations.shape[1:]))
208 |
209 | gathered_privi_obs = self.privileged_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.privileged_observations.shape[1:]))
210 | gathered_privi_next_obs = self.privileged_next_observations[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.privileged_observations.shape[1:]))
211 |
212 | all_indices_flat_vision = ( (((batch_inds // self.num_envs) // 5) * self.num_envs + batch_inds % self.num_envs).repeat_interleave((self.seq_len // 5)) + \
213 | torch.arange(start = 0, end = (self.seq_len // 5), device = self.storing_device).repeat(batch_inds.shape[0]) * self.num_envs) % (self.buffer_size // 5)
214 | gathered_vobs = self.vision_observations[all_indices_flat_vision].reshape((batch_inds.shape[0], (self.seq_len // 5), *self.vision_observations.shape[1:]))
215 | gathered_next_vobs = self.next_vision_observations[all_indices_flat_vision].reshape((batch_inds.shape[0], (self.seq_len // 5), *self.vision_observations.shape[1:]))
216 |
217 | gathered_actions = self.actions[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, *self.actions.shape[1:]))
218 | gathered_cat_dones = self.cat_dones[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len))
219 | gathered_raw_constraints = self.raw_constraints[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len, self.num_constraints))
220 | gathered_dones = self.dones[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len))
221 | gathered_truncateds = self.timeouts[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len))
222 | gathered_rewards = self.rewards[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len))
223 |
224 | gathered_p_ini_hidden_in = self.p_ini_hidden_in[batch_inds].swapaxes(0, 1)
225 | gathered_p_ini_hidden_out = self.p_ini_hidden_in[(batch_inds + self.num_envs) % self.buffer_size].swapaxes(0, 1)
226 |
227 | gathered_markers = self.markers[all_indices_flat].reshape((batch_inds.shape[0], self.seq_len))
228 | mask = torch.cat([
229 | torch.ones((batch_inds.shape[0], 1), device = self.storing_device),
230 | (1 - (gathered_dones | gathered_markers).float()).cumprod(dim = 1)[:, 1:]
231 | ], dim = 1)
232 | data = (
233 | gathered_obs.to(self.training_device),
234 | gathered_privi_obs.to(self.training_device),
235 | gathered_vobs.to(self.training_device),
236 | gathered_actions.to(self.training_device),
237 | gathered_next_obs.to(self.training_device),
238 | gathered_privi_next_obs.to(self.training_device),
239 | gathered_next_vobs.to(self.training_device),
240 | gathered_cat_dones.to(self.training_device),
241 | gathered_raw_constraints.to(self.training_device),
242 | # Only use dones that are not due to timeouts
243 | # deactivated by default (timeouts is initialized as an array of False)
244 | (gathered_dones.float() * (1 - gathered_truncateds)).to(self.training_device),
245 | gathered_rewards.to(self.training_device),
246 | gathered_p_ini_hidden_in.to(self.training_device),
247 | gathered_p_ini_hidden_out.to(self.training_device),
248 | mask.to(self.training_device),
249 | )
250 | return SeqReplayBufferSamples(*data)
251 |
252 | def random_translate(imgs, pad = 4):
253 | n, c, h, w = imgs.size()
254 | imgs = torch.nn.functional.pad(imgs, (pad, pad, pad, pad)) #, mode = "replicate")
255 | w1 = torch.randint(0, 2*pad + 1, (n,))
256 | h1 = torch.randint(0, 2*pad + 1, (n,))
257 | cropped = torch.empty((n, c, h, w), dtype=imgs.dtype, device=imgs.device)
258 | for i, (img, w11, h11) in enumerate(zip(imgs, w1, h1)):
259 | cropped[i][:] = img[:, h11:h11 + h, w11:w11 + w]
260 | return cropped
261 |
262 | def random_noise(images, noise_std=0.02):
263 | N, C, H, W = images.size()
264 | noise = noise_std*torch.randn_like(images)
265 | noise *= torch.bernoulli(0.5*torch.ones(N, 1, 1, 1).to(images)) # Only applied on half of the images
266 | return images + noise
267 |
268 | def random_shift(images, padding=4):
269 | N, C, H, W = images.size()
270 | padded_images = torch.nn.functional.pad(images, (padding, padding, padding, padding), mode='replicate')
271 | crop_x = torch.randint(0, 2 * padding, (N,))
272 | crop_y = torch.randint(0, 2 * padding, (N,))
273 | shifted_images = torch.zeros_like(images)
274 | for i in range(N):
275 | shifted_images[i] = padded_images[i, :, crop_y[i]:crop_y[i] + H, crop_x[i]:crop_x[i] + W]
276 | return shifted_images
277 |
278 | def random_cutout(images, min_size=2, max_size=24):
279 | N, C, H, W = images.size()
280 | for i in range(N):
281 | size_h = random.randint(min_size, max_size)
282 | size_w = random.randint(min_size, max_size)
283 | top = random.randint(0, H - size_h)
284 | left = random.randint(0, W - size_w)
285 | coin_flip = random.random()
286 | if coin_flip < 0.2:
287 | fill_value = 0.0
288 | images[i, :, top:top + size_h, left:left + size_w] = fill_value
289 | elif coin_flip < 0.4:
290 | fill_value = 1.0
291 | images[i, :, top:top + size_h, left:left + size_w] = fill_value
292 | elif coin_flip < 0.6:
293 | fill_value = torch.rand((C, size_h, size_w), device=images.device)
294 | images[i, :, top:top + size_h, left:left + size_w] = fill_value
295 | return images
296 |
297 | class DepthOnlyFCBackbone58x87(nn.Module):
298 | def __init__(self, scandots_output_dim, seq_len, batch_size):
299 | super().__init__()
300 |
301 | activation = nn.ELU()
302 | self.image_compression = nn.Sequential(
303 | nn.Conv2d(1, 16, 5), nn.LeakyReLU(), nn.MaxPool2d(2),
304 | nn.Conv2d(16, 32, 4), nn.LeakyReLU(), nn.MaxPool2d(2),
305 | nn.Conv2d(32, 32, 3), nn.LeakyReLU(),
306 | nn.Flatten(),
307 | nn.Linear(1568, scandots_output_dim), # 48x48
308 | nn.LeakyReLU(),
309 |             nn.Linear(128, scandots_output_dim) # in_features hard-coded to 128; assumes scandots_output_dim == 128 as constructed
310 | )
311 |
312 | self.output_activation = activation
313 |
314 | def forward(self, vobs, hist = True, augment = False):
315 | bs, seql, w, h = vobs.size()
316 |
317 | vobs = vobs.view(-1, 1, w, h)
318 | if augment:
319 | vobs = random_cutout(random_noise(random_shift(vobs)))
320 |
321 | vision_latent = self.output_activation(self.image_compression(vobs))
322 | vision_latent = vision_latent.view(bs, seql, 128)
323 |
324 | if hist:
325 | vision_latent = vision_latent.repeat_interleave(5, axis = 1)
326 |
327 | return vision_latent
328 |
329 | class ExtraCorpusQMemory(nn.Module):
330 | def __init__(self, seq_len, batch_size):
331 | super().__init__()
332 | self.vision = DepthOnlyFCBackbone58x87(128, seq_len, batch_size)
333 | self.memory = nn.GRU(128, hidden_size = 256, batch_first = True)
334 |
335 | def forward(self, x, hidden_in):
336 | if hidden_in is None:
337 | raise NotImplementedError
338 |
339 | vision_latent = self.vision(x)
340 | time_latent, hidden_out = self.memory(vision_latent, hidden_in)
341 | return time_latent, hidden_out
342 |
343 | class QNetworkVanilla(nn.Module):
344 | def __init__(self, env, device):
345 | super().__init__()
346 | self.memory = nn.GRU(np.array(env.single_observation_space.shape).prod() + np.prod(env.single_action_space.shape), hidden_size = 256, batch_first = True) # dummy memory for compatibility
347 | self.fc1 = nn.Linear(np.array(env.single_observation_space.shape).prod() + np.prod(env.single_action_space.shape), 512)
348 | self.ln1 = nn.LayerNorm(512)
349 | self.fc2 = nn.Linear(512, 256)
350 | self.ln2 = nn.LayerNorm(256)
351 | self.fc3 = nn.Linear(256, 128)
352 | self.ln3 = nn.LayerNorm(128)
353 | self.fc4 = nn.Linear(128, 1)
354 |
355 | def forward(self, x, a, latent):
356 | x = torch.cat([x, a], -1)
357 | x = F.elu(self.ln1(self.fc1(x)))
358 | x = F.elu(self.ln2(self.fc2(x)))
359 | x = F.elu(self.ln3(self.fc3(x)))
360 | x = self.fc4(x)
361 | return x, None
362 |
363 | class Actor(nn.Module):
364 | def __init__(self, env):
365 | super().__init__()
366 | self.memory = nn.GRU(128 + 45, hidden_size = 256, batch_first = True)
367 | self.fc1 = nn.Linear(256, 512)
368 | self.fc2 = nn.Linear(512, 256)
369 | self.fc3 = nn.Linear(256, 128)
370 | self.fc_mu = nn.Linear(128, np.prod(env.single_action_space.shape))
371 |
372 | # action rescaling
373 | self.register_buffer(
374 | "action_scale", torch.tensor((env.action_space.high - env.action_space.low) / 2.0, dtype=torch.float32)
375 | )
376 | self.register_buffer(
377 | "action_bias", torch.tensor((env.action_space.high + env.action_space.low) / 2.0, dtype=torch.float32)
378 | )
379 |
380 | def forward(self, x, vision_latent, hidden_in):
381 | if hidden_in is None:
382 | raise NotImplementedError
383 |
384 | x = torch.cat([x, vision_latent], -1)
385 | time_latent, hidden_out = self.memory(x, hidden_in)
386 | x = F.elu(self.fc1(time_latent))
387 | x = F.elu(self.fc2(x))
388 | x = F.elu(self.fc3(x))
389 | x = torch.tanh(self.fc_mu(x))
390 |
391 | return x * self.action_scale + self.action_bias, hidden_out, None
392 |
393 | class ExtractObsWrapper(gym.ObservationWrapper):
394 | def observation(self, obs):
395 | return obs["obs"]
396 |
397 | def DDPG_demos_rnn_vision(cfg: DictConfig, envs):
398 | TOTAL_TIMESTEPS = int(cfg["train"]["params"]["config"]["total_timesteps"])
399 | CRITIC_LEARNING_RATE = cfg["train"]["params"]["config"]["critic_learning_rate"]
400 | ACTOR_LEARNING_RATE = cfg["train"]["params"]["config"]["actor_learning_rate"]
401 | BUFFER_SIZE = int(cfg["train"]["params"]["config"]["buffer_size"])
402 | LEARNING_STARTS = cfg["train"]["params"]["config"]["learning_starts"]
403 | GAMMA = cfg["train"]["params"]["config"]["gamma"]
404 | POLICY_FREQUENCY = cfg["train"]["params"]["config"]["policy_frequency"]
405 | TAU = cfg["train"]["params"]["config"]["tau"]
406 | BATCH_SIZE = cfg["train"]["params"]["config"]["batch_size"]
407 | POLICY_NOISE = cfg["train"]["params"]["config"]["policy_noise"]
408 | NOISE_CLIP = cfg["train"]["params"]["config"]["noise_clip"]
409 | DEMOS_RB_PATH = cfg["train"]["params"]["config"]["demos_rb_path"]
410 | MAX_EPISODE_LENGTH = 500
411 | SEQ_LEN = cfg["train"]["params"]["config"]["seq_len"]
412 | EVAL_AT_END = True
413 | NUM_CONSTRAINTS = 107
414 | CRITIC_NB = 10
415 | num_atoms = 101
416 | vis_h = 48
417 | vis_w = 48
418 |
419 | assert SEQ_LEN % 5 == 0, "SEQ_LEN must be a multiple of 5"
420 | assert BUFFER_SIZE % (5 * envs.num_envs) == 0, "BUFFER_SIZE must be a multiple of 5 * num_envs"
421 |
422 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
423 |
424 | run_path = f"runs/{cfg['task']['name']}_DDPG_demos_rnn_redq_D405_z0.03_x0.24_parkour40_CORR_AUG_48x48_bigconv_seqlen{SEQ_LEN}_bs{BATCH_SIZE}_seed{cfg.seed}_{int(time.time())}"
425 | writer = SummaryWriter(run_path)
426 |
427 | if not os.path.exists(run_path):
428 | os.makedirs(run_path)
429 |
430 | OmegaConf.save(config = cfg, f = f"{run_path}/config.yaml")
431 |
432 | envs.single_action_space = envs.action_space
433 | envs.single_observation_space = envs.observation_space
434 |
435 | envs = ExtractObsWrapper(envs)
436 |
437 | rb_demos = pkl.load(open(f"{DEMOS_RB_PATH}/rb_demos.pkl", "rb"))
438 | ######
439 | rb_demos.seq_len = SEQ_LEN
440 | ######
441 | actions_min, actions_max = torch.load(f"{DEMOS_RB_PATH}/ppo_actions_minmax.pt")
442 | actions_min, actions_max = actions_min.to(device), actions_max.to(device)
443 | envs.action_space.low = actions_min.cpu().numpy()
444 | envs.action_space.high = actions_max.cpu().numpy()
445 | envs.single_action_space = envs.action_space
446 | envs.single_observation_space = envs.observation_space
447 |
448 | vision_nn = DepthOnlyFCBackbone58x87(128, SEQ_LEN, BATCH_SIZE).to(device)
449 | actor = Actor(envs).to(device)
450 |
451 | QNetwork = QNetworkVanilla
452 | qfs = [QNetwork(envs, device = device).to(device) for _ in range(CRITIC_NB)]
453 | qf_targets = [QNetwork(envs, device = device).to(device) for _ in range(CRITIC_NB)]
454 | for i in range(CRITIC_NB):
455 | qf_targets[i].load_state_dict(qfs[i].state_dict())
456 |
457 | q_optimizer = optim.Adam(itertools.chain(*([q.parameters() for q in qfs])), lr=CRITIC_LEARNING_RATE)
458 | qf1 = QNetwork(envs, device = device).to(device)
459 | qf2 = QNetwork(envs, device = device).to(device)
460 | actor_optimizer = optim.Adam(list(actor.parameters()) + list(vision_nn.parameters()), lr=ACTOR_LEARNING_RATE)
461 |
462 | envs.single_observation_space.dtype = np.float32
463 | rb = SeqReplayBuffer(
464 | BUFFER_SIZE,
465 | (45,),
466 | envs.single_observation_space,
467 | (vis_h, vis_w),
468 | envs.single_action_space,
469 | NUM_CONSTRAINTS,
470 | MAX_EPISODE_LENGTH,
471 | SEQ_LEN,
472 | envs.num_envs,
473 | storing_device = "cpu",
474 | training_device = device,
475 | handle_timeout_termination=True,
476 | )
477 | start_time = time.time()
478 |
479 | # TRY NOT TO MODIFY: start the game
480 | obs_privi = envs.reset()
481 | obs = obs_privi.clone()[:, : 45]
482 | vobs = torch.zeros((envs.num_envs, vis_h, vis_w), device = device)
483 | next_vobs = vobs.clone()
484 | vision_latent = None
485 |
486 | isaac_env_steps = TOTAL_TIMESTEPS // envs.num_envs
487 | print(f"Starting training for {isaac_env_steps} isaac env steps")
488 |
489 | gru_p_hidden_in = torch.zeros((actor.memory.num_layers, envs.num_envs, actor.memory.hidden_size), device = device) # p for policy
490 | gru_p_hidden_out = gru_p_hidden_in.clone()
491 |
492 | true_steps_num = 0
493 | actor_training_step = 0
494 | for global_step in range(isaac_env_steps):
495 | # ALGO LOGIC: put action logic here
496 | with torch.no_grad():
497 | if global_step % 5 == 0:
498 | vision_latent = vision_nn(vobs.unsqueeze(1), hist = False)
499 | if true_steps_num < LEARNING_STARTS:
500 | actions = torch.zeros((envs.num_envs, rb.action_dim), device = device).uniform_(-1, 1)
501 | else:
502 | with torch.no_grad():
503 | actions, gru_p_hidden_out, _ = actor(obs.unsqueeze(1), vision_latent, gru_p_hidden_in)
504 | actions = actions.squeeze(1)
505 |
506 | true_steps_num += envs.num_envs
507 |
508 | # TRY NOT TO MODIFY: execute the game and log data.
509 | next_obs_privi, rewards, terminations, infos = envs.step(actions)
510 | true_dones = infos["true_dones"]
511 | truncateds = infos["truncateds"].float()
512 | raw_constraints = infos["raw_constraints"]
513 |
514 | next_obs = next_obs_privi.clone()[:, : 45]
515 |
516 | real_next_obs_privi = next_obs_privi.clone()
517 | real_next_obs = next_obs.clone()
518 | if(true_dones.any()):
519 | true_dones_idx = torch.argwhere(true_dones).squeeze()
520 | gru_p_hidden_out[:, true_dones_idx] = 0
521 |
522 | if "depth" in infos:
523 | next_vobs = infos["depth"].clone()[..., 19:-18]
524 |
525 | rb.add(obs, obs_privi, (vobs * 255).to(torch.uint8), real_next_obs, real_next_obs_privi, (next_vobs * 255).to(torch.uint8), \
526 | actions, rewards, terminations, raw_constraints, true_dones, gru_p_hidden_in, truncateds)
527 | gru_p_hidden_in = gru_p_hidden_out
528 |
529 | if (global_step + 1) % 24 == 0:
530 | for el, v in zip(list(envs.episode_sums.keys())[:envs.numRewards], (torch.mean(envs.rew_mean_reset, dim=0)).tolist()):
531 | writer.add_scalar(f"reward/{el}", v, (global_step + 1) // 24)
532 | writer.add_scalar(f"reward/cum_rew", (torch.sum(torch.mean(envs.rew_cum_reset, dim=0))).item(), (global_step + 1) // 24)
533 | writer.add_scalar(f"reward/avg_rew", envs.terrain_levels.float().mean().item(), (global_step + 1) // 24)
534 | for el, v in zip(envs.cstr_manager.get_names(), (100.0 * torch.mean(envs.cstr_mean_reset, dim=0)).tolist()):
535 | writer.add_scalar(f"cstr/{el}", v, (global_step + 1) // 24)
536 |
537 | # TRY NOT TO MODIFY: CRUCIAL step easy to overlook
538 | obs_privi = next_obs_privi
539 | obs = next_obs
540 | vobs = next_vobs
541 |
542 | # ALGO LOGIC: training.
543 | if true_steps_num > LEARNING_STARTS:
544 | for local_steps in range(8):
545 | data = rb.sample(BATCH_SIZE // 2)
546 | data_expert = rb_demos.sample(BATCH_SIZE // 2)
547 | data = SeqReplayBufferSamples(
548 | observations=torch.cat([data.observations, data_expert.observations], dim=0),
549 | privileged_observations=torch.cat([data.privileged_observations, data_expert.privileged_observations], dim=0),
550 | vision_observations=torch.cat([data.vision_observations, data_expert.vision_observations], dim=0).float() / 255,
551 | actions=torch.cat([data.actions, data_expert.actions], dim=0),
552 | next_observations=torch.cat([data.next_observations, data_expert.next_observations], dim=0),
553 | privileged_next_observations=torch.cat([data.privileged_next_observations, data_expert.privileged_next_observations], dim=0),
554 | vision_next_observations=torch.cat([data.vision_next_observations, data_expert.vision_next_observations], dim=0).float() / 255,
555 | cat_dones=torch.cat([data.cat_dones, data_expert.cat_dones], dim=0),
556 | raw_constraints=torch.cat([data.raw_constraints, data_expert.raw_constraints], dim=0),
557 | dones=torch.cat([data.dones, data_expert.dones], dim=0),
558 | rewards=torch.cat([data.rewards, data_expert.rewards], dim=0),
559 | p_ini_hidden_in=torch.cat([data.p_ini_hidden_in, data_expert.p_ini_hidden_in], dim=1),
560 | p_ini_hidden_out=torch.cat([data.p_ini_hidden_out, data_expert.p_ini_hidden_out], dim=1),
561 | mask=torch.cat([data.mask, data_expert.mask], dim = 0),
562 | )
563 |
564 | norm_const = data.raw_constraints / envs.cstr_manager.get_running_maxes()
565 | recomputed_cat_dones = (envs.cstr_manager.min_p + torch.clamp(
566 | norm_const,
567 | min=0.0,
568 | max=1.0
569 | ) * (envs.cstr_manager.get_max_p() - envs.cstr_manager.min_p)).max(-1).values
570 |
571 | with torch.no_grad():
572 | clipped_noise = (torch.randn_like(data.actions, device=device) * POLICY_NOISE).clamp(
573 | -NOISE_CLIP, NOISE_CLIP
574 | ) #* actor.action_scale
575 |
576 | vlatent = vision_nn(data.vision_next_observations)
577 | qvlatent = None
578 | next_state_actions, _, _ = actor(data.next_observations, vlatent, data.p_ini_hidden_out)
579 | next_state_actions = (next_state_actions + clipped_noise).clamp(
580 | actions_min, actions_max
581 | )
582 | targets_selected = torch.randperm(CRITIC_NB)[:2]
583 | qf_next_targets = torch.stack([qf_targets[i](data.privileged_next_observations, next_state_actions, qvlatent)[0] for i in targets_selected])
584 | min_qf_next_target = qf_next_targets.min(dim = 0).values
585 | next_q_value = (1 - recomputed_cat_dones.flatten()) * data.rewards.flatten() + (1 - recomputed_cat_dones.flatten()) * (1 - data.dones.flatten()) * GAMMA * (min_qf_next_target).view(-1)
586 |
587 | true_samples_nb = data.mask.sum()
588 | qvlatent = None
589 | qf_a_values = torch.stack([qf(data.privileged_observations, data.actions, qvlatent)[0].view(-1) for qf in qfs])
590 | qf_loss = (((qf_a_values - next_q_value.unsqueeze(0)) ** 2) * data.mask.view(-1).unsqueeze(0)).sum() / (true_samples_nb * CRITIC_NB)
591 |
592 | # optimize the model
593 | q_optimizer.zero_grad()
594 | qf_loss.backward()
595 | nn.utils.clip_grad_norm_(qf1.parameters(), 0.5)
596 | q_optimizer.step()
597 |
598 | if local_steps % POLICY_FREQUENCY == 0:
599 | for i in range(CRITIC_NB):
600 | for param, target_param in zip(qfs[i].parameters(), qf_targets[i].parameters()):
601 | target_param.data.copy_(TAU * param.data + (1 - TAU) * target_param.data)
602 |
603 | if local_steps == 7:
604 | actor_training_step += 1
605 | vlatent = vision_nn(data.vision_observations, augment = True)
606 | qvlatent = None
607 | diff_actions, _, recons_height_maps = actor(data.observations, vlatent, data.p_ini_hidden_in)
608 |
609 | qs = torch.stack([qf(data.privileged_observations, diff_actions, qvlatent)[0] for qf in qfs])
610 | actor_loss = -(qs.squeeze(-1) * data.mask.unsqueeze(0)).sum() / (true_samples_nb * CRITIC_NB)
611 | final_loss = actor_loss
612 |
613 | actor_optimizer.zero_grad()
614 | final_loss.backward()
615 | nn.utils.clip_grad_norm_(actor.parameters(), 0.5)
616 | actor_optimizer.step()
617 |
618 | writer.add_scalar(f"loss/actor_loss", actor_loss.item(), actor_training_step)
619 | writer.add_scalar(f"infos/Q_max", qs.max().item(), actor_training_step)
620 |
621 | if (global_step + 1) % (500 * 24) == 0:
622 | model_path = f"{run_path}/cleanrl_model_e{(global_step + 1) // 24}.pt"
623 | torch.save((actor.state_dict(), vision_nn.state_dict()), model_path)
624 | print("Saved model checkpoint")
625 |
626 | if (global_step + 1) % (5 * 24) == 0:
627 | model_path = f"{run_path}/cleanrl_model.pt"
628 | torch.save((actor.state_dict(), vision_nn.state_dict()), model_path)
629 | print("Saved model")
630 |
631 | def eval_DDPG_demos_rnn_vision(cfg: DictConfig, envs):
632 | import cv2
633 | vis_h = 48
634 | vis_w = 48
635 | is_video_gen = "video_save_path" in cfg["task"]
636 |
637 | if is_video_gen:
638 | video_save_path = cfg["task"]["video_save_path"]
639 | position = video_save_path.rfind(".mp4")
640 | output_file = video_save_path[:position] + "_depth" + video_save_path[position:]
641 | fps = 10
642 |
643 | checkpoint = cfg["checkpoint"]
644 | actor_sd, vision_sd = torch.load(checkpoint)
645 |
646 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
647 |
648 | envs.single_action_space = envs.action_space
649 | envs.single_observation_space = envs.observation_space
650 |
651 | envs = ExtractObsWrapper(envs)
652 |
653 | vision_nn = DepthOnlyFCBackbone58x87(128, 0, 0).to(device)
654 | vision_nn.load_state_dict(vision_sd)
655 | actor = Actor(envs).to(device)
656 | actor.load_state_dict(actor_sd)
657 |
658 | obs_privi = envs.reset()
659 | obs = obs_privi.clone()[:, : 45]
660 | vobs = torch.zeros((envs.num_envs, vis_h, vis_w), device = device)
661 | next_vobs = vobs.clone()
662 | vision_latent = None
663 | gru_p_hidden_in = torch.zeros((actor.memory.num_layers, envs.num_envs, actor.memory.hidden_size), device = device) # p for policy
664 |
665 | if is_video_gen:
666 | depth_images = []
667 |
668 | for global_step in range(2000):
669 | with torch.no_grad():
670 | if global_step % 5 == 0:
671 | vision_latent = vision_nn(vobs.unsqueeze(1), hist = False)
672 |
673 | with torch.no_grad():
674 | actions, gru_p_hidden_out, _ = actor(obs.unsqueeze(1), vision_latent, gru_p_hidden_in)
675 | actions = actions.squeeze(1)
676 |
677 | next_obs_privi, rewards, terminations, infos = envs.step(actions)
678 | next_obs = next_obs_privi.clone()[:, : 45]
679 | obs = next_obs
680 | if "depth" in infos:
681 | next_vobs = infos["depth"].clone()[..., 19:-18]
682 | vobs = next_vobs
683 | gru_p_hidden_in = gru_p_hidden_out
684 |
685 | if is_video_gen:
686 | depth_images.append((vobs * 255).to(torch.uint8).squeeze().cpu().numpy())
687 | if global_step == 200:
688 | fourcc = cv2.VideoWriter_fourcc(*'mp4v')
689 | out = cv2.VideoWriter(output_file, fourcc, fps, (vis_w, vis_h), isColor=True)
690 |                 out.set(cv2.VIDEOWRITER_PROP_QUALITY, 1) # writer quality hint (OpenCV uses a 0..100 scale, backend-dependent)
691 | for i in range(len(depth_images)):
692 | # Get the i-th frame
693 | frame = depth_images[i]
694 |
695 | if frame.shape != (vis_h, vis_w):
696 | raise ValueError(f"Frame {i} has incorrect shape: {frame.shape}")
697 |
698 | # Write the frame to the video (as a single-channel image)
699 | rgb_image = cv2.applyColorMap(frame, cv2.COLORMAP_MAGMA)
700 | out.write(rgb_image)
701 |
702 | # Release the VideoWriter object
703 | out.release()
704 | print(f"Video saved to {output_file}")
705 |
--------------------------------------------------------------------------------
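One detail of DDPG_demos_rnn_vision above worth spelling out is how raw constraint violations become a per-step termination probability (recomputed_cat_dones), which then discounts both the reward and the bootstrapped value in the TD target. The sketch below mirrors that computation with made-up numbers; min_p, max_p, the running maxes and the violation values are stand-ins for what envs.cstr_manager actually provides at run time.

    import torch

    min_p, max_p = 0.0, 0.25                          # assumed termination-probability range
    running_maxes = torch.tensor([2.0, 0.5, 1.0])     # assumed per-constraint normalizers
    raw_constraints = torch.tensor([[0.0, 0.1, 0.0],      # mild violation of constraint 1
                                    [4.0, 0.0, 0.5]])     # large violation of constraint 0

    # Normalize each violation, clamp to [0, 1], map into [min_p, max_p], then let the
    # worst constraint alone set the termination probability for that step.
    norm = torch.clamp(raw_constraints / running_maxes, min=0.0, max=1.0)
    cat_dones = (min_p + norm * (max_p - min_p)).max(-1).values
    print(cat_dones)   # tensor([0.0500, 0.2500])

    # In the training loop this probability scales the target, roughly:
    # next_q = (1 - cat_dones) * reward + (1 - cat_dones) * (1 - done) * gamma * min_q_target

This matches the next_q_value line in the training loop: the larger the worst violation, the more the target is shrunk toward zero, which discourages constraint-violating behavior.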