├── .gitignore ├── LICENSE ├── assets └── urdf │ ├── allegro_hand_description │ └── meshes │ │ ├── base_link.STL │ │ ├── base_link.obj │ │ ├── base_link_left.STL │ │ ├── base_link_left.obj │ │ ├── box.obj │ │ ├── box.stl │ │ ├── box.stl.convex.obj │ │ ├── box.stl.convex.stl │ │ ├── digit.STL │ │ ├── digit.STL.convex.stl │ │ ├── gel.obj │ │ ├── gel.stl │ │ ├── gel.stl.convex.stl │ │ ├── link_0.0.STL │ │ ├── link_0.0.obj │ │ ├── link_1.0.STL │ │ ├── link_1.0.obj │ │ ├── link_12.0_left.STL │ │ ├── link_12.0_left.obj │ │ ├── link_12.0_right.STL │ │ ├── link_12.0_right.obj │ │ ├── link_13.0.STL │ │ ├── link_13.0.obj │ │ ├── link_14.0.STL │ │ ├── link_14.0.obj │ │ ├── link_15.0.STL │ │ ├── link_15.0.obj │ │ ├── link_15.0_tip.STL │ │ ├── link_15.0_tip.obj │ │ ├── link_2.0.STL │ │ ├── link_2.0.obj │ │ ├── link_3.0.STL │ │ ├── link_3.0.obj │ │ ├── link_3.0_tip.STL │ │ ├── link_3.0_tip.obj │ │ ├── link_4.0.STL │ │ ├── link_4.0.obj │ │ ├── modified_tip.STL │ │ └── modified_tip.obj │ ├── objects │ ├── ball.urdf │ ├── cross4_0.urdf │ ├── cross4_1.urdf │ ├── cross4_2.urdf │ ├── cross4_3.urdf │ ├── cross4_4.urdf │ ├── meshes │ │ ├── non-convex-param │ │ │ ├── cross4_0.obj │ │ │ ├── cross4_0_0decompose.obj │ │ │ ├── cross4_0_1decompose.obj │ │ │ ├── cross4_0_2decompose.obj │ │ │ ├── cross4_0_3decompose.obj │ │ │ ├── cross4_0_4decompose.obj │ │ │ ├── cross4_1.obj │ │ │ ├── cross4_1_0decompose.obj │ │ │ ├── cross4_1_1decompose.obj │ │ │ ├── cross4_1_2decompose.obj │ │ │ ├── cross4_1_3decompose.obj │ │ │ ├── cross4_1_4decompose.obj │ │ │ ├── cross4_2.obj │ │ │ ├── cross4_2_0decompose.obj │ │ │ ├── cross4_2_1decompose.obj │ │ │ ├── cross4_2_2decompose.obj │ │ │ ├── cross4_2_3decompose.obj │ │ │ ├── cross4_3.obj │ │ │ ├── cross4_3_0decompose.obj │ │ │ ├── cross4_3_1decompose.obj │ │ │ ├── cross4_3_2decompose.obj │ │ │ ├── cross4_3_3decompose.obj │ │ │ ├── cross4_3_4decompose.obj │ │ │ ├── cross4_4.obj │ │ │ ├── cross4_4_0decompose.obj │ │ │ ├── cross4_4_1decompose.obj │ │ │ ├── cross4_4_2decompose.obj │ │ │ ├── cross4_4_3decompose.obj │ │ │ └── cross4_4_4decompose.obj │ │ └── set2 │ │ │ ├── set_obj10_thin_block_corner.obj │ │ │ ├── set_obj10_thin_block_corner_0decompose.obj │ │ │ ├── set_obj10_thin_block_corner_0decompose_0decompose.obj │ │ │ ├── set_obj11_cylinder.obj │ │ │ ├── set_obj11_cylinder_0decompose.obj │ │ │ ├── set_obj12_cylinder_corner.obj │ │ │ ├── set_obj12_cylinder_corner_0decompose.obj │ │ │ ├── set_obj13_irregular_block.obj │ │ │ ├── set_obj13_irregular_block_0decompose.obj │ │ │ ├── set_obj14_irregular_block_cross.obj │ │ │ ├── set_obj14_irregular_block_cross_0decompose.obj │ │ │ ├── set_obj14_irregular_block_cross_1decompose.obj │ │ │ ├── set_obj14_irregular_block_cross_2decompose.obj │ │ │ ├── set_obj15_irregular_block_time.obj │ │ │ ├── set_obj15_irregular_block_time_0decompose.obj │ │ │ ├── set_obj15_irregular_block_time_1decompose.obj │ │ │ ├── set_obj16_cylinder_axis.obj │ │ │ ├── set_obj16_cylinder_axis_0decompose.obj │ │ │ ├── set_obj16_cylinder_axis_1decompose.obj │ │ │ ├── set_obj16_cylinder_axis_2decompose.obj │ │ │ ├── set_obj16_cylinder_axis_3decompose.obj │ │ │ ├── set_obj16_cylinder_axis_4decompose.obj │ │ │ ├── set_obj16_cylinder_axis_5decompose.obj │ │ │ ├── set_obj16_cylinder_axis_6decompose.obj │ │ │ ├── set_obj16_cylinder_axis_7decompose.obj │ │ │ ├── set_obj1_regular_block.obj │ │ │ ├── set_obj1_regular_block_0decompose.obj │ │ │ ├── set_obj2_block.obj │ │ │ ├── set_obj2_block_0decompose.obj │ │ │ ├── set_obj3_block.obj │ │ │ ├── set_obj3_block_0decompose.obj │ │ │ ├── 
set_obj3_block_1decompose.obj │ │ │ ├── set_obj3_block_2decompose.obj │ │ │ ├── set_obj4_block.obj │ │ │ ├── set_obj4_block_0decompose.obj │ │ │ ├── set_obj4_block_10decompose.obj │ │ │ ├── set_obj4_block_11decompose.obj │ │ │ ├── set_obj4_block_12decompose.obj │ │ │ ├── set_obj4_block_13decompose.obj │ │ │ ├── set_obj4_block_14decompose.obj │ │ │ ├── set_obj4_block_15decompose.obj │ │ │ ├── set_obj4_block_1decompose.obj │ │ │ ├── set_obj4_block_2decompose.obj │ │ │ ├── set_obj4_block_3decompose.obj │ │ │ ├── set_obj4_block_4decompose.obj │ │ │ ├── set_obj4_block_5decompose.obj │ │ │ ├── set_obj4_block_6decompose.obj │ │ │ ├── set_obj4_block_7decompose.obj │ │ │ ├── set_obj4_block_8decompose.obj │ │ │ ├── set_obj4_block_9decompose.obj │ │ │ ├── set_obj5_block.obj │ │ │ ├── set_obj5_block_0decompose.obj │ │ │ ├── set_obj5_block_1decompose.obj │ │ │ ├── set_obj6_block_corner.obj │ │ │ ├── set_obj6_block_corner_0decompose.obj │ │ │ ├── set_obj7_block.obj │ │ │ ├── set_obj7_block_0decompose.obj │ │ │ ├── set_obj8_short_block.obj │ │ │ ├── set_obj8_short_block_0decompose.obj │ │ │ ├── set_obj9_thin_block.obj │ │ │ └── set_obj9_thin_block_0decompose.obj │ ├── set_obj10_thin_block_corner.urdf │ ├── set_obj11_cylinder.urdf │ ├── set_obj12_cylinder_corner.urdf │ ├── set_obj13_irregular_block.urdf │ ├── set_obj14_irregular_block_cross.urdf │ ├── set_obj15_irregular_block_time.urdf │ ├── set_obj16_cylinder_axis.urdf │ ├── set_obj1_regular_block.urdf │ ├── set_obj2_block.urdf │ ├── set_obj3_block.urdf │ ├── set_obj4_block.urdf │ ├── set_obj5_block.urdf │ ├── set_obj6_block_corner.urdf │ ├── set_obj7_block.urdf │ ├── set_obj8_short_block.urdf │ └── set_obj9_thin_block.urdf │ └── xarm6 │ ├── meshes │ ├── base.obj │ ├── base.stl │ ├── base.stl.convex.stl │ ├── base_link.STL │ ├── base_link_left.STL │ ├── box.stl │ ├── gel.stl │ ├── link1.obj │ ├── link1.stl │ ├── link1.stl.convex.stl │ ├── link2.obj │ ├── link2.stl │ ├── link2.stl.convex.stl │ ├── link3.obj │ ├── link3.stl │ ├── link3.stl.convex.stl │ ├── link4.obj │ ├── link4.stl │ ├── link4.stl.convex.stl │ ├── link5.obj │ ├── link5.stl │ ├── link5.stl.convex.stl │ ├── link6.obj │ ├── link6.stl │ ├── link6.stl.convex.stl │ ├── link_0.0.STL │ ├── link_1.0.STL │ ├── link_12.0_left.STL │ ├── link_12.0_right.STL │ ├── link_13.0.STL │ ├── link_14.0.STL │ ├── link_15.0.STL │ ├── link_15.0_tip.STL │ ├── link_2.0.STL │ ├── link_3.0.STL │ ├── link_3.0_tip.STL │ ├── link_4.0.STL │ └── modified_tip.STL │ ├── xarm6_allegro_right_fsr_2023_thin.urdf │ └── xarm6_allegro_right_fsr_2023_thin_tilted.urdf ├── distillation ├── rl_pytorch │ └── rl_pytorch │ │ ├── __init__.py │ │ └── distill │ │ ├── distill_bc_warmup.py │ │ └── distill_collect.py └── utils │ ├── config.py │ └── process_distill.py ├── install.md ├── isaacgymenvs ├── __init__.py ├── cfg │ ├── config.yaml │ ├── config_distill.yaml │ ├── task │ │ └── AllegroArmMOAR.yaml │ └── train │ │ └── AllegroArmMOARPPO.yaml ├── learning │ ├── amp_continuous.py │ ├── amp_datasets.py │ ├── amp_models.py │ ├── amp_network_builder.py │ ├── amp_players.py │ ├── common_agent.py │ ├── common_player.py │ ├── hrl_continuous.py │ ├── hrl_models.py │ └── replay_buffer.py ├── tasks │ ├── __init__.py │ ├── allegro_arm_morb_axis.py │ └── base │ │ ├── __init__.py │ │ └── vec_task.py ├── train.py ├── train_distillation.py └── utils │ ├── dr_utils.py │ ├── pc_utils.py │ ├── reformat.py │ ├── rlgames_utils.py │ ├── rotation3d.py │ ├── torch_jit_utils.py │ └── utils.py ├── object_pc_embeddings_C_pretrain_mug.pkl ├── pickle_utils.py ├── 
readme.md ├── rl_games ├── __init__.py ├── algos_torch │ ├── __init__.py │ ├── a2c_continuous.py │ ├── a2c_discrete.py │ ├── central_value.py │ ├── d2rl.py │ ├── flatten.py │ ├── layers.py │ ├── model_builder.py │ ├── models.py │ ├── moving_mean_std.py │ ├── network_builder.py │ ├── players.py │ ├── pointnets.py │ ├── running_mean_std.py │ ├── sac_agent.py │ ├── sac_helper.py │ ├── self_play_manager.py │ └── torch_ext.py ├── common │ ├── __init__.py │ ├── a2c_common.py │ ├── algo_observer.py │ ├── categorical.py │ ├── common_losses.py │ ├── datasets.py │ ├── diagnostics.py │ ├── divergence.py │ ├── env_configurations.py │ ├── experience.py │ ├── experiment.py │ ├── interval_summary_writer.py │ ├── ivecenv.py │ ├── layers │ │ ├── __init__.py │ │ └── recurrent.py │ ├── object_factory.py │ ├── player.py │ ├── rollouts.py │ ├── schedulers.py │ ├── segment_tree.py │ ├── tr_helpers.py │ ├── transforms │ │ ├── __init__.py │ │ ├── soft_augmentation.py │ │ └── transforms.py │ ├── vecenv.py │ └── wrappers.py ├── configs │ ├── atari │ │ ├── ppo_breakout.yaml │ │ ├── ppo_breakout_cule.yaml │ │ ├── ppo_breakout_envpool.yaml │ │ ├── ppo_breakout_envpool_resnet.yaml │ │ ├── ppo_breakout_torch_impala.yaml │ │ ├── ppo_gopher.yaml │ │ ├── ppo_invaders_envpool.yaml │ │ ├── ppo_invaders_envpool_rnn.yaml │ │ ├── ppo_pacman_envpool.yaml │ │ ├── ppo_pacman_envpool_resnet.yaml │ │ ├── ppo_pacman_envpool_rnn.yaml │ │ ├── ppo_pacman_torch.yaml │ │ ├── ppo_pacman_torch_rnn.yaml │ │ ├── ppo_pong.yaml │ │ ├── ppo_pong_cule.yaml │ │ ├── ppo_pong_envpool.yaml │ │ ├── ppo_pong_envpool_resnet.yaml │ │ ├── ppo_space_invaders_resnet.yaml │ │ └── ppo_space_invaders_torch.yaml │ ├── brax │ │ ├── ppo_ant.yaml │ │ ├── ppo_ant_tcnn.yaml │ │ ├── ppo_grasp.yaml │ │ ├── ppo_halfcheetah.yaml │ │ ├── ppo_humanoid.yaml │ │ ├── ppo_ur5e.yaml │ │ ├── sac_ant.yaml │ │ └── sac_humanoid.yaml │ ├── carracing_ppo.yaml │ ├── dm_control │ │ ├── cartpole.yaml │ │ ├── fish_swim.yaml │ │ ├── halfcheetah_run.yaml │ │ ├── humanoid2.yaml │ │ ├── humanoid_run.yaml │ │ ├── sac_humanoid.yaml │ │ ├── walker_run.yaml │ │ ├── walker_stand.yaml │ │ └── walker_walk.yaml │ ├── ma │ │ ├── ppo_connect4_self_play.yaml │ │ ├── ppo_connect4_self_play_resnet.yaml │ │ ├── ppo_slime_self_play.yaml │ │ └── ppo_slime_v0.yaml │ ├── minigrid │ │ ├── lava_rnn_img.yaml │ │ └── minigrid_rnn_img.yaml │ ├── mujoco │ │ ├── ant.yaml │ │ ├── ant_envpool.yaml │ │ ├── halfcheetah.yaml │ │ ├── halfcheetah_envpool.yaml │ │ ├── hopper.yaml │ │ ├── hopper_envpool.yaml │ │ ├── humanoid.yaml │ │ ├── humanoid_envpool.yaml │ │ ├── sac_ant_envpool.yaml │ │ ├── sac_halfcheetah_envpool.yaml │ │ ├── walker2d.yaml │ │ └── walker2d_envpool.yaml │ ├── openai │ │ ├── ppo_gym_ant.yaml │ │ ├── ppo_gym_hand.yaml │ │ └── ppo_gym_humanoid.yaml │ ├── ppo_cartpole.yaml │ ├── ppo_cartpole_masked_velocity_rnn.yaml │ ├── ppo_continuous.yaml │ ├── ppo_continuous_lstm.yaml │ ├── ppo_lunar.yaml │ ├── ppo_lunar_continiuos_torch.yaml │ ├── ppo_lunar_discrete.yaml │ ├── ppo_multiwalker.yaml │ ├── ppo_pendulum.yaml │ ├── ppo_pendulum_torch.yaml │ ├── ppo_reacher.yaml │ ├── ppo_smac.yaml │ ├── ppo_walker.yaml │ ├── ppo_walker_hardcore.yaml │ ├── ppo_walker_rnn.yaml │ ├── ppo_walker_tcnn.yaml │ ├── procgen │ │ └── ppo_coinrun.yaml │ ├── smac │ │ ├── 10m_vs_11m_torch.yaml │ │ ├── 27m_vs_30m_cv.yaml │ │ ├── 27m_vs_30m_torch.yaml │ │ ├── 2m_vs_1z.yaml │ │ ├── 2m_vs_1z_torch.yaml │ │ ├── 2s_vs_1c.yaml │ │ ├── 3m_cnn_torch.yaml │ │ ├── 3m_torch.yaml │ │ ├── 3m_torch_cv.yaml │ │ ├── 3m_torch_cv_joint.yaml │ │ ├── 
3m_torch_cv_rnn.yaml │ │ ├── 3m_torch_rnn.yaml │ │ ├── 3m_torch_sparse.yaml │ │ ├── 3s5z_vs_3s6z_torch.yaml │ │ ├── 3s5z_vs_3s6z_torch_cv.yaml │ │ ├── 3s_vs_4z.yaml │ │ ├── 3s_vs_5z.yaml │ │ ├── 3s_vs_5z_cv.yaml │ │ ├── 3s_vs_5z_cv_rnn.yaml │ │ ├── 3s_vs_5z_torch_lstm.yaml │ │ ├── 3s_vs_5z_torch_lstm2.yaml │ │ ├── 5m_vs_6m_rnn.yaml │ │ ├── 5m_vs_6m_rnn_cv.yaml │ │ ├── 5m_vs_6m_torch.yaml │ │ ├── 6h_vs_8z_torch.yaml │ │ ├── 6h_vs_8z_torch_cv.yaml │ │ ├── 8m_torch.yaml │ │ ├── 8m_torch_cv.yaml │ │ ├── MMM2_torch.yaml │ │ ├── corridor_torch.yaml │ │ ├── corridor_torch_cv.yaml │ │ └── runs │ │ │ ├── 2c_vs_64zg.yaml │ │ │ ├── 2c_vs_64zg_neg.yaml │ │ │ ├── 2s3z.yaml │ │ │ ├── 2s3z_neg.yaml │ │ │ ├── 2s_vs_1c.yaml │ │ │ ├── 2s_vs_1c_neg.yaml │ │ │ ├── 3s5z.yaml │ │ │ ├── 3s5z_neg.yaml │ │ │ ├── 3s_vs_5z.yaml │ │ │ ├── 3s_vs_5z_neg.yaml │ │ │ ├── 3s_vs_5z_neg_joint.yaml │ │ │ ├── 6h_vs_8z.yaml │ │ │ ├── 6h_vs_8z_neg.yaml │ │ │ ├── 6h_vs_8z_rnn.yaml │ │ │ ├── MMM2.yaml │ │ │ ├── MMM2_conv1d.yaml │ │ │ ├── MMM2_neg.yaml │ │ │ ├── MMM2_rnn.yaml │ │ │ ├── bane_vs_bane.yaml │ │ │ ├── bane_vs_bane_neg.yaml │ │ │ ├── corridor_cv.yaml │ │ │ └── corridor_cv_neg.yaml │ └── test │ │ ├── test_asymmetric_continuous.yaml │ │ ├── test_asymmetric_discrete.yaml │ │ ├── test_asymmetric_discrete_mhv.yaml │ │ ├── test_asymmetric_discrete_mhv_mops.yaml │ │ ├── test_discrete.yaml │ │ ├── test_discrete_multidiscrete_mhv.yaml │ │ ├── test_ppo_walker_truncated_time.yaml │ │ ├── test_rnn.yaml │ │ ├── test_rnn_multidiscrete.yaml │ │ └── test_rnn_multidiscrete_mhv.yaml ├── distributed │ └── __init__.py ├── envs │ ├── __init__.py │ ├── brax.py │ ├── connect4_network.py │ ├── connect4_selfplay.py │ ├── cule.py │ ├── diambra │ │ └── diambra.py │ ├── envpool.py │ ├── multiwalker.py │ ├── slimevolley_selfplay.py │ ├── smac_env.py │ ├── test │ │ ├── __init__.py │ │ ├── example_env.py │ │ ├── rnn_env.py │ │ └── test_asymmetric_env.py │ └── test_network.py ├── interfaces │ ├── __init__.py │ └── base_algorithm.py ├── networks │ ├── __init__.py │ └── tcnn_mlp.py └── torch_runner.py └── scripts ├── bc_axis.sh ├── bc_baoding.sh ├── bc_cross.sh ├── collect_axis.sh ├── collect_baoding.sh ├── collect_cross.sh ├── teacher_axis.sh ├── teacher_axis_visrl.sh ├── teacher_baoding.sh ├── teacher_baoding_visrl.sh ├── teacher_cross.sh └── teacher_cross_visrl.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | __pycache__ 3 | *.pyc -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Authors of Robot Synesthesia: In-Hand Manipulation with Visuotactile Sensing 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/base_link.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/base_link_left.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/base_link_left.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/box.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/box.stl -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/digit.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/digit.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/gel.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/gel.stl -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/link_0.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/link_0.0.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/link_1.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/link_1.0.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/link_12.0_left.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/link_12.0_left.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/link_12.0_right.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/link_12.0_right.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/link_13.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/link_13.0.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/link_14.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/link_14.0.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/link_15.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/link_15.0.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/link_15.0_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/link_15.0_tip.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/link_2.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/link_2.0.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/link_3.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/link_3.0.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/link_3.0_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/link_3.0_tip.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/link_4.0.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/link_4.0.STL -------------------------------------------------------------------------------- /assets/urdf/allegro_hand_description/meshes/modified_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/allegro_hand_description/meshes/modified_tip.STL -------------------------------------------------------------------------------- /assets/urdf/objects/ball.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj13_irregular_block.obj: -------------------------------------------------------------------------------- 1 | # Blender v3.4.1 OBJ File: 'set_obj13_irregular_block.blend' 2 | # www.blender.org 3 | mtllib set_obj13_irregular_block.mtl 4 | o Cube 5 | v 1.000000 -1.000000 1.000000 6 | v 0.860483 -1.074041 -0.879565 7 | v -1.000000 -1.000000 1.000000 8 | v -1.000000 -1.000000 -1.000000 9 | v 0.884099 0.821569 0.676058 10 | v 1.023710 1.019616 -0.879586 11 | v -0.830663 0.877042 0.784524 12 | v -1.000000 1.000000 -1.000000 13 | vt 0.625000 0.500000 14 | vt 0.875000 0.500000 15 | vt 0.875000 0.750000 16 | vt 0.625000 0.750000 17 | vt 0.375000 0.750000 18 | vt 0.625000 1.000000 19 | vt 0.375000 1.000000 20 | vt 0.375000 0.000000 21 | vt 0.625000 0.000000 22 | vt 0.625000 0.250000 23 | vt 0.375000 0.250000 24 | vt 0.125000 0.500000 25 | vt 0.375000 0.500000 26 | vt 0.125000 0.750000 27 | vn 0.0310 0.1438 0.9891 28 | vn -0.9979 0.0461 0.0461 29 | vn 0.0092 0.9954 0.0956 30 | vn 0.0619 -0.0025 -0.9981 31 | vn -0.0198 -0.9996 0.0198 32 | vn 0.9999 -0.0121 -0.0004 33 | usemtl Material 34 | s off 35 | f 1/1/1 5/2/1 7/3/1 3/4/1 36 | f 4/5/2 3/4/2 7/6/2 8/7/2 37 | f 8/8/3 7/9/3 5/10/3 6/11/3 38 | f 6/12/4 2/13/4 4/5/4 8/14/4 39 | f 2/13/5 1/1/5 3/4/5 4/5/5 40 | f 6/11/6 5/10/6 1/1/6 2/13/6 41 | -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj14_irregular_block_cross_0decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -0.41301100 0.64508900 -0.96163900 3 | v 0.79365100 0.66672600 1.01587300 4 | v 0.79365100 0.63492100 1.01587300 5 | v -0.50793700 0.63492100 1.01587300 6 | v -0.39553600 1.00000000 0.93140700 7 | v 0.41776100 0.99899400 -0.92604000 8 | v -0.39553600 1.00000000 -0.93140700 9 | v 0.41776100 0.65176300 -0.92604000 10 | v 0.41776100 0.99899400 0.92604000 11 | v -0.50793700 0.66672600 1.01587300 12 | v 0.79365100 0.63492100 0.98405200 13 | v -0.50793700 0.66672600 0.98405200 14 | f 10 7 12 15 | f 3 2 4 16 | f 5 6 7 17 | f 6 1 7 18 | f 1 6 8 19 | f 5 2 9 20 | f 2 6 9 21 | f 6 5 9 22 | f 4 2 10 23 | f 2 5 10 24 | f 5 7 10 25 | f 2 3 11 26 | f 3 4 11 27 | f 4 1 11 28 | f 6 2 11 29 | f 1 8 11 30 | f 8 6 11 31 | f 1 4 12 32 | f 7 1 12 33 | f 4 10 12 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj14_irregular_block_cross_1decompose.obj: 
-------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.82514800 0.63492100 1.01587300 3 | v -0.97958800 -0.42516800 -0.97958800 4 | v -0.97958800 -0.42516800 0.97958800 5 | v 0.97958800 -0.42516800 -0.97958800 6 | v -0.41301100 0.59669500 -0.94392000 7 | v 0.97958800 -0.42516800 0.97958800 8 | v -0.88878900 0.50792400 1.01587300 9 | v 0.95404100 0.42317200 -0.95404100 10 | v -0.95404100 0.42317200 -0.95404100 11 | v -0.57137900 0.63492100 1.01587300 12 | v 0.41776100 0.60246300 -0.95477500 13 | v 0.95404100 0.42317200 0.95404100 14 | v -0.95404100 0.42317200 0.95404100 15 | v -0.57137900 0.63492100 0.98405200 16 | v 0.82514800 0.63492100 0.98405200 17 | v -0.66644300 0.60311900 0.98405200 18 | f 10 14 16 19 | f 3 2 4 20 | f 3 4 6 21 | f 6 1 7 22 | f 3 6 7 23 | f 6 4 8 24 | f 2 3 9 25 | f 7 1 10 26 | f 4 2 11 27 | f 8 4 11 28 | f 2 9 11 29 | f 9 5 11 30 | f 1 6 12 31 | f 8 1 12 32 | f 6 8 12 33 | f 3 7 13 34 | f 9 3 13 35 | f 7 9 13 36 | f 5 9 14 37 | f 10 1 14 38 | f 11 5 14 39 | f 11 14 15 40 | f 1 8 15 41 | f 8 11 15 42 | f 14 1 15 43 | f 9 7 16 44 | f 7 10 16 45 | f 14 9 16 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj14_irregular_block_cross_2decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -0.41301100 -0.99492200 -0.94136400 3 | v 0.40600200 -0.44902000 1.00000000 4 | v 0.38888400 -1.00000000 0.95942400 5 | v 0.40600200 -0.44902000 -1.00000000 6 | v -0.41301100 -0.46550800 0.94136400 7 | v 0.38888400 -1.00000000 -0.95942400 8 | v -0.41301100 -0.99492200 0.94136400 9 | v -0.41301100 -0.46550800 -0.94136400 10 | f 5 4 8 11 | f 2 3 4 12 | f 2 4 5 13 | f 3 1 6 14 | f 4 3 6 15 | f 1 4 6 16 | f 1 3 7 17 | f 3 2 7 18 | f 5 1 7 19 | f 2 5 7 20 | f 4 1 8 21 | f 1 5 8 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj15_irregular_block_time_0decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.98508900 0.98508900 1.00000000 3 | v 0.98508900 -0.98508900 1.00000000 4 | v -0.98508900 0.98508900 1.00000000 5 | v 0.43699400 0.93239900 0.01129500 6 | v -1.00000000 -1.00000000 -0.02356100 7 | v -0.98508900 -0.98508900 1.00000000 8 | v -0.43566800 0.92957100 0.01275400 9 | v 0.42557800 -0.97264000 -0.00945400 10 | v 0.95121800 0.95121800 0.74469300 11 | v 0.97240900 -0.97240900 0.78055400 12 | v -0.95528400 0.95528400 0.74577300 13 | f 7 5 11 14 | f 2 1 3 15 | f 3 1 4 16 | f 2 3 6 17 | f 5 2 6 18 | f 3 5 6 19 | f 3 4 7 20 | f 4 5 7 21 | f 2 5 8 22 | f 5 4 8 23 | f 4 1 9 24 | f 8 4 9 25 | f 8 9 10 26 | f 1 2 10 27 | f 2 8 10 28 | f 9 1 10 29 | f 5 3 11 30 | f 3 7 11 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj15_irregular_block_time_1decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.95271300 0.95271300 -0.74704600 3 | v -0.98310300 0.98310300 -1.00000000 4 | v -1.00000000 -1.00000000 -0.03943500 5 | v 0.98310300 -0.98310300 -1.00000000 6 | v 0.45606400 -0.97308800 -0.05247000 7 | v -0.98310300 -0.98310300 -1.00000000 8 | v 0.46185000 0.98543400 -0.04649000 9 | v 0.98310300 0.98310300 -1.00000000 10 | v -0.46026900 0.98247100 -0.04792500 
11 | v -0.95669500 0.95669500 -0.74797900 12 | v 0.97363600 -0.97363600 -0.78241700 13 | f 4 8 11 14 | f 3 4 5 15 | f 3 2 6 16 | f 4 3 6 17 | f 2 4 6 18 | f 3 5 7 19 | f 4 2 8 20 | f 7 1 8 21 | f 2 7 8 22 | f 7 2 9 23 | f 3 7 9 24 | f 2 3 10 25 | f 9 2 10 26 | f 3 9 10 27 | f 5 4 11 28 | f 1 7 11 29 | f 7 5 11 30 | f 8 1 11 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj16_cylinder_axis_0decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 1.01587300 0.17878900 1.01586700 3 | v 0.02117200 -0.40678900 0.95370400 4 | v 0.57137900 0.81371000 0.95238100 5 | v 0.00000000 0.81371000 1.01586700 6 | v 0.53965800 -0.83676000 1.01586700 7 | v 0.98421300 -0.16028600 0.95436000 8 | v 0.00000000 -0.42398100 1.01586700 9 | v 0.63482100 0.81371000 1.01586700 10 | v 0.00000000 0.81371000 0.98412400 11 | v 0.85706800 -0.58279200 1.01586700 12 | v 0.92581100 0.37631700 0.95427500 13 | v 0.03555700 0.25411300 0.95460300 14 | v 0.59843300 -0.79629300 0.95393800 15 | v 0.85706800 0.59140700 1.01586700 16 | v 1.01587300 -0.17017500 1.01586700 17 | v 0.89787600 -0.43133300 0.95413000 18 | v 0.69090900 0.72040000 0.95362400 19 | v 0.76180500 -0.70977600 1.01586700 20 | v 0.98358200 0.16669100 0.95439900 21 | v 0.83565100 -0.54774800 0.95432800 22 | v 0.95233100 -0.39231600 1.01586700 23 | f 16 6 21 24 | f 1 4 5 25 | f 2 5 7 26 | f 5 4 7 27 | f 4 1 8 28 | f 3 4 8 29 | f 4 3 9 30 | f 2 7 9 31 | f 7 4 9 32 | f 1 5 10 33 | f 3 2 12 34 | f 2 9 12 35 | f 9 3 12 36 | f 2 3 13 37 | f 5 2 13 38 | f 8 1 14 39 | f 1 11 14 40 | f 6 1 15 41 | f 1 10 15 42 | f 3 6 16 43 | f 13 3 16 44 | f 3 8 17 45 | f 11 3 17 46 | f 8 14 17 47 | f 14 11 17 48 | f 10 5 18 49 | f 5 13 18 50 | f 6 3 19 51 | f 1 6 19 52 | f 3 11 19 53 | f 11 1 19 54 | f 16 10 20 55 | f 13 16 20 56 | f 10 18 20 57 | f 18 13 20 58 | f 6 15 21 59 | f 15 10 21 60 | f 10 16 21 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj16_cylinder_axis_3decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.98358200 0.16669100 -0.95439900 3 | v 0.00000000 -0.42398100 -1.01586700 4 | v 0.53965800 -0.83676000 -1.01586700 5 | v 0.63482100 0.81371000 -1.01586700 6 | v 0.03555700 0.25411300 -0.95460300 7 | v 0.59843300 -0.79629300 -0.95393800 8 | v 1.01587300 -0.17017500 -1.01586700 9 | v 0.00000000 0.81371000 -0.98412400 10 | v 0.57137900 0.81371000 -0.95238100 11 | v 0.02117200 -0.40678900 -0.95370400 12 | v 0.00000000 0.81371000 -1.01586700 13 | v 0.95233100 0.40093100 -1.01586700 14 | v 0.89787600 -0.43133300 -0.95413000 15 | v 0.76180500 -0.70977600 -1.01586700 16 | v 0.83379600 0.55121800 -0.95449700 17 | v 0.98421300 -0.16028600 -0.95436000 18 | v 0.95233100 -0.39231600 -1.01586700 19 | v 1.01587300 0.17878900 -1.01586700 20 | v 0.85706800 0.59140700 -1.01586700 21 | v 0.83565100 -0.54774800 -0.95432800 22 | v 0.69090900 0.72040000 -0.95362400 23 | v 0.92581100 0.37631700 -0.95427500 24 | f 15 21 22 25 | f 3 2 4 26 | f 3 4 7 27 | f 8 5 9 28 | f 4 8 9 29 | f 6 9 10 30 | f 2 3 10 31 | f 3 6 10 32 | f 8 2 10 33 | f 5 8 10 34 | f 9 5 10 35 | f 4 2 11 36 | f 2 8 11 37 | f 8 4 11 38 | f 7 4 12 39 | f 9 6 13 40 | f 6 3 14 41 | f 3 7 14 42 | f 1 9 16 43 | f 9 13 16 44 | f 16 13 17 45 | f 14 7 17 46 | f 7 16 17 47 | f 7 12 18 48 | f 1 16 18 49 | f 16 7 18 50 | f 12 4 
19 51 | f 13 6 20 52 | f 6 14 20 53 | f 17 13 20 54 | f 14 17 20 55 | f 4 9 21 56 | f 19 4 21 57 | f 15 19 21 58 | f 9 1 22 59 | f 18 12 22 60 | f 1 18 22 61 | f 12 19 22 62 | f 19 15 22 63 | f 21 9 22 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj16_cylinder_axis_4decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -0.03555700 0.25411300 0.95460300 3 | v -0.98421300 -0.16028600 0.95436000 4 | v -1.01587300 -0.17017500 1.01586700 5 | v -0.53965800 -0.83676000 1.01586700 6 | v 0.00000000 0.81371000 1.01586700 7 | v -0.57137900 0.81371000 0.95238100 8 | v -0.02117200 -0.40678900 0.95370400 9 | v -0.85706800 0.59140700 1.01586700 10 | v 0.00000000 -0.42398100 1.01586700 11 | v -0.59843300 -0.79629300 0.95393800 12 | v -0.92581100 0.37631700 0.95427500 13 | v 0.00000000 0.81371000 0.98412400 14 | v -0.85706800 -0.58279200 1.01586700 15 | v -0.63482100 0.81371000 1.01586700 16 | v -1.01587300 0.17878900 1.01586700 17 | v -0.89787600 -0.43133300 0.95413000 18 | v -0.69090900 0.72040000 0.95362400 19 | v -0.76180500 -0.70977600 1.01586700 20 | v -0.98358200 0.16669100 0.95439900 21 | v -0.83565100 -0.54774800 0.95432800 22 | v -0.95233100 -0.39231600 1.01586700 23 | f 16 13 21 24 | f 3 4 5 25 | f 6 1 7 26 | f 3 5 8 27 | f 5 4 9 28 | f 4 7 9 29 | f 6 7 10 30 | f 7 4 10 31 | f 6 5 12 32 | f 1 6 12 33 | f 7 1 12 34 | f 5 9 12 35 | f 9 7 12 36 | f 4 3 13 37 | f 5 6 14 38 | f 8 5 14 39 | f 2 3 15 40 | f 3 8 15 41 | f 8 11 15 42 | f 2 6 16 43 | f 6 10 16 44 | f 6 11 17 45 | f 11 8 17 46 | f 14 6 17 47 | f 8 14 17 48 | f 10 4 18 49 | f 4 13 18 50 | f 6 2 19 51 | f 11 6 19 52 | f 2 15 19 53 | f 15 11 19 54 | f 16 10 20 55 | f 13 16 20 56 | f 10 18 20 57 | f 18 13 20 58 | f 3 2 21 59 | f 13 3 21 60 | f 2 16 21 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj16_cylinder_axis_7decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -0.03555700 0.25411300 -0.95460300 3 | v -1.01587300 -0.17017500 -1.01586700 4 | v -0.98421300 -0.16028600 -0.95436000 5 | v -0.53965800 -0.83676000 -1.01586700 6 | v 0.00000000 0.81371000 -1.01586700 7 | v -0.57137900 0.81371000 -0.95238100 8 | v -0.02117200 -0.40678900 -0.95370400 9 | v -0.85706800 0.59140700 -1.01586700 10 | v 0.00000000 -0.42398100 -1.01586700 11 | v -0.59843300 -0.79629300 -0.95393800 12 | v -0.92581100 0.37631700 -0.95427500 13 | v 0.00000000 0.81371000 -0.98412400 14 | v -0.85706800 -0.58279200 -1.01586700 15 | v -0.63482100 0.81371000 -1.01586700 16 | v -1.01587300 0.17878900 -1.01586700 17 | v -0.89787600 -0.43133300 -0.95413000 18 | v -0.69090900 0.72040000 -0.95362400 19 | v -0.76180500 -0.70977600 -1.01586700 20 | v -0.98358200 0.16669100 -0.95439900 21 | v -0.83565100 -0.54774800 -0.95432800 22 | v -0.95233100 -0.39231600 -1.01586700 23 | f 13 16 21 24 | f 4 2 5 25 | f 1 6 7 26 | f 5 2 8 27 | f 4 5 9 28 | f 7 4 9 29 | f 7 6 10 30 | f 4 7 10 31 | f 5 6 12 32 | f 6 1 12 33 | f 1 7 12 34 | f 9 5 12 35 | f 7 9 12 36 | f 2 4 13 37 | f 6 5 14 38 | f 5 8 14 39 | f 2 3 15 40 | f 8 2 15 41 | f 11 8 15 42 | f 6 3 16 43 | f 10 6 16 44 | f 11 6 17 45 | f 8 11 17 46 | f 6 14 17 47 | f 14 8 17 48 | f 4 10 18 49 | f 13 4 18 50 | f 3 6 19 51 | f 6 11 19 52 | f 15 3 19 53 | f 11 15 19 54 | f 10 16 20 55 | f 16 13 20 56 | f 18 10 20 57 | f 13 18 20 
58 | f 3 2 21 59 | f 2 13 21 60 | f 16 3 21 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj1_regular_block.obj: -------------------------------------------------------------------------------- 1 | # Blender v3.4.1 OBJ File: 'set_obj1_regular_block.blend' 2 | # www.blender.org 3 | mtllib set_obj1_regular_block.mtl 4 | o Cube 5 | v 1.000000 1.000000 -1.000000 6 | v 1.000000 -1.000000 -1.000000 7 | v 1.000000 1.000000 1.000000 8 | v 1.000000 -1.000000 1.000000 9 | v -1.000000 1.000000 -1.000000 10 | v -1.000000 -1.000000 -1.000000 11 | v -1.000000 1.000000 1.000000 12 | v -1.000000 -1.000000 1.000000 13 | vt 0.625000 0.500000 14 | vt 0.875000 0.500000 15 | vt 0.875000 0.750000 16 | vt 0.625000 0.750000 17 | vt 0.375000 0.750000 18 | vt 0.625000 1.000000 19 | vt 0.375000 1.000000 20 | vt 0.375000 0.000000 21 | vt 0.625000 0.000000 22 | vt 0.625000 0.250000 23 | vt 0.375000 0.250000 24 | vt 0.125000 0.500000 25 | vt 0.375000 0.500000 26 | vt 0.125000 0.750000 27 | vn 0.0000 1.0000 0.0000 28 | vn 0.0000 0.0000 1.0000 29 | vn -1.0000 0.0000 0.0000 30 | vn 0.0000 -1.0000 0.0000 31 | vn 1.0000 0.0000 0.0000 32 | vn 0.0000 0.0000 -1.0000 33 | usemtl Material 34 | s off 35 | f 1/1/1 5/2/1 7/3/1 3/4/1 36 | f 4/5/2 3/4/2 7/6/2 8/7/2 37 | f 8/8/3 7/9/3 5/10/3 6/11/3 38 | f 6/12/4 2/13/4 4/5/4 8/14/4 39 | f 2/13/5 1/1/5 3/4/5 4/5/5 40 | f 6/11/6 5/10/6 1/1/6 2/13/6 41 | -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj1_regular_block_0decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -1.00000000 -1.00000000 -1.00000000 3 | v 1.00000000 1.00000000 1.00000000 4 | v 1.00000000 -1.00000000 1.00000000 5 | v 1.00000000 1.00000000 -1.00000000 6 | v -1.00000000 1.00000000 1.00000000 7 | v 1.00000000 -1.00000000 -1.00000000 8 | v -1.00000000 -1.00000000 1.00000000 9 | v -1.00000000 1.00000000 -1.00000000 10 | f 5 4 8 11 | f 2 3 4 12 | f 3 2 5 13 | f 2 4 5 14 | f 3 1 6 15 | f 4 3 6 16 | f 1 4 6 17 | f 1 3 7 18 | f 5 1 7 19 | f 3 5 7 20 | f 4 1 8 21 | f 1 5 8 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj2_block.obj: -------------------------------------------------------------------------------- 1 | # Blender v3.4.1 OBJ File: 'set_obj2_block.blend' 2 | # www.blender.org 3 | mtllib set_obj2_block.mtl 4 | o Cube 5 | v 1.000000 -1.000000 -1.000000 6 | v -1.000000 -1.000000 1.000000 7 | v -1.000000 -1.000000 -1.000000 8 | v 0.123934 -1.000000 0.349647 9 | v 1.000000 1.000000 -1.000000 10 | v -1.000000 1.000000 1.000000 11 | v -1.000000 1.000000 -1.000000 12 | v 0.123934 1.000000 0.349647 13 | v 0.123934 1.000000 1.000000 14 | v 0.123934 -1.000000 1.000000 15 | v 1.000000 1.000000 0.349647 16 | v 1.000000 -1.000000 0.349647 17 | vt 0.875000 0.609508 18 | vt 0.875000 0.750000 19 | vt 0.625000 0.750000 20 | vt 0.625000 0.609508 21 | vt 0.375000 0.750000 22 | vt 0.625000 1.000000 23 | vt 0.375000 1.000000 24 | vt 0.375000 0.250000 25 | vt 0.375000 0.000000 26 | vt 0.625000 0.000000 27 | vt 0.625000 0.140492 28 | vt 0.543706 0.140492 29 | vt 0.543706 0.250000 30 | vt 0.125000 0.500000 31 | vt 0.375000 0.500000 32 | vt 0.125000 0.750000 33 | vt 0.543706 0.609508 34 | vt 0.543706 0.500000 35 | vt 0.375000 0.750000 36 | vt 0.375000 1.000000 37 | vt 0.456294 1.000000 38 | vt 0.456294 0.750000 39 | vt 0.125000 0.750000 
40 | vt 0.375000 0.640492 41 | vt 0.125000 0.640492 42 | vn 0.0000 0.0000 1.0000 43 | vn -1.0000 0.0000 0.0000 44 | vn 0.0000 1.0000 0.0000 45 | vn 0.0000 0.0000 -1.0000 46 | vn 0.0000 -1.0000 0.0000 47 | vn 1.0000 0.0000 0.0000 48 | usemtl Material 49 | s off 50 | f 9/1/1 6/2/1 2/3/1 10/4/1 51 | f 3/5/2 2/3/2 6/6/2 7/7/2 52 | f 5/8/3 7/9/3 6/10/3 9/11/3 8/12/3 11/13/3 53 | f 5/14/4 1/15/4 3/5/4 7/16/4 54 | f 4/17/5 10/4/5 2/3/5 3/5/5 1/15/5 12/18/5 55 | f 1/15/6 5/8/6 11/13/6 12/18/6 56 | f 4/19/6 8/20/6 9/21/6 10/22/6 57 | f 8/23/1 4/19/1 12/24/1 11/25/1 58 | -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj2_block_0decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -1.00000000 -1.00000000 -1.00000000 3 | v 0.93263500 0.93263500 0.34964700 4 | v 0.93263500 -0.93263500 0.34964700 5 | v -1.00000000 1.00000000 1.00000000 6 | v 1.00000000 1.00000000 -0.99980400 7 | v -1.00000000 1.00000000 -1.00000000 8 | v -1.00000000 -1.00000000 1.00000000 9 | v 1.00000000 -1.00000000 -0.99980400 10 | v 0.15850600 1.01587300 1.01587300 11 | v 0.12393400 -0.99224900 0.99224900 12 | v 0.46867700 -0.96867700 -1.00000000 13 | f 5 8 11 14 | f 2 3 5 15 | f 1 4 6 16 | f 4 1 7 17 | f 5 3 8 18 | f 7 1 8 19 | f 3 2 9 20 | f 2 5 9 21 | f 6 4 9 22 | f 5 6 9 23 | f 4 7 9 24 | f 9 7 10 25 | f 8 3 10 26 | f 7 8 10 27 | f 3 9 10 28 | f 1 6 11 29 | f 6 5 11 30 | f 8 1 11 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj3_block_0decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.11111100 0.11111100 -1.00000000 3 | v 1.00000000 1.00000000 -0.11111100 4 | v 1.00000000 1.00000000 -1.00000000 5 | v 1.00000000 0.11111100 -0.11111100 6 | v 0.11111100 1.00000000 -0.11111100 7 | v 0.09660500 0.09660500 -0.09660500 8 | v 1.00000000 0.11111100 -1.00000000 9 | v 0.11111100 1.00000000 -1.00000000 10 | f 1 6 8 11 | f 3 2 4 12 | f 2 3 5 13 | f 2 5 6 14 | f 4 2 6 15 | f 6 1 7 16 | f 1 3 7 17 | f 3 4 7 18 | f 4 6 7 19 | f 3 1 8 20 | f 5 3 8 21 | f 6 5 8 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj3_block_1decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 1.00000000 0.07597600 0.98084300 3 | v 0.10839000 -1.00000000 -0.98684800 4 | v 0.10243100 0.08655800 -1.00000000 5 | v 0.10243100 0.08655800 1.00000000 6 | v 0.10839000 -1.00000000 0.98684800 7 | v 1.00000000 -0.99671600 -0.98084300 8 | v 0.97736600 0.08655800 -1.00000000 9 | v 1.00000000 -0.99671600 0.98084300 10 | v 1.00000000 0.07597600 -0.98084300 11 | v 0.97736600 0.08655800 1.00000000 12 | f 5 8 10 13 | f 3 2 4 14 | f 4 2 5 15 | f 5 2 6 16 | f 2 3 7 17 | f 3 4 7 18 | f 6 2 7 19 | f 5 6 8 20 | f 6 1 8 21 | f 1 6 9 22 | f 7 1 9 23 | f 6 7 9 24 | f 4 5 10 25 | f 7 4 10 26 | f 1 7 10 27 | f 8 1 10 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj3_block_2decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -0.19047900 -1.01587300 1.01587300 3 | v 0.05529500 1.00000000 1.00000000 4 | v -1.00000000 0.98684800 0.98684800 5 | v 0.08655800 
-1.00000000 -1.00000000 6 | v -1.00000000 0.98684800 -0.98684800 7 | v -1.00000000 -0.98684800 -0.98684800 8 | v 0.08645000 0.99980400 -1.00000000 9 | v -1.01587300 -0.25396800 1.01587300 10 | v 0.08655800 -1.00000000 1.00000000 11 | v -1.01587300 -1.01587300 0.09506400 12 | v 0.09523800 0.53955800 -0.19032700 13 | v 0.07788800 0.98414700 0.98434000 14 | v -0.92048900 -1.01587300 0.22214800 15 | v -0.47619700 -0.76190500 1.01587300 16 | v -0.88883900 -0.38085300 1.01587300 17 | v 0.05529500 1.00000000 -1.00000000 18 | v -0.25399600 -1.01587300 0.95223200 19 | f 14 13 17 20 | f 3 2 5 21 | f 1 2 8 22 | f 2 3 8 23 | f 3 5 8 24 | f 2 1 9 25 | f 1 4 9 26 | f 4 1 10 27 | f 6 4 10 28 | f 5 6 10 29 | f 8 5 10 30 | f 4 7 11 31 | f 9 4 11 32 | f 9 11 12 33 | f 7 2 12 34 | f 2 9 12 35 | f 11 7 12 36 | f 10 1 13 37 | f 8 10 13 38 | f 1 8 14 39 | f 13 14 15 40 | f 8 13 15 41 | f 14 8 15 42 | f 5 2 16 43 | f 4 6 16 44 | f 6 5 16 45 | f 2 7 16 46 | f 7 4 16 47 | f 13 1 17 48 | f 1 14 17 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_0decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.92192200 0.44261900 0.05839500 3 | v 1.00000000 0.81065900 0.98194900 4 | v 1.00000000 0.42743600 0.98194900 5 | v 0.57142900 0.82535600 1.01587300 6 | v 1.00000000 0.81065900 0.09741600 7 | v 0.56905000 0.82756500 0.05839500 8 | v 0.95231900 0.41273900 1.01587300 9 | v 1.00000000 0.42743600 0.09741600 10 | v 0.57142900 0.79360700 1.01587300 11 | v 0.56905000 0.79547600 0.05839500 12 | v 0.95401700 0.41053100 0.05839500 13 | f 8 3 11 14 | f 2 3 5 15 | f 2 5 6 16 | f 4 2 6 17 | f 3 2 7 18 | f 2 4 7 19 | f 5 3 8 20 | f 4 6 9 21 | f 1 7 9 22 | f 7 4 9 23 | f 6 1 10 24 | f 9 6 10 25 | f 1 9 10 26 | f 6 5 11 27 | f 1 6 11 28 | f 7 1 11 29 | f 3 7 11 30 | f 5 8 11 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_10decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -0.98478500 -0.00543700 0.05839500 3 | v -0.82857100 0.99894200 1.00000000 4 | v -1.01585400 1.01587300 1.01587300 5 | v -1.01585400 0.00000000 1.01587300 6 | v -1.01585400 1.01587300 0.06349200 7 | v -0.82437800 0.53999700 0.05839500 8 | v -0.82539700 0.53965800 1.01587300 9 | v -0.82837300 1.00000000 0.07837300 10 | v -1.01585400 0.00000000 0.06349200 11 | v -0.98410500 0.00000000 1.01587300 12 | f 1 7 10 13 | f 4 3 5 14 | f 1 5 6 15 | f 2 3 7 16 | f 3 4 7 17 | f 6 2 7 18 | f 1 6 7 19 | f 3 2 8 20 | f 5 3 8 21 | f 2 6 8 22 | f 6 5 8 23 | f 1 4 9 24 | f 5 1 9 25 | f 4 5 9 26 | f 4 1 10 27 | f 7 4 10 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_11decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.00000000 -0.98410500 1.01587300 3 | v -1.01587300 -1.01585400 0.06349200 4 | v -1.01587300 -1.01585400 1.01587300 5 | v -1.00000000 -0.82837300 0.07837300 6 | v 0.00000000 -1.01585400 0.06349200 7 | v -0.53965800 -0.82539700 1.01587300 8 | v -0.53999700 -0.82437800 0.05839500 9 | v -0.99894200 -0.82857100 1.00000000 10 | v 0.00000000 -1.01585400 1.01587300 11 | v 0.00543700 -0.98478500 0.05839500 12 | f 1 9 10 13 | f 2 3 4 14 | f 3 2 5 15 | f 3 1 6 16 | f 2 4 7 17 | 
f 6 1 7 18 | f 6 7 8 19 | f 4 3 8 20 | f 3 6 8 21 | f 7 4 8 22 | f 1 3 9 23 | f 3 5 9 24 | f 9 5 10 25 | f 5 2 10 26 | f 2 7 10 27 | f 7 1 10 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_12decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.01493900 0.01493900 -1.00000000 3 | v 1.00000000 1.00000000 0.04662700 4 | v 1.00000000 0.01587300 0.04662700 5 | v 0.00479700 0.00479700 0.05839500 6 | v 0.01587300 1.00000000 0.04662700 7 | v 1.00000000 1.00000000 -0.99900800 8 | v 1.00000000 0.01587300 -0.99900800 9 | v 0.01587300 1.00000000 -0.99900800 10 | f 6 1 8 11 | f 3 2 4 12 | f 4 2 5 13 | f 5 2 6 14 | f 2 3 6 15 | f 6 3 7 16 | f 3 4 7 17 | f 4 1 7 18 | f 1 6 7 19 | f 1 4 8 20 | f 4 5 8 21 | f 5 6 8 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_13decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.01587300 -1.00000000 -0.99900800 3 | v 1.00000000 -0.01587300 0.04662700 4 | v 1.00000000 -1.00000000 0.04662700 5 | v 0.00479700 -0.00479700 0.05839500 6 | v 1.00000000 -0.01587300 -0.99900800 7 | v 0.01493900 -0.01493900 -1.00000000 8 | v 0.01587300 -1.00000000 0.04662700 9 | v 1.00000000 -1.00000000 -0.99900800 10 | f 6 5 8 11 | f 3 2 4 12 | f 2 3 5 13 | f 4 2 5 14 | f 1 4 6 15 | f 4 5 6 16 | f 1 3 7 17 | f 3 4 7 18 | f 4 1 7 19 | f 3 1 8 20 | f 5 3 8 21 | f 1 6 8 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_14decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -1.00000000 0.01587300 -0.99900800 3 | v -0.01587300 1.00000000 0.04662700 4 | v -0.00479700 0.00479700 0.05839500 5 | v -0.01493900 0.01493900 -1.00000000 6 | v -1.00000000 1.00000000 0.04662700 7 | v -0.01587300 1.00000000 -0.99900800 8 | v -1.00000000 0.01587300 0.04662700 9 | v -1.00000000 1.00000000 -0.99900800 10 | f 6 4 8 11 | f 3 1 4 12 | f 3 2 5 13 | f 5 2 6 14 | f 2 3 6 15 | f 3 4 6 16 | f 1 3 7 17 | f 5 1 7 18 | f 3 5 7 19 | f 4 1 8 20 | f 1 5 8 21 | f 5 6 8 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_15decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -1.00000000 -1.00000000 -0.99900800 3 | v -0.00479700 -0.00479700 0.05839500 4 | v -0.01587300 -1.00000000 0.04662700 5 | v -0.01493900 -0.01493900 -1.00000000 6 | v -1.00000000 -0.01587300 0.04662700 7 | v -0.01587300 -1.00000000 -0.99900800 8 | v -1.00000000 -0.01587300 -0.99900800 9 | v -1.00000000 -1.00000000 0.04662700 10 | f 2 5 8 11 | f 3 1 6 12 | f 2 3 6 13 | f 4 2 6 14 | f 1 4 6 15 | f 2 4 7 16 | f 4 1 7 17 | f 1 5 7 18 | f 5 2 7 19 | f 1 3 8 20 | f 3 2 8 21 | f 5 1 8 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_1decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 1.01587300 0.41265800 1.01587300 3 | v 0.98396300 -0.00216800 0.05839500 4 | v 1.01587300 0.00004000 0.06349200 5 | v 0.98396300 0.41486600 0.05839500 6 
| v 0.98413300 0.00004000 1.01587300 7 | v 1.01587300 0.41265800 0.06349200 8 | v 1.01587300 0.00004000 1.01587300 9 | v 0.98413300 0.41265800 1.01587300 10 | f 4 5 8 11 | f 3 2 4 12 | f 4 2 5 13 | f 1 3 6 14 | f 3 4 6 15 | f 4 1 6 16 | f 3 1 7 17 | f 2 3 7 18 | f 1 5 7 19 | f 5 2 7 20 | f 1 4 8 21 | f 5 1 8 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_2decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.79360700 -0.57142900 1.01587300 3 | v 0.42743600 -1.00000000 0.09741600 4 | v 0.81065900 -1.00000000 0.09741600 5 | v 0.79547600 -0.56905000 0.05839500 6 | v 0.41273900 -0.95231900 1.01587300 7 | v 0.81065900 -1.00000000 0.98194900 8 | v 0.41053100 -0.95401700 0.05839500 9 | v 0.82756500 -0.56905000 0.05839500 10 | v 0.42743600 -1.00000000 0.98194900 11 | v 0.82535600 -0.57142900 1.01587300 12 | f 6 8 10 13 | f 1 4 5 14 | f 2 3 6 15 | f 3 2 7 16 | f 5 4 7 17 | f 4 1 8 18 | f 6 3 8 19 | f 3 7 8 20 | f 7 4 8 21 | f 2 6 9 22 | f 6 5 9 23 | f 7 2 9 24 | f 5 7 9 25 | f 1 5 10 26 | f 5 6 10 27 | f 8 1 10 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_3decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.00004000 -1.01587300 0.06349200 3 | v 0.41265800 -0.98413300 1.01587300 4 | v 0.41265800 -1.01587300 1.01587300 5 | v 0.41486600 -0.98396300 0.05839500 6 | v 0.00004000 -0.98413300 1.01587300 7 | v -0.00216800 -0.98396300 0.05839500 8 | v 0.41265800 -1.01587300 0.06349200 9 | v 0.00004000 -1.01587300 1.01587300 10 | f 5 6 8 11 | f 2 3 4 12 | f 3 2 5 13 | f 2 4 5 14 | f 4 1 6 15 | f 5 4 6 16 | f 3 1 7 17 | f 4 3 7 18 | f 1 4 7 19 | f 1 3 8 20 | f 3 5 8 21 | f 6 1 8 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_4decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -0.41486600 0.98396300 0.05839500 3 | v -0.00004000 1.01587300 1.01587300 4 | v -0.00004000 0.98413300 1.01587300 5 | v 0.00216800 0.98396300 0.05839500 6 | v -0.00004000 1.01587300 0.06349200 7 | v -0.41265800 1.01587300 1.01587300 8 | v -0.41265800 1.01587300 0.06349200 9 | v -0.41265800 0.98413300 1.01587300 10 | f 3 6 8 11 | f 2 3 4 12 | f 3 1 4 13 | f 4 1 5 14 | f 2 4 5 15 | f 2 5 6 16 | f 3 2 6 17 | f 5 1 7 18 | f 6 5 7 19 | f 1 6 7 20 | f 1 3 8 21 | f 6 1 8 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_5decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -0.41273900 0.95231900 1.01587300 3 | v -0.82756500 0.56905000 0.05839500 4 | v -0.79547600 0.56905000 0.05839500 5 | v -0.42743600 1.00000000 0.09741600 6 | v -0.81065900 1.00000000 0.98194900 7 | v -0.82535600 0.57142900 1.01587300 8 | v -0.81065900 1.00000000 0.09741600 9 | v -0.41053100 0.95401700 0.05839500 10 | v -0.42743600 1.00000000 0.98194900 11 | v -0.79360700 0.57142900 1.01587300 12 | f 6 3 10 13 | f 5 2 6 14 | f 2 3 6 15 | f 1 5 6 16 | f 5 4 7 17 | f 2 5 7 18 | f 1 3 8 19 | f 3 2 8 20 | f 2 7 8 21 | f 7 4 8 22 | f 5 1 9 23 | f 4 5 9 24 | f 1 8 9 25 | f 8 4 9 26 | f 3 1 10 27 | f 1 6 10 
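The set_obj*_​*decompose.obj listings above and below appear to be pieces from a convex decomposition of the corresponding object meshes; their headers credit trimesh (https://github.com/mikedh/trimesh), and the URDFs in assets/urdf/objects reference them, which suggests they serve as collision geometry. As a minimal sketch (not part of the repository, assuming trimesh is installed and the asset paths match the tree above), one way to load and sanity-check such pieces:

import glob
import trimesh  # library credited in the *_decompose.obj headers

# Illustrative check only: load every piece generated for set_obj4_block and
# report basic stats. Watertight, convex pieces are what a physics engine
# generally expects as collision geometry.
for path in sorted(glob.glob(
        "assets/urdf/objects/meshes/set2/set_obj4_block_*decompose.obj")):
    mesh = trimesh.load(path, force="mesh")
    print(f"{path}: {len(mesh.vertices)} vertices, {len(mesh.faces)} faces, "
          f"watertight={mesh.is_watertight}, convex={mesh.is_convex}")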
-------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_6decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -0.98413300 -0.00004000 1.01587300 3 | v -1.01587300 -0.41265800 0.06349200 4 | v -0.98396300 -0.41486600 0.05839500 5 | v -0.98396300 0.00216800 0.05839500 6 | v -1.01587300 -0.00004000 0.06349200 7 | v -1.01587300 -0.41265800 1.01587300 8 | v -1.01587300 -0.00004000 1.01587300 9 | v -0.98413300 -0.41265800 1.01587300 10 | f 6 3 8 11 | f 3 2 4 12 | f 1 3 4 13 | f 4 2 5 14 | f 5 2 6 15 | f 2 3 6 16 | f 1 4 7 17 | f 4 5 7 18 | f 5 6 7 19 | f 6 1 7 20 | f 3 1 8 21 | f 1 6 8 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_7decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -1.00000000 -0.42743600 0.09741600 3 | v -0.57142900 -0.79360700 1.01587300 4 | v -0.57142900 -0.82535600 1.01587300 5 | v -0.56905000 -0.79547600 0.05839500 6 | v -1.00000000 -0.81065900 0.09741600 7 | v -0.95231900 -0.41273900 1.01587300 8 | v -1.00000000 -0.81065900 0.98194900 9 | v -0.95401700 -0.41053100 0.05839500 10 | v -1.00000000 -0.42743600 0.98194900 11 | v -0.56905000 -0.82756500 0.05839500 12 | f 8 4 10 13 | f 2 3 4 14 | f 3 2 6 15 | f 2 4 6 16 | f 1 5 7 17 | f 3 6 7 18 | f 5 1 8 19 | f 6 4 8 20 | f 6 8 9 21 | f 1 7 9 22 | f 7 6 9 23 | f 8 1 9 24 | f 4 3 10 25 | f 7 5 10 26 | f 3 7 10 27 | f 5 8 10 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_8decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.99894200 0.82857100 1.00000000 3 | v -0.00543700 0.98478500 0.05839500 4 | v 0.00000000 1.01585400 0.06349200 5 | v 1.01587300 1.01585400 0.06349200 6 | v 0.00000000 1.01585400 1.01587300 7 | v 0.53999700 0.82437800 0.05839500 8 | v 1.01587300 1.01585400 1.01587300 9 | v 0.53965800 0.82539700 1.01587300 10 | v 1.00000000 0.82837300 0.07837300 11 | v 0.00000000 0.98410500 1.01587300 12 | f 2 8 10 13 | f 2 3 4 14 | f 3 2 5 15 | f 4 3 5 16 | f 2 4 6 17 | f 4 5 7 18 | f 7 5 8 19 | f 2 6 8 20 | f 6 1 8 21 | f 1 7 8 22 | f 6 4 9 23 | f 1 6 9 24 | f 7 1 9 25 | f 4 7 9 26 | f 5 2 10 27 | f 8 5 10 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj4_block_9decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.82837300 -1.00000000 0.07837300 3 | v 1.01585400 0.00000000 1.01587300 4 | v 1.01585400 0.00000000 0.06349200 5 | v 1.01585400 -1.01587300 1.01587300 6 | v 0.82539700 -0.53965800 1.01587300 7 | v 1.01585400 -1.01587300 0.06349200 8 | v 0.82437800 -0.53999700 0.05839500 9 | v 0.82857100 -0.99894200 1.00000000 10 | v 0.98478500 0.00543700 0.05839500 11 | v 0.98410500 0.00000000 1.01587300 12 | f 9 5 10 13 | f 3 2 4 14 | f 4 2 5 15 | f 3 4 6 16 | f 4 1 6 17 | f 6 1 7 18 | f 1 4 8 19 | f 4 5 8 20 | f 7 1 8 21 | f 5 7 8 22 | f 2 3 9 23 | f 3 6 9 24 | f 7 5 9 25 | f 6 7 9 26 | f 5 2 10 27 | f 2 9 10 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj5_block_0decompose.obj: 
-------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.98432000 1.00000000 0.98432000 3 | v -1.00000000 0.00793700 -1.00000000 4 | v -1.00000000 0.00793700 1.00000000 5 | v 1.01587300 0.00000000 0.06344200 6 | v -0.98412700 1.00000000 -0.98412700 7 | v 0.98432000 1.00000000 -0.98432000 8 | v -0.98412700 1.00000000 0.98412700 9 | v 1.00000000 0.00793700 -1.00000000 10 | v 0.96867700 0.00793700 1.00000000 11 | v 1.01587300 0.09526300 1.01587300 12 | f 3 9 10 13 | f 3 2 4 14 | f 2 3 5 15 | f 2 5 6 16 | f 5 1 6 17 | f 1 5 7 18 | f 5 3 7 19 | f 4 2 8 20 | f 2 6 8 21 | f 6 4 8 22 | f 3 4 9 23 | f 9 4 10 24 | f 4 6 10 25 | f 6 1 10 26 | f 1 7 10 27 | f 7 3 10 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj5_block_1decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 1.01587300 0.00000000 0.03162200 3 | v -0.98412700 -1.00000000 -0.98412700 4 | v -0.98412700 -1.00000000 0.98412700 5 | v -1.00000000 -0.00793700 -1.00000000 6 | v 0.98412700 -1.00000000 -0.98412700 7 | v -1.00000000 -0.00793700 1.00000000 8 | v 0.95330100 -1.00000000 0.98412700 9 | v 1.00000000 -0.00793700 -1.00000000 10 | v 0.96867700 -0.00793700 1.00000000 11 | v 1.01587300 -0.03182100 0.03162200 12 | f 1 9 10 13 | f 2 3 4 14 | f 3 2 5 15 | f 2 4 5 16 | f 4 3 6 17 | f 1 4 6 18 | f 3 5 7 19 | f 6 3 7 20 | f 4 1 8 21 | f 5 4 8 22 | f 1 6 9 23 | f 6 7 9 24 | f 9 7 10 25 | f 7 5 10 26 | f 8 1 10 27 | f 5 8 10 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj6_block_corner_0decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.63890600 -0.60843800 -0.91265700 3 | v 0.58618700 0.58621600 0.98759700 4 | v -0.58635500 0.58615400 0.98749100 5 | v 0.58618700 0.98759700 -0.58621600 6 | v -0.98758300 -0.58620800 -0.58620800 7 | v 0.58618700 -0.98759700 0.58621600 8 | v -0.58635500 0.58615400 -0.98749100 9 | v -0.58635500 -0.58615400 0.98749100 10 | v 0.98756300 0.58621800 0.58621800 11 | v -0.58635500 0.98749100 0.58615400 12 | v -0.58635500 -0.98749100 -0.58615400 13 | v -0.98758300 0.58620800 0.58620800 14 | v 0.98756300 -0.58621800 -0.58621800 15 | v 0.58618700 0.58621600 -0.98759700 16 | v -0.58635500 -0.58615400 -0.98749100 17 | v 0.58618700 0.98759700 0.58621600 18 | v 0.58618700 -0.58621600 0.98759700 19 | v 0.98756300 -0.58621800 0.58621800 20 | v 0.98756300 0.58621800 -0.58621800 21 | v -0.58635500 -0.98749100 0.58615400 22 | v -0.58635500 0.98749100 -0.58615400 23 | v -0.98758300 -0.58620800 0.58620800 24 | v -0.98758300 0.58620800 -0.58620800 25 | v 0.58618700 -0.98759700 -0.58621600 26 | v 0.58618700 -0.58621600 -0.98759700 27 | f 1 24 25 28 | f 2 3 8 29 | f 3 2 10 30 | f 8 3 12 31 | f 3 10 12 32 | f 7 4 14 33 | f 5 7 15 34 | f 11 5 15 35 | f 7 14 15 36 | f 2 9 16 37 | f 9 4 16 38 | f 10 2 16 39 | f 4 10 16 40 | f 2 8 17 41 | f 8 6 17 42 | f 9 2 17 43 | f 9 17 18 44 | f 6 13 18 45 | f 13 9 18 46 | f 17 6 18 47 | f 4 9 19 48 | f 9 13 19 49 | f 14 4 19 50 | f 13 14 19 51 | f 6 8 20 52 | f 11 6 20 53 | f 5 11 20 54 | f 4 7 21 55 | f 10 4 21 56 | f 12 10 21 57 | f 12 5 22 58 | f 8 12 22 59 | f 5 20 22 60 | f 20 8 22 61 | f 7 5 23 62 | f 5 12 23 63 | f 21 7 23 64 | f 12 21 23 65 | f 6 11 24 66 | f 1 13 24 67 | f 13 6 24 68 | f 11 15 24 69 | f 24 
15 25 70 | f 13 1 25 71 | f 14 13 25 72 | f 15 14 25 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj7_block.obj: -------------------------------------------------------------------------------- 1 | # Blender v3.4.1 OBJ File: 'set_obj7_block.blend' 2 | # www.blender.org 3 | mtllib set_obj7_block.mtl 4 | o Cube 5 | v 1.000000 -1.000000 -1.000000 6 | v -1.000000 -1.000000 1.000000 7 | v -1.000000 -1.000000 -1.000000 8 | v 1.000000 1.000000 1.000000 9 | v 1.000000 1.000000 -1.000000 10 | v -1.000000 1.000000 1.000000 11 | v -1.000000 1.000000 -1.000000 12 | v 1.000000 0.080200 1.000000 13 | v -0.157800 -0.995695 1.000000 14 | v -0.153800 -1.000000 1.000000 15 | v -0.153800 -1.000000 0.988854 16 | v 1.000000 -1.000000 -0.500820 17 | vt 0.760025 0.500000 18 | vt 0.875000 0.500000 19 | vt 0.875000 0.750000 20 | vt 0.625000 0.750000 21 | vt 0.625000 0.644225 22 | vt 0.625538 0.644725 23 | vt 0.375000 0.750000 24 | vt 0.625000 1.000000 25 | vt 0.375000 1.000000 26 | vt 0.375000 0.000000 27 | vt 0.625000 0.000000 28 | vt 0.625000 0.250000 29 | vt 0.375000 0.250000 30 | vt 0.125000 0.500000 31 | vt 0.375000 0.500000 32 | vt 0.125000 0.750000 33 | vt 0.437397 0.500000 34 | vt 0.623607 0.644225 35 | vt 0.625000 0.364975 36 | vt 0.602260 0.947565 37 | vt 0.602261 0.750000 38 | vt 0.600685 0.750000 39 | vt 0.390180 0.855651 40 | vt 0.601918 0.749350 41 | vn 0.0000 0.0000 1.0000 42 | vn -1.0000 0.0000 0.0000 43 | vn 0.0000 1.0000 0.0000 44 | vn 0.0000 0.0000 -1.0000 45 | vn 0.0000 -1.0000 0.0000 46 | vn 1.0000 0.0000 0.0000 47 | vn 0.6022 -0.6480 0.4664 48 | vn 0.7325 0.6807 0.0000 49 | usemtl Material 50 | s off 51 | f 8/1/1 4/2/1 6/3/1 2/4/1 10/5/1 9/6/1 52 | f 3/7/2 2/4/2 6/8/2 7/9/2 53 | f 7/10/3 6/11/3 4/12/3 5/13/3 54 | f 5/14/4 1/15/4 3/7/4 7/16/4 55 | f 10/5/5 2/4/5 3/7/5 1/15/5 12/17/5 11/18/5 56 | f 1/15/6 5/13/6 4/12/6 8/19/6 12/17/6 57 | f 8/20/7 9/21/7 11/22/7 12/23/7 58 | f 9/21/8 10/24/8 11/22/8 59 | -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj7_block_0decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v 0.81362200 -0.96454200 -0.21092300 3 | v 0.99995100 1.00000000 1.00000000 4 | v 0.99995100 1.00000000 -1.00000000 5 | v -1.00000000 1.00000000 1.00000000 6 | v -1.00000000 -1.00000000 -1.00000000 7 | v -1.00000000 -1.00000000 1.00000000 8 | v -1.00000000 1.00000000 -1.00000000 9 | v 0.99995100 -1.00000000 -1.00000000 10 | v 0.96964800 0.03018400 0.96969600 11 | v -0.12220900 -0.97826500 0.97826500 12 | v 0.97168600 -0.97173400 -0.42499100 13 | f 10 8 11 14 | f 2 3 4 15 | f 2 4 6 16 | f 4 5 6 17 | f 4 3 7 18 | f 5 4 7 19 | f 3 5 7 20 | f 3 2 8 21 | f 5 3 8 22 | f 6 5 8 23 | f 8 2 9 24 | f 2 6 10 25 | f 6 8 10 26 | f 9 2 10 27 | f 1 9 10 28 | f 9 1 11 29 | f 8 9 11 30 | f 1 10 11 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj8_short_block.obj: -------------------------------------------------------------------------------- 1 | # Blender v3.4.1 OBJ File: 'set_obj8_short_block.blend' 2 | # www.blender.org 3 | mtllib set_obj8_short_block.mtl 4 | o Cube 5 | v 0.800000 -1.000000 1.000000 6 | v 0.800000 -1.000000 -1.000000 7 | v -0.800000 -1.000000 1.000000 8 | v -0.800000 -1.000000 -1.000000 9 | v 0.800000 1.000000 1.000000 10 | v 0.800000 1.000000 -1.000000 11 | v -0.800000 1.000000 
1.000000 12 | v -0.800000 1.000000 -1.000000 13 | vt 0.625000 0.500000 14 | vt 0.875000 0.500000 15 | vt 0.875000 0.750000 16 | vt 0.625000 0.750000 17 | vt 0.375000 0.750000 18 | vt 0.625000 1.000000 19 | vt 0.375000 1.000000 20 | vt 0.375000 0.000000 21 | vt 0.625000 0.000000 22 | vt 0.625000 0.250000 23 | vt 0.375000 0.250000 24 | vt 0.125000 0.500000 25 | vt 0.375000 0.500000 26 | vt 0.125000 0.750000 27 | vn 0.0000 0.0000 1.0000 28 | vn -1.0000 0.0000 0.0000 29 | vn 0.0000 1.0000 0.0000 30 | vn 0.0000 0.0000 -1.0000 31 | vn 0.0000 -1.0000 0.0000 32 | vn 1.0000 0.0000 0.0000 33 | usemtl Material 34 | s off 35 | f 1/1/1 5/2/1 7/3/1 3/4/1 36 | f 4/5/2 3/4/2 7/6/2 8/7/2 37 | f 8/8/3 7/9/3 5/10/3 6/11/3 38 | f 6/12/4 2/13/4 4/5/4 8/14/4 39 | f 2/13/5 1/1/5 3/4/5 4/5/5 40 | f 6/11/6 5/10/6 1/1/6 2/13/6 41 | -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj8_short_block_0decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -0.80000000 -0.99602300 -0.99602300 3 | v 0.79052000 1.00000000 1.00000000 4 | v 0.79052000 1.00000000 -1.00000000 5 | v 0.79052000 -1.00000000 1.00000000 6 | v -0.80000000 0.99602300 0.99602300 7 | v 0.79052000 -1.00000000 -1.00000000 8 | v -0.80000000 0.99602300 -0.99602300 9 | v -0.80000000 -0.99602300 0.99602300 10 | f 4 5 8 11 | f 3 2 4 12 | f 2 3 5 13 | f 4 2 5 14 | f 1 3 6 15 | f 3 4 6 16 | f 4 1 6 17 | f 3 1 7 18 | f 1 5 7 19 | f 5 3 7 20 | f 1 4 8 21 | f 5 1 8 -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj9_thin_block.obj: -------------------------------------------------------------------------------- 1 | # Blender v3.4.1 OBJ File: 'set_obj9_thin_block.blend' 2 | # www.blender.org 3 | mtllib set_obj9_thin_block.mtl 4 | o Cube 5 | v 0.800000 -1.000000 1.000000 6 | v 0.800000 -1.000000 -1.000000 7 | v -0.800000 -1.000000 1.000000 8 | v -0.800000 -1.000000 -1.000000 9 | v 0.800000 1.000000 1.000000 10 | v 0.800000 1.000000 -1.000000 11 | v -0.800000 1.000000 1.000000 12 | v -0.800000 1.000000 -1.000000 13 | vt 0.625000 0.500000 14 | vt 0.875000 0.500000 15 | vt 0.875000 0.750000 16 | vt 0.625000 0.750000 17 | vt 0.375000 0.750000 18 | vt 0.625000 1.000000 19 | vt 0.375000 1.000000 20 | vt 0.375000 0.000000 21 | vt 0.625000 0.000000 22 | vt 0.625000 0.250000 23 | vt 0.375000 0.250000 24 | vt 0.125000 0.500000 25 | vt 0.375000 0.500000 26 | vt 0.125000 0.750000 27 | vn 0.0000 0.0000 1.0000 28 | vn -1.0000 0.0000 0.0000 29 | vn 0.0000 1.0000 0.0000 30 | vn 0.0000 0.0000 -1.0000 31 | vn 0.0000 -1.0000 0.0000 32 | vn 1.0000 0.0000 0.0000 33 | usemtl Material 34 | s off 35 | f 1/1/1 5/2/1 7/3/1 3/4/1 36 | f 4/5/2 3/4/2 7/6/2 8/7/2 37 | f 8/8/3 7/9/3 5/10/3 6/11/3 38 | f 6/12/4 2/13/4 4/5/4 8/14/4 39 | f 2/13/5 1/1/5 3/4/5 4/5/5 40 | f 6/11/6 5/10/6 1/1/6 2/13/6 41 | -------------------------------------------------------------------------------- /assets/urdf/objects/meshes/set2/set_obj9_thin_block_0decompose.obj: -------------------------------------------------------------------------------- 1 | # https://github.com/mikedh/trimesh 2 | v -0.80000000 -0.99602300 -0.99602300 3 | v 0.79052000 1.00000000 1.00000000 4 | v 0.79052000 1.00000000 -1.00000000 5 | v 0.79052000 -1.00000000 1.00000000 6 | v -0.80000000 0.99602300 0.99602300 7 | v 0.79052000 -1.00000000 -1.00000000 8 | v -0.80000000 0.99602300 -0.99602300 9 | v -0.80000000 -0.99602300 
0.99602300 10 | f 4 5 8 11 | f 3 2 4 12 | f 2 3 5 13 | f 4 2 5 14 | f 1 3 6 15 | f 3 4 6 16 | f 4 1 6 17 | f 3 1 7 18 | f 1 5 7 19 | f 5 3 7 20 | f 1 4 8 21 | f 5 1 8 -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj10_thin_block_corner.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj11_cylinder.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj12_cylinder_corner.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj13_irregular_block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj14_irregular_block_cross.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj15_irregular_block_time.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj1_regular_block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj2_block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj3_block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- 
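The `# https://github.com/mikedh/trimesh` headers above indicate these meshes were exported with trimesh, and the `*decompose.obj` files appear to be convex pieces of the corresponding non-convex object meshes, presumably used as collision geometry by the simulator. As a hedged sketch (not part of the original asset pipeline; the `set_obj5_block` base name is taken from the listing above, everything else is illustrative), the pieces can be inspected like this:

```python
# Sketch: load an object mesh and its convex-decomposition pieces with trimesh.
# Assumes `pip install trimesh` and that the script runs from the repository root.
import glob
import trimesh

base = "assets/urdf/objects/meshes/set2/set_obj5_block"
full = trimesh.load(base + ".obj", force="mesh")
pieces = [trimesh.load(p, force="mesh")
          for p in sorted(glob.glob(base + "_*decompose.obj"))]

print("full mesh:", len(full.vertices), "vertices,", len(full.faces), "faces")
for i, piece in enumerate(pieces):
    # Each piece is expected to be (approximately) convex, which is what
    # physics engines typically require for collision shapes.
    print(f"piece {i}: convex={piece.is_convex}, watertight={piece.is_watertight}")
```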
/assets/urdf/objects/set_obj5_block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj6_block_corner.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj7_block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj8_short_block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /assets/urdf/objects/set_obj9_thin_block.urdf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/base.stl -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/base_link.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/base_link.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/base_link_left.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/base_link_left.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/box.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/box.stl -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/gel.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/gel.stl -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link1.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link1.stl -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link2.stl -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link3.stl -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link4.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link4.stl -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link5.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link5.stl -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link6.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link6.stl -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link_0.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link_0.0.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link_1.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link_1.0.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link_12.0_left.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link_12.0_left.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link_12.0_right.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link_12.0_right.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link_13.0.STL: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link_13.0.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link_14.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link_14.0.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link_15.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link_15.0.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link_15.0_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link_15.0_tip.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link_2.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link_2.0.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link_3.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link_3.0.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link_3.0_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link_3.0_tip.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/link_4.0.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/link_4.0.STL -------------------------------------------------------------------------------- /assets/urdf/xarm6/meshes/modified_tip.STL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/assets/urdf/xarm6/meshes/modified_tip.STL -------------------------------------------------------------------------------- /distillation/rl_pytorch/rl_pytorch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/distillation/rl_pytorch/rl_pytorch/__init__.py -------------------------------------------------------------------------------- /install.md: -------------------------------------------------------------------------------- 1 | # Installation Instructions 2 | This document provides instructions on how to
properly install this codebase. We recommend using a conda environment to simplify setup. 3 | ## Setup a Conda Environment 4 | 5 | This repo requires [pytorch3d](https://github.com/facebookresearch/pytorch3d), which can be installed as follows. 6 | ``` 7 | conda create -n robosyn python=3.8 8 | conda activate robosyn 9 | conda install pytorch torchvision torchaudio pytorch-cuda=12.1 -c pytorch -c nvidia 10 | conda install -c fvcore -c iopath -c conda-forge fvcore iopath 11 | conda install pytorch3d -c pytorch3d 12 | ``` 13 | 14 | ## IsaacGym 15 | Download the Isaac Gym Preview 4 release from the [website](https://developer.nvidia.com/isaac-gym), then follow the installation instructions in the documentation. We provide the bash commands we used. 16 | ``` 17 | pip install scipy imageio ninja 18 | tar -xzvf IsaacGym_Preview_4_Package.tar.gz 19 | cd isaacgym/python 20 | pip install -e . --no-deps 21 | ``` 22 | 23 | ## Other dependencies 24 | Install the remaining dependencies of this repo via pip: 25 | ``` 26 | pip install hydra-core gym ray open3d numpy==1.20.3 tensorboardX tensorboard wandb 27 | ``` 28 | 29 | You are all set now! Follow the main instructions to continue the exploration journey. -------------------------------------------------------------------------------- /isaacgymenvs/tasks/base/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018-2022, NVIDIA Corporation 2 | # All rights reserved. 3 | # 4 | # Redistribution and use in source and binary forms, with or without 5 | # modification, are permitted provided that the following conditions are met: 6 | # 7 | # 1. Redistributions of source code must retain the above copyright notice, this 8 | # list of conditions and the following disclaimer. 9 | # 10 | # 2. Redistributions in binary form must reproduce the above copyright notice, 11 | # this list of conditions and the following disclaimer in the documentation 12 | # and/or other materials provided with the distribution. 13 | # 14 | # 3. Neither the name of the copyright holder nor the names of its 15 | # contributors may be used to endorse or promote products derived from 16 | # this software without specific prior written permission. 17 | # 18 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
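Following up on the installation steps in install.md above, here is a minimal sanity check for the resulting environment. It is an illustrative sketch, not part of the repo; the import order reflects Isaac Gym's requirement that it be imported before torch:

```python
# Sketch: quick sanity check for the conda environment set up in install.md.
# Isaac Gym must be imported before torch, otherwise it refuses to load.
import isaacgym  # noqa: F401
import torch
import pytorch3d

print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
print("pytorch3d:", pytorch3d.__version__)
```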
28 | -------------------------------------------------------------------------------- /object_pc_embeddings_C_pretrain_mug.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/object_pc_embeddings_C_pretrain_mug.pkl -------------------------------------------------------------------------------- /pickle_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | import gzip 4 | 5 | 6 | # This version does compression. 7 | def gload_data(file_path): 8 | file = gzip.GzipFile(file_path, 'rb') 9 | res = pickle.load(file) 10 | file.close() 11 | return res 12 | 13 | 14 | def gsave_data(obj, file_path): 15 | file = gzip.GzipFile(file_path, 'wb') 16 | pickle.dump(obj, file, -1) 17 | file.close() 18 | 19 | 20 | def save_data(obj, file_path): 21 | with open(file_path, 'wb') as f: 22 | pickle.dump(obj, f) 23 | 24 | 25 | def load_data(file_path): 26 | with open(file_path, 'rb') as f: 27 | return pickle.load(f) 28 | 29 | if __name__ == "__main__": 30 | data = load_data('./obs_log.pkl') 31 | print(data) 32 | -------------------------------------------------------------------------------- /rl_games/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/rl_games/__init__.py -------------------------------------------------------------------------------- /rl_games/algos_torch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/rl_games/algos_torch/__init__.py -------------------------------------------------------------------------------- /rl_games/algos_torch/d2rl.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class D2RLNet(torch.nn.Module): 4 | def __init__(self, input_size, 5 | units, 6 | activations, 7 | norm_func_name = None): 8 | torch.nn.Module.__init__(self) 9 | self.activations = torch.nn.ModuleList(activations) 10 | self.linears = torch.nn.ModuleList([]) 11 | self.norm_layers = torch.nn.ModuleList([]) 12 | self.num_layers = len(units) 13 | last_size = input_size 14 | for i in range(self.num_layers): 15 | self.linears.append(torch.nn.Linear(last_size, units[i])) 16 | last_size = units[i] + input_size 17 | if norm_func_name == 'layer_norm': 18 | self.norm_layers.append(torch.nn.LayerNorm(units[i])) 19 | elif norm_func_name == 'batch_norm': 20 | self.norm_layers.append(torch.nn.BatchNorm1d(units[i])) 21 | else: 22 | self.norm_layers.append(torch.nn.Identity()) 23 | 24 | def forward(self, input): 25 | x = self.linears[0](input) 26 | x = self.activations[0](x) 27 | x = self.norm_layers[0](x) 28 | for i in range(1,self.num_layers): 29 | x = torch.cat([x,input], dim=1) 30 | x = self.linears[i](x) 31 | x = self.norm_layers[i](x) 32 | x = self.activations[i](x) 33 | return x -------------------------------------------------------------------------------- /rl_games/algos_torch/self_play_manager.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class SelfPlayManager: 4 | def __init__(self, config, writter): 5 | self.config = config 6 | self.writter = writter 7 | self.update_score = 
self.config['update_score'] 8 | self.games_to_check = self.config['games_to_check'] 9 | self.check_scores = self.config.get('check_scores', False) 10 | self.env_update_num = self.config.get('env_update_num', 1) 11 | self.env_indexes = np.arange(start=0, stop=self.env_update_num) 12 | self.updates_num = 0 13 | 14 | def update(self, algo): 15 | self.updates_num += 1 16 | if self.check_scores: 17 | data = algo.game_scores 18 | else: 19 | data = algo.game_rewards 20 | 21 | if len(data) >= self.games_to_check: 22 | mean_scores = data.get_mean() 23 | mean_rewards = algo.game_rewards.get_mean() 24 | if mean_scores > self.update_score: 25 | print('Mean scores: ', mean_scores, ' mean rewards: ', mean_rewards, ' updating weights') 26 | 27 | algo.clear_stats() 28 | self.writter.add_scalar('selfplay/iters_update_weigths', self.updates_num, algo.frame) 29 | algo.vec_env.set_weights(self.env_indexes, algo.get_weights()) 30 | self.env_indexes = (self.env_indexes + 1) % (algo.num_actors) 31 | self.updates_num = 0 32 | -------------------------------------------------------------------------------- /rl_games/common/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/rl_games/common/__init__.py -------------------------------------------------------------------------------- /rl_games/common/divergence.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.distributions as dist 3 | 4 | 5 | 6 | def d_kl_discrete(p, q): 7 | # p = target, q = online 8 | # categorical distribution parametrized by logits 9 | logits_diff = p - q 10 | p_probs = torch.exp(p) 11 | d_kl = (p_probs * logits_diff).sum(-1) 12 | return d_kl 13 | 14 | 15 | def d_kl_discrete_list(p, q): 16 | d_kl = 0 17 | for pi, qi in zip(p,q): 18 | d_kl += d_kl_discrete(pi, qi) 19 | return d_kl 20 | 21 | def d_kl_normal(p, q): 22 | # p = target, q = online 23 | p_mean, p_sigma = p 24 | q_mean, q_sigma = q 25 | mean_diff = ((q_mean - p_mean) / q_sigma).pow(2) 26 | var_ratio = (p_sigma / q_sigma).pow(2) 27 | 28 | d_kl = 0.5 * (var_ratio + mean_diff - 1 - var_ratio.log()) 29 | return d_kl.sum(-1) -------------------------------------------------------------------------------- /rl_games/common/ivecenv.py: -------------------------------------------------------------------------------- 1 | class IVecEnv: 2 | def step(self, actions): 3 | raise NotImplementedError 4 | 5 | def reset(self): 6 | raise NotImplementedError 7 | 8 | def has_action_masks(self): 9 | return False 10 | 11 | def get_number_of_agents(self): 12 | return 1 13 | 14 | def get_env_info(self): 15 | pass 16 | 17 | def seed(self, seed): 18 | pass 19 | 20 | def set_train_info(self, env_frames, *args, **kwargs): 21 | """ 22 | Send the information in the direction algo->environment. 23 | Most common use case: tell the environment how far along we are in the training process. This is useful 24 | for implementing curriculums and things such as that. 25 | """ 26 | pass 27 | 28 | def get_env_state(self): 29 | """ 30 | Return serializable environment state to be saved to checkpoint. 31 | Can be used for stateful training sessions, i.e. with adaptive curriculums. 
32 | """ 33 | return None 34 | 35 | def set_env_state(self, env_state): 36 | pass 37 | -------------------------------------------------------------------------------- /rl_games/common/layers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/rl_games/common/layers/__init__.py -------------------------------------------------------------------------------- /rl_games/common/object_factory.py: -------------------------------------------------------------------------------- 1 | class ObjectFactory: 2 | def __init__(self): 3 | self._builders = {} 4 | 5 | def register_builder(self, name, builder): 6 | self._builders[name] = builder 7 | 8 | def set_builders(self, builders): 9 | self._builders = builders 10 | 11 | def create(self, name, **kwargs): 12 | builder = self._builders.get(name) 13 | if not builder: 14 | raise ValueError(name) 15 | return builder(**kwargs) -------------------------------------------------------------------------------- /rl_games/common/rollouts.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | ''' 4 | TODO: move play_steps here 5 | ''' 6 | class Rollout: 7 | def __init__(self, gamma): 8 | self.gamma = gamma 9 | 10 | def play_steps(self, env, max_steps_count = 1): 11 | pass 12 | 13 | 14 | class DiscretePpoRollout(Rollout): 15 | def __init__(self, gamma, lam): 16 | super(Rollout, self).__init__(gamma) 17 | self.lam = lam 18 | 19 | def play_steps(self, env, max_steps_count = 1): 20 | pass -------------------------------------------------------------------------------- /rl_games/common/transforms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/rl_games/common/transforms/__init__.py -------------------------------------------------------------------------------- /rl_games/common/transforms/soft_augmentation.py: -------------------------------------------------------------------------------- 1 | from rl_games.common.transforms import transforms 2 | import torch 3 | 4 | class SoftAugmentation(): 5 | def __init__(self, **kwargs): 6 | self.transform_config = kwargs.pop('transform') 7 | self.aug_coef = kwargs.pop('aug_coef', 0.001) 8 | print('aug coef:', self.aug_coef) 9 | self.name = self.transform_config['name'] 10 | 11 | #TODO: remove hardcode 12 | self.transform = transforms.ImageDatasetTransform(**self.transform_config) 13 | 14 | def get_coef(self): 15 | return self.aug_coef 16 | 17 | def get_loss(self, p_dict, model, input_dict, loss_type = 'both'): 18 | ''' 19 | loss_type: 'critic', 'policy', 'both' 20 | ''' 21 | if self.transform: 22 | input_dict = self.transform(input_dict) 23 | loss = 0 24 | q_dict = model(input_dict) 25 | if loss_type == 'policy' or loss_type == 'both': 26 | p_dict['logits'] = p_dict['logits'].detach() 27 | loss = model.kl(p_dict, q_dict) 28 | if loss_type == 'critic' or loss_type == 'both': 29 | p_value = p_dict['value'].detach() 30 | q_value = q_dict['value'] 31 | loss = loss + (0.5 * (p_value - q_value)**2).sum(dim=-1) 32 | 33 | return loss -------------------------------------------------------------------------------- /rl_games/common/transforms/transforms.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | class 
DatasetTransform(nn.Module): 5 | def __init__(self): 6 | super().__init__() 7 | 8 | def forward(self, dataset): 9 | return dataset 10 | 11 | 12 | class ImageDatasetTransform(DatasetTransform): 13 | def __init__(self, **kwargs): 14 | super().__init__() 15 | import kornia 16 | self.transform = torch.nn.Sequential( 17 | nn.ReplicationPad2d(4), 18 | kornia.augmentation.RandomCrop((84,84)) 19 | #kornia.augmentation.RandomErasing(p=0.2), 20 | #kornia.augmentation.RandomAffine(degrees=0, translate=(2.0/84,2.0/84), p=1), 21 | #kornia.augmentation.RandomCrop((84,84)) 22 | ) 23 | 24 | def forward(self, dataset): 25 | dataset['obs'] = self.transform(dataset['obs']) 26 | return dataset -------------------------------------------------------------------------------- /rl_games/configs/atari/ppo_breakout_torch_impala.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: resnet_actor_critic 10 | require_rewards: True 11 | require_last_actions: True 12 | separate: False 13 | value_shape: 1 14 | space: 15 | discrete: 16 | 17 | cnn: 18 | permute_input: True 19 | conv_depths: [16, 32, 32] 20 | activation: relu 21 | initializer: 22 | name: default 23 | regularizer: 24 | name: 'None' 25 | 26 | mlp: 27 | units: [512] 28 | activation: relu 29 | regularizer: 30 | name: 'None' 31 | initializer: 32 | name: default 33 | rnn: 34 | name: lstm 35 | units: 256 36 | layers: 1 37 | 38 | config: 39 | env_name: atari_gym 40 | reward_shaper: 41 | min_val: -1 42 | max_val: 1 43 | 44 | normalize_advantage: True 45 | gamma: 0.99 46 | tau: 0.95 47 | learning_rate: 5e-4 48 | name: breakout_impala_lstm 49 | score_to_win: 900 50 | grad_norm: 0.5 51 | entropy_coef: 0.01 52 | truncate_grads: True 53 | 54 | e_clip: 0.2 55 | clip_value: True 56 | num_actors: 16 57 | horizon_length: 256 58 | minibatch_size: 512 59 | mini_epochs: 3 60 | critic_coef: 1 61 | lr_schedule: None 62 | kl_threshold: 0.01 63 | normalize_input: False 64 | seq_length: 8 65 | 66 | # max_epochs: 5000 67 | env_config: 68 | skip: 4 69 | name: 'BreakoutNoFrameskip-v4' 70 | episode_life: True 71 | wrap_impala: True 72 | player: 73 | render: False 74 | games_num: 100 75 | n_game_life: 5 76 | determenistic: False 77 | -------------------------------------------------------------------------------- /rl_games/configs/atari/ppo_space_invaders_resnet.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: resnet_actor_critic 10 | separate: False 11 | value_shape: 1 12 | space: 13 | discrete: 14 | 15 | cnn: 16 | conv_depths: [16, 32, 32] 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: 'None' 22 | 23 | mlp: 24 | units: [512] 25 | activation: relu 26 | regularizer: 27 | name: 'None' 28 | initializer: 29 | name: default 30 | rnn: 31 | name: lstm 32 | units: 256 33 | layers: 1 34 | config: 35 | reward_shaper: 36 | min_val: -1 37 | max_val: 1 38 | 39 | normalize_advantage: True 40 | gamma: 0.995 41 | tau: 0.95 42 | learning_rate: 3e-4 43 | name: invaders_resnet 44 | score_to_win: 100000 45 | grad_norm: 1.5 46 | entropy_coef: 0.001 47 | truncate_grads: True 48 | env_name: atari_gym #'openai_gym' #'PongNoFrameskip-v4' # 49 | e_clip: 0.2 50 | clip_value: True 51 | num_actors: 16 52 | horizon_length: 256 53 | minibatch_size: 2048 54 | mini_epochs: 4 55 | critic_coef: 1 
56 | lr_schedule: None 57 | kl_threshold: 0.01 58 | normalize_input: False 59 | seq_length: 4 60 | max_epochs: 200000 61 | 62 | env_config: 63 | skip: 3 64 | name: 'SpaceInvadersNoFrameskip-v4' 65 | episode_life: False 66 | 67 | player: 68 | render: True 69 | games_num: 10 70 | n_game_life: 1 71 | determenistic: True 72 | 73 | -------------------------------------------------------------------------------- /rl_games/configs/brax/ppo_ant.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 7 3 | 4 | #devices: [0, 0] 5 | 6 | algo: 7 | name: a2c_continuous 8 | 9 | model: 10 | name: continuous_a2c_logstd 11 | 12 | network: 13 | name: actor_critic 14 | separate: False 15 | space: 16 | continuous: 17 | mu_activation: None 18 | sigma_activation: None 19 | 20 | mu_init: 21 | name: default 22 | sigma_init: 23 | name: const_initializer 24 | val: 0 25 | fixed_sigma: True 26 | mlp: 27 | units: [256, 128, 64] 28 | activation: elu 29 | d2rl: False 30 | 31 | initializer: 32 | name: default 33 | regularizer: 34 | name: None 35 | 36 | config: 37 | name: Ant_brax 38 | full_experiment_name: Ant_brax 39 | env_name: brax 40 | multi_gpu: False 41 | mixed_precision: True 42 | normalize_input: True 43 | normalize_value: True 44 | normalize_advantage: True 45 | use_smooth_clamp: True 46 | reward_shaper: 47 | scale_value: 1.0 48 | gamma: 0.99 49 | tau: 0.95 50 | learning_rate: 3e-4 51 | lr_schedule: adaptive 52 | kl_threshold: 0.008 53 | score_to_win: 20000 54 | max_epochs: 1000 55 | save_best_after: 100 56 | save_frequency: 50 57 | grad_norm: 1.0 58 | entropy_coef: 0.0 59 | truncate_grads: True 60 | e_clip: 0.2 61 | horizon_length: 8 62 | num_actors: 4096 63 | minibatch_size: 32768 64 | mini_epochs: 5 65 | critic_coef: 2 66 | clip_value: False 67 | bounds_loss_coef: 0.0001 68 | 69 | env_config: 70 | env_name: ant 71 | -------------------------------------------------------------------------------- /rl_games/configs/brax/ppo_ant_tcnn.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 7 3 | 4 | #devices: [0, 0] 5 | 6 | algo: 7 | name: a2c_continuous 8 | 9 | model: 10 | name: continuous_a2c_logstd 11 | 12 | network: 13 | name: tcnnnet 14 | 15 | encoding: 16 | otype: "Identity" 17 | 18 | network: 19 | type: "FullyFusedMLP" 20 | activation: "ReLU" 21 | output_activation: "None" 22 | n_neurons: 128 23 | n_hidden_layers: 4 24 | 25 | config: 26 | name: Ant_brax_tcnn 27 | env_name: brax 28 | multi_gpu: False 29 | mixed_precision: True 30 | normalize_input: True 31 | normalize_value: True 32 | reward_shaper: 33 | scale_value: 1.0 34 | normalize_advantage: True 35 | gamma: 0.99 36 | tau: 0.95 37 | learning_rate: 3e-4 38 | lr_schedule: adaptive 39 | kl_threshold: 0.008 40 | score_to_win: 20000 41 | max_epochs: 1000 42 | save_best_after: 100 43 | save_frequency: 50 44 | grad_norm: 1.0 45 | entropy_coef: 0.0 46 | truncate_grads: True 47 | e_clip: 0.2 48 | horizon_length: 8 49 | num_actors: 4096 50 | minibatch_size: 32768 51 | mini_epochs: 5 52 | critic_coef: 2 53 | clip_value: False 54 | bounds_loss_coef: 0.0001 55 | 56 | env_config: 57 | env_name: 'ant' 58 | -------------------------------------------------------------------------------- /rl_games/configs/brax/ppo_grasp.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 7 3 | 4 | #devices: [0, 0] 5 | 6 | algo: 7 | name: a2c_continuous 8 | 9 | model: 10 | name: continuous_a2c_logstd 11 | 12 | network: 13 | 
name: actor_critic 14 | separate: False 15 | space: 16 | continuous: 17 | mu_activation: None 18 | sigma_activation: None 19 | 20 | mu_init: 21 | name: default 22 | sigma_init: 23 | name: const_initializer 24 | val: 0 25 | fixed_sigma: True 26 | mlp: 27 | units: [512, 256, 128] 28 | activation: elu 29 | d2rl: False 30 | 31 | initializer: 32 | name: default 33 | regularizer: 34 | name: None 35 | 36 | config: 37 | name: 'Grasp_brax' 38 | env_name: brax 39 | multi_gpu: False 40 | mixed_precision: True 41 | normalize_input: True 42 | normalize_value: True 43 | reward_shaper: 44 | scale_value: 1.0 45 | normalize_advantage: True 46 | gamma: 0.99 47 | tau: 0.95 48 | learning_rate: 3e-4 49 | lr_schedule: adaptive 50 | kl_threshold: 0.008 51 | score_to_win: 20000 52 | max_epochs: 2000 53 | save_best_after: 100 54 | save_frequency: 50 55 | grad_norm: 1.0 56 | entropy_coef: 0.00 57 | truncate_grads: True 58 | e_clip: 0.2 59 | horizon_length: 16 60 | num_actors: 8192 61 | minibatch_size: 32768 62 | mini_epochs: 5 63 | critic_coef: 2 64 | clip_value: False 65 | bounds_loss_coef: 0.0004 66 | 67 | env_config: 68 | env_name: 'grasp' 69 | -------------------------------------------------------------------------------- /rl_games/configs/brax/ppo_halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 7 3 | 4 | #devices: [0, 0] 5 | 6 | algo: 7 | name: a2c_continuous 8 | 9 | model: 10 | name: continuous_a2c_logstd 11 | 12 | network: 13 | name: actor_critic 14 | separate: False 15 | space: 16 | continuous: 17 | mu_activation: None 18 | sigma_activation: None 19 | 20 | mu_init: 21 | name: default 22 | sigma_init: 23 | name: const_initializer 24 | val: 0 25 | fixed_sigma: True 26 | mlp: 27 | units: [512, 256, 128] 28 | activation: elu 29 | d2rl: False 30 | 31 | initializer: 32 | name: default 33 | regularizer: 34 | name: None 35 | 36 | config: 37 | name: Halfcheetah_brax 38 | env_name: brax 39 | multi_gpu: False 40 | mixed_precision: True 41 | normalize_input: True 42 | normalize_value: True 43 | reward_shaper: 44 | scale_value: 1.0 45 | normalize_advantage: True 46 | gamma: 0.99 47 | tau: 0.95 48 | learning_rate: 3e-4 49 | lr_schedule: adaptive 50 | kl_threshold: 0.008 51 | score_to_win: 20000 52 | max_epochs: 2000 53 | save_best_after: 100 54 | save_frequency: 50 55 | grad_norm: 1.0 56 | entropy_coef: 0.0 57 | truncate_grads: True 58 | e_clip: 0.2 59 | horizon_length: 16 60 | num_actors: 8192 61 | minibatch_size: 32768 62 | mini_epochs: 5 63 | critic_coef: 2 64 | clip_value: False 65 | bounds_loss_coef: 0.0004 66 | 67 | env_config: 68 | env_name: 'halfcheetah' 69 | -------------------------------------------------------------------------------- /rl_games/configs/brax/ppo_humanoid.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 7 3 | 4 | algo: 5 | name: a2c_continuous 6 | 7 | model: 8 | name: continuous_a2c_logstd 9 | 10 | network: 11 | name: actor_critic 12 | separate: False 13 | space: 14 | continuous: 15 | mu_activation: None 16 | sigma_activation: None 17 | 18 | mu_init: 19 | name: default 20 | sigma_init: 21 | name: const_initializer 22 | val: 0 23 | fixed_sigma: True 24 | mlp: 25 | units: [512, 256, 128] 26 | activation: elu 27 | d2rl: False 28 | 29 | initializer: 30 | name: default 31 | regularizer: 32 | name: None 33 | 34 | config: 35 | name: Humanoid_brax 36 | full_experiment_name: Humanoid_brax 37 | env_name: brax 38 | multi_gpu: False 39 | mixed_precision: True 40 | 
normalize_input: True 41 | normalize_value: True 42 | normalize_advantage: True 43 | use_smooth_clamp: True 44 | reward_shaper: 45 | scale_value: 1.0 46 | gamma: 0.99 47 | tau: 0.95 48 | learning_rate: 3e-4 49 | lr_schedule: adaptive 50 | kl_threshold: 0.008 51 | score_to_win: 20000 52 | max_epochs: 1000 53 | save_best_after: 100 54 | save_frequency: 50 55 | grad_norm: 1.0 56 | entropy_coef: 0.0 57 | truncate_grads: True 58 | e_clip: 0.2 59 | horizon_length: 16 60 | num_actors: 4096 61 | minibatch_size: 32768 62 | mini_epochs: 5 63 | critic_coef: 2 64 | 65 | clip_value: True 66 | bound_loss_type: regularisation 67 | bounds_loss_coef: 0.0 68 | 69 | env_config: 70 | env_name: humanoid -------------------------------------------------------------------------------- /rl_games/configs/brax/ppo_ur5e.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 7 3 | 4 | #devices: [0, 0] 5 | 6 | algo: 7 | name: a2c_continuous 8 | 9 | model: 10 | name: continuous_a2c_logstd 11 | 12 | network: 13 | name: actor_critic 14 | separate: False 15 | space: 16 | continuous: 17 | mu_activation: None 18 | sigma_activation: None 19 | 20 | mu_init: 21 | name: default 22 | sigma_init: 23 | name: const_initializer 24 | val: 0 25 | fixed_sigma: True 26 | mlp: 27 | units: [512, 256, 128] 28 | activation: elu 29 | d2rl: False 30 | 31 | initializer: 32 | name: default 33 | regularizer: 34 | name: None 35 | 36 | config: 37 | name: Ur5e_brax 38 | env_name: brax 39 | multi_gpu: False 40 | mixed_precision: True 41 | normalize_input: True 42 | normalize_value: True 43 | reward_shaper: 44 | scale_value: 1.0 45 | normalize_advantage: True 46 | gamma: 0.99 47 | tau: 0.95 48 | learning_rate: 3e-4 49 | lr_schedule: adaptive 50 | kl_threshold: 0.008 51 | score_to_win: 20000 52 | max_epochs: 2000 53 | save_best_after: 100 54 | save_frequency: 50 55 | grad_norm: 1.0 56 | entropy_coef: 0.00 57 | truncate_grads: True 58 | e_clip: 0.2 59 | horizon_length: 16 60 | num_actors: 8192 61 | minibatch_size: 32768 62 | mini_epochs: 5 63 | critic_coef: 2 64 | clip_value: False 65 | bounds_loss_coef: 0.0004 66 | 67 | env_config: 68 | env_name: 'ur5e' 69 | -------------------------------------------------------------------------------- /rl_games/configs/brax/sac_ant.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: sac 4 | 5 | model: 6 | name: soft_actor_critic 7 | 8 | network: 9 | name: soft_actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mlp: 14 | units: [256, 128, 64] 15 | activation: relu 16 | 17 | initializer: 18 | name: default 19 | log_std_bounds: [-5, 2] 20 | 21 | config: 22 | name: Ant_brax_SAC 23 | env_name: brax 24 | normalize_input: True 25 | reward_shaper: 26 | scale_value: 1 27 | device: cuda 28 | max_epochs: 10000 29 | num_steps_per_episode: 16 30 | save_best_after: 100 31 | save_frequency: 10000 32 | gamma: 0.99 33 | init_alpha: 1 34 | alpha_lr: 0.005 35 | actor_lr: 0.0005 36 | critic_lr: 0.0005 37 | critic_tau: 0.005 38 | batch_size: 4096 39 | learnable_temperature: True 40 | num_warmup_steps: 10 # total number of warmup steps: num_actors * num_steps_per_episode * num_seed_steps 41 | replay_buffer_size: 1000000 42 | num_actors: 128 43 | 44 | env_config: 45 | env_name: ant -------------------------------------------------------------------------------- /rl_games/configs/brax/sac_humanoid.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 
3 | name: sac 4 | 5 | model: 6 | name: soft_actor_critic 7 | 8 | network: 9 | name: soft_actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | 14 | mlp: 15 | units: [512, 256] 16 | activation: relu 17 | initializer: 18 | name: default 19 | 20 | log_std_bounds: [-5, 2] 21 | 22 | config: 23 | name: Humanoid_brax_SAC 24 | env_name: brax 25 | normalize_input: True 26 | reward_shaper: 27 | scale_value: 1 28 | device: cuda 29 | max_epochs: 2000000 30 | num_steps_per_episode: 16 31 | save_best_after: 100 32 | save_frequency: 10000 33 | gamma: 0.99 34 | init_alpha: 1 35 | alpha_lr: 0.0002 36 | actor_lr: 0.0003 37 | critic_lr: 0.0003 38 | critic_tau: 0.005 39 | batch_size: 2048 40 | learnable_temperature: True 41 | num_warmup_steps: 5 # total number of warmup steps: num_actors * num_steps_per_episode * num_seed_steps 42 | replay_buffer_size: 1000000 43 | num_actors: 64 44 | 45 | env_config: 46 | env_name: humanoid -------------------------------------------------------------------------------- /rl_games/configs/dm_control/cartpole.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | scale: 0.02 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [32, 16] 24 | activation: relu 25 | initializer: 26 | name: default 27 | 28 | config: 29 | reward_shaper: 30 | scale_value: 0.1 31 | normalize_advantage: True 32 | gamma: 0.99 33 | tau: 0.9 34 | 35 | learning_rate: 1e-4 36 | name: cartpole 37 | score_to_win: 950 38 | 39 | grad_norm: 0.5 40 | entropy_coef: 0.0 41 | truncate_grads: True 42 | env_name: dm_control 43 | e_clip: 0.2 44 | clip_value: True 45 | num_actors: 16 46 | horizon_length: 128 47 | minibatch_size: 1024 48 | mini_epochs: 8 49 | critic_coef: 1 50 | lr_schedule: adaptive 51 | kl_threshold: 0.008 52 | value_bootstrap: True 53 | normalize_input: False 54 | seq_length: 8 55 | bounds_loss_coef: 0.0000 56 | 57 | env_config: 58 | name: CartpoleBalance-v0 59 | flat_observation: True 60 | -------------------------------------------------------------------------------- /rl_games/configs/dm_control/fish_swim.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: False 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | scale: 0.02 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | regularizer: 28 | name: 'None' #'l2_regularizer' 29 | #scale: 0.001 30 | 31 | config: 32 | reward_shaper: 33 | scale_value: 0.1 34 | normalize_advantage: True 35 | gamma: 0.99 36 | tau: 0.9 37 | 38 | learning_rate: 3e-4 39 | name: dm_fish_swim 40 | score_to_win: 950 41 | 42 | grad_norm: 0.5 43 | entropy_coef: 0.0 44 | truncate_grads: True 45 | env_name: dm_control 46 | e_clip: 0.2 47 | clip_value: True 48 | num_actors: 16 49 | horizon_length: 1024 50 | minibatch_size: 4096 51 | mini_epochs: 4 52 | critic_coef: 1 53 | lr_schedule: adaptive 54 | kl_threshold: 0.008 55 | value_bootstrap: True 
56 | normalize_input: True 57 | normalize_value: True 58 | bounds_loss_coef: 0.001 59 | 60 | env_config: 61 | name: FishSwim-v0 62 | flat_observation: True 63 | player: 64 | render: True 65 | deterministic: True 66 | -------------------------------------------------------------------------------- /rl_games/configs/dm_control/halfcheetah_run.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: False 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | sigma_init: 18 | name: const_initializer 19 | val: 0 20 | fixed_sigma: True 21 | mlp: 22 | units: [256, 128, 64] 23 | activation: elu 24 | initializer: 25 | name: default 26 | config: 27 | reward_shaper: 28 | scale_value: 0.1 29 | normalize_advantage: True 30 | gamma: 0.995 31 | tau: 0.95 32 | 33 | learning_rate: 3e-4 34 | name: HalfCheetah-v2 35 | score_to_win: 10000 36 | 37 | grad_norm: 0.5 38 | entropy_coef: 0.0 39 | truncate_grads: True 40 | env_name: openai_gym 41 | e_clip: 0.2 42 | clip_value: False 43 | num_actors: 16 44 | horizon_length: 128 45 | minibatch_size: 512 46 | mini_epochs: 4 47 | critic_coef: 1 48 | lr_schedule: adaptive 49 | kl_threshold: 0.008 50 | normalize_input: True 51 | normalize_value: True 52 | value_bootstrap: True 53 | bounds_loss_coef: 0.000 54 | 55 | env_config: 56 | name: HalfCheetah-v2 57 | seed: 5 58 | 59 | player: 60 | render: True -------------------------------------------------------------------------------- /rl_games/configs/dm_control/humanoid2.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | scale: 0.02 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | regularizer: 28 | name: 'None' #'l2_regularizer' 29 | #scale: 0.001 30 | 31 | config: 32 | reward_shaper: 33 | scale_value: 0.1 34 | normalize_advantage: True 35 | gamma: 0.99 36 | tau: 0.9 37 | 38 | learning_rate: 3e-4 39 | name: dm_humanoid 40 | score_to_win: 10000 41 | 42 | grad_norm: 0.5 43 | entropy_coef: 0.0 44 | truncate_grads: True 45 | env_name: dm_control 46 | e_clip: 0.2 47 | clip_value: True 48 | num_actors: 4 49 | horizon_length: 4096 50 | minibatch_size: 4096 51 | mini_epochs: 15 52 | critic_coef: 1 53 | lr_schedule: adaptive 54 | kl_threshold: 0.008 55 | 56 | normalize_input: False 57 | seq_length: 8 58 | bounds_loss_coef: 0.0 59 | 60 | env_config: 61 | name: Humanoid2Run-v0 62 | flat_observation: True 63 | -------------------------------------------------------------------------------- /rl_games/configs/dm_control/humanoid_run.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: False 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: 
True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: swish 25 | initializer: 26 | name: default 27 | regularizer: 28 | name: 'None' #'l2_regularizer' 29 | #scale: 0.001 30 | 31 | config: 32 | reward_shaper: 33 | scale_value: 0.1 34 | normalize_advantage: True 35 | gamma: 0.995 36 | tau: 0.95 37 | learning_rate: 3e-4 38 | name: dm_humanoid_run 39 | score_to_win: 1000 40 | 41 | grad_norm: 10.5 42 | entropy_coef: 0.0 43 | truncate_grads: True 44 | env_name: dm_control 45 | e_clip: 0.2 46 | clip_value: False 47 | num_actors: 24 48 | horizon_length: 1024 49 | minibatch_size: 4096 50 | mini_epochs: 4 51 | critic_coef: 1 52 | lr_schedule: adaptive 53 | kl_threshold: 0.008 54 | value_bootstrap: True 55 | normalize_input: True 56 | normalize_value: True 57 | bound_loss_type: regularisation #'bound' 58 | bounds_loss_coef: 0.001 59 | 60 | env_config: 61 | name: HumanoidRun-v0 62 | flat_observation: True 63 | player: 64 | render: True -------------------------------------------------------------------------------- /rl_games/configs/dm_control/sac_humanoid.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: sac 4 | 5 | model: 6 | name: soft_actor_critic 7 | 8 | network: 9 | name: soft_actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | 14 | mlp: 15 | units: [512, 256] 16 | activation: relu 17 | initializer: 18 | name: default 19 | 20 | log_std_bounds: [-5, 2] 21 | 22 | config: 23 | name: 'humanoid_run_sac' 24 | env_name : dm_control 25 | normalize_input: True 26 | reward_shaper: 27 | scale_value: 0.1 28 | device: cuda 29 | max_epochs: 2000000 30 | num_steps_per_episode: 128 31 | save_best_after: 100 32 | save_frequency: 10000 33 | gamma: 0.99 34 | init_alpha: 1 35 | alpha_lr: 0.0002 36 | actor_lr: 0.0003 37 | critic_lr: 0.0003 38 | critic_tau: 0.005 39 | batch_size: 1024 40 | learnable_temperature: true 41 | num_warmup_steps: 16 42 | replay_buffer_size: 1000000 43 | num_actors: 32 44 | 45 | env_config: 46 | name: HumanoidRun-v0 47 | flat_observation: True -------------------------------------------------------------------------------- /rl_games/configs/dm_control/walker_run.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: False 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | scale: 0.02 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | regularizer: 28 | name: None #'l2_regularizer' 29 | #scale: 0.001 30 | 31 | config: 32 | reward_shaper: 33 | scale_value: 0.1 34 | normalize_advantage: True 35 | gamma: 0.99 36 | tau: 0.9 37 | 38 | learning_rate: 3e-4 39 | name: dm_walker 40 | score_to_win: 950 41 | 42 | grad_norm: 0.5 43 | entropy_coef: 0.0 44 | truncate_grads: True 45 | env_name: dm_control 46 | e_clip: 0.2 47 | clip_value: True 48 | num_actors: 16 49 | horizon_length: 1024 50 | minibatch_size: 4096 51 | mini_epochs: 4 52 | critic_coef: 1 53 | lr_schedule: adaptive 54 | kl_threshold: 0.008 55 | value_bootstrap: True 56 | normalize_input: True 57 | normalize_value: True 58 | bounds_loss_coef: 0.001 59 | 60 | env_config: 61 | name: WalkerRun-v0 62 | flat_observation: True 63 | 64 | player: 65 | render: True 
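Several of the PPO configs above combine `lr_schedule: adaptive` with a `kl_threshold`: the learning rate is lowered when the measured policy KL divergence overshoots the threshold and raised when it undershoots it. The sketch below illustrates the idea; the 1.5 factor and the 2x / 0.5x bands are assumptions meant to mirror rl_games' adaptive scheduler, not a verbatim copy of it:

```python
# Sketch of a KL-adaptive learning-rate rule (constants are illustrative assumptions).
def adaptive_lr(current_lr, kl_dist, kl_threshold, min_lr=1e-6, max_lr=1e-2):
    if kl_dist > 2.0 * kl_threshold:
        # The policy moved too far in one update: shrink the step size.
        return max(current_lr / 1.5, min_lr)
    if kl_dist < 0.5 * kl_threshold:
        # The policy barely moved: allow larger steps.
        return min(current_lr * 1.5, max_lr)
    return current_lr  # Inside the target band: keep the current rate.
```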
-------------------------------------------------------------------------------- /rl_games/configs/dm_control/walker_stand.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: False 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | scale: 0.02 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | regularizer: 28 | name: 'None' #'l2_regularizer' 29 | #scale: 0.001 30 | 31 | config: 32 | reward_shaper: 33 | scale_value: 0.1 34 | normalize_advantage: True 35 | gamma: 0.99 36 | tau: 0.9 37 | 38 | learning_rate: 3e-4 39 | name: dm_walker_stand 40 | score_to_win: 950 41 | 42 | grad_norm: 0.5 43 | entropy_coef: 0.0 44 | truncate_grads: True 45 | env_name: dm_control 46 | e_clip: 0.2 47 | clip_value: True 48 | num_actors: 16 49 | horizon_length: 1024 50 | minibatch_size: 4096 51 | mini_epochs: 4 52 | critic_coef: 1 53 | lr_schedule: adaptive 54 | kl_threshold: 0.008 55 | value_bootstrap: True 56 | normalize_input: True 57 | normalize_value: True 58 | bounds_loss_coef: 0.001 59 | 60 | env_config: 61 | name: WalkerStand-v0 62 | flat_observation: True 63 | -------------------------------------------------------------------------------- /rl_games/configs/dm_control/walker_walk.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: False 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | scale: 0.02 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | regularizer: 28 | name: 'None' #'l2_regularizer' 29 | #scale: 0.001 30 | 31 | config: 32 | reward_shaper: 33 | scale_value: 0.1 34 | normalize_advantage: True 35 | gamma: 0.99 36 | tau: 0.9 37 | 38 | learning_rate: 3e-4 39 | name: dm_walker_walk 40 | score_to_win: 950 41 | 42 | grad_norm: 0.5 43 | entropy_coef: 0.0 44 | truncate_grads: True 45 | env_name: dm_control 46 | e_clip: 0.2 47 | clip_value: True 48 | num_actors: 16 49 | horizon_length: 1024 50 | minibatch_size: 4096 51 | mini_epochs: 4 52 | critic_coef: 1 53 | lr_schedule: adaptive 54 | kl_threshold: 0.008 55 | value_bootstrap: True 56 | normalize_input: True 57 | normalize_value: True 58 | bounds_loss_coef: 0.001 59 | 60 | env_config: 61 | name: WalkerWalk-v0 62 | flat_observation: True 63 | -------------------------------------------------------------------------------- /rl_games/configs/ma/ppo_connect4_self_play_resnet.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: connect4net 10 | blocks: 5 11 | 12 | config: 13 | name: connect4_rn 14 | reward_shaper: 15 | scale_value: 1 16 | normalize_advantage: True 17 | gamma: 0.995 18 | tau: 0.95 19 | learning_rate: 2e-4 20 | score_to_win: 100 21 | grad_norm: 0.5 22 | entropy_coef: 0.005 23 | truncate_grads: True 24 | env_name: connect4_env 25 | e_clip: 0.2 26 | 
clip_value: True 27 | num_actors: 4 28 | horizon_length: 128 29 | minibatch_size: 512 30 | mini_epochs: 4 31 | critic_coef: 1 32 | lr_schedule: None 33 | kl_threshold: 0.05 34 | normalize_input: False 35 | games_to_track: 1000 36 | use_action_masks: True 37 | weight_decay: 0.001 38 | self_play_config: 39 | update_score: 0.1 40 | games_to_check: 100 41 | env_update_num: 4 42 | 43 | env_config: 44 | name: connect_four_v0 45 | self_play: True 46 | is_human: True 47 | random_agent: False 48 | config_path: 'rl_games/configs/ma/ppo_connect4_self_play_resnet.yaml' -------------------------------------------------------------------------------- /rl_games/configs/ma/ppo_slime_self_play.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: layer_norm 12 | space: 13 | discrete: 14 | 15 | mlp: 16 | units: [128,64] 17 | activation: elu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: 'None' 22 | config: 23 | name: slime_pvp2 24 | reward_shaper: 25 | scale_value: 1 26 | normalize_advantage: True 27 | gamma: 0.995 28 | tau: 0.95 29 | learning_rate: 2e-4 30 | score_to_win: 100 31 | grad_norm: 0.5 32 | entropy_coef: 0.01 33 | truncate_grads: True 34 | env_name: slime_gym 35 | e_clip: 0.2 36 | clip_value: True 37 | num_actors: 8 38 | horizon_length: 512 39 | minibatch_size: 2048 40 | mini_epochs: 4 41 | critic_coef: 1 42 | lr_schedule: None 43 | kl_threshold: 0.05 44 | normalize_input: False 45 | games_to_track: 500 46 | 47 | self_play_config: 48 | update_score: 1 49 | games_to_check: 200 50 | check_scores : False 51 | 52 | env_config: 53 | name: SlimeVolleyDiscrete-v0 54 | #neg_scale: 1 #0.5 55 | self_play: True 56 | config_path: 'rl_games/configs/ma/ppo_slime_self_play.yaml' 57 | 58 | player: 59 | render: True 60 | games_num: 200 61 | n_game_life: 1 62 | determenistic: True 63 | device_name: 'cpu' -------------------------------------------------------------------------------- /rl_games/configs/ma/ppo_slime_v0.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: layer_norm 12 | space: 13 | discrete: 14 | 15 | mlp: 16 | units: [128,64] 17 | activation: elu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: None 22 | 23 | config: 24 | name: slime 25 | reward_shaper: 26 | scale_value: 1 27 | normalize_advantage: True 28 | gamma: 0.99 29 | tau: 0.95 30 | learning_rate: 1e-4 31 | score_to_win: 20 32 | grad_norm: 0.5 33 | entropy_coef: 0.005 34 | truncate_grads: True 35 | env_name: slime_gym 36 | e_clip: 0.2 37 | clip_value: True 38 | num_actors: 8 39 | horizon_length: 128 40 | minibatch_size: 512 41 | mini_epochs: 4 42 | critic_coef: 1 43 | lr_schedule: None 44 | kl_threshold: 0.05 45 | normalize_input: False 46 | seq_length: 4 47 | use_action_masks: False 48 | ignore_dead_batches : False 49 | 50 | env_config: 51 | name: SlimeVolleyDiscrete-v0 52 | 53 | player: 54 | render: True 55 | games_num: 200 56 | n_game_life: 1 57 | determenistic: True -------------------------------------------------------------------------------- /rl_games/configs/mujoco/ant.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 5 3 | algo: 4 | name: 
a2c_continuous 5 | 6 | model: 7 | name: continuous_a2c_logstd 8 | 9 | network: 10 | name: actor_critic 11 | separate: False 12 | space: 13 | continuous: 14 | mu_activation: None 15 | sigma_activation: None 16 | mu_init: 17 | name: default 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | 28 | config: 29 | name: Ant-v4_ray 30 | env_name: openai_gym 31 | score_to_win: 20000 32 | normalize_input: True 33 | normalize_value: True 34 | value_bootstrap: True 35 | reward_shaper: 36 | scale_value: 0.1 37 | normalize_advantage: True 38 | gamma: 0.99 39 | tau: 0.95 40 | 41 | learning_rate: 3e-4 42 | lr_schedule: adaptive 43 | kl_threshold: 0.008 44 | grad_norm: 1.0 45 | entropy_coef: 0.0 46 | truncate_grads: True 47 | e_clip: 0.2 48 | max_epochs: 2000 49 | num_actors: 64 50 | horizon_length: 64 51 | minibatch_size: 2048 52 | mini_epochs: 4 53 | critic_coef: 2 54 | clip_value: True 55 | use_smooth_clamp: True 56 | bound_loss_type: regularisation 57 | bounds_loss_coef: 0.0 58 | 59 | env_config: 60 | name: Ant-v4 61 | seed: 5 62 | 63 | player: 64 | render: True -------------------------------------------------------------------------------- /rl_games/configs/mujoco/ant_envpool.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 5 3 | algo: 4 | name: a2c_continuous 5 | 6 | model: 7 | name: continuous_a2c_logstd 8 | 9 | network: 10 | name: actor_critic 11 | separate: False 12 | space: 13 | continuous: 14 | mu_activation: None 15 | sigma_activation: None 16 | mu_init: 17 | name: default 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | 28 | config: 29 | name: Ant-v4_envpool 30 | env_name: envpool 31 | score_to_win: 20000 32 | normalize_input: True 33 | normalize_value: True 34 | value_bootstrap: True 35 | normalize_advantage: True 36 | reward_shaper: 37 | scale_value: 0.1 38 | 39 | gamma: 0.99 40 | tau: 0.95 41 | learning_rate: 3e-4 42 | lr_schedule: adaptive 43 | kl_threshold: 0.008 44 | grad_norm: 1.0 45 | entropy_coef: 0.0 46 | truncate_grads: True 47 | e_clip: 0.2 48 | clip_value: True 49 | use_smooth_clamp: True 50 | bound_loss_type: regularisation 51 | bounds_loss_coef: 0.0 52 | max_epochs: 2000 53 | num_actors: 64 54 | horizon_length: 64 55 | minibatch_size: 2048 56 | mini_epochs: 4 57 | critic_coef: 2 58 | 59 | env_config: 60 | env_name: Ant-v4 61 | seed: 5 62 | #flat_observation: True 63 | 64 | player: 65 | render: False -------------------------------------------------------------------------------- /rl_games/configs/mujoco/halfcheetah.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 5 3 | algo: 4 | name: a2c_continuous 5 | 6 | model: 7 | name: continuous_a2c_logstd 8 | 9 | network: 10 | name: actor_critic 11 | separate: False 12 | space: 13 | continuous: 14 | mu_activation: None 15 | sigma_activation: None 16 | mu_init: 17 | name: default 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [128, 64, 32] 24 | activation: elu 25 | initializer: 26 | name: variance_scaling_initializer 27 | scale: 2.0 28 | 29 | config: 30 | name: HalfCheetah-v4_ray 31 | env_name: openai_gym 32 | score_to_win: 20000 33 | normalize_input: True 34 | normalize_value: True 35 | value_bootstrap: True 
36 | reward_shaper: 37 | scale_value: 0.1 38 | normalize_advantage: True 39 | use_smooth_clamp: True 40 | gamma: 0.99 41 | tau: 0.95 42 | 43 | learning_rate: 5e-4 44 | lr_schedule: adaptive 45 | kl_threshold: 0.008 46 | grad_norm: 1.0 47 | entropy_coef: 0.0 48 | truncate_grads: True 49 | e_clip: 0.2 50 | clip_value: False 51 | num_actors: 64 52 | horizon_length: 256 53 | minibatch_size: 2048 54 | mini_epochs: 5 55 | critic_coef: 4 56 | bounds_loss_coef: 0.0 57 | max_epochs: 1000 58 | env_config: 59 | name: HalfCheetah-v4 60 | seed: 5 61 | 62 | player: 63 | render: True 64 | deterministic: True 65 | games_num: 100 -------------------------------------------------------------------------------- /rl_games/configs/mujoco/halfcheetah_envpool.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 5 3 | algo: 4 | name: a2c_continuous 5 | 6 | model: 7 | name: continuous_a2c_logstd 8 | 9 | network: 10 | name: actor_critic 11 | separate: False 12 | space: 13 | continuous: 14 | mu_activation: None 15 | sigma_activation: None 16 | mu_init: 17 | name: default 18 | # name: variance_scaling_initializer 19 | # scale: 1.0 20 | sigma_init: 21 | name: const_initializer 22 | val: 0 23 | fixed_sigma: True 24 | mlp: 25 | units: [128, 64, 32] 26 | activation: elu 27 | initializer: 28 | name: variance_scaling_initializer 29 | scale: 2.0 30 | 31 | config: 32 | name: HalfCheetah-v4_envpool 33 | env_name: envpool 34 | score_to_win: 20000 35 | normalize_input: True 36 | normalize_value: True 37 | value_bootstrap: True 38 | reward_shaper: 39 | scale_value: 0.1 40 | normalize_advantage: True 41 | use_smooth_clamp: True 42 | gamma: 0.99 43 | tau: 0.95 44 | 45 | learning_rate: 5e-4 46 | lr_schedule: adaptive 47 | kl_threshold: 0.008 48 | grad_norm: 1.0 49 | entropy_coef: 0.0 50 | truncate_grads: True 51 | e_clip: 0.2 52 | clip_value: False 53 | num_actors: 64 54 | horizon_length: 256 55 | minibatch_size: 2048 56 | mini_epochs: 5 57 | critic_coef: 4 58 | bounds_loss_coef: 0.0 59 | max_epochs: 1000 60 | env_config: 61 | env_name: HalfCheetah-v4 62 | seed: 5 63 | 64 | player: 65 | render: True 66 | deterministic: True 67 | games_num: 100 -------------------------------------------------------------------------------- /rl_games/configs/mujoco/hopper.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 5 3 | algo: 4 | name: a2c_continuous 5 | 6 | model: 7 | name: continuous_a2c_logstd 8 | 9 | network: 10 | name: actor_critic 11 | separate: False 12 | space: 13 | continuous: 14 | mu_activation: None 15 | sigma_activation: None 16 | mu_init: 17 | name: default 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | 28 | config: 29 | name: Hopper-v4_ray 30 | env_name: openai_gym 31 | score_to_win: 20000 32 | normalize_input: True 33 | normalize_value: True 34 | value_bootstrap: True 35 | reward_shaper: 36 | scale_value: 0.1 37 | normalize_advantage: True 38 | gamma: 0.99 39 | tau: 0.95 40 | 41 | learning_rate: 5e-4 42 | lr_schedule: adaptive 43 | kl_threshold: 0.008 44 | grad_norm: 1.0 45 | entropy_coef: 0.0 46 | truncate_grads: True 47 | e_clip: 0.2 48 | clip_value: False 49 | num_actors: 64 50 | horizon_length: 64 51 | minibatch_size: 2048 52 | mini_epochs: 5 53 | critic_coef: 2 54 | use_smooth_clamp: True 55 | bound_loss_type: regularisation 56 | bounds_loss_coef: 0.0 57 | max_epochs: 1000 58 | 
59 | env_config: 60 | name: Hopper-v4 61 | seed: 5 62 | 63 | player: 64 | render: True 65 | determenistic: True -------------------------------------------------------------------------------- /rl_games/configs/mujoco/hopper_envpool.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 5 3 | algo: 4 | name: a2c_continuous 5 | 6 | model: 7 | name: continuous_a2c_logstd 8 | 9 | network: 10 | name: actor_critic 11 | separate: False 12 | space: 13 | continuous: 14 | mu_activation: None 15 | sigma_activation: None 16 | mu_init: 17 | name: default 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | 28 | config: 29 | name: Hopper-v4_envpool 30 | env_name: envpool 31 | score_to_win: 20000 32 | normalize_input: True 33 | normalize_value: True 34 | value_bootstrap: True 35 | reward_shaper: 36 | scale_value: 0.1 37 | normalize_advantage: True 38 | use_smooth_clamp: True 39 | gamma: 0.99 40 | tau: 0.95 41 | 42 | learning_rate: 5e-4 43 | lr_schedule: adaptive 44 | kl_threshold: 0.008 45 | grad_norm: 1.0 46 | entropy_coef: 0.0 47 | truncate_grads: True 48 | e_clip: 0.2 49 | clip_value: False 50 | num_actors: 64 51 | horizon_length: 64 52 | minibatch_size: 2048 53 | mini_epochs: 5 54 | critic_coef: 2 55 | bound_loss_type: regularisation 56 | bounds_loss_coef: 0.0 57 | max_epochs: 1000 58 | 59 | env_config: 60 | env_name: Hopper-v4 61 | seed: 5 62 | 63 | player: 64 | render: True 65 | deterministic: True 66 | games_num: 100 -------------------------------------------------------------------------------- /rl_games/configs/mujoco/humanoid.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 7 3 | algo: 4 | name: a2c_continuous 5 | 6 | model: 7 | name: continuous_a2c_logstd 8 | 9 | network: 10 | name: actor_critic 11 | separate: False 12 | space: 13 | continuous: 14 | mu_activation: None 15 | sigma_activation: None 16 | mu_init: 17 | name: default 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [512, 256, 128] 24 | activation: elu 25 | initializer: 26 | name: default 27 | 28 | config: 29 | name: Humanoid-v4_ray 30 | env_name: openai_gym 31 | score_to_win: 20000 32 | normalize_input: True 33 | normalize_value: True 34 | value_bootstrap: True 35 | reward_shaper: 36 | scale_value: 0.1 37 | normalize_advantage: True 38 | gamma: 0.99 39 | tau: 0.95 40 | 41 | learning_rate: 3e-4 42 | lr_schedule: adaptive 43 | kl_threshold: 0.008 44 | grad_norm: 1.0 45 | entropy_coef: 0.0 46 | truncate_grads: True 47 | e_clip: 0.2 48 | clip_value: True 49 | use_smooth_clamp: True 50 | bound_loss_type: regularisation 51 | bounds_loss_coef: 0.0005 52 | max_epochs: 2000 53 | num_actors: 64 54 | horizon_length: 128 55 | minibatch_size: 2048 56 | mini_epochs: 5 57 | critic_coef: 4 58 | 59 | env_config: 60 | name: Humanoid-v4 61 | seed: 5 62 | 63 | player: 64 | render: True -------------------------------------------------------------------------------- /rl_games/configs/mujoco/humanoid_envpool.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 5 3 | algo: 4 | name: a2c_continuous 5 | 6 | model: 7 | name: continuous_a2c_logstd 8 | 9 | network: 10 | name: actor_critic 11 | separate: False 12 | space: 13 | continuous: 14 | mu_activation: None 15 | sigma_activation: None 16 | mu_init: 17 | 
name: default 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [512, 256, 128] 24 | activation: elu 25 | initializer: 26 | name: default 27 | 28 | config: 29 | name: Humanoid-v4_envpool 30 | env_name: envpool 31 | score_to_win: 20000 32 | normalize_input: True 33 | normalize_value: True 34 | value_bootstrap: True 35 | reward_shaper: 36 | scale_value: 0.1 37 | normalize_advantage: True 38 | gamma: 0.99 39 | tau: 0.95 40 | 41 | learning_rate: 3e-4 42 | lr_schedule: adaptive 43 | kl_threshold: 0.008 44 | grad_norm: 1.0 45 | entropy_coef: 0.0 46 | truncate_grads: True 47 | e_clip: 0.2 48 | clip_value: True 49 | use_smooth_clamp: True 50 | bound_loss_type: regularisation 51 | bounds_loss_coef: 0.0005 52 | max_epochs: 2000 53 | num_actors: 64 54 | horizon_length: 128 55 | minibatch_size: 2048 56 | mini_epochs: 5 57 | critic_coef: 4 58 | 59 | env_config: 60 | env_name: Humanoid-v4 61 | 62 | player: 63 | render: True -------------------------------------------------------------------------------- /rl_games/configs/mujoco/sac_ant_envpool.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 5 3 | algo: 4 | name: sac 5 | 6 | model: 7 | name: soft_actor_critic 8 | 9 | network: 10 | name: soft_actor_critic 11 | separate: True 12 | space: 13 | continuous: 14 | mlp: 15 | units: [256, 128, 64] 16 | activation: relu 17 | 18 | initializer: 19 | name: default 20 | log_std_bounds: [-5, 2] 21 | 22 | config: 23 | name: Ant-v4_SAC 24 | env_name: envpool 25 | normalize_input: True 26 | reward_shaper: 27 | scale_value: 1.0 28 | 29 | max_epochs: 10000 30 | num_steps_per_episode: 8 31 | save_best_after: 500 32 | save_frequency: 10000 33 | gamma: 0.99 34 | init_alpha: 1 35 | alpha_lr: 0.005 36 | actor_lr: 0.0005 37 | critic_lr: 0.0005 38 | critic_tau: 0.005 39 | batch_size: 2048 40 | learnable_temperature: True 41 | num_seed_steps: 5 42 | replay_buffer_size: 1000000 43 | num_actors: 64 44 | 45 | env_config: 46 | env_name: Ant-v4 47 | seed: 5 48 | -------------------------------------------------------------------------------- /rl_games/configs/mujoco/sac_halfcheetah_envpool.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 5 3 | algo: 4 | name: sac 5 | 6 | model: 7 | name: soft_actor_critic 8 | 9 | network: 10 | name: soft_actor_critic 11 | separate: True 12 | space: 13 | continuous: 14 | mlp: 15 | units: [256, 128, 64] 16 | activation: relu 17 | 18 | initializer: 19 | name: default 20 | log_std_bounds: [-5, 2] 21 | 22 | config: 23 | name: HalfCheetah-v4_SAC 24 | env_name: envpool 25 | normalize_input: True 26 | reward_shaper: 27 | scale_value: 1.0 28 | 29 | max_epochs: 40000 30 | num_steps_per_episode: 2 31 | save_best_after: 500 32 | save_frequency: 1000 33 | gamma: 0.99 34 | init_alpha: 1.0 35 | alpha_lr: 5e-3 36 | actor_lr: 5e-4 37 | critic_lr: 5e-4 38 | critic_tau: 0.005 39 | batch_size: 2048 40 | learnable_temperature: True 41 | num_warmup_steps: 50 42 | replay_buffer_size: 1000000 43 | num_actors: 32 44 | 45 | env_config: 46 | env_name: HalfCheetah-v4 47 | seed: 5 48 | 49 | player: 50 | render: True 51 | deterministic: True 52 | games_num: 100 -------------------------------------------------------------------------------- /rl_games/configs/mujoco/walker2d.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 5 3 | algo: 4 | name: a2c_continuous 5 | 6 | model: 7 | name: 
continuous_a2c_logstd 8 | 9 | network: 10 | name: actor_critic 11 | separate: False 12 | space: 13 | continuous: 14 | mu_activation: None 15 | sigma_activation: None 16 | mu_init: 17 | name: default 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | 28 | config: 29 | name: Walker2d-v4_ray 30 | env_name: openai_gym 31 | normalize_input: True 32 | normalize_value: True 33 | value_bootstrap: True 34 | reward_shaper: 35 | scale_value: 0.1 36 | normalize_advantage: True 37 | gamma: 0.99 38 | tau: 0.95 39 | 40 | learning_rate: 3e-4 41 | lr_schedule: adaptive 42 | kl_threshold: 0.008 43 | grad_norm: 1.0 44 | entropy_coef: 0.0 45 | truncate_grads: True 46 | e_clip: 0.2 47 | clip_value: False 48 | num_actors: 64 49 | horizon_length: 128 50 | minibatch_size: 2048 51 | mini_epochs: 5 52 | critic_coef: 2 53 | use_smooth_clamp: True 54 | bound_loss_type: regularisation 55 | bounds_loss_coef: 0.0 56 | max_epochs: 1000 57 | env_config: 58 | name: Walker2d-v4 59 | seed: 5 60 | 61 | player: 62 | render: True -------------------------------------------------------------------------------- /rl_games/configs/mujoco/walker2d_envpool.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 5 3 | algo: 4 | name: a2c_continuous 5 | 6 | model: 7 | name: continuous_a2c_logstd 8 | 9 | network: 10 | name: actor_critic 11 | separate: False 12 | space: 13 | continuous: 14 | mu_activation: None 15 | sigma_activation: None 16 | mu_init: 17 | name: default 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | 28 | config: 29 | name: Walker2d-v4_envpool 30 | env_name: envpool 31 | score_to_win: 20000 32 | normalize_input: True 33 | normalize_value: True 34 | value_bootstrap: True 35 | reward_shaper: 36 | scale_value: 0.1 37 | normalize_advantage: True 38 | gamma: 0.99 39 | tau: 0.95 40 | 41 | learning_rate: 3e-4 42 | lr_schedule: adaptive 43 | kl_threshold: 0.008 44 | grad_norm: 1.0 45 | entropy_coef: 0.0 46 | truncate_grads: True 47 | e_clip: 0.2 48 | clip_value: False 49 | num_actors: 64 50 | horizon_length: 128 51 | minibatch_size: 2048 52 | mini_epochs: 5 53 | critic_coef: 2 54 | use_smooth_clamp: True 55 | bound_loss_type: regularisation 56 | bounds_loss_coef: 0.0 57 | max_epochs: 1000 58 | env_config: 59 | env_name: Walker2d-v4 60 | seed: 5 61 | 62 | player: 63 | render: True 64 | 65 | -------------------------------------------------------------------------------- /rl_games/configs/openai/ppo_gym_ant.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | scale: 0.02 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | regularizer: 28 | name: 'None' #'l2_regularizer' 29 | #scale: 0.001 30 | 31 | 32 | config: 33 | reward_shaper: 34 | scale_value: 0.1 35 | 36 | normalize_advantage: True 37 | gamma: 0.99 38 | tau: 0.9 39 | learning_rate: 3e-4 40 | name: Hand_block 41 | score_to_win: 100080 
42 | grad_norm: 1.0 43 | entropy_coef: 0.0 44 | truncate_grads: True 45 | env_name: openai_gym 46 | e_clip: 0.2 47 | clip_value: True 48 | num_actors: 16 49 | horizon_length: 128 50 | minibatch_size: 2048 51 | mini_epochs: 12 52 | critic_coef: 2 53 | lr_schedule: adaptive 54 | kl_threshold: 0.008 55 | normalize_input: False 56 | seq_length: 4 57 | bounds_loss_coef: 0.0001 58 | max_epochs: 10000 59 | 60 | env_config: 61 | name: Ant-v3 62 | -------------------------------------------------------------------------------- /rl_games/configs/openai/ppo_gym_hand.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | scale: 0.02 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [400, 200, 100] 24 | activation: elu 25 | initializer: 26 | name: default 27 | regularizer: 28 | name: 'None' #'l2_regularizer' 29 | #scale: 0.001 30 | 31 | config: 32 | reward_shaper: 33 | scale_value: 1.0 34 | 35 | normalize_advantage: True 36 | gamma: 0.99 37 | tau: 0.9 38 | learning_rate: 3e-4 39 | name: HandBlockDenseXYZ 40 | score_to_win: 10000 41 | grad_norm: 1.0 42 | entropy_coef: 0.0 43 | truncate_grads: True 44 | env_name: openai_robot_gym 45 | e_clip: 0.2 46 | clip_value: True 47 | num_actors: 16 48 | horizon_length: 256 49 | minibatch_size: 2048 50 | mini_epochs: 12 51 | critic_coef: 2 52 | lr_schedule: adaptive 53 | kl_threshold: 0.008 54 | normalize_input: True 55 | seq_length: 4 56 | bounds_loss_coef: 0.0001 57 | max_epochs: 10000 58 | 59 | env_config: 60 | name: HandVMManipulateBlockRotateXYZDense-v0 -------------------------------------------------------------------------------- /rl_games/configs/openai/ppo_gym_humanoid.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | scale: 0.02 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [400, 200, 100] 24 | activation: elu 25 | initializer: 26 | name: default 27 | regularizer: 28 | name: 'None' #'l2_regularizer' 29 | #scale: 0.001 30 | 31 | config: 32 | reward_shaper: 33 | scale_value: 0.1 34 | 35 | normalize_advantage: True 36 | gamma: 0.99 37 | tau: 0.9 38 | learning_rate: 3e-4 39 | name: Humanoid 40 | score_to_win: 100080 41 | grad_norm: 1.0 42 | entropy_coef: 0.0 43 | truncate_grads: True 44 | env_name: openai_gym 45 | e_clip: 0.2 46 | clip_value: True 47 | num_actors: 16 48 | horizon_length: 256 49 | minibatch_size: 2048 50 | mini_epochs: 12 51 | critic_coef: 2 52 | lr_schedule: adaptive 53 | kl_threshold: 0.008 54 | normalize_input: False 55 | seq_length: 4 56 | bounds_loss_coef: 0.0001 57 | max_epochs: 10000 58 | 59 | env_config: 60 | name: Humanoid-v3 61 | -------------------------------------------------------------------------------- /rl_games/configs/ppo_cartpole.yaml: -------------------------------------------------------------------------------- 1 | 2 | #Cartpole MLP 3 | 4 | params: 5 | algo: 6 | name: a2c_discrete 7 | 8 | model: 9 | name: 
discrete_a2c 10 | 11 | load_checkpoint: False 12 | load_path: path 13 | 14 | network: 15 | name: actor_critic 16 | separate: True 17 | space: 18 | discrete: 19 | mlp: 20 | units: [32, 32] 21 | activation: relu 22 | initializer: 23 | name: default 24 | regularizer: 25 | name: None 26 | 27 | config: 28 | reward_shaper: 29 | scale_value: 0.1 30 | normalize_advantage: True 31 | gamma: 0.99 32 | tau: 0.9 33 | learning_rate: 2e-4 34 | name: cartpole_vel_info 35 | score_to_win: 400 36 | grad_norm: 1.0 37 | entropy_coef: 0.01 38 | truncate_grads: True 39 | env_name: CartPole-v1 40 | e_clip: 0.2 41 | clip_value: True 42 | num_actors: 16 43 | horizon_length: 32 44 | minibatch_size: 64 45 | mini_epochs: 4 46 | critic_coef: 1 47 | lr_schedule: None 48 | kl_threshold: 0.008 49 | normalize_input: False 50 | save_best_after: 10 51 | device: 'cpu' 52 | multi_gpu: True 53 | 54 | -------------------------------------------------------------------------------- /rl_games/configs/ppo_cartpole_masked_velocity_rnn.yaml: -------------------------------------------------------------------------------- 1 | 2 | #Cartpole without velocities lstm test 3 | 4 | params: 5 | algo: 6 | name: a2c_discrete 7 | 8 | model: 9 | name: discrete_a2c 10 | 11 | load_checkpoint: False 12 | load_path: path 13 | 14 | network: 15 | name: actor_critic 16 | separate: True 17 | space: 18 | discrete: 19 | 20 | mlp: 21 | units: [64, 64] 22 | activation: relu 23 | normalization: 'layer_norm' 24 | norm_only_first_layer: True 25 | initializer: 26 | name: default 27 | regularizer: 28 | name: None 29 | rnn: 30 | name: 'lstm' 31 | units: 64 32 | layers: 2 33 | before_mlp: False 34 | concat_input: True 35 | layer_norm: True 36 | 37 | config: 38 | env_name: CartPoleMaskedVelocity-v1 39 | reward_shaper: 40 | scale_value: 0.1 41 | normalize_advantage: True 42 | gamma: 0.99 43 | tau: 0.9 44 | learning_rate: 1e-4 45 | name: cartpole_vel_info 46 | score_to_win: 500 47 | grad_norm: 0.5 48 | entropy_coef: 0.01 49 | truncate_grads: True 50 | e_clip: 0.2 51 | clip_value: True 52 | num_actors: 16 53 | horizon_length: 256 54 | minibatch_size: 2048 55 | mini_epochs: 4 56 | critic_coef: 1 57 | lr_schedule: None 58 | kl_threshold: 0.008 59 | normalize_input: False 60 | seq_length: 4 -------------------------------------------------------------------------------- /rl_games/configs/ppo_continuous.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | scale: 0.02 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | activation: elu 25 | initializer: 26 | name: default 27 | regularizer: 28 | name: None #'l2_regularizer' 29 | #scale: 0.001 30 | 31 | load_checkpoint: False 32 | load_path: path 33 | 34 | config: 35 | reward_shaper: 36 | scale_value: 0.1 37 | normalize_advantage: True 38 | gamma: 0.99 39 | tau: 0.9 40 | 41 | learning_rate: 3e-4 42 | name: walker 43 | score_to_win: 300 44 | 45 | grad_norm: 0.5 46 | entropy_coef: 0.0 47 | truncate_grads: True 48 | env_name: openai_gym 49 | e_clip: 0.2 50 | clip_value: True 51 | num_actors: 16 52 | horizon_length: 256 53 | minibatch_size: 1024 54 | mini_epochs: 8 55 | critic_coef: 1 56 | lr_schedule: adaptive 57 | kl_threshold: 0.008 58 | 59 | 
normalize_input: False 60 | seq_length: 8 61 | bounds_loss_coef: 0.001 62 | env_config: 63 | name: BipedalWalkerHardcore-v3 64 | 65 | -------------------------------------------------------------------------------- /rl_games/configs/ppo_continuous_lstm.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_lstm_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: normc_initializer 17 | std: 0.01 18 | sigma_init: 19 | name: const_initializer 20 | value: 0.0 21 | fixed_sigma: True 22 | 23 | mlp: 24 | units: [256, 256, 128] 25 | activation: relu 26 | initializer: 27 | name: normc_initializer 28 | std: 1 29 | regularizer: 30 | name: None 31 | lstm: 32 | units: 128 33 | concated: False 34 | 35 | config: 36 | env_name: BipedalWalkerHardcore-v2 37 | reward_shaper: 38 | scale_value: 0.1 39 | 40 | normalize_advantage: True 41 | gamma: 0.99 42 | tau: 0.9 43 | learning_rate: 1e-4 44 | name: walker_lstm 45 | score_to_win: 300 46 | grad_norm: 0.5 47 | entropy_coef: 0.000 48 | truncate_grads: True 49 | e_clip: 0.2 50 | clip_value: True 51 | num_actors: 16 52 | horizon_length: 512 53 | minibatch_size: 2048 54 | mini_epochs: 8 55 | critic_coef: 1 56 | lr_schedule: None 57 | kl_threshold: 0.008 58 | normalize_input: False 59 | seq_length: 8 60 | bounds_loss_coef: 0.5 61 | max_epochs: 5000 62 | -------------------------------------------------------------------------------- /rl_games/configs/ppo_lunar.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: glorot_normal_initializer 17 | #scal: 0.01 18 | sigma_init: 19 | name: const_initializer 20 | value: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [64, 64] 24 | activation: relu 25 | initializer: 26 | name: glorot_normal_initializer 27 | #gain: 2 28 | regularizer: 29 | name: 'None' #'l2_regularizer' 30 | #scale: 0.001 31 | 32 | load_checkpoint: False 33 | load_path: path 34 | 35 | config: 36 | reward_shaper: 37 | scale_value: 0.1 38 | normalize_advantage: True 39 | gamma: 0.99 40 | tau: 0.9 41 | 42 | learning_rate: 1e-4 43 | name: test 44 | score_to_win: 300 45 | 46 | grad_norm: 0.5 47 | entropy_coef: 0.0 48 | truncate_grads: True 49 | env_name: LunarLanderContinuous-v2 50 | e_clip: 0.2 51 | clip_value: True 52 | num_actors: 16 53 | horizon_length: 128 54 | minibatch_size: 1024 55 | mini_epochs: 4 56 | critic_coef: 1 57 | lr_schedule: adaptive 58 | kl_threshold: 0.008 59 | normalize_input: False 60 | bounds_loss_coef: 0 61 | -------------------------------------------------------------------------------- /rl_games/configs/ppo_lunar_continiuos_torch.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | scale: 0.02 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [64] 24 | 
activation: relu 25 | initializer: 26 | name: default 27 | scale: 2 28 | rnn: 29 | name: 'lstm' 30 | units: 64 31 | layers: 1 32 | 33 | load_checkpoint: False 34 | load_path: path 35 | 36 | config: 37 | env_name: LunarLanderContinuous-v2 38 | reward_shaper: 39 | scale_value: 0.1 40 | normalize_advantage: True 41 | gamma: 0.99 42 | tau: 0.9 43 | 44 | learning_rate: 1e-3 45 | name: test 46 | score_to_win: 300 47 | 48 | grad_norm: 0.5 49 | entropy_coef: 0.0 50 | truncate_grads: True 51 | e_clip: 0.2 52 | clip_value: True 53 | num_actors: 16 54 | horizon_length: 128 55 | minibatch_size: 1024 56 | mini_epochs: 4 57 | critic_coef: 1 58 | lr_schedule: adaptive 59 | kl_threshold: 0.008 60 | normalize_input: True 61 | seq_length: 4 62 | bounds_loss_coef: 0 63 | 64 | player: 65 | render: True 66 | -------------------------------------------------------------------------------- /rl_games/configs/ppo_lunar_discrete.yaml: -------------------------------------------------------------------------------- 1 | 2 | #Cartpole MLP 3 | 4 | params: 5 | algo: 6 | name: a2c_discrete 7 | 8 | model: 9 | name: discrete_a2c 10 | 11 | network: 12 | name: actor_critic 13 | separate: True 14 | space: 15 | discrete: 16 | mlp: 17 | units: [64, 64] 18 | activation: relu 19 | initializer: 20 | name: default 21 | regularizer: 22 | name: None 23 | 24 | config: 25 | env_name: LunarLander-v2 26 | reward_shaper: 27 | scale_value: 0.1 28 | normalize_advantage: True 29 | gamma: 0.99 30 | tau: 0.9 31 | learning_rate: 8e-4 32 | name: LunarLander-discrete 33 | score_to_win: 500 34 | grad_norm: 1.0 35 | entropy_coef: 0.01 36 | truncate_grads: True 37 | e_clip: 0.2 38 | clip_value: True 39 | num_actors: 16 40 | horizon_length: 32 41 | minibatch_size: 64 42 | mini_epochs: 4 43 | critic_coef: 1 44 | lr_schedule: None 45 | kl_threshold: 0.008 46 | normalize_input: False 47 | device: cuda 48 | multi_gpu: False 49 | use_diagnostics: True -------------------------------------------------------------------------------- /rl_games/configs/ppo_pendulum.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | scale: 0.01 18 | sigma_init: 19 | name: const_initializer 20 | value: 0 21 | fixed_sigma: False 22 | mlp: 23 | units: [32, 32] 24 | activation: elu 25 | initializer: 26 | name: default 27 | scale: 1 28 | regularizer: 29 | name: 'None' #'l2_regularizer' 30 | #scale: 0.001 31 | 32 | load_checkpoint: False 33 | load_path: path 34 | 35 | config: 36 | env_name: Pendulum-v0 37 | reward_shaper: 38 | scale_value: 0.01 39 | normalize_advantage: True 40 | gamma: 0.99 41 | tau: 0.9 42 | 43 | learning_rate: 1e-4 44 | lr_schedule: adaptive 45 | kl_threshold: 0.008 46 | name: test 47 | score_to_win: 300 48 | 49 | grad_norm: 0.5 50 | entropy_coef: 0.0 51 | truncate_grads: True 52 | e_clip: 0.2 53 | clip_value: True 54 | num_actors: 16 55 | horizon_length: 128 56 | minibatch_size: 1024 57 | mini_epochs: 4 58 | critic_coef: 1 59 | 60 | normalize_input: False 61 | seq_length: 8 62 | bounds_loss_coef: 0 63 | -------------------------------------------------------------------------------- /rl_games/configs/ppo_pendulum_torch.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | 
model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: glorot_normal_initializer 17 | gain: 0.01 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [32, 32] 24 | activation: elu 25 | initializer: 26 | name: glorot_normal_initializer 27 | gain: 2 28 | regularizer: 29 | name: None #'l2_regularizer' 30 | #scale: 0.001 31 | 32 | config: 33 | env_name: openai_gym 34 | reward_shaper: 35 | scale_value: 0.01 36 | normalize_advantage: True 37 | gamma: 0.99 38 | tau: 0.9 39 | 40 | learning_rate: 1e-3 41 | name: pendulum 42 | score_to_win: 300 43 | 44 | grad_norm: 0.5 45 | entropy_coef: 0.0 46 | truncate_grads: True 47 | e_clip: 0.2 48 | clip_value: False 49 | num_actors: 16 50 | horizon_length: 128 51 | minibatch_size: 1024 52 | mini_epochs: 4 53 | critic_coef: 1 54 | lr_schedule: adaptive 55 | kl_threshold: 0.016 56 | 57 | normalize_input: False 58 | bounds_loss_coef: 0 59 | 60 | env_config: 61 | name: Pendulum-v1 -------------------------------------------------------------------------------- /rl_games/configs/ppo_smac.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | discrete: 13 | 14 | mlp: 15 | units: [256, 128] 16 | activation: relu 17 | initializer: 18 | name: default 19 | regularizer: 20 | name: None 21 | 22 | config: 23 | name: 6h_vs_8z 24 | env_name: smac 25 | reward_shaper: 26 | scale_value: 1 27 | normalize_advantage: True 28 | gamma: 0.99 29 | tau: 0.95 30 | learning_rate: 1e-4 31 | lr_schedule: None 32 | kl_threshold: 0.05 33 | score_to_win: 1000 34 | grad_norm: 0.5 35 | entropy_coef: 0.001 36 | truncate_grads: True 37 | 38 | e_clip: 0.2 39 | clip_value: True 40 | num_actors: 8 41 | horizon_length: 128 42 | minibatch_size: 3072 43 | mini_epochs: 4 44 | critic_coef: 1 45 | normalize_input: False 46 | seq_length: 4 47 | use_action_masks: True 48 | 49 | env_config: 50 | name: 6h_vs_8z 51 | frames: 2 52 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/ppo_walker.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 8 3 | algo: 4 | name: a2c_continuous 5 | 6 | model: 7 | name: continuous_a2c_logstd 8 | 9 | network: 10 | name: actor_critic 11 | separate: False 12 | space: 13 | continuous: 14 | mu_activation: None 15 | sigma_activation: None 16 | mu_init: 17 | name: default 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256,128,64] 24 | d2rl: False 25 | activation: elu 26 | initializer: 27 | name: default 28 | scale: 2 29 | config: 30 | env_name: BipedalWalker-v3 31 | name: walker 32 | reward_shaper: 33 | min_val: -1 34 | scale_value: 0.1 35 | 36 | normalize_advantage: True 37 | gamma: 0.995 38 | tau: 0.95 39 | learning_rate: 3e-4 40 | lr_schedule: adaptive 41 | kl_threshold: 0.008 42 | save_best_after: 10 43 | score_to_win: 300 44 | grad_norm: 1.5 45 | entropy_coef: 0 46 | truncate_grads: True 47 | e_clip: 0.2 48 | clip_value: False 49 | num_actors: 16 50 | horizon_length: 4096 51 | minibatch_size: 8192 52 | mini_epochs: 4 53 | critic_coef: 2 54 | normalize_input: True 55 | bounds_loss_coef: 0.00 56 | 
max_epochs: 10000 57 | normalize_value: True 58 | use_diagnostics: True 59 | value_bootstrap: True 60 | #weight_decay: 0.0001 61 | use_smooth_clamp: True 62 | 63 | player: 64 | render: True 65 | determenistic: True 66 | games_num: 200 67 | -------------------------------------------------------------------------------- /rl_games/configs/ppo_walker_hardcore.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_continuous 4 | 5 | model: 6 | name: continuous_a2c_logstd 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | continuous: 13 | mu_activation: None 14 | sigma_activation: None 15 | mu_init: 16 | name: default 17 | sigma_init: 18 | name: const_initializer 19 | val: 0 20 | fixed_sigma: True 21 | mlp: 22 | units: [256,128, 64] 23 | d2rl: False 24 | activation: elu 25 | initializer: 26 | name: default 27 | load_checkpoint: False 28 | load_path: './nn/walker_hc.pth' 29 | 30 | config: 31 | env_name: BipedalWalkerHardcore-v3 32 | name: walker_hc 33 | reward_shaper: 34 | min_val: -1 35 | scale_value: 0.1 36 | 37 | normalize_advantage: True 38 | gamma: 0.995 39 | tau: 0.95 40 | learning_rate: 5e-4 41 | lr_schedule: adaptive 42 | kl_threshold: 0.008 43 | score_to_win: 300 44 | grad_norm: 1.5 45 | save_best_after: 10 46 | entropy_coef: 0 47 | truncate_grads: True 48 | e_clip: 0.2 49 | clip_value: False 50 | num_actors: 16 51 | horizon_length: 4096 52 | minibatch_size: 8192 53 | mini_epochs: 4 54 | critic_coef: 1 55 | normalize_input: True 56 | seq_length: 4 57 | bounds_loss_coef: 0.0 58 | max_epochs: 100000 59 | weight_decay: 0 60 | 61 | player: 62 | render: False 63 | games_num: 200 64 | determenistic: True 65 | 66 | -------------------------------------------------------------------------------- /rl_games/configs/ppo_walker_tcnn.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 8 3 | algo: 4 | name: a2c_continuous 5 | 6 | model: 7 | name: continuous_a2c_logstd 8 | 9 | network: 10 | name: tcnnnet 11 | 12 | encoding: 13 | otype: "Identity" 14 | 15 | network: 16 | type: "FullyFusedMLP" 17 | activation: "ReLU" 18 | output_activation: "None" 19 | n_neurons: 64 20 | n_hidden_layers: 3 21 | 22 | config: 23 | env_name: BipedalWalker-v3 24 | name: walker_tcnn 25 | reward_shaper: 26 | min_val: -1 27 | scale_value: 0.1 28 | 29 | normalize_advantage: True 30 | gamma: 0.995 31 | tau: 0.95 32 | learning_rate: 3e-4 33 | lr_schedule: adaptive 34 | kl_threshold: 0.008 35 | save_best_after: 10 36 | score_to_win: 300 37 | grad_norm: 1.5 38 | entropy_coef: 0 39 | truncate_grads: True 40 | e_clip: 0.2 41 | clip_value: False 42 | num_actors: 16 43 | horizon_length: 4096 44 | minibatch_size: 8192 45 | mini_epochs: 4 46 | critic_coef: 2 47 | normalize_input: True 48 | bounds_loss_coef: 0.00 49 | max_epochs: 10000 50 | normalize_value: True 51 | use_diagnostics: True 52 | value_bootstrap: True 53 | #weight_decay: 0.0001 54 | 55 | player: 56 | render: True 57 | determenistic: True 58 | games_num: 200 59 | -------------------------------------------------------------------------------- /rl_games/configs/procgen/ppo_coinrun.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: resnet_actor_critic 10 | separate: False 11 | value_shape: 1 12 | space: 13 | discrete: 14 | 15 | cnn: 16 | conv_depths: [16, 32, 32] 17 | activation: elu 18 
| initializer: 19 | name: default 20 | 21 | mlp: 22 | units: [512] 23 | activation: elu 24 | initializer: 25 | name: default 26 | rnn1: 27 | name: lstm 28 | units: 256 29 | layers: 1 30 | config: 31 | reward_shaper: 32 | max_val: 10 33 | 34 | normalize_advantage: True 35 | gamma: 0.999 36 | tau: 0.95 37 | learning_rate: 1e-4 38 | name: atari 39 | score_to_win: 900 40 | grad_norm: 0.5 41 | entropy_coef: 0.001 42 | truncate_grads: True 43 | env_name: openai_gym #'PongNoFrameskip-v4' 44 | e_clip: 0.2 45 | clip_value: True 46 | num_actors: 16 47 | horizon_length: 256 48 | minibatch_size: 1024 49 | mini_epochs: 3 50 | critic_coef: 1 51 | lr_schedule: polynom_decay 52 | kl_threshold: 0.01 53 | normalize_input: False 54 | seq_length: 4 55 | max_epochs: 2000 56 | env_config: 57 | name: "procgen:procgen-coinrun-v0" 58 | procgen: True 59 | frames: 4 60 | num_levels: 1000 61 | start_level: 323 62 | limit_steps: True 63 | distribution_mode: 'easy' 64 | -------------------------------------------------------------------------------- /rl_games/configs/smac/10m_vs_11m_torch.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: layer_norm 12 | space: 13 | discrete: 14 | 15 | cnn: 16 | type: conv1d 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: 'None' 22 | convs: 23 | - filters: 256 24 | kernel_size: 3 25 | strides: 1 26 | padding: 1 27 | - filters: 512 28 | kernel_size: 3 29 | strides: 1 30 | padding: 1 31 | - filters: 1024 32 | kernel_size: 3 33 | strides: 1 34 | padding: 1 35 | mlp: 36 | units: [256, 128] 37 | activation: relu 38 | initializer: 39 | name: default 40 | regularizer: 41 | name: None 42 | config: 43 | name: 10m 44 | reward_shaper: 45 | scale_value: 1 46 | normalize_advantage: True 47 | gamma: 0.99 48 | tau: 0.95 49 | learning_rate: 1e-4 50 | score_to_win: 20 51 | grad_norm: 0.5 52 | entropy_coef: 0.005 53 | truncate_grads: True 54 | env_name: smac_cnn 55 | e_clip: 0.2 56 | clip_value: True 57 | num_actors: 8 58 | horizon_length: 128 59 | minibatch_size: 2560 60 | mini_epochs: 4 61 | critic_coef: 2 62 | lr_schedule: None 63 | kl_threshold: 0.05 64 | normalize_input: True 65 | seq_length: 2 66 | use_action_masks: True 67 | env_config: 68 | name: 10m_vs_11m 69 | frames: 14 70 | transpose: False 71 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/27m_vs_30m_torch.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: layer_norm 12 | space: 13 | discrete: 14 | 15 | cnn: 16 | type: conv1d 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: None 22 | convs: 23 | - filters: 256 24 | kernel_size: 3 25 | strides: 1 26 | padding: 1 27 | - filters: 512 28 | kernel_size: 3 29 | strides: 1 30 | padding: 1 31 | - filters: 1024 32 | kernel_size: 3 33 | strides: 1 34 | padding: 1 35 | mlp: 36 | units: [256, 128] 37 | activation: relu 38 | initializer: 39 | name: default 40 | regularizer: 41 | name: 'None' 42 | config: 43 | name: 27m 44 | reward_shaper: 45 | scale_value: 1 46 | normalize_advantage: True 47 | gamma: 0.99 48 | tau: 0.95 49 | learning_rate: 1e-4 50 | 
score_to_win: 20 51 | grad_norm: 0.5 52 | entropy_coef: 0.005 53 | truncate_grads: True 54 | env_name: smac_cnn 55 | e_clip: 0.2 56 | clip_value: True 57 | num_actors: 8 58 | horizon_length: 128 59 | minibatch_size: 3456 60 | mini_epochs: 4 61 | critic_coef: 2 62 | lr_schedule: None 63 | kl_threshold: 0.05 64 | normalize_input: True 65 | seq_length: 2 66 | use_action_masks: True 67 | 68 | env_config: 69 | name: 27m_vs_30m 70 | frames: 4 71 | transpose: False 72 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/2m_vs_1z.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | discrete: 13 | mlp: 14 | units: [256, 128] 15 | activation: relu 16 | initializer: 17 | name: default 18 | regularizer: 19 | name: 'None' 20 | config: 21 | name: 2s_vs_1z 22 | reward_shaper: 23 | scale_value: 1 24 | normalize_advantage: True 25 | gamma: 0.99 26 | tau: 0.95 27 | learning_rate: 5e-4 28 | score_to_win: 1000 29 | grad_norm: 0.5 30 | entropy_coef: 0.005 31 | truncate_grads: True 32 | env_name: smac 33 | e_clip: 0.2 34 | clip_value: True 35 | num_actors: 8 36 | horizon_length: 128 37 | minibatch_size: 1024 38 | mini_epochs: 4 39 | critic_coef: 1 40 | lr_schedule: None 41 | kl_threshold: 0.05 42 | normalize_input: True 43 | seq_length: 4 44 | use_action_masks: True 45 | 46 | env_config: 47 | name: 2m_vs_1z 48 | frames: 1 49 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/2m_vs_1z_torch.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | discrete: 13 | 14 | mlp: 15 | units: [256, 128] 16 | activation: relu 17 | initializer: 18 | name: default 19 | regularizer: 20 | name: 'None' 21 | config: 22 | name: 2m_vs_1z 23 | reward_shaper: 24 | scale_value: 1 25 | normalize_advantage: True 26 | gamma: 0.99 27 | tau: 0.95 28 | learning_rate: 5e-4 29 | score_to_win: 1000 30 | grad_norm: 0.5 31 | entropy_coef: 0.005 32 | truncate_grads: True 33 | env_name: smac 34 | e_clip: 0.2 35 | clip_value: True 36 | num_actors: 8 37 | horizon_length: 128 38 | minibatch_size: 1024 39 | mini_epochs: 4 40 | critic_coef: 1 41 | lr_schedule: None 42 | kl_threshold: 0.05 43 | normalize_input: True 44 | seq_length: 4 45 | use_action_masks: True 46 | 47 | env_config: 48 | name: 2m_vs_1z 49 | frames: 1 50 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/2s_vs_1c.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c_lstm 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | discrete: 13 | 14 | mlp: 15 | units: [256, 128] 16 | activation: relu 17 | initializer: 18 | name: default 19 | regularizer: 20 | name: 'None' 21 | lstm: 22 | units: 128 23 | concated: False 24 | config: 25 | name: 2m_vs_1z 26 | reward_shaper: 27 | scale_value: 1 28 | normalize_advantage: True 29 | gamma: 0.99 30 | tau: 0.95 31 | learning_rate: 1e-4 32 | score_to_win: 1000 33 | grad_norm: 0.5 34 | entropy_coef: 
0.005 35 | truncate_grads: True 36 | env_name: smac 37 | e_clip: 0.2 38 | clip_value: True 39 | num_actors: 8 40 | horizon_length: 128 41 | minibatch_size: 1024 42 | mini_epochs: 4 43 | critic_coef: 1 44 | lr_schedule: None 45 | kl_threshold: 0.05 46 | normalize_input: False 47 | seq_length: 4 48 | use_action_masks: True 49 | 50 | env_config: 51 | name: 2m_vs_1z 52 | frames: 1 53 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/3m_cnn_torch.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: layer_norm 12 | space: 13 | discrete: 14 | 15 | cnn: 16 | type: conv1d 17 | 18 | activation: relu 19 | initializer: 20 | name: glorot_uniform_initializer 21 | gain: 1 22 | regularizer: 23 | name: 'None' 24 | convs: 25 | - filters: 64 26 | kernel_size: 3 27 | strides: 2 28 | padding: 1 29 | - filters: 128 30 | kernel_size: 3 31 | strides: 1 32 | padding: 0 33 | - filters: 256 34 | kernel_size: 3 35 | strides: 1 36 | padding: 0 37 | mlp: 38 | units: [256, 128] 39 | activation: relu 40 | initializer: 41 | name: glorot_uniform_initializer 42 | gain: 1 43 | regularizer: 44 | name: 'None' 45 | config: 46 | name: 3m 47 | reward_shaper: 48 | scale_value: 1 49 | normalize_advantage: True 50 | gamma: 0.99 51 | tau: 0.95 52 | learning_rate: 5e-4 53 | score_to_win: 20 54 | grad_norm: 0.5 55 | entropy_coef: 0.005 56 | truncate_grads: True 57 | env_name: smac_cnn 58 | e_clip: 0.2 59 | clip_value: True 60 | num_actors: 8 61 | horizon_length: 128 62 | minibatch_size: 1536 63 | mini_epochs: 1 64 | critic_coef: 1 65 | lr_schedule: None 66 | kl_threshold: 0.05 67 | normalize_input: True 68 | seq_length: 2 69 | use_action_masks: True 70 | 71 | env_config: 72 | name: 3m 73 | frames: 4 74 | transpose: True 75 | random_invalid_step: True 76 | 77 | -------------------------------------------------------------------------------- /rl_games/configs/smac/3m_torch.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: layer_norm 12 | space: 13 | discrete: 14 | 15 | mlp: 16 | units: [256, 128] 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: None 22 | config: 23 | name: 3m 24 | reward_shaper: 25 | scale_value: 1 26 | normalize_advantage: True 27 | gamma: 0.99 28 | tau: 0.95 29 | learning_rate: 5e-4 30 | score_to_win: 20 31 | grad_norm: 0.5 32 | entropy_coef: 0.001 33 | truncate_grads: True 34 | env_name: smac 35 | e_clip: 0.2 36 | clip_value: True 37 | num_actors: 8 38 | horizon_length: 128 39 | minibatch_size: 1536 40 | mini_epochs: 4 41 | critic_coef: 1 42 | lr_schedule: None 43 | kl_threshold: 0.05 44 | normalize_input: True 45 | use_action_masks: True 46 | ignore_dead_batches : False 47 | 48 | env_config: 49 | name: 3m 50 | frames: 1 51 | transpose: False 52 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/3m_torch_rnn.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | 
separate: True 11 | #normalization: layer_norm 12 | space: 13 | discrete: 14 | 15 | mlp: 16 | units: [256, 128] 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: 'None' 22 | rnn: 23 | name: lstm 24 | units: 128 25 | layers: 1 26 | config: 27 | name: 3m 28 | reward_shaper: 29 | scale_value: 1 30 | normalize_advantage: True 31 | gamma: 0.99 32 | tau: 0.95 33 | learning_rate: 5e-4 34 | score_to_win: 20 35 | grad_norm: 0.5 36 | entropy_coef: 0.001 37 | truncate_grads: True 38 | env_name: smac 39 | e_clip: 0.2 40 | clip_value: True 41 | num_actors: 8 42 | horizon_length: 128 43 | minibatch_size: 1536 44 | mini_epochs: 4 45 | critic_coef: 1 46 | lr_schedule: None 47 | kl_threshold: 0.05 48 | normalize_input: True 49 | seq_length: 4 50 | use_action_masks: True 51 | ignore_dead_batches : False 52 | 53 | env_config: 54 | name: 3m 55 | frames: 1 56 | transpose: False 57 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/3s_vs_4z.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c_lstm 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | discrete: 13 | 14 | mlp: 15 | units: [256, 128] 16 | activation: relu 17 | initializer: 18 | name: default 19 | regularizer: 20 | name: 'None' 21 | lstm: 22 | units: 128 23 | concated: False 24 | config: 25 | name: sc2_fc 26 | reward_shaper: 27 | scale_value: 1 28 | normalize_advantage: True 29 | gamma: 0.99 30 | tau: 0.95 31 | learning_rate: 1e-4 32 | score_to_win: 1000 33 | grad_norm: 0.5 34 | entropy_coef: 0.005 35 | truncate_grads: True 36 | env_name: smac 37 | e_clip: 0.2 38 | clip_value: True 39 | num_actors: 8 40 | horizon_length: 64 41 | minibatch_size: 1536 42 | mini_epochs: 8 43 | critic_coef: 1 44 | lr_schedule: None 45 | kl_threshold: 0.05 46 | normalize_input: False 47 | seq_length: 4 48 | use_action_masks: True 49 | 50 | env_config: 51 | name: 3s_vs_4z 52 | frames: 1 53 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/3s_vs_5z.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c_lstm 7 | 8 | 9 | network: 10 | name: actor_critic 11 | separate: True 12 | space: 13 | discrete: 14 | 15 | mlp: 16 | units: [256, 128] 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: 'None' 22 | lstm: 23 | units: 128 24 | concated: False 25 | config: 26 | name: 3s_vs_5z 27 | reward_shaper: 28 | scale_value: 1 29 | normalize_advantage: True 30 | gamma: 0.99 31 | tau: 0.95 32 | learning_rate: 1e-4 33 | score_to_win: 1000 34 | grad_norm: 0.5 35 | entropy_coef: 0.001 36 | truncate_grads: True 37 | env_name: smac 38 | e_clip: 0.2 39 | clip_value: True 40 | num_actors: 8 41 | horizon_length: 128 42 | minibatch_size: 1536 #1024 43 | mini_epochs: 4 44 | critic_coef: 1 45 | lr_schedule: None 46 | kl_threshold: 0.05 47 | normalize_input: False 48 | seq_length: 4 49 | use_action_masks: True 50 | 51 | env_config: 52 | name: 3s_vs_5z 53 | frames: 1 54 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/3s_vs_5z_torch_lstm.yaml: -------------------------------------------------------------------------------- 1 | params: 
2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | normalization: layer_norm 12 | space: 13 | discrete: 14 | 15 | mlp: 16 | units: [256, 128] 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: None 22 | rnn: 23 | name: lstm 24 | units: 64 25 | layers: 1 26 | before_mlp: False 27 | 28 | config: 29 | name: 3s_vs_5z 30 | reward_shaper: 31 | scale_value: 1 32 | normalize_advantage: True 33 | gamma: 0.99 34 | tau: 0.95 35 | learning_rate: 1e-4 36 | score_to_win: 1000 37 | grad_norm: 0.5 38 | entropy_coef: 0.01 39 | truncate_grads: True 40 | env_name: smac 41 | e_clip: 0.2 42 | clip_value: True 43 | num_actors: 8 44 | horizon_length: 256 45 | minibatch_size: 1536 #1024 46 | mini_epochs: 4 47 | critic_coef: 1 48 | lr_schedule: None 49 | kl_threshold: 0.05 50 | normalize_input: True 51 | seq_length: 32 52 | use_action_masks: True 53 | max_epochs: 20000 54 | env_config: 55 | name: 3s_vs_5z 56 | frames: 1 57 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/3s_vs_5z_torch_lstm2.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | discrete: 13 | 14 | mlp: 15 | units: [256, 128] 16 | activation: relu 17 | initializer: 18 | name: default 19 | regularizer: 20 | name: 'None' 21 | rnn: 22 | name: lstm 23 | units: 128 24 | layers: 1 25 | before_mlp: False 26 | config: 27 | name: 3s_vs_5z2 28 | reward_shaper: 29 | scale_value: 1 30 | normalize_advantage: True 31 | gamma: 0.99 32 | tau: 0.95 33 | learning_rate: 1e-4 34 | score_to_win: 1000 35 | grad_norm: 0.5 36 | entropy_coef: 0.005 37 | truncate_grads: True 38 | env_name: smac 39 | e_clip: 0.2 40 | clip_value: True 41 | num_actors: 8 42 | horizon_length: 128 43 | minibatch_size: 1536 #1024 44 | mini_epochs: 4 45 | critic_coef: 1 46 | lr_schedule: None 47 | kl_threshold: 0.05 48 | normalize_input: False 49 | seq_length: 4 50 | use_action_masks: True 51 | max_epochs: 20000 52 | env_config: 53 | name: 3s_vs_5z 54 | frames: 1 55 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/5m_vs_6m_rnn.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: layer_norm 12 | space: 13 | discrete: 14 | 15 | mlp: 16 | units: [512, 256] 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: 'None' 22 | rnn: 23 | name: lstm 24 | units: 128 25 | layers: 1 26 | layer_norm: True 27 | config: 28 | name: 5m_vs_6m_rnn 29 | reward_shaper: 30 | scale_value: 1 31 | normalize_advantage: True 32 | gamma: 0.99 33 | tau: 0.95 34 | learning_rate: 1e-4 35 | score_to_win: 20 36 | entropy_coef: 0.005 37 | truncate_grads: True 38 | grad_norm: 1.5 39 | env_name: smac 40 | e_clip: 0.2 41 | clip_value: True 42 | num_actors: 8 43 | horizon_length: 128 44 | minibatch_size: 2560 # 5 * 512 45 | mini_epochs: 4 46 | critic_coef: 1 47 | lr_schedule: None 48 | kl_threshold: 0.05 49 | normalize_input: True 50 | normalize_value: False 51 | use_action_masks: True 52 | seq_length: 8 53 | #max_epochs: 10000 54 | 
env_config: 55 | name: 5m_vs_6m 56 | central_value: False 57 | reward_only_positive: True 58 | obs_last_action: True 59 | apply_agent_ids: False 60 | 61 | player: 62 | render: False 63 | games_num: 200 64 | n_game_life: 1 65 | determenistic: True 66 | 67 | #reward_negative_scale: 0.1 -------------------------------------------------------------------------------- /rl_games/configs/smac/5m_vs_6m_torch.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | load_checkpoint: False 9 | load_path: 'nn/5msmac_cnn.pth' 10 | 11 | network: 12 | name: actor_critic 13 | separate: True 14 | #normalization: layer_norm 15 | space: 16 | discrete: 17 | 18 | cnn: 19 | type: conv1d 20 | activation: relu 21 | initializer: 22 | name: default 23 | regularizer: 24 | name: 'None' 25 | convs: 26 | - filters: 256 27 | kernel_size: 3 28 | strides: 1 29 | padding: 1 30 | - filters: 512 31 | kernel_size: 3 32 | strides: 1 33 | padding: 1 34 | - filters: 1024 35 | kernel_size: 3 36 | strides: 1 37 | padding: 1 38 | mlp: 39 | units: [256, 128] 40 | activation: relu 41 | initializer: 42 | name: default 43 | regularizer: 44 | name: 'None' 45 | config: 46 | name: 5m 47 | reward_shaper: 48 | scale_value: 1 49 | normalize_advantage: True 50 | gamma: 0.99 51 | tau: 0.95 52 | learning_rate: 1e-4 53 | score_to_win: 20 54 | grad_norm: 0.5 55 | entropy_coef: 0.005 56 | truncate_grads: True 57 | env_name: smac_cnn 58 | e_clip: 0.2 59 | clip_value: True 60 | num_actors: 8 61 | horizon_length: 128 62 | minibatch_size: 2560 63 | mini_epochs: 4 64 | critic_coef: 2 65 | lr_schedule: None 66 | kl_threshold: 0.05 67 | normalize_input: True 68 | seq_length: 2 69 | use_action_masks: True 70 | env_config: 71 | name: 5m_vs_6m 72 | frames: 4 73 | transpose: False 74 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/6h_vs_8z_torch.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: layer_norm 12 | space: 13 | discrete: 14 | mlp: 15 | units: [256, 256] 16 | activation: relu 17 | initializer: 18 | name: default 19 | regularizer: 20 | name: 'None' 21 | 22 | config: 23 | name: 6h_vs_8z_separate 24 | reward_shaper: 25 | scale_value: 1 26 | normalize_advantage: True 27 | gamma: 0.99 28 | tau: 0.95 29 | learning_rate: 5e-4 30 | score_to_win: 20 31 | grad_norm: 0.5 32 | entropy_coef: 0.002 33 | truncate_grads: True 34 | env_name: smac 35 | e_clip: 0.2 36 | clip_value: True 37 | num_actors: 8 38 | horizon_length: 128 39 | minibatch_size: 3072 # 6 * 512 40 | mini_epochs: 2 41 | critic_coef: 1 42 | lr_schedule: None 43 | kl_threshold: 0.05 44 | normalize_input: True 45 | use_action_masks: True 46 | ignore_dead_batches : False 47 | 48 | env_config: 49 | name: 6h_vs_8z 50 | central_value: False 51 | reward_only_positive: False 52 | obs_last_action: True 53 | frames: 1 54 | #flatten: False -------------------------------------------------------------------------------- /rl_games/configs/smac/8m_torch.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: layer_norm 12 
| space: 13 | discrete: 14 | 15 | mlp: 16 | units: [256, 128] 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: None 22 | config: 23 | name: 8m 24 | reward_shaper: 25 | scale_value: 1 26 | normalize_advantage: True 27 | gamma: 0.99 28 | tau: 0.95 29 | learning_rate: 5e-4 30 | score_to_win: 20 31 | grad_norm: 0.5 32 | entropy_coef: 0.001 33 | truncate_grads: True 34 | env_name: smac 35 | e_clip: 0.2 36 | clip_value: True 37 | num_actors: 8 38 | horizon_length: 128 39 | minibatch_size: 4096 40 | mini_epochs: 4 41 | critic_coef: 1 42 | lr_schedule: None 43 | kl_threshold: 0.05 44 | normalize_input: True 45 | seq_length: 2 46 | use_action_masks: True 47 | ignore_dead_batches : False 48 | max_epochs: 10000 49 | env_config: 50 | name: 8m 51 | frames: 1 52 | transpose: False 53 | random_invalid_step: False -------------------------------------------------------------------------------- /rl_games/configs/smac/MMM2_torch.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: layer_norm 12 | space: 13 | discrete: 14 | 15 | cnn: 16 | type: conv1d 17 | activation: relu 18 | initializer: 19 | name: default 20 | scale: 1.3 21 | regularizer: 22 | name: 'None' 23 | convs: 24 | - filters: 64 25 | kernel_size: 3 26 | strides: 2 27 | padding: 0 28 | - filters: 128 29 | kernel_size: 3 30 | strides: 1 31 | padding: 0 32 | - filters: 256 33 | kernel_size: 3 34 | strides: 1 35 | padding: 0 36 | mlp: 37 | units: [256, 128] 38 | activation: relu 39 | initializer: 40 | name: default 41 | regularizer: 42 | name: 'None' 43 | config: 44 | name: MMM2_cnn 45 | reward_shaper: 46 | scale_value: 1.3 47 | normalize_advantage: True 48 | gamma: 0.99 49 | tau: 0.95 50 | learning_rate: 1e-4 51 | score_to_win: 20 52 | grad_norm: 0.5 53 | entropy_coef: 0.005 54 | truncate_grads: True 55 | env_name: smac_cnn 56 | e_clip: 0.2 57 | clip_value: True 58 | num_actors: 8 59 | horizon_length: 64 60 | minibatch_size: 2560 61 | mini_epochs: 1 62 | critic_coef: 2 63 | lr_schedule: None 64 | kl_threshold: 0.05 65 | normalize_input: False 66 | use_action_masks: True 67 | 68 | env_config: 69 | name: MMM2 70 | frames: 4 71 | transpose: False # for pytorch transpose == not Transpose in tf 72 | random_invalid_step: False 73 | replay_save_freq: 100 -------------------------------------------------------------------------------- /rl_games/configs/smac/runs/2c_vs_64zg_neg.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | discrete: 13 | 14 | mlp: 15 | units: [512, 256, 128] 16 | activation: relu 17 | initializer: 18 | name: default 19 | regularizer: 20 | name: 'None' 21 | config: 22 | name: 2c_vs_64zg_neg 23 | reward_shaper: 24 | scale_value: 1 25 | normalize_advantage: True 26 | gamma: 0.99 27 | tau: 0.95 28 | learning_rate: 5e-4 29 | score_to_win: 1000 30 | grad_norm: 0.5 31 | entropy_coef: 0.005 32 | truncate_grads: True 33 | env_name: smac 34 | e_clip: 0.2 35 | clip_value: True 36 | num_actors: 8 37 | horizon_length: 128 38 | minibatch_size: 1024 39 | mini_epochs: 4 40 | critic_coef: 1 41 | lr_schedule: None 42 | kl_threshold: 0.05 43 | normalize_input: True 44 | use_action_masks: True 45 | 46 | env_config: 47 | name: 
2c_vs_64zg 48 | frames: 1 49 | random_invalid_step: False 50 | central_value: True 51 | reward_only_positive: False 52 | state_last_action: True 53 | 54 | central_value_config: 55 | minibatch_size: 512 56 | mini_epochs: 4 57 | learning_rate: 5e-4 58 | clip_value: False 59 | normalize_input: True 60 | network: 61 | name: actor_critic 62 | central_value: True 63 | mlp: 64 | units: [512, 256, 128] 65 | activation: relu 66 | initializer: 67 | name: default 68 | scale: 2 69 | regularizer: 70 | name: 'None' 71 | -------------------------------------------------------------------------------- /rl_games/configs/smac/runs/2s_vs_1c.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | 9 | network: 10 | name: actor_critic 11 | separate: True 12 | space: 13 | discrete: 14 | 15 | mlp: 16 | units: [256, 128] 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: 'None' 22 | config: 23 | name: 2s_vs_1sc_cv_neg 24 | reward_shaper: 25 | scale_value: 1 26 | normalize_advantage: True 27 | gamma: 0.99 28 | tau: 0.95 29 | learning_rate: 5e-4 30 | score_to_win: 1000 31 | grad_norm: 0.5 32 | entropy_coef: 0.005 33 | truncate_grads: True 34 | env_name: smac 35 | e_clip: 0.2 36 | clip_value: True 37 | num_actors: 8 38 | horizon_length: 128 39 | minibatch_size: 1024 40 | mini_epochs: 4 41 | critic_coef: 1 42 | lr_schedule: None 43 | kl_threshold: 0.05 44 | normalize_input: True 45 | use_action_masks: True 46 | 47 | env_config: 48 | name: 2s_vs_1sc 49 | frames: 1 50 | random_invalid_step: False 51 | central_value: True 52 | reward_only_positive: True 53 | state_last_action: True 54 | 55 | central_value_config: 56 | minibatch_size: 512 57 | mini_epochs: 4 58 | learning_rate: 5e-4 59 | clip_value: False 60 | normalize_input: True 61 | network: 62 | name: actor_critic 63 | central_value: True 64 | mlp: 65 | units: [256, 128] 66 | activation: relu 67 | initializer: 68 | name: default 69 | scale: 2 70 | regularizer: 71 | name: 'None' 72 | -------------------------------------------------------------------------------- /rl_games/configs/smac/runs/2s_vs_1c_neg.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | space: 12 | discrete: 13 | 14 | mlp: 15 | units: [256, 128] 16 | activation: relu 17 | initializer: 18 | name: default 19 | regularizer: 20 | name: None 21 | 22 | config: 23 | name: 2s_vs_1sc_cv_neg 24 | reward_shaper: 25 | scale_value: 1 26 | normalize_advantage: True 27 | gamma: 0.99 28 | tau: 0.95 29 | learning_rate: 5e-4 30 | score_to_win: 1000 31 | grad_norm: 0.5 32 | entropy_coef: 0.005 33 | truncate_grads: True 34 | env_name: smac 35 | e_clip: 0.2 36 | clip_value: True 37 | num_actors: 8 38 | horizon_length: 128 39 | minibatch_size: 1024 40 | mini_epochs: 4 41 | critic_coef: 1 42 | lr_schedule: None 43 | kl_threshold: 0.05 44 | normalize_input: True 45 | use_action_masks: True 46 | 47 | env_config: 48 | name: 2s_vs_1sc 49 | frames: 1 50 | random_invalid_step: False 51 | central_value: True 52 | reward_only_positive: False 53 | state_last_action: True 54 | 55 | central_value_config: 56 | minibatch_size: 512 57 | mini_epochs: 4 58 | learning_rate: 5e-4 59 | clip_value: False 60 | normalize_input: True 61 | network: 62 | name: actor_critic 63 | central_value: True 64 | mlp: 65 | 
units: [256, 128] 66 | activation: relu 67 | initializer: 68 | name: default 69 | scale: 2 70 | regularizer: 71 | name: None -------------------------------------------------------------------------------- /rl_games/configs/test/test_asymmetric_discrete_mhv_mops.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: testnet 10 | config: 11 | reward_shaper: 12 | scale_value: 1 13 | normalize_advantage: True 14 | gamma: 0.99 15 | tau: 0.9 16 | learning_rate: 2e-4 17 | name: test_md_multi_obs 18 | score_to_win: 0.95 19 | grad_norm: 10.5 20 | entropy_coef: 0.005 21 | truncate_grads: True 22 | env_name: test_env 23 | e_clip: 0.2 24 | clip_value: False 25 | num_actors: 16 26 | horizon_length: 256 27 | minibatch_size: 2048 28 | mini_epochs: 4 29 | critic_coef: 1 30 | lr_schedule: None 31 | kl_threshold: 0.008 32 | normalize_input: False 33 | normalize_value: False 34 | weight_decay: 0.0000 35 | max_epochs: 10000 36 | seq_length: 16 37 | save_best_after: 10 38 | save_frequency: 20 39 | 40 | env_config: 41 | name: TestRnnEnv-v0 42 | hide_object: False 43 | apply_dist_reward: False 44 | min_dist: 2 45 | max_dist: 8 46 | use_central_value: True 47 | multi_obs_space: True 48 | multi_head_value: False 49 | player: 50 | games_num: 100 51 | determenistic: True 52 | 53 | central_value_config: 54 | minibatch_size: 512 55 | mini_epochs: 4 56 | learning_rate: 5e-4 57 | clip_value: False 58 | normalize_input: False 59 | truncate_grads: True 60 | grad_norm: 10 61 | network: 62 | name: testnet 63 | central_value: True 64 | mlp: 65 | units: [64,32] 66 | activation: relu 67 | initializer: 68 | name: default -------------------------------------------------------------------------------- /rl_games/configs/test/test_discrete.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: 'layer_norm' 12 | space: 13 | discrete: 14 | 15 | mlp: 16 | units: [32,32] 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: None 22 | 23 | config: 24 | reward_shaper: 25 | scale_value: 1 26 | normalize_advantage: True 27 | gamma: 0.99 28 | tau: 0.9 29 | learning_rate: 2e-4 30 | name: test_md 31 | score_to_win: 0.95 32 | grad_norm: 10.5 33 | entropy_coef: 0.005 34 | truncate_grads: True 35 | env_name: test_env 36 | e_clip: 0.2 37 | clip_value: False 38 | num_actors: 16 39 | horizon_length: 512 40 | minibatch_size: 2048 41 | mini_epochs: 4 42 | critic_coef: 1 43 | lr_schedule: None 44 | kl_threshold: 0.008 45 | normalize_input: True 46 | weight_decay: 0.0000 47 | max_epochs: 10000 48 | 49 | env_config: 50 | name: TestRnnEnv-v0 51 | hide_object: False 52 | apply_dist_reward: True 53 | min_dist: 2 54 | max_dist: 8 55 | use_central_value: True 56 | multi_discrete_space: False 57 | multi_head_value: False 58 | player: 59 | games_num: 100 60 | determenistic: True 61 | 62 | -------------------------------------------------------------------------------- /rl_games/configs/test/test_discrete_multidiscrete_mhv.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: multi_discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: 'layer_norm' 12 | 
space: 13 | multi_discrete: 14 | 15 | mlp: 16 | units: [32,32] 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: None 22 | 23 | config: 24 | reward_shaper: 25 | scale_value: 1 26 | normalize_advantage: True 27 | gamma: 0.99 28 | tau: 0.9 29 | learning_rate: 2e-4 30 | name: test_md_mhv 31 | score_to_win: 0.95 32 | grad_norm: 10.5 33 | entropy_coef: 0.005 34 | truncate_grads: True 35 | env_name: test_env 36 | e_clip: 0.2 37 | clip_value: False 38 | num_actors: 16 39 | horizon_length: 512 40 | minibatch_size: 2048 41 | mini_epochs: 4 42 | critic_coef: 1 43 | lr_schedule: None 44 | kl_threshold: 0.008 45 | normalize_input: False 46 | weight_decay: 0.0000 47 | max_epochs: 10000 48 | 49 | env_config: 50 | name: TestRnnEnv-v0 51 | hide_object: False 52 | apply_dist_reward: False 53 | min_dist: 2 54 | max_dist: 8 55 | use_central_value: False 56 | multi_discrete_space: True 57 | multi_head_value: False 58 | player: 59 | games_num: 100 60 | determenistic: True 61 | 62 | -------------------------------------------------------------------------------- /rl_games/configs/test/test_ppo_walker_truncated_time.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 8 3 | algo: 4 | name: a2c_continuous 5 | 6 | model: 7 | name: continuous_a2c_logstd 8 | 9 | network: 10 | name: actor_critic 11 | separate: False 12 | space: 13 | continuous: 14 | mu_activation: None 15 | sigma_activation: None 16 | mu_init: 17 | name: default 18 | sigma_init: 19 | name: const_initializer 20 | val: 0 21 | fixed_sigma: True 22 | mlp: 23 | units: [256, 128, 64] 24 | d2rl: False 25 | activation: relu 26 | initializer: 27 | name: default 28 | scale: 2 29 | 30 | config: 31 | name: walker_truncated_step_1000 32 | reward_shaper: 33 | min_val: -1 34 | scale_value: 0.1 35 | 36 | normalize_input: True 37 | normalize_advantage: True 38 | normalize_value: True 39 | value_bootstrap: True 40 | gamma: 0.995 41 | tau: 0.95 42 | 43 | learning_rate: 3e-4 44 | lr_schedule: adaptive 45 | kl_threshold: 0.005 46 | 47 | score_to_win: 300 48 | grad_norm: 0.5 49 | entropy_coef: 0 50 | truncate_grads: True 51 | env_name: BipedalWalker-v3 52 | e_clip: 0.2 53 | clip_value: False 54 | num_actors: 16 55 | horizon_length: 256 56 | minibatch_size: 256 57 | mini_epochs: 4 58 | critic_coef: 2 59 | 60 | bounds_loss_coef: 0.00 61 | max_epochs: 10000 62 | #weight_decay: 0.0001 63 | 64 | env_config: 65 | steps_limit: 1000 66 | 67 | player: 68 | render: True 69 | determenistic: True 70 | games_num: 200 71 | -------------------------------------------------------------------------------- /rl_games/configs/test/test_rnn.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | algo: 3 | name: a2c_discrete 4 | 5 | model: 6 | name: discrete_a2c 7 | 8 | network: 9 | name: actor_critic 10 | separate: True 11 | #normalization: 'layer_norm' 12 | space: 13 | discrete: 14 | 15 | mlp: 16 | units: [64] 17 | activation: relu 18 | initializer: 19 | name: default 20 | regularizer: 21 | name: None 22 | rnn: 23 | name: lstm 24 | #layer_norm: True 25 | units: 64 26 | layers: 1 27 | before_mlp: False 28 | config: 29 | reward_shaper: 30 | scale_value: 1 31 | normalize_advantage: True 32 | gamma: 0.99 33 | tau: 0.9 34 | learning_rate: 2e-4 35 | name: test_rnn 36 | score_to_win: 0.95 37 | grad_norm: 10.5 38 | entropy_coef: 0.005 39 | truncate_grads: True 40 | env_name: test_env 41 | e_clip: 0.2 42 | clip_value: False 43 | num_actors: 16 44 | 
horizon_length: 512 45 | minibatch_size: 2048 46 | mini_epochs: 4 47 | critic_coef: 1 48 | lr_schedule: None 49 | kl_threshold: 0.008 50 | normalize_input: False 51 | seq_length: 32 52 | weight_decay: 0.0000 53 | max_epochs: 10000 54 | 55 | env_config: 56 | name: TestRnnEnv-v0 57 | hide_object: True 58 | apply_dist_reward: False 59 | min_dist: 2 60 | max_dist: 8 61 | use_central_value: False 62 | 63 | player: 64 | games_num: 100 65 | determenistic: True 66 | 67 | -------------------------------------------------------------------------------- /rl_games/configs/test/test_rnn_multidiscrete_mhv.yaml: -------------------------------------------------------------------------------- 1 | params: 2 | seed: 322 3 | algo: 4 | name: a2c_discrete 5 | 6 | model: 7 | name: multi_discrete_a2c 8 | 9 | network: 10 | name: actor_critic 11 | separate: True 12 | #normalization: 'layer_norm' 13 | space: 14 | multi_discrete: 15 | 16 | mlp: 17 | units: [64] 18 | activation: relu 19 | initializer: 20 | name: default 21 | regularizer: 22 | name: None 23 | rnn: 24 | name: lstm 25 | #layer_norm: True 26 | units: 64 27 | layers: 1 28 | before_mlp: False 29 | 30 | config: 31 | reward_shaper: 32 | scale_value: 1 33 | normalize_advantage: True 34 | gamma: 0.99 35 | tau: 0.9 36 | learning_rate: 2e-4 37 | name: test_rnn_md_mhv 38 | score_to_win: 0.99 39 | grad_norm: 10.5 40 | entropy_coef: 0.005 41 | truncate_grads: True 42 | env_name: test_env 43 | e_clip: 0.2 44 | clip_value: False 45 | num_actors: 16 46 | horizon_length: 512 47 | minibatch_size: 2048 48 | mini_epochs: 4 49 | critic_coef: 1 50 | lr_schedule: None 51 | kl_threshold: 0.008 52 | normalize_input: False 53 | normalize_value: True 54 | seq_length: 16 55 | weight_decay: 0.0000 56 | max_epochs: 10000 57 | 58 | env_config: 59 | name: TestRnnEnv-v0 60 | hide_object: True 61 | apply_dist_reward: True 62 | min_dist: 2 63 | max_dist: 8 64 | use_central_value: False 65 | multi_discrete_space: True 66 | multi_head_value: True 67 | player: 68 | games_num: 100 69 | determenistic: True 70 | 71 | -------------------------------------------------------------------------------- /rl_games/distributed/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/rl_games/distributed/__init__.py -------------------------------------------------------------------------------- /rl_games/envs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from rl_games.envs.connect4_network import ConnectBuilder 4 | from rl_games.envs.test_network import TestNetBuilder 5 | from rl_games.algos_torch import model_builder 6 | 7 | model_builder.register_network('connect4net', ConnectBuilder) 8 | model_builder.register_network('testnet', TestNetBuilder) -------------------------------------------------------------------------------- /rl_games/envs/test/__init__.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | gym.envs.register( 4 | id='TestRnnEnv-v0', 5 | entry_point='rl_games.envs.test.rnn_env:TestRNNEnv', 6 | max_episode_steps=100500, 7 | ) 8 | 9 | gym.envs.register( 10 | id='TestAsymmetricEnv-v0', 11 | entry_point='rl_games.envs.test.test_asymmetric_env:TestAsymmetricCritic' 12 | ) -------------------------------------------------------------------------------- /rl_games/envs/test/example_env.py: 
-------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | 4 | 5 | class ExampleEnv(gym.Env): 6 | ''' 7 | Just example empty env which demonstrates additional features compared to the default openai gym 8 | ''' 9 | def __init__(self, **kwargs): 10 | gym.Env.__init__(self) 11 | 12 | self.use_central_value = True 13 | self.value_size = 2 14 | self.concat_infos = False 15 | self.action_space = gym.spaces.Tuple([gym.spaces.Discrete(2),gym.spaces.Discrete(3)]) # gym.spaces.Discrete(3), gym.spaces.Box(low=0, high=1, shape=(3, ), dtype=np.float32) 16 | self.observation_space = gym.spaces.Box(low=0, high=1, shape=(6, ), dtype=np.float32) # or Dict 17 | 18 | def get_number_of_agents(self): 19 | return 1 20 | 21 | def has_action_mask(self): 22 | return False 23 | 24 | def get_action_mask(self): 25 | pass -------------------------------------------------------------------------------- /rl_games/interfaces/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YingYuan0414/in-hand-rotation/1e5cc92864b87fbe60482a923002b5499ec5e799/rl_games/interfaces/__init__.py -------------------------------------------------------------------------------- /rl_games/interfaces/base_algorithm.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from abc import abstractmethod, abstractproperty 3 | 4 | class BaseAlgorithm(ABC): 5 | def __init__(self, base_name, config): 6 | pass 7 | 8 | @abstractproperty 9 | def device(self): 10 | pass 11 | 12 | @abstractmethod 13 | def clear_stats(self): 14 | pass 15 | 16 | @abstractmethod 17 | def train(self): 18 | pass 19 | 20 | @abstractmethod 21 | def train_epoch(self): 22 | pass 23 | 24 | @abstractmethod 25 | def get_full_state_weights(self): 26 | pass 27 | 28 | @abstractmethod 29 | def set_full_state_weights(self, weights): 30 | pass 31 | 32 | @abstractmethod 33 | def get_weights(self): 34 | pass 35 | 36 | @abstractmethod 37 | def set_weights(self, weights): 38 | pass 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /rl_games/networks/__init__.py: -------------------------------------------------------------------------------- 1 | from rl_games.networks.tcnn_mlp import TcnnNetBuilder 2 | from rl_games.algos_torch import model_builder 3 | 4 | model_builder.register_network('tcnnnet', TcnnNetBuilder) -------------------------------------------------------------------------------- /rl_games/networks/tcnn_mlp.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class TcnnNet(nn.Module): 7 | def __init__(self, params, **kwargs): 8 | import tinycudann as tcnn 9 | nn.Module.__init__(self) 10 | self.actions_num = actions_num = kwargs.pop('actions_num') 11 | input_shape = kwargs.pop('input_shape') 12 | num_inputs = input_shape[0] 13 | self.central_value = params.get('central_value', False) 14 | self.sigma = torch.nn.Parameter(torch.zeros(actions_num, requires_grad=True, dtype=torch.float32), 15 | requires_grad=True) 16 | self.model = tcnn.NetworkWithInputEncoding(n_input_dims=num_inputs, n_output_dims=actions_num+1, 17 | encoding_config=params["encoding"], network_config=params["network"]) 18 | def is_rnn(self): 19 | return False 20 | 21 | def forward(self, obs): 22 | obs = obs['obs'] 23 | mu_val = self.model(obs) 24 | mu, 
value = torch.split(mu_val, [self.actions_num, 1], dim=1) 25 | return mu, mu * 0.0 + self.sigma, value, None 26 | 27 | 28 | from rl_games.algos_torch.network_builder import NetworkBuilder 29 | 30 | 31 | class TcnnNetBuilder(NetworkBuilder): 32 | def __init__(self, **kwargs): 33 | NetworkBuilder.__init__(self) 34 | 35 | def load(self, params): 36 | self.params = params 37 | 38 | def build(self, name, **kwargs): 39 | return TcnnNet(self.params, **kwargs) 40 | 41 | def __call__(self, name, **kwargs): 42 | return self.build(name, **kwargs) 43 | -------------------------------------------------------------------------------- /scripts/bc_axis.sh: -------------------------------------------------------------------------------- 1 | GPUS=$1 2 | 3 | array=( $@ ) 4 | len=${#array[@]} 5 | EXTRA_ARGS=${array[@]:1:$len} 6 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 7 | 8 | CUDA_VISIBLE_DEVICES=${GPUS} \ 9 | python ./isaacgymenvs/train_distillation.py headless=True \ 10 | task.env.legacy_obs=False distill.bc_training=warmup \ 11 | task.env.objSet=C task.env.is_distillation=True \ 12 | task=AllegroArmMOAR task.env.numEnvs=64 \ 13 | train.params.config.minibatch_size=1024 \ 14 | train.params.config.central_value_config.minibatch_size=1024 \ 15 | task.env.observationType=full_stack_pointcloud \ 16 | distill.ablation_mode=multi-modality-plus \ 17 | distill.teacher_data_dir=demonstration-x-axis \ 18 | distill.student_logdir=runs/student/bc-x-multimodplus \ 19 | train.params.config.user_prefix=bc-x-multimodplus \ 20 | experiment=bc-x-multimodplus wandb_activate=True \ 21 | ${EXTRA_ARGS} -------------------------------------------------------------------------------- /scripts/bc_baoding.sh: -------------------------------------------------------------------------------- 1 | GPUS=$1 2 | 3 | array=( $@ ) 4 | len=${#array[@]} 5 | EXTRA_ARGS=${array[@]:1:$len} 6 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 7 | 8 | CUDA_VISIBLE_DEVICES=${GPUS} \ 9 | python ./isaacgymenvs/train_distillation.py headless=True \ 10 | task.env.legacy_obs=False distill.bc_training=warmup \ 11 | task.env.objSet=ball task.env.is_distillation=True \ 12 | task=AllegroArmMOAR task.env.numEnvs=64 \ 13 | train.params.config.minibatch_size=1024 \ 14 | train.params.config.central_value_config.minibatch_size=1024 \ 15 | task.env.observationType=full_stack_baoding \ 16 | distill.ablation_mode=multi-modality-plus \ 17 | distill.teacher_data_dir=demonstration-baoding \ 18 | distill.student_logdir=runs/student/bc-baoding-multimodplus \ 19 | train.params.config.user_prefix=bc-baoding-multimodplus \ 20 | experiment=bc-baoding-multimodplus wandb_activate=True \ 21 | ${EXTRA_ARGS} -------------------------------------------------------------------------------- /scripts/bc_cross.sh: -------------------------------------------------------------------------------- 1 | GPUS=$1 2 | 3 | array=( $@ ) 4 | len=${#array[@]} 5 | EXTRA_ARGS=${array[@]:1:$len} 6 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 7 | 8 | CUDA_VISIBLE_DEVICES=${GPUS} \ 9 | python ./isaacgymenvs/train_distillation.py headless=True \ 10 | task.env.legacy_obs=False distill.bc_training=warmup \ 11 | task.env.objSet=cross task.env.is_distillation=True \ 12 | task=AllegroArmMOAR task.env.numEnvs=64 \ 13 | train.params.config.minibatch_size=1024 \ 14 | train.params.config.central_value_config.minibatch_size=1024 \ 15 | task.env.observationType=full_stack \ 16 | distill.ablation_mode=multi-modality-plus \ 17 | distill.teacher_data_dir=demonstration-cross \ 18 | distill.student_logdir=runs/student/bc-cross-multimodplus \ 19 
| train.params.config.user_prefix=bc-cross-multimodplus \ 20 | experiment=bc-cross-multimodplus wandb_activate=True \ 21 | ${EXTRA_ARGS} -------------------------------------------------------------------------------- /scripts/collect_axis.sh: -------------------------------------------------------------------------------- 1 | GPUS=$1 2 | 3 | array=( $@ ) 4 | len=${#array[@]} 5 | EXTRA_ARGS=${array[@]:1:$len} 6 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 7 | 8 | CUDA_VISIBLE_DEVICES=${GPUS} \ 9 | python ./isaacgymenvs/train_distillation.py headless=True \ 10 | distill.teacher_data_dir=demonstration-x-axis \ 11 | task.env.legacy_obs=False distill.bc_training=collect \ 12 | task.env.objSet=C task.env.is_distillation=True \ 13 | train.params.config.user_prefix=bc-x-collect task=AllegroArmMOAR \ 14 | task.env.numEnvs=64 train.params.config.minibatch_size=1024 \ 15 | experiment=bc-x-collect wandb_activate=False task.env.axis=x \ 16 | task.env.observationType=full_stack_pointcloud \ 17 | train.params.config.central_value_config.minibatch_size=1024 \ 18 | distill.worker_id=0 distill.ablation_mode=multi-modality-plus \ 19 | task.env.ablation_mode=multi-modality-plus \ 20 | ${EXTRA_ARGS} -------------------------------------------------------------------------------- /scripts/collect_baoding.sh: -------------------------------------------------------------------------------- 1 | GPUS=$1 2 | 3 | array=( $@ ) 4 | len=${#array[@]} 5 | EXTRA_ARGS=${array[@]:1:$len} 6 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 7 | 8 | CUDA_VISIBLE_DEVICES=${GPUS} \ 9 | python ./isaacgymenvs/train_distillation.py headless=True \ 10 | distill.teacher_data_dir=demonstration-baoding \ 11 | task.env.legacy_obs=False distill.bc_training=collect \ 12 | task.env.objSet=ball task.env.is_distillation=True \ 13 | train.params.config.user_prefix=bc-baoding-collect task=AllegroArmMOAR \ 14 | task.env.numEnvs=64 train.params.config.minibatch_size=1024 \ 15 | experiment=bc-baoding-collect wandb_activate=False \ 16 | task.env.observationType=full_stack_baoding \ 17 | train.params.config.central_value_config.minibatch_size=1024 \ 18 | distill.worker_id=0 distill.ablation_mode=multi-modality-plus \ 19 | task.env.ablation_mode=multi-modality-plus \ 20 | ${EXTRA_ARGS} -------------------------------------------------------------------------------- /scripts/collect_cross.sh: -------------------------------------------------------------------------------- 1 | GPUS=$1 2 | 3 | array=( $@ ) 4 | len=${#array[@]} 5 | EXTRA_ARGS=${array[@]:1:$len} 6 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 7 | 8 | CUDA_VISIBLE_DEVICES=${GPUS} \ 9 | python ./isaacgymenvs/train_distillation.py headless=True \ 10 | distill.teacher_data_dir=demonstration-cross \ 11 | task.env.legacy_obs=False distill.bc_training=collect \ 12 | task.env.objSet=cross task.env.is_distillation=True \ 13 | train.params.config.user_prefix=bc-cross-collect task=AllegroArmMOAR \ 14 | task.env.numEnvs=64 train.params.config.minibatch_size=1024 \ 15 | experiment=bc-cross-collect wandb_activate=False \ 16 | task.env.observationType=full_stack \ 17 | train.params.config.central_value_config.minibatch_size=1024 \ 18 | distill.worker_id=0 distill.ablation_mode=multi-modality-plus \ 19 | task.env.ablation_mode=multi-modality-plus \ 20 | ${EXTRA_ARGS} -------------------------------------------------------------------------------- /scripts/teacher_axis.sh: -------------------------------------------------------------------------------- 1 | GPUS=$1 2 | 3 | array=( $@ ) 4 | len=${#array[@]} 5 | 
EXTRA_ARGS=${array[@]:1:$len} 6 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 7 | 8 | CUDA_VISIBLE_DEVICES=${GPUS} \ 9 | python ./isaacgymenvs/train.py headless=True \ 10 | task.env.objSet=C task=AllegroArmMOAR task.env.axis=x \ 11 | task.env.numEnvs=8192 train.params.config.minibatch_size=16384 \ 12 | train.params.config.central_value_config.minibatch_size=16384 \ 13 | task.env.observationType=full_stack_pointcloud task.env.legacy_obs=True \ 14 | task.env.ablation_mode=no-pc experiment=x-axis \ 15 | train.params.config.user_prefix=x-axis wandb_activate=True \ 16 | ${EXTRA_ARGS} -------------------------------------------------------------------------------- /scripts/teacher_axis_visrl.sh: -------------------------------------------------------------------------------- 1 | GPUS=$1 2 | 3 | array=( $@ ) 4 | len=${#array[@]} 5 | EXTRA_ARGS=${array[@]:1:$len} 6 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 7 | 8 | CUDA_VISIBLE_DEVICES=${GPUS} \ 9 | python ./isaacgymenvs/train.py headless=True \ 10 | task.env.objSet=C task=AllegroArmMOAR task.env.axis=x \ 11 | task.env.numEnvs=64 train.params.config.minibatch_size=1024 \ 12 | train.params.config.central_value_config.minibatch_size=1024 \ 13 | task.env.observationType=partial_stack_pointcloud task.env.legacy_obs=False \ 14 | task.env.ablation_mode=multi-modality-plus task.env.pc_mode=label \ 15 | experiment=x-axis-visualrl \ 16 | train.params.config.user_prefix=x-axis-visualrl \ 17 | wandb_activate=True \ 18 | ${EXTRA_ARGS} -------------------------------------------------------------------------------- /scripts/teacher_baoding.sh: -------------------------------------------------------------------------------- 1 | GPUS=$1 2 | 3 | array=( $@ ) 4 | len=${#array[@]} 5 | EXTRA_ARGS=${array[@]:1:$len} 6 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 7 | 8 | CUDA_VISIBLE_DEVICES=${GPUS} \ 9 | python ./isaacgymenvs/train.py headless=False \ 10 | task.env.objSet=ball task=AllegroArmMOAR task.env.axis=z \ 11 | task.env.numEnvs=8192 train.params.config.minibatch_size=16384 \ 12 | train.params.config.central_value_config.minibatch_size=16384 \ 13 | task.env.observationType=full_stack_baoding task.env.legacy_obs=True \ 14 | task.env.ablation_mode=no-pc experiment=baoding \ 15 | train.params.config.user_prefix=baoding wandb_activate=True \ 16 | ${EXTRA_ARGS} -------------------------------------------------------------------------------- /scripts/teacher_baoding_visrl.sh: -------------------------------------------------------------------------------- 1 | GPUS=$1 2 | 3 | array=( $@ ) 4 | len=${#array[@]} 5 | EXTRA_ARGS=${array[@]:1:$len} 6 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 7 | 8 | CUDA_VISIBLE_DEVICES=${GPUS} \ 9 | python ./isaacgymenvs/train.py headless=True \ 10 | task.env.objSet=ball task=AllegroArmMOAR task.env.axis=z \ 11 | task.env.numEnvs=64 train.params.config.minibatch_size=1024 \ 12 | train.params.config.central_value_config.minibatch_size=1024 \ 13 | task.env.observationType=partial_stack_baoding task.env.legacy_obs=False \ 14 | task.env.ablation_mode=multi-modality-plus task.env.pc_mode=label \ 15 | experiment=baoding-visualrl \ 16 | train.params.config.user_prefix=baoding-visualrl \ 17 | wandb_activate=True \ 18 | ${EXTRA_ARGS} -------------------------------------------------------------------------------- /scripts/teacher_cross.sh: -------------------------------------------------------------------------------- 1 | GPUS=$1 2 | 3 | array=( $@ ) 4 | len=${#array[@]} 5 | EXTRA_ARGS=${array[@]:1:$len} 6 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 7 | 8 | 
CUDA_VISIBLE_DEVICES=${GPUS} \ 9 | python ./isaacgymenvs/train.py headless=True \ 10 | task.env.objSet=cross task=AllegroArmMOAR task.env.axis=z \ 11 | task.env.numEnvs=8192 train.params.config.minibatch_size=16384 \ 12 | train.params.config.central_value_config.minibatch_size=16384 \ 13 | task.env.observationType=full_stack task.env.legacy_obs=True \ 14 | task.env.ablation_mode=no-pc experiment=wheel-wrench \ 15 | train.params.config.user_prefix=wheel-wrench wandb_activate=True \ 16 | ${EXTRA_ARGS} -------------------------------------------------------------------------------- /scripts/teacher_cross_visrl.sh: -------------------------------------------------------------------------------- 1 | GPUS=$1 2 | 3 | array=( $@ ) 4 | len=${#array[@]} 5 | EXTRA_ARGS=${array[@]:1:$len} 6 | EXTRA_ARGS_SLUG=${EXTRA_ARGS// /_} 7 | 8 | CUDA_VISIBLE_DEVICES=${GPUS} \ 9 | python ./isaacgymenvs/train.py headless=True \ 10 | task.env.objSet=cross task=AllegroArmMOAR task.env.axis=z \ 11 | task.env.numEnvs=64 train.params.config.minibatch_size=1024 \ 12 | train.params.config.central_value_config.minibatch_size=1024 \ 13 | task.env.observationType=partial_stack task.env.legacy_obs=False \ 14 | task.env.ablation_mode=multi-modality-plus task.env.pc_mode=label \ 15 | experiment=wheel-wrench-visualrl \ 16 | train.params.config.user_prefix=wheel-wrench-visualrl \ 17 | wandb_activate=True \ 18 | ${EXTRA_ARGS} --------------------------------------------------------------------------------
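A note on the shell scripts above: they all follow one calling convention. The first argument is the GPU id (or comma-separated ids) exported through CUDA_VISIBLE_DEVICES, and every remaining argument is captured into EXTRA_ARGS and appended verbatim to the python command, where it is consumed as additional Hydra-style overrides by train.py / train_distillation.py. So "bash scripts/teacher_axis.sh 0" launches the x-axis teacher on GPU 0, and any further key=value pairs (for instance a seed or checkpoint path, if those keys exist in the Hydra config) are forwarded as overrides on top of the values hard-coded in the script.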
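The SMAC and test YAML files above are plain rl_games PPO configs; nothing in them is specific to this repository. As a rough sketch, one of them could be launched with the stock rl_games Runner, assuming rl_games is installed as packaged here and the SMAC / StarCraft II environment is available (the Runner API shown is the upstream rl_games interface, not something defined in this repo):

    import yaml
    from rl_games.torch_runner import Runner

    # Load one of the configs shown above; the whole file is a single 'params' tree.
    with open('rl_games/configs/smac/3s_vs_5z.yaml') as f:
        config = yaml.safe_load(f)

    runner = Runner()
    runner.load(config)                          # resolves algo, model and network builders by name
    # env_name 'smac' must resolve in rl_games' env_configurations (SMAC installed).
    runner.run({'train': True, 'play': False})   # pass 'play': True plus a checkpoint to evaluate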
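One detail worth keeping in mind when editing those configs: rl_games requires minibatch_size to divide the batch collected each epoch, which for the multi-agent SMAC maps is num_actors x horizon_length x n_agents, with n_agents coming from the map (3 for 3m, 5 for 5m_vs_6m, 6 for 6h_vs_8z, and so on). The files above consistently set the minibatch to half of that batch, which is where comments such as "# 5 * 512" and "# 6 * 512" come from. A quick sanity check using the 5m_vs_6m_rnn.yaml numbers (n_agents = 5 is map-specific, not a config key):

    num_actors, horizon_length, n_agents = 8, 128, 5
    batch_size = num_actors * horizon_length * n_agents   # 5120 transitions per PPO epoch
    minibatch_size = 2560                                  # half the batch, i.e. 5 * 512
    seq_length = 8                                         # RNN BPTT window from the config
    assert batch_size % minibatch_size == 0                # rl_games enforces this divisibility
    assert horizon_length % seq_length == 0                # sequences must tile the rollout for RNN training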
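rl_games/envs/__init__.py and rl_games/networks/__init__.py above show the extension mechanism these configs rely on: model_builder.register_network maps a string to a NetworkBuilder, and a config then selects it through params.network.name (as the test configs do with 'testnet'). A minimal sketch of a custom builder modeled on TcnnNetBuilder above; TinyNet, TinyNetBuilder and the 'tinynet' name are hypothetical and only illustrate the required interface:

    import torch
    from torch import nn
    from rl_games.algos_torch import model_builder
    from rl_games.algos_torch.network_builder import NetworkBuilder

    class TinyNet(nn.Module):
        # Hypothetical continuous-action network following the same return
        # convention as TcnnNet above: (mu, sigma, value, rnn_states).
        def __init__(self, params, **kwargs):
            super().__init__()
            self.actions_num = kwargs.pop('actions_num')
            num_inputs = kwargs.pop('input_shape')[0]
            self.sigma = nn.Parameter(torch.zeros(self.actions_num))
            self.trunk = nn.Sequential(nn.Linear(num_inputs, 64), nn.ReLU(),
                                       nn.Linear(64, self.actions_num + 1))

        def is_rnn(self):
            return False

        def forward(self, obs_dict):
            out = self.trunk(obs_dict['obs'])
            mu, value = torch.split(out, [self.actions_num, 1], dim=1)
            return mu, mu * 0.0 + self.sigma, value, None

    class TinyNetBuilder(NetworkBuilder):
        def load(self, params):
            self.params = params

        def build(self, name, **kwargs):
            return TinyNet(self.params, **kwargs)

        def __call__(self, name, **kwargs):
            return self.build(name, **kwargs)

    # Registration, mirroring rl_games/networks/__init__.py above; a config would
    # then select this network with  params.network.name: tinynet
    model_builder.register_network('tinynet', TinyNetBuilder)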
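Finally, the test configs (env_name: test_env with env_config.name: TestRnnEnv-v0) depend on the registrations in rl_games/envs/test/__init__.py above. For debugging, the env can also be constructed directly; the keyword arguments below mirror the env_config keys in test_rnn.yaml and this sketch assumes the fork keeps the upstream TestRNNEnv implementation and the old-style gym step/reset API:

    import gym
    import rl_games.envs.test   # executes the gym.envs.register(...) calls shown above

    # kwargs mirror env_config in test_rnn.yaml; unspecified keys fall back to env defaults
    env = gym.make('TestRnnEnv-v0', hide_object=True, apply_dist_reward=False,
                   min_dist=2, max_dist=8, use_central_value=False)
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    print(env.observation_space, env.action_space, reward, done)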