├── D2SR_RCAR_中文版.pdf
├── D2SSR
├── __init__.py
├── baseOffPolicy.py
├── d2ssr_train_torch.py
├── td3_per_her.py
├── torch_arguments.py
└── tune_d2ssr_main.py
├── DRLib.jpg
├── DRLib_tree.txt
├── HER_DRLib_Net_Reload
└── 2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1
│ └── 2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300
│ ├── actor.pth
│ ├── config.json
│ ├── norm.pkl
│ └── progress.txt
├── HER_DRLib_mpi
├── 2022-07-29_HER_mpi19_random_TD3_FetchPush-v1
│ └── 2022-07-29_12-21-04-HER_mpi19_random_TD3_FetchPush-v1_s123
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-29_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1
│ └── 2022-07-29_12-22-08-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s123
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-29_HER_mpi1_random_TD3_FetchPush-v1
│ ├── 2022-07-29_12-21-16-HER_mpi1_random_TD3_FetchPush-v1_s123
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-29_12-24-48-HER_mpi1_random_TD3_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-29_12-24-56-HER_mpi1_random_TD3_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-29_HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1
│ └── 2022-07-29_12-22-27-HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1_s123
│ │ ├── config.json
│ │ └── progress.txt
└── MPI1-6-19.png
├── HER_DRLib_mpi1
├── 2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1
│ ├── 2022-07-29_12-25-10-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-29_12-25-21-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-29_12-25-27-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-29_HER_mpi1_random_TD3_FetchPush-v1
│ ├── 2022-07-29_12-21-16-HER_mpi1_random_TD3_FetchPush-v1_s123
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-29_12-24-48-HER_mpi1_random_TD3_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-29_12-24-56-HER_mpi1_random_TD3_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1
│ ├── 2022-07-29_12-25-42-HER_mpi1_random_TD3_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-29_12-25-46-HER_mpi1_random_TD3_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-29_12-25-47-HER_mpi1_random_TD3_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1
│ ├── 2022-07-30_22-19-36-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-30_22-19-42-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-30_22-19-45-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1
│ ├── 2022-07-30_22-19-22-HER_mpi1_random_DDPG_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-30_22-19-27-HER_mpi1_random_DDPG_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-30_22-19-30-HER_mpi1_random_DDPG_FetchPush-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1
│ ├── 2022-07-30_22-18-55-HER_mpi1_random_DDPG_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-30_22-19-10-HER_mpi1_random_DDPG_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-30_22-19-14-HER_mpi1_random_DDPG_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1
│ ├── 2022-07-31_17-09-31-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-31_17-09-33-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-31_17-09-34-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1
│ ├── 2022-07-31_17-09-45-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-31_17-09-48-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-31_17-09-48-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1
│ ├── 2022-07-31_17-09-58-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-31_17-10-05-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-31_17-10-07-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1
│ ├── 2022-07-31_17-09-01-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-31_17-09-03-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-31_17-09-05-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-31_HER_mpi1_random_SAC_FetchPush-v1
│ ├── 2022-07-31_17-08-42-HER_mpi1_random_SAC_FetchPush-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-31_17-08-44-HER_mpi1_random_SAC_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-31_17-08-48-HER_mpi1_random_SAC_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1
│ ├── 2022-07-31_17-08-20-HER_mpi1_random_SAC_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-31_17-08-26-HER_mpi1_random_SAC_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-31_17-08-29-HER_mpi1_random_SAC_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1
│ ├── 2022-08-01_12-21-12-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-21-16-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-21-19-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1
│ ├── 2022-08-01_12-20-44-HER_mpi1_random_DDPGTorch_FetchPush-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-20-54-HER_mpi1_random_DDPGTorch_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-20-57-HER_mpi1_random_DDPGTorch_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1
│ ├── 2022-08-01_12-21-31-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-21-36-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-21-39-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1
│ ├── 2022-08-01_12-22-28-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-22-30-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-22-34-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1
│ ├── 2022-08-01_12-22-12-HER_mpi1_random_SACTorch_FetchPush-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-22-16-HER_mpi1_random_SACTorch_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-22-18-HER_mpi1_random_SACTorch_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1
│ ├── 2022-08-01_12-21-55-HER_mpi1_random_SACTorch_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-21-58-HER_mpi1_random_SACTorch_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-22-01-HER_mpi1_random_SACTorch_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1
│ ├── 2022-08-01_12-15-29-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-15-36-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-15-40-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1
│ ├── 2022-08-01_12-15-51-HER_mpi1_random_TD3Torch_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-16-00-HER_mpi1_random_TD3Torch_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-16-07-HER_mpi1_random_TD3Torch_FetchPush-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1
│ ├── 2022-08-01_12-16-17-HER_mpi1_random_TD3Torch_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-16-22-HER_mpi1_random_TD3Torch_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-16-25-HER_mpi1_random_TD3Torch_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2ep_reward_Pick.png
├── 2ep_reward_Push.png
└── 2ep_reward_Slide.png
├── LICENSE
├── README.md
├── algos
├── __init__.py
├── pytorch
│ ├── __init__.py
│ ├── ddpg_sp
│ │ ├── __init__.py
│ │ ├── core.py
│ │ ├── ddpg.py
│ │ └── ddpg_per_her.py
│ ├── offPolicy
│ │ ├── HER_introduction.md
│ │ ├── __init__.py
│ │ ├── baseOffPolicy.py
│ │ └── norm.py
│ ├── sac_sp
│ │ ├── __init__.py
│ │ ├── core.py
│ │ ├── sac.py
│ │ └── sac_per_her.py
│ └── td3_sp
│ │ ├── MPI_td3_per_her.py
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── core.py
│ │ ├── td3_gpu_class.py
│ │ └── td3_per_her.py
└── tf1
│ ├── __init__.py
│ ├── ddpg_sp
│ ├── DDPG_class.py
│ ├── DDPG_per_class.py
│ ├── DDPG_per_her.py
│ ├── DDPG_per_her_class.py
│ ├── DDPG_sp.py
│ ├── MPI_DDPG_per_her.py
│ ├── __init__.py
│ └── core.py
│ ├── offPolicy
│ ├── __init__.py
│ ├── baseOffPolicy.py
│ ├── core.py
│ └── norm.py
│ ├── sac_auto
│ ├── MPI_sac_auto_per_her.py
│ ├── __init__.py
│ ├── core.py
│ ├── sac_auto_class.py
│ ├── sac_auto_per_class.py
│ └── sac_auto_per_her.py
│ ├── sac_sp
│ ├── MPI_SAC_per_her.py
│ ├── SAC_class.py
│ ├── SAC_per_class.py
│ ├── SAC_per_her.py
│ ├── SAC_sp.py
│ ├── __init__.py
│ └── core.py
│ └── td3_sp
│ ├── MPI_TD3_per_her.py
│ ├── README.md
│ ├── TD3_class.py
│ ├── TD3_per_class.py
│ ├── TD3_per_her.py
│ ├── TD3_per_her_class.py
│ ├── TD3_sp.py
│ ├── __init__.py
│ └── core.py
├── arguments.py
├── gym
├── __init__.py
├── core.py
├── demo.py
├── demo_drawer.py
├── demo_insert_rand.py
├── demo_obs_push.py
├── double_push_demo.py
├── drawer2.png
├── drawer_box_demo.py
├── envs
│ ├── __init__.py
│ ├── algorithmic
│ │ ├── __init__.py
│ │ ├── algorithmic_env.py
│ │ ├── copy_.py
│ │ ├── duplicated_input.py
│ │ ├── repeat_copy.py
│ │ ├── reverse.py
│ │ ├── reversed_addition.py
│ │ └── tests
│ │ │ ├── __init__.py
│ │ │ └── test_algorithmic.py
│ ├── atari
│ │ ├── __init__.py
│ │ └── atari_env.py
│ ├── box2d
│ │ ├── __init__.py
│ │ ├── bipedal_walker.py
│ │ ├── car_dynamics.py
│ │ ├── car_racing.py
│ │ ├── lunar_lander.py
│ │ └── test_lunar_lander.py
│ ├── classic_control
│ │ ├── __init__.py
│ │ ├── acrobot.py
│ │ ├── assets
│ │ │ └── clockwise.png
│ │ ├── cartpole.py
│ │ ├── continuous_mountain_car.py
│ │ ├── mountain_car.py
│ │ ├── pendulum.py
│ │ └── rendering.py
│ ├── mujoco
│ │ ├── __init__.py
│ │ ├── ant.py
│ │ ├── ant_v3.py
│ │ ├── assets
│ │ │ ├── ant.xml
│ │ │ ├── half_cheetah.xml
│ │ │ ├── hopper.xml
│ │ │ ├── humanoid.xml
│ │ │ ├── humanoidstandup.xml
│ │ │ ├── inverted_double_pendulum.xml
│ │ │ ├── inverted_pendulum.xml
│ │ │ ├── point.xml
│ │ │ ├── pusher.xml
│ │ │ ├── reacher.xml
│ │ │ ├── striker.xml
│ │ │ ├── swimmer.xml
│ │ │ ├── thrower.xml
│ │ │ └── walker2d.xml
│ │ ├── half_cheetah.py
│ │ ├── half_cheetah_v3.py
│ │ ├── hopper.py
│ │ ├── hopper_v3.py
│ │ ├── humanoid.py
│ │ ├── humanoid_v3.py
│ │ ├── humanoidstandup.py
│ │ ├── inverted_double_pendulum.py
│ │ ├── inverted_pendulum.py
│ │ ├── mujoco_env.py
│ │ ├── pusher.py
│ │ ├── pusher2d_her.py
│ │ ├── reacher.py
│ │ ├── reacher2d_her.py
│ │ ├── reacher2d_her_harder.py
│ │ ├── striker.py
│ │ ├── swimmer.py
│ │ ├── swimmer_v3.py
│ │ ├── thrower.py
│ │ ├── walker2d.py
│ │ └── walker2d_v3.py
│ ├── registration.py
│ ├── robotics
│ │ ├── __init__.py
│ │ ├── assets
│ │ │ ├── LICENSE.md
│ │ │ ├── fetch
│ │ │ │ ├── double_push.xml
│ │ │ │ ├── drawer.xml
│ │ │ │ ├── drawer_body.xml
│ │ │ │ ├── drawer_box.xml
│ │ │ │ ├── drawer_dependencies.xml
│ │ │ │ ├── insert_rand.xml
│ │ │ │ ├── obs_push.xml
│ │ │ │ ├── occ_push.xml
│ │ │ │ ├── pick_and_place.xml
│ │ │ │ ├── push.xml
│ │ │ │ ├── reach.xml
│ │ │ │ ├── robot.xml
│ │ │ │ ├── shared.xml
│ │ │ │ ├── slide.xml
│ │ │ │ ├── stack.xml
│ │ │ │ ├── three_push.xml
│ │ │ │ └── three_stack.xml
│ │ │ ├── hand
│ │ │ │ ├── manipulate_block.xml
│ │ │ │ ├── manipulate_block_touch_sensors.xml
│ │ │ │ ├── manipulate_egg.xml
│ │ │ │ ├── manipulate_egg_touch_sensors.xml
│ │ │ │ ├── manipulate_pen.xml
│ │ │ │ ├── manipulate_pen_touch_sensors.xml
│ │ │ │ ├── reach.xml
│ │ │ │ ├── robot.xml
│ │ │ │ ├── robot_touch_sensors_92.xml
│ │ │ │ ├── shared.xml
│ │ │ │ ├── shared_asset.xml
│ │ │ │ └── shared_touch_sensors_92.xml
│ │ │ ├── stls
│ │ │ │ ├── fetch
│ │ │ │ │ ├── base_link_collision.stl
│ │ │ │ │ ├── bellows_link_collision.stl
│ │ │ │ │ ├── drawer.stl
│ │ │ │ │ ├── drawercase.stl
│ │ │ │ │ ├── drawerhandle.stl
│ │ │ │ │ ├── elbow_flex_link_collision.stl
│ │ │ │ │ ├── estop_link.stl
│ │ │ │ │ ├── forearm_roll_link_collision.stl
│ │ │ │ │ ├── gripper_link.stl
│ │ │ │ │ ├── head_pan_link_collision.stl
│ │ │ │ │ ├── head_tilt_link_collision.stl
│ │ │ │ │ ├── l_wheel_link_collision.stl
│ │ │ │ │ ├── laser_link.stl
│ │ │ │ │ ├── r_wheel_link_collision.stl
│ │ │ │ │ ├── shoulder_lift_link_collision.stl
│ │ │ │ │ ├── shoulder_pan_link_collision.stl
│ │ │ │ │ ├── torso_fixed_link.stl
│ │ │ │ │ ├── torso_lift_link_collision.stl
│ │ │ │ │ ├── upperarm_roll_link_collision.stl
│ │ │ │ │ ├── window_base.stl
│ │ │ │ │ ├── window_frame.stl
│ │ │ │ │ ├── window_h_base.stl
│ │ │ │ │ ├── window_h_frame.stl
│ │ │ │ │ ├── windowa_frame.stl
│ │ │ │ │ ├── windowa_glass.stl
│ │ │ │ │ ├── windowa_h_frame.stl
│ │ │ │ │ ├── windowa_h_glass.stl
│ │ │ │ │ ├── windowb_frame.stl
│ │ │ │ │ ├── windowb_glass.stl
│ │ │ │ │ ├── windowb_h_frame.stl
│ │ │ │ │ ├── windowb_h_glass.stl
│ │ │ │ │ ├── wrist_flex_link_collision.stl
│ │ │ │ │ └── wrist_roll_link_collision.stl
│ │ │ │ └── hand
│ │ │ │ │ ├── F1.stl
│ │ │ │ │ ├── F2.stl
│ │ │ │ │ ├── F3.stl
│ │ │ │ │ ├── TH1_z.stl
│ │ │ │ │ ├── TH2_z.stl
│ │ │ │ │ ├── TH3_z.stl
│ │ │ │ │ ├── forearm_electric.stl
│ │ │ │ │ ├── forearm_electric_cvx.stl
│ │ │ │ │ ├── knuckle.stl
│ │ │ │ │ ├── lfmetacarpal.stl
│ │ │ │ │ ├── palm.stl
│ │ │ │ │ └── wrist.stl
│ │ │ └── textures
│ │ │ │ ├── block.png
│ │ │ │ └── block_hidden.png
│ │ ├── fetch
│ │ │ ├── __init__.py
│ │ │ ├── dpush.py
│ │ │ ├── drawer.py
│ │ │ ├── drawer_box.py
│ │ │ ├── insert.py
│ │ │ ├── insert_rand.py
│ │ │ ├── obs_push.py
│ │ │ ├── occ_push.py
│ │ │ ├── pick_and_place.py
│ │ │ ├── push.py
│ │ │ ├── reach.py
│ │ │ ├── slide.py
│ │ │ ├── stack.py
│ │ │ ├── tpush.py
│ │ │ └── tstack.py
│ │ ├── fetch_double_push_env.py
│ │ ├── fetch_drawer_box_env.py
│ │ ├── fetch_drawer_env.py
│ │ ├── fetch_env.py
│ │ ├── fetch_insert_env.py
│ │ ├── fetch_insert_rand_env.py
│ │ ├── fetch_obs_push_env.py
│ │ ├── fetch_occ_push_env.py
│ │ ├── fetch_stack_env.py
│ │ ├── fetch_three_push_env.py
│ │ ├── fetch_three_stack_env.py
│ │ ├── hand
│ │ │ ├── __init__.py
│ │ │ ├── manipulate.py
│ │ │ ├── manipulate_touch_sensors.py
│ │ │ └── reach.py
│ │ ├── hand_env.py
│ │ ├── robot_double_push_env.py
│ │ ├── robot_env.py
│ │ ├── robot_joint_env.py
│ │ ├── robot_stack_env.py
│ │ ├── robot_three_push_env.py
│ │ ├── robot_three_stack_env.py
│ │ ├── rotations.py
│ │ └── utils.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── spec_list.py
│ │ ├── test_determinism.py
│ │ ├── test_envs.py
│ │ ├── test_envs_semantics.py
│ │ ├── test_frozenlake_dfs.py
│ │ ├── test_kellycoinflip.py
│ │ ├── test_mujoco_v2_to_v3_conversion.py
│ │ └── test_registration.py
│ ├── toy_text
│ │ ├── __init__.py
│ │ ├── blackjack.py
│ │ ├── cliffwalking.py
│ │ ├── discrete.py
│ │ ├── frozen_lake.py
│ │ ├── guessing_game.py
│ │ ├── hotter_colder.py
│ │ ├── kellycoinflip.py
│ │ ├── nchain.py
│ │ ├── roulette.py
│ │ └── taxi.py
│ └── unittest
│ │ ├── __init__.py
│ │ ├── cube_crash.py
│ │ └── memorize_digits.py
├── error.py
├── logger.py
├── mjkey.txt
├── obstacle_push.png
├── spaces
│ ├── __init__.py
│ ├── box.py
│ ├── dict.py
│ ├── discrete.py
│ ├── multi_binary.py
│ ├── multi_discrete.py
│ ├── space.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── test_spaces.py
│ │ └── test_utils.py
│ ├── tuple.py
│ └── utils.py
├── stack_demo.py
├── three_push_demo.py
├── three_stack_demo.py
├── utils
│ ├── __init__.py
│ ├── atomic_write.py
│ ├── closer.py
│ ├── colorize.py
│ ├── ezpickle.py
│ ├── json_utils.py
│ ├── play.py
│ └── seeding.py
├── vector
│ ├── __init__.py
│ ├── async_vector_env.py
│ ├── sync_vector_env.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── test_async_vector_env.py
│ │ ├── test_numpy_utils.py
│ │ ├── test_shared_memory.py
│ │ ├── test_spaces.py
│ │ ├── test_sync_vector_env.py
│ │ ├── test_vector_env.py
│ │ ├── test_vector_env_wrapper.py
│ │ └── utils.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── misc.py
│ │ ├── numpy_utils.py
│ │ ├── shared_memory.py
│ │ └── spaces.py
│ └── vector_env.py
├── version.py
└── wrappers
│ ├── __init__.py
│ ├── atari_preprocessing.py
│ ├── clip_action.py
│ ├── filter_observation.py
│ ├── flatten_observation.py
│ ├── frame_stack.py
│ ├── gray_scale_observation.py
│ ├── monitor.py
│ ├── monitoring
│ ├── __init__.py
│ ├── stats_recorder.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── helpers.py
│ │ └── test_video_recorder.py
│ └── video_recorder.py
│ ├── pixel_observation.py
│ ├── record_episode_statistics.py
│ ├── rescale_action.py
│ ├── resize_observation.py
│ ├── test_atari_preprocessing.py
│ ├── test_clip_action.py
│ ├── test_filter_observation.py
│ ├── test_flatten_observation.py
│ ├── test_frame_stack.py
│ ├── test_gray_scale_observation.py
│ ├── test_pixel_observation.py
│ ├── test_record_episode_statistics.py
│ ├── test_rescale_action.py
│ ├── test_resize_observation.py
│ ├── test_time_aware_observation.py
│ ├── test_transform_observation.py
│ ├── test_transform_reward.py
│ ├── tests
│ └── __init__.py
│ ├── time_aware_observation.py
│ ├── time_limit.py
│ ├── transform_observation.py
│ └── transform_reward.py
├── memory
├── __init__.py
├── per_memory.py
├── simple_memory.py
├── sp_memory.py
├── sp_memory_torch.py
├── sp_per_memory.py
└── sp_per_memory_torch.py
├── pip_requirement.txt
├── spinup_utils
├── README.md
├── __init__.py
├── delete_no_checkpoint_or_pth.py
├── demo.png
├── group_plot.py
├── log2table.py
├── logx.py
├── mpi_pytorch.py
├── mpi_tf.py
├── mpi_tools.py
├── plot.py
├── plot_demo_files
│ ├── 2020-10-02_12-18-23-gym_clean_buffer_reach_d2s_dense_n2b2_s5958
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2020-10-02_12-18-42-gym_clean_buffer_reach_d2s_dense_n2b2_s9317
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2020-10-02_12-18-43-gym_clean_buffer_reach_d2s_dense_n2b2_s7515
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2020-10-02_12-18-50-gym_clean_buffer_reach_d2s_dense_n2b2_s9180
│ │ ├── config.json
│ │ └── progress.txt
│ └── __init__.py
├── plot_success.py
├── plot_success_group.py
├── print_logger.py
├── run_entrypoint.py
├── run_utils.py
├── serialization_utils.py
└── user_config.py
├── tf1_arguments.py
├── torch_arguments.py
├── train_tf1.py
├── train_tf1_mpi.py
├── train_torch.py
├── train_torch_mpi.py
├── train_torch_mpi_norm_load.py
├── train_torch_mpi_norm_save.py
└── tune_exps
├── __init__.py
├── tune_arguments.py
├── tune_exps_demo.py
└── tune_func.py

/D2SR_RCAR_中文版.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/D2SR_RCAR_中文版.pdf
--------------------------------------------------------------------------------
/D2SSR/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/D2SSR/__init__.py
--------------------------------------------------------------------------------
/DRLib.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/DRLib.jpg
--------------------------------------------------------------------------------
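The run folders in the tree above all follow one naming scheme: <date>_<exp_name>/<date>_<time>-<exp_name>_s<seed>, where exp_name itself encodes the trick (HER), the number of MPI workers, the goal-sampling strategy (random), the algorithm (TD3/DDPG/SAC, with a Torch suffix for the PyTorch ports), and the Gym task. A minimal sketch of a parser for that convention, in case runs need to be grouped programmatically; the regex and field names are illustrative, not part of the repo:

    import re

    # Illustrative pattern for names such as
    # "2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300".
    RUN_RE = re.compile(
        r"(?P<date>\d{4}-\d{2}-\d{2})_(?P<time>\d{2}-\d{2}-\d{2})-"
        r"(?P<trick>[A-Za-z0-9]+)_mpi(?P<mpi>\d+)_(?P<goal_sampling>[a-z]+)_"
        r"(?P<algo>\w+?)_(?P<env>[A-Za-z]+-v\d+)_s(?P<seed>\d+)$"
    )

    def parse_run_dir(name):
        """Split a run-directory name into its components; None if it differs."""
        m = RUN_RE.match(name)
        return m.groupdict() if m else None

    print(parse_run_dir(
        "2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300"))
    # -> {'date': '2022-08-12', 'time': '15-57-53', 'trick': 'HER',
    #     'mpi': '1', 'goal_sampling': 'random', 'algo': 'TD3Torch',
    #     'env': 'FetchPush-v1', 'seed': '300'}

The lazy algo group lets multi-token algorithm names such as SAC_AUTO parse correctly, since the env group only admits letters before its -vN suffix.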
/HER_DRLib_Net_Reload/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/HER_DRLib_Net_Reload/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/actor.pth
--------------------------------------------------------------------------------
/HER_DRLib_Net_Reload/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_Net_Reload/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/norm.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/HER_DRLib_Net_Reload/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/norm.pkl
--------------------------------------------------------------------------------
/HER_DRLib_mpi/2022-07-29_HER_mpi19_random_TD3_FetchPush-v1/2022-07-29_12-21-04-HER_mpi19_random_TD3_FetchPush-v1_s123/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi19_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi19_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi19_random_TD3_FetchPush-v1/2022-07-29_12-21-04-HER_mpi19_random_TD3_FetchPush-v1_s123"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi/2022-07-29_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-07-29_12-22-08-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s123/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-07-29_12-22-08-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s123"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-21-16-HER_mpi1_random_TD3_FetchPush-v1_s123/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-21-16-HER_mpi1_random_TD3_FetchPush-v1_s123"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-48-HER_mpi1_random_TD3_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-48-HER_mpi1_random_TD3_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-56-HER_mpi1_random_TD3_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-56-HER_mpi1_random_TD3_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi/2022-07-29_HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1/2022-07-29_12-22-27-HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1_s123/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1/2022-07-29_12-22-27-HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1_s123"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi/MPI1-6-19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/HER_DRLib_mpi/MPI1-6-19.png
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1/2022-07-29_12-25-10-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1/2022-07-29_12-25-10-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1/2022-07-29_12-25-21-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1/2022-07-29_12-25-21-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1/2022-07-29_12-25-27-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1/2022-07-29_12-25-27-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-21-16-HER_mpi1_random_TD3_FetchPush-v1_s123/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-21-16-HER_mpi1_random_TD3_FetchPush-v1_s123"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-48-HER_mpi1_random_TD3_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-48-HER_mpi1_random_TD3_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-56-HER_mpi1_random_TD3_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-56-HER_mpi1_random_TD3_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1/2022-07-29_12-25-42-HER_mpi1_random_TD3_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1/2022-07-29_12-25-42-HER_mpi1_random_TD3_FetchSlide-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1/2022-07-29_12-25-46-HER_mpi1_random_TD3_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1/2022-07-29_12-25-46-HER_mpi1_random_TD3_FetchSlide-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1/2022-07-29_12-25-47-HER_mpi1_random_TD3_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1/2022-07-29_12-25-47-HER_mpi1_random_TD3_FetchSlide-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1/2022-07-30_22-19-36-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1/2022-07-30_22-19-36-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1/2022-07-30_22-19-42-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1/2022-07-30_22-19-42-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1/2022-07-30_22-19-45-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1/2022-07-30_22-19-45-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/2022-07-30_22-19-22-HER_mpi1_random_DDPG_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/2022-07-30_22-19-22-HER_mpi1_random_DDPG_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/2022-07-30_22-19-27-HER_mpi1_random_DDPG_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/2022-07-30_22-19-27-HER_mpi1_random_DDPG_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/2022-07-30_22-19-30-HER_mpi1_random_DDPG_FetchPush-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/2022-07-30_22-19-30-HER_mpi1_random_DDPG_FetchPush-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1/2022-07-30_22-18-55-HER_mpi1_random_DDPG_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1/2022-07-30_22-18-55-HER_mpi1_random_DDPG_FetchSlide-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1/2022-07-30_22-19-10-HER_mpi1_random_DDPG_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1/2022-07-30_22-19-10-HER_mpi1_random_DDPG_FetchSlide-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1/2022-07-30_22-19-14-HER_mpi1_random_DDPG_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1/2022-07-30_22-19-14-HER_mpi1_random_DDPG_FetchSlide-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1/2022-07-31_17-09-31-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1/2022-07-31_17-09-31-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1/2022-07-31_17-09-33-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1/2022-07-31_17-09-33-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1/2022-07-31_17-09-34-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1/2022-07-31_17-09-34-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1/2022-07-31_17-09-45-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1/2022-07-31_17-09-45-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1/2022-07-31_17-09-48-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1/2022-07-31_17-09-48-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
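Each of these config.json files has the same skeleton: args, env, logger, and net are recorded as null (presumably because they were not JSON-serializable at save time), so the informative fields are exp_name and logger_kwargs.output_dir. Note that the recorded output_dir (HER_DRLib_exps/... or HER_DRLib_Torch_exps/...) is where the run was originally written, not the HER_DRLib_mpi*/ folder it was later archived into, so paths should be resolved relative to where the files actually sit. A small loader sketch, assuming the tab-separated progress.txt format that Spinning-Up-style loggers write (pandas is my assumption here, not a stated repo dependency):

    import json
    import os

    import pandas as pd

    def load_run(run_dir):
        """Read one run's config.json and its tab-separated progress.txt."""
        with open(os.path.join(run_dir, "config.json")) as f:
            config = json.load(f)
        progress = pd.read_csv(os.path.join(run_dir, "progress.txt"), sep="\t")
        return config, progress

    # Example with one of the runs listed above:
    # config, progress = load_run(
    #     "HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/"
    #     "2022-07-30_22-19-22-HER_mpi1_random_DDPG_FetchPush-v1_s300")
    # print(config["exp_name"], list(progress.columns))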
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1/2022-07-31_17-09-48-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1/2022-07-31_17-09-48-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1/2022-07-31_17-09-58-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1/2022-07-31_17-09-58-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1/2022-07-31_17-10-05-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1/2022-07-31_17-10-05-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1/2022-07-31_17-10-07-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1/2022-07-31_17-10-07-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1/2022-07-31_17-09-01-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1/2022-07-31_17-09-01-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1/2022-07-31_17-09-03-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1/2022-07-31_17-09-03-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1/2022-07-31_17-09-05-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1/2022-07-31_17-09-05-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchPush-v1/2022-07-31_17-08-42-HER_mpi1_random_SAC_FetchPush-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchPush-v1/2022-07-31_17-08-42-HER_mpi1_random_SAC_FetchPush-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchPush-v1/2022-07-31_17-08-44-HER_mpi1_random_SAC_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchPush-v1/2022-07-31_17-08-44-HER_mpi1_random_SAC_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchPush-v1/2022-07-31_17-08-48-HER_mpi1_random_SAC_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchPush-v1/2022-07-31_17-08-48-HER_mpi1_random_SAC_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1/2022-07-31_17-08-20-HER_mpi1_random_SAC_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1/2022-07-31_17-08-20-HER_mpi1_random_SAC_FetchSlide-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1/2022-07-31_17-08-26-HER_mpi1_random_SAC_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1/2022-07-31_17-08-26-HER_mpi1_random_SAC_FetchSlide-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1/2022-07-31_17-08-29-HER_mpi1_random_SAC_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1/2022-07-31_17-08-29-HER_mpi1_random_SAC_FetchSlide-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1/2022-08-01_12-21-12-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1/2022-08-01_12-21-12-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1/2022-08-01_12-21-16-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1/2022-08-01_12-21-16-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1/2022-08-01_12-21-19-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1/2022-08-01_12-21-19-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1/2022-08-01_12-20-44-HER_mpi1_random_DDPGTorch_FetchPush-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchPush-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1/2022-08-01_12-20-44-HER_mpi1_random_DDPGTorch_FetchPush-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1/2022-08-01_12-20-54-HER_mpi1_random_DDPGTorch_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchPush-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1/2022-08-01_12-20-54-HER_mpi1_random_DDPGTorch_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1/2022-08-01_12-20-57-HER_mpi1_random_DDPGTorch_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchPush-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1/2022-08-01_12-20-57-HER_mpi1_random_DDPGTorch_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1/2022-08-01_12-21-31-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchSlide-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1/2022-08-01_12-21-31-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1/2022-08-01_12-21-36-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchSlide-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1/2022-08-01_12-21-36-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1/2022-08-01_12-21-39-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchSlide-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1/2022-08-01_12-21-39-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1/2022-08-01_12-22-28-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SACTorch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SACTorch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1/2022-08-01_12-22-28-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1/2022-08-01_12-22-30-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SACTorch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SACTorch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1/2022-08-01_12-22-30-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1/2022-08-01_12-22-34-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SACTorch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SACTorch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1/2022-08-01_12-22-34-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1/2022-08-01_12-22-12-HER_mpi1_random_SACTorch_FetchPush-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SACTorch_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SACTorch_FetchPush-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1/2022-08-01_12-22-12-HER_mpi1_random_SACTorch_FetchPush-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1/2022-08-01_12-22-16-HER_mpi1_random_SACTorch_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SACTorch_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SACTorch_FetchPush-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1/2022-08-01_12-22-16-HER_mpi1_random_SACTorch_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1/2022-08-01_12-22-18-HER_mpi1_random_SACTorch_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SACTorch_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SACTorch_FetchPush-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1/2022-08-01_12-22-18-HER_mpi1_random_SACTorch_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1/2022-08-01_12-21-55-HER_mpi1_random_SACTorch_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SACTorch_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SACTorch_FetchSlide-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1/2022-08-01_12-21-55-HER_mpi1_random_SACTorch_FetchSlide-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1/2022-08-01_12-21-58-HER_mpi1_random_SACTorch_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SACTorch_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SACTorch_FetchSlide-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1/2022-08-01_12-21-58-HER_mpi1_random_SACTorch_FetchSlide-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1/2022-08-01_12-22-01-HER_mpi1_random_SACTorch_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SACTorch_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SACTorch_FetchSlide-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1/2022-08-01_12-22-01-HER_mpi1_random_SACTorch_FetchSlide-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-08-01_12-15-29-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-08-01_12-15-29-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-08-01_12-15-36-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-08-01_12-15-36-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-08-01_12-15-40-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-08-01_12-15-40-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-01_12-15-51-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name":
"HER_mpi1_random_TD3Torch_FetchPush-v1", 8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-01_12-15-51-HER_mpi1_random_TD3Torch_FetchPush-v1_s300" 9 | }, 10 | "net": null 11 | } -------------------------------------------------------------------------------- /HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-01_12-16-00-HER_mpi1_random_TD3Torch_FetchPush-v1_s200/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "args": null, 3 | "env": null, 4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1", 5 | "logger": null, 6 | "logger_kwargs": { 7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1", 8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-01_12-16-00-HER_mpi1_random_TD3Torch_FetchPush-v1_s200" 9 | }, 10 | "net": null 11 | } -------------------------------------------------------------------------------- /HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-01_12-16-07-HER_mpi1_random_TD3Torch_FetchPush-v1_s100/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "args": null, 3 | "env": null, 4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1", 5 | "logger": null, 6 | "logger_kwargs": { 7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1", 8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-01_12-16-07-HER_mpi1_random_TD3Torch_FetchPush-v1_s100" 9 | }, 10 | "net": null 11 | } -------------------------------------------------------------------------------- /HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1/2022-08-01_12-16-17-HER_mpi1_random_TD3Torch_FetchSlide-v1_s100/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "args": null, 3 | "env": null, 4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchSlide-v1", 5 | "logger": null, 6 | "logger_kwargs": { 7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchSlide-v1", 8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1/2022-08-01_12-16-17-HER_mpi1_random_TD3Torch_FetchSlide-v1_s100" 9 | }, 10 | "net": null 11 | } -------------------------------------------------------------------------------- /HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1/2022-08-01_12-16-22-HER_mpi1_random_TD3Torch_FetchSlide-v1_s200/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "args": null, 3 | "env": null, 4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchSlide-v1", 5 | "logger": null, 6 | "logger_kwargs": { 7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchSlide-v1", 8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1/2022-08-01_12-16-22-HER_mpi1_random_TD3Torch_FetchSlide-v1_s200" 9 | }, 10 | "net": null 11 | } -------------------------------------------------------------------------------- /HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1/2022-08-01_12-16-25-HER_mpi1_random_TD3Torch_FetchSlide-v1_s300/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "args": null, 3 | "env": null, 4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchSlide-v1", 5 | "logger": null, 6 | "logger_kwargs": { 7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchSlide-v1", 8 | "output_dir": 
"HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1/2022-08-01_12-16-25-HER_mpi1_random_TD3Torch_FetchSlide-v1_s300" 9 | }, 10 | "net": null 11 | } -------------------------------------------------------------------------------- /HER_DRLib_mpi1/2ep_reward_Pick.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/HER_DRLib_mpi1/2ep_reward_Pick.png -------------------------------------------------------------------------------- /HER_DRLib_mpi1/2ep_reward_Push.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/HER_DRLib_mpi1/2ep_reward_Push.png -------------------------------------------------------------------------------- /HER_DRLib_mpi1/2ep_reward_Slide.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/HER_DRLib_mpi1/2ep_reward_Slide.png -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 kaixindelele 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /algos/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /algos/pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /algos/pytorch/ddpg_sp/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /algos/pytorch/offPolicy/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /algos/pytorch/offPolicy/norm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class StateNorm: 5 | def __init__(self, size, eps=1e-2, default_clip_range=5): 6 | self.size = size 7 | self.eps = eps 8 | self.default_clip_range = default_clip_range 9 | 10 | self.sum = np.zeros(self.size, np.float32) 11 | self.sumsq = np.zeros(self.size, np.float32) 12 | self.count = np.zeros(1, np.float32) 13 | 14 | # get the mean and std 15 | self.mean = np.zeros(self.size, np.float32) 16 | self.std = np.ones(self.size, np.float32) 17 | 18 | # update the parameters of the normalizer 19 | def update(self, v): 20 | v = v.reshape(-1, self.size) 21 | self.sum += v.sum(axis=0) 22 | self.sumsq += (np.square(v)).sum(axis=0) 23 | self.count += v.shape[0] 24 | 25 | self.mean = self.sum / self.count 26 | self.std = np.sqrt(np.maximum(np.square(self.eps), 27 | (self.sumsq / self.count) - np.square( 28 | self.sum / self.count))) 29 | # print("mean:", self.mean) 30 | # print("std:", self.std) 31 | 32 | # normalize the observation 33 | def normalize(self, v, clip_range=None): 34 | if clip_range is None: 35 | clip_range = self.default_clip_range 36 | 37 | return np.clip((v - self.mean) / self.std, 38 | -clip_range, clip_range) 39 | 40 | 41 | def main(): 42 | norm = StateNorm(size=3) 43 | v = np.random.random((4, 2, 3)) 44 | print("v:", v) 45 | 46 | r0 = v.reshape(-1, 3) 47 | print(r0.shape) 48 | print(r0) 49 | r0 = r0[:, 0] 50 | 51 | print(r0.shape) 52 | print(r0) 53 | std = np.std(r0) 54 | print(std.shape) 55 | print(std) 56 | norm.update(v=v) 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /algos/pytorch/sac_sp/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /algos/pytorch/td3_sp/README.md: -------------------------------------------------------------------------------- 1 | Update! 2 | 3 | I've finally gotten the tf version's MPI-based multiprocessing working~ 4 | 5 | The torch version hasn't finished testing yet; it still throws errors! 6 | 7 | If your CPU has enough cores, give MPI multiprocessing a try; the performance gain is substantial. 8 | 9 | Testing so far shows tf-DDPG performing best, while TD3 somehow comes out worse than ddpg. Unbelievable~ 10 |
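11 | The speed-up comes from running one worker per core and averaging gradients across ranks. A minimal mpi4py sketch of that pattern (mpi4py, the array size, and the function name here are illustrative assumptions, not this repo's exact API):
12 |
13 | ```python
14 | import numpy as np
15 | from mpi4py import MPI
16 |
17 | comm = MPI.COMM_WORLD
18 |
19 |
20 | def average_gradients(local_grad):
21 |     # sum the flat gradient vector over all ranks, then divide by world size
22 |     buf = np.zeros_like(local_grad)
23 |     comm.Allreduce(local_grad, buf, op=MPI.SUM)
24 |     return buf / comm.Get_size()
25 |
26 |
27 | if __name__ == '__main__':
28 |     # each rank contributes a different gradient; every rank receives the mean
29 |     g = np.ones(4) * comm.Get_rank()
30 |     print(comm.Get_rank(), average_gradients(g))
31 | ```
32 |
33 | Launch with, e.g., `mpirun -np 6 python grad_avg_demo.py`, one process per CPU core.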
-------------------------------------------------------------------------------- /algos/pytorch/td3_sp/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /algos/tf1/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /algos/tf1/ddpg_sp/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /algos/tf1/ddpg_sp/core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def placeholder(dim=None): 6 | return tf.placeholder(dtype=tf.float32, shape=(None,dim) if dim else (None,)) 7 | 8 | 9 | def placeholders(*args): 10 | return [placeholder(dim) for dim in args] 11 | 12 | 13 | def mlp(x, hidden_sizes=(32,), activation=tf.tanh, output_activation=None): 14 | for h in hidden_sizes[:-1]: 15 | x = tf.layers.dense(x, units=h, activation=activation) 16 | return tf.layers.dense(x, units=hidden_sizes[-1], activation=output_activation) 17 | 18 | 19 | def get_vars(scope): 20 | return [x for x in tf.global_variables() if scope in x.name] 21 | 22 | 23 | def count_vars(scope): 24 | v = get_vars(scope) 25 | return sum([np.prod(var.shape.as_list()) for var in v]) 26 | 27 | 28 | """ 29 | Actor-Critics 30 | """ 31 | 32 | 33 | def mlp_actor_critic(x, a, hidden_sizes=(400, 300), activation=tf.nn.relu, 34 | output_activation=tf.tanh, action_space=None): 35 | act_dim = a.shape.as_list()[-1] 36 | act_limit = action_space 37 | with tf.variable_scope('pi'): 38 | pi = act_limit * mlp(x, list(hidden_sizes)+[act_dim], activation, output_activation) 39 | with tf.variable_scope('q'): 40 | q = tf.squeeze(mlp(tf.concat([x, a], axis=-1), 41 | list(hidden_sizes)+[1], activation, None), axis=1) 42 | with tf.variable_scope('q', reuse=True): 43 | q_pi = tf.squeeze(mlp(tf.concat([x, pi], axis=-1), 44 | list(hidden_sizes)+[1], activation, None), axis=1) 45 | return pi, q, q_pi 46 | -------------------------------------------------------------------------------- /algos/tf1/offPolicy/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /algos/tf1/offPolicy/core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def placeholder(dim=None): 6 | return tf.placeholder(dtype=tf.float32, shape=(None,dim) if dim else (None,)) 7 | 8 | 9 | def placeholders(*args): 10 | return [placeholder(dim) for dim in args] 11 | 12 | 13 | def mlp(x, hidden_sizes=(32,), activation=tf.tanh, output_activation=None): 14 | for h in hidden_sizes[:-1]: 15 | x = tf.layers.dense(x, units=h, activation=activation) 16 | return tf.layers.dense(x, units=hidden_sizes[-1], activation=output_activation) 17 | 18 | 19 | def get_vars(scope): 20 | return [x for x in tf.global_variables() if scope in x.name] 21 | 22 | 23 | def count_vars(scope): 24 | v = get_vars(scope) 25 | return sum([np.prod(var.shape.as_list()) for var in v]) 26 | 27 | 28 | """ 29 | Actor-Critics 30 | """ 31 |
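# Usage sketch for the factory below (illustrative: the 'main' scope name and
# act_limit value follow the Spinning Up convention and are assumptions, not
# fixed by this file):
#   x_ph, a_ph = placeholders(obs_dim, act_dim)
#   with tf.variable_scope('main'):
#       pi, q, q_pi = mlp_actor_critic(x_ph, a_ph, action_space=act_limit)
# 'pi' is the tanh-squashed deterministic policy scaled to [-act_limit, act_limit];
# 'q' scores the replayed action, and 'q_pi' reuses the same Q weights to score
# the policy's own action, which is the term the DDPG actor loss maximizes.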
32 | 33 | def mlp_actor_critic(x, a, hidden_sizes=(400, 300), activation=tf.nn.relu, 34 | output_activation=tf.tanh, action_space=None): 35 | act_dim = a.shape.as_list()[-1] 36 | act_limit = action_space 37 | with tf.variable_scope('pi'): 38 | pi = act_limit * mlp(x, list(hidden_sizes)+[act_dim], activation, output_activation) 39 | with tf.variable_scope('q'): 40 | q = tf.squeeze(mlp(tf.concat([x, a], axis=-1), 41 | list(hidden_sizes)+[1], activation, None), axis=1) 42 | with tf.variable_scope('q', reuse=True): 43 | q_pi = tf.squeeze(mlp(tf.concat([x, pi], axis=-1), 44 | list(hidden_sizes)+[1], activation, None), axis=1) 45 | return pi, q, q_pi 46 | -------------------------------------------------------------------------------- /algos/tf1/offPolicy/norm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class StateNorm: 5 | def __init__(self, size, eps=1e-2, default_clip_range=5): 6 | self.size = size 7 | self.eps = eps 8 | self.default_clip_range = default_clip_range 9 | 10 | self.sum = np.zeros(self.size, np.float32) 11 | self.sumsq = np.zeros(self.size, np.float32) 12 | self.count = np.zeros(1, np.float32) 13 | 14 | # get the mean and std 15 | self.mean = np.zeros(self.size, np.float32) 16 | self.std = np.ones(self.size, np.float32) 17 | 18 | # update the parameters of the normalizer 19 | def update(self, v): 20 | v = v.reshape(-1, self.size) 21 | self.sum += v.sum(axis=0) 22 | self.sumsq += (np.square(v)).sum(axis=0) 23 | self.count += v.shape[0] 24 | 25 | self.mean = self.sum / self.count 26 | self.std = np.sqrt(np.maximum(np.square(self.eps), 27 | (self.sumsq / self.count) - np.square( 28 | self.sum / self.count))) 29 | # print("mean:", self.mean) 30 | # print("std:", self.std) 31 | 32 | # normalize the observation 33 | def normalize(self, v, clip_range=None): 34 | if clip_range is None: 35 | clip_range = self.default_clip_range 36 | 37 | return np.clip((v - self.mean) / self.std, 38 | -clip_range, clip_range) 39 | 40 | 41 | def main(): 42 | norm = StateNorm(size=3) 43 | v = np.random.random((4, 2, 3)) 44 | print("v:", v) 45 | 46 | r0 = v.reshape(-1, 3) 47 | print(r0.shape) 48 | print(r0) 49 | r0 = r0[:, 0] 50 | 51 | print(r0.shape) 52 | print(r0) 53 | std = np.std(r0) 54 | print(std.shape) 55 | print(std) 56 | norm.update(v=v) 57 | 58 | 59 | if __name__ == '__main__': 60 | main() 61 | -------------------------------------------------------------------------------- /algos/tf1/sac_auto/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /algos/tf1/sac_sp/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /algos/tf1/td3_sp/README.md: -------------------------------------------------------------------------------- 1 | Update! 2 | 3 | I've finally gotten the tf version's MPI-based multiprocessing working~ 4 | 5 | The torch version hasn't finished testing yet; it still throws errors!
6 | 7 | If your CPU has enough cores, give MPI multiprocessing a try; the performance gain is substantial. 8 | 9 | Testing so far shows tf-DDPG performing best, while TD3 somehow comes out worse than ddpg. Unbelievable~ 10 | -------------------------------------------------------------------------------- /algos/tf1/td3_sp/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /algos/tf1/td3_sp/core.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | 5 | def placeholder(dim=None): 6 | return tf.placeholder(dtype=tf.float32, 7 | shape=(None,dim) if dim else (None,)) 8 | 9 | 10 | def placeholders(*args): 11 | return [placeholder(dim) for dim in args] 12 | 13 | 14 | def mlp(x, hidden_sizes=(32,), activation=tf.tanh, output_activation=None): 15 | for h in hidden_sizes[:-1]: 16 | x = tf.layers.dense(x, units=h, activation=activation) 17 | return tf.layers.dense(x, units=hidden_sizes[-1], activation=output_activation) 18 | 19 | 20 | def get_vars(scope): 21 | return [x for x in tf.global_variables() if scope in x.name] 22 | 23 | 24 | def count_vars(scope): 25 | v = get_vars(scope) 26 | return sum([np.prod(var.shape.as_list()) for var in v]) 27 | 28 | 29 | """ 30 | Actor-Critics 31 | """ 32 | 33 | 34 | def mlp_actor_critic(x, a, hidden_sizes=(400, 300), activation=tf.nn.relu, 35 | output_activation=tf.tanh, action_space=None): 36 | act_dim = a.shape.as_list()[-1] 37 | act_limit = action_space 38 | with tf.variable_scope('pi'): 39 | pi = act_limit * mlp(x, list(hidden_sizes)+[act_dim], 40 | activation, output_activation) 41 | with tf.variable_scope('q1'): 42 | q1 = tf.squeeze(mlp(tf.concat([x, a], axis=-1), 43 | list(hidden_sizes)+[1], 44 | activation, None), axis=1) 45 | with tf.variable_scope('q2'): 46 | q2 = tf.squeeze(mlp(tf.concat([x, a], axis=-1), 47 | list(hidden_sizes)+[1], 48 | activation, None), axis=1) 49 | with tf.variable_scope('q1', reuse=True): 50 | q1_pi = tf.squeeze(mlp(tf.concat([x, pi], axis=-1), 51 | list(hidden_sizes)+[1], 52 | activation, None), axis=1) 53 | return pi, q1, q2, q1_pi 54 | -------------------------------------------------------------------------------- /gym/__init__.py: -------------------------------------------------------------------------------- 1 | import distutils.version 2 | import os 3 | import sys 4 | import warnings 5 | 6 | from gym import error 7 | from gym.version import VERSION as __version__ 8 | 9 | from gym.core import Env, GoalEnv, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper 10 | from gym.spaces import Space 11 | from gym.envs import make, spec, register 12 | from gym import logger 13 | from gym import vector 14 | from gym import wrappers 15 | 16 | __all__ = ["Env", "Space", "Wrapper", "make", "spec", "register"] 17 | -------------------------------------------------------------------------------- /gym/demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import gym 4 | # from gym.envs.robotics.fetch.insert import FetchInsertEnv 5 | # env = FetchInsertEnv() 6 | # env = gym.make("FetchPushFixed-v1") 7 | env = gym.make("FetchPickAndPlaceFixed-v1") 8 | # env = gym.make("FetchSlideFixed-v1") 9 | 10 | 11 | def p_control(env, obs, p_rate=0.2):  # crude per-axis bang-bang controller that steps the gripper toward the achieved goal 12 | a = env.action_space.sample() 13 | gg = obs['grip_goal'] 14 | ag = obs['achieved_goal'] 15 | error = ag - gg 16 | for axis, value in enumerate(error): 17 | if abs(value) > 0.02: 18 | if value > 0: 19 |
a[axis] = p_rate 20 | else: 21 | a[axis] = -p_rate 22 | else: 23 | a[axis] = 0 24 | # if axis == 0: 25 | # a[axis] = -p_rate 26 | # else: 27 | # a[axis] = p_rate 28 | action = a 29 | return action 30 | 31 | 32 | for ep in range(20): 33 | obs = env.reset() 34 | for i in range(200): 35 | # a = p_control(env, obs=obs) 36 | # 37 | a = env.action_space.sample() 38 | a[0] = 0.01 39 | if obs['grip_goal'][2] < 0.3: 40 | pass 41 | else: 42 | a[1] = -0.2 43 | a[2] = -0.2 44 | print("gg:", obs['grip_goal']) 45 | 46 | obs, reward, done, info = env.step(a) 47 | print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info)) 48 | env.render() 49 | -------------------------------------------------------------------------------- /gym/demo_drawer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import gym 4 | import cv2 5 | from gym.envs.robotics.fetch.drawer import FetchDrawerEnv 6 | env = FetchDrawerEnv() 7 | 8 | 9 | def p_control(env, obs, p_rate=0.1): 10 | a = env.action_space.sample() 11 | gg = obs['grip_goal'] 12 | ag = obs['achieved_goal'] 13 | ag[1] -= 0.01 14 | # ag[0] += 0.05 15 | error = ag - gg 16 | for axis, value in enumerate(error): 17 | if abs(value) > 0.01: 18 | if value > 0: 19 | a[axis] = p_rate 20 | else: 21 | a[axis] = -p_rate 22 | else: 23 | a[axis] = 0 24 | action = a 25 | # if np.random.random() < 0.1: 26 | # action[-1] = 1.0 27 | # else: 28 | # action[-1] = 0.0 29 | return action 30 | 31 | 32 | for ep in range(20): 33 | ag_list = [] 34 | obs = env.reset() 35 | move = False 36 | for i in range(50): 37 | if not move: 38 | a = p_control(env, obs=obs) 39 | gg2ag = np.linalg.norm(obs['grip_goal'] - obs['achieved_goal']) 40 | print("gg2ag:", gg2ag) 41 | if gg2ag < 0.03: 42 | a = env.action_space.sample() 43 | a[0] = -0.1 44 | a[-1] = 0 45 | # move = True 46 | # print("a:", a) 47 | # a = env.action_space.sample() 48 | a[2] = -1.0 49 | obs, reward, done, info = env.step(a) 50 | print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info)) 51 | print("ag:", obs['achieved_goal']) 52 | print("gg:", obs['grip_goal']) 53 | ag_list.append(obs['achieved_goal']) 54 | 55 | # env.render() 56 | image_size = 2048 57 | img = env.render(mode='rgb_array', width=image_size, height=image_size) 58 | clip_value = 200 59 | # crop indexing: [vertical, horizontal, :] 60 | img = img[clip_value*2:image_size-1*clip_value, 0:image_size-2*clip_value, :] 61 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 62 | cv2.imwrite('drawer2.png', img) 63 | 64 | # plt.plot(ag_list) 65 | # plt.pause(2) 66 | -------------------------------------------------------------------------------- /gym/demo_obs_push.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import gym 4 | import cv2 5 | from gym.envs.robotics.fetch.obs_push import FetchObsPushEnv 6 | env = FetchObsPushEnv() 7 | 8 | 9 | def p_control(env, obs, p_rate=0.2): 10 | a = env.action_space.sample() 11 | gg = obs['grip_goal'] 12 | ag = obs['achieved_goal'] 13 | error = ag - gg 14 | for axis, value in enumerate(error): 15 | if abs(value) > 0.02: 16 | if value > 0: 17 | a[axis] = p_rate 18 | else: 19 | a[axis] = -p_rate 20 | else: 21 | a[axis] = 0 22 | action = a 23 | return action 24 | 25 | 26 | for ep in range(20): 27 | obs = env.reset() 28 | for i in range(20): 29 | a = p_control(env, obs=obs) 30 | a[-1] = 0.0 31 | # a = env.action_space.sample() 32 | # a[0] = 0.01 33 | #
if obs['grip_goal'][2] < 0.3: 34 | # pass 35 | # else: 36 | # a[1] = -0.2 37 | # a[2] = -0.2 38 | print("gg:", obs['grip_goal']) 39 | a *= 0 40 | obs, reward, done, info = env.step(a) 41 | print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info)) 42 | # env.render() 43 | image_size = 2048 44 | img = env.render(mode='rgb_array', width=image_size, height=image_size) 45 | clip_value = 200 46 | # crop indexing: [vertical, horizontal, :] 47 | img = img[clip_value*2:image_size-1*clip_value, 0:image_size-2*clip_value, :] 48 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR) 49 | cv2.imwrite('obstacle_push.png', img) 50 | -------------------------------------------------------------------------------- /gym/double_push_demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import gym 4 | from gym.envs.robotics.fetch.dpush import FetchDoublePushEnv 5 | env = FetchDoublePushEnv() 6 | 7 | 8 | def p_control(env, obs, p_rate=0.2): 9 | a = env.action_space.sample() 10 | gg = obs['grip_goal'] 11 | ag = obs['achieved_goal'] 12 | error = ag - gg 13 | for axis, value in enumerate(error): 14 | if abs(value) > 0.02: 15 | if value > 0: 16 | a[axis] = p_rate 17 | else: 18 | a[axis] = -p_rate 19 | else: 20 | a[axis] = 0 21 | action = a 22 | return action 23 | 24 | 25 | for ep in range(20): 26 | obs = env.reset() 27 | for i in range(20): 28 | a = p_control(env, obs=obs) 29 | # a[-1] = 0.0 30 | # 31 | # a = env.action_space.sample() 32 | # a[0] = 0.01 33 | # if obs['grip_goal'][2] < 0.3: 34 | # pass 35 | # else: 36 | # a[1] = -0.2 37 | # a[2] = -0.2 38 | print("gg:", obs['grip_goal']) 39 | 40 | obs, reward, done, info = env.step(a) 41 | print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info)) 42 | env.render() 43 | -------------------------------------------------------------------------------- /gym/drawer2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/drawer2.png -------------------------------------------------------------------------------- /gym/drawer_box_demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import gym 4 | from gym.envs.robotics.fetch.drawer_open import FetchDrawerOpenEnv 5 | from gym.envs.robotics.fetch.drawer_horiz_open import FetchHorizonDrawerOpenEnv 6 | from gym.envs.robotics.fetch.drawer_box import FetchDrawerBoxEnv 7 | env = FetchDrawerBoxEnv() 8 | # env = FetchHorizonDrawerOpenEnv() 9 | 10 | 11 | def p_control(env, obs, p_rate=0.99): 12 | a = env.action_space.sample() 13 | gg = obs['grip_goal'] 14 | ag = obs['ag0'] 15 | ag[1] -= 0.01 16 | # ag[0] += 0.05 17 | error = ag - gg 18 | for axis, value in enumerate(error): 19 | if abs(value) > 0.01: 20 | if value > 0: 21 | a[axis] = p_rate 22 | else: 23 | a[axis] = -p_rate 24 | else: 25 | a[axis] = 0 26 | action = a 27 | # action = np.zeros(4) 28 | # if np.random.random() < 0.1: 29 | # action[-1] = 1.0 30 | # else: 31 | # action[-1] = 0.0 32 | return action 33 | 34 | 35 | env.task = 'in2out' 36 | # env.task = 'out2in' 37 | for ep in range(20): 38 | ag_list = [] 39 | obs = env.reset() 40 | move = False 41 | for i in range(100): 42 | if not move: 43 | a = p_control(env, obs=obs) 44 | gg2ag = np.linalg.norm(obs['grip_goal'] - obs['ag0']) 45 | print("gg2ag:", gg2ag) 46 | if gg2ag < 0.05: 47 | # a =
env.action_space.sample() 48 | a[0] = -0.1 49 | a[-1] = -1.0 50 | # move = True 51 | # print("a:", a) 52 | # a = env.action_space.sample() 53 | # a[2] = -1.0 54 | # if i > 60: 55 | # a[-1] = 1 56 | # a[2] = 1 57 | obs, reward, done, info = env.step(a) 58 | print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info)) 59 | print("ag:", obs['achieved_goal']) 60 | print("ag1:", obs['ag1']) 61 | print("gg:", obs['grip_goal']) 62 | ag_list.append(obs['achieved_goal']) 63 | 64 | env.render() 65 | # plt.plot(ag_list) 66 | # plt.pause(2) 67 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.algorithmic.copy_ import CopyEnv 2 | from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv 3 | from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv 4 | from gym.envs.algorithmic.reverse import ReverseEnv 5 | from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv 6 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/copy_.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to copy content from the input tape to 3 | the output tape. http://arxiv.org/abs/1511.07275 4 | """ 5 | from gym.envs.algorithmic import algorithmic_env 6 | 7 | 8 | class CopyEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | def __init__(self, base=5, chars=True): 10 | super(CopyEnv, self).__init__(base=base, chars=chars) 11 | 12 | def target_from_input_data(self, input_data): 13 | return input_data 14 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/duplicated_input.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to return every nth character from the input tape. 3 | http://arxiv.org/abs/1511.07275 4 | """ 5 | from gym.envs.algorithmic import algorithmic_env 6 | 7 | 8 | class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | def __init__(self, duplication=2, base=5): 10 | self.duplication = duplication 11 | super(DuplicatedInputEnv, self).__init__(base=base, chars=True) 12 | 13 | def generate_input_data(self, size): 14 | res = [] 15 | if size < self.duplication: 16 | size = self.duplication 17 | for _ in range(size // self.duplication): 18 | char = self.np_random.randint(self.base) 19 | for _ in range(self.duplication): 20 | res.append(char) 21 | return res 22 | 23 | def target_from_input_data(self, input_data): 24 | return [ 25 | input_data[i] for i in range(0, len(input_data), self.duplication) 26 | ] 27 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/repeat_copy.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to copy content multiple times from the input tape to 3 | the output tape. 
http://arxiv.org/abs/1511.07275 4 | """ 5 | from gym.envs.algorithmic import algorithmic_env 6 | 7 | 8 | class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1 10 | 11 | def __init__(self, base=5): 12 | super(RepeatCopyEnv, self).__init__(base=base, chars=True) 13 | self.last = 50 14 | 15 | def target_from_input_data(self, input_data): 16 | return input_data + list(reversed(input_data)) + input_data 17 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/reverse.py: -------------------------------------------------------------------------------- 1 | """ 2 | Task is to reverse content over the input tape. 3 | http://arxiv.org/abs/1511.07275 4 | """ 5 | from gym.envs.algorithmic import algorithmic_env 6 | 7 | 8 | class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv): 9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1 10 | 11 | def __init__(self, base=2): 12 | super(ReverseEnv, self).__init__( 13 | base=base, chars=True, starting_min_length=1 14 | ) 15 | self.last = 50 16 | 17 | def target_from_input_data(self, input_str): 18 | return list(reversed(input_str)) 19 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/reversed_addition.py: -------------------------------------------------------------------------------- 1 | from gym.envs.algorithmic import algorithmic_env 2 | 3 | 4 | class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv): 5 | def __init__(self, rows=2, base=3): 6 | super(ReversedAdditionEnv, self).__init__( 7 | rows=rows, base=base, chars=False 8 | ) 9 | 10 | def target_from_input_data(self, input_strings): 11 | curry = 0 12 | target = [] 13 | for digits in input_strings: 14 | total = sum(digits) + curry 15 | target.append(total % self.base) 16 | curry = total // self.base 17 | 18 | if curry > 0: 19 | target.append(curry) 20 | return target 21 | 22 | @property 23 | def time_limit(self): 24 | # Quirk preserved for the sake of consistency: add the length of the 25 | # input rather than the length of the desired output (which may differ 26 | # if there's an extra carried digit). 27 | # TODO: It seems like this time limit is so strict as to make 28 | # Addition3-v0 unsolvable, since agents aren't even given enough time 29 | # steps to look at all the digits. (The solutions on the scoreboard 30 | # seem to only work by save-scumming.) 
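# Concretely: adding two five-digit numbers means input_width == 5, so the
# episode is capped at 5*2 + 4 = 14 steps.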
31 | return self.input_width*2 + 4 32 | -------------------------------------------------------------------------------- /gym/envs/algorithmic/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/algorithmic/tests/__init__.py -------------------------------------------------------------------------------- /gym/envs/atari/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.atari.atari_env import AtariEnv 2 | -------------------------------------------------------------------------------- /gym/envs/box2d/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | import Box2D 3 | from gym.envs.box2d.lunar_lander import LunarLander 4 | from gym.envs.box2d.lunar_lander import LunarLanderContinuous 5 | from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore 6 | from gym.envs.box2d.car_racing import CarRacing 7 | except ImportError: 8 | Box2D = None 9 | -------------------------------------------------------------------------------- /gym/envs/box2d/test_lunar_lander.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | try: 3 | import Box2D 4 | from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander 5 | except ImportError: 6 | Box2D = None 7 | 8 | 9 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed') 10 | def test_lunar_lander(): 11 | _test_lander(LunarLander(), seed=0) 12 | 13 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed') 14 | def test_lunar_lander_continuous(): 15 | _test_lander(LunarLanderContinuous(), seed=0) 16 | 17 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed') 18 | def _test_lander(env, seed=None, render=False): 19 | total_reward = demo_heuristic_lander(env, seed=seed, render=render) 20 | assert total_reward > 100 21 | 22 | 23 | -------------------------------------------------------------------------------- /gym/envs/classic_control/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.classic_control.cartpole import CartPoleEnv 2 | from gym.envs.classic_control.mountain_car import MountainCarEnv 3 | from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv 4 | from gym.envs.classic_control.pendulum import PendulumEnv 5 | from gym.envs.classic_control.acrobot import AcrobotEnv 6 | 7 | -------------------------------------------------------------------------------- /gym/envs/classic_control/assets/clockwise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/classic_control/assets/clockwise.png -------------------------------------------------------------------------------- /gym/envs/mujoco/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco.mujoco_env import MujocoEnv 2 | # ^^^^^ so that user gets the correct error 3 | # message if mujoco is not installed correctly 4 | from gym.envs.mujoco.ant import AntEnv 5 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv 6 | from gym.envs.mujoco.hopper import HopperEnv 7 | from gym.envs.mujoco.walker2d import Walker2dEnv 8 | from 
gym.envs.mujoco.humanoid import HumanoidEnv 9 | from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv 10 | from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv 11 | from gym.envs.mujoco.reacher import ReacherEnv 12 | from gym.envs.mujoco.reacher2d_her import ReacherHEREnv 13 | from gym.envs.mujoco.reacher2d_her_harder import ReacherHERHarderEnv 14 | from gym.envs.mujoco.swimmer import SwimmerEnv 15 | from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv 16 | from gym.envs.mujoco.pusher import PusherEnv 17 | from gym.envs.mujoco.pusher2d_her import PusherHEREnv 18 | from gym.envs.mujoco.thrower import ThrowerEnv 19 | from gym.envs.mujoco.striker import StrikerEnv 20 | -------------------------------------------------------------------------------- /gym/envs/mujoco/ant.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, a): 11 | xposbefore = self.get_body_com("torso")[0] 12 | self.do_simulation(a, self.frame_skip) 13 | xposafter = self.get_body_com("torso")[0] 14 | forward_reward = (xposafter - xposbefore)/self.dt 15 | ctrl_cost = .5 * np.square(a).sum() 16 | contact_cost = 0.5 * 1e-3 * np.sum( 17 | np.square(np.clip(self.sim.data.cfrc_ext, -1, 1))) 18 | survive_reward = 1.0 19 | reward = forward_reward - ctrl_cost - contact_cost + survive_reward 20 | state = self.state_vector() 21 | notdone = np.isfinite(state).all() \ 22 | and state[2] >= 0.2 and state[2] <= 1.0 23 | done = not notdone 24 | ob = self._get_obs() 25 | return ob, reward, done, dict( 26 | reward_forward=forward_reward, 27 | reward_ctrl=-ctrl_cost, 28 | reward_contact=-contact_cost, 29 | reward_survive=survive_reward) 30 | 31 | def _get_obs(self): 32 | return np.concatenate([ 33 | self.sim.data.qpos.flat[2:], 34 | self.sim.data.qvel.flat, 35 | np.clip(self.sim.data.cfrc_ext, -1, 1).flat, 36 | ]) 37 | 38 | def reset_model(self): 39 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1) 40 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 41 | self.set_state(qpos, qvel) 42 | return self._get_obs() 43 | 44 | def viewer_setup(self): 45 | self.viewer.cam.distance = self.model.stat.extent * 0.5 46 | -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/inverted_double_pendulum.xml: -------------------------------------------------------------------------------- (MuJoCo model XML; the markup was lost in extraction and is not recoverable) -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/inverted_pendulum.xml: -------------------------------------------------------------------------------- (MuJoCo model XML; the markup was lost in extraction and is not recoverable) -------------------------------------------------------------------------------- /gym/envs/mujoco/assets/point.xml: -------------------------------------------------------------------------------- (MuJoCo model XML; the markup was lost in extraction and is not recoverable) -------------------------------------------------------------------------------- /gym/envs/mujoco/half_cheetah.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class
HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, action): 11 | xposbefore = self.sim.data.qpos[0] 12 | self.do_simulation(action, self.frame_skip) 13 | xposafter = self.sim.data.qpos[0] 14 | ob = self._get_obs() 15 | reward_ctrl = - 0.1 * np.square(action).sum() 16 | reward_run = (xposafter - xposbefore)/self.dt 17 | reward = reward_ctrl + reward_run 18 | done = False 19 | return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl) 20 | 21 | def _get_obs(self): 22 | return np.concatenate([ 23 | self.sim.data.qpos.flat[1:], 24 | self.sim.data.qvel.flat, 25 | ]) 26 | 27 | def reset_model(self): 28 | qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq) 29 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 30 | self.set_state(qpos, qvel) 31 | return self._get_obs() 32 | 33 | def viewer_setup(self): 34 | self.viewer.cam.distance = self.model.stat.extent * 0.5 35 | -------------------------------------------------------------------------------- /gym/envs/mujoco/hopper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, a): 11 | posbefore = self.sim.data.qpos[0] 12 | self.do_simulation(a, self.frame_skip) 13 | posafter, height, ang = self.sim.data.qpos[0:3] 14 | alive_bonus = 1.0 15 | reward = (posafter - posbefore) / self.dt 16 | reward += alive_bonus 17 | reward -= 1e-3 * np.square(a).sum() 18 | s = self.state_vector() 19 | done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and 20 | (height > .7) and (abs(ang) < .2)) 21 | ob = self._get_obs() 22 | return ob, reward, done, {} 23 | 24 | def _get_obs(self): 25 | return np.concatenate([ 26 | self.sim.data.qpos.flat[1:], 27 | np.clip(self.sim.data.qvel.flat, -10, 10) 28 | ]) 29 | 30 | def reset_model(self): 31 | qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq) 32 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 33 | self.set_state(qpos, qvel) 34 | return self._get_obs() 35 | 36 | def viewer_setup(self): 37 | self.viewer.cam.trackbodyid = 2 38 | self.viewer.cam.distance = self.model.stat.extent * 0.75 39 | self.viewer.cam.lookat[2] = 1.15 40 | self.viewer.cam.elevation = -20 41 | -------------------------------------------------------------------------------- /gym/envs/mujoco/humanoidstandup.py: -------------------------------------------------------------------------------- 1 | from gym.envs.mujoco import mujoco_env 2 | from gym import utils 3 | import numpy as np 4 | 5 | class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def _get_obs(self): 11 | data = self.sim.data 12 | return np.concatenate([data.qpos.flat[2:], 13 | data.qvel.flat, 14 | data.cinert.flat, 15 | data.cvel.flat, 16 | data.qfrc_actuator.flat, 17 | data.cfrc_ext.flat]) 18 | 19 | def step(self, a): 20 | self.do_simulation(a, self.frame_skip) 21 | pos_after = self.sim.data.qpos[2] 22 | data = self.sim.data 23 | 
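# standup reward: torso height gained per simulator timestep (uph_cost), plus a +1 alive bonus, minus quadratic control and (capped) impact costs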
uph_cost = (pos_after - 0) / self.model.opt.timestep 24 | 25 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum() 26 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum() 27 | quad_impact_cost = min(quad_impact_cost, 10) 28 | reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1 29 | 30 | done = bool(False) 31 | return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost) 32 | 33 | def reset_model(self): 34 | c = 0.01 35 | self.set_state( 36 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq), 37 | self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,) 38 | ) 39 | return self._get_obs() 40 | 41 | def viewer_setup(self): 42 | self.viewer.cam.trackbodyid = 1 43 | self.viewer.cam.distance = self.model.stat.extent * 1.0 44 | self.viewer.cam.lookat[2] = 0.8925 45 | self.viewer.cam.elevation = -20 46 | -------------------------------------------------------------------------------- /gym/envs/mujoco/inverted_double_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5) 9 | utils.EzPickle.__init__(self) 10 | 11 | def step(self, action): 12 | self.do_simulation(action, self.frame_skip) 13 | ob = self._get_obs() 14 | x, _, y = self.sim.data.site_xpos[0] 15 | dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2 16 | v1, v2 = self.sim.data.qvel[1:3] 17 | vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2 18 | alive_bonus = 10 19 | r = alive_bonus - dist_penalty - vel_penalty 20 | done = bool(y <= 1) 21 | return ob, r, done, {} 22 | 23 | def _get_obs(self): 24 | return np.concatenate([ 25 | self.sim.data.qpos[:1], # cart x pos 26 | np.sin(self.sim.data.qpos[1:]), # link angles 27 | np.cos(self.sim.data.qpos[1:]), 28 | np.clip(self.sim.data.qvel, -10, 10), 29 | np.clip(self.sim.data.qfrc_constraint, -10, 10) 30 | ]).ravel() 31 | 32 | def reset_model(self): 33 | self.set_state( 34 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq), 35 | self.init_qvel + self.np_random.randn(self.model.nv) * .1 36 | ) 37 | return self._get_obs() 38 | 39 | def viewer_setup(self): 40 | v = self.viewer 41 | v.cam.trackbodyid = 0 42 | v.cam.distance = self.model.stat.extent * 0.5 43 | v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2] 44 | -------------------------------------------------------------------------------- /gym/envs/mujoco/inverted_pendulum.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2) 9 | 10 | def step(self, a): 11 | reward = 1.0 12 | self.do_simulation(a, self.frame_skip) 13 | ob = self._get_obs() 14 | notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2) 15 | done = not notdone 16 | return ob, reward, done, {} 17 | 18 | def reset_model(self): 19 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01) 20 | qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01) 
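# both qpos and qvel start within ±0.01 of the upright fixed point; step() ends the episode once the pole angle |ob[1]| exceeds 0.2 rad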
21 | self.set_state(qpos, qvel) 22 | return self._get_obs() 23 | 24 | def _get_obs(self): 25 | return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel() 26 | 27 | def viewer_setup(self): 28 | v = self.viewer 29 | v.cam.trackbodyid = 0 30 | v.cam.distance = self.model.stat.extent 31 | -------------------------------------------------------------------------------- /gym/envs/mujoco/pusher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | import mujoco_py 6 | 7 | 8 | class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle): 9 | def __init__(self): 10 | utils.EzPickle.__init__(self) 11 | mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5) 12 | 13 | def step(self, a): 14 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm") 15 | vec_2 = self.get_body_com("object") - self.get_body_com("goal") 16 | 17 | reward_near = - np.linalg.norm(vec_1) 18 | reward_dist = - np.linalg.norm(vec_2) 19 | reward_ctrl = - np.square(a).sum() 20 | reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near 21 | 22 | self.do_simulation(a, self.frame_skip) 23 | ob = self._get_obs() 24 | done = False 25 | return ob, reward, done, dict(reward_dist=reward_dist, 26 | reward_ctrl=reward_ctrl) 27 | 28 | def viewer_setup(self): 29 | self.viewer.cam.trackbodyid = -1 30 | self.viewer.cam.distance = 4.0 31 | 32 | def reset_model(self): 33 | qpos = self.init_qpos 34 | 35 | self.goal_pos = np.asarray([0, 0]) 36 | while True: 37 | self.cylinder_pos = np.concatenate([ 38 | self.np_random.uniform(low=-0.3, high=0, size=1), 39 | self.np_random.uniform(low=-0.2, high=0.2, size=1)]) 40 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17: 41 | break 42 | 43 | qpos[-4:-2] = self.cylinder_pos 44 | qpos[-2:] = self.goal_pos 45 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005, 46 | high=0.005, size=self.model.nv) 47 | qvel[-4:] = 0 48 | self.set_state(qpos, qvel) 49 | return self._get_obs() 50 | 51 | def _get_obs(self): 52 | return np.concatenate([ 53 | self.sim.data.qpos.flat[:7], 54 | self.sim.data.qvel.flat[:7], 55 | self.get_body_com("tips_arm"), 56 | self.get_body_com("object"), 57 | self.get_body_com("goal"), 58 | ]) 59 | -------------------------------------------------------------------------------- /gym/envs/mujoco/reacher.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | utils.EzPickle.__init__(self) 8 | mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2) 9 | 10 | def step(self, a): 11 | vec = self.get_body_com("fingertip")-self.get_body_com("target") 12 | reward_dist = - np.linalg.norm(vec) 13 | reward_ctrl = - np.square(a).sum() 14 | reward = reward_dist + reward_ctrl 15 | self.do_simulation(a, self.frame_skip) 16 | ob = self._get_obs() 17 | done = False 18 | return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl) 19 | 20 | def viewer_setup(self): 21 | self.viewer.cam.trackbodyid = 0 22 | 23 | def reset_model(self): 24 | qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos 25 | while True: 26 | self.goal = self.np_random.uniform(low=-.2, high=.2, size=2) 27 | if np.linalg.norm(self.goal) < 0.2: 28 | break 29 | qpos[-2:] = self.goal 30 | qvel = self.init_qvel + 
self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 31 | qvel[-2:] = 0 32 | self.set_state(qpos, qvel) 33 | return self._get_obs() 34 | 35 | def _get_obs(self): 36 | theta = self.sim.data.qpos.flat[:2] 37 | return np.concatenate([ 38 | np.cos(theta), 39 | np.sin(theta), 40 | self.sim.data.qpos.flat[2:], 41 | self.sim.data.qvel.flat[:2], 42 | self.get_body_com("fingertip") - self.get_body_com("target") 43 | ]) 44 | -------------------------------------------------------------------------------- /gym/envs/mujoco/swimmer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, a): 11 | ctrl_cost_coeff = 0.0001 12 | xposbefore = self.sim.data.qpos[0] 13 | self.do_simulation(a, self.frame_skip) 14 | xposafter = self.sim.data.qpos[0] 15 | reward_fwd = (xposafter - xposbefore) / self.dt 16 | reward_ctrl = - ctrl_cost_coeff * np.square(a).sum() 17 | reward = reward_fwd + reward_ctrl 18 | ob = self._get_obs() 19 | return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl) 20 | 21 | def _get_obs(self): 22 | qpos = self.sim.data.qpos 23 | qvel = self.sim.data.qvel 24 | return np.concatenate([qpos.flat[2:], qvel.flat]) 25 | 26 | def reset_model(self): 27 | self.set_state( 28 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq), 29 | self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv) 30 | ) 31 | return self._get_obs() 32 | -------------------------------------------------------------------------------- /gym/envs/mujoco/walker2d.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | 7 | def __init__(self): 8 | mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4) 9 | utils.EzPickle.__init__(self) 10 | 11 | def step(self, a): 12 | posbefore = self.sim.data.qpos[0] 13 | self.do_simulation(a, self.frame_skip) 14 | posafter, height, ang = self.sim.data.qpos[0:3] 15 | alive_bonus = 1.0 16 | reward = ((posafter - posbefore) / self.dt) 17 | reward += alive_bonus 18 | reward -= 1e-3 * np.square(a).sum() 19 | done = not (height > 0.8 and height < 2.0 and 20 | ang > -1.0 and ang < 1.0) 21 | ob = self._get_obs() 22 | return ob, reward, done, {} 23 | 24 | def _get_obs(self): 25 | qpos = self.sim.data.qpos 26 | qvel = self.sim.data.qvel 27 | return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel() 28 | 29 | def reset_model(self): 30 | self.set_state( 31 | self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq), 32 | self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv) 33 | ) 34 | return self._get_obs() 35 | 36 | def viewer_setup(self): 37 | self.viewer.cam.trackbodyid = 2 38 | self.viewer.cam.distance = self.model.stat.extent * 0.5 39 | self.viewer.cam.lookat[2] = 1.15 40 | self.viewer.cam.elevation = -20 41 | -------------------------------------------------------------------------------- /gym/envs/robotics/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.robotics.fetch_env import FetchEnv 2 | from 
gym.envs.robotics.fetch.slide import FetchSlideEnv
3 | from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv
4 | from gym.envs.robotics.fetch.push import FetchPushEnv
5 |
6 | from gym.envs.robotics.fetch.reach import FetchReachEnv
7 |
8 | from gym.envs.robotics.fetch.insert import FetchInsertEnv
9 | from gym.envs.robotics.fetch.drawer import FetchDrawerEnv
10 | from gym.envs.robotics.fetch.obs_push import FetchObsPushEnv
11 |
12 | from gym.envs.robotics.hand.reach import HandReachEnv
13 | from gym.envs.robotics.hand.manipulate import HandBlockEnv
14 | from gym.envs.robotics.hand.manipulate import HandEggEnv
15 | from gym.envs.robotics.hand.manipulate import HandPenEnv
16 |
17 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandBlockTouchSensorsEnv
18 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandEggTouchSensorsEnv
19 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandPenTouchSensorsEnv
20 |
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/drawer.xml: --------------------------------------------------------------------------------
[MuJoCo XML markup not captured in this export]
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/drawer_box.xml: --------------------------------------------------------------------------------
[MuJoCo XML markup not captured in this export]
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/drawer_dependencies.xml: --------------------------------------------------------------------------------
[MuJoCo XML markup not captured in this export]
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/obs_push.xml: --------------------------------------------------------------------------------
[MuJoCo XML markup not captured in this export]
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/occ_push.xml: --------------------------------------------------------------------------------
[MuJoCo XML markup not captured in this export]
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/pick_and_place.xml: --------------------------------------------------------------------------------
[MuJoCo XML markup not captured in this export]
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/push.xml: --------------------------------------------------------------------------------
[MuJoCo XML markup not captured in this export]
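A minimal usage sketch (not part of the repository; it assumes this gym fork and mujoco_py are importable): the wrapper classes exported by the __init__.py above compile these fetch/*.xml scenes and expose the usual goal-based interface.

from gym.envs.robotics.fetch.push import FetchPushEnv

env = FetchPushEnv(reward_type='sparse')
obs = env.reset()                       # dict with observation / achieved_goal / desired_goal
for t in range(50):
    action = env.action_space.sample()  # 4-D control: dx, dy, dz, gripper opening
    obs, reward, done, info = env.step(action)
    # sparse reward is 0.0 once the object sits within distance_threshold of the goal, else -1.0
env.close()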
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/reach.xml: --------------------------------------------------------------------------------
[MuJoCo XML markup not captured in this export]
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/fetch/slide.xml: --------------------------------------------------------------------------------
[MuJoCo XML markup not captured in this export]
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/reach.xml: --------------------------------------------------------------------------------
[MuJoCo XML markup not captured in this export]
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/hand/shared_asset.xml: --------------------------------------------------------------------------------
[MuJoCo XML markup not captured in this export]
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/base_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/drawer.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/drawer.stl
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/drawercase.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/drawercase.stl
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/drawerhandle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/drawerhandle.stl
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/estop_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/estop_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/gripper_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/gripper_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/laser_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/laser_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/window_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/window_base.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/window_frame.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/window_frame.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/window_h_base.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/window_h_base.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/window_h_frame.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/window_h_frame.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/windowa_frame.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowa_frame.stl 
-------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/windowa_glass.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowa_glass.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/windowa_h_frame.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowa_h_frame.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/windowa_h_glass.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowa_h_glass.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/windowb_frame.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowb_frame.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/windowb_glass.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowb_glass.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/windowb_h_frame.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowb_h_frame.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/windowb_h_glass.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowb_h_glass.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/F1.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/F1.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/F2.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/F2.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/F3.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/F3.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/TH1_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/TH1_z.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/TH2_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/TH2_z.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/TH3_z.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/TH3_z.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/forearm_electric.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/forearm_electric.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/knuckle.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/knuckle.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/palm.stl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/palm.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/stls/hand/wrist.stl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/wrist.stl -------------------------------------------------------------------------------- /gym/envs/robotics/assets/textures/block.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/textures/block.png -------------------------------------------------------------------------------- /gym/envs/robotics/assets/textures/block_hidden.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/textures/block_hidden.png -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/fetch/__init__.py -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/dpush.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics.fetch_double_push_env import FetchEnv 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'double_push.xml') 8 | 9 | 10 | class FetchDoublePushEnv(FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, 20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0, 21 | obj_range=0.1, target_range=0.1, distance_threshold=0.05, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/drawer.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_drawer_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'drawer.xml') 8 | 9 | 10 | class FetchDrawerEnv(fetch_drawer_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.15, 16 | # 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | fetch_drawer_env.FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=False, block_gripper=False, n_substeps=20, 20 | gripper_extra_height=0.15, target_in_the_air=False, 
target_offset=0.0, 21 | obj_range=0.0, target_range=0.0, distance_threshold=0.02, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/drawer_box.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_drawer_box_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'drawer_box.xml') 8 | 9 | 10 | class FetchDrawerBoxEnv(fetch_drawer_box_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.15, 16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | fetch_drawer_box_env.FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20, 20 | gripper_extra_height=0.15, target_in_the_air=False, target_offset=0.0, 21 | obj_range=0.0, target_range=0.0, distance_threshold=0.02, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/insert.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_insert_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'insert_rand.xml') 8 | 9 | 10 | class FetchInsertEnv(fetch_insert_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | # 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | fetch_insert_env.FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20, 20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0, 21 | obj_range=0.0, target_range=0.0, distance_threshold=0.01, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/insert_rand.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_insert_rand_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'insert_rand.xml') 8 | 9 | 10 | class FetchInsertRandEnv(fetch_insert_rand_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.15, 16 | # 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | fetch_insert_rand_env.FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20, 20 | gripper_extra_height=0.15, target_in_the_air=False, target_offset=0.0, 21 | obj_range=0.0, target_range=0.0, distance_threshold=0.02, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/obs_push.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics.fetch_obs_push_env import FetchEnv 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'obs_push.xml') 8 | 9 | 10 | class FetchObsPushEnv(FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, 20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0, 21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/occ_push.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics.fetch_occ_push_env import FetchEnv 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'occ_push.xml') 8 | 9 | 10 | class FetchPushEnv(FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20, 20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0, 21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/pick_and_place.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'pick_and_place.xml') 8 | 9 | 10 | class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 | initial_qpos = { 13 | 'robot0:slide0': 0.405, 14 | 'robot0:slide1': 0.48, 15 | 'robot0:slide2': 0.0, 16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.], 17 | } 18 | fetch_env.FetchEnv.__init__( 19 | self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20, 20 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0, 21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05, 22 | initial_qpos=initial_qpos, reward_type=reward_type) 23 | utils.EzPickle.__init__(self) 24 | -------------------------------------------------------------------------------- /gym/envs/robotics/fetch/push.py: -------------------------------------------------------------------------------- 1 | import os 2 | from gym import utils 3 | from gym.envs.robotics import fetch_env 4 | 5 | 6 | # Ensure we get the path separator correct on windows 7 | MODEL_XML_PATH = os.path.join('fetch', 'push.xml') 8 | 9 | 10 | class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle): 11 | def __init__(self, reward_type='sparse'): 12 
| initial_qpos = {
13 |             'robot0:slide0': 0.405,
14 |             'robot0:slide1': 0.48,
15 |             'robot0:slide2': 0.0,
16 |             'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 |         }
18 |         fetch_env.FetchEnv.__init__(
19 |             self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
20 |             gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
21 |             obj_range=0.15, target_range=0.15, distance_threshold=0.05,
22 |             initial_qpos=initial_qpos, reward_type=reward_type)
23 |         utils.EzPickle.__init__(self)
24 |
-------------------------------------------------------------------------------- /gym/envs/robotics/fetch/reach.py: --------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'reach.xml')
8 |
9 |
10 | class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle):
11 |     def __init__(self, reward_type='sparse'):
12 |         initial_qpos = {
13 |             'robot0:slide0': 0.4049,
14 |             'robot0:slide1': 0.48,
15 |             'robot0:slide2': 0.0,
16 |         }
17 |         fetch_env.FetchEnv.__init__(
18 |             self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20,
19 |             gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
20 |             obj_range=0.15, target_range=0.15, distance_threshold=0.05,
21 |             initial_qpos=initial_qpos, reward_type=reward_type)
22 |         utils.EzPickle.__init__(self)
23 |
-------------------------------------------------------------------------------- /gym/envs/robotics/fetch/slide.py: --------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 | from gym import utils
5 | from gym.envs.robotics import fetch_env
6 |
7 |
8 | # Ensure we get the path separator correct on windows
9 | MODEL_XML_PATH = os.path.join('fetch', 'slide.xml')
10 |
11 |
12 | class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle):
13 |     def __init__(self, reward_type='sparse'):
14 |         initial_qpos = {
15 |             'robot0:slide0': 0.05,
16 |             'robot0:slide1': 0.48,
17 |             'robot0:slide2': 0.0,
18 |             'object0:joint': [1.7, 1.1, 0.41, 1., 0., 0., 0.],
19 |         }
20 |         fetch_env.FetchEnv.__init__(
21 |             self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
22 |             gripper_extra_height=-0.02, target_in_the_air=False, target_offset=np.array([0.4, 0.0, 0.0]),
23 |             obj_range=0.1, target_range=0.3, distance_threshold=0.05,
24 |             initial_qpos=initial_qpos, reward_type=reward_type)
25 |         utils.EzPickle.__init__(self)
26 |
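These wrapper classes differ only in which XML scene they load and in the constructor kwargs: obj_range and target_range bound the uniform sampling of object and goal positions, target_offset shifts the sampled goal (FetchSlideEnv above pushes it 0.4 m further along x, beyond the arm's reach), and distance_threshold defines success. A sketch (not from the repo) of how reward_type interacts with the goal-based API, e.g. for HER-style relabelling:

from gym.envs.robotics.fetch.slide import FetchSlideEnv

env = FetchSlideEnv(reward_type='dense')  # dense reward: negative distance to goal
obs = env.reset()
# compute_reward() lets a replay buffer re-score a stored transition
# against any substituted goal:
r = env.compute_reward(obs['achieved_goal'], obs['desired_goal'], {})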
-------------------------------------------------------------------------------- /gym/envs/robotics/fetch/stack.py: --------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics.fetch_stack_env import FetchEnv
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'stack.xml')
8 |
9 |
10 | class FetchStackEnv(FetchEnv, utils.EzPickle):
11 |     def __init__(self, reward_type='sparse'):
12 |         initial_qpos = {
13 |             'robot0:slide0': 0.405,
14 |             'robot0:slide1': 0.48,
15 |             'robot0:slide2': 0.0,
16 |             'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 |         }
18 |         FetchEnv.__init__(
19 |             self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20,
20 |             gripper_extra_height=0.2, target_in_the_air=False, target_offset=0.0,
21 |             obj_range=0.1, target_range=0.1, distance_threshold=0.05,
22 |             initial_qpos=initial_qpos, reward_type=reward_type)
23 |         utils.EzPickle.__init__(self)
24 |
-------------------------------------------------------------------------------- /gym/envs/robotics/fetch/tpush.py: --------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics.fetch_three_push_env import FetchEnv
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'three_push.xml')
8 |
9 |
10 | class FetchThreePushEnv(FetchEnv, utils.EzPickle):
11 |     def __init__(self, reward_type='sparse'):
12 |         initial_qpos = {
13 |             'robot0:slide0': 0.405,
14 |             'robot0:slide1': 0.48,
15 |             'robot0:slide2': 0.0,
16 |             'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 |         }
18 |         FetchEnv.__init__(
19 |             self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
20 |             gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
21 |             obj_range=0.1, target_range=0.1, distance_threshold=0.05,
22 |             initial_qpos=initial_qpos, reward_type=reward_type)
23 |         utils.EzPickle.__init__(self)
24 |
-------------------------------------------------------------------------------- /gym/envs/robotics/fetch/tstack.py: --------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics.fetch_three_stack_env import FetchEnv
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'three_stack.xml')
8 |
9 |
10 | class FetchThreeStackEnv(FetchEnv, utils.EzPickle):
11 |     def __init__(self, reward_type='sparse'):
12 |         initial_qpos = {
13 |             'robot0:slide0': 0.405,
14 |             'robot0:slide1': 0.48,
15 |             'robot0:slide2': 0.0,
16 |             'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 |         }
18 |         FetchEnv.__init__(
19 |             self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20,
20 |             gripper_extra_height=0.2, target_in_the_air=False, target_offset=0.0,
21 |             obj_range=0.1, target_range=0.1, distance_threshold=0.05,
22 |             initial_qpos=initial_qpos, reward_type=reward_type)
23 |         utils.EzPickle.__init__(self)
24 |
-------------------------------------------------------------------------------- /gym/envs/robotics/hand/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/hand/__init__.py
-------------------------------------------------------------------------------- /gym/envs/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/tests/__init__.py
-------------------------------------------------------------------------------- /gym/envs/tests/spec_list.py: --------------------------------------------------------------------------------
1 | from gym import envs, logger
2 | import os
3 |
4 |
5 | SKIP_MUJOCO_WARNING_MESSAGE = (
6 |     "Cannot run mujoco test (either license key not found or mujoco not "
7 |     "installed properly).")
8 |
9 |
10 | skip_mujoco = not (os.environ.get('MUJOCO_KEY'))
11 | if not skip_mujoco:
12 |     try:
13 |         import mujoco_py
14 |     except ImportError:
15 |         skip_mujoco = True
16 |
17 | def should_skip_env_spec_for_tests(spec):
18 |     # We skip tests for envs that require dependencies or are otherwise
19 |     # troublesome to run frequently
20 |     ep = 
spec.entry_point 21 | # Skip mujoco tests for pull request CI 22 | if skip_mujoco and (ep.startswith('gym.envs.mujoco') or ep.startswith('gym.envs.robotics:')): 23 | return True 24 | try: 25 | import atari_py 26 | except ImportError: 27 | if ep.startswith('gym.envs.atari'): 28 | return True 29 | try: 30 | import Box2D 31 | except ImportError: 32 | if ep.startswith('gym.envs.box2d'): 33 | return True 34 | 35 | if ( 'GoEnv' in ep or 36 | 'HexEnv' in ep or 37 | (ep.startswith("gym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest")) 38 | ): 39 | logger.warn("Skipping tests for env {}".format(ep)) 40 | return True 41 | return False 42 | 43 | spec_list = [spec for spec in sorted(envs.registry.all(), key=lambda x: x.id) if spec.entry_point is not None and not should_skip_env_spec_for_tests(spec)] 44 | -------------------------------------------------------------------------------- /gym/envs/tests/test_frozenlake_dfs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from gym.envs.toy_text.frozen_lake import generate_random_map 5 | 6 | # Test that FrozenLake map generation creates valid maps of various sizes. 7 | def test_frozenlake_dfs_map_generation(): 8 | 9 | def frozenlake_dfs_path_exists(res): 10 | frontier, discovered = [], set() 11 | frontier.append((0,0)) 12 | while frontier: 13 | r, c = frontier.pop() 14 | if not (r,c) in discovered: 15 | discovered.add((r,c)) 16 | directions = [(1, 0), (0, 1), (-1, 0), (0, -1)] 17 | for x, y in directions: 18 | r_new = r + x 19 | c_new = c + y 20 | if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size: 21 | continue 22 | if res[r_new][c_new] == 'G': 23 | return True 24 | if (res[r_new][c_new] not in '#H'): 25 | frontier.append((r_new, c_new)) 26 | return False 27 | 28 | map_sizes = [5, 10, 200] 29 | for size in map_sizes: 30 | new_frozenlake = generate_random_map(size) 31 | assert len(new_frozenlake) == size 32 | assert len(new_frozenlake[0]) == size 33 | assert frozenlake_dfs_path_exists(new_frozenlake) 34 | -------------------------------------------------------------------------------- /gym/envs/tests/test_kellycoinflip.py: -------------------------------------------------------------------------------- 1 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv 2 | 3 | 4 | class TestKellyCoinflipEnv: 5 | @staticmethod 6 | def test_done_when_reaches_max_wealth(): 7 | # https://github.com/openai/gym/issues/1266 8 | env = KellyCoinflipEnv() 9 | env.seed(1) 10 | env.reset() 11 | done = False 12 | 13 | while not done: 14 | action = int(env.wealth * 20) # bet 20% of the wealth 15 | observation, reward, done, info = env.step(action) 16 | 17 | assert env.wealth == env.max_wealth 18 | -------------------------------------------------------------------------------- /gym/envs/toy_text/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.toy_text.blackjack import BlackjackEnv 2 | from gym.envs.toy_text.roulette import RouletteEnv 3 | from gym.envs.toy_text.frozen_lake import FrozenLakeEnv 4 | from gym.envs.toy_text.nchain import NChainEnv 5 | from gym.envs.toy_text.hotter_colder import HotterColder 6 | from gym.envs.toy_text.guessing_game import GuessingGame 7 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv 8 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipGeneralizedEnv 9 | from gym.envs.toy_text.cliffwalking import CliffWalkingEnv 10 | from 
gym.envs.toy_text.taxi import TaxiEnv
11 |
-------------------------------------------------------------------------------- /gym/envs/toy_text/discrete.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym import Env, spaces
4 | from gym.utils import seeding
5 |
6 |
7 | def categorical_sample(prob_n, np_random):
8 |     """
9 |     Sample from categorical distribution
10 |     prob_n is a vector of class probabilities
11 |     """
12 |     prob_n = np.asarray(prob_n)
13 |     csprob_n = np.cumsum(prob_n)
14 |     return (csprob_n > np_random.rand()).argmax()
15 |
16 |
17 | class DiscreteEnv(Env):
18 |
19 |     """
20 |     Has the following members
21 |     - nS: number of states
22 |     - nA: number of actions
23 |     - P: transitions (*)
24 |     - isd: initial state distribution (**)
25 |
26 |     (*) dictionary of lists, where
27 |       P[s][a] == [(probability, nextstate, reward, done), ...]
28 |     (**) list or array of length nS
29 |
30 |
31 |     """
32 |     def __init__(self, nS, nA, P, isd):
33 |         self.P = P
34 |         self.isd = isd
35 |         self.lastaction = None  # for rendering
36 |         self.nS = nS
37 |         self.nA = nA
38 |
39 |         self.action_space = spaces.Discrete(self.nA)
40 |         self.observation_space = spaces.Discrete(self.nS)
41 |
42 |         self.seed()
43 |         self.s = categorical_sample(self.isd, self.np_random)
44 |
45 |     def seed(self, seed=None):
46 |         self.np_random, seed = seeding.np_random(seed)
47 |         return [seed]
48 |
49 |     def reset(self):
50 |         self.s = categorical_sample(self.isd, self.np_random)
51 |         self.lastaction = None
52 |         return int(self.s)
53 |
54 |     def step(self, a):
55 |         transitions = self.P[self.s][a]
56 |         i = categorical_sample([t[0] for t in transitions], self.np_random)
57 |         p, s, r, d = transitions[i]
58 |         self.s = s
59 |         self.lastaction = a
60 |         return (int(s), r, d, {"prob": p})
61 |
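The docstring above pins down P's nested layout; a tiny hand-built instance (illustrative only, not part of the repo) makes it concrete:

import numpy as np
from gym.envs.toy_text.discrete import DiscreteEnv

# Two states, one action. From state 0 the action reaches the terminal
# state 1 with probability 0.9 (reward 1.0) and stays put otherwise.
P = {
    0: {0: [(0.9, 1, 1.0, True), (0.1, 0, 0.0, False)]},
    1: {0: [(1.0, 1, 0.0, True)]},
}
isd = np.array([1.0, 0.0])      # always start in state 0
env = DiscreteEnv(2, 1, P, isd)
s = env.reset()                 # -> 0
s, r, done, info = env.step(0)
print(s, r, done, info)         # e.g. 1 1.0 True {'prob': 0.9}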
-------------------------------------------------------------------------------- /gym/envs/toy_text/roulette.py: --------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | from gym.utils import seeding
4 |
5 |
6 | class RouletteEnv(gym.Env):
7 |     """Simple roulette environment
8 |
9 |     The roulette wheel has 37 spots. If the bet is 0 and a 0 comes up,
10 |     you win a reward of 35. If the parity of your bet matches the parity
11 |     of the spin, you win 1. Otherwise you receive a reward of -1.
12 |
13 |     The long run reward for playing 0 should be -1/37 for any state
14 |
15 |     The last action (38) stops the rollout for a return of 0 (walking away)
16 |     """
17 |     def __init__(self, spots=37):
18 |         self.n = spots + 1
19 |         self.action_space = spaces.Discrete(self.n)
20 |         self.observation_space = spaces.Discrete(1)
21 |         self.seed()
22 |
23 |     def seed(self, seed=None):
24 |         self.np_random, seed = seeding.np_random(seed)
25 |         return [seed]
26 |
27 |     def step(self, action):
28 |         assert self.action_space.contains(action)
29 |         if action == self.n - 1:
30 |             # observation, reward, done, info
31 |             return 0, 0, True, {}
32 |
33 |         # N.B. np.random.randint draws from [A, B) while random.randint draws from [A,B]
34 |         val = self.np_random.randint(0, self.n - 1)
35 |         if val == action == 0:
36 |             reward = self.n - 2.0
37 |         elif val != 0 and action != 0 and val % 2 == action % 2:
38 |             reward = 1.0
39 |         else:
40 |             reward = -1.0
41 |         return 0, reward, False, {}
42 |
43 |     def reset(self):
44 |         return 0
45 |
-------------------------------------------------------------------------------- /gym/envs/unittest/__init__.py: --------------------------------------------------------------------------------
1 | from gym.envs.unittest.cube_crash import CubeCrash
2 | from gym.envs.unittest.cube_crash import CubeCrashSparse
3 | from gym.envs.unittest.cube_crash import CubeCrashScreenBecomesBlack
4 | from gym.envs.unittest.memorize_digits import MemorizeDigits
5 |
6 |
-------------------------------------------------------------------------------- /gym/logger.py: --------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from gym.utils import colorize
4 |
5 | DEBUG = 10
6 | INFO = 20
7 | WARN = 30
8 | ERROR = 40
9 | DISABLED = 50
10 |
11 | MIN_LEVEL = 30
12 |
13 | def set_level(level):
14 |     """
15 |     Set logging threshold on current logger.
16 |     """
17 |     global MIN_LEVEL
18 |     MIN_LEVEL = level
19 |
20 | def debug(msg, *args):
21 |     if MIN_LEVEL <= DEBUG:
22 |         print('%s: %s'%('DEBUG', msg % args))
23 |
24 | def info(msg, *args):
25 |     if MIN_LEVEL <= INFO:
26 |         print('%s: %s'%('INFO', msg % args))
27 |
28 | def warn(msg, *args):
29 |     if MIN_LEVEL <= WARN:
30 |         warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
31 |
32 | def error(msg, *args):
33 |     if MIN_LEVEL <= ERROR:
34 |         print(colorize('%s: %s'%('ERROR', msg % args), 'red'))
35 |
36 | # DEPRECATED:
37 | setLevel = set_level
38 |
-------------------------------------------------------------------------------- /gym/mjkey.txt: --------------------------------------------------------------------------------
1 | MuJoCo Pro Individual license activation key, number 7777, type 6.
2 |
3 | Issued to Everyone.
4 |
5 | Expires October 18, 2031.
6 |
7 | Do not modify this file. Its entire content, including the
8 | plain text section, is used by the activation manager. 
9 | 10 | 9aaedeefb37011a8a52361c736643665c7f60e796ff8ff70bb3f7a1d78e9a605 11 | 0453a3c853e4aa416e712d7e80cf799c6314ee5480ec6bd0f1ab51d1bb3c768f 12 | 8c06e7e572f411ecb25c3d6ef82cc20b00f672db88e6001b3dfdd3ab79e6c480 13 | 185d681811cfdaff640fb63295e391b05374edba90dd54cc1e162a9d99b82a8b 14 | ea3e87f2c67d08006c53daac2e563269cdb286838b168a2071c48c29fedfbea2 15 | 5effe96fe3cb05e85fb8af2d3851f385618ef8cdac42876831f095e052bd18c9 16 | 5dce57ff9c83670aad77e5a1f41444bec45e30e4e827f7bf9799b29f2c934e23 17 | dcf6d3c3ee9c8dd2ed057317100cd21b4abbbf652d02bf72c3d322e0c55dcc24 18 | -------------------------------------------------------------------------------- /gym/obstacle_push.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/obstacle_push.png -------------------------------------------------------------------------------- /gym/spaces/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.spaces.space import Space 2 | from gym.spaces.box import Box 3 | from gym.spaces.discrete import Discrete 4 | from gym.spaces.multi_discrete import MultiDiscrete 5 | from gym.spaces.multi_binary import MultiBinary 6 | from gym.spaces.tuple import Tuple 7 | from gym.spaces.dict import Dict 8 | 9 | from gym.spaces.utils import flatdim 10 | from gym.spaces.utils import flatten_space 11 | from gym.spaces.utils import flatten 12 | from gym.spaces.utils import unflatten 13 | 14 | __all__ = ["Space", "Box", "Discrete", "MultiDiscrete", "MultiBinary", "Tuple", "Dict", "flatdim", "flatten_space", "flatten", "unflatten"] 15 | -------------------------------------------------------------------------------- /gym/spaces/discrete.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class Discrete(Space): 6 | r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`. 7 | 8 | Example:: 9 | 10 | >>> Discrete(2) 11 | 12 | """ 13 | def __init__(self, n): 14 | assert n >= 0 15 | self.n = n 16 | super(Discrete, self).__init__((), np.int64) 17 | 18 | def sample(self): 19 | return self.np_random.randint(self.n) 20 | 21 | def contains(self, x): 22 | if isinstance(x, int): 23 | as_int = x 24 | elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.char in np.typecodes['AllInteger'] and x.shape == ()): 25 | as_int = int(x) 26 | else: 27 | return False 28 | return as_int >= 0 and as_int < self.n 29 | 30 | def __repr__(self): 31 | return "Discrete(%d)" % self.n 32 | 33 | def __eq__(self, other): 34 | return isinstance(other, Discrete) and self.n == other.n 35 | -------------------------------------------------------------------------------- /gym/spaces/multi_binary.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class MultiBinary(Space): 6 | ''' 7 | An n-shape binary space. 8 | 9 | The argument to MultiBinary defines n, which could be a number or a `list` of numbers. 
10 | 11 | Example Usage: 12 | 13 | >> self.observation_space = spaces.MultiBinary(5) 14 | 15 | >> self.observation_space.sample() 16 | 17 | array([0,1,0,1,0], dtype =int8) 18 | 19 | >> self.observation_space = spaces.MultiBinary([3,2]) 20 | 21 | >> self.observation_space.sample() 22 | 23 | array([[0, 0], 24 | [0, 1], 25 | [1, 1]], dtype=int8) 26 | 27 | ''' 28 | def __init__(self, n): 29 | self.n = n 30 | if type(n) in [tuple, list, np.ndarray]: 31 | input_n = n 32 | else: 33 | input_n = (n, ) 34 | super(MultiBinary, self).__init__(input_n, np.int8) 35 | 36 | def sample(self): 37 | return self.np_random.randint(low=0, high=2, size=self.n, dtype=self.dtype) 38 | 39 | def contains(self, x): 40 | if isinstance(x, list) or isinstance(x, tuple): 41 | x = np.array(x) # Promote list to array for contains check 42 | if self.shape != x.shape: 43 | return False 44 | return ((x==0) | (x==1)).all() 45 | 46 | def to_jsonable(self, sample_n): 47 | return np.array(sample_n).tolist() 48 | 49 | def from_jsonable(self, sample_n): 50 | return [np.asarray(sample) for sample in sample_n] 51 | 52 | def __repr__(self): 53 | return "MultiBinary({})".format(self.n) 54 | 55 | def __eq__(self, other): 56 | return isinstance(other, MultiBinary) and self.n == other.n 57 | -------------------------------------------------------------------------------- /gym/spaces/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/spaces/tests/__init__.py -------------------------------------------------------------------------------- /gym/spaces/tuple.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .space import Space 3 | 4 | 5 | class Tuple(Space): 6 | """ 7 | A tuple (i.e., product) of simpler spaces 8 | 9 | Example usage: 10 | self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3))) 11 | """ 12 | def __init__(self, spaces): 13 | self.spaces = spaces 14 | for space in spaces: 15 | assert isinstance(space, Space), "Elements of the tuple must be instances of gym.Space" 16 | super(Tuple, self).__init__(None, None) 17 | 18 | def seed(self, seed=None): 19 | [space.seed(seed) for space in self.spaces] 20 | 21 | def sample(self): 22 | return tuple([space.sample() for space in self.spaces]) 23 | 24 | def contains(self, x): 25 | if isinstance(x, list): 26 | x = tuple(x) # Promote list to tuple for contains check 27 | return isinstance(x, tuple) and len(x) == len(self.spaces) and all( 28 | space.contains(part) for (space,part) in zip(self.spaces,x)) 29 | 30 | def __repr__(self): 31 | return "Tuple(" + ", ". 
join([str(s) for s in self.spaces]) + ")"
32 |
33 |     def to_jsonable(self, sample_n):
34 |         # serialize as list-repr of tuple of vectors
35 |         return [space.to_jsonable([sample[i] for sample in sample_n]) \
36 |                 for i, space in enumerate(self.spaces)]
37 |
38 |     def from_jsonable(self, sample_n):
39 |         return [sample for sample in zip(*[space.from_jsonable(sample_n[i]) for i, space in enumerate(self.spaces)])]
40 |
41 |     def __getitem__(self, index):
42 |         return self.spaces[index]
43 |
44 |     def __len__(self):
45 |         return len(self.spaces)
46 |
47 |     def __eq__(self, other):
48 |         return isinstance(other, Tuple) and self.spaces == other.spaces
49 |
-------------------------------------------------------------------------------- /gym/stack_demo.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import gym
4 | from gym.envs.robotics.fetch.stack import FetchStackEnv
5 | env = FetchStackEnv()
6 |
7 |
8 | def p_control(env, obs, p_rate=0.2):
9 |     a = env.action_space.sample()
10 |     gg = obs['grip_goal']
11 |     ag = obs['achieved_goal']
12 |     error = ag - gg
13 |     for axis, value in enumerate(error):
14 |         if abs(value) > 0.02:
15 |             if value > 0:
16 |                 a[axis] = p_rate
17 |             else:
18 |                 a[axis] = -p_rate
19 |         else:
20 |             a[axis] = 0
21 |     action = a
22 |     return action
23 |
24 |
25 | for ep in range(20):
26 |     obs = env.reset()
27 |     for i in range(20):
28 |         a = p_control(env, obs=obs)
29 |         # a[-1] = 0.0
30 |         #
31 |         # a = env.action_space.sample()
32 |         # a[0] = 0.01
33 |         # if obs['grip_goal'][2] < 0.3:
34 |         #     pass
35 |         # else:
36 |         #     a[1] = -0.2
37 |         #     a[2] = -0.2
38 |         print("gg:", obs['grip_goal'])
39 |
40 |         obs, reward, done, info = env.step(a)
41 |         print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info))
42 |         env.render()
43 |
-------------------------------------------------------------------------------- /gym/three_push_demo.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import gym
4 | import time
5 | from gym.envs.robotics.fetch.tpush import FetchThreePushEnv
6 | env = FetchThreePushEnv()
7 |
8 |
9 | def p_control(env, obs, p_rate=0.2):
10 |     a = env.action_space.sample()
11 |     gg = obs['grip_goal']
12 |     ag = obs['achieved_goal']
13 |     error = ag - gg
14 |     for axis, value in enumerate(error):
15 |         if abs(value) > 0.02:
16 |             if value > 0:
17 |                 a[axis] = p_rate
18 |             else:
19 |                 a[axis] = -p_rate
20 |         else:
21 |             a[axis] = 0
22 |     action = a
23 |     return action
24 |
25 |
26 | for ep in range(20):
27 |     st = time.time()
28 |     obs = env.reset()
29 |     for i in range(50):
30 |         # a = p_control(env, obs=obs)
31 |         # a[2] = 0.0
32 |         a = np.random.random(4)
33 |         a[2] = 1.0
34 |
35 |         print("gg:", obs['grip_goal'])
36 |
37 |         obs, reward, done, info = env.step(a)
38 |         print("obs:", obs)
39 |         print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info))
40 |         env.render()
41 |     # print('ep_time:', time.time() - st)
42 |
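Both demo scripts above share the same axis-by-axis proportional controller; the loop collapses into array operations. A sketch of an equivalent vectorized form (it relies on the fork-specific 'grip_goal' observation key used by the demos, and zeroes the gripper channel instead of leaving it random):

import numpy as np

def p_control_vec(obs, p_rate=0.2, deadband=0.02):
    # Drive the gripper toward the object along each axis until the
    # per-axis error falls inside the deadband.
    error = obs['achieved_goal'] - obs['grip_goal']
    a = np.zeros(4)  # dx, dy, dz, gripper
    a[:3] = np.where(np.abs(error) > deadband, np.sign(error) * p_rate, 0.0)
    return a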
6 | # We want this since we use `utils` during our import-time sanity checks 7 | # that verify that our dependencies are actually present. 8 | from .colorize import colorize 9 | from .ezpickle import EzPickle 10 | -------------------------------------------------------------------------------- /gym/utils/colorize.py: -------------------------------------------------------------------------------- 1 | """A set of common utilities used within the environments. These are 2 | not intended as API functions, and will not remain stable over time. 3 | """ 4 | 5 | color2num = dict( 6 | gray=30, 7 | red=31, 8 | green=32, 9 | yellow=33, 10 | blue=34, 11 | magenta=35, 12 | cyan=36, 13 | white=37, 14 | crimson=38 15 | ) 16 | 17 | 18 | def colorize(string, color, bold=False, highlight=False): 19 | """Return string surrounded by appropriate terminal color codes to 20 | print colorized text. Valid colors: gray, red, green, yellow, 21 | blue, magenta, cyan, white, crimson 22 | """ 23 | 24 | attr = [] 25 | num = color2num[color] 26 | if highlight: num += 10 27 | attr.append(str(num)) 28 | if bold: attr.append('1') 29 | attrs = ';'.join(attr) 30 | return '\x1b[%sm%s\x1b[0m' % (attrs, string) 31 | -------------------------------------------------------------------------------- /gym/utils/ezpickle.py: -------------------------------------------------------------------------------- 1 | class EzPickle(object): 2 | """Objects that are pickled and unpickled via their constructor 3 | arguments. 4 | 5 | Example usage: 6 | 7 | class Dog(Animal, EzPickle): 8 | def __init__(self, furcolor, tailkind="bushy"): 9 | Animal.__init__(self) 10 | EzPickle.__init__(self, furcolor, tailkind) 11 | ... 12 | 13 | When this object is unpickled, a new Dog will be constructed by passing the provided 14 | furcolor and tailkind into the constructor. However, philosophers are still not sure 15 | whether it is still the same dog. 16 | 17 | This is generally needed only for environments which wrap C/C++ code, such as MuJoCo 18 | and Atari.
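A minimal round-trip sketch (illustrative only; it assumes the Dog/Animal classes above are actually defined):

    import pickle
    dog = pickle.loads(pickle.dumps(Dog("brown")))
    # unpickling re-runs Dog("brown") with the stored constructor arguments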
19 | """ 20 | def __init__(self, *args, **kwargs): 21 | self._ezpickle_args = args 22 | self._ezpickle_kwargs = kwargs 23 | def __getstate__(self): 24 | return {"_ezpickle_args" : self._ezpickle_args, "_ezpickle_kwargs": self._ezpickle_kwargs} 25 | def __setstate__(self, d): 26 | out = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"]) 27 | self.__dict__.update(out.__dict__) 28 | -------------------------------------------------------------------------------- /gym/utils/json_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def json_encode_np(obj): 4 | """ 5 | JSON can't serialize numpy types, so convert to pure python 6 | """ 7 | if isinstance(obj, np.ndarray): 8 | return list(obj) 9 | elif isinstance(obj, np.float32): 10 | return float(obj) 11 | elif isinstance(obj, np.float64): 12 | return float(obj) 13 | elif isinstance(obj, np.int8): 14 | return int(obj) 15 | elif isinstance(obj, np.int16): 16 | return int(obj) 17 | elif isinstance(obj, np.int32): 18 | return int(obj) 19 | elif isinstance(obj, np.int64): 20 | return int(obj) 21 | else: 22 | return obj 23 | -------------------------------------------------------------------------------- /gym/vector/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/vector/tests/__init__.py -------------------------------------------------------------------------------- /gym/vector/tests/test_vector_env_wrapper.py: -------------------------------------------------------------------------------- 1 | import gym 2 | from gym.vector import make 3 | from gym.vector import VectorEnvWrapper 4 | 5 | class DummyWrapper(VectorEnvWrapper): 6 | def __init__(self, env): 7 | self.env = env 8 | self.counter = 0 9 | 10 | def reset_async(self): 11 | super().reset_async() 12 | self.counter += 1 13 | 14 | 15 | def test_vector_env_wrapper_inheritance(): 16 | env = make('FrozenLake-v0', asynchronous=False) 17 | wrapped = DummyWrapper(env) 18 | wrapped.reset() 19 | assert wrapped.counter == 1 20 | 21 | -------------------------------------------------------------------------------- /gym/vector/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.vector.utils.misc import CloudpickleWrapper, clear_mpi_env_vars 2 | from gym.vector.utils.numpy_utils import concatenate, create_empty_array 3 | from gym.vector.utils.shared_memory import create_shared_memory, read_from_shared_memory, write_to_shared_memory 4 | from gym.vector.utils.spaces import _BaseGymSpaces, batch_space 5 | 6 | __all__ = [ 7 | 'CloudpickleWrapper', 8 | 'clear_mpi_env_vars', 9 | 'concatenate', 10 | 'create_empty_array', 11 | 'create_shared_memory', 12 | 'read_from_shared_memory', 13 | 'write_to_shared_memory', 14 | '_BaseGymSpaces', 15 | 'batch_space' 16 | ] 17 | -------------------------------------------------------------------------------- /gym/vector/utils/misc.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import os 3 | 4 | __all__ = ['CloudpickleWrapper', 'clear_mpi_env_vars'] 5 | 6 | class CloudpickleWrapper(object): 7 | def __init__(self, fn): 8 | self.fn = fn 9 | 10 | def __getstate__(self): 11 | import cloudpickle 12 | return cloudpickle.dumps(self.fn) 13 | 14 | def __setstate__(self, ob): 15 | import pickle 16 | self.fn = pickle.loads(ob) 17 | 18 | def 
__call__(self): 19 | return self.fn() 20 | 21 | @contextlib.contextmanager 22 | def clear_mpi_env_vars(): 23 | """ 24 | `from mpi4py import MPI` will call `MPI_Init` by default. If the child 25 | process has MPI environment variables, MPI will think that the child process 26 | is an MPI process just like the parent and do bad things such as hang. 27 | 28 | This context manager is a hacky way to clear those environment variables 29 | temporarily such as when we are starting multiprocessing Processes. 30 | """ 31 | removed_environment = {} 32 | for k, v in list(os.environ.items()): 33 | for prefix in ['OMPI_', 'PMI_']: 34 | if k.startswith(prefix): 35 | removed_environment[k] = v 36 | del os.environ[k] 37 | try: 38 | yield 39 | finally: 40 | os.environ.update(removed_environment) 41 | -------------------------------------------------------------------------------- /gym/version.py: -------------------------------------------------------------------------------- 1 | VERSION = '0.18.0' 2 | -------------------------------------------------------------------------------- /gym/wrappers/__init__.py: -------------------------------------------------------------------------------- 1 | from gym import error 2 | from gym.wrappers.monitor import Monitor 3 | from gym.wrappers.time_limit import TimeLimit 4 | from gym.wrappers.filter_observation import FilterObservation 5 | from gym.wrappers.atari_preprocessing import AtariPreprocessing 6 | from gym.wrappers.time_aware_observation import TimeAwareObservation 7 | from gym.wrappers.rescale_action import RescaleAction 8 | from gym.wrappers.flatten_observation import FlattenObservation 9 | from gym.wrappers.gray_scale_observation import GrayScaleObservation 10 | from gym.wrappers.frame_stack import LazyFrames 11 | from gym.wrappers.frame_stack import FrameStack 12 | from gym.wrappers.transform_observation import TransformObservation 13 | from gym.wrappers.transform_reward import TransformReward 14 | from gym.wrappers.resize_observation import ResizeObservation 15 | from gym.wrappers.clip_action import ClipAction 16 | from gym.wrappers.record_episode_statistics import RecordEpisodeStatistics 17 | -------------------------------------------------------------------------------- /gym/wrappers/clip_action.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym import ActionWrapper 4 | from gym.spaces import Box 5 | 6 | 7 | class ClipAction(ActionWrapper): 8 | r"""Clip the continuous action within the valid bound. 
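Example (an illustrative sketch; MountainCarContinuous-v0 exposes a Box(-1.0, 1.0) action space)::

    >>> import gym
    >>> env = ClipAction(gym.make('MountainCarContinuous-v0'))
    >>> env.reset()
    >>> env.step([10.0])  # clipped to [1.0] before reaching the underlying env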
""" 9 | def __init__(self, env): 10 | assert isinstance(env.action_space, Box) 11 | super(ClipAction, self).__init__(env) 12 | 13 | def action(self, action): 14 | return np.clip(action, self.action_space.low, self.action_space.high) 15 | -------------------------------------------------------------------------------- /gym/wrappers/flatten_observation.py: -------------------------------------------------------------------------------- 1 | import gym.spaces as spaces 2 | from gym import ObservationWrapper 3 | 4 | 5 | class FlattenObservation(ObservationWrapper): 6 | r"""Observation wrapper that flattens the observation.""" 7 | def __init__(self, env): 8 | super(FlattenObservation, self).__init__(env) 9 | self.observation_space = spaces.flatten_space(env.observation_space) 10 | 11 | def observation(self, observation): 12 | return spaces.flatten(self.env.observation_space, observation) 13 | -------------------------------------------------------------------------------- /gym/wrappers/gray_scale_observation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym.spaces import Box 4 | from gym import ObservationWrapper 5 | 6 | 7 | class GrayScaleObservation(ObservationWrapper): 8 | r"""Convert the image observation from RGB to gray scale. """ 9 | def __init__(self, env, keep_dim=False): 10 | super(GrayScaleObservation, self).__init__(env) 11 | self.keep_dim = keep_dim 12 | 13 | assert len(env.observation_space.shape) == 3 and env.observation_space.shape[-1] == 3 14 | obs_shape = self.observation_space.shape[:2] 15 | if self.keep_dim: 16 | self.observation_space = Box(low=0, high=255, shape=(obs_shape[0], obs_shape[1], 1), dtype=np.uint8) 17 | else: 18 | self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8) 19 | 20 | def observation(self, observation): 21 | import cv2 22 | observation = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY) 23 | if self.keep_dim: 24 | observation = np.expand_dims(observation, -1) 25 | return observation 26 | -------------------------------------------------------------------------------- /gym/wrappers/monitoring/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/wrappers/monitoring/__init__.py -------------------------------------------------------------------------------- /gym/wrappers/monitoring/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/wrappers/monitoring/tests/__init__.py -------------------------------------------------------------------------------- /gym/wrappers/monitoring/tests/helpers.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import shutil 3 | import tempfile 4 | 5 | @contextlib.contextmanager 6 | def tempdir(): 7 | temp = tempfile.mkdtemp() 8 | yield temp 9 | shutil.rmtree(temp) 10 | -------------------------------------------------------------------------------- /gym/wrappers/monitoring/tests/test_video_recorder.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import tempfile 5 | import numpy as np 6 | 7 | import gym 8 | from gym.wrappers.monitoring.video_recorder import VideoRecorder 9 | 10 | class 
BrokenRecordableEnv(object): 11 | metadata = {'render.modes': [None, 'rgb_array']} 12 | 13 | def render(self, mode=None): 14 | pass 15 | 16 | class UnrecordableEnv(object): 17 | metadata = {'render.modes': [None]} 18 | 19 | def render(self, mode=None): 20 | pass 21 | 22 | def test_record_simple(): 23 | env = gym.make("CartPole-v1") 24 | rec = VideoRecorder(env) 25 | env.reset() 26 | rec.capture_frame() 27 | rec.close() 28 | assert not rec.empty 29 | assert not rec.broken 30 | assert os.path.exists(rec.path) 31 | f = open(rec.path) 32 | assert os.fstat(f.fileno()).st_size > 100 33 | 34 | def test_no_frames(): 35 | env = BrokenRecordableEnv() 36 | rec = VideoRecorder(env) 37 | rec.close() 38 | assert rec.empty 39 | assert rec.functional 40 | assert not os.path.exists(rec.path) 41 | 42 | def test_record_unrecordable_method(): 43 | env = UnrecordableEnv() 44 | rec = VideoRecorder(env) 45 | assert not rec.enabled 46 | rec.close() 47 | 48 | def test_record_breaking_render_method(): 49 | env = BrokenRecordableEnv() 50 | rec = VideoRecorder(env) 51 | rec.capture_frame() 52 | rec.close() 53 | assert rec.empty 54 | assert rec.broken 55 | assert not os.path.exists(rec.path) 56 | 57 | def test_text_envs(): 58 | env = gym.make('FrozenLake-v0') 59 | video = VideoRecorder(env) 60 | try: 61 | env.reset() 62 | video.capture_frame() 63 | video.close() 64 | finally: 65 | os.remove(video.path) 66 | -------------------------------------------------------------------------------- /gym/wrappers/record_episode_statistics.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import deque 3 | 4 | import gym 5 | 6 | 7 | class RecordEpisodeStatistics(gym.Wrapper): 8 | def __init__(self, env, deque_size=100): 9 | super(RecordEpisodeStatistics, self).__init__(env) 10 | self.t0 = time.time() # TODO: use perf_counter when gym removes Python 2 support 11 | self.episode_return = 0.0 12 | self.episode_length = 0 13 | self.return_queue = deque(maxlen=deque_size) 14 | self.length_queue = deque(maxlen=deque_size) 15 | 16 | def reset(self, **kwargs): 17 | observation = super(RecordEpisodeStatistics, self).reset(**kwargs) 18 | self.episode_return = 0.0 19 | self.episode_length = 0 20 | return observation 21 | 22 | def step(self, action): 23 | observation, reward, done, info = super(RecordEpisodeStatistics, self).step(action) 24 | self.episode_return += reward 25 | self.episode_length += 1 26 | if done: 27 | info['episode'] = {'r': self.episode_return, 28 | 'l': self.episode_length, 29 | 't': round(time.time() - self.t0, 6)} 30 | self.return_queue.append(self.episode_return) 31 | self.length_queue.append(self.episode_length) 32 | self.episode_return = 0.0 33 | self.episode_length = 0 34 | return observation, reward, done, info 35 | -------------------------------------------------------------------------------- /gym/wrappers/rescale_action.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | from gym import spaces 5 | 6 | 7 | class RescaleAction(gym.ActionWrapper): 8 | r"""Rescales the continuous action space of the environment to a range [a,b]. 
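The rescaling is a plain affine map followed by a clip (see `action` below): action_env = low + (high - low) * (action - a) / (b - a), clipped to [low, high], where low/high are the wrapped env's original action bounds.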
9 | 10 | Example:: 11 | 12 | >>> RescaleAction(env, a, b).action_space == Box(a,b) 13 | True 14 | 15 | """ 16 | def __init__(self, env, a, b): 17 | assert isinstance(env.action_space, spaces.Box), ( 18 | "expected Box action space, got {}".format(type(env.action_space))) 19 | assert np.less_equal(a, b).all(), (a, b) 20 | super(RescaleAction, self).__init__(env) 21 | self.a = np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + a 22 | self.b = np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + b 23 | self.action_space = spaces.Box(low=a, high=b, shape=env.action_space.shape, dtype=env.action_space.dtype) 24 | 25 | def action(self, action): 26 | assert np.all(np.greater_equal(action, self.a)), (action, self.a) 27 | assert np.all(np.less_equal(action, self.b)), (action, self.b) 28 | low = self.env.action_space.low 29 | high = self.env.action_space.high 30 | action = low + (high - low)*((action - self.a)/(self.b - self.a)) 31 | action = np.clip(action, low, high) 32 | return action 33 | -------------------------------------------------------------------------------- /gym/wrappers/resize_observation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym.spaces import Box 4 | from gym import ObservationWrapper 5 | 6 | 7 | class ResizeObservation(ObservationWrapper): 8 | r"""Downsample the image observation to a square image. """ 9 | def __init__(self, env, shape): 10 | super(ResizeObservation, self).__init__(env) 11 | if isinstance(shape, int): 12 | shape = (shape, shape) 13 | assert all(x > 0 for x in shape), shape 14 | self.shape = tuple(shape) 15 | 16 | obs_shape = self.shape + self.observation_space.shape[2:] 17 | self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8) 18 | 19 | def observation(self, observation): 20 | import cv2 21 | observation = cv2.resize(observation, self.shape[::-1], interpolation=cv2.INTER_AREA) 22 | if observation.ndim == 2: 23 | observation = np.expand_dims(observation, -1) 24 | return observation 25 | -------------------------------------------------------------------------------- /gym/wrappers/test_clip_action.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import gym 4 | from gym.wrappers import ClipAction 5 | 6 | 7 | def test_clip_action(): 8 | # mountaincar: action-based rewards 9 | make_env = lambda: gym.make('MountainCarContinuous-v0') 10 | env = make_env() 11 | wrapped_env = ClipAction(make_env()) 12 | 13 | seed = 0 14 | env.seed(seed) 15 | wrapped_env.seed(seed) 16 | 17 | env.reset() 18 | wrapped_env.reset() 19 | 20 | actions = [[.4], [1.2], [-0.3], [0.0], [-2.5]] 21 | for action in actions: 22 | obs1, r1, d1, _ = env.step(np.clip(action, env.action_space.low, env.action_space.high)) 23 | obs2, r2, d2, _ = wrapped_env.step(action) 24 | assert np.allclose(r1, r2) 25 | assert np.allclose(obs1, obs2) 26 | assert d1 == d2 27 | -------------------------------------------------------------------------------- /gym/wrappers/test_flatten_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import FlattenObservation 7 | from gym import spaces 8 | 9 | 10 | @pytest.mark.parametrize('env_id', ['Blackjack-v0', 'KellyCoinflip-v0']) 11 | def test_flatten_observation(env_id): 12 | env = gym.make(env_id) 13 | wrapped_env = FlattenObservation(env) 14 | 15 | obs = 
env.reset() 16 | wrapped_obs = wrapped_env.reset() 17 | 18 | if env_id == 'Blackjack-v0': 19 | space = spaces.Tuple(( 20 | spaces.Discrete(32), 21 | spaces.Discrete(11), 22 | spaces.Discrete(2))) 23 | wrapped_space = spaces.Box(-np.inf, np.inf, 24 | [32 + 11 + 2], dtype=np.float32) 25 | elif env_id == 'KellyCoinflip-v0': 26 | space = spaces.Tuple(( 27 | spaces.Box(0, 250.0, [1], dtype=np.float32), 28 | spaces.Discrete(300 + 1))) 29 | wrapped_space = spaces.Box(-np.inf, np.inf, 30 | [1 + (300 + 1)], dtype=np.float32) 31 | 32 | assert space.contains(obs) 33 | assert wrapped_space.contains(wrapped_obs) 34 | -------------------------------------------------------------------------------- /gym/wrappers/test_frame_stack.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | pytest.importorskip("atari_py") 3 | 4 | import numpy as np 5 | import gym 6 | from gym.wrappers import FrameStack 7 | try: 8 | import lz4 9 | except ImportError: 10 | lz4 = None 11 | 12 | 13 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0', 'Pong-v0']) 14 | @pytest.mark.parametrize('num_stack', [2, 3, 4]) 15 | @pytest.mark.parametrize('lz4_compress', [ 16 | pytest.param(True, marks=pytest.mark.skipif(lz4 is None, reason="Need lz4 to run tests with compression")), 17 | False 18 | ]) 19 | def test_frame_stack(env_id, num_stack, lz4_compress): 20 | env = gym.make(env_id) 21 | shape = env.observation_space.shape 22 | env = FrameStack(env, num_stack, lz4_compress) 23 | assert env.observation_space.shape == (num_stack,) + shape 24 | assert env.observation_space.dtype == env.env.observation_space.dtype 25 | 26 | obs = env.reset() 27 | obs = np.asarray(obs) 28 | assert obs.shape == (num_stack,) + shape 29 | for i in range(1, num_stack): 30 | assert np.allclose(obs[i - 1], obs[i]) 31 | 32 | obs, _, _, _ = env.step(env.action_space.sample()) 33 | obs = np.asarray(obs) 34 | assert obs.shape == (num_stack,) + shape 35 | for i in range(1, num_stack - 1): 36 | assert np.allclose(obs[i - 1], obs[i]) 37 | assert not np.allclose(obs[-1], obs[-2]) 38 | 39 | obs, _, _, _ = env.step(env.action_space.sample()) 40 | assert len(obs) == num_stack 41 | -------------------------------------------------------------------------------- /gym/wrappers/test_gray_scale_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import GrayScaleObservation 7 | from gym.wrappers import AtariPreprocessing 8 | pytest.importorskip('atari_py') 9 | pytest.importorskip('cv2') 10 | 11 | @pytest.mark.parametrize('env_id', ['PongNoFrameskip-v0', 'SpaceInvadersNoFrameskip-v0']) 12 | @pytest.mark.parametrize('keep_dim', [True, False]) 13 | def test_gray_scale_observation(env_id, keep_dim): 14 | gray_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=True) 15 | rgb_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=False) 16 | wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim) 17 | assert rgb_env.observation_space.shape[-1] == 3 18 | 19 | seed = 0 20 | gray_env.seed(seed) 21 | wrapped_env.seed(seed) 22 | 23 | gray_obs = gray_env.reset() 24 | wrapped_obs = wrapped_env.reset() 25 | 26 | if keep_dim: 27 | assert wrapped_env.observation_space.shape[-1] == 1 28 | assert len(wrapped_obs.shape) == 3 29 | wrapped_obs = wrapped_obs.squeeze(-1) 30 | else: 31 | assert len(wrapped_env.observation_space.shape) == 2 32 | assert 
len(wrapped_obs.shape) == 2 33 | 34 | # ALE gray scale is slightly different, but no more than by one shade 35 | assert np.allclose(gray_obs.astype('int32'), wrapped_obs.astype('int32'), atol=1) 36 | -------------------------------------------------------------------------------- /gym/wrappers/test_record_episode_statistics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import gym 4 | from gym.wrappers import RecordEpisodeStatistics 5 | 6 | 7 | @pytest.mark.parametrize('env_id', ['CartPole-v0', 'Pendulum-v0']) 8 | @pytest.mark.parametrize('deque_size', [2, 5]) 9 | def test_record_episode_statistics(env_id, deque_size): 10 | env = gym.make(env_id) 11 | env = RecordEpisodeStatistics(env, deque_size) 12 | 13 | for n in range(5): 14 | env.reset() 15 | assert env.episode_return == 0.0 16 | assert env.episode_length == 0 17 | for t in range(env.spec.max_episode_steps): 18 | _, _, done, info = env.step(env.action_space.sample()) 19 | if done: 20 | assert 'episode' in info 21 | assert all([item in info['episode'] for item in ['r', 'l', 't']]) 22 | break 23 | assert len(env.return_queue) == deque_size 24 | assert len(env.length_queue) == deque_size 25 | -------------------------------------------------------------------------------- /gym/wrappers/test_rescale_action.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import RescaleAction 7 | 8 | 9 | def test_rescale_action(): 10 | env = gym.make('CartPole-v1') 11 | with pytest.raises(AssertionError): 12 | env = RescaleAction(env, -1, 1) 13 | del env 14 | 15 | env = gym.make('Pendulum-v0') 16 | wrapped_env = RescaleAction(gym.make('Pendulum-v0'), -1, 1) 17 | 18 | seed = 0 19 | env.seed(seed) 20 | wrapped_env.seed(seed) 21 | 22 | obs = env.reset() 23 | wrapped_obs = wrapped_env.reset() 24 | assert np.allclose(obs, wrapped_obs) 25 | 26 | obs, reward, _, _ = env.step([1.5]) 27 | with pytest.raises(AssertionError): 28 | wrapped_env.step([1.5]) 29 | wrapped_obs, wrapped_reward, _, _ = wrapped_env.step([0.75]) 30 | 31 | assert np.allclose(obs, wrapped_obs) 32 | assert np.allclose(reward, wrapped_reward) 33 | -------------------------------------------------------------------------------- /gym/wrappers/test_resize_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import gym 4 | from gym.wrappers import ResizeObservation 5 | try: 6 | import atari_py 7 | except ImportError: 8 | atari_py = None 9 | 10 | 11 | @pytest.mark.skipif(atari_py is None, reason='Only run this test when atari_py is installed') 12 | @pytest.mark.parametrize('env_id', ['PongNoFrameskip-v0', 'SpaceInvadersNoFrameskip-v0']) 13 | @pytest.mark.parametrize('shape', [16, 32, (8, 5), [10, 7]]) 14 | def test_resize_observation(env_id, shape): 15 | env = gym.make(env_id) 16 | env = ResizeObservation(env, shape) 17 | 18 | 19 | assert env.observation_space.shape[-1] == 3 20 | obs = env.reset() 21 | if isinstance(shape, int): 22 | assert env.observation_space.shape[:2] == (shape, shape) 23 | assert obs.shape == (shape, shape, 3) 24 | else: 25 | assert env.observation_space.shape[:2] == tuple(shape) 26 | assert obs.shape == tuple(shape) + (3,) 27 | -------------------------------------------------------------------------------- /gym/wrappers/test_time_aware_observation.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import gym 4 | from gym.wrappers import TimeAwareObservation 5 | 6 | 7 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0']) 8 | def test_time_aware_observation(env_id): 9 | env = gym.make(env_id) 10 | wrapped_env = TimeAwareObservation(env) 11 | 12 | assert wrapped_env.observation_space.shape[0] == env.observation_space.shape[0] + 1 13 | 14 | obs = env.reset() 15 | wrapped_obs = wrapped_env.reset() 16 | assert wrapped_env.t == 0.0 17 | assert wrapped_obs[-1] == 0.0 18 | assert wrapped_obs.shape[0] == obs.shape[0] + 1 19 | 20 | wrapped_obs, _, _, _ = wrapped_env.step(env.action_space.sample()) 21 | assert wrapped_env.t == 1.0 22 | assert wrapped_obs[-1] == 1.0 23 | assert wrapped_obs.shape[0] == obs.shape[0] + 1 24 | 25 | wrapped_obs, _, _, _ = wrapped_env.step(env.action_space.sample()) 26 | assert wrapped_env.t == 2.0 27 | assert wrapped_obs[-1] == 2.0 28 | assert wrapped_obs.shape[0] == obs.shape[0] + 1 29 | 30 | wrapped_obs = wrapped_env.reset() 31 | assert wrapped_env.t == 0.0 32 | assert wrapped_obs[-1] == 0.0 33 | assert wrapped_obs.shape[0] == obs.shape[0] + 1 34 | -------------------------------------------------------------------------------- /gym/wrappers/test_transform_observation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import TransformObservation 7 | 8 | 9 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0']) 10 | def test_transform_observation(env_id): 11 | affine_transform = lambda x: 3*x + 2 12 | env = gym.make(env_id) 13 | wrapped_env = TransformObservation(gym.make(env_id), lambda obs: affine_transform(obs)) 14 | 15 | env.seed(0) 16 | wrapped_env.seed(0) 17 | 18 | obs = env.reset() 19 | wrapped_obs = wrapped_env.reset() 20 | assert np.allclose(wrapped_obs, affine_transform(obs)) 21 | 22 | action = env.action_space.sample() 23 | obs, reward, done, _ = env.step(action) 24 | wrapped_obs, wrapped_reward, wrapped_done, _ = wrapped_env.step(action) 25 | assert np.allclose(wrapped_obs, affine_transform(obs)) 26 | assert np.allclose(wrapped_reward, reward) 27 | assert wrapped_done == done 28 | -------------------------------------------------------------------------------- /gym/wrappers/test_transform_reward.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import numpy as np 4 | 5 | import gym 6 | from gym.wrappers import TransformReward 7 | 8 | 9 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0']) 10 | def test_transform_reward(env_id): 11 | # use case #1: scale 12 | scales = [0.1, 200] 13 | for scale in scales: 14 | env = gym.make(env_id) 15 | wrapped_env = TransformReward(gym.make(env_id), lambda r: scale*r) 16 | action = env.action_space.sample() 17 | 18 | env.seed(0) 19 | env.reset() 20 | wrapped_env.seed(0) 21 | wrapped_env.reset() 22 | 23 | _, reward, _, _ = env.step(action) 24 | _, wrapped_reward, _, _ = wrapped_env.step(action) 25 | 26 | assert wrapped_reward == scale*reward 27 | del env, wrapped_env 28 | 29 | # use case #2: clip 30 | min_r = -0.0005 31 | max_r = 0.0002 32 | env = gym.make(env_id) 33 | wrapped_env = TransformReward(gym.make(env_id), lambda r: np.clip(r, min_r, max_r)) 34 | action = env.action_space.sample() 35 | 36 | env.seed(0) 37 | env.reset() 38 | wrapped_env.seed(0) 39 | wrapped_env.reset() 40 | 41 | _, 
reward, _, _ = env.step(action) 42 | _, wrapped_reward, _, _ = wrapped_env.step(action) 43 | 44 | assert abs(wrapped_reward) < abs(reward) 45 | assert wrapped_reward == -0.0005 or wrapped_reward == 0.0002 46 | del env, wrapped_env 47 | 48 | # use case #3: sign 49 | env = gym.make(env_id) 50 | wrapped_env = TransformReward(gym.make(env_id), lambda r: np.sign(r)) 51 | 52 | env.seed(0) 53 | env.reset() 54 | wrapped_env.seed(0) 55 | wrapped_env.reset() 56 | 57 | for _ in range(1000): 58 | action = env.action_space.sample() 59 | _, wrapped_reward, done, _ = wrapped_env.step(action) 60 | assert wrapped_reward in [-1.0, 0.0, 1.0] 61 | if done: 62 | break 63 | del env, wrapped_env 64 | -------------------------------------------------------------------------------- /gym/wrappers/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/wrappers/tests/__init__.py -------------------------------------------------------------------------------- /gym/wrappers/time_aware_observation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from gym.spaces import Box 4 | from gym import ObservationWrapper 5 | 6 | 7 | class TimeAwareObservation(ObservationWrapper): 8 | r"""Augment the observation with current time step in the trajectory. 9 | 10 | .. note:: 11 | Currently it only works with one-dimensional observation space. It doesn't 12 | support pixel observation space yet. 13 | 14 | """ 15 | def __init__(self, env): 16 | super(TimeAwareObservation, self).__init__(env) 17 | assert isinstance(env.observation_space, Box) 18 | assert env.observation_space.dtype == np.float32 19 | low = np.append(self.observation_space.low, 0.0) 20 | high = np.append(self.observation_space.high, np.inf) 21 | self.observation_space = Box(low, high, dtype=np.float32) 22 | 23 | def observation(self, observation): 24 | return np.append(observation, self.t) 25 | 26 | def step(self, action): 27 | self.t += 1 28 | return super(TimeAwareObservation, self).step(action) 29 | 30 | def reset(self, **kwargs): 31 | self.t = 0 32 | return super(TimeAwareObservation, self).reset(**kwargs) 33 | -------------------------------------------------------------------------------- /gym/wrappers/time_limit.py: -------------------------------------------------------------------------------- 1 | import gym 2 | 3 | 4 | class TimeLimit(gym.Wrapper): 5 | def __init__(self, env, max_episode_steps=None): 6 | super(TimeLimit, self).__init__(env) 7 | if max_episode_steps is None and self.env.spec is not None: 8 | max_episode_steps = env.spec.max_episode_steps 9 | if self.env.spec is not None: 10 | self.env.spec.max_episode_steps = max_episode_steps 11 | self._max_episode_steps = max_episode_steps 12 | self._elapsed_steps = None 13 | 14 | def step(self, action): 15 | assert self._elapsed_steps is not None, "Cannot call env.step() before calling reset()" 16 | observation, reward, done, info = self.env.step(action) 17 | self._elapsed_steps += 1 18 | if self._elapsed_steps >= self._max_episode_steps: 19 | info['TimeLimit.truncated'] = not done 20 | done = True 21 | return observation, reward, done, info 22 | 23 | def reset(self, **kwargs): 24 | self._elapsed_steps = 0 25 | return self.env.reset(**kwargs) 26 | -------------------------------------------------------------------------------- /gym/wrappers/transform_observation.py: 
-------------------------------------------------------------------------------- 1 | from gym import ObservationWrapper 2 | 3 | 4 | class TransformObservation(ObservationWrapper): 5 | r"""Transform the observation via an arbitrary function. 6 | 7 | Example:: 8 | 9 | >>> import gym 10 | >>> env = gym.make('CartPole-v1') 11 | >>> env = TransformObservation(env, lambda obs: obs + 0.1*np.random.randn(*obs.shape)) 12 | >>> env.reset() 13 | array([-0.08319338, 0.04635121, -0.07394746, 0.20877492]) 14 | 15 | Args: 16 | env (Env): environment 17 | f (callable): a function that transforms the observation 18 | 19 | """ 20 | def __init__(self, env, f): 21 | super(TransformObservation, self).__init__(env) 22 | assert callable(f) 23 | self.f = f 24 | 25 | def observation(self, observation): 26 | return self.f(observation) 27 | -------------------------------------------------------------------------------- /gym/wrappers/transform_reward.py: -------------------------------------------------------------------------------- 1 | from gym import RewardWrapper 2 | 3 | 4 | class TransformReward(RewardWrapper): 5 | r"""Transform the reward via an arbitrary function. 6 | 7 | Example:: 8 | 9 | >>> import gym 10 | >>> env = gym.make('CartPole-v1') 11 | >>> env = TransformReward(env, lambda r: 0.01*r) 12 | >>> env.reset() 13 | >>> observation, reward, done, info = env.step(env.action_space.sample()) 14 | >>> reward 15 | 0.01 16 | 17 | Args: 18 | env (Env): environment 19 | f (callable): a function that transforms the reward 20 | 21 | """ 22 | def __init__(self, env, f): 23 | super(TransformReward, self).__init__(env) 24 | assert callable(f) 25 | self.f = f 26 | 27 | def reward(self, reward): 28 | return self.f(reward) 29 | -------------------------------------------------------------------------------- /memory/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /memory/sp_memory.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | class ReplayBuffer: 5 | """ 6 | A simple FIFO experience replay buffer for TD3 agents. 
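Example (an illustrative sketch; obs, act, rew, next_obs, done are placeholder transition variables)::

    buf = ReplayBuffer(obs_dim=8, act_dim=2, size=int(1e6))
    buf.store(obs, act, rew, next_obs, done)    # one transition at a time
    batch = buf.sample_batch(batch_size=256)    # dict with keys obs1, obs2, acts, rews, done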
7 | """ 8 | 9 | def __init__(self, obs_dim, act_dim, size): 10 | self.obs1_buf = np.zeros([size, obs_dim], dtype=np.float32) 11 | self.obs2_buf = np.zeros([size, obs_dim], dtype=np.float32) 12 | self.acts_buf = np.zeros([size, act_dim], dtype=np.float32) 13 | self.rews_buf = np.zeros(size, dtype=np.float32) 14 | self.done_buf = np.zeros(size, dtype=np.float32) 15 | self.ptr, self.size, self.max_size = 0, 0, size 16 | 17 | def store(self, obs, act, rew, next_obs, done): 18 | self.obs1_buf[self.ptr] = obs 19 | self.obs2_buf[self.ptr] = next_obs 20 | self.acts_buf[self.ptr] = act 21 | self.rews_buf[self.ptr] = rew 22 | self.done_buf[self.ptr] = done 23 | self.ptr = (self.ptr + 1) % self.max_size 24 | self.size = min(self.size + 1, self.max_size) 25 | 26 | def sample_batch(self, batch_size=32): 27 | idxs = np.random.randint(0, self.size, size=batch_size) 28 | return dict(obs1=self.obs1_buf[idxs], 29 | obs2=self.obs2_buf[idxs], 30 | acts=self.acts_buf[idxs], 31 | rews=self.rews_buf[idxs], 32 | done=self.done_buf[idxs]) -------------------------------------------------------------------------------- /memory/sp_memory_torch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | class ReplayBuffer: 6 | """ 7 | A simple FIFO experience replay buffer for TD3 agents. 8 | """ 9 | 10 | def __init__(self, obs_dim, act_dim, size, device=None): 11 | self.device = device 12 | self.obs_buf = np.zeros(self.combined_shape(size, obs_dim), dtype=np.float32) 13 | self.obs2_buf = np.zeros(self.combined_shape(size, obs_dim), dtype=np.float32) 14 | self.act_buf = np.zeros(self.combined_shape(size, act_dim), dtype=np.float32) 15 | self.rew_buf = np.zeros(size, dtype=np.float32) 16 | self.done_buf = np.zeros(size, dtype=np.float32) 17 | self.ptr, self.size, self.max_size = 0, 0, size 18 | 19 | def combined_shape(self, length, shape=None): 20 | if shape is None: 21 | return (length,) 22 | return (length, shape) if np.isscalar(shape) else (length, *shape) 23 | 24 | def store(self, obs, act, rew, next_obs, done): 25 | self.obs_buf[self.ptr] = obs 26 | self.obs2_buf[self.ptr] = next_obs 27 | self.act_buf[self.ptr] = act 28 | self.rew_buf[self.ptr] = rew 29 | self.done_buf[self.ptr] = done 30 | self.ptr = (self.ptr+1) % self.max_size 31 | self.size = min(self.size+1, self.max_size) 32 | 33 | def sample_batch(self, batch_size=32): 34 | idxs = np.random.randint(0, self.size, size=batch_size) 35 | batch = dict(obs=self.obs_buf[idxs], 36 | obs2=self.obs2_buf[idxs], 37 | act=self.act_buf[idxs], 38 | rew=self.rew_buf[idxs], 39 | done=self.done_buf[idxs]) 40 | return {k: torch.as_tensor(v, dtype=torch.float32, device=self.device) for k,v in batch.items()} 41 | -------------------------------------------------------------------------------- /pip_requirement.txt: -------------------------------------------------------------------------------- 1 | cloudpickle==1.2.1 2 | ipython==7.19.0 3 | joblib==1.0.0 4 | matplotlib==3.3.3 5 | # mpi4py @ file:///tmp/build/80754af9/mpi4py_1594373948151/work 6 | # or conda install mpi4py -y 7 | pandas==1.2.0 8 | psutil==5.8.0 9 | pyglet==1.5.0 10 | pytest==6.2.1 11 | # scipy @ file:///tmp/build/80754af9/scipy_1612469579197/work 12 | # seaborn @ file:///tmp/build/80754af9/seaborn_1608578541026/work 13 | # or seaborn==0.8.1 14 | tqdm==4.55.0 15 | -------------------------------------------------------------------------------- /spinup_utils/__init__.py: 
-------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /spinup_utils/delete_no_checkpoint_or_pth.py: -------------------------------------------------------------------------------- 1 | Decided to just delete this script after all~ 2 | -------------------------------------------------------------------------------- /spinup_utils/demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/spinup_utils/demo.png -------------------------------------------------------------------------------- /spinup_utils/plot_demo_files/2020-10-02_12-18-23-gym_clean_buffer_reach_d2s_dense_n2b2_s5958/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "argparse": "argparse", 3 | "args": null, 4 | "b_lr": 0.001, 5 | "base_lr": 0.001, 6 | "env_str": "FetchReach-v1", 7 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2", 8 | "logger": null, 9 | "logger_kwargs": { 10 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2", 11 | "output_dir": "../Gym_reach_D2S_clean_buffer/2020-10-02_gym_clean_buffer_reach_d2s_dense_n2b2/2020-10-02_12-18-23-gym_clean_buffer_reach_d2s_dense_n2b2_s5958" 12 | }, 13 | "noise_value": 0.1, 14 | "parser": null, 15 | "random_seed": 5958 16 | } -------------------------------------------------------------------------------- /spinup_utils/plot_demo_files/2020-10-02_12-18-42-gym_clean_buffer_reach_d2s_dense_n2b2_s9317/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "argparse": "argparse", 3 | "args": null, 4 | "b_lr": 0.001, 5 | "base_lr": 0.001, 6 | "env_str": "FetchReach-v1", 7 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2", 8 | "logger": null, 9 | "logger_kwargs": { 10 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2", 11 | "output_dir": "../Gym_reach_D2S_clean_buffer/2020-10-02_gym_clean_buffer_reach_d2s_dense_n2b2/2020-10-02_12-18-42-gym_clean_buffer_reach_d2s_dense_n2b2_s9317" 12 | }, 13 | "noise_value": 0.1, 14 | "parser": null, 15 | "random_seed": 9317 16 | } -------------------------------------------------------------------------------- /spinup_utils/plot_demo_files/2020-10-02_12-18-43-gym_clean_buffer_reach_d2s_dense_n2b2_s7515/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "argparse": "argparse", 3 | "args": null, 4 | "b_lr": 0.001, 5 | "base_lr": 0.001, 6 | "env_str": "FetchReach-v1", 7 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2", 8 | "logger": null, 9 | "logger_kwargs": { 10 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2", 11 | "output_dir": "../Gym_reach_D2S_clean_buffer/2020-10-02_gym_clean_buffer_reach_d2s_dense_n2b2/2020-10-02_12-18-43-gym_clean_buffer_reach_d2s_dense_n2b2_s7515" 12 | }, 13 | "noise_value": 0.1, 14 | "parser": null, 15 | "random_seed": 7515 16 | } -------------------------------------------------------------------------------- /spinup_utils/plot_demo_files/2020-10-02_12-18-50-gym_clean_buffer_reach_d2s_dense_n2b2_s9180/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "argparse": "argparse", 3 | "args": null, 4 | "b_lr": 0.001, 5 | "base_lr": 0.001, 6 | "env_str": "FetchReach-v1", 7 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2", 8 | "logger": null, 9 | "logger_kwargs": { 10 | "exp_name":
"gym_clean_buffer_reach_d2s_dense_n2b2", 11 | "output_dir": "../Gym_reach_D2S_clean_buffer/2020-10-02_gym_clean_buffer_reach_d2s_dense_n2b2/2020-10-02_12-18-50-gym_clean_buffer_reach_d2s_dense_n2b2_s9180" 12 | }, 13 | "noise_value": 0.1, 14 | "parser": null, 15 | "random_seed": 9180 16 | } -------------------------------------------------------------------------------- /spinup_utils/plot_demo_files/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /spinup_utils/print_logger.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | 4 | class Logger(object): 5 | def __init__(self, filename='default.log', add_flag=True, stream=sys.stdout): 6 | self.terminal = stream 7 | print("filename:", filename) 8 | self.filename = filename 9 | self.add_flag = add_flag 10 | # self.log = open(filename, 'a+') 11 | 12 | def write(self, message): 13 | if self.add_flag: 14 | with open(self.filename, 'a+') as log: 15 | self.terminal.write(message) 16 | log.write(message) 17 | else: 18 | with open(self.filename, 'w') as log: 19 | self.terminal.write(message) 20 | log.write(message) 21 | 22 | def flush(self): 23 | pass 24 | 25 | 26 | def main(): 27 | logger_kwargs = {'output_dir':"logger/"} 28 | try: 29 | import os 30 | os.mkdir(logger_kwargs['output_dir']) 31 | except: 32 | pass 33 | sys.stdout = Logger(logger_kwargs["output_dir"]+"print.log", 34 | sys.stdout) 35 | 36 | print('print something') 37 | print("*" * 3) 38 | import time 39 | time.sleep(2) 40 | print("other things") 41 | 42 | 43 | if __name__ == '__main__': 44 | main() 45 | -------------------------------------------------------------------------------- /spinup_utils/run_entrypoint.py: -------------------------------------------------------------------------------- 1 | """ 2 | 如果用mpi_fork的话,就要多次启动当前脚本。 3 | 如果不进行隔离的话,在run_utils.py中的for var in vars中执行ppo. 4 | 执行到ppo中的mpi_fork(num_cpu)这句话时,会 5 | 6 | """ 7 | 8 | 9 | import zlib 10 | import pickle 11 | import base64 12 | import time 13 | from spinup_utils.mpi_tools import proc_id 14 | 15 | 16 | if __name__ == '__main__': 17 | import argparse 18 | parser = argparse.ArgumentParser() 19 | # 为什么加了这个,就能直接获取这个变量? 20 | # 当执行python run_entrypoint.py encoded_thunk 时, 21 | parser.add_argument('encoded_thunk') 22 | args = parser.parse_args() 23 | # print("thunk.args:", args) 24 | # input(("args")) 25 | # pickle.loads是读取函数 26 | thunk = pickle.loads(zlib.decompress(base64.b64decode(args.encoded_thunk))) 27 | # print("thunk:", thunk) 28 | # print("entry_point_proc_id:", proc_id()) 29 | # time.sleep(1) 30 | thunk() 31 | 32 | 33 | -------------------------------------------------------------------------------- /spinup_utils/serialization_utils.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def convert_json(obj): 5 | """ Convert obj to a version which can be serialized with JSON. 6 | 垃圾递归! 
7 | """ 8 | 9 | if is_json_serializable(obj): 10 | return obj 11 | else: 12 | if isinstance(obj, dict): 13 | return {convert_json(k): convert_json(v) 14 | for k,v in obj.items()} 15 | 16 | elif isinstance(obj, tuple): 17 | return (convert_json(x) for x in obj) 18 | 19 | elif isinstance(obj, list): 20 | return [convert_json(x) for x in obj] 21 | 22 | elif hasattr(obj,'__name__') and not('lambda' in obj.__name__): 23 | # return 24 | return convert_json(obj.__name__) 25 | 26 | elif hasattr(obj,'__dict__') and obj.__dict__: 27 | return 28 | obj_dict = {convert_json(k): convert_json(v) 29 | for k,v in obj.__dict__.items()} 30 | return {str(obj): obj_dict} 31 | 32 | return str(obj) 33 | 34 | def is_json_serializable(v): 35 | try: 36 | json.dumps(v) 37 | return True 38 | except: 39 | return False -------------------------------------------------------------------------------- /spinup_utils/user_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | 4 | # Where experiment outputs are saved by default: 5 | DEFAULT_DATA_DIR = osp.join(osp.abspath(osp.dirname(osp.dirname(__file__))),'data') 6 | 7 | # Whether to automatically insert a date and time stamp into the names of 8 | # save directories: 9 | FORCE_DATESTAMP = True 10 | 11 | # Whether GridSearch provides automatically-generated default shorthands: 12 | DEFAULT_SHORTHAND = True 13 | 14 | # Tells the GridSearch how many seconds to pause for before launching 15 | # experiments. 16 | WAIT_BEFORE_LAUNCH = 5 -------------------------------------------------------------------------------- /tune_exps/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tune_exps/tune_func.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from mpi4py import MPI 4 | import numpy as np 5 | 6 | # 主函数替换成你需要的任何关于强化的主程序 7 | def func(params_dict_list): 8 | proc_id = MPI.COMM_WORLD.Get_rank() 9 | if proc_id > len(params_dict_list)-1: 10 | print("proc_id:", proc_id) 11 | print("sys.exit()") 12 | sys.exit() 13 | print("sys.exit()") 14 | params_dict = params_dict_list[proc_id] 15 | print("proc_id:", proc_id) 16 | print("params_dict:", params_dict) 17 | print("-"*20) 18 | 19 | 20 | if __name__=='__main__': 21 | params_dict = { 22 | 'lr': [2, 3, 4, 5, 6, 7], 23 | "batch": [10, 20, 30, 40, 50,], 24 | "epoch": [100, 200, 300, 400, 500, 600], 25 | } 26 | import itertools 27 | 28 | params_list = [list(value) for value in itertools.product(*params_dict.values())] 29 | params_dict_list = [{key: cur_param.pop(0) for key, value in params_dict.items()} for cur_param in params_list] 30 | for i in range(2): 31 | func(params_dict_list=params_dict_list) 32 | --------------------------------------------------------------------------------