├── D2SR_RCAR_中文版.pdf
├── D2SSR
├── __init__.py
├── baseOffPolicy.py
├── d2ssr_train_torch.py
├── td3_per_her.py
├── torch_arguments.py
└── tune_d2ssr_main.py
├── DRLib.jpg
├── DRLib_tree.txt
├── HER_DRLib_Net_Reload
└── 2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1
│ └── 2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300
│ ├── actor.pth
│ ├── config.json
│ ├── norm.pkl
│ └── progress.txt
├── HER_DRLib_mpi
├── 2022-07-29_HER_mpi19_random_TD3_FetchPush-v1
│ └── 2022-07-29_12-21-04-HER_mpi19_random_TD3_FetchPush-v1_s123
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-29_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1
│ └── 2022-07-29_12-22-08-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s123
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-29_HER_mpi1_random_TD3_FetchPush-v1
│ ├── 2022-07-29_12-21-16-HER_mpi1_random_TD3_FetchPush-v1_s123
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-29_12-24-48-HER_mpi1_random_TD3_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-29_12-24-56-HER_mpi1_random_TD3_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-29_HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1
│ └── 2022-07-29_12-22-27-HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1_s123
│ │ ├── config.json
│ │ └── progress.txt
└── MPI1-6-19.png
├── HER_DRLib_mpi1
├── 2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1
│ ├── 2022-07-29_12-25-10-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-29_12-25-21-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-29_12-25-27-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-29_HER_mpi1_random_TD3_FetchPush-v1
│ ├── 2022-07-29_12-21-16-HER_mpi1_random_TD3_FetchPush-v1_s123
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-29_12-24-48-HER_mpi1_random_TD3_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-29_12-24-56-HER_mpi1_random_TD3_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1
│ ├── 2022-07-29_12-25-42-HER_mpi1_random_TD3_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-29_12-25-46-HER_mpi1_random_TD3_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-29_12-25-47-HER_mpi1_random_TD3_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1
│ ├── 2022-07-30_22-19-36-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-30_22-19-42-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-30_22-19-45-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1
│ ├── 2022-07-30_22-19-22-HER_mpi1_random_DDPG_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-30_22-19-27-HER_mpi1_random_DDPG_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-30_22-19-30-HER_mpi1_random_DDPG_FetchPush-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1
│ ├── 2022-07-30_22-18-55-HER_mpi1_random_DDPG_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-30_22-19-10-HER_mpi1_random_DDPG_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-30_22-19-14-HER_mpi1_random_DDPG_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1
│ ├── 2022-07-31_17-09-31-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-31_17-09-33-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-31_17-09-34-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1
│ ├── 2022-07-31_17-09-45-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-31_17-09-48-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-31_17-09-48-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1
│ ├── 2022-07-31_17-09-58-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-31_17-10-05-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-31_17-10-07-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1
│ ├── 2022-07-31_17-09-01-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-31_17-09-03-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-31_17-09-05-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-31_HER_mpi1_random_SAC_FetchPush-v1
│ ├── 2022-07-31_17-08-42-HER_mpi1_random_SAC_FetchPush-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-31_17-08-44-HER_mpi1_random_SAC_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-31_17-08-48-HER_mpi1_random_SAC_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1
│ ├── 2022-07-31_17-08-20-HER_mpi1_random_SAC_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-07-31_17-08-26-HER_mpi1_random_SAC_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-07-31_17-08-29-HER_mpi1_random_SAC_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1
│ ├── 2022-08-01_12-21-12-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-21-16-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-21-19-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1
│ ├── 2022-08-01_12-20-44-HER_mpi1_random_DDPGTorch_FetchPush-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-20-54-HER_mpi1_random_DDPGTorch_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-20-57-HER_mpi1_random_DDPGTorch_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1
│ ├── 2022-08-01_12-21-31-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-21-36-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-21-39-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1
│ ├── 2022-08-01_12-22-28-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-22-30-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-22-34-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1
│ ├── 2022-08-01_12-22-12-HER_mpi1_random_SACTorch_FetchPush-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-22-16-HER_mpi1_random_SACTorch_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-22-18-HER_mpi1_random_SACTorch_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1
│ ├── 2022-08-01_12-21-55-HER_mpi1_random_SACTorch_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-21-58-HER_mpi1_random_SACTorch_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-22-01-HER_mpi1_random_SACTorch_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1
│ ├── 2022-08-01_12-15-29-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-15-36-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-15-40-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1
│ ├── 2022-08-01_12-15-51-HER_mpi1_random_TD3Torch_FetchPush-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-16-00-HER_mpi1_random_TD3Torch_FetchPush-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-16-07-HER_mpi1_random_TD3Torch_FetchPush-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
├── 2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1
│ ├── 2022-08-01_12-16-17-HER_mpi1_random_TD3Torch_FetchSlide-v1_s100
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2022-08-01_12-16-22-HER_mpi1_random_TD3Torch_FetchSlide-v1_s200
│ │ ├── config.json
│ │ └── progress.txt
│ └── 2022-08-01_12-16-25-HER_mpi1_random_TD3Torch_FetchSlide-v1_s300
│ │ ├── config.json
│ │ └── progress.txt
├── 2ep_reward_Pick.png
├── 2ep_reward_Push.png
└── 2ep_reward_Slide.png
├── LICENSE
├── README.md
├── algos
├── __init__.py
├── pytorch
│ ├── __init__.py
│ ├── ddpg_sp
│ │ ├── __init__.py
│ │ ├── core.py
│ │ ├── ddpg.py
│ │ └── ddpg_per_her.py
│ ├── offPolicy
│ │ ├── HER_introduction.md
│ │ ├── __init__.py
│ │ ├── baseOffPolicy.py
│ │ └── norm.py
│ ├── sac_sp
│ │ ├── __init__.py
│ │ ├── core.py
│ │ ├── sac.py
│ │ └── sac_per_her.py
│ └── td3_sp
│ │ ├── MPI_td3_per_her.py
│ │ ├── README.md
│ │ ├── __init__.py
│ │ ├── core.py
│ │ ├── td3_gpu_class.py
│ │ └── td3_per_her.py
└── tf1
│ ├── __init__.py
│ ├── ddpg_sp
│ ├── DDPG_class.py
│ ├── DDPG_per_class.py
│ ├── DDPG_per_her.py
│ ├── DDPG_per_her_class.py
│ ├── DDPG_sp.py
│ ├── MPI_DDPG_per_her.py
│ ├── __init__.py
│ └── core.py
│ ├── offPolicy
│ ├── __init__.py
│ ├── baseOffPolicy.py
│ ├── core.py
│ └── norm.py
│ ├── sac_auto
│ ├── MPI_sac_auto_per_her.py
│ ├── __init__.py
│ ├── core.py
│ ├── sac_auto_class.py
│ ├── sac_auto_per_class.py
│ └── sac_auto_per_her.py
│ ├── sac_sp
│ ├── MPI_SAC_per_her.py
│ ├── SAC_class.py
│ ├── SAC_per_class.py
│ ├── SAC_per_her.py
│ ├── SAC_sp.py
│ ├── __init__.py
│ └── core.py
│ └── td3_sp
│ ├── MPI_TD3_per_her.py
│ ├── README.md
│ ├── TD3_class.py
│ ├── TD3_per_class.py
│ ├── TD3_per_her.py
│ ├── TD3_per_her_class.py
│ ├── TD3_sp.py
│ ├── __init__.py
│ └── core.py
├── arguments.py
├── gym
├── __init__.py
├── core.py
├── demo.py
├── demo_drawer.py
├── demo_insert_rand.py
├── demo_obs_push.py
├── double_push_demo.py
├── drawer2.png
├── drawer_box_demo.py
├── envs
│ ├── __init__.py
│ ├── algorithmic
│ │ ├── __init__.py
│ │ ├── algorithmic_env.py
│ │ ├── copy_.py
│ │ ├── duplicated_input.py
│ │ ├── repeat_copy.py
│ │ ├── reverse.py
│ │ ├── reversed_addition.py
│ │ └── tests
│ │ │ ├── __init__.py
│ │ │ └── test_algorithmic.py
│ ├── atari
│ │ ├── __init__.py
│ │ └── atari_env.py
│ ├── box2d
│ │ ├── __init__.py
│ │ ├── bipedal_walker.py
│ │ ├── car_dynamics.py
│ │ ├── car_racing.py
│ │ ├── lunar_lander.py
│ │ └── test_lunar_lander.py
│ ├── classic_control
│ │ ├── __init__.py
│ │ ├── acrobot.py
│ │ ├── assets
│ │ │ └── clockwise.png
│ │ ├── cartpole.py
│ │ ├── continuous_mountain_car.py
│ │ ├── mountain_car.py
│ │ ├── pendulum.py
│ │ └── rendering.py
│ ├── mujoco
│ │ ├── __init__.py
│ │ ├── ant.py
│ │ ├── ant_v3.py
│ │ ├── assets
│ │ │ ├── ant.xml
│ │ │ ├── half_cheetah.xml
│ │ │ ├── hopper.xml
│ │ │ ├── humanoid.xml
│ │ │ ├── humanoidstandup.xml
│ │ │ ├── inverted_double_pendulum.xml
│ │ │ ├── inverted_pendulum.xml
│ │ │ ├── point.xml
│ │ │ ├── pusher.xml
│ │ │ ├── reacher.xml
│ │ │ ├── striker.xml
│ │ │ ├── swimmer.xml
│ │ │ ├── thrower.xml
│ │ │ └── walker2d.xml
│ │ ├── half_cheetah.py
│ │ ├── half_cheetah_v3.py
│ │ ├── hopper.py
│ │ ├── hopper_v3.py
│ │ ├── humanoid.py
│ │ ├── humanoid_v3.py
│ │ ├── humanoidstandup.py
│ │ ├── inverted_double_pendulum.py
│ │ ├── inverted_pendulum.py
│ │ ├── mujoco_env.py
│ │ ├── pusher.py
│ │ ├── pusher2d_her.py
│ │ ├── reacher.py
│ │ ├── reacher2d_her.py
│ │ ├── reacher2d_her_harder.py
│ │ ├── striker.py
│ │ ├── swimmer.py
│ │ ├── swimmer_v3.py
│ │ ├── thrower.py
│ │ ├── walker2d.py
│ │ └── walker2d_v3.py
│ ├── registration.py
│ ├── robotics
│ │ ├── __init__.py
│ │ ├── assets
│ │ │ ├── LICENSE.md
│ │ │ ├── fetch
│ │ │ │ ├── double_push.xml
│ │ │ │ ├── drawer.xml
│ │ │ │ ├── drawer_body.xml
│ │ │ │ ├── drawer_box.xml
│ │ │ │ ├── drawer_dependencies.xml
│ │ │ │ ├── insert_rand.xml
│ │ │ │ ├── obs_push.xml
│ │ │ │ ├── occ_push.xml
│ │ │ │ ├── pick_and_place.xml
│ │ │ │ ├── push.xml
│ │ │ │ ├── reach.xml
│ │ │ │ ├── robot.xml
│ │ │ │ ├── shared.xml
│ │ │ │ ├── slide.xml
│ │ │ │ ├── stack.xml
│ │ │ │ ├── three_push.xml
│ │ │ │ └── three_stack.xml
│ │ │ ├── hand
│ │ │ │ ├── manipulate_block.xml
│ │ │ │ ├── manipulate_block_touch_sensors.xml
│ │ │ │ ├── manipulate_egg.xml
│ │ │ │ ├── manipulate_egg_touch_sensors.xml
│ │ │ │ ├── manipulate_pen.xml
│ │ │ │ ├── manipulate_pen_touch_sensors.xml
│ │ │ │ ├── reach.xml
│ │ │ │ ├── robot.xml
│ │ │ │ ├── robot_touch_sensors_92.xml
│ │ │ │ ├── shared.xml
│ │ │ │ ├── shared_asset.xml
│ │ │ │ └── shared_touch_sensors_92.xml
│ │ │ ├── stls
│ │ │ │ ├── fetch
│ │ │ │ │ ├── base_link_collision.stl
│ │ │ │ │ ├── bellows_link_collision.stl
│ │ │ │ │ ├── drawer.stl
│ │ │ │ │ ├── drawercase.stl
│ │ │ │ │ ├── drawerhandle.stl
│ │ │ │ │ ├── elbow_flex_link_collision.stl
│ │ │ │ │ ├── estop_link.stl
│ │ │ │ │ ├── forearm_roll_link_collision.stl
│ │ │ │ │ ├── gripper_link.stl
│ │ │ │ │ ├── head_pan_link_collision.stl
│ │ │ │ │ ├── head_tilt_link_collision.stl
│ │ │ │ │ ├── l_wheel_link_collision.stl
│ │ │ │ │ ├── laser_link.stl
│ │ │ │ │ ├── r_wheel_link_collision.stl
│ │ │ │ │ ├── shoulder_lift_link_collision.stl
│ │ │ │ │ ├── shoulder_pan_link_collision.stl
│ │ │ │ │ ├── torso_fixed_link.stl
│ │ │ │ │ ├── torso_lift_link_collision.stl
│ │ │ │ │ ├── upperarm_roll_link_collision.stl
│ │ │ │ │ ├── window_base.stl
│ │ │ │ │ ├── window_frame.stl
│ │ │ │ │ ├── window_h_base.stl
│ │ │ │ │ ├── window_h_frame.stl
│ │ │ │ │ ├── windowa_frame.stl
│ │ │ │ │ ├── windowa_glass.stl
│ │ │ │ │ ├── windowa_h_frame.stl
│ │ │ │ │ ├── windowa_h_glass.stl
│ │ │ │ │ ├── windowb_frame.stl
│ │ │ │ │ ├── windowb_glass.stl
│ │ │ │ │ ├── windowb_h_frame.stl
│ │ │ │ │ ├── windowb_h_glass.stl
│ │ │ │ │ ├── wrist_flex_link_collision.stl
│ │ │ │ │ └── wrist_roll_link_collision.stl
│ │ │ │ └── hand
│ │ │ │ │ ├── F1.stl
│ │ │ │ │ ├── F2.stl
│ │ │ │ │ ├── F3.stl
│ │ │ │ │ ├── TH1_z.stl
│ │ │ │ │ ├── TH2_z.stl
│ │ │ │ │ ├── TH3_z.stl
│ │ │ │ │ ├── forearm_electric.stl
│ │ │ │ │ ├── forearm_electric_cvx.stl
│ │ │ │ │ ├── knuckle.stl
│ │ │ │ │ ├── lfmetacarpal.stl
│ │ │ │ │ ├── palm.stl
│ │ │ │ │ └── wrist.stl
│ │ │ └── textures
│ │ │ │ ├── block.png
│ │ │ │ └── block_hidden.png
│ │ ├── fetch
│ │ │ ├── __init__.py
│ │ │ ├── dpush.py
│ │ │ ├── drawer.py
│ │ │ ├── drawer_box.py
│ │ │ ├── insert.py
│ │ │ ├── insert_rand.py
│ │ │ ├── obs_push.py
│ │ │ ├── occ_push.py
│ │ │ ├── pick_and_place.py
│ │ │ ├── push.py
│ │ │ ├── reach.py
│ │ │ ├── slide.py
│ │ │ ├── stack.py
│ │ │ ├── tpush.py
│ │ │ └── tstack.py
│ │ ├── fetch_double_push_env.py
│ │ ├── fetch_drawer_box_env.py
│ │ ├── fetch_drawer_env.py
│ │ ├── fetch_env.py
│ │ ├── fetch_insert_env.py
│ │ ├── fetch_insert_rand_env.py
│ │ ├── fetch_obs_push_env.py
│ │ ├── fetch_occ_push_env.py
│ │ ├── fetch_stack_env.py
│ │ ├── fetch_three_push_env.py
│ │ ├── fetch_three_stack_env.py
│ │ ├── hand
│ │ │ ├── __init__.py
│ │ │ ├── manipulate.py
│ │ │ ├── manipulate_touch_sensors.py
│ │ │ └── reach.py
│ │ ├── hand_env.py
│ │ ├── robot_double_push_env.py
│ │ ├── robot_env.py
│ │ ├── robot_joint_env.py
│ │ ├── robot_stack_env.py
│ │ ├── robot_three_push_env.py
│ │ ├── robot_three_stack_env.py
│ │ ├── rotations.py
│ │ └── utils.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── spec_list.py
│ │ ├── test_determinism.py
│ │ ├── test_envs.py
│ │ ├── test_envs_semantics.py
│ │ ├── test_frozenlake_dfs.py
│ │ ├── test_kellycoinflip.py
│ │ ├── test_mujoco_v2_to_v3_conversion.py
│ │ └── test_registration.py
│ ├── toy_text
│ │ ├── __init__.py
│ │ ├── blackjack.py
│ │ ├── cliffwalking.py
│ │ ├── discrete.py
│ │ ├── frozen_lake.py
│ │ ├── guessing_game.py
│ │ ├── hotter_colder.py
│ │ ├── kellycoinflip.py
│ │ ├── nchain.py
│ │ ├── roulette.py
│ │ └── taxi.py
│ └── unittest
│ │ ├── __init__.py
│ │ ├── cube_crash.py
│ │ └── memorize_digits.py
├── error.py
├── logger.py
├── mjkey.txt
├── obstacle_push.png
├── spaces
│ ├── __init__.py
│ ├── box.py
│ ├── dict.py
│ ├── discrete.py
│ ├── multi_binary.py
│ ├── multi_discrete.py
│ ├── space.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── test_spaces.py
│ │ └── test_utils.py
│ ├── tuple.py
│ └── utils.py
├── stack_demo.py
├── three_push_demo.py
├── three_stack_demo.py
├── utils
│ ├── __init__.py
│ ├── atomic_write.py
│ ├── closer.py
│ ├── colorize.py
│ ├── ezpickle.py
│ ├── json_utils.py
│ ├── play.py
│ └── seeding.py
├── vector
│ ├── __init__.py
│ ├── async_vector_env.py
│ ├── sync_vector_env.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── test_async_vector_env.py
│ │ ├── test_numpy_utils.py
│ │ ├── test_shared_memory.py
│ │ ├── test_spaces.py
│ │ ├── test_sync_vector_env.py
│ │ ├── test_vector_env.py
│ │ ├── test_vector_env_wrapper.py
│ │ └── utils.py
│ ├── utils
│ │ ├── __init__.py
│ │ ├── misc.py
│ │ ├── numpy_utils.py
│ │ ├── shared_memory.py
│ │ └── spaces.py
│ └── vector_env.py
├── version.py
└── wrappers
│ ├── __init__.py
│ ├── atari_preprocessing.py
│ ├── clip_action.py
│ ├── filter_observation.py
│ ├── flatten_observation.py
│ ├── frame_stack.py
│ ├── gray_scale_observation.py
│ ├── monitor.py
│ ├── monitoring
│ ├── __init__.py
│ ├── stats_recorder.py
│ ├── tests
│ │ ├── __init__.py
│ │ ├── helpers.py
│ │ └── test_video_recorder.py
│ └── video_recorder.py
│ ├── pixel_observation.py
│ ├── record_episode_statistics.py
│ ├── rescale_action.py
│ ├── resize_observation.py
│ ├── test_atari_preprocessing.py
│ ├── test_clip_action.py
│ ├── test_filter_observation.py
│ ├── test_flatten_observation.py
│ ├── test_frame_stack.py
│ ├── test_gray_scale_observation.py
│ ├── test_pixel_observation.py
│ ├── test_record_episode_statistics.py
│ ├── test_rescale_action.py
│ ├── test_resize_observation.py
│ ├── test_time_aware_observation.py
│ ├── test_transform_observation.py
│ ├── test_transform_reward.py
│ ├── tests
│ └── __init__.py
│ ├── time_aware_observation.py
│ ├── time_limit.py
│ ├── transform_observation.py
│ └── transform_reward.py
├── memory
├── __init__.py
├── per_memory.py
├── simple_memory.py
├── sp_memory.py
├── sp_memory_torch.py
├── sp_per_memory.py
└── sp_per_memory_torch.py
├── pip_requirement.txt
├── spinup_utils
├── README.md
├── __init__.py
├── delete_no_checkpoint_or_pth.py
├── demo.png
├── group_plot.py
├── log2table.py
├── logx.py
├── mpi_pytorch.py
├── mpi_tf.py
├── mpi_tools.py
├── plot.py
├── plot_demo_files
│ ├── 2020-10-02_12-18-23-gym_clean_buffer_reach_d2s_dense_n2b2_s5958
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2020-10-02_12-18-42-gym_clean_buffer_reach_d2s_dense_n2b2_s9317
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2020-10-02_12-18-43-gym_clean_buffer_reach_d2s_dense_n2b2_s7515
│ │ ├── config.json
│ │ └── progress.txt
│ ├── 2020-10-02_12-18-50-gym_clean_buffer_reach_d2s_dense_n2b2_s9180
│ │ ├── config.json
│ │ └── progress.txt
│ └── __init__.py
├── plot_success.py
├── plot_success_group.py
├── print_logger.py
├── run_entrypoint.py
├── run_utils.py
├── serialization_utils.py
└── user_config.py
├── tf1_arguments.py
├── torch_arguments.py
├── train_tf1.py
├── train_tf1_mpi.py
├── train_torch.py
├── train_torch_mpi.py
├── train_torch_mpi_norm_load.py
├── train_torch_mpi_norm_save.py
└── tune_exps
├── __init__.py
├── tune_arguments.py
├── tune_exps_demo.py
└── tune_func.py
/D2SR_RCAR_中文版.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/D2SR_RCAR_中文版.pdf
--------------------------------------------------------------------------------
/D2SSR/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/D2SSR/__init__.py
--------------------------------------------------------------------------------
/DRLib.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/DRLib.jpg
--------------------------------------------------------------------------------
/HER_DRLib_Net_Reload/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/actor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/HER_DRLib_Net_Reload/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/actor.pth
--------------------------------------------------------------------------------
/HER_DRLib_Net_Reload/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
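
Every run directory dumped below carries a config.json with exactly this shape: args, env, logger, and net are null, and logger_kwargs records the exp_name plus the output_dir where progress.txt lives. A minimal sketch for reading one with the standard library (the path and the use made of the values are assumptions for illustration, not repo code):

import json

def read_run_config(path="config.json"):
    # Keys below match the config.json dumps in this file.
    with open(path) as f:
        cfg = json.load(f)
    return cfg["exp_name"], cfg["logger_kwargs"]["output_dir"]

exp_name, out_dir = read_run_config()
print(exp_name, "->", out_dir + "/progress.txt")
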
/HER_DRLib_Net_Reload/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/norm.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/HER_DRLib_Net_Reload/2022-08-12_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-12_15-57-53-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/norm.pkl
--------------------------------------------------------------------------------
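
Besides config.json and progress.txt, this HER_DRLib_Net_Reload run also stores the trained policy (actor.pth) and the observation-normalizer state (norm.pkl). A minimal reload sketch, assuming plain torch/pickle serialization; the repo's own loading logic lives in train_torch_mpi_norm_load.py (see the tree above), so treat this as an illustration only:

import pickle
import torch

# actor.pth may hold a full nn.Module or just a state_dict; torch.load returns either.
actor = torch.load("actor.pth", map_location="cpu")

# norm.pkl presumably pickles the normalizer statistics saved by train_torch_mpi_norm_save.py.
with open("norm.pkl", "rb") as f:
    norm = pickle.load(f)

print(type(actor), type(norm))
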
/HER_DRLib_mpi/2022-07-29_HER_mpi19_random_TD3_FetchPush-v1/2022-07-29_12-21-04-HER_mpi19_random_TD3_FetchPush-v1_s123/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi19_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi19_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi19_random_TD3_FetchPush-v1/2022-07-29_12-21-04-HER_mpi19_random_TD3_FetchPush-v1_s123"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi/2022-07-29_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-07-29_12-22-08-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s123/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-07-29_12-22-08-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s123"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-21-16-HER_mpi1_random_TD3_FetchPush-v1_s123/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-21-16-HER_mpi1_random_TD3_FetchPush-v1_s123"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-48-HER_mpi1_random_TD3_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-48-HER_mpi1_random_TD3_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-56-HER_mpi1_random_TD3_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-56-HER_mpi1_random_TD3_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi/2022-07-29_HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1/2022-07-29_12-22-27-HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1_s123/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1/2022-07-29_12-22-27-HER_mpi6_random_TD3Torch_FetchPickAndPlace-v1_s123"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi/MPI1-6-19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/HER_DRLib_mpi/MPI1-6-19.png
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1/2022-07-29_12-25-10-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1/2022-07-29_12-25-10-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1/2022-07-29_12-25-21-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1/2022-07-29_12-25-21-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1/2022-07-29_12-25-27-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPickAndPlace-v1/2022-07-29_12-25-27-HER_mpi1_random_TD3_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-21-16-HER_mpi1_random_TD3_FetchPush-v1_s123/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-21-16-HER_mpi1_random_TD3_FetchPush-v1_s123"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-48-HER_mpi1_random_TD3_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-48-HER_mpi1_random_TD3_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-56-HER_mpi1_random_TD3_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchPush-v1/2022-07-29_12-24-56-HER_mpi1_random_TD3_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1/2022-07-29_12-25-42-HER_mpi1_random_TD3_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1/2022-07-29_12-25-42-HER_mpi1_random_TD3_FetchSlide-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1/2022-07-29_12-25-46-HER_mpi1_random_TD3_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1/2022-07-29_12-25-46-HER_mpi1_random_TD3_FetchSlide-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1/2022-07-29_12-25-47-HER_mpi1_random_TD3_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_TD3_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_TD3_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-29_HER_mpi1_random_TD3_FetchSlide-v1/2022-07-29_12-25-47-HER_mpi1_random_TD3_FetchSlide-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1/2022-07-30_22-19-36-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1/2022-07-30_22-19-36-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1/2022-07-30_22-19-42-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1/2022-07-30_22-19-42-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1/2022-07-30_22-19-45-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchPickAndPlace-v1/2022-07-30_22-19-45-HER_mpi1_random_DDPG_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/2022-07-30_22-19-22-HER_mpi1_random_DDPG_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/2022-07-30_22-19-22-HER_mpi1_random_DDPG_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/2022-07-30_22-19-27-HER_mpi1_random_DDPG_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/2022-07-30_22-19-27-HER_mpi1_random_DDPG_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/2022-07-30_22-19-30-HER_mpi1_random_DDPG_FetchPush-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchPush-v1/2022-07-30_22-19-30-HER_mpi1_random_DDPG_FetchPush-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1/2022-07-30_22-18-55-HER_mpi1_random_DDPG_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1/2022-07-30_22-18-55-HER_mpi1_random_DDPG_FetchSlide-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1/2022-07-30_22-19-10-HER_mpi1_random_DDPG_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1/2022-07-30_22-19-10-HER_mpi1_random_DDPG_FetchSlide-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1/2022-07-30_22-19-14-HER_mpi1_random_DDPG_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPG_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPG_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-30_HER_mpi1_random_DDPG_FetchSlide-v1/2022-07-30_22-19-14-HER_mpi1_random_DDPG_FetchSlide-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1/2022-07-31_17-09-31-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1/2022-07-31_17-09-31-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1/2022-07-31_17-09-33-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1/2022-07-31_17-09-33-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1/2022-07-31_17-09-34-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1/2022-07-31_17-09-34-HER_mpi1_random_SAC_AUTO_FetchPickAndPlace-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1/2022-07-31_17-09-45-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1/2022-07-31_17-09-45-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1/2022-07-31_17-09-48-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1/2022-07-31_17-09-48-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1/2022-07-31_17-09-48-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchPush-v1/2022-07-31_17-09-48-HER_mpi1_random_SAC_AUTO_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1/2022-07-31_17-09-58-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1/2022-07-31_17-09-58-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1/2022-07-31_17-10-05-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1/2022-07-31_17-10-05-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1/2022-07-31_17-10-07-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_AUTO_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_AUTO_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_AUTO_FetchSlide-v1/2022-07-31_17-10-07-HER_mpi1_random_SAC_AUTO_FetchSlide-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1/2022-07-31_17-09-01-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1/2022-07-31_17-09-01-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1/2022-07-31_17-09-03-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1/2022-07-31_17-09-03-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1/2022-07-31_17-09-05-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchPickAndPlace-v1/2022-07-31_17-09-05-HER_mpi1_random_SAC_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchPush-v1/2022-07-31_17-08-42-HER_mpi1_random_SAC_FetchPush-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchPush-v1/2022-07-31_17-08-42-HER_mpi1_random_SAC_FetchPush-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchPush-v1/2022-07-31_17-08-44-HER_mpi1_random_SAC_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchPush-v1/2022-07-31_17-08-44-HER_mpi1_random_SAC_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchPush-v1/2022-07-31_17-08-48-HER_mpi1_random_SAC_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchPush-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchPush-v1/2022-07-31_17-08-48-HER_mpi1_random_SAC_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1/2022-07-31_17-08-20-HER_mpi1_random_SAC_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1/2022-07-31_17-08-20-HER_mpi1_random_SAC_FetchSlide-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1/2022-07-31_17-08-26-HER_mpi1_random_SAC_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1/2022-07-31_17-08-26-HER_mpi1_random_SAC_FetchSlide-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1/2022-07-31_17-08-29-HER_mpi1_random_SAC_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_SAC_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_SAC_FetchSlide-v1",
        "output_dir": "HER_DRLib_exps/2022-07-31_HER_mpi1_random_SAC_FetchSlide-v1/2022-07-31_17-08-29-HER_mpi1_random_SAC_FetchSlide-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1/2022-08-01_12-21-12-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1/2022-08-01_12-21-12-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1/2022-08-01_12-21-16-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1/2022-08-01_12-21-16-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1/2022-08-01_12-21-19-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1/2022-08-01_12-21-19-HER_mpi1_random_DDPGTorch_FetchPickAndPlace-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1/2022-08-01_12-20-44-HER_mpi1_random_DDPGTorch_FetchPush-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchPush-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1/2022-08-01_12-20-44-HER_mpi1_random_DDPGTorch_FetchPush-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1/2022-08-01_12-20-54-HER_mpi1_random_DDPGTorch_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchPush-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1/2022-08-01_12-20-54-HER_mpi1_random_DDPGTorch_FetchPush-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1/2022-08-01_12-20-57-HER_mpi1_random_DDPGTorch_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchPush-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchPush-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchPush-v1/2022-08-01_12-20-57-HER_mpi1_random_DDPGTorch_FetchPush-v1_s300"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1/2022-08-01_12-21-31-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchSlide-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1/2022-08-01_12-21-31-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s100"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1/2022-08-01_12-21-36-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
{
    "args": null,
    "env": null,
    "exp_name": "HER_mpi1_random_DDPGTorch_FetchSlide-v1",
    "logger": null,
    "logger_kwargs": {
        "exp_name": "HER_mpi1_random_DDPGTorch_FetchSlide-v1",
        "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1/2022-08-01_12-21-36-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s200"
    },
    "net": null
}
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1/2022-08-01_12-21-39-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_DDPGTorch_FetchSlide-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_DDPGTorch_FetchSlide-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_DDPGTorch_FetchSlide-v1/2022-08-01_12-21-39-HER_mpi1_random_DDPGTorch_FetchSlide-v1_s300"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1/2022-08-01_12-22-28-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s100/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_SACTorch_FetchPickAndPlace-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_SACTorch_FetchPickAndPlace-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1/2022-08-01_12-22-28-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s100"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1/2022-08-01_12-22-30-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_SACTorch_FetchPickAndPlace-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_SACTorch_FetchPickAndPlace-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1/2022-08-01_12-22-30-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s200"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1/2022-08-01_12-22-34-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_SACTorch_FetchPickAndPlace-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_SACTorch_FetchPickAndPlace-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchPickAndPlace-v1/2022-08-01_12-22-34-HER_mpi1_random_SACTorch_FetchPickAndPlace-v1_s300"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1/2022-08-01_12-22-12-HER_mpi1_random_SACTorch_FetchPush-v1_s100/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_SACTorch_FetchPush-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_SACTorch_FetchPush-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1/2022-08-01_12-22-12-HER_mpi1_random_SACTorch_FetchPush-v1_s100"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1/2022-08-01_12-22-16-HER_mpi1_random_SACTorch_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_SACTorch_FetchPush-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_SACTorch_FetchPush-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1/2022-08-01_12-22-16-HER_mpi1_random_SACTorch_FetchPush-v1_s200"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1/2022-08-01_12-22-18-HER_mpi1_random_SACTorch_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_SACTorch_FetchPush-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_SACTorch_FetchPush-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchPush-v1/2022-08-01_12-22-18-HER_mpi1_random_SACTorch_FetchPush-v1_s300"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1/2022-08-01_12-21-55-HER_mpi1_random_SACTorch_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_SACTorch_FetchSlide-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_SACTorch_FetchSlide-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1/2022-08-01_12-21-55-HER_mpi1_random_SACTorch_FetchSlide-v1_s100"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1/2022-08-01_12-21-58-HER_mpi1_random_SACTorch_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_SACTorch_FetchSlide-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_SACTorch_FetchSlide-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1/2022-08-01_12-21-58-HER_mpi1_random_SACTorch_FetchSlide-v1_s200"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1/2022-08-01_12-22-01-HER_mpi1_random_SACTorch_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_SACTorch_FetchSlide-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_SACTorch_FetchSlide-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_SACTorch_FetchSlide-v1/2022-08-01_12-22-01-HER_mpi1_random_SACTorch_FetchSlide-v1_s300"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-08-01_12-15-29-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s100/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-08-01_12-15-29-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s100"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-08-01_12-15-36-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s200/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-08-01_12-15-36-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s200"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-08-01_12-15-40-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s300/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1/2022-08-01_12-15-40-HER_mpi1_random_TD3Torch_FetchPickAndPlace-v1_s300"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-01_12-15-51-HER_mpi1_random_TD3Torch_FetchPush-v1_s300/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-01_12-15-51-HER_mpi1_random_TD3Torch_FetchPush-v1_s300"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-01_12-16-00-HER_mpi1_random_TD3Torch_FetchPush-v1_s200/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-01_12-16-00-HER_mpi1_random_TD3Torch_FetchPush-v1_s200"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-01_12-16-07-HER_mpi1_random_TD3Torch_FetchPush-v1_s100/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchPush-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchPush-v1/2022-08-01_12-16-07-HER_mpi1_random_TD3Torch_FetchPush-v1_s100"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1/2022-08-01_12-16-17-HER_mpi1_random_TD3Torch_FetchSlide-v1_s100/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchSlide-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchSlide-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1/2022-08-01_12-16-17-HER_mpi1_random_TD3Torch_FetchSlide-v1_s100"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1/2022-08-01_12-16-22-HER_mpi1_random_TD3Torch_FetchSlide-v1_s200/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchSlide-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchSlide-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1/2022-08-01_12-16-22-HER_mpi1_random_TD3Torch_FetchSlide-v1_s200"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1/2022-08-01_12-16-25-HER_mpi1_random_TD3Torch_FetchSlide-v1_s300/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "args": null,
3 | "env": null,
4 | "exp_name": "HER_mpi1_random_TD3Torch_FetchSlide-v1",
5 | "logger": null,
6 | "logger_kwargs": {
7 | "exp_name": "HER_mpi1_random_TD3Torch_FetchSlide-v1",
8 | "output_dir": "HER_DRLib_Torch_exps/2022-08-01_HER_mpi1_random_TD3Torch_FetchSlide-v1/2022-08-01_12-16-25-HER_mpi1_random_TD3Torch_FetchSlide-v1_s300"
9 | },
10 | "net": null
11 | }
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2ep_reward_Pick.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/HER_DRLib_mpi1/2ep_reward_Pick.png
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2ep_reward_Push.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/HER_DRLib_mpi1/2ep_reward_Push.png
--------------------------------------------------------------------------------
/HER_DRLib_mpi1/2ep_reward_Slide.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/HER_DRLib_mpi1/2ep_reward_Slide.png
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2021 kaixindelele
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/algos/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/algos/pytorch/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/algos/pytorch/ddpg_sp/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/algos/pytorch/offPolicy/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/algos/pytorch/offPolicy/norm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class StateNorm:
5 | def __init__(self, size, eps=1e-2, default_clip_range=5):
6 | self.size = size
7 | self.eps = eps
8 | self.default_clip_range = default_clip_range
9 |
10 | self.sum = np.zeros(self.size, np.float32)
11 | self.sumsq = np.zeros(self.size, np.float32)
12 | self.count = np.zeros(1, np.float32)
13 |
14 | # get the mean and std
15 | self.mean = np.zeros(self.size, np.float32)
16 | self.std = np.ones(self.size, np.float32)
17 |
18 | # update the parameters of the normalizer
19 | def update(self, v):
20 | v = v.reshape(-1, self.size)
21 | self.sum += v.sum(axis=0)
22 | self.sumsq += (np.square(v)).sum(axis=0)
23 | self.count += v.shape[0]
24 |
25 | self.mean = self.sum / self.count
26 | self.std = np.sqrt(np.maximum(np.square(self.eps),
27 | (self.sumsq / self.count) - np.square(
28 | self.sum / self.count)))
29 | # print("mean:", self.mean)
30 | # print("std:", self.std)
31 |
32 | # normalize the observation
33 | def normalize(self, v, clip_range=None):
34 | if clip_range is None:
35 | clip_range = self.default_clip_range
36 |
37 | return np.clip((v - self.mean) / self.std,
38 | -clip_range, clip_range)
39 |
40 |
41 | def main():
42 | norm = StateNorm(size=3)
43 | v = np.random.random((4, 2, 3))
44 | print("v:", v)
45 |
46 | r0 = v.reshape(-1, 3)
47 | print(r0.shape)
48 | print(r0)
49 | r0 = r0[:, 0]
50 |
51 | print(r0.shape)
52 | print(r0)
53 | std = np.std(r0)
54 | print(std.shape)
55 | print(std)
56 | norm.update(v=v)
57 |
58 |
59 | if __name__ == '__main__':
60 | main()
61 |
--------------------------------------------------------------------------------
/algos/pytorch/sac_sp/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/algos/pytorch/td3_sp/README.md:
--------------------------------------------------------------------------------
1 | Update!
2 |
3 | I finally got the tf version's MPI-based multiprocessing working~
4 |
5 | The torch version is not fully tested yet and still reports errors!
6 |
7 | If your CPU has enough cores, try MPI multiprocessing; the performance gain is substantial.
8 |
9 | Current test results: tf-DDPG performs best, and TD3 somehow comes out worse than DDPG. Unbelievable~
10 |
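11 | As a reference for trying it, here is a minimal sketch of the per-worker setup in an MPI run. It assumes `mpi4py` is installed and a launch command along the lines of `mpirun -np 6 python your_train_script.py`; the seed offset follows the common spinup-style convention and is not necessarily this repo's exact code:
12 |
13 | ```python
14 | from mpi4py import MPI
15 |
16 | comm = MPI.COMM_WORLD
17 | rank = comm.Get_rank()     # this worker's index, 0..n-1
18 | n_procs = comm.Get_size()  # total number of workers
19 |
20 | # give every worker a distinct seed so their rollouts decorrelate
21 | seed = 100 + 10000 * rank
22 | print("worker {}/{} using seed {}".format(rank, n_procs, seed))
23 | ```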
--------------------------------------------------------------------------------
/algos/pytorch/td3_sp/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/algos/tf1/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/algos/tf1/ddpg_sp/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/algos/tf1/ddpg_sp/core.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 |
5 | def placeholder(dim=None):
6 | return tf.placeholder(dtype=tf.float32, shape=(None,dim) if dim else (None,))
7 |
8 |
9 | def placeholders(*args):
10 | return [placeholder(dim) for dim in args]
11 |
12 |
13 | def mlp(x, hidden_sizes=(32,), activation=tf.tanh, output_activation=None):
14 | for h in hidden_sizes[:-1]:
15 | x = tf.layers.dense(x, units=h, activation=activation)
16 | return tf.layers.dense(x, units=hidden_sizes[-1], activation=output_activation)
17 |
18 |
19 | def get_vars(scope):
20 | return [x for x in tf.global_variables() if scope in x.name]
21 |
22 |
23 | def count_vars(scope):
24 | v = get_vars(scope)
25 | return sum([np.prod(var.shape.as_list()) for var in v])
26 |
27 |
28 | """
29 | Actor-Critics
30 | """
31 |
32 |
33 | def mlp_actor_critic(x, a, hidden_sizes=(400, 300), activation=tf.nn.relu,
34 | output_activation=tf.tanh, action_space=None):
35 | act_dim = a.shape.as_list()[-1]
36 | act_limit = action_space
37 | with tf.variable_scope('pi'):
38 | pi = act_limit * mlp(x, list(hidden_sizes)+[act_dim], activation, output_activation)
39 | with tf.variable_scope('q'):
40 | q = tf.squeeze(mlp(tf.concat([x, a], axis=-1),
41 | list(hidden_sizes)+[1], activation, None), axis=1)
42 | with tf.variable_scope('q', reuse=True):
43 | q_pi = tf.squeeze(mlp(tf.concat([x, pi], axis=-1),
44 | list(hidden_sizes)+[1], activation, None), axis=1)
45 | return pi, q, q_pi
46 |
--------------------------------------------------------------------------------
/algos/tf1/offPolicy/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/algos/tf1/offPolicy/core.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 |
5 | def placeholder(dim=None):
6 | return tf.placeholder(dtype=tf.float32, shape=(None,dim) if dim else (None,))
7 |
8 |
9 | def placeholders(*args):
10 | return [placeholder(dim) for dim in args]
11 |
12 |
13 | def mlp(x, hidden_sizes=(32,), activation=tf.tanh, output_activation=None):
14 | for h in hidden_sizes[:-1]:
15 | x = tf.layers.dense(x, units=h, activation=activation)
16 | return tf.layers.dense(x, units=hidden_sizes[-1], activation=output_activation)
17 |
18 |
19 | def get_vars(scope):
20 | return [x for x in tf.global_variables() if scope in x.name]
21 |
22 |
23 | def count_vars(scope):
24 | v = get_vars(scope)
25 | return sum([np.prod(var.shape.as_list()) for var in v])
26 |
27 |
28 | """
29 | Actor-Critics
30 | """
31 |
32 |
33 | def mlp_actor_critic(x, a, hidden_sizes=(400, 300), activation=tf.nn.relu,
34 | output_activation=tf.tanh, action_space=None):
35 | act_dim = a.shape.as_list()[-1]
36 | act_limit = action_space
37 | with tf.variable_scope('pi'):
38 | pi = act_limit * mlp(x, list(hidden_sizes)+[act_dim], activation, output_activation)
39 | with tf.variable_scope('q'):
40 | q = tf.squeeze(mlp(tf.concat([x, a], axis=-1),
41 | list(hidden_sizes)+[1], activation, None), axis=1)
42 | with tf.variable_scope('q', reuse=True):
43 | q_pi = tf.squeeze(mlp(tf.concat([x, pi], axis=-1),
44 | list(hidden_sizes)+[1], activation, None), axis=1)
45 | return pi, q, q_pi
46 |
--------------------------------------------------------------------------------
/algos/tf1/offPolicy/norm.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class StateNorm:
5 | def __init__(self, size, eps=1e-2, default_clip_range=5):
6 | self.size = size
7 | self.eps = eps
8 | self.default_clip_range = default_clip_range
9 |
10 | self.sum = np.zeros(self.size, np.float32)
11 | self.sumsq = np.zeros(self.size, np.float32)
12 | self.count = np.zeros(1, np.float32)
13 |
14 | # get the mean and std
15 | self.mean = np.zeros(self.size, np.float32)
16 | self.std = np.ones(self.size, np.float32)
17 |
18 | # update the parameters of the normalizer
19 | def update(self, v):
20 | v = v.reshape(-1, self.size)
21 | self.sum += v.sum(axis=0)
22 | self.sumsq += (np.square(v)).sum(axis=0)
23 | self.count += v.shape[0]
24 |
25 | self.mean = self.sum / self.count
26 | self.std = np.sqrt(np.maximum(np.square(self.eps),
27 | (self.sumsq / self.count) - np.square(
28 | self.sum / self.count)))
29 | # print("mean:", self.mean)
30 | # print("std:", self.std)
31 |
32 | # normalize the observation
33 | def normalize(self, v, clip_range=None):
34 | if clip_range is None:
35 | clip_range = self.default_clip_range
36 |
37 | return np.clip((v - self.mean) / self.std,
38 | -clip_range, clip_range)
39 |
40 |
41 | def main():
42 | norm = StateNorm(size=3)
43 | v = np.random.random((4, 2, 3))
44 | print("v:", v)
45 |
46 | r0 = v.reshape(-1, 3)
47 | print(r0.shape)
48 | print(r0)
49 | r0 = r0[:, 0]
50 |
51 | print(r0.shape)
52 | print(r0)
53 | std = np.std(r0)
54 | print(std.shape)
55 | print(std)
56 | norm.update(v=v)
57 |
58 |
59 | if __name__ == '__main__':
60 | main()
61 |
--------------------------------------------------------------------------------
/algos/tf1/sac_auto/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/algos/tf1/sac_sp/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/algos/tf1/td3_sp/README.md:
--------------------------------------------------------------------------------
1 | Update!
2 |
3 | I finally got the tf version's MPI-based multiprocessing working~
4 |
5 | The torch version is not fully tested yet and still reports errors!
6 |
7 | If your CPU has enough cores, try MPI multiprocessing; the performance gain is substantial.
8 |
9 | Current test results: tf-DDPG performs best, and TD3 somehow comes out worse than DDPG. Unbelievable~
10 |
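11 | The gain comes from each worker collecting rollouts in its own process while updates stay synchronized by averaging across workers. A minimal sketch of that averaging step, assuming `mpi4py` (the function name `mpi_avg` is illustrative, not this repo's API):
12 |
13 | ```python
14 | import numpy as np
15 | from mpi4py import MPI
16 |
17 | def mpi_avg(x):
18 |     """Average a numpy array across all MPI workers."""
19 |     x = np.asarray(x, dtype=np.float64)
20 |     buf = np.zeros_like(x)
21 |     MPI.COMM_WORLD.Allreduce(x, buf, op=MPI.SUM)
22 |     return buf / MPI.COMM_WORLD.Get_size()
23 | ```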
--------------------------------------------------------------------------------
/algos/tf1/td3_sp/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/algos/tf1/td3_sp/core.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 |
5 | def placeholder(dim=None):
6 | return tf.placeholder(dtype=tf.float32,
7 | shape=(None,dim) if dim else (None,))
8 |
9 |
10 | def placeholders(*args):
11 | return [placeholder(dim) for dim in args]
12 |
13 |
14 | def mlp(x, hidden_sizes=(32,), activation=tf.tanh, output_activation=None):
15 | for h in hidden_sizes[:-1]:
16 | x = tf.layers.dense(x, units=h, activation=activation)
17 | return tf.layers.dense(x, units=hidden_sizes[-1], activation=output_activation)
18 |
19 |
20 | def get_vars(scope):
21 | return [x for x in tf.global_variables() if scope in x.name]
22 |
23 |
24 | def count_vars(scope):
25 | v = get_vars(scope)
26 | return sum([np.prod(var.shape.as_list()) for var in v])
27 |
28 |
29 | """
30 | Actor-Critics
31 | """
32 |
33 |
34 | def mlp_actor_critic(x, a, hidden_sizes=(400, 300), activation=tf.nn.relu,
35 | output_activation=tf.tanh, action_space=None):
36 | act_dim = a.shape.as_list()[-1]
37 | act_limit = action_space
38 | with tf.variable_scope('pi'):
39 | pi = act_limit * mlp(x, list(hidden_sizes)+[act_dim],
40 | activation, output_activation)
41 | with tf.variable_scope('q1'):
42 | q1 = tf.squeeze(mlp(tf.concat([x, a], axis=-1),
43 | list(hidden_sizes)+[1],
44 | activation, None), axis=1)
45 | with tf.variable_scope('q2'):
46 | q2 = tf.squeeze(mlp(tf.concat([x, a], axis=-1),
47 | list(hidden_sizes)+[1],
48 | activation, None), axis=1)
49 | with tf.variable_scope('q1', reuse=True):
50 | q1_pi = tf.squeeze(mlp(tf.concat([x, pi], axis=-1),
51 | list(hidden_sizes)+[1],
52 | activation, None), axis=1)
53 | return pi, q1, q2, q1_pi
54 |
--------------------------------------------------------------------------------
/gym/__init__.py:
--------------------------------------------------------------------------------
1 | import distutils.version
2 | import os
3 | import sys
4 | import warnings
5 |
6 | from gym import error
7 | from gym.version import VERSION as __version__
8 |
9 | from gym.core import Env, GoalEnv, Wrapper, ObservationWrapper, ActionWrapper, RewardWrapper
10 | from gym.spaces import Space
11 | from gym.envs import make, spec, register
12 | from gym import logger
13 | from gym import vector
14 | from gym import wrappers
15 |
16 | __all__ = ["Env", "Space", "Wrapper", "make", "spec", "register"]
17 |
--------------------------------------------------------------------------------
/gym/demo.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import gym
4 | # from gym.envs.robotics.fetch.insert import FetchInsertEnv
5 | # env = FetchInsertEnv()
6 | # env = gym.make("FetchPushFixed-v1")
7 | env = gym.make("FetchPickAndPlaceFixed-v1")
8 | # env = gym.make("FetchSlideFixed-v1")
9 |
10 |
11 | def p_control(env, obs, p_rate=0.2):
12 | a = env.action_space.sample()
13 | gg = obs['grip_goal']
14 | ag = obs['achieved_goal']
15 | error = ag - gg
16 | for axis, value in enumerate(error):
17 | if abs(value) > 0.02:
18 | if value > 0:
19 | a[axis] = p_rate
20 | else:
21 | a[axis] = -p_rate
22 | else:
23 | a[axis] = 0
24 | # if axis == 0:
25 | # a[axis] = -p_rate
26 | # else:
27 | # a[axis] = p_rate
28 | action = a
29 | return action
30 |
31 |
32 | for ep in range(20):
33 | obs = env.reset()
34 | for i in range(200):
35 | # a = p_control(env, obs=obs)
36 | #
37 | a = env.action_space.sample()
38 | a[0] = 0.01
39 | if obs['grip_goal'][2] < 0.3:
40 | pass
41 | else:
42 | a[1] = -0.2
43 | a[2] = -0.2
44 | print("gg:", obs['grip_goal'])
45 |
46 | obs, reward, done, info = env.step(a)
47 | print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info))
48 | env.render()
49 |
--------------------------------------------------------------------------------
/gym/demo_drawer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import gym
4 | import cv2
5 | from gym.envs.robotics.fetch.drawer import FetchDrawerEnv
6 | env = FetchDrawerEnv()
7 |
8 |
9 | def p_control(env, obs, p_rate=0.1):
10 | a = env.action_space.sample()
11 | gg = obs['grip_goal']
12 | ag = obs['achieved_goal']
13 | ag[1] -= 0.01
14 | # ag[0] += 0.05
15 | error = ag - gg
16 | for axis, value in enumerate(error):
17 | if abs(value) > 0.01:
18 | if value > 0:
19 | a[axis] = p_rate
20 | else:
21 | a[axis] = -p_rate
22 | else:
23 | a[axis] = 0
24 | action = a
25 | # if np.random.random() < 0.1:
26 | # action[-1] = 1.0
27 | # else:
28 | # action[-1] = 0.0
29 | return action
30 |
31 |
32 | for ep in range(20):
33 | ag_list = []
34 | obs = env.reset()
35 | move = False
36 | for i in range(50):
37 | if not move:
38 | a = p_control(env, obs=obs)
39 | gg2ag = np.linalg.norm(obs['grip_goal'] - obs['achieved_goal'])
40 | print("gg2ag:", gg2ag)
41 | if gg2ag < 0.03:
42 | a = env.action_space.sample()
43 | a[0] = -0.1
44 | a[-1] = 0
45 | # move = True
46 | # print("a:", a)
47 | # a = env.action_space.sample()
48 | a[2] = -1.0
49 | obs, reward, done, info = env.step(a)
50 | print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info))
51 | print("ag:", obs['achieved_goal'])
52 | print("gg:", obs['grip_goal'])
53 | ag_list.append(obs['achieved_goal'])
54 |
55 | # env.render()
56 | image_size = 2048
57 | img = env.render(mode='rgb_array', width=image_size, height=image_size)
58 | clip_value = 200
59 | # crop indices: [rows (top/bottom), cols (left/right), :]
60 | img = img[clip_value*2:image_size-1*clip_value, 0:image_size-2*clip_value, :]
61 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
62 | cv2.imwrite('drawer2.png', img)
63 |
64 | # plt.plot(ag_list)
65 | # plt.pause(2)
66 |
--------------------------------------------------------------------------------
/gym/demo_obs_push.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import gym
4 | import cv2
5 | from gym.envs.robotics.fetch.obs_push import FetchObsPushEnv
6 | env = FetchObsPushEnv()
7 |
8 |
9 | def p_control(env, obs, p_rate=0.2):
10 | a = env.action_space.sample()
11 | gg = obs['grip_goal']
12 | ag = obs['achieved_goal']
13 | error = ag - gg
14 | for axis, value in enumerate(error):
15 | if abs(value) > 0.02:
16 | if value > 0:
17 | a[axis] = p_rate
18 | else:
19 | a[axis] = -p_rate
20 | else:
21 | a[axis] = 0
22 | action = a
23 | return action
24 |
25 |
26 | for ep in range(20):
27 | obs = env.reset()
28 | for i in range(20):
29 | a = p_control(env, obs=obs)
30 | a[-1] = 0.0
31 | # a = env.action_space.sample()
32 | # a[0] = 0.01
33 | # if obs['grip_goal'][2] < 0.3:
34 | # pass
35 | # else:
36 | # a[1] = -0.2
37 | # a[2] = -0.2
38 | print("gg:", obs['grip_goal'])
39 | a *= 0
40 | obs, reward, done, info = env.step(a)
41 | print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info))
42 | # env.render()
43 | image_size = 2048
44 | img = env.render(mode='rgb_array', width=image_size, height=image_size)
45 | clip_value = 200
46 | # crop indices: [rows (top/bottom), cols (left/right), :]
47 | img = img[clip_value*2:image_size-1*clip_value, 0:image_size-2*clip_value, :]
48 | img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
49 | cv2.imwrite('obstacle_push.png', img)
50 |
--------------------------------------------------------------------------------
/gym/double_push_demo.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import gym
4 | from gym.envs.robotics.fetch.dpush import FetchDoublePushEnv
5 | env = FetchDoublePushEnv()
6 |
7 |
8 | def p_control(env, obs, p_rate=0.2):
9 | a = env.action_space.sample()
10 | gg = obs['grip_goal']
11 | ag = obs['achieved_goal']
12 | error = ag - gg
13 | for axis, value in enumerate(error):
14 | if abs(value) > 0.02:
15 | if value > 0:
16 | a[axis] = p_rate
17 | else:
18 | a[axis] = -p_rate
19 | else:
20 | a[axis] = 0
21 | action = a
22 | return action
23 |
24 |
25 | for ep in range(20):
26 | obs = env.reset()
27 | for i in range(20):
28 | a = p_control(env, obs=obs)
29 | # a[-1] = 0.0
30 | #
31 | # a = env.action_space.sample()
32 | # a[0] = 0.01
33 | # if obs['grip_goal'][2] < 0.3:
34 | # pass
35 | # else:
36 | # a[1] = -0.2
37 | # a[2] = -0.2
38 | print("gg:", obs['grip_goal'])
39 |
40 | obs, reward, done, info = env.step(a)
41 | print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info))
42 | env.render()
43 |
--------------------------------------------------------------------------------
/gym/drawer2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/drawer2.png
--------------------------------------------------------------------------------
/gym/drawer_box_demo.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import gym
4 | from gym.envs.robotics.fetch.drawer_open import FetchDrawerOpenEnv
5 | from gym.envs.robotics.fetch.drawer_horiz_open import FetchHorizonDrawerOpenEnv
6 | from gym.envs.robotics.fetch.drawer_box import FetchDrawerBoxEnv
7 | env = FetchDrawerBoxEnv()
8 | # env = FetchHorizonDrawerOpenEnv()
9 |
10 |
11 | def p_control(env, obs, p_rate=0.99):
12 | a = env.action_space.sample()
13 | gg = obs['grip_goal']
14 | ag = obs['ag0']
15 | ag[1] -= 0.01
16 | # ag[0] += 0.05
17 | error = ag - gg
18 | for axis, value in enumerate(error):
19 | if abs(value) > 0.01:
20 | if value > 0:
21 | a[axis] = p_rate
22 | else:
23 | a[axis] = -p_rate
24 | else:
25 | a[axis] = 0
26 | action = a
27 | # action = np.zeros(4)
28 | # if np.random.random() < 0.1:
29 | # action[-1] = 1.0
30 | # else:
31 | # action[-1] = 0.0
32 | return action
33 |
34 |
35 | env.task = 'in2out'
36 | # env.task = 'out2in'
37 | for ep in range(20):
38 | ag_list = []
39 | obs = env.reset()
40 | move = False
41 | for i in range(100):
42 | if not move:
43 | a = p_control(env, obs=obs)
44 | gg2ag = np.linalg.norm(obs['grip_goal'] - obs['ag0'])
45 | print("gg2ag:", gg2ag)
46 | if gg2ag < 0.05:
47 | # a = env.action_space.sample()
48 | a[0] = -0.1
49 | a[-1] = -1.0
50 | # move = True
51 | # print("a:", a)
52 | # a = env.action_space.sample()
53 | # a[2] = -1.0
54 | # if i > 60:
55 | # a[-1] = 1
56 | # a[2] = 1
57 | obs, reward, done, info = env.step(a)
58 | print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info))
59 | print("ag:", obs['achieved_goal'])
60 | print("ag1:", obs['ag1'])
61 | print("gg:", obs['grip_goal'])
62 | ag_list.append(obs['achieved_goal'])
63 |
64 | env.render()
65 | # plt.plot(ag_list)
66 | # plt.pause(2)
67 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.algorithmic.copy_ import CopyEnv
2 | from gym.envs.algorithmic.repeat_copy import RepeatCopyEnv
3 | from gym.envs.algorithmic.duplicated_input import DuplicatedInputEnv
4 | from gym.envs.algorithmic.reverse import ReverseEnv
5 | from gym.envs.algorithmic.reversed_addition import ReversedAdditionEnv
6 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/copy_.py:
--------------------------------------------------------------------------------
1 | """
2 | Task is to copy content from the input tape to
3 | the output tape. http://arxiv.org/abs/1511.07275
4 | """
5 | from gym.envs.algorithmic import algorithmic_env
6 |
7 |
8 | class CopyEnv(algorithmic_env.TapeAlgorithmicEnv):
9 | def __init__(self, base=5, chars=True):
10 | super(CopyEnv, self).__init__(base=base, chars=chars)
11 |
12 | def target_from_input_data(self, input_data):
13 | return input_data
14 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/duplicated_input.py:
--------------------------------------------------------------------------------
1 | """
2 | Task is to return every nth character from the input tape.
3 | http://arxiv.org/abs/1511.07275
4 | """
5 | from gym.envs.algorithmic import algorithmic_env
6 |
7 |
8 | class DuplicatedInputEnv(algorithmic_env.TapeAlgorithmicEnv):
9 | def __init__(self, duplication=2, base=5):
10 | self.duplication = duplication
11 | super(DuplicatedInputEnv, self).__init__(base=base, chars=True)
12 |
13 | def generate_input_data(self, size):
14 | res = []
15 | if size < self.duplication:
16 | size = self.duplication
17 | for _ in range(size // self.duplication):
18 | char = self.np_random.randint(self.base)
19 | for _ in range(self.duplication):
20 | res.append(char)
21 | return res
22 |
23 | def target_from_input_data(self, input_data):
24 | return [
25 | input_data[i] for i in range(0, len(input_data), self.duplication)
26 | ]
27 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/repeat_copy.py:
--------------------------------------------------------------------------------
1 | """
2 | Task is to copy content multiple times from the input tape to
3 | the output tape. http://arxiv.org/abs/1511.07275
4 | """
5 | from gym.envs.algorithmic import algorithmic_env
6 |
7 |
8 | class RepeatCopyEnv(algorithmic_env.TapeAlgorithmicEnv):
9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
10 |
11 | def __init__(self, base=5):
12 | super(RepeatCopyEnv, self).__init__(base=base, chars=True)
13 | self.last = 50
14 |
15 | def target_from_input_data(self, input_data):
16 | return input_data + list(reversed(input_data)) + input_data
17 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/reverse.py:
--------------------------------------------------------------------------------
1 | """
2 | Task is to reverse content over the input tape.
3 | http://arxiv.org/abs/1511.07275
4 | """
5 | from gym.envs.algorithmic import algorithmic_env
6 |
7 |
8 | class ReverseEnv(algorithmic_env.TapeAlgorithmicEnv):
9 | MIN_REWARD_SHORTFALL_FOR_PROMOTION = -.1
10 |
11 | def __init__(self, base=2):
12 | super(ReverseEnv, self).__init__(
13 | base=base, chars=True, starting_min_length=1
14 | )
15 | self.last = 50
16 |
17 | def target_from_input_data(self, input_str):
18 | return list(reversed(input_str))
19 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/reversed_addition.py:
--------------------------------------------------------------------------------
1 | from gym.envs.algorithmic import algorithmic_env
2 |
3 |
4 | class ReversedAdditionEnv(algorithmic_env.GridAlgorithmicEnv):
5 | def __init__(self, rows=2, base=3):
6 | super(ReversedAdditionEnv, self).__init__(
7 | rows=rows, base=base, chars=False
8 | )
9 |
10 | def target_from_input_data(self, input_strings):
11 | carry = 0
12 | target = []
13 | for digits in input_strings:
14 | total = sum(digits) + carry
15 | target.append(total % self.base)
16 | carry = total // self.base
17 |
18 | if carry > 0:
19 | target.append(carry)
20 | return target
21 |
22 | @property
23 | def time_limit(self):
24 | # Quirk preserved for the sake of consistency: add the length of the
25 | # input rather than the length of the desired output (which may differ
26 | # if there's an extra carried digit).
27 | # TODO: It seems like this time limit is so strict as to make
28 | # Addition3-v0 unsolvable, since agents aren't even given enough time
29 | # steps to look at all the digits. (The solutions on the scoreboard
30 | # seem to only work by save-scumming.)
31 | return self.input_width*2 + 4
32 |
--------------------------------------------------------------------------------
/gym/envs/algorithmic/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/algorithmic/tests/__init__.py
--------------------------------------------------------------------------------
/gym/envs/atari/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.atari.atari_env import AtariEnv
2 |
--------------------------------------------------------------------------------
/gym/envs/box2d/__init__.py:
--------------------------------------------------------------------------------
1 | try:
2 | import Box2D
3 | from gym.envs.box2d.lunar_lander import LunarLander
4 | from gym.envs.box2d.lunar_lander import LunarLanderContinuous
5 | from gym.envs.box2d.bipedal_walker import BipedalWalker, BipedalWalkerHardcore
6 | from gym.envs.box2d.car_racing import CarRacing
7 | except ImportError:
8 | Box2D = None
9 |
--------------------------------------------------------------------------------
/gym/envs/box2d/test_lunar_lander.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | try:
3 | import Box2D
4 | from .lunar_lander import LunarLander, LunarLanderContinuous, demo_heuristic_lander
5 | except ImportError:
6 | Box2D = None
7 |
8 |
9 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed')
10 | def test_lunar_lander():
11 | _test_lander(LunarLander(), seed=0)
12 |
13 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed')
14 | def test_lunar_lander_continuous():
15 | _test_lander(LunarLanderContinuous(), seed=0)
16 |
17 | @pytest.mark.skipif(Box2D is None, reason='Box2D not installed')
18 | def _test_lander(env, seed=None, render=False):
19 | total_reward = demo_heuristic_lander(env, seed=seed, render=render)
20 | assert total_reward > 100
21 |
22 |
23 |
--------------------------------------------------------------------------------
/gym/envs/classic_control/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.classic_control.cartpole import CartPoleEnv
2 | from gym.envs.classic_control.mountain_car import MountainCarEnv
3 | from gym.envs.classic_control.continuous_mountain_car import Continuous_MountainCarEnv
4 | from gym.envs.classic_control.pendulum import PendulumEnv
5 | from gym.envs.classic_control.acrobot import AcrobotEnv
6 |
7 |
--------------------------------------------------------------------------------
/gym/envs/classic_control/assets/clockwise.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/classic_control/assets/clockwise.png
--------------------------------------------------------------------------------
/gym/envs/mujoco/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.mujoco.mujoco_env import MujocoEnv
2 | # ^^^^^ so that user gets the correct error
3 | # message if mujoco is not installed correctly
4 | from gym.envs.mujoco.ant import AntEnv
5 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
6 | from gym.envs.mujoco.hopper import HopperEnv
7 | from gym.envs.mujoco.walker2d import Walker2dEnv
8 | from gym.envs.mujoco.humanoid import HumanoidEnv
9 | from gym.envs.mujoco.inverted_pendulum import InvertedPendulumEnv
10 | from gym.envs.mujoco.inverted_double_pendulum import InvertedDoublePendulumEnv
11 | from gym.envs.mujoco.reacher import ReacherEnv
12 | from gym.envs.mujoco.reacher2d_her import ReacherHEREnv
13 | from gym.envs.mujoco.reacher2d_her_harder import ReacherHERHarderEnv
14 | from gym.envs.mujoco.swimmer import SwimmerEnv
15 | from gym.envs.mujoco.humanoidstandup import HumanoidStandupEnv
16 | from gym.envs.mujoco.pusher import PusherEnv
17 | from gym.envs.mujoco.pusher2d_her import PusherHEREnv
18 | from gym.envs.mujoco.thrower import ThrowerEnv
19 | from gym.envs.mujoco.striker import StrikerEnv
20 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/ant.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class AntEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'ant.xml', 5)
8 | utils.EzPickle.__init__(self)
9 |
10 | def step(self, a):
11 | xposbefore = self.get_body_com("torso")[0]
12 | self.do_simulation(a, self.frame_skip)
13 | xposafter = self.get_body_com("torso")[0]
14 | forward_reward = (xposafter - xposbefore)/self.dt
15 | ctrl_cost = .5 * np.square(a).sum()
16 | contact_cost = 0.5 * 1e-3 * np.sum(
17 | np.square(np.clip(self.sim.data.cfrc_ext, -1, 1)))
18 | survive_reward = 1.0
19 | reward = forward_reward - ctrl_cost - contact_cost + survive_reward
20 | state = self.state_vector()
21 | notdone = np.isfinite(state).all() \
22 | and state[2] >= 0.2 and state[2] <= 1.0
23 | done = not notdone
24 | ob = self._get_obs()
25 | return ob, reward, done, dict(
26 | reward_forward=forward_reward,
27 | reward_ctrl=-ctrl_cost,
28 | reward_contact=-contact_cost,
29 | reward_survive=survive_reward)
30 |
31 | def _get_obs(self):
32 | return np.concatenate([
33 | self.sim.data.qpos.flat[2:],
34 | self.sim.data.qvel.flat,
35 | np.clip(self.sim.data.cfrc_ext, -1, 1).flat,
36 | ])
37 |
38 | def reset_model(self):
39 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-.1, high=.1)
40 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
41 | self.set_state(qpos, qvel)
42 | return self._get_obs()
43 |
44 | def viewer_setup(self):
45 | self.viewer.cam.distance = self.model.stat.extent * 0.5
46 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/inverted_double_pendulum.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/mujoco/assets/inverted_double_pendulum.xml
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/inverted_pendulum.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/mujoco/assets/inverted_pendulum.xml
--------------------------------------------------------------------------------
/gym/envs/mujoco/assets/point.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/mujoco/assets/point.xml
--------------------------------------------------------------------------------
/gym/envs/mujoco/half_cheetah.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5)
8 | utils.EzPickle.__init__(self)
9 |
10 | def step(self, action):
11 | xposbefore = self.sim.data.qpos[0]
12 | self.do_simulation(action, self.frame_skip)
13 | xposafter = self.sim.data.qpos[0]
14 | ob = self._get_obs()
15 | reward_ctrl = - 0.1 * np.square(action).sum()
16 | reward_run = (xposafter - xposbefore)/self.dt
17 | reward = reward_ctrl + reward_run
18 | done = False
19 | return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
20 |
21 | def _get_obs(self):
22 | return np.concatenate([
23 | self.sim.data.qpos.flat[1:],
24 | self.sim.data.qvel.flat,
25 | ])
26 |
27 | def reset_model(self):
28 | qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq)
29 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
30 | self.set_state(qpos, qvel)
31 | return self._get_obs()
32 |
33 | def viewer_setup(self):
34 | self.viewer.cam.distance = self.model.stat.extent * 0.5
35 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/hopper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class HopperEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'hopper.xml', 4)
8 | utils.EzPickle.__init__(self)
9 |
10 | def step(self, a):
11 | posbefore = self.sim.data.qpos[0]
12 | self.do_simulation(a, self.frame_skip)
13 | posafter, height, ang = self.sim.data.qpos[0:3]
14 | alive_bonus = 1.0
15 | reward = (posafter - posbefore) / self.dt
16 | reward += alive_bonus
17 | reward -= 1e-3 * np.square(a).sum()
18 | s = self.state_vector()
19 | done = not (np.isfinite(s).all() and (np.abs(s[2:]) < 100).all() and
20 | (height > .7) and (abs(ang) < .2))
21 | ob = self._get_obs()
22 | return ob, reward, done, {}
23 |
24 | def _get_obs(self):
25 | return np.concatenate([
26 | self.sim.data.qpos.flat[1:],
27 | np.clip(self.sim.data.qvel.flat, -10, 10)
28 | ])
29 |
30 | def reset_model(self):
31 | qpos = self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq)
32 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
33 | self.set_state(qpos, qvel)
34 | return self._get_obs()
35 |
36 | def viewer_setup(self):
37 | self.viewer.cam.trackbodyid = 2
38 | self.viewer.cam.distance = self.model.stat.extent * 0.75
39 | self.viewer.cam.lookat[2] = 1.15
40 | self.viewer.cam.elevation = -20
41 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/humanoidstandup.py:
--------------------------------------------------------------------------------
1 | from gym.envs.mujoco import mujoco_env
2 | from gym import utils
3 | import numpy as np
4 |
5 | class HumanoidStandupEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'humanoidstandup.xml', 5)
8 | utils.EzPickle.__init__(self)
9 |
10 | def _get_obs(self):
11 | data = self.sim.data
12 | return np.concatenate([data.qpos.flat[2:],
13 | data.qvel.flat,
14 | data.cinert.flat,
15 | data.cvel.flat,
16 | data.qfrc_actuator.flat,
17 | data.cfrc_ext.flat])
18 |
19 | def step(self, a):
20 | self.do_simulation(a, self.frame_skip)
21 | pos_after = self.sim.data.qpos[2]
22 | data = self.sim.data
23 | uph_cost = (pos_after - 0) / self.model.opt.timestep
24 |
25 | quad_ctrl_cost = 0.1 * np.square(data.ctrl).sum()
26 | quad_impact_cost = .5e-6 * np.square(data.cfrc_ext).sum()
27 | quad_impact_cost = min(quad_impact_cost, 10)
28 | reward = uph_cost - quad_ctrl_cost - quad_impact_cost + 1
29 |
30 | done = bool(False)
31 | return self._get_obs(), reward, done, dict(reward_linup=uph_cost, reward_quadctrl=-quad_ctrl_cost, reward_impact=-quad_impact_cost)
32 |
33 | def reset_model(self):
34 | c = 0.01
35 | self.set_state(
36 | self.init_qpos + self.np_random.uniform(low=-c, high=c, size=self.model.nq),
37 | self.init_qvel + self.np_random.uniform(low=-c, high=c, size=self.model.nv,)
38 | )
39 | return self._get_obs()
40 |
41 | def viewer_setup(self):
42 | self.viewer.cam.trackbodyid = 1
43 | self.viewer.cam.distance = self.model.stat.extent * 1.0
44 | self.viewer.cam.lookat[2] = 0.8925
45 | self.viewer.cam.elevation = -20
46 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/inverted_double_pendulum.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class InvertedDoublePendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 |
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_double_pendulum.xml', 5)
9 | utils.EzPickle.__init__(self)
10 |
11 | def step(self, action):
12 | self.do_simulation(action, self.frame_skip)
13 | ob = self._get_obs()
14 | x, _, y = self.sim.data.site_xpos[0]
15 | dist_penalty = 0.01 * x ** 2 + (y - 2) ** 2
16 | v1, v2 = self.sim.data.qvel[1:3]
17 | vel_penalty = 1e-3 * v1**2 + 5e-3 * v2**2
18 | alive_bonus = 10
19 | r = alive_bonus - dist_penalty - vel_penalty
20 | done = bool(y <= 1)
21 | return ob, r, done, {}
22 |
23 | def _get_obs(self):
24 | return np.concatenate([
25 | self.sim.data.qpos[:1], # cart x pos
26 | np.sin(self.sim.data.qpos[1:]), # link angles
27 | np.cos(self.sim.data.qpos[1:]),
28 | np.clip(self.sim.data.qvel, -10, 10),
29 | np.clip(self.sim.data.qfrc_constraint, -10, 10)
30 | ]).ravel()
31 |
32 | def reset_model(self):
33 | self.set_state(
34 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
35 | self.init_qvel + self.np_random.randn(self.model.nv) * .1
36 | )
37 | return self._get_obs()
38 |
39 | def viewer_setup(self):
40 | v = self.viewer
41 | v.cam.trackbodyid = 0
42 | v.cam.distance = self.model.stat.extent * 0.5
43 | v.cam.lookat[2] = 0.12250000000000005 # v.model.stat.center[2]
44 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/inverted_pendulum.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class InvertedPendulumEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | utils.EzPickle.__init__(self)
8 | mujoco_env.MujocoEnv.__init__(self, 'inverted_pendulum.xml', 2)
9 |
10 | def step(self, a):
11 | reward = 1.0
12 | self.do_simulation(a, self.frame_skip)
13 | ob = self._get_obs()
14 | notdone = np.isfinite(ob).all() and (np.abs(ob[1]) <= .2)
15 | done = not notdone
16 | return ob, reward, done, {}
17 |
18 | def reset_model(self):
19 | qpos = self.init_qpos + self.np_random.uniform(size=self.model.nq, low=-0.01, high=0.01)
20 | qvel = self.init_qvel + self.np_random.uniform(size=self.model.nv, low=-0.01, high=0.01)
21 | self.set_state(qpos, qvel)
22 | return self._get_obs()
23 |
24 | def _get_obs(self):
25 | return np.concatenate([self.sim.data.qpos, self.sim.data.qvel]).ravel()
26 |
27 | def viewer_setup(self):
28 | v = self.viewer
29 | v.cam.trackbodyid = 0
30 | v.cam.distance = self.model.stat.extent
31 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/pusher.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | import mujoco_py
6 |
7 |
8 | class PusherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
9 | def __init__(self):
10 | utils.EzPickle.__init__(self)
11 | mujoco_env.MujocoEnv.__init__(self, 'pusher.xml', 5)
12 |
13 | def step(self, a):
14 | vec_1 = self.get_body_com("object") - self.get_body_com("tips_arm")
15 | vec_2 = self.get_body_com("object") - self.get_body_com("goal")
16 |
17 | reward_near = - np.linalg.norm(vec_1)
18 | reward_dist = - np.linalg.norm(vec_2)
19 | reward_ctrl = - np.square(a).sum()
20 | reward = reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near
21 |
22 | self.do_simulation(a, self.frame_skip)
23 | ob = self._get_obs()
24 | done = False
25 | return ob, reward, done, dict(reward_dist=reward_dist,
26 | reward_ctrl=reward_ctrl)
27 |
28 | def viewer_setup(self):
29 | self.viewer.cam.trackbodyid = -1
30 | self.viewer.cam.distance = 4.0
31 |
32 | def reset_model(self):
33 | qpos = self.init_qpos
34 |
35 | self.goal_pos = np.asarray([0, 0])
36 | while True:
37 | self.cylinder_pos = np.concatenate([
38 | self.np_random.uniform(low=-0.3, high=0, size=1),
39 | self.np_random.uniform(low=-0.2, high=0.2, size=1)])
40 | if np.linalg.norm(self.cylinder_pos - self.goal_pos) > 0.17:
41 | break
42 |
43 | qpos[-4:-2] = self.cylinder_pos
44 | qpos[-2:] = self.goal_pos
45 | qvel = self.init_qvel + self.np_random.uniform(low=-0.005,
46 | high=0.005, size=self.model.nv)
47 | qvel[-4:] = 0
48 | self.set_state(qpos, qvel)
49 | return self._get_obs()
50 |
51 | def _get_obs(self):
52 | return np.concatenate([
53 | self.sim.data.qpos.flat[:7],
54 | self.sim.data.qvel.flat[:7],
55 | self.get_body_com("tips_arm"),
56 | self.get_body_com("object"),
57 | self.get_body_com("goal"),
58 | ])
59 |
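The shaped reward above mixes three terms with fixed weights: object-to-goal distance at full weight, arm-to-object distance at 0.5, and the control penalty at 0.1. A standalone sketch of that weighting (hypothetical helper, not repository code):

    import numpy as np

    def pusher_reward(tips_arm, obj, goal, action):
        reward_near = -np.linalg.norm(obj - tips_arm)  # keep the arm near the object
        reward_dist = -np.linalg.norm(obj - goal)      # push the object toward the goal
        reward_ctrl = -np.square(action).sum()         # penalize large torques
        return reward_dist + 0.1 * reward_ctrl + 0.5 * reward_near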
--------------------------------------------------------------------------------
/gym/envs/mujoco/reacher.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class ReacherEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | utils.EzPickle.__init__(self)
8 | mujoco_env.MujocoEnv.__init__(self, 'reacher.xml', 2)
9 |
10 | def step(self, a):
11 | vec = self.get_body_com("fingertip")-self.get_body_com("target")
12 | reward_dist = - np.linalg.norm(vec)
13 | reward_ctrl = - np.square(a).sum()
14 | reward = reward_dist + reward_ctrl
15 | self.do_simulation(a, self.frame_skip)
16 | ob = self._get_obs()
17 | done = False
18 | return ob, reward, done, dict(reward_dist=reward_dist, reward_ctrl=reward_ctrl)
19 |
20 | def viewer_setup(self):
21 | self.viewer.cam.trackbodyid = 0
22 |
23 | def reset_model(self):
24 | qpos = self.np_random.uniform(low=-0.1, high=0.1, size=self.model.nq) + self.init_qpos
25 | while True:
26 | self.goal = self.np_random.uniform(low=-.2, high=.2, size=2)
27 | if np.linalg.norm(self.goal) < 0.2:
28 | break
29 | qpos[-2:] = self.goal
30 | qvel = self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
31 | qvel[-2:] = 0
32 | self.set_state(qpos, qvel)
33 | return self._get_obs()
34 |
35 | def _get_obs(self):
36 | theta = self.sim.data.qpos.flat[:2]
37 | return np.concatenate([
38 | np.cos(theta),
39 | np.sin(theta),
40 | self.sim.data.qpos.flat[2:],
41 | self.sim.data.qvel.flat[:2],
42 | self.get_body_com("fingertip") - self.get_body_com("target")
43 | ])
44 |
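Note that the joint angles enter the observation as (cos θ, sin θ) pairs rather than raw angles, which removes the 2π wrap-around discontinuity. A minimal illustration (not repository code):

    import numpy as np

    def embed(t):
        return np.array([np.cos(t), np.sin(t)])

    a, b = np.pi - 1e-3, -np.pi + 1e-3           # nearly the same physical pose
    print(abs(a - b))                            # ~2*pi apart as raw angles
    print(np.linalg.norm(embed(a) - embed(b)))   # ~2e-3: embeddings nearly coincide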
--------------------------------------------------------------------------------
/gym/envs/mujoco/swimmer.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class SwimmerEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 | def __init__(self):
7 | mujoco_env.MujocoEnv.__init__(self, 'swimmer.xml', 4)
8 | utils.EzPickle.__init__(self)
9 |
10 | def step(self, a):
11 | ctrl_cost_coeff = 0.0001
12 | xposbefore = self.sim.data.qpos[0]
13 | self.do_simulation(a, self.frame_skip)
14 | xposafter = self.sim.data.qpos[0]
15 | reward_fwd = (xposafter - xposbefore) / self.dt
16 | reward_ctrl = - ctrl_cost_coeff * np.square(a).sum()
17 | reward = reward_fwd + reward_ctrl
18 | ob = self._get_obs()
19 | return ob, reward, False, dict(reward_fwd=reward_fwd, reward_ctrl=reward_ctrl)
20 |
21 | def _get_obs(self):
22 | qpos = self.sim.data.qpos
23 | qvel = self.sim.data.qvel
24 | return np.concatenate([qpos.flat[2:], qvel.flat])
25 |
26 | def reset_model(self):
27 | self.set_state(
28 | self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq),
29 | self.init_qvel + self.np_random.uniform(low=-.1, high=.1, size=self.model.nv)
30 | )
31 | return self._get_obs()
32 |
--------------------------------------------------------------------------------
/gym/envs/mujoco/walker2d.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 |
5 | class Walker2dEnv(mujoco_env.MujocoEnv, utils.EzPickle):
6 |
7 | def __init__(self):
8 | mujoco_env.MujocoEnv.__init__(self, "walker2d.xml", 4)
9 | utils.EzPickle.__init__(self)
10 |
11 | def step(self, a):
12 | posbefore = self.sim.data.qpos[0]
13 | self.do_simulation(a, self.frame_skip)
14 | posafter, height, ang = self.sim.data.qpos[0:3]
15 | alive_bonus = 1.0
16 | reward = ((posafter - posbefore) / self.dt)
17 | reward += alive_bonus
18 | reward -= 1e-3 * np.square(a).sum()
19 | done = not (height > 0.8 and height < 2.0 and
20 | ang > -1.0 and ang < 1.0)
21 | ob = self._get_obs()
22 | return ob, reward, done, {}
23 |
24 | def _get_obs(self):
25 | qpos = self.sim.data.qpos
26 | qvel = self.sim.data.qvel
27 | return np.concatenate([qpos[1:], np.clip(qvel, -10, 10)]).ravel()
28 |
29 | def reset_model(self):
30 | self.set_state(
31 | self.init_qpos + self.np_random.uniform(low=-.005, high=.005, size=self.model.nq),
32 | self.init_qvel + self.np_random.uniform(low=-.005, high=.005, size=self.model.nv)
33 | )
34 | return self._get_obs()
35 |
36 | def viewer_setup(self):
37 | self.viewer.cam.trackbodyid = 2
38 | self.viewer.cam.distance = self.model.stat.extent * 0.5
39 | self.viewer.cam.lookat[2] = 1.15
40 | self.viewer.cam.elevation = -20
41 |
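A worked reading of step above: the reward is the torso's forward velocity estimated by a finite difference over one control step, plus an alive bonus of 1.0, minus a small quadratic control cost; the episode ends when the torso height leaves (0.8, 2.0) or its pitch leaves (-1, 1) rad. A sketch (not repository code):

    import numpy as np

    # Per-step reward and termination test of Walker2dEnv, factored out.
    def walker2d_step_stats(pos_before, pos_after, dt, action, height, ang):
        forward_reward = (pos_after - pos_before) / dt        # finite-difference x velocity
        reward = forward_reward + 1.0 - 1e-3 * np.square(action).sum()
        done = not (0.8 < height < 2.0 and -1.0 < ang < 1.0)  # fell over or pitched too far
        return reward, done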
--------------------------------------------------------------------------------
/gym/envs/robotics/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.robotics.fetch_env import FetchEnv
2 | from gym.envs.robotics.fetch.slide import FetchSlideEnv
3 | from gym.envs.robotics.fetch.pick_and_place import FetchPickAndPlaceEnv
4 | from gym.envs.robotics.fetch.push import FetchPushEnv
5 |
6 | from gym.envs.robotics.fetch.reach import FetchReachEnv
7 |
8 | from gym.envs.robotics.fetch.insert import FetchInsertEnv
9 | from gym.envs.robotics.fetch.drawer import FetchDrawerEnv
10 | from gym.envs.robotics.fetch.obs_push import FetchObsPushEnv
11 |
12 | from gym.envs.robotics.hand.reach import HandReachEnv
13 | from gym.envs.robotics.hand.manipulate import HandBlockEnv
14 | from gym.envs.robotics.hand.manipulate import HandEggEnv
15 | from gym.envs.robotics.hand.manipulate import HandPenEnv
16 |
17 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandBlockTouchSensorsEnv
18 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandEggTouchSensorsEnv
19 | from gym.envs.robotics.hand.manipulate_touch_sensors import HandPenTouchSensorsEnv
20 |
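Alongside the stock Fetch and Hand tasks, this package exports the repository's custom environments (FetchInsertEnv, FetchDrawerEnv, FetchObsPushEnv), which can be constructed directly even without a gym.make registration. A minimal sketch, assuming MuJoCo is installed:

    from gym.envs.robotics import FetchDrawerEnv

    env = FetchDrawerEnv(reward_type='sparse')
    obs = env.reset()
    # Goal-based envs return a dict observation with three entries:
    print(sorted(obs.keys()))  # ['achieved_goal', 'desired_goal', 'observation']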
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/drawer.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/fetch/drawer.xml
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/drawer_box.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/fetch/drawer_box.xml
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/drawer_dependencies.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/fetch/drawer_dependencies.xml
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/obs_push.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/fetch/obs_push.xml
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/occ_push.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/fetch/occ_push.xml
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/pick_and_place.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/fetch/pick_and_place.xml
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/push.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/fetch/push.xml
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/reach.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/fetch/reach.xml
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/fetch/slide.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/fetch/slide.xml
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/reach.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/hand/reach.xml
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/hand/shared_asset.xml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/hand/shared_asset.xml
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/base_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/bellows_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/drawer.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/drawer.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/drawercase.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/drawercase.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/drawerhandle.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/drawerhandle.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/elbow_flex_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/estop_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/estop_link.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/forearm_roll_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/gripper_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/gripper_link.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/head_pan_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/head_tilt_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/l_wheel_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/laser_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/laser_link.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/r_wheel_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/shoulder_lift_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/shoulder_pan_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/torso_fixed_link.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/torso_lift_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/upperarm_roll_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/window_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/window_base.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/window_frame.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/window_frame.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/window_h_base.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/window_h_base.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/window_h_frame.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/window_h_frame.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/windowa_frame.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowa_frame.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/windowa_glass.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowa_glass.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/windowa_h_frame.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowa_h_frame.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/windowa_h_glass.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowa_h_glass.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/windowb_frame.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowb_frame.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/windowb_glass.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowb_glass.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/windowb_h_frame.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowb_h_frame.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/windowb_h_glass.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/windowb_h_glass.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/wrist_flex_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/fetch/wrist_roll_link_collision.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/F1.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/F1.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/F2.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/F2.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/F3.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/F3.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/TH1_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/TH1_z.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/TH2_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/TH2_z.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/TH3_z.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/TH3_z.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/forearm_electric.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/forearm_electric.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/forearm_electric_cvx.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/knuckle.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/knuckle.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/lfmetacarpal.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/palm.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/palm.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/stls/hand/wrist.stl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/stls/hand/wrist.stl
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/textures/block.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/textures/block.png
--------------------------------------------------------------------------------
/gym/envs/robotics/assets/textures/block_hidden.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/assets/textures/block_hidden.png
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/fetch/__init__.py
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/dpush.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics.fetch_double_push_env import FetchEnv
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'double_push.xml')
8 |
9 |
10 | class FetchDoublePushEnv(FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.1, target_range=0.1, distance_threshold=0.05,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/drawer.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_drawer_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'drawer.xml')
8 |
9 |
10 | class FetchDrawerEnv(fetch_drawer_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.15,
16 | # 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | fetch_drawer_env.FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=False, block_gripper=False, n_substeps=20,
20 | gripper_extra_height=0.15, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.0, target_range=0.0, distance_threshold=0.02,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/drawer_box.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_drawer_box_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'drawer_box.xml')
8 |
9 |
10 | class FetchDrawerBoxEnv(fetch_drawer_box_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.15,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | fetch_drawer_box_env.FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20,
20 | gripper_extra_height=0.15, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.0, target_range=0.0, distance_threshold=0.02,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/insert.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_insert_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'insert_rand.xml')
8 |
9 |
10 | class FetchInsertEnv(fetch_insert_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | # 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | fetch_insert_env.FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20,
20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.0, target_range=0.0, distance_threshold=0.01,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/insert_rand.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_insert_rand_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'insert_rand.xml')
8 |
9 |
10 | class FetchInsertRandEnv(fetch_insert_rand_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.15,
16 | # 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | fetch_insert_rand_env.FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20,
20 | gripper_extra_height=0.15, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.0, target_range=0.0, distance_threshold=0.02,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/obs_push.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics.fetch_obs_push_env import FetchEnv
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'obs_push.xml')
8 |
9 |
10 | class FetchObsPushEnv(FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/occ_push.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics.fetch_occ_push_env import FetchEnv
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'occ_push.xml')
8 |
9 |
10 | class FetchPushEnv(FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/pick_and_place.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'pick_and_place.xml')
8 |
9 |
10 | class FetchPickAndPlaceEnv(fetch_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | fetch_env.FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20,
20 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/push.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'push.xml')
8 |
9 |
10 | class FetchPushEnv(fetch_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | fetch_env.FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.15, target_range=0.15, distance_threshold=0.05,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
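With reward_type='sparse' and distance_threshold=0.05, the reward is -1 on every step until the object lies within 5 cm of the goal, then 0. Because compute_reward can be re-evaluated against any substitute goal, transitions can be relabeled after the fact, which is exactly what HER exploits. A sketch (standard gym goal-env API, not repository code):

    from gym.envs.robotics import FetchPushEnv

    env = FetchPushEnv(reward_type='sparse')
    obs = env.reset()
    obs, reward, done, info = env.step(env.action_space.sample())
    # HER-style relabeling: score the step against the goal actually achieved.
    r = env.compute_reward(obs['achieved_goal'], obs['achieved_goal'], info)
    print(r)  # 0: a goal equal to the achieved state counts as success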
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/reach.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics import fetch_env
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'reach.xml')
8 |
9 |
10 | class FetchReachEnv(fetch_env.FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.4049,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | }
17 | fetch_env.FetchEnv.__init__(
18 | self, MODEL_XML_PATH, has_object=False, block_gripper=True, n_substeps=20,
19 | gripper_extra_height=0.2, target_in_the_air=True, target_offset=0.0,
20 | obj_range=0.15, target_range=0.15, distance_threshold=0.05,
21 | initial_qpos=initial_qpos, reward_type=reward_type)
22 | utils.EzPickle.__init__(self)
23 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/slide.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 |
4 | from gym import utils
5 | from gym.envs.robotics import fetch_env
6 |
7 |
8 | # Ensure we get the path separator correct on windows
9 | MODEL_XML_PATH = os.path.join('fetch', 'slide.xml')
10 |
11 |
12 | class FetchSlideEnv(fetch_env.FetchEnv, utils.EzPickle):
13 | def __init__(self, reward_type='sparse'):
14 | initial_qpos = {
15 | 'robot0:slide0': 0.05,
16 | 'robot0:slide1': 0.48,
17 | 'robot0:slide2': 0.0,
18 | 'object0:joint': [1.7, 1.1, 0.41, 1., 0., 0., 0.],
19 | }
20 | fetch_env.FetchEnv.__init__(
21 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
22 | gripper_extra_height=-0.02, target_in_the_air=False, target_offset=np.array([0.4, 0.0, 0.0]),
23 | obj_range=0.1, target_range=0.3, distance_threshold=0.05,
24 | initial_qpos=initial_qpos, reward_type=reward_type)
25 | utils.EzPickle.__init__(self)
26 |
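FetchSlide differs from FetchPush mainly through these kwargs: the goal is offset 0.4 m ahead of the gripper's start (target_offset) with a wider target_range of 0.3 m, so the puck must be struck and slide beyond the arm's reach. A sketch of the goal sampling this implies (paraphrasing fetch_env.FetchEnv's goal sampling, not verbatim):

    import numpy as np

    def sample_slide_goal(initial_gripper_xpos, np_random, table_height,
                          target_range=0.3, target_offset=np.array([0.4, 0.0, 0.0])):
        # Uniform box around the gripper's start position, shifted forward by
        # target_offset; height pinned to the table since target_in_the_air=False.
        goal = initial_gripper_xpos[:3] + np_random.uniform(-target_range, target_range, size=3)
        goal += target_offset
        goal[2] = table_height
        return goal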
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/stack.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics.fetch_stack_env import FetchEnv
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'stack.xml')
8 |
9 |
10 | class FetchStackEnv(FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20,
20 | gripper_extra_height=0.2, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.1, target_range=0.1, distance_threshold=0.05,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/tpush.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics.fetch_three_push_env import FetchEnv
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'three_push.xml')
8 |
9 |
10 | class FetchThreePushEnv(FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=True, n_substeps=20,
20 | gripper_extra_height=0.0, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.1, target_range=0.1, distance_threshold=0.05,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/envs/robotics/fetch/tstack.py:
--------------------------------------------------------------------------------
1 | import os
2 | from gym import utils
3 | from gym.envs.robotics.fetch_three_stack_env import FetchEnv
4 |
5 |
6 | # Ensure we get the path separator correct on windows
7 | MODEL_XML_PATH = os.path.join('fetch', 'three_stack.xml')
8 |
9 |
10 | class FetchThreeStackEnv(FetchEnv, utils.EzPickle):
11 | def __init__(self, reward_type='sparse'):
12 | initial_qpos = {
13 | 'robot0:slide0': 0.405,
14 | 'robot0:slide1': 0.48,
15 | 'robot0:slide2': 0.0,
16 | 'object0:joint': [1.25, 0.53, 0.4, 1., 0., 0., 0.],
17 | }
18 | FetchEnv.__init__(
19 | self, MODEL_XML_PATH, has_object=True, block_gripper=False, n_substeps=20,
20 | gripper_extra_height=0.2, target_in_the_air=False, target_offset=0.0,
21 | obj_range=0.1, target_range=0.1, distance_threshold=0.05,
22 | initial_qpos=initial_qpos, reward_type=reward_type)
23 | utils.EzPickle.__init__(self)
24 |
--------------------------------------------------------------------------------
/gym/envs/robotics/hand/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/robotics/hand/__init__.py
--------------------------------------------------------------------------------
/gym/envs/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/envs/tests/__init__.py
--------------------------------------------------------------------------------
/gym/envs/tests/spec_list.py:
--------------------------------------------------------------------------------
1 | from gym import envs, logger
2 | import os
3 |
4 |
5 | SKIP_MUJOCO_WARNING_MESSAGE = (
6 | "Cannot run mujoco test (either license key not found or mujoco not"
7 | "installed properly).")
8 |
9 |
10 | skip_mujoco = not (os.environ.get('MUJOCO_KEY'))
11 | if not skip_mujoco:
12 | try:
13 | import mujoco_py
14 | except ImportError:
15 | skip_mujoco = True
16 |
17 | def should_skip_env_spec_for_tests(spec):
18 | # We skip tests for envs that require dependencies or are otherwise
19 | # troublesome to run frequently
20 | ep = spec.entry_point
21 | # Skip mujoco tests for pull request CI
22 | if skip_mujoco and (ep.startswith('gym.envs.mujoco') or ep.startswith('gym.envs.robotics:')):
23 | return True
24 | try:
25 | import atari_py
26 | except ImportError:
27 | if ep.startswith('gym.envs.atari'):
28 | return True
29 | try:
30 | import Box2D
31 | except ImportError:
32 | if ep.startswith('gym.envs.box2d'):
33 | return True
34 |
35 | if ( 'GoEnv' in ep or
36 | 'HexEnv' in ep or
37 | (ep.startswith("gym.envs.atari") and not spec.id.startswith("Pong") and not spec.id.startswith("Seaquest"))
38 | ):
39 | logger.warn("Skipping tests for env {}".format(ep))
40 | return True
41 | return False
42 |
43 | spec_list = [spec for spec in sorted(envs.registry.all(), key=lambda x: x.id) if spec.entry_point is not None and not should_skip_env_spec_for_tests(spec)]
44 |
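spec_list is typically consumed by parametrized smoke tests; a sketch of the usual pattern (hypothetical test, not part of this file):

    import pytest
    from gym.envs.tests.spec_list import spec_list

    @pytest.mark.parametrize("spec", spec_list)
    def test_env_resets(spec):
        env = spec.make()  # build the env from its registry spec
        env.reset()
        env.close()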
--------------------------------------------------------------------------------
/gym/envs/tests/test_frozenlake_dfs.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import numpy as np
3 |
4 | from gym.envs.toy_text.frozen_lake import generate_random_map
5 |
6 | # Test that FrozenLake map generation creates valid maps of various sizes.
7 | def test_frozenlake_dfs_map_generation():
8 |
9 | def frozenlake_dfs_path_exists(res):
10 | frontier, discovered = [], set()
11 | frontier.append((0,0))
12 | while frontier:
13 | r, c = frontier.pop()
14 |             if (r, c) not in discovered:
15 | discovered.add((r,c))
16 | directions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
17 | for x, y in directions:
18 | r_new = r + x
19 | c_new = c + y
20 | if r_new < 0 or r_new >= size or c_new < 0 or c_new >= size:
21 | continue
22 | if res[r_new][c_new] == 'G':
23 | return True
24 | if (res[r_new][c_new] not in '#H'):
25 | frontier.append((r_new, c_new))
26 | return False
27 |
28 | map_sizes = [5, 10, 200]
29 | for size in map_sizes:
30 | new_frozenlake = generate_random_map(size)
31 | assert len(new_frozenlake) == size
32 | assert len(new_frozenlake[0]) == size
33 | assert frozenlake_dfs_path_exists(new_frozenlake)
34 |
--------------------------------------------------------------------------------
/gym/envs/tests/test_kellycoinflip.py:
--------------------------------------------------------------------------------
1 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv
2 |
3 |
4 | class TestKellyCoinflipEnv:
5 | @staticmethod
6 | def test_done_when_reaches_max_wealth():
7 | # https://github.com/openai/gym/issues/1266
8 | env = KellyCoinflipEnv()
9 | env.seed(1)
10 | env.reset()
11 | done = False
12 |
13 | while not done:
14 | action = int(env.wealth * 20) # bet 20% of the wealth
15 | observation, reward, done, info = env.step(action)
16 |
17 | assert env.wealth == env.max_wealth
18 |
--------------------------------------------------------------------------------
/gym/envs/toy_text/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.toy_text.blackjack import BlackjackEnv
2 | from gym.envs.toy_text.roulette import RouletteEnv
3 | from gym.envs.toy_text.frozen_lake import FrozenLakeEnv
4 | from gym.envs.toy_text.nchain import NChainEnv
5 | from gym.envs.toy_text.hotter_colder import HotterColder
6 | from gym.envs.toy_text.guessing_game import GuessingGame
7 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipEnv
8 | from gym.envs.toy_text.kellycoinflip import KellyCoinflipGeneralizedEnv
9 | from gym.envs.toy_text.cliffwalking import CliffWalkingEnv
10 | from gym.envs.toy_text.taxi import TaxiEnv
11 | 
--------------------------------------------------------------------------------
/gym/envs/toy_text/discrete.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym import Env, spaces
4 | from gym.utils import seeding
5 |
6 |
7 | def categorical_sample(prob_n, np_random):
8 | """
9 | Sample from categorical distribution
10 | Each row specifies class probabilities
11 | """
12 | prob_n = np.asarray(prob_n)
13 | csprob_n = np.cumsum(prob_n)
14 | return (csprob_n > np_random.rand()).argmax()
15 |
16 |
17 | class DiscreteEnv(Env):
18 |
19 | """
20 | Has the following members
21 | - nS: number of states
22 | - nA: number of actions
23 | - P: transitions (*)
24 | - isd: initial state distribution (**)
25 |
26 | (*) dictionary of lists, where
27 | P[s][a] == [(probability, nextstate, reward, done), ...]
28 | (**) list or array of length nS
29 |
30 |
31 | """
32 | def __init__(self, nS, nA, P, isd):
33 | self.P = P
34 | self.isd = isd
35 | self.lastaction = None # for rendering
36 | self.nS = nS
37 | self.nA = nA
38 |
39 | self.action_space = spaces.Discrete(self.nA)
40 | self.observation_space = spaces.Discrete(self.nS)
41 |
42 | self.seed()
43 | self.s = categorical_sample(self.isd, self.np_random)
44 |
45 | def seed(self, seed=None):
46 | self.np_random, seed = seeding.np_random(seed)
47 | return [seed]
48 |
49 | def reset(self):
50 | self.s = categorical_sample(self.isd, self.np_random)
51 | self.lastaction = None
52 | return int(self.s)
53 |
54 | def step(self, a):
55 | transitions = self.P[self.s][a]
56 | i = categorical_sample([t[0] for t in transitions], self.np_random)
57 | p, s, r, d = transitions[i]
58 | self.s = s
59 | self.lastaction = a
60 | return (int(s), r, d, {"prob": p})
61 |
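The P structure documented above is sufficient to define a complete MDP; a minimal two-state sketch (hypothetical example, not repository code):

    from gym.envs.toy_text.discrete import DiscreteEnv

    # One action; state 0 transitions to the absorbing state 1 with reward 1.
    P = {
        0: {0: [(1.0, 1, 1.0, True)]},   # (probability, nextstate, reward, done)
        1: {0: [(1.0, 1, 0.0, True)]},
    }
    env = DiscreteEnv(nS=2, nA=1, P=P, isd=[1.0, 0.0])  # always start in state 0
    print(env.reset())  # 0
    print(env.step(0))  # (1, 1.0, True, {'prob': 1.0})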
--------------------------------------------------------------------------------
/gym/envs/toy_text/roulette.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym import spaces
3 | from gym.utils import seeding
4 |
5 |
6 | class RouletteEnv(gym.Env):
7 | """Simple roulette environment
8 |
9 |     The roulette wheel has 37 spots. If the bet is 0 and a 0 comes up, you
10 |     win a reward of self.n - 2 (36 with the default 37 spots). If the parity
11 |     of your bet matches the parity of the spin, you win 1. Otherwise you
12 |     receive a reward of -1.
13 | 
14 |     With the payout of 36 used here, the long-run reward for betting 0 is 0
15 |     (a payout of 35 would give the classical -1/37). The last action (index 37) stops the rollout for a return of 0 (walking away)
16 | """
17 | def __init__(self, spots=37):
18 | self.n = spots + 1
19 | self.action_space = spaces.Discrete(self.n)
20 | self.observation_space = spaces.Discrete(1)
21 | self.seed()
22 |
23 | def seed(self, seed=None):
24 | self.np_random, seed = seeding.np_random(seed)
25 | return [seed]
26 |
27 | def step(self, action):
28 | assert self.action_space.contains(action)
29 | if action == self.n - 1:
30 | # observation, reward, done, info
31 | return 0, 0, True, {}
32 |
33 | # N.B. np.random.randint draws from [A, B) while random.randint draws from [A,B]
34 | val = self.np_random.randint(0, self.n - 1)
35 | if val == action == 0:
36 | reward = self.n - 2.0
37 | elif val != 0 and action != 0 and val % 2 == action % 2:
38 | reward = 1.0
39 | else:
40 | reward = -1.0
41 | return 0, reward, False, {}
42 |
43 | def reset(self):
44 | return 0
45 |
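An executable check of the expectation discussed in the docstring (editorial, not repository code):

    from fractions import Fraction

    payout = 38 - 2  # self.n - 2 with the default 37 spots
    ev = Fraction(1, 37) * payout - Fraction(36, 37)
    print(ev)        # 0 under the payout of 36; a payout of 35 gives -1/37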
--------------------------------------------------------------------------------
/gym/envs/unittest/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.envs.unittest.cube_crash import CubeCrash
2 | from gym.envs.unittest.cube_crash import CubeCrashSparse
3 | from gym.envs.unittest.cube_crash import CubeCrashScreenBecomesBlack
4 | from gym.envs.unittest.memorize_digits import MemorizeDigits
5 |
6 |
--------------------------------------------------------------------------------
/gym/logger.py:
--------------------------------------------------------------------------------
1 | import warnings
2 |
3 | from gym.utils import colorize
4 |
5 | DEBUG = 10
6 | INFO = 20
7 | WARN = 30
8 | ERROR = 40
9 | DISABLED = 50
10 |
11 | MIN_LEVEL = 30
12 |
13 | def set_level(level):
14 | """
15 | Set logging threshold on current logger.
16 | """
17 | global MIN_LEVEL
18 | MIN_LEVEL = level
19 |
20 | def debug(msg, *args):
21 | if MIN_LEVEL <= DEBUG:
22 | print('%s: %s'%('DEBUG', msg % args))
23 |
24 | def info(msg, *args):
25 | if MIN_LEVEL <= INFO:
26 | print('%s: %s'%('INFO', msg % args))
27 |
28 | def warn(msg, *args):
29 | if MIN_LEVEL <= WARN:
30 | warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
31 |
32 | def error(msg, *args):
33 | if MIN_LEVEL <= ERROR:
34 | print(colorize('%s: %s'%('ERROR', msg % args), 'red'))
35 |
36 | # DEPRECATED:
37 | setLevel = set_level
38 |
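Usage is a module-level threshold check; for example (sketch, not part of this file):

    from gym import logger

    logger.set_level(logger.DEBUG)             # default MIN_LEVEL is WARN (30)
    logger.debug('success rate: %.2f', 0.87)   # printed only once DEBUG is enabled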
--------------------------------------------------------------------------------
/gym/mjkey.txt:
--------------------------------------------------------------------------------
1 | MuJoCo Pro Individual license activation key, number 7777, type 6.
2 |
3 | Issued to Everyone.
4 |
5 | Expires October 18, 2031.
6 |
7 | Do not modify this file. Its entire content, including the
8 | plain text section, is used by the activation manager.
9 |
10 | 9aaedeefb37011a8a52361c736643665c7f60e796ff8ff70bb3f7a1d78e9a605
11 | 0453a3c853e4aa416e712d7e80cf799c6314ee5480ec6bd0f1ab51d1bb3c768f
12 | 8c06e7e572f411ecb25c3d6ef82cc20b00f672db88e6001b3dfdd3ab79e6c480
13 | 185d681811cfdaff640fb63295e391b05374edba90dd54cc1e162a9d99b82a8b
14 | ea3e87f2c67d08006c53daac2e563269cdb286838b168a2071c48c29fedfbea2
15 | 5effe96fe3cb05e85fb8af2d3851f385618ef8cdac42876831f095e052bd18c9
16 | 5dce57ff9c83670aad77e5a1f41444bec45e30e4e827f7bf9799b29f2c934e23
17 | dcf6d3c3ee9c8dd2ed057317100cd21b4abbbf652d02bf72c3d322e0c55dcc24
18 |
--------------------------------------------------------------------------------
/gym/obstacle_push.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/obstacle_push.png
--------------------------------------------------------------------------------
/gym/spaces/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.spaces.space import Space
2 | from gym.spaces.box import Box
3 | from gym.spaces.discrete import Discrete
4 | from gym.spaces.multi_discrete import MultiDiscrete
5 | from gym.spaces.multi_binary import MultiBinary
6 | from gym.spaces.tuple import Tuple
7 | from gym.spaces.dict import Dict
8 |
9 | from gym.spaces.utils import flatdim
10 | from gym.spaces.utils import flatten_space
11 | from gym.spaces.utils import flatten
12 | from gym.spaces.utils import unflatten
13 |
14 | __all__ = ["Space", "Box", "Discrete", "MultiDiscrete", "MultiBinary", "Tuple", "Dict", "flatdim", "flatten_space", "flatten", "unflatten"]
15 |
--------------------------------------------------------------------------------
/gym/spaces/discrete.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .space import Space
3 |
4 |
5 | class Discrete(Space):
6 | r"""A discrete space in :math:`\{ 0, 1, \\dots, n-1 \}`.
7 |
8 | Example::
9 |
10 | >>> Discrete(2)
11 |
12 | """
13 | def __init__(self, n):
14 | assert n >= 0
15 | self.n = n
16 | super(Discrete, self).__init__((), np.int64)
17 |
18 | def sample(self):
19 | return self.np_random.randint(self.n)
20 |
21 | def contains(self, x):
22 | if isinstance(x, int):
23 | as_int = x
24 | elif isinstance(x, (np.generic, np.ndarray)) and (x.dtype.char in np.typecodes['AllInteger'] and x.shape == ()):
25 | as_int = int(x)
26 | else:
27 | return False
28 | return as_int >= 0 and as_int < self.n
29 |
30 | def __repr__(self):
31 | return "Discrete(%d)" % self.n
32 |
33 | def __eq__(self, other):
34 | return isinstance(other, Discrete) and self.n == other.n
35 |
--------------------------------------------------------------------------------
/gym/spaces/multi_binary.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .space import Space
3 |
4 |
5 | class MultiBinary(Space):
6 | '''
7 | An n-dimensional binary space.
8 |
9 | The argument to MultiBinary defines n, which may be a number or a `list` of numbers.
10 |
11 | Example Usage:
12 |
13 | >>> self.observation_space = spaces.MultiBinary(5)
14 |
15 | >>> self.observation_space.sample()
16 |
17 | array([0, 1, 0, 1, 0], dtype=int8)
18 |
19 | >>> self.observation_space = spaces.MultiBinary([3, 2])
20 |
21 | >>> self.observation_space.sample()
22 |
23 | array([[0, 0],
24 | [0, 1],
25 | [1, 1]], dtype=int8)
26 |
27 | '''
28 | def __init__(self, n):
29 | self.n = n
30 | if type(n) in [tuple, list, np.ndarray]:
31 | input_n = n
32 | else:
33 | input_n = (n, )
34 | super(MultiBinary, self).__init__(input_n, np.int8)
35 |
36 | def sample(self):
37 | return self.np_random.randint(low=0, high=2, size=self.n, dtype=self.dtype)
38 |
39 | def contains(self, x):
40 | if isinstance(x, list) or isinstance(x, tuple):
41 | x = np.array(x) # Promote list to array for contains check
42 | if self.shape != x.shape:
43 | return False
44 | return ((x==0) | (x==1)).all()
45 |
46 | def to_jsonable(self, sample_n):
47 | return np.array(sample_n).tolist()
48 |
49 | def from_jsonable(self, sample_n):
50 | return [np.asarray(sample) for sample in sample_n]
51 |
52 | def __repr__(self):
53 | return "MultiBinary({})".format(self.n)
54 |
55 | def __eq__(self, other):
56 | return isinstance(other, MultiBinary) and self.n == other.n
57 |
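
Usage sketch for the list-shaped case:

    from gym.spaces import MultiBinary

    space = MultiBinary([3, 2])   # n may be an int or a list of ints
    x = space.sample()            # int8 array of shape (3, 2), entries in {0, 1}
    assert x.shape == (3, 2) and space.contains(x)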
--------------------------------------------------------------------------------
/gym/spaces/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/spaces/tests/__init__.py
--------------------------------------------------------------------------------
/gym/spaces/tuple.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .space import Space
3 |
4 |
5 | class Tuple(Space):
6 | """
7 | A tuple (i.e., product) of simpler spaces
8 |
9 | Example usage:
10 | self.observation_space = spaces.Tuple((spaces.Discrete(2), spaces.Discrete(3)))
11 | """
12 | def __init__(self, spaces):
13 | self.spaces = spaces
14 | for space in spaces:
15 | assert isinstance(space, Space), "Elements of the tuple must be instances of gym.Space"
16 | super(Tuple, self).__init__(None, None)
17 |
18 | def seed(self, seed=None):
19 | [space.seed(seed) for space in self.spaces]
20 |
21 | def sample(self):
22 | return tuple([space.sample() for space in self.spaces])
23 |
24 | def contains(self, x):
25 | if isinstance(x, list):
26 | x = tuple(x) # Promote list to tuple for contains check
27 | return isinstance(x, tuple) and len(x) == len(self.spaces) and all(
28 | space.contains(part) for (space,part) in zip(self.spaces,x))
29 |
30 | def __repr__(self):
31 | return "Tuple(" + ", ". join([str(s) for s in self.spaces]) + ")"
32 |
33 | def to_jsonable(self, sample_n):
34 | # serialize as list-repr of tuple of vectors
35 | return [space.to_jsonable([sample[i] for sample in sample_n]) \
36 | for i, space in enumerate(self.spaces)]
37 |
38 | def from_jsonable(self, sample_n):
39 | return [sample for sample in zip(*[space.from_jsonable(sample_n[i]) for i, space in enumerate(self.spaces)])]
40 |
41 | def __getitem__(self, index):
42 | return self.spaces[index]
43 |
44 | def __len__(self):
45 | return len(self.spaces)
46 |
47 | def __eq__(self, other):
48 | return isinstance(other, Tuple) and self.spaces == other.spaces
49 |
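
A short sketch of sampling and indexing a Tuple space:

    import numpy as np
    from gym.spaces import Box, Discrete, Tuple

    space = Tuple((Discrete(2), Box(-1.0, 1.0, (2,), dtype=np.float32)))
    x = space.sample()            # a python tuple: (int-like, np.ndarray)
    assert space.contains(x)
    assert len(space) == 2 and space[0] == Discrete(2)   # __len__ / __getitem__ / __eq__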
--------------------------------------------------------------------------------
/gym/stack_demo.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import gym
4 | from gym.envs.robotics.fetch.stack import FetchStackEnv
5 | env = FetchStackEnv()
6 |
7 |
8 | def p_control(env, obs, p_rate=0.2):
9 | a = env.action_space.sample()
10 | gg = obs['grip_goal']
11 | ag = obs['achieved_goal']
12 | error = ag - gg
13 | for axis, value in enumerate(error):
14 | if abs(value) > 0.02:
15 | if value > 0:
16 | a[axis] = p_rate
17 | else:
18 | a[axis] = -p_rate
19 | else:
20 | a[axis] = 0
21 | action = a
22 | return action
23 |
24 |
25 | for ep in range(20):
26 | obs = env.reset()
27 | for i in range(20):
28 | a = p_control(env, obs=obs)
29 | # a[-1] = 0.0
30 | #
31 | # a = env.action_space.sample()
32 | # a[0] = 0.01
33 | # if obs['grip_goal'][2] < 0.3:
34 | # pass
35 | # else:
36 | # a[1] = -0.2
37 | # a[2] = -0.2
38 | print("gg:", obs['grip_goal'])
39 |
40 | obs, reward, done, info = env.step(a)
41 | print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info))
42 | env.render()
43 |
--------------------------------------------------------------------------------
/gym/three_push_demo.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import matplotlib.pyplot as plt
3 | import gym
4 | import time
5 | from gym.envs.robotics.fetch.tpush import FetchThreePushEnv
6 | env = FetchThreePushEnv()
7 |
8 |
9 | def p_control(env, obs, p_rate=0.2):
10 | a = env.action_space.sample()
11 | gg = obs['grip_goal']
12 | ag = obs['achieved_goal']
13 | error = ag - gg
14 | for axis, value in enumerate(error):
15 | if abs(value) > 0.02:
16 | if value > 0:
17 | a[axis] = p_rate
18 | else:
19 | a[axis] = -p_rate
20 | else:
21 | a[axis] = 0
22 | action = a
23 | return action
24 |
25 |
26 | for ep in range(20):
27 | st = time.time()
28 | obs = env.reset()
29 | for i in range(50):
30 | # a = p_control(env, obs=obs)
31 | # a[2] = 0.0
32 | a = np.random.random(4)
33 | a[2] = 1.0
34 |
35 | print("gg:", obs['grip_goal'])
36 |
37 | obs, reward, done, info = env.step(a)
38 | print("obs:", obs)
39 | print("ep:{}, i:{}, reward:{}, done:{}, info:{}".format(ep, i, reward, done, info))
40 | env.render()
41 | # print('ep_time:', time.time() - st)
42 |
--------------------------------------------------------------------------------
/gym/utils/__init__.py:
--------------------------------------------------------------------------------
1 | """A set of common utilities used within the environments. These are
2 | not intended as API functions, and will not remain stable over time.
3 | """
4 |
5 | # These submodules should not have any import-time dependencies.
6 | # We want this since we use `utils` during our import-time sanity checks
7 | # that verify that our dependencies are actually present.
8 | from .colorize import colorize
9 | from .ezpickle import EzPickle
10 |
--------------------------------------------------------------------------------
/gym/utils/colorize.py:
--------------------------------------------------------------------------------
1 | """A set of common utilities used within the environments. These are
2 | not intended as API functions, and will not remain stable over time.
3 | """
4 |
5 | color2num = dict(
6 | gray=30,
7 | red=31,
8 | green=32,
9 | yellow=33,
10 | blue=34,
11 | magenta=35,
12 | cyan=36,
13 | white=37,
14 | crimson=38
15 | )
16 |
17 |
18 | def colorize(string, color, bold=False, highlight = False):
19 | """Return string surrounded by appropriate terminal color codes to
20 | print colorized text. Valid colors: gray, red, green, yellow,
21 | blue, magenta, cyan, white, crimson
22 | """
23 |
24 | attr = []
25 | num = color2num[color]
26 | if highlight: num += 10
27 | attr.append(str(num))
28 | if bold: attr.append('1')
29 | attrs = ';'.join(attr)
30 | return '\x1b[%sm%s\x1b[0m' % (attrs, string)
31 |
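
For example (ANSI escapes; most terminals render these in color):

    from gym.utils.colorize import colorize

    print(colorize("training started", "green", bold=True))
    print(colorize("low success rate", "yellow", highlight=True))  # +10 selects background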
--------------------------------------------------------------------------------
/gym/utils/ezpickle.py:
--------------------------------------------------------------------------------
1 | class EzPickle(object):
2 | """Objects that are pickled and unpickled via their constructor
3 | arguments.
4 |
5 | Example usage:
6 |
7 | class Dog(Animal, EzPickle):
8 | def __init__(self, furcolor, tailkind="bushy"):
9 | Animal.__init__(self)
10 | EzPickle.__init__(self, furcolor, tailkind)
11 | ...
12 |
13 | When this object is unpickled, a new Dog will be constructed by passing the provided
14 | furcolor and tailkind into the constructor. However, philosophers are still not sure
15 | whether it is still the same dog.
16 |
17 | This is generally needed only for environments which wrap C/C++ code, such as MuJoCo
18 | and Atari.
19 | """
20 | def __init__(self, *args, **kwargs):
21 | self._ezpickle_args = args
22 | self._ezpickle_kwargs = kwargs
23 | def __getstate__(self):
24 | return {"_ezpickle_args" : self._ezpickle_args, "_ezpickle_kwargs": self._ezpickle_kwargs}
25 | def __setstate__(self, d):
26 | out = type(self)(*d["_ezpickle_args"], **d["_ezpickle_kwargs"])
27 | self.__dict__.update(out.__dict__)
28 |
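
A sketch with a hypothetical `Counter` class to show what survives the round trip:

    import pickle
    from gym.utils.ezpickle import EzPickle

    class Counter(EzPickle):
        def __init__(self, start=0):
            EzPickle.__init__(self, start=start)
            self.value = start            # derived state; rebuilt by the constructor

    c2 = pickle.loads(pickle.dumps(Counter(start=5)))
    assert c2.value == 5                  # reconstructed via the saved constructor args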
--------------------------------------------------------------------------------
/gym/utils/json_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | def json_encode_np(obj):
4 | """
5 | JSON can't serialize numpy types, so convert to pure python
6 | """
7 | if isinstance(obj, np.ndarray):
8 | return list(obj)
9 | elif isinstance(obj, np.float32):
10 | return float(obj)
11 | elif isinstance(obj, np.float64):
12 | return float(obj)
13 | elif isinstance(obj, np.int8):
14 | return int(obj)
15 | elif isinstance(obj, np.int16):
16 | return int(obj)
17 | elif isinstance(obj, np.int32):
18 | return int(obj)
19 | elif isinstance(obj, np.int64):
20 | return int(obj)
21 | else:
22 | return obj
23 |
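
This plugs straight into `json.dumps` via its `default` hook, e.g.:

    import json
    import numpy as np
    from gym.utils.json_utils import json_encode_np

    config = {'lr': np.float32(1e-3), 'seed': np.int64(42), 'obs_mean': np.zeros(3)}
    # `default` is only called for objects json cannot serialize natively
    print(json.dumps(config, default=json_encode_np))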
--------------------------------------------------------------------------------
/gym/vector/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/vector/tests/__init__.py
--------------------------------------------------------------------------------
/gym/vector/tests/test_vector_env_wrapper.py:
--------------------------------------------------------------------------------
1 | import gym
2 | from gym.vector import make
3 | from gym.vector import VectorEnvWrapper
4 |
5 | class DummyWrapper(VectorEnvWrapper):
6 | def __init__(self, env):
7 | self.env = env
8 | self.counter = 0
9 |
10 | def reset_async(self):
11 | super().reset_async()
12 | self.counter += 1
13 |
14 |
15 | def test_vector_env_wrapper_inheritance():
16 | env = make('FrozenLake-v0', asynchronous=False)
17 | wrapped = DummyWrapper(env)
18 | wrapped.reset()
19 | assert wrapped.counter == 1
20 |
21 |
--------------------------------------------------------------------------------
/gym/vector/utils/__init__.py:
--------------------------------------------------------------------------------
1 | from gym.vector.utils.misc import CloudpickleWrapper, clear_mpi_env_vars
2 | from gym.vector.utils.numpy_utils import concatenate, create_empty_array
3 | from gym.vector.utils.shared_memory import create_shared_memory, read_from_shared_memory, write_to_shared_memory
4 | from gym.vector.utils.spaces import _BaseGymSpaces, batch_space
5 |
6 | __all__ = [
7 | 'CloudpickleWrapper',
8 | 'clear_mpi_env_vars',
9 | 'concatenate',
10 | 'create_empty_array',
11 | 'create_shared_memory',
12 | 'read_from_shared_memory',
13 | 'write_to_shared_memory',
14 | '_BaseGymSpaces',
15 | 'batch_space'
16 | ]
17 |
--------------------------------------------------------------------------------
/gym/vector/utils/misc.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import os
3 |
4 | __all__ = ['CloudpickleWrapper', 'clear_mpi_env_vars']
5 |
6 | class CloudpickleWrapper(object):
7 | def __init__(self, fn):
8 | self.fn = fn
9 |
10 | def __getstate__(self):
11 | import cloudpickle
12 | return cloudpickle.dumps(self.fn)
13 |
14 | def __setstate__(self, ob):
15 | import pickle
16 | self.fn = pickle.loads(ob)
17 |
18 | def __call__(self):
19 | return self.fn()
20 |
21 | @contextlib.contextmanager
22 | def clear_mpi_env_vars():
23 | """
24 | `from mpi4py import MPI` will call `MPI_Init` by default. If the child
25 | process has MPI environment variables, MPI will think that the child process
26 | is an MPI process just like the parent and do bad things such as hang.
27 |
28 | This context manager is a hacky way to clear those environment variables
29 | temporarily such as when we are starting multiprocessing Processes.
30 | """
31 | removed_environment = {}
32 | for k, v in list(os.environ.items()):
33 | for prefix in ['OMPI_', 'PMI_']:
34 | if k.startswith(prefix):
35 | removed_environment[k] = v
36 | del os.environ[k]
37 | try:
38 | yield
39 | finally:
40 | os.environ.update(removed_environment)
41 |
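
A self-contained illustration (faking an Open MPI variable for the demo):

    import os
    from gym.vector.utils import clear_mpi_env_vars

    os.environ['OMPI_COMM_WORLD_RANK'] = '0'
    with clear_mpi_env_vars():
        # hidden inside the block, so spawned workers won't try to join MPI
        assert 'OMPI_COMM_WORLD_RANK' not in os.environ
    assert os.environ['OMPI_COMM_WORLD_RANK'] == '0'   # restored on exit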
--------------------------------------------------------------------------------
/gym/version.py:
--------------------------------------------------------------------------------
1 | VERSION = '0.18.0'
2 |
--------------------------------------------------------------------------------
/gym/wrappers/__init__.py:
--------------------------------------------------------------------------------
1 | from gym import error
2 | from gym.wrappers.monitor import Monitor
3 | from gym.wrappers.time_limit import TimeLimit
4 | from gym.wrappers.filter_observation import FilterObservation
5 | from gym.wrappers.atari_preprocessing import AtariPreprocessing
6 | from gym.wrappers.time_aware_observation import TimeAwareObservation
7 | from gym.wrappers.rescale_action import RescaleAction
8 | from gym.wrappers.flatten_observation import FlattenObservation
9 | from gym.wrappers.gray_scale_observation import GrayScaleObservation
10 | from gym.wrappers.frame_stack import LazyFrames
11 | from gym.wrappers.frame_stack import FrameStack
12 | from gym.wrappers.transform_observation import TransformObservation
13 | from gym.wrappers.transform_reward import TransformReward
14 | from gym.wrappers.resize_observation import ResizeObservation
15 | from gym.wrappers.clip_action import ClipAction
16 | from gym.wrappers.record_episode_statistics import RecordEpisodeStatistics
17 |
--------------------------------------------------------------------------------
/gym/wrappers/clip_action.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym import ActionWrapper
4 | from gym.spaces import Box
5 |
6 |
7 | class ClipAction(ActionWrapper):
8 | r"""Clip the continuous action within the valid bound. """
9 | def __init__(self, env):
10 | assert isinstance(env.action_space, Box)
11 | super(ClipAction, self).__init__(env)
12 |
13 | def action(self, action):
14 | return np.clip(action, self.action_space.low, self.action_space.high)
15 |
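
Usage sketch (4-tuple step API, matching this gym version):

    import gym
    from gym.wrappers import ClipAction

    env = ClipAction(gym.make('MountainCarContinuous-v0'))
    env.reset()
    # the out-of-range action is silently clipped to the Box bounds [-1, 1]
    obs, reward, done, info = env.step([2.5])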
--------------------------------------------------------------------------------
/gym/wrappers/flatten_observation.py:
--------------------------------------------------------------------------------
1 | import gym.spaces as spaces
2 | from gym import ObservationWrapper
3 |
4 |
5 | class FlattenObservation(ObservationWrapper):
6 | r"""Observation wrapper that flattens the observation."""
7 | def __init__(self, env):
8 | super(FlattenObservation, self).__init__(env)
9 | self.observation_space = spaces.flatten_space(env.observation_space)
10 |
11 | def observation(self, observation):
12 | return spaces.flatten(self.env.observation_space, observation)
13 |
--------------------------------------------------------------------------------
/gym/wrappers/gray_scale_observation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym.spaces import Box
4 | from gym import ObservationWrapper
5 |
6 |
7 | class GrayScaleObservation(ObservationWrapper):
8 | r"""Convert the image observation from RGB to gray scale. """
9 | def __init__(self, env, keep_dim=False):
10 | super(GrayScaleObservation, self).__init__(env)
11 | self.keep_dim = keep_dim
12 |
13 | assert len(env.observation_space.shape) == 3 and env.observation_space.shape[-1] == 3
14 | obs_shape = self.observation_space.shape[:2]
15 | if self.keep_dim:
16 | self.observation_space = Box(low=0, high=255, shape=(obs_shape[0], obs_shape[1], 1), dtype=np.uint8)
17 | else:
18 | self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8)
19 |
20 | def observation(self, observation):
21 | import cv2
22 | observation = cv2.cvtColor(observation, cv2.COLOR_RGB2GRAY)
23 | if self.keep_dim:
24 | observation = np.expand_dims(observation, -1)
25 | return observation
26 |
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/wrappers/monitoring/__init__.py
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/wrappers/monitoring/tests/__init__.py
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/tests/helpers.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import shutil
3 | import tempfile
4 |
5 | @contextlib.contextmanager
6 | def tempdir():
7 | temp = tempfile.mkdtemp()
8 | yield temp
9 | shutil.rmtree(temp)
10 |
--------------------------------------------------------------------------------
/gym/wrappers/monitoring/tests/test_video_recorder.py:
--------------------------------------------------------------------------------
1 | import json
2 | import os
3 | import shutil
4 | import tempfile
5 | import numpy as np
6 |
7 | import gym
8 | from gym.wrappers.monitoring.video_recorder import VideoRecorder
9 |
10 | class BrokenRecordableEnv(object):
11 | metadata = {'render.modes': [None, 'rgb_array']}
12 |
13 | def render(self, mode=None):
14 | pass
15 |
16 | class UnrecordableEnv(object):
17 | metadata = {'render.modes': [None]}
18 |
19 | def render(self, mode=None):
20 | pass
21 |
22 | def test_record_simple():
23 | env = gym.make("CartPole-v1")
24 | rec = VideoRecorder(env)
25 | env.reset()
26 | rec.capture_frame()
27 | rec.close()
28 | assert not rec.empty
29 | assert not rec.broken
30 | assert os.path.exists(rec.path)
31 | f = open(rec.path)
32 | assert os.fstat(f.fileno()).st_size > 100
33 |
34 | def test_no_frames():
35 | env = BrokenRecordableEnv()
36 | rec = VideoRecorder(env)
37 | rec.close()
38 | assert rec.empty
39 | assert rec.functional
40 | assert not os.path.exists(rec.path)
41 |
42 | def test_record_unrecordable_method():
43 | env = UnrecordableEnv()
44 | rec = VideoRecorder(env)
45 | assert not rec.enabled
46 | rec.close()
47 |
48 | def test_record_breaking_render_method():
49 | env = BrokenRecordableEnv()
50 | rec = VideoRecorder(env)
51 | rec.capture_frame()
52 | rec.close()
53 | assert rec.empty
54 | assert rec.broken
55 | assert not os.path.exists(rec.path)
56 |
57 | def test_text_envs():
58 | env = gym.make('FrozenLake-v0')
59 | video = VideoRecorder(env)
60 | try:
61 | env.reset()
62 | video.capture_frame()
63 | video.close()
64 | finally:
65 | os.remove(video.path)
66 |
--------------------------------------------------------------------------------
/gym/wrappers/record_episode_statistics.py:
--------------------------------------------------------------------------------
1 | import time
2 | from collections import deque
3 |
4 | import gym
5 |
6 |
7 | class RecordEpisodeStatistics(gym.Wrapper):
8 | def __init__(self, env, deque_size=100):
9 | super(RecordEpisodeStatistics, self).__init__(env)
10 | self.t0 = time.time() # TODO: use perf_counter when gym removes Python 2 support
11 | self.episode_return = 0.0
12 | self.episode_length = 0
13 | self.return_queue = deque(maxlen=deque_size)
14 | self.length_queue = deque(maxlen=deque_size)
15 |
16 | def reset(self, **kwargs):
17 | observation = super(RecordEpisodeStatistics, self).reset(**kwargs)
18 | self.episode_return = 0.0
19 | self.episode_length = 0
20 | return observation
21 |
22 | def step(self, action):
23 | observation, reward, done, info = super(RecordEpisodeStatistics, self).step(action)
24 | self.episode_return += reward
25 | self.episode_length += 1
26 | if done:
27 | info['episode'] = {'r': self.episode_return,
28 | 'l': self.episode_length,
29 | 't': round(time.time() - self.t0, 6)}
30 | self.return_queue.append(self.episode_return)
31 | self.length_queue.append(self.episode_length)
32 | self.episode_return = 0.0
33 | self.episode_length = 0
34 | return observation, reward, done, info
35 |
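
For example, reading the summary on the terminal step:

    import gym
    from gym.wrappers import RecordEpisodeStatistics

    env = RecordEpisodeStatistics(gym.make('CartPole-v1'))
    env.reset()
    done = False
    while not done:
        _, _, done, info = env.step(env.action_space.sample())
    print(info['episode'])   # {'r': return, 'l': length, 't': seconds since wrapper init}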
--------------------------------------------------------------------------------
/gym/wrappers/rescale_action.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import gym
4 | from gym import spaces
5 |
6 |
7 | class RescaleAction(gym.ActionWrapper):
8 | r"""Rescales the continuous action space of the environment to a range [a,b].
9 |
10 | Example::
11 |
12 | >>> RescaleAction(env, a, b).action_space == Box(a,b)
13 | True
14 |
15 | """
16 | def __init__(self, env, a, b):
17 | assert isinstance(env.action_space, spaces.Box), (
18 | "expected Box action space, got {}".format(type(env.action_space)))
19 | assert np.less_equal(a, b).all(), (a, b)
20 | super(RescaleAction, self).__init__(env)
21 | self.a = np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + a
22 | self.b = np.zeros(env.action_space.shape, dtype=env.action_space.dtype) + b
23 | self.action_space = spaces.Box(low=a, high=b, shape=env.action_space.shape, dtype=env.action_space.dtype)
24 |
25 | def action(self, action):
26 | assert np.all(np.greater_equal(action, self.a)), (action, self.a)
27 | assert np.all(np.less_equal(action, self.b)), (action, self.b)
28 | low = self.env.action_space.low
29 | high = self.env.action_space.high
30 | action = low + (high - low)*((action - self.a)/(self.b - self.a))
31 | action = np.clip(action, low, high)
32 | return action
33 |
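
A worked example of the linear map (Pendulum's native bounds are [-2, 2]):

    import gym
    from gym.wrappers import RescaleAction

    env = RescaleAction(gym.make('Pendulum-v0'), 0.0, 1.0)
    env.reset()
    # 0.5 maps to low + (high - low) * (0.5 - a) / (b - a) = -2 + 4 * 0.5 = 0.0
    obs, reward, done, info = env.step([0.5])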
--------------------------------------------------------------------------------
/gym/wrappers/resize_observation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym.spaces import Box
4 | from gym import ObservationWrapper
5 |
6 |
7 | class ResizeObservation(ObservationWrapper):
8 | r"""Downsample the image observation to a square image. """
9 | def __init__(self, env, shape):
10 | super(ResizeObservation, self).__init__(env)
11 | if isinstance(shape, int):
12 | shape = (shape, shape)
13 | assert all(x > 0 for x in shape), shape
14 | self.shape = tuple(shape)
15 |
16 | obs_shape = self.shape + self.observation_space.shape[2:]
17 | self.observation_space = Box(low=0, high=255, shape=obs_shape, dtype=np.uint8)
18 |
19 | def observation(self, observation):
20 | import cv2
21 | observation = cv2.resize(observation, self.shape[::-1], interpolation=cv2.INTER_AREA)
22 | if observation.ndim == 2:
23 | observation = np.expand_dims(observation, -1)
24 | return observation
25 |
--------------------------------------------------------------------------------
/gym/wrappers/test_clip_action.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | import gym
4 | from gym.wrappers import ClipAction
5 |
6 |
7 | def test_clip_action():
8 | # mountaincar: action-based rewards
9 | make_env = lambda: gym.make('MountainCarContinuous-v0')
10 | env = make_env()
11 | wrapped_env = ClipAction(make_env())
12 |
13 | seed = 0
14 | env.seed(seed)
15 | wrapped_env.seed(seed)
16 |
17 | env.reset()
18 | wrapped_env.reset()
19 |
20 | actions = [[.4], [1.2], [-0.3], [0.0], [-2.5]]
21 | for action in actions:
22 | obs1, r1, d1, _ = env.step(np.clip(action, env.action_space.low, env.action_space.high))
23 | obs2, r2, d2, _ = wrapped_env.step(action)
24 | assert np.allclose(r1, r2)
25 | assert np.allclose(obs1, obs2)
26 | assert d1 == d2
27 |
--------------------------------------------------------------------------------
/gym/wrappers/test_flatten_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import FlattenObservation
7 | from gym import spaces
8 |
9 |
10 | @pytest.mark.parametrize('env_id', ['Blackjack-v0', 'KellyCoinflip-v0'])
11 | def test_flatten_observation(env_id):
12 | env = gym.make(env_id)
13 | wrapped_env = FlattenObservation(env)
14 |
15 | obs = env.reset()
16 | wrapped_obs = wrapped_env.reset()
17 |
18 | if env_id == 'Blackjack-v0':
19 | space = spaces.Tuple((
20 | spaces.Discrete(32),
21 | spaces.Discrete(11),
22 | spaces.Discrete(2)))
23 | wrapped_space = spaces.Box(-np.inf, np.inf,
24 | [32 + 11 + 2], dtype=np.float32)
25 | elif env_id == 'KellyCoinflip-v0':
26 | space = spaces.Tuple((
27 | spaces.Box(0, 250.0, [1], dtype=np.float32),
28 | spaces.Discrete(300 + 1)))
29 | wrapped_space = spaces.Box(-np.inf, np.inf,
30 | [1 + (300 + 1)], dtype=np.float32)
31 |
32 | assert space.contains(obs)
33 | assert wrapped_space.contains(wrapped_obs)
34 |
--------------------------------------------------------------------------------
/gym/wrappers/test_frame_stack.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | pytest.importorskip("atari_py")
3 |
4 | import numpy as np
5 | import gym
6 | from gym.wrappers import FrameStack
7 | try:
8 | import lz4
9 | except ImportError:
10 | lz4 = None
11 |
12 |
13 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0', 'Pong-v0'])
14 | @pytest.mark.parametrize('num_stack', [2, 3, 4])
15 | @pytest.mark.parametrize('lz4_compress', [
16 | pytest.param(True, marks=pytest.mark.skipif(lz4 is None, reason="Need lz4 to run tests with compression")),
17 | False
18 | ])
19 | def test_frame_stack(env_id, num_stack, lz4_compress):
20 | env = gym.make(env_id)
21 | shape = env.observation_space.shape
22 | env = FrameStack(env, num_stack, lz4_compress)
23 | assert env.observation_space.shape == (num_stack,) + shape
24 | assert env.observation_space.dtype == env.env.observation_space.dtype
25 |
26 | obs = env.reset()
27 | obs = np.asarray(obs)
28 | assert obs.shape == (num_stack,) + shape
29 | for i in range(1, num_stack):
30 | assert np.allclose(obs[i - 1], obs[i])
31 |
32 | obs, _, _, _ = env.step(env.action_space.sample())
33 | obs = np.asarray(obs)
34 | assert obs.shape == (num_stack,) + shape
35 | for i in range(1, num_stack - 1):
36 | assert np.allclose(obs[i - 1], obs[i])
37 | assert not np.allclose(obs[-1], obs[-2])
38 |
39 | obs, _, _, _ = env.step(env.action_space.sample())
40 | assert len(obs) == num_stack
41 |
--------------------------------------------------------------------------------
/gym/wrappers/test_gray_scale_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import GrayScaleObservation
7 | from gym.wrappers import AtariPreprocessing
8 | pytest.importorskip('atari_py')
9 | pytest.importorskip('cv2')
10 |
11 | @pytest.mark.parametrize('env_id', ['PongNoFrameskip-v0', 'SpaceInvadersNoFrameskip-v0'])
12 | @pytest.mark.parametrize('keep_dim', [True, False])
13 | def test_gray_scale_observation(env_id, keep_dim):
14 | gray_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=True)
15 | rgb_env = AtariPreprocessing(gym.make(env_id), screen_size=84, grayscale_obs=False)
16 | wrapped_env = GrayScaleObservation(rgb_env, keep_dim=keep_dim)
17 | assert rgb_env.observation_space.shape[-1] == 3
18 |
19 | seed = 0
20 | gray_env.seed(seed)
21 | wrapped_env.seed(seed)
22 |
23 | gray_obs = gray_env.reset()
24 | wrapped_obs = wrapped_env.reset()
25 |
26 | if keep_dim:
27 | assert wrapped_env.observation_space.shape[-1] == 1
28 | assert len(wrapped_obs.shape) == 3
29 | wrapped_obs = wrapped_obs.squeeze(-1)
30 | else:
31 | assert len(wrapped_env.observation_space.shape) == 2
32 | assert len(wrapped_obs.shape) == 2
33 |
34 | # ALE gray scale is slightly different, but no more than by one shade
35 | assert np.allclose(gray_obs.astype('int32'), wrapped_obs.astype('int32'), atol=1)
36 |
--------------------------------------------------------------------------------
/gym/wrappers/test_record_episode_statistics.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import gym
4 | from gym.wrappers import RecordEpisodeStatistics
5 |
6 |
7 | @pytest.mark.parametrize('env_id', ['CartPole-v0', 'Pendulum-v0'])
8 | @pytest.mark.parametrize('deque_size', [2, 5])
9 | def test_record_episode_statistics(env_id, deque_size):
10 | env = gym.make(env_id)
11 | env = RecordEpisodeStatistics(env, deque_size)
12 |
13 | for n in range(5):
14 | env.reset()
15 | assert env.episode_return == 0.0
16 | assert env.episode_length == 0
17 | for t in range(env.spec.max_episode_steps):
18 | _, _, done, info = env.step(env.action_space.sample())
19 | if done:
20 | assert 'episode' in info
21 | assert all([item in info['episode'] for item in ['r', 'l', 't']])
22 | break
23 | assert len(env.return_queue) == deque_size
24 | assert len(env.length_queue) == deque_size
25 |
--------------------------------------------------------------------------------
/gym/wrappers/test_rescale_action.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import RescaleAction
7 |
8 |
9 | def test_rescale_action():
10 | env = gym.make('CartPole-v1')
11 | with pytest.raises(AssertionError):
12 | env = RescaleAction(env, -1, 1)
13 | del env
14 |
15 | env = gym.make('Pendulum-v0')
16 | wrapped_env = RescaleAction(gym.make('Pendulum-v0'), -1, 1)
17 |
18 | seed = 0
19 | env.seed(seed)
20 | wrapped_env.seed(seed)
21 |
22 | obs = env.reset()
23 | wrapped_obs = wrapped_env.reset()
24 | assert np.allclose(obs, wrapped_obs)
25 |
26 | obs, reward, _, _ = env.step([1.5])
27 | with pytest.raises(AssertionError):
28 | wrapped_env.step([1.5])
29 | wrapped_obs, wrapped_reward, _, _ = wrapped_env.step([0.75])
30 |
31 | assert np.allclose(obs, wrapped_obs)
32 | assert np.allclose(reward, wrapped_reward)
33 |
--------------------------------------------------------------------------------
/gym/wrappers/test_resize_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import gym
4 | from gym.wrappers import ResizeObservation
5 | try:
6 | import atari_py
7 | except ImportError:
8 | atari_py = None
9 |
10 |
11 | @pytest.mark.skipif(atari_py is None, reason='Only run this test when atari_py is installed')
12 | @pytest.mark.parametrize('env_id', ['PongNoFrameskip-v0', 'SpaceInvadersNoFrameskip-v0'])
13 | @pytest.mark.parametrize('shape', [16, 32, (8, 5), [10, 7]])
14 | def test_resize_observation(env_id, shape):
15 | env = gym.make(env_id)
16 | env = ResizeObservation(env, shape)
17 |
18 |
19 | assert env.observation_space.shape[-1] == 3
20 | obs = env.reset()
21 | if isinstance(shape, int):
22 | assert env.observation_space.shape[:2] == (shape, shape)
23 | assert obs.shape == (shape, shape, 3)
24 | else:
25 | assert env.observation_space.shape[:2] == tuple(shape)
26 | assert obs.shape == tuple(shape) + (3,)
27 |
--------------------------------------------------------------------------------
/gym/wrappers/test_time_aware_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import gym
4 | from gym.wrappers import TimeAwareObservation
5 |
6 |
7 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0'])
8 | def test_time_aware_observation(env_id):
9 | env = gym.make(env_id)
10 | wrapped_env = TimeAwareObservation(env)
11 |
12 | assert wrapped_env.observation_space.shape[0] == env.observation_space.shape[0] + 1
13 |
14 | obs = env.reset()
15 | wrapped_obs = wrapped_env.reset()
16 | assert wrapped_env.t == 0.0
17 | assert wrapped_obs[-1] == 0.0
18 | assert wrapped_obs.shape[0] == obs.shape[0] + 1
19 |
20 | wrapped_obs, _, _, _ = wrapped_env.step(env.action_space.sample())
21 | assert wrapped_env.t == 1.0
22 | assert wrapped_obs[-1] == 1.0
23 | assert wrapped_obs.shape[0] == obs.shape[0] + 1
24 |
25 | wrapped_obs, _, _, _ = wrapped_env.step(env.action_space.sample())
26 | assert wrapped_env.t == 2.0
27 | assert wrapped_obs[-1] == 2.0
28 | assert wrapped_obs.shape[0] == obs.shape[0] + 1
29 |
30 | wrapped_obs = wrapped_env.reset()
31 | assert wrapped_env.t == 0.0
32 | assert wrapped_obs[-1] == 0.0
33 | assert wrapped_obs.shape[0] == obs.shape[0] + 1
34 |
--------------------------------------------------------------------------------
/gym/wrappers/test_transform_observation.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import TransformObservation
7 |
8 |
9 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0'])
10 | def test_transform_observation(env_id):
11 | affine_transform = lambda x: 3*x + 2
12 | env = gym.make(env_id)
13 | wrapped_env = TransformObservation(gym.make(env_id), lambda obs: affine_transform(obs))
14 |
15 | env.seed(0)
16 | wrapped_env.seed(0)
17 |
18 | obs = env.reset()
19 | wrapped_obs = wrapped_env.reset()
20 | assert np.allclose(wrapped_obs, affine_transform(obs))
21 |
22 | action = env.action_space.sample()
23 | obs, reward, done, _ = env.step(action)
24 | wrapped_obs, wrapped_reward, wrapped_done, _ = wrapped_env.step(action)
25 | assert np.allclose(wrapped_obs, affine_transform(obs))
26 | assert np.allclose(wrapped_reward, reward)
27 | assert wrapped_done == done
28 |
--------------------------------------------------------------------------------
/gym/wrappers/test_transform_reward.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import numpy as np
4 |
5 | import gym
6 | from gym.wrappers import TransformReward
7 |
8 |
9 | @pytest.mark.parametrize('env_id', ['CartPole-v1', 'Pendulum-v0'])
10 | def test_transform_reward(env_id):
11 | # use case #1: scale
12 | scales = [0.1, 200]
13 | for scale in scales:
14 | env = gym.make(env_id)
15 | wrapped_env = TransformReward(gym.make(env_id), lambda r: scale*r)
16 | action = env.action_space.sample()
17 |
18 | env.seed(0)
19 | env.reset()
20 | wrapped_env.seed(0)
21 | wrapped_env.reset()
22 |
23 | _, reward, _, _ = env.step(action)
24 | _, wrapped_reward, _, _ = wrapped_env.step(action)
25 |
26 | assert wrapped_reward == scale*reward
27 | del env, wrapped_env
28 |
29 | # use case #2: clip
30 | min_r = -0.0005
31 | max_r = 0.0002
32 | env = gym.make(env_id)
33 | wrapped_env = TransformReward(gym.make(env_id), lambda r: np.clip(r, min_r, max_r))
34 | action = env.action_space.sample()
35 |
36 | env.seed(0)
37 | env.reset()
38 | wrapped_env.seed(0)
39 | wrapped_env.reset()
40 |
41 | _, reward, _, _ = env.step(action)
42 | _, wrapped_reward, _, _ = wrapped_env.step(action)
43 |
44 | assert abs(wrapped_reward) < abs(reward)
45 | assert wrapped_reward == -0.0005 or wrapped_reward == 0.0002
46 | del env, wrapped_env
47 |
48 | # use case #3: sign
49 | env = gym.make(env_id)
50 | wrapped_env = TransformReward(gym.make(env_id), lambda r: np.sign(r))
51 |
52 | env.seed(0)
53 | env.reset()
54 | wrapped_env.seed(0)
55 | wrapped_env.reset()
56 |
57 | for _ in range(1000):
58 | action = env.action_space.sample()
59 | _, wrapped_reward, done, _ = wrapped_env.step(action)
60 | assert wrapped_reward in [-1.0, 0.0, 1.0]
61 | if done:
62 | break
63 | del env, wrapped_env
64 |
--------------------------------------------------------------------------------
/gym/wrappers/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/gym/wrappers/tests/__init__.py
--------------------------------------------------------------------------------
/gym/wrappers/time_aware_observation.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from gym.spaces import Box
4 | from gym import ObservationWrapper
5 |
6 |
7 | class TimeAwareObservation(ObservationWrapper):
8 | r"""Augment the observation with current time step in the trajectory.
9 |
10 | .. note::
11 | Currently it only works with one-dimensional observation space. It doesn't
12 | support pixel observation space yet.
13 |
14 | """
15 | def __init__(self, env):
16 | super(TimeAwareObservation, self).__init__(env)
17 | assert isinstance(env.observation_space, Box)
18 | assert env.observation_space.dtype == np.float32
19 | low = np.append(self.observation_space.low, 0.0)
20 | high = np.append(self.observation_space.high, np.inf)
21 | self.observation_space = Box(low, high, dtype=np.float32)
22 |
23 | def observation(self, observation):
24 | return np.append(observation, self.t)
25 |
26 | def step(self, action):
27 | self.t += 1
28 | return super(TimeAwareObservation, self).step(action)
29 |
30 | def reset(self, **kwargs):
31 | self.t = 0
32 | return super(TimeAwareObservation, self).reset(**kwargs)
33 |
--------------------------------------------------------------------------------
/gym/wrappers/time_limit.py:
--------------------------------------------------------------------------------
1 | import gym
2 |
3 |
4 | class TimeLimit(gym.Wrapper):
5 | def __init__(self, env, max_episode_steps=None):
6 | super(TimeLimit, self).__init__(env)
7 | if max_episode_steps is None and self.env.spec is not None:
8 | max_episode_steps = env.spec.max_episode_steps
9 | if self.env.spec is not None:
10 | self.env.spec.max_episode_steps = max_episode_steps
11 | self._max_episode_steps = max_episode_steps
12 | self._elapsed_steps = None
13 |
14 | def step(self, action):
15 | assert self._elapsed_steps is not None, "Cannot call env.step() before calling reset()"
16 | observation, reward, done, info = self.env.step(action)
17 | self._elapsed_steps += 1
18 | if self._elapsed_steps >= self._max_episode_steps:
19 | info['TimeLimit.truncated'] = not done
20 | done = True
21 | return observation, reward, done, info
22 |
23 | def reset(self, **kwargs):
24 | self._elapsed_steps = 0
25 | return self.env.reset(**kwargs)
26 |
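
A sketch of the truncation flag (Pendulum never terminates on its own, so the limit always fires):

    import gym
    from gym.wrappers import TimeLimit

    env = TimeLimit(gym.make('Pendulum-v0').unwrapped, max_episode_steps=10)
    env.reset()
    for _ in range(10):
        _, _, done, info = env.step(env.action_space.sample())
    assert done and info['TimeLimit.truncated']   # cut off by the limit, not the env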
--------------------------------------------------------------------------------
/gym/wrappers/transform_observation.py:
--------------------------------------------------------------------------------
1 | from gym import ObservationWrapper
2 |
3 |
4 | class TransformObservation(ObservationWrapper):
5 | r"""Transform the observation via an arbitrary function.
6 |
7 | Example::
8 |
9 | >>> import gym
10 | >>> env = gym.make('CartPole-v1')
11 | >>> env = TransformObservation(env, lambda obs: obs + 0.1*np.random.randn(*obs.shape))
12 | >>> env.reset()
13 | array([-0.08319338, 0.04635121, -0.07394746, 0.20877492])
14 |
15 | Args:
16 | env (Env): environment
17 | f (callable): a function that transforms the observation
18 |
19 | """
20 | def __init__(self, env, f):
21 | super(TransformObservation, self).__init__(env)
22 | assert callable(f)
23 | self.f = f
24 |
25 | def observation(self, observation):
26 | return self.f(observation)
27 |
--------------------------------------------------------------------------------
/gym/wrappers/transform_reward.py:
--------------------------------------------------------------------------------
1 | from gym import RewardWrapper
2 |
3 |
4 | class TransformReward(RewardWrapper):
5 | r"""Transform the reward via an arbitrary function.
6 |
7 | Example::
8 |
9 | >>> import gym
10 | >>> env = gym.make('CartPole-v1')
11 | >>> env = TransformReward(env, lambda r: 0.01*r)
12 | >>> env.reset()
13 | >>> observation, reward, done, info = env.step(env.action_space.sample())
14 | >>> reward
15 | 0.01
16 |
17 | Args:
18 | env (Env): environment
19 | f (callable): a function that transforms the reward
20 |
21 | """
22 | def __init__(self, env, f):
23 | super(TransformReward, self).__init__(env)
24 | assert callable(f)
25 | self.f = f
26 |
27 | def reward(self, reward):
28 | return self.f(reward)
29 |
--------------------------------------------------------------------------------
/memory/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/memory/sp_memory.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | class ReplayBuffer:
5 | """
6 | A simple FIFO experience replay buffer for TD3 agents.
7 | """
8 |
9 | def __init__(self, obs_dim, act_dim, size):
10 | self.obs1_buf = np.zeros([size, obs_dim], dtype=np.float32)
11 | self.obs2_buf = np.zeros([size, obs_dim], dtype=np.float32)
12 | self.acts_buf = np.zeros([size, act_dim], dtype=np.float32)
13 | self.rews_buf = np.zeros(size, dtype=np.float32)
14 | self.done_buf = np.zeros(size, dtype=np.float32)
15 | self.ptr, self.size, self.max_size = 0, 0, size
16 |
17 | def store(self, obs, act, rew, next_obs, done):
18 | self.obs1_buf[self.ptr] = obs
19 | self.obs2_buf[self.ptr] = next_obs
20 | self.acts_buf[self.ptr] = act
21 | self.rews_buf[self.ptr] = rew
22 | self.done_buf[self.ptr] = done
23 | self.ptr = (self.ptr + 1) % self.max_size
24 | self.size = min(self.size + 1, self.max_size)
25 |
26 | def sample_batch(self, batch_size=32):
27 | idxs = np.random.randint(0, self.size, size=batch_size)
28 | return dict(obs1=self.obs1_buf[idxs],
29 | obs2=self.obs2_buf[idxs],
30 | acts=self.acts_buf[idxs],
31 | rews=self.rews_buf[idxs],
32 | done=self.done_buf[idxs])
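
A smoke test with hypothetical dimensions:

    import numpy as np
    from memory.sp_memory import ReplayBuffer

    buf = ReplayBuffer(obs_dim=4, act_dim=2, size=1000)
    for _ in range(64):
        buf.store(obs=np.random.randn(4), act=np.random.randn(2),
                  rew=0.0, next_obs=np.random.randn(4), done=False)
    batch = buf.sample_batch(batch_size=32)
    assert batch['obs1'].shape == (32, 4) and batch['acts'].shape == (32, 2)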
--------------------------------------------------------------------------------
/memory/sp_memory_torch.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
4 |
5 | class ReplayBuffer:
6 | """
7 | A simple FIFO experience replay buffer for TD3 agents.
8 | """
9 |
10 | def __init__(self, obs_dim, act_dim, size, device=None):
11 | self.device = device
12 | self.obs_buf = np.zeros(self.combined_shape(size, obs_dim), dtype=np.float32)
13 | self.obs2_buf = np.zeros(self.combined_shape(size, obs_dim), dtype=np.float32)
14 | self.act_buf = np.zeros(self.combined_shape(size, act_dim), dtype=np.float32)
15 | self.rew_buf = np.zeros(size, dtype=np.float32)
16 | self.done_buf = np.zeros(size, dtype=np.float32)
17 | self.ptr, self.size, self.max_size = 0, 0, size
18 |
19 | def combined_shape(self, length, shape=None):
20 | if shape is None:
21 | return (length,)
22 | return (length, shape) if np.isscalar(shape) else (length, *shape)
23 |
24 | def store(self, obs, act, rew, next_obs, done):
25 | self.obs_buf[self.ptr] = obs
26 | self.obs2_buf[self.ptr] = next_obs
27 | self.act_buf[self.ptr] = act
28 | self.rew_buf[self.ptr] = rew
29 | self.done_buf[self.ptr] = done
30 | self.ptr = (self.ptr+1) % self.max_size
31 | self.size = min(self.size+1, self.max_size)
32 |
33 | def sample_batch(self, batch_size=32):
34 | idxs = np.random.randint(0, self.size, size=batch_size)
35 | batch = dict(obs=self.obs_buf[idxs],
36 | obs2=self.obs2_buf[idxs],
37 | act=self.act_buf[idxs],
38 | rew=self.rew_buf[idxs],
39 | done=self.done_buf[idxs])
40 | return {k: torch.as_tensor(v, dtype=torch.float32, device=self.device) for k,v in batch.items()}
41 |
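
The torch variant differs mainly in returning tensors on the configured device; a quick check (assumes torch and numpy are installed):

    import numpy as np
    import torch
    from memory.sp_memory_torch import ReplayBuffer

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    buf = ReplayBuffer(obs_dim=4, act_dim=2, size=1000, device=device)
    buf.store(np.zeros(4), np.zeros(2), 0.0, np.zeros(4), False)
    batch = buf.sample_batch(batch_size=1)
    assert batch['obs'].device.type == device.type   # tensors land on `device`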
--------------------------------------------------------------------------------
/pip_requirement.txt:
--------------------------------------------------------------------------------
1 | cloudpickle==1.2.1
2 | ipython==7.19.0
3 | joblib==1.0.0
4 | matplotlib==3.3.3
5 | # mpi4py @ file:///tmp/build/80754af9/mpi4py_1594373948151/work
6 | # or conda install mpi4py -y
7 | pandas==1.2.0
8 | psutil==5.8.0
9 | pyglet==1.5.0
10 | pytest==6.2.1
11 | # scipy @ file:///tmp/build/80754af9/scipy_1612469579197/work
12 | # seaborn @ file:///tmp/build/80754af9/seaborn_1608578541026/work
13 | # or seaborn==0.8.1
14 | tqdm==4.55.0
15 |
--------------------------------------------------------------------------------
/spinup_utils/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/spinup_utils/delete_no_checkpoint_or_pth.py:
--------------------------------------------------------------------------------
1 | Might as well just delete this one~
2 |
--------------------------------------------------------------------------------
/spinup_utils/demo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kaixindelele/DRLib/abf7ece9ad09eb40d8544e2f654a673242225d7a/spinup_utils/demo.png
--------------------------------------------------------------------------------
/spinup_utils/plot_demo_files/2020-10-02_12-18-23-gym_clean_buffer_reach_d2s_dense_n2b2_s5958/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "argparse": "argparse",
3 | "args": null,
4 | "b_lr": 0.001,
5 | "base_lr": 0.001,
6 | "env_str": "FetchReach-v1",
7 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2",
8 | "logger": null,
9 | "logger_kwargs": {
10 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2",
11 | "output_dir": "../Gym_reach_D2S_clean_buffer/2020-10-02_gym_clean_buffer_reach_d2s_dense_n2b2/2020-10-02_12-18-23-gym_clean_buffer_reach_d2s_dense_n2b2_s5958"
12 | },
13 | "noise_value": 0.1,
14 | "parser": null,
15 | "random_seed": 5958
16 | }
--------------------------------------------------------------------------------
/spinup_utils/plot_demo_files/2020-10-02_12-18-42-gym_clean_buffer_reach_d2s_dense_n2b2_s9317/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "argparse": "argparse",
3 | "args": null,
4 | "b_lr": 0.001,
5 | "base_lr": 0.001,
6 | "env_str": "FetchReach-v1",
7 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2",
8 | "logger": null,
9 | "logger_kwargs": {
10 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2",
11 | "output_dir": "../Gym_reach_D2S_clean_buffer/2020-10-02_gym_clean_buffer_reach_d2s_dense_n2b2/2020-10-02_12-18-42-gym_clean_buffer_reach_d2s_dense_n2b2_s9317"
12 | },
13 | "noise_value": 0.1,
14 | "parser": null,
15 | "random_seed": 9317
16 | }
--------------------------------------------------------------------------------
/spinup_utils/plot_demo_files/2020-10-02_12-18-43-gym_clean_buffer_reach_d2s_dense_n2b2_s7515/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "argparse": "argparse",
3 | "args": null,
4 | "b_lr": 0.001,
5 | "base_lr": 0.001,
6 | "env_str": "FetchReach-v1",
7 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2",
8 | "logger": null,
9 | "logger_kwargs": {
10 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2",
11 | "output_dir": "../Gym_reach_D2S_clean_buffer/2020-10-02_gym_clean_buffer_reach_d2s_dense_n2b2/2020-10-02_12-18-43-gym_clean_buffer_reach_d2s_dense_n2b2_s7515"
12 | },
13 | "noise_value": 0.1,
14 | "parser": null,
15 | "random_seed": 7515
16 | }
--------------------------------------------------------------------------------
/spinup_utils/plot_demo_files/2020-10-02_12-18-50-gym_clean_buffer_reach_d2s_dense_n2b2_s9180/config.json:
--------------------------------------------------------------------------------
1 | {
2 | "argparse": "argparse",
3 | "args": null,
4 | "b_lr": 0.001,
5 | "base_lr": 0.001,
6 | "env_str": "FetchReach-v1",
7 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2",
8 | "logger": null,
9 | "logger_kwargs": {
10 | "exp_name": "gym_clean_buffer_reach_d2s_dense_n2b2",
11 | "output_dir": "../Gym_reach_D2S_clean_buffer/2020-10-02_gym_clean_buffer_reach_d2s_dense_n2b2/2020-10-02_12-18-50-gym_clean_buffer_reach_d2s_dense_n2b2_s9180"
12 | },
13 | "noise_value": 0.1,
14 | "parser": null,
15 | "random_seed": 9180
16 | }
--------------------------------------------------------------------------------
/spinup_utils/plot_demo_files/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/spinup_utils/print_logger.py:
--------------------------------------------------------------------------------
1 | import sys
2 |
3 |
4 | class Logger(object):
5 | def __init__(self, filename='default.log', add_flag=True, stream=sys.stdout):
6 | self.terminal = stream
7 | print("filename:", filename)
8 | self.filename = filename
9 | self.add_flag = add_flag
10 | # self.log = open(filename, 'a+')
11 |
12 | def write(self, message):
13 | if self.add_flag:
14 | with open(self.filename, 'a+') as log:
15 | self.terminal.write(message)
16 | log.write(message)
17 | else:
18 | with open(self.filename, 'w') as log:
19 | self.terminal.write(message)
20 | log.write(message)
21 |
22 | def flush(self):
23 | pass
24 |
25 |
26 | def main():
27 | logger_kwargs = {'output_dir':"logger/"}
28 | try:
29 | import os
30 | os.mkdir(logger_kwargs['output_dir'])
31 | except:
32 | pass
33 | sys.stdout = Logger(logger_kwargs["output_dir"]+"print.log",
34 | stream=sys.stdout)
35 |
36 | print('print something')
37 | print("*" * 3)
38 | import time
39 | time.sleep(2)
40 | print("other things")
41 |
42 |
43 | if __name__ == '__main__':
44 | main()
45 |
--------------------------------------------------------------------------------
/spinup_utils/run_entrypoint.py:
--------------------------------------------------------------------------------
1 | """
2 | 如果用mpi_fork的话,就要多次启动当前脚本。
3 | 如果不进行隔离的话,在run_utils.py中的for var in vars中执行ppo.
4 | 执行到ppo中的mpi_fork(num_cpu)这句话时,会
5 |
6 | """
7 |
8 |
9 | import zlib
10 | import pickle
11 | import base64
12 | import time
13 | from spinup_utils.mpi_tools import proc_id
14 |
15 |
16 | if __name__ == '__main__':
17 | import argparse
18 | parser = argparse.ArgumentParser()
19 | # Why does adding this argument make the variable directly accessible?
20 | # Because `python run_entrypoint.py encoded_thunk` passes it positionally.
21 | parser.add_argument('encoded_thunk')
22 | args = parser.parse_args()
23 | # print("thunk.args:", args)
24 | # input(("args"))
25 | # pickle.loads restores the pickled thunk function
26 | thunk = pickle.loads(zlib.decompress(base64.b64decode(args.encoded_thunk)))
27 | # print("thunk:", thunk)
28 | # print("entry_point_proc_id:", proc_id())
29 | # time.sleep(1)
30 | thunk()
31 |
32 |
33 |
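
For context, the launcher side is roughly the following (a sketch of what a spinup-style run_utils does; `train` is a stand-in for the real experiment thunk):

    import base64
    import subprocess
    import sys
    import zlib

    import cloudpickle

    def train():
        print('training...')

    # serialize the thunk, then hand it to this isolated entrypoint script
    encoded_thunk = base64.b64encode(zlib.compress(cloudpickle.dumps(train))).decode('utf-8')
    subprocess.check_call([sys.executable, 'run_entrypoint.py', encoded_thunk])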
--------------------------------------------------------------------------------
/spinup_utils/serialization_utils.py:
--------------------------------------------------------------------------------
1 | import json
2 |
3 |
4 | def convert_json(obj):
5 | """ Convert obj to a version which can be serialized with JSON.
6 | Ugly recursion!
7 | """
8 |
9 | if is_json_serializable(obj):
10 | return obj
11 | else:
12 | if isinstance(obj, dict):
13 | return {convert_json(k): convert_json(v)
14 | for k,v in obj.items()}
15 |
16 | elif isinstance(obj, tuple):
17 | return tuple(convert_json(x) for x in obj)
18 |
19 | elif isinstance(obj, list):
20 | return [convert_json(x) for x in obj]
21 |
22 | elif hasattr(obj,'__name__') and not('lambda' in obj.__name__):
23 | # functions and classes serialize as their __name__
24 | return convert_json(obj.__name__)
25 |
26 | elif hasattr(obj,'__dict__') and obj.__dict__:
27 | obj_dict = {convert_json(k): convert_json(v)
28 | for k,v in obj.__dict__.items()}
29 | return {str(obj): obj_dict}
30 |
31 |
32 | return str(obj)
33 |
34 | def is_json_serializable(v):
35 | try:
36 | json.dumps(v)
37 | return True
38 | except:
39 | return False
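
A small illustration of the fallback chain (the `Config` class is hypothetical):

    from spinup_utils.serialization_utils import convert_json

    class Config:
        def __init__(self):
            self.lr = 1e-3
            self.name = 'demo'

    # dicts and lists recurse; objects with a __dict__ collapse to {str(obj): attrs}
    print(convert_json({'cfg': Config(), 'seeds': [1, 2, 3]}))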
--------------------------------------------------------------------------------
/spinup_utils/user_config.py:
--------------------------------------------------------------------------------
1 | import os
2 | import os.path as osp
3 |
4 | # Where experiment outputs are saved by default:
5 | DEFAULT_DATA_DIR = osp.join(osp.abspath(osp.dirname(osp.dirname(__file__))),'data')
6 |
7 | # Whether to automatically insert a date and time stamp into the names of
8 | # save directories:
9 | FORCE_DATESTAMP = True
10 |
11 | # Whether GridSearch provides automatically-generated default shorthands:
12 | DEFAULT_SHORTHAND = True
13 |
14 | # Tells the GridSearch how many seconds to pause for before launching
15 | # experiments.
16 | WAIT_BEFORE_LAUNCH = 5
--------------------------------------------------------------------------------
/tune_exps/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/tune_exps/tune_func.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | from mpi4py import MPI
4 | import numpy as np
5 |
6 | # Replace this main function with whatever RL main program you need
7 | def func(params_dict_list):
8 | proc_id = MPI.COMM_WORLD.Get_rank()
9 | if proc_id > len(params_dict_list)-1:
10 | print("proc_id:", proc_id)
11 | print("sys.exit()")
12 | sys.exit()
13 | print("sys.exit()")
14 | params_dict = params_dict_list[proc_id]
15 | print("proc_id:", proc_id)
16 | print("params_dict:", params_dict)
17 | print("-"*20)
18 |
19 |
20 | if __name__=='__main__':
21 | params_dict = {
22 | 'lr': [2, 3, 4, 5, 6, 7],
23 | "batch": [10, 20, 30, 40, 50,],
24 | "epoch": [100, 200, 300, 400, 500, 600],
25 | }
26 | import itertools
27 |
28 | params_list = [list(value) for value in itertools.product(*params_dict.values())]
29 | params_dict_list = [{key: cur_param.pop(0) for key, value in params_dict.items()} for cur_param in params_list]
30 | for i in range(2):
31 | func(params_dict_list=params_dict_list)
32 |
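
The grid expansion can also be written a bit more directly with `dict(zip(...))`; under e.g. `mpirun -np 4 python tune_func.py`, the rank-k process would pick up combos[k]:

    import itertools

    params_dict = {'lr': [1e-3, 3e-4], 'batch': [64, 128]}
    combos = [dict(zip(params_dict, values))
              for values in itertools.product(*params_dict.values())]
    print(len(combos))   # 2 * 2 = 4 configurations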
--------------------------------------------------------------------------------