├── .gitignore ├── LICENSE ├── README.md ├── data └── README.txt ├── examples ├── atari_dqn_async_cpu.py ├── atari_dqn_async_gpu.py ├── atari_dqn_async_serial.py ├── example_1.py ├── example_2.py ├── example_3.py ├── example_4.py ├── example_5.py ├── example_6.py ├── example_6a.py └── example_7.py ├── images ├── bair_logo.png └── blank.md ├── linux_cpu.yml ├── linux_cuda10.yml ├── linux_cuda9.yml ├── macos_cpu.yml ├── max_q_eval_policy.py ├── random_policy.py ├── rlpyt ├── __init__.py ├── agents │ ├── __init__.py │ ├── base.py │ ├── dqn │ │ ├── __init__.py │ │ ├── atari │ │ │ ├── __init__.py │ │ │ ├── atari_catdqn_agent.py │ │ │ ├── atari_dqn_agent.py │ │ │ ├── atari_r2d1_agent.py │ │ │ └── mixin.py │ │ ├── catdqn_agent.py │ │ ├── dqn_agent.py │ │ ├── epsilon_greedy.py │ │ └── r2d1_agent.py │ ├── pg │ │ ├── __init__.py │ │ ├── atari.py │ │ ├── base.py │ │ ├── categorical.py │ │ ├── gaussian.py │ │ └── mujoco.py │ └── qpg │ │ ├── __init__.py │ │ ├── ddpg_agent.py │ │ ├── sac_agent.py │ │ ├── sac_agent_autoreg_v2.py │ │ ├── sac_agent_autoreg_v2_generic.py │ │ ├── sac_agent_v2.py │ │ ├── sac_agent_v2_generic.py │ │ └── td3_agent.py ├── algos │ ├── __init__.py │ ├── base.py │ ├── dqn │ │ ├── __init__.py │ │ ├── cat_dqn.py │ │ ├── dqn.py │ │ └── r2d1.py │ ├── pg │ │ ├── __init__.py │ │ ├── a2c.py │ │ ├── base.py │ │ └── ppo.py │ ├── qpg │ │ ├── __init__.py │ │ ├── ddpg.py │ │ ├── sac.py │ │ ├── sac_v2.py │ │ ├── sac_v2_generic.py │ │ └── td3.py │ └── utils.py ├── distributions │ ├── __init__.py │ ├── base.py │ ├── categorical.py │ ├── discrete.py │ ├── epsilon_greedy.py │ └── gaussian.py ├── envs │ ├── __init__.py │ ├── atari │ │ ├── __init__.py │ │ └── atari_env.py │ ├── base.py │ ├── dm_control_env.py │ └── gym.py ├── experiments │ ├── __init__.py │ ├── configs │ │ ├── __init__.py │ │ ├── atari │ │ │ ├── __init__.py │ │ │ ├── dqn │ │ │ │ ├── __init__.py │ │ │ │ ├── atari_dqn.py │ │ │ │ ├── atari_dqn_debug.py │ │ │ │ └── atari_r2d1.py │ │ │ └── pg │ │ │ │ ├── __init__.py │ │ │ │ ├── atari_ff_a2c.py │ │ │ │ ├── atari_ff_ppo.py │ │ │ │ ├── atari_lstm_a2c.py │ │ │ │ └── atari_lstm_ppo.py │ │ ├── dm_control │ │ │ └── qpg │ │ │ │ └── sac │ │ │ │ └── dm_control_sac.py │ │ └── mujoco │ │ │ ├── __init__.py │ │ │ ├── pg │ │ │ ├── __init__.py │ │ │ ├── mujoco_a2c.py │ │ │ └── mujoco_ppo.py │ │ │ └── qpg │ │ │ ├── __init__.py │ │ │ ├── mujoco_ddpg.py │ │ │ ├── mujoco_sac.py │ │ │ └── mujoco_td3.py │ └── scripts │ │ ├── atari │ │ ├── dqn │ │ │ ├── launch │ │ │ │ ├── dgx │ │ │ │ │ ├── launch_atari_r2d1_async_alt.py │ │ │ │ │ ├── launch_atari_r2d1_async_alt_seaquest.py │ │ │ │ │ └── launch_atari_r2d1_async_gpu.py │ │ │ │ ├── got │ │ │ │ │ ├── launch_atari_dqn_cpu_basic_1of2.py │ │ │ │ │ ├── launch_atari_dqn_cpu_basic_2of2.py │ │ │ │ │ ├── launch_atari_dqn_gpu_pong.py │ │ │ │ │ ├── launch_atari_r2d1_async_alt.py │ │ │ │ │ ├── launch_atari_r2d1_async_alt_amidar.py │ │ │ │ │ ├── launch_atari_r2d1_async_alt_gravitar.py │ │ │ │ │ ├── launch_atari_r2d1_async_alt_pong.py │ │ │ │ │ ├── launch_atari_r2d1_async_alt_seaquest.py │ │ │ │ │ ├── launch_atari_r2d1_async_gpu.py │ │ │ │ │ ├── launch_atari_r2d1_async_gpu_amidar.py │ │ │ │ │ ├── launch_atari_r2d1_async_gpu_test.py │ │ │ │ │ └── launch_atari_r2d1_long_4tr_gravitar.py │ │ │ │ ├── launch_atari_catdqn_gpu_basic.py │ │ │ │ ├── launch_atari_dpd_dqn_gpu_basic.py │ │ │ │ ├── launch_atari_dqn_cpu_basic.py │ │ │ │ ├── launch_atari_dqn_gpu.py │ │ │ │ ├── launch_atari_dqn_gpu_basic.py │ │ │ │ ├── launch_atari_ernbw_gpu_basic.py │ │ │ │ ├── launch_atari_r2d1_gpu_basic.py │ 
│ │ │ └── pabti │ │ │ │ │ ├── launch_atari_catdqn_gpu_basic.py │ │ │ │ │ ├── launch_atari_dpd_dqn_gpu_basic.py │ │ │ │ │ ├── launch_atari_dqn_async_cpu.py │ │ │ │ │ ├── launch_atari_dqn_async_gpu.py │ │ │ │ │ ├── launch_atari_dqn_async_gpu_scale_pri.py │ │ │ │ │ ├── launch_atari_dqn_async_serial.py │ │ │ │ │ ├── launch_atari_dqn_cpu_basic.py │ │ │ │ │ ├── launch_atari_dqn_gpu_basic.py │ │ │ │ │ ├── launch_atari_dqn_gpu_noeval.py │ │ │ │ │ ├── launch_atari_dqn_serial.py │ │ │ │ │ ├── launch_atari_ernbw_gpu_basic.py │ │ │ │ │ ├── launch_atari_r2d1_async_alt.py │ │ │ │ │ ├── launch_atari_r2d1_async_alt_chopper_command.py │ │ │ │ │ ├── launch_atari_r2d1_async_alt_gravitar.py │ │ │ │ │ ├── launch_atari_r2d1_async_alt_qbert.py │ │ │ │ │ ├── launch_atari_r2d1_async_gpu_qbert.py │ │ │ │ │ ├── launch_atari_r2d1_long_4tr_asteroids.py │ │ │ │ │ ├── launch_atari_r2d1_long_4tr_chopper_command.py │ │ │ │ │ ├── launch_atari_r2d1_long_4tr_gravitar.py │ │ │ │ │ ├── launch_atari_r2d1_long_4tr_seaquest.py │ │ │ │ │ ├── launch_atari_r2d1_long_gt_ad.py │ │ │ │ │ └── launch_atari_r2d1_long_sq_cc.py │ │ │ └── train │ │ │ │ ├── atari_catdqn_gpu.py │ │ │ │ ├── atari_dqn_async_cpu.py │ │ │ │ ├── atari_dqn_async_gpu.py │ │ │ │ ├── atari_dqn_async_serial.py │ │ │ │ ├── atari_dqn_cpu.py │ │ │ │ ├── atari_dqn_gpu.py │ │ │ │ ├── atari_dqn_gpu_noeval.py │ │ │ │ ├── atari_dqn_serial.py │ │ │ │ ├── atari_r2d1_async_alt.py │ │ │ │ ├── atari_r2d1_async_gpu.py │ │ │ │ └── atari_r2d1_gpu.py │ │ └── pg │ │ │ ├── launch │ │ │ ├── got │ │ │ │ ├── launch_atari_ff_a2c_gpu_basic.py │ │ │ │ ├── launch_atari_ff_a2c_gpu_low_lr.py │ │ │ │ ├── launch_atari_ff_a2c_gpu_multi.py │ │ │ │ ├── launch_atari_ff_lstm_a2c_ppo_gpu.py │ │ │ │ ├── launch_atari_ff_ppo_gpu.py │ │ │ │ ├── launch_atari_ff_ppo_gpu_basic.py │ │ │ │ ├── launch_atari_lstm_a2c_gpu_basic.py │ │ │ │ └── launch_atari_lstm_ppo_gpu_basic.py │ │ │ ├── launch_atari_ff_a2c_cpu.py │ │ │ ├── launch_atari_ff_a2c_gpu_basic.py │ │ │ ├── launch_atari_ff_a2c_gpu_multi.py │ │ │ ├── launch_atari_ff_ppo_gpu_basic.py │ │ │ ├── launch_atari_lstm_a2c_cpu.py │ │ │ ├── launch_atari_lstm_a2c_gpu.py │ │ │ ├── launch_atari_lstm_a2c_gpu_basic.py │ │ │ ├── launch_atari_lstm_ppo_gpu_basic.py │ │ │ └── pabti │ │ │ │ └── launch_atari_ff_a2c_gpu_multi.py │ │ │ └── train │ │ │ ├── atari_ff_a2c_cpu.py │ │ │ ├── atari_ff_a2c_cpu_test.py │ │ │ ├── atari_ff_a2c_gpu.py │ │ │ ├── atari_ff_a2c_gpu_multi.py │ │ │ ├── atari_ff_ppo_gpu.py │ │ │ ├── atari_lstm_a2c_cpu.py │ │ │ ├── atari_lstm_a2c_cpu_test.py │ │ │ ├── atari_lstm_a2c_gpu.py │ │ │ └── atari_lstm_ppo_gpu.py │ │ ├── dm_control │ │ └── qpg │ │ │ └── sac │ │ │ ├── launch │ │ │ ├── cloth_env_overview.txt │ │ │ ├── launch_dm_control_sac_pixels_cloth_corner.py │ │ │ ├── launch_dm_control_sac_pixels_cloth_point.py │ │ │ ├── launch_dm_control_sac_pixels_cloth_sim.py │ │ │ ├── launch_dm_control_sac_pixels_cloth_two_hand.py │ │ │ ├── launch_dm_control_sac_pixels_clothv0.py │ │ │ ├── launch_dm_control_sac_pixels_clothv8.py │ │ │ ├── launch_dm_control_sac_pixels_rope.py │ │ │ ├── launch_dm_control_sac_pixels_rope_two_hand.py │ │ │ ├── launch_dm_control_sac_pixels_rope_v1.py │ │ │ ├── launch_dm_control_sac_state_cloth_autoreg.py │ │ │ ├── launch_dm_control_sac_state_cloth_corner.py │ │ │ ├── launch_dm_control_sac_state_cloth_point.py │ │ │ ├── launch_dm_control_sac_state_cloth_script.py │ │ │ ├── launch_dm_control_sac_state_cloth_sim.py │ │ │ ├── launch_dm_control_sac_state_clothv0.py │ │ │ ├── launch_dm_control_sac_state_clothv7.py │ │ │ ├── 
launch_dm_control_sac_state_clothv8.py │ │ │ ├── launch_dm_control_sac_state_rope.py │ │ │ └── launch_dm_control_sac_state_rope_script.py │ │ │ ├── old │ │ │ ├── dm_control_sac_serial.py │ │ │ ├── dm_control_sac_serial_v2.py │ │ │ ├── launch_dm_control_sac_serial.py │ │ │ └── launch_dm_control_sac_serial_v2.py │ │ │ └── train │ │ │ ├── dm_control_sac.py │ │ │ ├── dm_control_sac_autoreg.py │ │ │ └── dm_control_sac_generic.py │ │ └── mujoco │ │ ├── launch │ │ └── pabti │ │ │ ├── launch_mujoco_ppo_ddpg_td3_sac_serial.py │ │ │ └── launch_mujoco_sac_serial.py │ │ ├── pg │ │ ├── launch │ │ │ ├── launch_mujoco_a2c_cpu.py │ │ │ ├── launch_mujoco_ppo_cpu.py │ │ │ ├── launch_mujoco_ppo_gpu.py │ │ │ ├── launch_mujoco_ppo_serial.py │ │ │ └── pabti │ │ │ │ ├── launch_mujoco_ppo_cpu.py │ │ │ │ └── launch_mujoco_ppo_serial.py │ │ └── train │ │ │ ├── mujoco_ff_a2c_cpu.py │ │ │ ├── mujoco_ff_ppo_cpu.py │ │ │ ├── mujoco_ff_ppo_gpu.py │ │ │ └── mujoco_ppo_serial.py │ │ └── qpg │ │ ├── launch │ │ ├── got │ │ │ ├── launch_mujoco_ddpg_async_serial.py │ │ │ ├── launch_mujoco_ddpg_serial.py │ │ │ ├── launch_mujoco_sac_async_gpu.py │ │ │ ├── launch_mujoco_sac_serial.py │ │ │ ├── launch_mujoco_td3_async_cpu.py │ │ │ └── launch_mujoco_td3_serial.py │ │ ├── launch_mujoco_ddpg_cpu.py │ │ ├── launch_mujoco_ddpg_serial.py │ │ ├── launch_mujoco_ddpg_td3_sac_serial.py │ │ ├── launch_mujoco_sac_serial.py │ │ ├── launch_mujoco_td3_serial.py │ │ └── pabti │ │ │ └── launch_mujoco_ddpg_td3_sac_serial.py │ │ └── train │ │ ├── mujoco_ddpg_async_serial.py │ │ ├── mujoco_ddpg_cpu.py │ │ ├── mujoco_ddpg_serial.py │ │ ├── mujoco_sac_async_gpu.py │ │ ├── mujoco_sac_serial.py │ │ ├── mujoco_td3_async_cpu.py │ │ └── mujoco_td3_serial.py ├── models │ ├── __init__.py │ ├── conv2d.py │ ├── dqn │ │ ├── __init__.py │ │ ├── atari_catdqn_model.py │ │ ├── atari_dqn_model.py │ │ ├── atari_r2d1_model.py │ │ └── dueling.py │ ├── mlp.py │ ├── pg │ │ ├── __init__.py │ │ ├── atari_ff_model.py │ │ ├── atari_lstm_model.py │ │ ├── mujoco_ff_model.py │ │ └── mujoco_lstm_model.py │ ├── preprocessor.py │ ├── qpg │ │ ├── __init__.py │ │ ├── conv2d.py │ │ └── mlp.py │ └── utils.py ├── replays │ ├── __init__.py │ ├── async_.py │ ├── base.py │ ├── frame.py │ ├── n_step.py │ ├── non_sequence │ │ ├── __init__.py │ │ ├── frame.py │ │ ├── n_step.py │ │ ├── prioritized.py │ │ └── uniform.py │ ├── sequence │ │ ├── __init__.py │ │ ├── frame.py │ │ ├── n_step.py │ │ ├── prioritized.py │ │ └── uniform.py │ └── sum_tree.py ├── runners │ ├── __init__.py │ ├── async_rl.py │ ├── base.py │ ├── minibatch_rl.py │ └── sync_rl.py ├── samplers │ ├── __init__.py │ ├── async_ │ │ ├── __init__.py │ │ ├── action_server.py │ │ ├── alternating_sampler.py │ │ ├── base.py │ │ ├── collectors.py │ │ ├── cpu_sampler.py │ │ ├── gpu_sampler.py │ │ └── serial_sampler.py │ ├── base.py │ ├── buffer.py │ ├── collections.py │ ├── collectors.py │ ├── parallel │ │ ├── __init__.py │ │ ├── base.py │ │ ├── cpu │ │ │ ├── __init__.py │ │ │ ├── collectors.py │ │ │ └── sampler.py │ │ ├── gpu │ │ │ ├── __init__.py │ │ │ ├── action_server.py │ │ │ ├── alternating_sampler.py │ │ │ ├── collectors.py │ │ │ └── sampler.py │ │ └── worker.py │ └── serial │ │ ├── __init__.py │ │ ├── collectors.py │ │ └── sampler.py ├── spaces │ ├── __init__.py │ ├── base.py │ ├── box.py │ ├── composite.py │ ├── float_box.py │ ├── gym_wrapper.py │ └── int_box.py └── utils │ ├── __init__.py │ ├── array.py │ ├── buffer.py │ ├── collections.py │ ├── launching │ ├── __init__.py │ ├── affinity.py │ ├── exp_launcher.py │ └── variant.py 
│ ├── logging │ ├── __init__.py │ ├── autoargs.py │ ├── console.py │ ├── context.py │ ├── logger.py │ └── tabulate.py │ ├── misc.py │ ├── prog_bar.py │ ├── quick_args.py │ ├── seed.py │ ├── shmemarray.py │ ├── synchronize.py │ └── tensor.py ├── sample_random.py ├── scratch └── README.txt ├── setup.py └── simulate_policy.py /LICENSE:
--------------------------------------------------------------------------------
1 | MIT License 2 | 3 | Copyright (c) 2019 astooke 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 |
-------------------------------------------------------------------------------- /README.md:
--------------------------------------------------------------------------------
1 | 2 | 3 | # Learning to Manipulate Deformable Objects without Demonstrations 4 | 5 | This is the code accompanying the [paper](https://arxiv.org/abs/1910.13439). 6 | 7 | ## rlpyt Usage 8 | See the [original library](https://github.com/astooke/rlpyt) for more information on its design. 9 | 10 | ## Installation 11 | 12 | Install mujoco-py with MuJoCo 2.0 (mujoco200). 13 | 14 | Install a custom version of [dm_control](https://github.com/wilson1yan/dm_control). 15 | 16 | Install a custom version of [dm_env](https://github.com/wilson1yan/dm_env). 17 | 18 | Install the original rlpyt conda environment (see `linux_cpu.yml`, `linux_cuda9.yml`, `linux_cuda10.yml`, or `macos_cpu.yml`). 19 | 20 | ## Running 21 | 22 | All launch scripts are in `rlpyt/experiments/scripts/dm_control/qpg/sac/launch`. 23 | 24 | ### Cloth 25 | 26 | For Cloth (State), see launch_dm_control_sac_state_cloth_point.py 27 | 28 | For Cloth (Pixel), see launch_dm_control_sac_pixels_cloth_point.py 29 | 30 | For Cloth-Simplified (State), see launch_dm_control_sac_state_cloth_corner.py 31 | 32 | ### Rope 33 | 34 | For Rope (State), see launch_dm_control_sac_state_rope.py 35 | 36 | For Rope (Pixel), see launch_dm_control_sac_pixels_rope.py 37 |
-------------------------------------------------------------------------------- /data/README.txt:
--------------------------------------------------------------------------------
1 | All experiment data goes here (kept out of version control via .gitignore).
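A minimal usage sketch for the README's Installation and Running sections above. This workflow is an assumption, not from the original docs: the launch scripts reproduced later in this dump take no command-line arguments and hard-code their settings via run_experiments, so each one is simply executed with Python from the repo root:

    pip install -e .   # assumes the provided setup.py supports an editable install
    python rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_state_cloth_point.py   # Cloth (State)

Experiment output accumulates under data/ (see data/README.txt above), which .gitignore keeps out of version control.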
-------------------------------------------------------------------------------- /images/bair_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/images/bair_logo.png -------------------------------------------------------------------------------- /images/blank.md: -------------------------------------------------------------------------------- 1 | blank 2 | -------------------------------------------------------------------------------- /linux_cpu.yml: -------------------------------------------------------------------------------- 1 | name: rlpyt 2 | channels: 3 | - pytorch 4 | dependencies: 5 | - python=3.7 6 | - pytorch-cpu=1.2 7 | - numpy 8 | - psutil 9 | - opencv # atari. 10 | - pip 11 | - pip: 12 | - atari-py 13 | - pyprind # progbar. 14 | -------------------------------------------------------------------------------- /linux_cuda10.yml: -------------------------------------------------------------------------------- 1 | name: rlpyt 2 | channels: 3 | - pytorch 4 | dependencies: 5 | - python=3.7 6 | - pytorch=1.2 7 | - cudatoolkit=10. 8 | - numpy 9 | - psutil 10 | - opencv # atari. 11 | - pip 12 | - pip: 13 | - atari-py 14 | - pyprind # progbar. 15 | -------------------------------------------------------------------------------- /linux_cuda9.yml: -------------------------------------------------------------------------------- 1 | name: rlpyt 2 | channels: 3 | - pytorch 4 | dependencies: 5 | - python=3.7 6 | - pytorch=1.2 7 | - cudatoolkit=9. 8 | - numpy 9 | - psutil 10 | - opencv # atari. 11 | - pip 12 | - pip: 13 | - atari-py 14 | - pyprind # progbar. 15 | -------------------------------------------------------------------------------- /macos_cpu.yml: -------------------------------------------------------------------------------- 1 | name: rlpyt 2 | channels: 3 | - pytorch 4 | dependencies: 5 | - python=3.7 6 | - pytorch 7 | - numpy 8 | - psutil 9 | - opencv # atari. 10 | - pip 11 | - pip: 12 | - atari-py 13 | - pyprind # progbar. 
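A sketch of creating one of the conda environments defined above; pick the .yml matching your platform and CUDA version (all four name the environment rlpyt):

    conda env create -f linux_cuda10.yml   # or linux_cpu.yml / linux_cuda9.yml / macos_cpu.yml
    conda activate rlpyt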
-------------------------------------------------------------------------------- /random_policy.py: -------------------------------------------------------------------------------- 1 | from os.path import join 2 | import json 3 | 4 | import torch 5 | import numpy as np 6 | 7 | from rlpyt.envs.dm_control_env import DMControlEnv 8 | 9 | 10 | def cloth_corner_random(obs): 11 | idx = np.random.randint(0, 4) 12 | one_hot = np.zeros(4) 13 | one_hot[idx] = 1 14 | 15 | delta = np.random.rand(3) * 2 - 1 16 | return np.concatenate((one_hot, delta)).astype(np.float32) 17 | 18 | 19 | def rope_v2_random(obs): 20 | return np.random.rand(3) * 2 - 1 21 | 22 | 23 | def cloth_point_random(obs): 24 | return np.random.rand(4) * 2 - 1 25 | 26 | 27 | def simulate_policy(): 28 | policy = cloth_point_random 29 | env = DMControlEnv(domain='cloth_point', task='easy', 30 | max_path_length=120, task_kwargs=dict(random_location=False)) 31 | 32 | n_episodes = 40 33 | returns = [] 34 | 35 | for i in range(n_episodes): 36 | o = env.reset() 37 | done = False 38 | reward = 0 39 | 40 | while not done: 41 | o, r, done, info = env.step(policy(o)) 42 | reward += r 43 | 44 | if done or info.traj_done: 45 | break 46 | print(reward) 47 | returns.append(reward) 48 | print('Finished episode', i) 49 | 50 | print('Rewards', returns) 51 | print('Average Reward', np.mean(returns)) 52 | 53 | if __name__ == '__main__': 54 | simulate_policy() 55 | -------------------------------------------------------------------------------- /rlpyt/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/__init__.py -------------------------------------------------------------------------------- /rlpyt/agents/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/agents/__init__.py -------------------------------------------------------------------------------- /rlpyt/agents/dqn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/agents/dqn/__init__.py -------------------------------------------------------------------------------- /rlpyt/agents/dqn/atari/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/agents/dqn/atari/__init__.py -------------------------------------------------------------------------------- /rlpyt/agents/dqn/atari/atari_catdqn_agent.py: -------------------------------------------------------------------------------- 1 | from rlpyt.agents.dqn.catdqn_agent import CatDqnAgent 2 | from rlpyt.models.dqn.atari_catdqn_model import AtariCatDqnModel 3 | from rlpyt.agents.dqn.atari.mixin import AtariMixin 4 | 5 | 6 | class AtariCatDqnAgent(AtariMixin, CatDqnAgent): 7 | 8 | def __init__(self, ModelCls=AtariCatDqnModel, **kwargs): 9 | super().__init__(ModelCls=ModelCls, **kwargs) 10 | -------------------------------------------------------------------------------- /rlpyt/agents/dqn/atari/atari_dqn_agent.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.agents.dqn.dqn_agent import DqnAgent 3 | from rlpyt.models.dqn.atari_dqn_model import 
AtariDqnModel 4 | from rlpyt.agents.dqn.atari.mixin import AtariMixin 5 | 6 | 7 | class AtariDqnAgent(AtariMixin, DqnAgent): 8 | 9 | def __init__(self, ModelCls=AtariDqnModel, **kwargs): 10 | super().__init__(ModelCls=ModelCls, **kwargs) 11 | -------------------------------------------------------------------------------- /rlpyt/agents/dqn/atari/atari_r2d1_agent.py: -------------------------------------------------------------------------------- 1 | from rlpyt.agents.dqn.r2d1_agent import R2d1Agent, R2d1AlternatingAgent 2 | from rlpyt.models.dqn.atari_r2d1_model import AtariR2d1Model 3 | from rlpyt.agents.dqn.atari.mixin import AtariMixin 4 | 5 | 6 | class AtariR2d1Agent(AtariMixin, R2d1Agent): 7 | 8 | def __init__(self, ModelCls=AtariR2d1Model, **kwargs): 9 | super().__init__(ModelCls=ModelCls, **kwargs) 10 | 11 | 12 | class AtariR2d1AlternatingAgent(AtariMixin, R2d1AlternatingAgent): 13 | 14 | def __init__(self, ModelCls=AtariR2d1Model, **kwargs): 15 | super().__init__(ModelCls=ModelCls, **kwargs) 16 | 17 | -------------------------------------------------------------------------------- /rlpyt/agents/dqn/atari/mixin.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class AtariMixin: 4 | 5 | def make_env_to_model_kwargs(self, env_spaces): 6 | return dict(image_shape=env_spaces.observation.shape, 7 | output_size=env_spaces.action.n) 8 | -------------------------------------------------------------------------------- /rlpyt/agents/dqn/catdqn_agent.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | 4 | from rlpyt.agents.base import AgentStep 5 | from rlpyt.agents.dqn.dqn_agent import DqnAgent 6 | from rlpyt.distributions.epsilon_greedy import CategoricalEpsilonGreedy 7 | from rlpyt.utils.buffer import buffer_to 8 | from rlpyt.utils.collections import namedarraytuple 9 | 10 | AgentInfo = namedarraytuple("AgentInfo", ["p"]) 11 | 12 | 13 | class CatDqnAgent(DqnAgent): 14 | 15 | def __init__(self, n_atoms=51, **kwargs): 16 | super().__init__(**kwargs) 17 | self.n_atoms = self.model_kwargs["n_atoms"] = n_atoms 18 | 19 | def initialize(self, env_spaces, share_memory=False, 20 | global_B=1, env_ranks=None): 21 | super().initialize(env_spaces, share_memory, global_B, env_ranks) 22 | # Overwrite distribution. 23 | self.distribution = CategoricalEpsilonGreedy(dim=env_spaces.action.n, 24 | z=torch.linspace(-1, 1, self.n_atoms)) # z placeholder for init. 25 | 26 | def give_V_min_max(self, V_min, V_max): 27 | self.V_min = V_min 28 | self.V_max = V_max 29 | self.distribution.set_z(torch.linspace(V_min, V_max, self.n_atoms)) 30 | 31 | @torch.no_grad() 32 | def step(self, observation, prev_action, prev_reward): 33 | prev_action = self.distribution.to_onehot(prev_action) 34 | model_inputs = buffer_to((observation, prev_action, prev_reward), 35 | device=self.device) 36 | p = self.model(*model_inputs) 37 | p = p.cpu() 38 | action = self.distribution.sample(p) 39 | agent_info = AgentInfo(p=p) # Only change from DQN: q -> p. 
40 | action, agent_info = buffer_to((action, agent_info), device="cpu") 41 | return AgentStep(action=action, agent_info=agent_info) 42 | -------------------------------------------------------------------------------- /rlpyt/agents/pg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/agents/pg/__init__.py -------------------------------------------------------------------------------- /rlpyt/agents/pg/atari.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from rlpyt.agents.pg.categorical import (CategoricalPgAgent, 4 | RecurrentCategoricalPgAgent, AlternatingRecurrentCategoricalPgAgent) 5 | from rlpyt.models.pg.atari_ff_model import AtariFfModel 6 | from rlpyt.models.pg.atari_lstm_model import AtariLstmModel 7 | 8 | 9 | class AtariMixin: 10 | 11 | def make_env_to_model_kwargs(self, env_spaces): 12 | return dict(image_shape=env_spaces.observation.shape, 13 | output_size=env_spaces.action.n) 14 | 15 | 16 | class AtariFfAgent(AtariMixin, CategoricalPgAgent): 17 | 18 | def __init__(self, ModelCls=AtariFfModel, **kwargs): 19 | super().__init__(ModelCls=ModelCls, **kwargs) 20 | 21 | 22 | class AtariLstmAgent(AtariMixin, RecurrentCategoricalPgAgent): 23 | 24 | def __init__(self, ModelCls=AtariLstmModel, **kwargs): 25 | super().__init__(ModelCls=ModelCls, **kwargs) 26 | 27 | 28 | class AlternatingAtariLstmAgent(AtariMixin, 29 | AlternatingRecurrentCategoricalPgAgent): 30 | 31 | def __init__(self, ModelCls=AtariLstmModel, **kwargs): 32 | super().__init__(ModelCls=ModelCls, **kwargs) 33 | -------------------------------------------------------------------------------- /rlpyt/agents/pg/base.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.collections import namedarraytuple 3 | 4 | AgentInfo = namedarraytuple("AgentInfo", ["dist_info", "value"]) 5 | AgentInfoRnn = namedarraytuple("AgentInfoRnn", 6 | ["dist_info", "value", "prev_rnn_state"]) 7 | 8 | -------------------------------------------------------------------------------- /rlpyt/agents/pg/mujoco.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from rlpyt.agents.pg.gaussian import (GaussianPgAgent, 4 | RecurrentGaussianPgAgent, AlternatingRecurrentGaussianPgAgent) 5 | from rlpyt.models.pg.mujoco_ff_model import MujocoFfModel 6 | from rlpyt.models.pg.mujoco_lstm_model import MujocoLstmModel 7 | 8 | 9 | class MujocoMixin: 10 | 11 | def make_env_to_model_kwargs(self, env_spaces): 12 | assert len(env_spaces.action.shape) == 1 13 | return dict(observation_shape=env_spaces.observation.shape, 14 | action_size=env_spaces.action.shape[0]) 15 | 16 | 17 | class MujocoFfAgent(MujocoMixin, GaussianPgAgent): 18 | 19 | def __init__(self, ModelCls=MujocoFfModel, **kwargs): 20 | super().__init__(ModelCls=ModelCls, **kwargs) 21 | 22 | 23 | class MujocoLstmAgent(MujocoMixin, RecurrentGaussianPgAgent): 24 | 25 | def __init__(self, ModelCls=MujocoLstmModel, **kwargs): 26 | super().__init__(ModelCls=ModelCls, **kwargs) 27 | 28 | 29 | class AlternatingMujocoLstmAgent(MujocoMixin, 30 | AlternatingRecurrentGaussianPgAgent): 31 | 32 | def __init__(self, ModelCls=MujocoLstmModel, **kwargs): 33 | super().__init__(ModelCls=ModelCls, **kwargs) 34 | -------------------------------------------------------------------------------- /rlpyt/agents/qpg/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/agents/qpg/__init__.py -------------------------------------------------------------------------------- /rlpyt/algos/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/algos/__init__.py -------------------------------------------------------------------------------- /rlpyt/algos/base.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class RlAlgorithm: 4 | 5 | opt_info_fields = () 6 | bootstrap_value = False 7 | update_counter = 0 8 | 9 | def initialize(self, agent, n_itr, batch_spec, mid_batch_reset, examples, 10 | world_size=1, rank=0): 11 | raise NotImplementedError 12 | 13 | def async_initialize(self, agent, sampler_n_itr, batch_spec, mid_batch_reset, 14 | examples, world_size=1): 15 | """Called instead of initialize() in async runner. 16 | Should return async replay_buffer using shared memory.""" 17 | raise NotImplementedError 18 | 19 | def optim_initialize(self, rank=0): 20 | """Called in async runner, and possibly self.initialize().""" 21 | raise NotImplementedError 22 | 23 | def optimize_agent(self, itr, samples=None, sampler_itr=None): 24 | raise NotImplementedError 25 | 26 | def optim_state_dict(self): 27 | """If carrying multiple optimizers, overwrite to return dict state_dicts.""" 28 | return self.optimizer.state_dict() 29 | 30 | def load_optim_state_dict(self, state_dict): 31 | self.optimizer.load_state_dict(state_dict) 32 | 33 | @property 34 | def batch_size(self): 35 | return self._batch_size # For logging at least. 36 | -------------------------------------------------------------------------------- /rlpyt/algos/dqn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/algos/dqn/__init__.py -------------------------------------------------------------------------------- /rlpyt/algos/pg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/algos/pg/__init__.py -------------------------------------------------------------------------------- /rlpyt/algos/qpg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/algos/qpg/__init__.py -------------------------------------------------------------------------------- /rlpyt/algos/qpg/td3.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | 4 | from rlpyt.algos.qpg.ddpg import DDPG 5 | from rlpyt.utils.quick_args import save__init__args 6 | from rlpyt.utils.tensor import valid_mean 7 | 8 | 9 | class TD3(DDPG): 10 | 11 | def __init__( 12 | self, 13 | batch_size=100, 14 | replay_ratio=100, # data_consumption / data_generation 15 | target_update_tau=0.005, 16 | target_update_interval=2, 17 | policy_update_interval=2, 18 | mu_learning_rate=1e-3, 19 | q_learning_rate=1e-3, 20 | **kwargs 21 | ): 22 | _batch_size = batch_size 23 | del batch_size # Property. 
24 | save__init__args(locals()) 25 | super().__init__(**kwargs) 26 | 27 | def initialize(self, *args, **kwargs): 28 | super().initialize(*args, **kwargs) 29 | self.agent.give_min_itr_learn(self.min_itr_learn) 30 | 31 | def async_initialize(self, *args, **kwargs): 32 | ret = super().async_initialize(*args, **kwargs) 33 | self.agent.give_min_itr_learn(self.min_itr_learn) 34 | return ret 35 | 36 | def q_loss(self, samples, valid): 37 | q1, q2 = self.agent.q(*samples.agent_inputs, samples.action) 38 | with torch.no_grad(): 39 | target_q1, target_q2 = self.agent.target_q_at_mu( 40 | *samples.target_inputs) # Includes target action noise. 41 | target_q = torch.min(target_q1, target_q2) 42 | disc = self.discount ** self.n_step_return 43 | y = samples.return_ + (1 - samples.done_n.float()) * disc * target_q 44 | q1_losses = 0.5 * (y - q1) ** 2 45 | q2_losses = 0.5 * (y - q2) ** 2 46 | q_loss = valid_mean(q1_losses + q2_losses, valid) # valid can be None. 47 | return q_loss 48 | -------------------------------------------------------------------------------- /rlpyt/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/distributions/__init__.py -------------------------------------------------------------------------------- /rlpyt/distributions/base.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | 4 | from rlpyt.utils.tensor import valid_mean 5 | 6 | DistInfo = None 7 | 8 | 9 | class Distribution: 10 | 11 | @property 12 | def dim(self): 13 | raise NotImplementedError 14 | 15 | def sample(self, dist_info): 16 | raise NotImplementedError 17 | 18 | def kl(self, old_dist_info, new_dist_info): 19 | """ 20 | Compute the KL divergence of two distributions 21 | """ 22 | raise NotImplementedError 23 | 24 | def mean_kl(self, old_dist_info, new_dist_info, valid): 25 | raise NotImplementedError 26 | 27 | def log_likelihood(self, x, dist_info): 28 | raise NotImplementedError 29 | 30 | def likelihood_ratio(self, x, old_dist_info, new_dist_info): 31 | raise NotImplementedError 32 | 33 | def entropy(self, dist_info): 34 | raise NotImplementedError 35 | 36 | def perplexity(self, dist_info): 37 | return torch.exp(self.entropy(dist_info)) 38 | 39 | def mean_entropy(self, dist_info, valid=None): 40 | return valid_mean(self.entropy(dist_info), valid) 41 | 42 | def mean_perplexity(self, dist_info, valid=None): 43 | return valid_mean(self.perplexity(dist_info), valid) 44 | -------------------------------------------------------------------------------- /rlpyt/distributions/categorical.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | 4 | from rlpyt.distributions.base import Distribution 5 | from rlpyt.distributions.discrete import DiscreteMixin 6 | from rlpyt.utils.collections import namedarraytuple 7 | from rlpyt.utils.tensor import valid_mean, select_at_indexes 8 | 9 | EPS = 1e-8 10 | 11 | DistInfo = namedarraytuple("DistInfo", ["prob"]) 12 | 13 | 14 | class Categorical(DiscreteMixin, Distribution): 15 | 16 | def kl(self, old_dist_info, new_dist_info): 17 | p = old_dist_info.prob 18 | q = new_dist_info.prob 19 | return torch.sum(p * (torch.log(p + EPS) - torch.log(q + EPS)), dim=-1) 20 | 21 | def mean_kl(self, old_dist_info, new_dist_info, valid=None): 22 | return valid_mean(self.kl(old_dist_info, new_dist_info), valid) 23 | 24 | def sample(self, 
dist_info): 25 | p = dist_info.prob 26 | sample = torch.multinomial(p.view(-1, self.dim), num_samples=1) 27 | return sample.view(p.shape[:-1]).type(self.dtype) # Returns indexes. 28 | 29 | def entropy(self, dist_info): 30 | p = dist_info.prob 31 | return -torch.sum(p * torch.log(p + EPS), dim=-1) 32 | 33 | def log_likelihood(self, indexes, dist_info): 34 | selected_likelihood = select_at_indexes(indexes, dist_info.prob) 35 | return torch.log(selected_likelihood + EPS) 36 | 37 | def sample_loglikelihood(self, dist_info): 38 | indexes = self.sample(dist_info) 39 | log_likelihood = self.log_likelihood(indexes, dist_info) 40 | return indexes, log_likelihood 41 | 42 | def likelihood_ratio(self, indexes, old_dist_info, new_dist_info): 43 | num = select_at_indexes(indexes, new_dist_info.prob) 44 | den = select_at_indexes(indexes, old_dist_info.prob) 45 | return (num + EPS) / (den + EPS) 46 |
-------------------------------------------------------------------------------- /rlpyt/distributions/discrete.py:
--------------------------------------------------------------------------------
1 | 2 | import torch 3 | 4 | from rlpyt.utils.tensor import to_onehot, from_onehot 5 | 6 | 7 | class DiscreteMixin: 8 | 9 | def __init__(self, dim, dtype=torch.long, onehot_dtype=torch.float): 10 | self._dim = dim 11 | self.dtype = dtype 12 | self.onehot_dtype = onehot_dtype 13 | 14 | @property 15 | def dim(self): 16 | return self._dim 17 | 18 | def to_onehot(self, indexes, dtype=None): 19 | return to_onehot(indexes, self._dim, dtype=dtype or self.onehot_dtype) 20 | 21 | def from_onehot(self, onehot, dtype=None): 22 | return from_onehot(onehot, dtype=dtype or self.dtype) 23 |
-------------------------------------------------------------------------------- /rlpyt/distributions/epsilon_greedy.py:
--------------------------------------------------------------------------------
1 | 2 | import torch 3 | 4 | from rlpyt.distributions.base import Distribution 5 | from rlpyt.distributions.discrete import DiscreteMixin 6 | 7 | 8 | class EpsilonGreedy(DiscreteMixin, Distribution): 9 | """Input can be shaped [T,B,Q] or [B,Q], and vector epsilon of length 10 | B will apply across the Batch dimension (same epsilon for all T).""" 11 | 12 | def __init__(self, epsilon=1, **kwargs): 13 | super().__init__(**kwargs) 14 | self._epsilon = epsilon 15 | 16 | def sample(self, q): 17 | arg_select = torch.argmax(q, dim=-1) 18 | mask = torch.rand(arg_select.shape) < self._epsilon 19 | arg_rand = torch.randint(low=0, high=q.shape[-1], size=(mask.sum(),)) 20 | arg_select[mask] = arg_rand 21 | return arg_select 22 | 23 | @property 24 | def epsilon(self): 25 | return self._epsilon 26 | 27 | def set_epsilon(self, epsilon): 28 | self._epsilon = epsilon 29 | 30 | 31 | class CategoricalEpsilonGreedy(EpsilonGreedy): 32 | """Input p should be shaped [T,B,A,P] or [B,A,P], A: number of actions, 33 | P: number of atoms.
Input z is domain of atom-values, shaped [P].""" 34 | 35 | def __init__(self, z=None, **kwargs): 36 | super().__init__(**kwargs) 37 | self.z = z 38 | 39 | def sample(self, p, z=None): 40 | q = torch.tensordot(p, self.z if z is None else z, dims=1) # Explicit None check; "z or self.z" errors on tensor z. 41 | return super().sample(q) 42 | 43 | def set_z(self, z): 44 | self.z = z 45 |
-------------------------------------------------------------------------------- /rlpyt/envs/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/envs/__init__.py
-------------------------------------------------------------------------------- /rlpyt/envs/atari/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/envs/atari/__init__.py
-------------------------------------------------------------------------------- /rlpyt/experiments/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/experiments/__init__.py
-------------------------------------------------------------------------------- /rlpyt/experiments/configs/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/experiments/configs/__init__.py
-------------------------------------------------------------------------------- /rlpyt/experiments/configs/atari/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/experiments/configs/atari/__init__.py
-------------------------------------------------------------------------------- /rlpyt/experiments/configs/atari/dqn/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/experiments/configs/atari/dqn/__init__.py
-------------------------------------------------------------------------------- /rlpyt/experiments/configs/atari/pg/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/experiments/configs/atari/pg/__init__.py
-------------------------------------------------------------------------------- /rlpyt/experiments/configs/atari/pg/atari_ff_a2c.py:
--------------------------------------------------------------------------------
1 | 2 | import copy 3 | 4 | configs = dict() 5 | 6 | 7 | config = dict( 8 | agent=dict(), 9 | algo=dict( 10 | discount=0.99, 11 | learning_rate=3e-4, 12 | value_loss_coeff=0.5, 13 | entropy_loss_coeff=0.01, 14 | clip_grad_norm=1., 15 | ), 16 | env=dict(game="pong"), 17 | model=dict(), 18 | optim=dict(), 19 | runner=dict( 20 | n_steps=50e6, 21 | # log_interval_steps=1e3, 22 | ), 23 | sampler=dict( 24 | batch_T=5, 25 | batch_B=32, 26 | max_decorrelation_steps=1000, 27 | ), 28 | ) 29 | 30 | configs["0"] = config 31 | 32 | config = copy.deepcopy(config) 33 | 34 | config["algo"]["learning_rate"] = 4e-4 35 | 36 | configs["low_lr"] = config 37 | 38 | config = copy.deepcopy(configs["0"]) 39 |
config["sampler"]["batch_B"] = 16 40 | configs["2gpu"] = config 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/configs/atari/pg/atari_ff_ppo.py: -------------------------------------------------------------------------------- 1 | 2 | configs = dict() 3 | 4 | 5 | config = dict( 6 | agent=dict(), 7 | algo=dict( 8 | discount=0.99, 9 | learning_rate=1e-3, 10 | value_loss_coeff=1., 11 | entropy_loss_coeff=0.01, 12 | clip_grad_norm=1., 13 | gae_lambda=0.98, 14 | linear_lr_schedule=True, 15 | minibatches=4, 16 | epochs=4, 17 | ), 18 | env=dict(game="pong"), 19 | model=dict(), 20 | optim=dict(), 21 | runner=dict( 22 | n_steps=50e6, 23 | # log_interval_steps=1e3, 24 | ), 25 | sampler=dict( 26 | batch_T=64, 27 | batch_B=32, 28 | max_decorrelation_steps=1000, 29 | ), 30 | ) 31 | 32 | configs["0"] = config 33 | -------------------------------------------------------------------------------- /rlpyt/experiments/configs/atari/pg/atari_lstm_a2c.py: -------------------------------------------------------------------------------- 1 | 2 | import copy 3 | 4 | configs = dict() 5 | 6 | 7 | config = dict( 8 | agent=dict(), 9 | algo=dict( 10 | discount=0.99, 11 | learning_rate=3e-4, 12 | value_loss_coeff=0.5, 13 | entropy_loss_coeff=0.01, 14 | clip_grad_norm=1., 15 | ), 16 | env=dict( 17 | game="pong", 18 | num_img_obs=1, 19 | ), 20 | model=dict(), 21 | optim=dict(), 22 | runner=dict( 23 | n_steps=50e6, 24 | # log_interval_steps=1e5, 25 | ), 26 | sampler=dict( 27 | batch_T=20, 28 | batch_B=32, 29 | max_decorrelation_steps=1000, 30 | ), 31 | ) 32 | 33 | configs["0"] = config 34 | 35 | 36 | config = copy.deepcopy(config) 37 | config["env"]["num_img_obs"] = 4 38 | config["sampler"]["batch_T"] = 5 39 | config["sampler"]["batch_B"] = 16 40 | config["algo"]["learning_rate"] = 1e-4 41 | configs["4frame"] = config 42 | 43 | 44 | config = copy.deepcopy(config) 45 | config["algo"]["learning_rate"] = 7e-4 46 | config["sampler"]["batch_B"] = 32 47 | config["algo"]["clip_grad_norm"] = 1 48 | configs["like_ff"] = config 49 | -------------------------------------------------------------------------------- /rlpyt/experiments/configs/atari/pg/atari_lstm_ppo.py: -------------------------------------------------------------------------------- 1 | 2 | configs = dict() 3 | 4 | 5 | config = dict( 6 | agent=dict(), 7 | algo=dict( 8 | discount=0.99, 9 | learning_rate=1e-3, 10 | value_loss_coeff=1., 11 | entropy_loss_coeff=0.01, 12 | clip_grad_norm=1., 13 | gae_lambda=0.98, 14 | linear_lr_schedule=True, 15 | ), 16 | env=dict( 17 | game="pong", 18 | num_img_obs=1, 19 | ), 20 | model=dict(), 21 | optim=dict(), 22 | runner=dict( 23 | n_steps=50e6, 24 | # log_interval_steps=1e3, 25 | ), 26 | sampler=dict( 27 | batch_T=64, 28 | batch_B=32, 29 | max_decorrelation_steps=1000, 30 | ), 31 | ) 32 | 33 | configs["0"] = config 34 | -------------------------------------------------------------------------------- /rlpyt/experiments/configs/mujoco/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/experiments/configs/mujoco/__init__.py -------------------------------------------------------------------------------- /rlpyt/experiments/configs/mujoco/pg/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/experiments/configs/mujoco/pg/__init__.py
-------------------------------------------------------------------------------- /rlpyt/experiments/configs/mujoco/pg/mujoco_a2c.py:
--------------------------------------------------------------------------------
1 | 2 | import copy 3 | 4 | configs = dict() 5 | 6 | config = dict( 7 | agent=dict(), 8 | algo=dict( 9 | discount=0.99, 10 | learning_rate=3e-5, 11 | clip_grad_norm=1e6, 12 | entropy_loss_coeff=0.0, 13 | value_loss_coeff=0.5, 14 | normalize_advantage=True, 15 | ), 16 | env=dict(id="Hopper-v3"), 17 | model=dict(), 18 | optim=dict(), 19 | runner=dict( 20 | n_steps=1e6, 21 | log_interval_steps=2e4, 22 | ), 23 | sampler=dict( 24 | batch_T=100, 25 | batch_B=8, 26 | max_decorrelation_steps=1000, 27 | ), 28 | ) 29 | 30 | configs["a2c_1M"] = config 31 | 32 |
-------------------------------------------------------------------------------- /rlpyt/experiments/configs/mujoco/pg/mujoco_ppo.py:
--------------------------------------------------------------------------------
1 | 2 | import copy 3 | 4 | configs = dict() 5 | 6 | config = dict( 7 | agent=dict(), 8 | algo=dict( 9 | discount=0.99, 10 | learning_rate=3e-4, 11 | clip_grad_norm=1e6, 12 | entropy_loss_coeff=0.0, 13 | gae_lambda=0.95, 14 | minibatches=32, 15 | epochs=10, 16 | ratio_clip=0.2, 17 | normalize_advantage=True, 18 | linear_lr_schedule=True, 19 | ), 20 | env=dict(id="Hopper-v3"), 21 | model=dict(), 22 | optim=dict(), 23 | runner=dict( 24 | n_steps=1e6, 25 | log_interval_steps=2048 * 10, 26 | ), 27 | sampler=dict( 28 | batch_T=2048, 29 | batch_B=1, 30 | max_decorrelation_steps=0, 31 | ), 32 | ) 33 | 34 | configs["ppo_1M_serial"] = config 35 | 36 | config = copy.deepcopy(configs["ppo_1M_serial"]) 37 | config["sampler"]["batch_B"] = 8 38 | config["sampler"]["batch_T"] = 256 39 | configs["ppo_1M_cpu"] = config 40 | 41 | config = copy.deepcopy(config)  # New copy, so the "ppo_1M_cpu" entry is not mutated below. 42 | config["algo"]["minibatches"] = 1 43 | config["algo"]["epochs"] = 32 44 | configs["ppo_32ep_1mb"] = config 45 |
-------------------------------------------------------------------------------- /rlpyt/experiments/configs/mujoco/qpg/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/experiments/configs/mujoco/qpg/__init__.py
-------------------------------------------------------------------------------- /rlpyt/experiments/configs/mujoco/qpg/mujoco_ddpg.py:
--------------------------------------------------------------------------------
1 | 2 | import copy 3 | 4 | configs = dict() 5 | 6 | config = dict( 7 | agent=dict( 8 | model_kwargs=None, 9 | q_model_kwargs=None, 10 | ), 11 | algo=dict( 12 | discount=0.99, 13 | batch_size=100, 14 | replay_ratio=100, 15 | target_update_tau=0.01, 16 | target_update_interval=1, 17 | policy_update_interval=1, 18 | learning_rate=1e-3, 19 | q_learning_rate=1e-3, 20 | ), 21 | env=dict(id="Hopper-v3"), 22 | # eval_env=dict(id="Hopper-v3"), # Same kwargs as env, in train script.
23 | optim=dict(), 24 | runner=dict( 25 | n_steps=1e6, 26 | log_interval_steps=1e4, 27 | ), 28 | sampler=dict( 29 | batch_T=1, 30 | batch_B=1, 31 | max_decorrelation_steps=0, 32 | eval_n_envs=5, 33 | eval_max_steps=int(51e3), 34 | eval_max_trajectories=50, 35 | ), 36 | ) 37 | 38 | configs["ddpg_from_td3_1M_serial"] = config 39 | 40 | config = copy.deepcopy(config) 41 | config["sampler"]["batch_T"] = 5 42 | config["algo"]["updates_per_sync"] = 1 43 | configs["async_serial"] = config 44 | -------------------------------------------------------------------------------- /rlpyt/experiments/configs/mujoco/qpg/mujoco_sac.py: -------------------------------------------------------------------------------- 1 | 2 | import copy 3 | 4 | configs = dict() 5 | 6 | config = dict( 7 | agent=dict( 8 | model_kwargs=None, 9 | q_model_kwargs=None, 10 | v_model_kwargs=None, 11 | ), 12 | algo=dict( 13 | discount=0.99, 14 | batch_size=256, 15 | replay_ratio=256, 16 | target_update_tau=0.005, 17 | target_update_interval=1, 18 | learning_rate=3e-4, 19 | reparameterize=True, 20 | policy_output_regularization=0.001, 21 | reward_scale=5, 22 | ), 23 | env=dict(id="Hopper-v3"), 24 | # eval_env=dict(id="Hopper-v3"), # Train script uses "env". 25 | model=dict(), 26 | optim=dict(), 27 | runner=dict( 28 | n_steps=1e6, 29 | log_interval_steps=1e4, 30 | ), 31 | sampler=dict( 32 | batch_T=1, 33 | batch_B=1, 34 | max_decorrelation_steps=0, 35 | eval_n_envs=6, 36 | eval_max_steps=int(51e3), 37 | eval_max_trajectories=50, 38 | ), 39 | ) 40 | 41 | configs["sac_1M_serial"] = config 42 | 43 | config = copy.deepcopy(config) 44 | config["sampler"]["batch_T"] = 5 45 | config["sampler"]["batch_B"] = 3 46 | config["algo"]["updates_per_sync"] = 1 47 | configs["async_gpu"] = config 48 | -------------------------------------------------------------------------------- /rlpyt/experiments/configs/mujoco/qpg/mujoco_td3.py: -------------------------------------------------------------------------------- 1 | 2 | import copy 3 | 4 | configs = dict() 5 | 6 | config = dict( 7 | agent=dict( 8 | model_kwargs=None, 9 | q_model_kwargs=None, 10 | ), 11 | algo=dict( 12 | discount=0.99, 13 | batch_size=100, 14 | replay_ratio=100, 15 | target_update_tau=0.005, 16 | target_update_interval=2, 17 | policy_update_interval=2, 18 | learning_rate=1e-3, 19 | q_learning_rate=1e-3, 20 | ), 21 | env=dict(id="Hopper-v3"), 22 | # eval_env=dict(id="Hopper-v3"), # Train script uses "env". 
23 | model=dict(), 24 | optim=dict(), 25 | runner=dict( 26 | n_steps=1e6, 27 | log_interval_steps=1e4, 28 | ), 29 | sampler=dict( 30 | batch_T=1, 31 | batch_B=1, 32 | max_decorrelation_steps=0, 33 | eval_n_envs=6, 34 | eval_max_steps=int(51e3), 35 | eval_max_trajectories=50, 36 | ), 37 | ) 38 | 39 | configs["td3_1M_serial"] = config 40 | 41 | config = copy.deepcopy(config) 42 | 43 | config = copy.deepcopy(config) 44 | config["sampler"]["batch_T"] = 5 45 | config["sampler"]["batch_B"] = 3 46 | config["algo"]["updates_per_sync"] = 1 47 | configs["async_cpu"] = config 48 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/dgx/launch_atari_r2d1_async_alt.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=40, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 16 | alternating=True, 17 | ) 18 | runs_per_setting = 2 19 | experiment_title = "atari_r2d1_async_alt" 20 | variant_levels = list() 21 | 22 | games = ["pong", "seaquest", "qbert", "chopper_command"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_dgx" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/dgx/launch_atari_r2d1_async_alt_seaquest.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=40, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 
16 | alternating=True, 17 | ) 18 | runs_per_setting = 1 19 | experiment_title = "atari_r2d1_async_alt" 20 | variant_levels = list() 21 | 22 | games = ["seaquest"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_dgx" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/dgx/launch_atari_r2d1_async_gpu.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=40, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=3, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 16 | alternating=False, 17 | ) 18 | runs_per_setting = 1 19 | experiment_title = "atari_r2d1_async_gpu" 20 | variant_levels = list() 21 | 22 | games = ["seaquest"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_gpu_dgx" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/got/launch_atari_dqn_cpu_basic_1of2.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_cpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=4, 10 | hyperthread_offset=20, 11 | n_socket=2, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_dqn_basic_cpusamp_1of2" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest"] #, "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "dqn" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | 
common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/got/launch_atari_dqn_cpu_basic_2of2.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_cpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=4, 10 | hyperthread_offset=20, 11 | n_socket=2, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_dqn_basic_cpusamp_2of2" 16 | variant_levels = list() 17 | 18 | games = ["qbert", "chopper_command"] # ["pong", "seaquest"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "dqn" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/got/launch_atari_dqn_gpu_pong.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=4, 10 | hyperthread_offset=20, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 1 14 | experiment_title = "atari_dqn_pong_got" 15 | variant_levels = list() 16 | 17 | 18 | learning_rates = [5e-5, 1e-4, 2.5e-4, 5e-4] 19 | values = list(zip(learning_rates)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("algo", "learning_rate")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "short_run" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/got/launch_atari_r2d1_async_alt.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 
| n_socket=1, # Force this. 16 | alternating=True, 17 | ) 18 | runs_per_setting = 2 19 | experiment_title = "atari_r2d1_async_alt" 20 | variant_levels = list() 21 | 22 | games = ["pong", "seaquest", "qbert", "chopper_command"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_got" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/got/launch_atari_r2d1_async_alt_amidar.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 16 | alternating=True, 17 | ) 18 | runs_per_setting = 1 19 | experiment_title = "atari_r2d1_async_alt" 20 | variant_levels = list() 21 | 22 | games = ["amidar"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_got" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/got/launch_atari_r2d1_async_alt_gravitar.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 
16 | alternating=True, 17 | ) 18 | runs_per_setting = 1 19 | experiment_title = "atari_r2d1_async_alt" 20 | variant_levels = list() 21 | 22 | games = ["gravitar"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_got" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/got/launch_atari_r2d1_async_alt_pong.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 16 | alternating=True, 17 | ) 18 | runs_per_setting = 1 19 | experiment_title = "atari_r2d1_async_alt" 20 | variant_levels = list() 21 | 22 | games = ["pong"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_got" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/got/launch_atari_r2d1_async_alt_seaquest.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 
16 | alternating=True, 17 | ) 18 | runs_per_setting = 1 19 | experiment_title = "atari_r2d1_async_alt" 20 | variant_levels = list() 21 | 22 | games = ["seaquest"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_got" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/got/launch_atari_r2d1_async_gpu.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=2, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 16 | ) 17 | runs_per_setting = 2 18 | experiment_title = "atari_r2d1_async_gpu" 19 | variant_levels = list() 20 | 21 | games = ["pong", "seaquest", "qbert", "chopper_command"] 22 | values = list(zip(games)) 23 | dir_names = ["{}".format(*v) for v in values] 24 | keys = [("env", "game")] 25 | variant_levels.append(VariantLevel(keys, values, dir_names)) 26 | 27 | variants, log_dirs = make_variants(*variant_levels) 28 | 29 | default_config_key = "async_gpu" 30 | 31 | run_experiments( 32 | script=script, 33 | affinity_code=affinity_code, 34 | experiment_title=experiment_title, 35 | runs_per_setting=runs_per_setting, 36 | variants=variants, 37 | log_dirs=log_dirs, 38 | common_args=(default_config_key,), 39 | ) 40 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/got/launch_atari_r2d1_async_gpu_amidar.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=2, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 
16 | alternating=False 17 | ) 18 | runs_per_setting = 2 19 | experiment_title = "atari_r2d1_async_gpu" 20 | variant_levels = list() 21 | 22 | games = ["amidar"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_got" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/got/launch_atari_r2d1_async_gpu_test.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=9, 9 | n_gpu=2, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=1, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 16 | ) 17 | runs_per_setting = 1 18 | experiment_title = "atari_r2d1_async_gpu_test" 19 | variant_levels = list() 20 | 21 | games = ["seaquest"] 22 | values = list(zip(games)) 23 | dir_names = ["{}".format(*v) for v in values] 24 | keys = [("env", "game")] 25 | variant_levels.append(VariantLevel(keys, values, dir_names)) 26 | 27 | variants, log_dirs = make_variants(*variant_levels) 28 | 29 | default_config_key = "r2d1_test" 30 | 31 | run_experiments( 32 | script=script, 33 | affinity_code=affinity_code, 34 | experiment_title=experiment_title, 35 | runs_per_setting=runs_per_setting, 36 | variants=variants, 37 | log_dirs=log_dirs, 38 | common_args=(default_config_key,), 39 | ) 40 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/got/launch_atari_r2d1_long_4tr_gravitar.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=12, 9 | n_gpu=1, 10 | hyperthread_offset=20, 11 | n_socket=1, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_r2d1_long_4tr" 15 | variant_levels = list() 16 | 17 | games = ["gravitar"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) for v in values] 20 | keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = make_variants(*variant_levels) 24 | 25 | default_config_key = "r2d1_long_4tr" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | 
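
All of the got launch scripts above share one template: encode a hardware affinity for the whole machine, stack one or more VariantLevels into a variant grid, and hand the result to run_experiments, which starts one training process per variant per run. As a minimal sketch of that template, the hypothetical launcher below crosses two levels (games x learning rates), the same stacking that launch_atari_dqn_gpu_pong.py uses for its learning-rate sweep; the experiment title, core counts, and level values here are illustrative, not taken from any script in this directory.

from rlpyt.utils.launching.affinity import encode_affinity
from rlpyt.utils.launching.exp_launcher import run_experiments
from rlpyt.utils.launching.variant import make_variants, VariantLevel

script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_gpu.py"
affinity_code = encode_affinity(n_cpu_cores=4, n_gpu=1, n_socket=1)

variant_levels = list()

games = ["pong", "seaquest"]
variant_levels.append(VariantLevel(
    keys=[("env", "game")],            # nested config address: config["env"]["game"]
    values=list(zip(games)),           # one value tuple per variant
    dir_names=games,                   # one log subdirectory per value
))

learning_rates = [1e-4, 2.5e-4]
variant_levels.append(VariantLevel(
    keys=[("algo", "learning_rate")],
    values=list(zip(learning_rates)),
    dir_names=["lr_{}".format(lr) for lr in learning_rates],
))

# make_variants() crosses the levels: 2 games x 2 learning rates = 4 variants,
# each paired with a nested log directory such as "pong/lr_0.0001".
variants, log_dirs = make_variants(*variant_levels)

run_experiments(
    script=script,
    affinity_code=affinity_code,
    experiment_title="atari_dqn_lr_sweep",  # hypothetical title
    runs_per_setting=1,
    variants=variants,
    log_dirs=log_dirs,
    common_args=("dqn",),  # default config key, forwarded to the train script
)
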
-------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/launch_atari_catdqn_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_catdqn_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=4, 9 | n_gpu=1, 10 | hyperthread_offset=8, 11 | n_socket=1, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_catdqn_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "catdqn" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/launch_atari_dpd_dqn_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=4, 9 | n_gpu=1, 10 | hyperthread_offset=8, 11 | n_socket=1, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_dpd_dqn_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "double_pri_duel" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/launch_atari_dqn_cpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_cpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=4, 9 | n_gpu=1, 10 | hyperthread_offset=8, 11 | n_socket=1, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_dqn_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", 
"seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "dqn" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/launch_atari_dqn_gpu.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=4, 9 | n_gpu=1, 10 | hyperthread_offset=8, 11 | n_socket=1, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_dqn_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "dqn" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/launch_atari_dqn_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=4, 9 | n_gpu=1, 10 | hyperthread_offset=8, 11 | n_socket=1, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_dqn_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "dqn" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/launch_atari_ernbw_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | 
from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_catdqn_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=4, 9 | n_gpu=1, 10 | hyperthread_offset=8, 11 | n_socket=1, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_ernbw_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "ernbw" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/launch_atari_r2d1_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=4, 9 | n_gpu=1, 10 | hyperthread_offset=8, 11 | n_socket=1, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_r2d1_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "r2d1" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_catdqn_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_catdqn_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=8, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_catdqn_basic" 15 | variant_levels = list() 16 | 17 | games = ["pong", "seaquest", "qbert", "chopper_command"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) for v in values] 20 | keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = 
make_variants(*variant_levels) 24 | 25 | default_config_key = "catdqn" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_dpd_dqn_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=8, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_dpd_dqn_basic" 15 | variant_levels = list() 16 | 17 | games = ["pong", "seaquest", "qbert", "chopper_command"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) for v in values] 20 | keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = make_variants(*variant_levels) 24 | 25 | default_config_key = "double_pri_duel" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_dqn_async_cpu.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_async_cpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=8, 10 | async_sample=True, 11 | sample_gpu_per_run=0, 12 | gpu_per_run=1, 13 | hyperthread_offset=24, 14 | # n_socket=2, 15 | ) 16 | runs_per_setting = 2 17 | experiment_title = "atari_dqn_async_cpu" 18 | variant_levels = list() 19 | 20 | games = ["pong", "seaquest", "qbert", "chopper_command"] 21 | values = list(zip(games)) 22 | dir_names = ["{}".format(*v) for v in values] 23 | keys = [("env", "game")] 24 | variant_levels.append(VariantLevel(keys, values, dir_names)) 25 | 26 | variants, log_dirs = make_variants(*variant_levels) 27 | 28 | default_config_key = "cpu" 29 | 30 | run_experiments( 31 | script=script, 32 | affinity_code=affinity_code, 33 | experiment_title=experiment_title, 34 | runs_per_setting=runs_per_setting, 35 | variants=variants, 36 | log_dirs=log_dirs, 37 | common_args=(default_config_key,), 38 | ) 39 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_dqn_async_gpu.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from 
rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_async_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=8, 10 | async_sample=True, 11 | sample_gpu_per_run=0, 12 | gpu_per_run=1, 13 | hyperthread_offset=24, 14 | optim_sample_share_gpu=True, 15 | # n_socket=2, 16 | ) 17 | runs_per_setting = 2 18 | experiment_title = "atari_dqn_async_gpu" 19 | variant_levels = list() 20 | 21 | games = ["pong", "seaquest", "qbert", "chopper_command"] 22 | values = list(zip(games)) 23 | dir_names = ["{}".format(*v) for v in values] 24 | keys = [("env", "game")] 25 | variant_levels.append(VariantLevel(keys, values, dir_names)) 26 | 27 | variants, log_dirs = make_variants(*variant_levels) 28 | 29 | default_config_key = "cpu" 30 | 31 | run_experiments( 32 | script=script, 33 | affinity_code=affinity_code, 34 | experiment_title=experiment_title, 35 | runs_per_setting=runs_per_setting, 36 | variants=variants, 37 | log_dirs=log_dirs, 38 | common_args=(default_config_key,), 39 | ) 40 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_dqn_async_gpu_scale_pri.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_async_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=8, 10 | async_sample=True, 11 | sample_gpu_per_run=2, 12 | gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | # n_socket=2, 16 | ) 17 | runs_per_setting = 2 18 | experiment_title = "atari_dqn_async_gpu" 19 | variant_levels = list() 20 | 21 | games = ["pong"] # , "seaquest", "qbert", "chopper_command"] 22 | values = list(zip(games)) 23 | dir_names = ["{}".format(*v) for v in values] 24 | keys = [("env", "game")] 25 | variant_levels.append(VariantLevel(keys, values, dir_names)) 26 | 27 | priorities = [False, True] 28 | values = list(zip(priorities)) 29 | dir_names = ["pri_{}".format(*v) for v in values] 30 | keys = [("algo", "prioritized_replay")] 31 | variant_levels.append(VariantLevel(keys, values, dir_names)) 32 | 33 | variants, log_dirs = make_variants(*variant_levels) 34 | 35 | default_config_key = "async_big" 36 | 37 | run_experiments( 38 | script=script, 39 | affinity_code=affinity_code, 40 | experiment_title=experiment_title, 41 | runs_per_setting=runs_per_setting, 42 | variants=variants, 43 | log_dirs=log_dirs, 44 | common_args=(default_config_key,), 45 | ) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_dqn_async_serial.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_async_serial.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=8, 10 | async_sample=True, 11 | sample_gpu_per_run=0, 12 | gpu_per_run=1, 13 | hyperthread_offset=24, 14 | # n_socket=2, 15 | ) 16 | 
runs_per_setting = 2 17 | experiment_title = "atari_dqn_async_serial" 18 | variant_levels = list() 19 | 20 | games = ["pong", "seaquest", "qbert", "chopper_command"] 21 | values = list(zip(games)) 22 | dir_names = ["{}".format(*v) for v in values] 23 | keys = [("env", "game")] 24 | variant_levels.append(VariantLevel(keys, values, dir_names)) 25 | 26 | variants, log_dirs = make_variants(*variant_levels) 27 | 28 | default_config_key = "cpu" # "serial" 29 | 30 | run_experiments( 31 | script=script, 32 | affinity_code=affinity_code, 33 | experiment_title=experiment_title, 34 | runs_per_setting=runs_per_setting, 35 | variants=variants, 36 | log_dirs=log_dirs, 37 | common_args=(default_config_key,), 38 | ) 39 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_dqn_cpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_cpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=24, 9 | n_gpu=6, 10 | # hyperthread_offset=24, 11 | n_socket=2, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_dqn_basic_cpu" 16 | variant_levels = list() 17 | 18 | games = ["pong", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "dqn" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_dqn_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=8, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_dqn_basic" 15 | variant_levels = list() 16 | 17 | games = ["pong", "seaquest", "qbert", "chopper_command"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) for v in values] 20 | keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = make_variants(*variant_levels) 24 | 25 | default_config_key = "dqn" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- 
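
A note on the affinity plumbing these launchers share with the train scripts further below: encode_affinity() packs a machine description into a string, the launcher binds each run to a hardware slot, and the train script decodes its slice of the machine with affinity_from_code(). A minimal sketch, assuming prepend_run_slot() is the helper that performs the slot binding (the core counts are illustrative):

from rlpyt.utils.launching.affinity import (
    affinity_from_code,
    encode_affinity,
    prepend_run_slot,
)

# Describe the whole machine once, as the launch scripts above do.
affinity_code = encode_affinity(n_cpu_cores=4, n_gpu=1, n_socket=1)

# Bind this process to run slot 0 (assumed helper), then decode the resulting
# resource assignment (CPU cores for sampler workers, GPU index, and so on).
slot_affinity_code = prepend_run_slot(0, affinity_code)
affinity = affinity_from_code(slot_affinity_code)
print(affinity)

--------------------------------------------------------------------------------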
/rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_dqn_gpu_noeval.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_gpu_noeval.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=8, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_dqn_no_eval" 15 | variant_levels = list() 16 | 17 | games = ["pong", "seaquest", "qbert", "chopper_command"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) for v in values] 20 | keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = make_variants(*variant_levels) 24 | 25 | default_config_key = "no_eval" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_dqn_serial.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_serial.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=8, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_dqn_basic_serial" 15 | variant_levels = list() 16 | 17 | games = ["pong", "seaquest", "qbert", "chopper_command"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) for v in values] 20 | keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = make_variants(*variant_levels) 24 | 25 | default_config_key = "dqn" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_ernbw_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_catdqn_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=8, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_ernbw_basic" 15 | variant_levels = list() 16 | 17 | games = ["pong", "seaquest", "qbert", "chopper_command"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) for v in values] 20 | 
keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = make_variants(*variant_levels) 24 | 25 | default_config_key = "ernbw" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_r2d1_async_alt.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 16 | alternating=True, 17 | ) 18 | runs_per_setting = 2 19 | experiment_title = "atari_r2d1_async_alt" 20 | variant_levels = list() 21 | 22 | games = ["pong", "seaquest", "qbert", "chopper_command"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_pabti" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_r2d1_async_alt_chopper_command.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 
16 | alternating=True, 17 | ) 18 | runs_per_setting = 1 19 | experiment_title = "atari_r2d1_async_alt" 20 | variant_levels = list() 21 | 22 | games = ["chopper_command"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_pabti" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_r2d1_async_alt_gravitar.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 16 | alternating=True, 17 | ) 18 | runs_per_setting = 1 19 | experiment_title = "atari_r2d1_async_alt" 20 | variant_levels = list() 21 | 22 | games = ["gravitar"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_pabti" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_r2d1_async_alt_qbert.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_alt.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 
16 | alternating=True, 17 | ) 18 | runs_per_setting = 1 19 | experiment_title = "atari_r2d1_async_alt" 20 | variant_levels = list() 21 | 22 | games = ["qbert"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_pabti" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_r2d1_async_gpu_qbert.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_async_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=4, 10 | async_sample=True, 11 | gpu_per_run=1, 12 | sample_gpu_per_run=2, 13 | # hyperthread_offset=24, 14 | # optim_sample_share_gpu=True, 15 | n_socket=1, # Force this. 16 | alternating=False, 17 | ) 18 | runs_per_setting = 1 19 | experiment_title = "atari_r2d1_async_gpu" 20 | variant_levels = list() 21 | 22 | games = ["qbert"] 23 | values = list(zip(games)) 24 | dir_names = ["{}".format(*v) for v in values] 25 | keys = [("env", "game")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | default_config_key = "async_alt_pabti" 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_r2d1_long_4tr_asteroids.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=24, 9 | n_gpu=2, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_r2d1_long_4tr" 15 | variant_levels = list() 16 | 17 | games = ["asteroids"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) for v in values] 20 | keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = make_variants(*variant_levels) 24 | 25 | default_config_key = "r2d1_long_4tr" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | 
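
The single-game launchers above differ only in their games list, so the variant machinery reduces to one VariantLevel. For reference, a sketch of that expansion in isolation; the output shapes in the comments are assumptions about make_variants(), which is taken to build one nested dict and one directory name per value tuple:

from rlpyt.utils.launching.variant import make_variants, VariantLevel

games = ["gravitar", "asteroids"]
values = list(zip(games))                  # [("gravitar",), ("asteroids",)]
dir_names = ["{}".format(*v) for v in values]
keys = [("env", "game")]                   # maps onto config["env"]["game"]
variants, log_dirs = make_variants(VariantLevel(keys, values, dir_names))
# variants -> [{"env": {"game": "gravitar"}}, {"env": {"game": "asteroids"}}]
# log_dirs -> ["gravitar", "asteroids"]
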
-------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_r2d1_long_4tr_chopper_command.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=24, 9 | n_gpu=2, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_r2d1_long_4tr" 15 | variant_levels = list() 16 | 17 | games = ["chopper_command"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) for v in values] 20 | keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = make_variants(*variant_levels) 24 | 25 | default_config_key = "r2d1_long_4tr" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_r2d1_long_4tr_gravitar.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=24, 9 | n_gpu=2, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_r2d1_long_4tr" 15 | variant_levels = list() 16 | 17 | games = ["gravitar"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) for v in values] 20 | keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = make_variants(*variant_levels) 24 | 25 | default_config_key = "r2d1_long_4tr" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_r2d1_long_4tr_seaquest.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=24, 9 | n_gpu=2, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_r2d1_long_4tr" 15 | variant_levels = list() 16 | 17 | games = ["seaquest"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) 
for v in values] 20 | keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = make_variants(*variant_levels) 24 | 25 | default_config_key = "r2d1_long_4tr" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_r2d1_long_gt_ad.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=24, 9 | n_gpu=4, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_r2d1_long" 15 | variant_levels = list() 16 | 17 | games = ["gravitar", "asteroids"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) for v in values] 20 | keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = make_variants(*variant_levels) 24 | 25 | default_config_key = "r2d1_long" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/launch/pabti/launch_atari_r2d1_long_sq_cc.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/dqn/train/atari_r2d1_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=24, 9 | n_gpu=4, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | ) 13 | runs_per_setting = 2 14 | experiment_title = "atari_r2d1_long" 15 | variant_levels = list() 16 | 17 | games = ["seaquest", "chopper_command"] 18 | values = list(zip(games)) 19 | dir_names = ["{}".format(*v) for v in values] 20 | keys = [("env", "game")] 21 | variant_levels.append(VariantLevel(keys, values, dir_names)) 22 | 23 | variants, log_dirs = make_variants(*variant_levels) 24 | 25 | default_config_key = "r2d1_long" 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_gpu_noeval.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.gpu.parallel_sampler import 
GpuParallelSampler 6 | from rlpyt.samplers.gpu.collectors import WaitResetCollector 7 | from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo 8 | from rlpyt.algos.dqn.dqn import DQN 9 | from rlpyt.agents.dqn.atari.atari_dqn_agent import AtariDqnAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.atari.dqn.atari_dqn import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | config["eval_env"]["game"] = config["env"]["game"] 23 | 24 | sampler = GpuParallelSampler( 25 | EnvCls=AtariEnv, 26 | env_kwargs=config["env"], 27 | CollectorCls=WaitResetCollector, 28 | TrajInfoCls=AtariTrajInfo, 29 | eval_env_kwargs=config["eval_env"], 30 | **config["sampler"] 31 | ) 32 | algo = DQN(optim_kwargs=config["optim"], **config["algo"]) 33 | agent = AtariDqnAgent(model_kwargs=config["model"], **config["agent"]) 34 | runner = MinibatchRl( 35 | algo=algo, 36 | agent=agent, 37 | sampler=sampler, 38 | affinity=affinity, 39 | **config["runner"] 40 | ) 41 | name = config["env"]["game"] 42 | with logger_context(log_dir, run_ID, name, config): 43 | runner.train() 44 | 45 | 46 | if __name__ == "__main__": 47 | build_and_train(*sys.argv[1:]) 48 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/dqn/train/atari_dqn_serial.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.serial_sampler import SerialSampler 6 | from rlpyt.samplers.cpu.collectors import ResetCollector 7 | from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo 8 | from rlpyt.algos.dqn.dqn import DQN 9 | from rlpyt.agents.dqn.atari.atari_dqn_agent import AtariDqnAgent 10 | from rlpyt.runners.minibatch_rl_eval import MinibatchRlEval 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.atari.dqn.atari_dqn import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | config["eval_env"]["game"] = config["env"]["game"] 23 | 24 | sampler = SerialSampler( 25 | EnvCls=AtariEnv, 26 | env_kwargs=config["env"], 27 | CollectorCls=ResetCollector, 28 | TrajInfoCls=AtariTrajInfo, 29 | eval_env_kwargs=config["eval_env"], 30 | **config["sampler"] 31 | ) 32 | algo = DQN(optim_kwargs=config["optim"], **config["algo"]) 33 | agent = AtariDqnAgent(model_kwargs=config["model"], **config["agent"]) 34 | runner = MinibatchRlEval( 35 | algo=algo, 36 | agent=agent, 37 | sampler=sampler, 38 | affinity=affinity, 39 | **config["runner"] 40 | ) 41 | name = config["env"]["game"] 42 | with logger_context(log_dir, run_ID, name, config): 43 | runner.train() 44 | 45 | 46 | if __name__ == "__main__": 47 | build_and_train(*sys.argv[1:]) 48 | -------------------------------------------------------------------------------- 
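
Both train scripts above resolve their configuration the same way: config_key selects a base dict from configs, load_variant() reads the variant that the launcher wrote into the log directory, and update_config() deep-merges the two before the eval environment is synced to the training game. A short sketch of that resolution, with an inline variant standing in for the one on disk:

from rlpyt.utils.launching.variant import update_config
from rlpyt.experiments.configs.atari.dqn.atari_dqn import configs

config = configs["dqn"]                    # base config chosen by config_key
variant = {"env": {"game": "seaquest"}}    # stand-in for load_variant(log_dir)
config = update_config(config, variant)    # deep-merge variant over base
config["eval_env"]["game"] = config["env"]["game"]  # keep eval on the same game
print(config["env"]["game"])               # -> "seaquest"

--------------------------------------------------------------------------------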
/rlpyt/experiments/scripts/atari/pg/launch/got/launch_atari_ff_a2c_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=4, 10 | hyperthread_offset=20, 11 | n_socket=2 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_ff_a2c_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "0" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/got/launch_atari_ff_a2c_gpu_low_lr.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=4, 10 | hyperthread_offset=20, 11 | n_socket=2 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_ff_a2c_low_lr" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "low_lr" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/got/launch_atari_ff_a2c_gpu_multi.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu_multi.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=16, 9 | n_gpu=4, 10 | hyperthread_offset=20, 11 | n_socket=2, 12 | gpu_per_run=2, 13 | # cpu_per_run=2, 14 | ) 15 | runs_per_setting = 2 16 | experiment_title = "atari_ff_a2c_multi" 17 | variant_levels = list() 18 | 19 | games = ["pong", "seaquest", "qbert", "chopper_command"] 20 | 
values = list(zip(games)) 21 | dir_names = ["{}".format(*v) for v in values] 22 | keys = [("env", "game")] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | default_config_key = "2gpu" 28 | 29 | run_experiments( 30 | script=script, 31 | affinity_code=affinity_code, 32 | experiment_title=experiment_title, 33 | runs_per_setting=runs_per_setting, 34 | variants=variants, 35 | log_dirs=log_dirs, 36 | common_args=(default_config_key,), 37 | ) 38 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/got/launch_atari_ff_ppo_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_ppo_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=4, 10 | hyperthread_offset=20, 11 | n_socket=2 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_ff_ppo_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "0" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/got/launch_atari_lstm_a2c_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_lstm_a2c_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=4, 10 | hyperthread_offset=20, 11 | n_socket=2 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_lstm_a2c_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "0" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/got/launch_atari_lstm_ppo_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from 
rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_lstm_ppo_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=4, 10 | hyperthread_offset=20, 11 | n_socket=2 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_lstm_ppo_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "0" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/launch_atari_ff_a2c_cpu.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_cpu.py" 7 | default_config_key = "0" 8 | affinity_code = encode_affinity( 9 | n_cpu_cores=4, 10 | n_gpu=2, 11 | hyperthread_offset=8, 12 | n_socket=1, 13 | # cpu_per_run=4, 14 | ) 15 | runs_per_setting = 2 16 | experiment_title = "ff_retest_GPU_opt" 17 | variant_levels = list() 18 | 19 | learning_rate = [7e-4] 20 | batch_B = [32] 21 | values = list(zip(learning_rate, batch_B)) 22 | dir_names = ["test_{}lr_{}B".format(*v) for v in values] 23 | keys = [("algo", "learning_rate"), ("sampler", "batch_B")] 24 | variant_levels.append(VariantLevel(keys, values, dir_names)) 25 | 26 | 27 | games = ["pong", "seaquest"] 28 | values = list(zip(games)) 29 | dir_names = ["{}".format(*v) for v in values] 30 | keys = [("env", "game")] 31 | variant_levels.append(VariantLevel(keys, values, dir_names)) 32 | 33 | variants, log_dirs = make_variants(*variant_levels) 34 | 35 | run_experiments( 36 | script=script, 37 | affinity_code=affinity_code, 38 | experiment_title=experiment_title, 39 | runs_per_setting=runs_per_setting, 40 | variants=variants, 41 | log_dirs=log_dirs, 42 | common_args=(default_config_key,), 43 | ) 44 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/launch_atari_ff_a2c_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=4, 9 | n_gpu=2, 10 | hyperthread_offset=8, 11 | n_socket=1, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_ff_a2c_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", 
"seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "0" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/launch_atari_ff_a2c_gpu_multi.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu_multi.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=8, 9 | n_gpu=2, 10 | hyperthread_offset=8, 11 | n_socket=1, 12 | contexts_per_run=2, 13 | # cpu_per_run=2, 14 | ) 15 | runs_per_setting = 1 16 | experiment_title = "atari_ff_a2c_multi" 17 | variant_levels = list() 18 | 19 | games = ["pong"] # , "seaquest", "qbert", "chopper_command"] 20 | values = list(zip(games)) 21 | dir_names = ["{}".format(*v) for v in values] 22 | keys = [("env", "game")] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | default_config_key = "0" 28 | 29 | run_experiments( 30 | script=script, 31 | affinity_code=affinity_code, 32 | experiment_title=experiment_title, 33 | runs_per_setting=runs_per_setting, 34 | variants=variants, 35 | log_dirs=log_dirs, 36 | common_args=(default_config_key,), 37 | ) 38 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/launch_atari_ff_ppo_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_ppo_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=4, 9 | n_gpu=2, 10 | hyperthread_offset=8, 11 | n_socket=1, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_ff_ppo_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "0" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/launch_atari_lstm_a2c_gpu.py: 
-------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_lstm_a2c_gpu.py" 7 | # default_config_key = "0" 8 | affinity_code = encode_affinity( 9 | n_cpu_cores=6, 10 | n_gpu=2, 11 | hyperthread_offset=8, 12 | n_socket=1, 13 | # cpu_per_run=2, 14 | ) 15 | runs_per_setting = 1 16 | experiment_title = "lstm_4frame_test" 17 | variant_levels = list() 18 | 19 | learning_rate = [1e-4] * 4 20 | entropy_loss_coeff = [0.01, 0.4, 0.04, 0.1] 21 | values = list(zip(learning_rate, entropy_loss_coeff)) 22 | dir_names = ["test_{}lr_{}ent".format(*v) for v in values] 23 | keys = [("algo", "learning_rate"), ("algo", "entropy_loss_coeff")] 24 | variant_levels.append(VariantLevel(keys, values, dir_names)) 25 | 26 | 27 | games = ["seaquest"] 28 | values = list(zip(games)) 29 | dir_names = ["{}".format(*v) for v in values] 30 | keys = [("env", "game")] 31 | variant_levels.append(VariantLevel(keys, values, dir_names)) 32 | 33 | variants, log_dirs = make_variants(*variant_levels) 34 | 35 | default_config_key = "4frame" 36 | 37 | run_experiments( 38 | script=script, 39 | affinity_code=affinity_code, 40 | experiment_title=experiment_title, 41 | runs_per_setting=runs_per_setting, 42 | variants=variants, 43 | log_dirs=log_dirs, 44 | common_args=(default_config_key,), 45 | ) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/launch_atari_lstm_a2c_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_lstm_a2c_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=4, 9 | n_gpu=2, 10 | hyperthread_offset=8, 11 | n_socket=1, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_lstm_a2c_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "0" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/launch_atari_lstm_ppo_gpu_basic.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_lstm_ppo_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=4, 9 | n_gpu=2, 10 | 
hyperthread_offset=8, 11 | n_socket=1, 12 | # cpu_per_run=2, 13 | ) 14 | runs_per_setting = 2 15 | experiment_title = "atari_lstm_ppo_basic" 16 | variant_levels = list() 17 | 18 | games = ["pong", "seaquest", "qbert", "chopper_command"] 19 | values = list(zip(games)) 20 | dir_names = ["{}".format(*v) for v in values] 21 | keys = [("env", "game")] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | variants, log_dirs = make_variants(*variant_levels) 25 | 26 | default_config_key = "0" 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/launch/pabti/launch_atari_ff_a2c_gpu_multi.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu_multi.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=8, 10 | hyperthread_offset=24, 11 | n_socket=2, 12 | contexts_per_run=2, 13 | # cpu_per_run=2, 14 | ) 15 | runs_per_setting = 1 16 | experiment_title = "atari_ff_a2c_multi" 17 | variant_levels = list() 18 | 19 | games = ["pong", "seaquest", "qbert", "chopper_command"] 20 | values = list(zip(games)) 21 | dir_names = ["{}".format(*v) for v in values] 22 | keys = [("env", "game")] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | default_config_key = "0" 28 | 29 | run_experiments( 30 | script=script, 31 | affinity_code=affinity_code, 32 | experiment_title=experiment_title, 33 | runs_per_setting=runs_per_setting, 34 | variants=variants, 35 | log_dirs=log_dirs, 36 | common_args=(default_config_key,), 37 | ) 38 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_cpu.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.cpu.parallel_sampler import CpuParallelSampler 6 | from rlpyt.samplers.cpu.episodic_lives_collectors import EpisodicLivesWaitResetCollector 7 | from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo 8 | from rlpyt.algos.policy_gradient.a2c import A2C 9 | from rlpyt.agents.policy_gradient.atari.atari_ff_agent import AtariFfAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.atari.pg.atari_ff_a2c import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | 23 | sampler = CpuParallelSampler( 24 | EnvCls=AtariEnv, 25 | env_kwargs=config["env"], 26 | CollectorCls=EpisodicLivesWaitResetCollector, 27 | 
TrajInfoCls=AtariTrajInfo, 28 | **config["sampler"] 29 | ) 30 | algo = A2C(optim_kwargs=config["optim"], **config["algo"]) 31 | agent = AtariFfAgent(model_kwargs=config["model"], **config["agent"]) 32 | runner = MinibatchRl( 33 | algo=algo, 34 | agent=agent, 35 | sampler=sampler, 36 | affinity=affinity, 37 | **config["runner"] 38 | ) 39 | name = config["env"]["game"] 40 | with logger_context(log_dir, run_ID, name, config): # Might have to flatten config 41 | runner.train() 42 | 43 | 44 | if __name__ == "__main__": 45 | build_and_train(*sys.argv[1:]) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_cpu_test.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.cpu.parallel_sampler import CpuParallelSampler 6 | from rlpyt.samplers.cpu.episodic_lives_collectors import EpisodicLivesWaitResetCollector 7 | from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo 8 | from rlpyt.algos.policy_gradient.a2c import A2C 9 | from rlpyt.agents.policy_gradient.atari.atari_ff_agent import AtariFfAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.atari.pg.atari_ff_a2c import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | # variant = load_variant(log_dir) 21 | # config = update_config(config, variant) 22 | 23 | sampler = CpuParallelSampler( 24 | EnvCls=AtariEnv, 25 | env_kwargs=config["env"], 26 | CollectorCls=EpisodicLivesWaitResetCollector, 27 | TrajInfoCls=AtariTrajInfo, 28 | **config["sampler"] 29 | ) 30 | algo = A2C(optim_kwargs=config["optim"], **config["algo"]) 31 | agent = AtariFfAgent(model_kwargs=config["model"], **config["agent"]) 32 | runner = MinibatchRl( 33 | algo=algo, 34 | agent=agent, 35 | sampler=sampler, 36 | affinity=affinity, 37 | **config["runner"] 38 | ) 39 | name = config["env"]["game"] 40 | with logger_context(log_dir, run_ID, name, config): # Might have to flatten config 41 | runner.train() 42 | 43 | 44 | if __name__ == "__main__": 45 | build_and_train(*sys.argv[1:]) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.parallel.gpu.sampler import GpuSampler 6 | from rlpyt.samplers.parallel.gpu.collectors import GpuWaitResetCollector 7 | from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo 8 | from rlpyt.algos.pg.a2c import A2C 9 | from rlpyt.agents.pg.atari import AtariFfAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.atari.pg.atari_ff_a2c import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = 
load_variant(log_dir) 21 | config = update_config(config, variant) 22 | 23 | sampler = GpuSampler( 24 | EnvCls=AtariEnv, 25 | env_kwargs=config["env"], 26 | CollectorCls=GpuWaitResetCollector, 27 | TrajInfoCls=AtariTrajInfo, 28 | **config["sampler"] 29 | ) 30 | algo = A2C(optim_kwargs=config["optim"], **config["algo"]) 31 | agent = AtariFfAgent(model_kwargs=config["model"], **config["agent"]) 32 | runner = MinibatchRl( 33 | algo=algo, 34 | agent=agent, 35 | sampler=sampler, 36 | affinity=affinity, 37 | **config["runner"] 38 | ) 39 | name = config["env"]["game"] 40 | with logger_context(log_dir, run_ID, name, config): 41 | runner.train() 42 | 43 | 44 | if __name__ == "__main__": 45 | build_and_train(*sys.argv[1:]) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/train/atari_ff_a2c_gpu_multi.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.parallel.gpu.sampler import GpuSampler 6 | from rlpyt.samplers.parallel.gpu.collectors import GpuWaitResetCollector 7 | from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo 8 | from rlpyt.algos.pg.a2c import A2C 9 | from rlpyt.agents.pg.atari import AtariFfAgent 10 | from rlpyt.runners.sync_rl import SyncRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.atari.pg.atari_ff_a2c import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | assert isinstance(affinity, list) # One for each GPU. 
20 | config = configs[config_key] 21 | variant = load_variant(log_dir) 22 | config = update_config(config, variant) 23 | 24 | sampler = GpuSampler( 25 | EnvCls=AtariEnv, 26 | env_kwargs=config["env"], 27 | CollectorCls=GpuWaitResetCollector, 28 | TrajInfoCls=AtariTrajInfo, 29 | **config["sampler"] 30 | ) 31 | algo = A2C(optim_kwargs=config["optim"], **config["algo"]) 32 | agent = AtariFfAgent(model_kwargs=config["model"], **config["agent"]) 33 | runner = SyncRl( 34 | algo=algo, 35 | agent=agent, 36 | sampler=sampler, 37 | affinity=affinity, 38 | **config["runner"] 39 | ) 40 | name = config["env"]["game"] 41 | with logger_context(log_dir, run_ID, name, config): 42 | runner.train() 43 | 44 | 45 | if __name__ == "__main__": 46 | build_and_train(*sys.argv[1:]) 47 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/train/atari_ff_ppo_gpu.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.parallel.gpu.sampler import GpuSampler 6 | from rlpyt.samplers.parallel.gpu.collectors import GpuWaitResetCollector 7 | from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo 8 | from rlpyt.algos.pg.ppo import PPO 9 | from rlpyt.agents.pg.atari import AtariFfAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.atari.pg.atari_ff_ppo import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | 23 | sampler = GpuSampler( 24 | EnvCls=AtariEnv, 25 | env_kwargs=config["env"], 26 | CollectorCls=GpuWaitResetCollector, 27 | TrajInfoCls=AtariTrajInfo, 28 | **config["sampler"] 29 | ) 30 | algo = PPO(optim_kwargs=config["optim"], **config["algo"]) 31 | agent = AtariFfAgent(model_kwargs=config["model"], **config["agent"]) 32 | runner = MinibatchRl( 33 | algo=algo, 34 | agent=agent, 35 | sampler=sampler, 36 | affinity=affinity, 37 | **config["runner"] 38 | ) 39 | name = config["env"]["game"] 40 | with logger_context(log_dir, run_ID, name, config): 41 | runner.train() 42 | 43 | 44 | if __name__ == "__main__": 45 | build_and_train(*sys.argv[1:]) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/train/atari_lstm_a2c_cpu_test.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.cpu.parallel_sampler import CpuParallelSampler 6 | from rlpyt.samplers.cpu.episodic_lives_collectors import EpisodicLivesWaitResetCollector 7 | from rlpyt.envs.atari.atari_env import AtariEnv 8 | from rlpyt.algos.policy_gradient.a2c import A2C 9 | from rlpyt.agents.policy_gradient.atari.atari_lstm_agent import AtariLstmAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.atari.pg.atari_lstm_a2c import configs 15 | 16 | 17 | def 
build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | # variant = load_variant(log_dir) 21 | # config = update_config(config, variant) 22 | 23 | sampler = CpuParallelSampler( 24 | EnvCls=AtariEnv, 25 | env_kwargs=config["env"], 26 | CollectorCls=EpisodicLivesWaitResetCollector, 27 | **config["sampler"] 28 | ) 29 | algo = A2C(optim_kwargs=config["optim"], **config["algo"]) 30 | agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"]) 31 | runner = MinibatchRl( 32 | algo=algo, 33 | agent=agent, 34 | sampler=sampler, 35 | affinity=affinity, 36 | **config["runner"] 37 | ) 38 | name = config["env"]["game"] + str(config["algo"]["entropy_loss_coeff"]) 39 | with logger_context(log_dir, run_ID, name, config): 40 | runner.train() 41 | 42 | 43 | if __name__ == "__main__": 44 | build_and_train(*sys.argv[1:]) 45 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/train/atari_lstm_a2c_gpu.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.parallel.gpu.sampler import GpuSampler 6 | from rlpyt.samplers.parallel.gpu.collectors import GpuWaitResetCollector 7 | from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo 8 | from rlpyt.algos.pg.a2c import A2C 9 | from rlpyt.agents.pg.atari import AtariLstmAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.atari.pg.atari_lstm_a2c import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | 23 | sampler = GpuSampler( 24 | EnvCls=AtariEnv, 25 | env_kwargs=config["env"], 26 | CollectorCls=GpuWaitResetCollector, 27 | TrajInfoCls=AtariTrajInfo, 28 | **config["sampler"] 29 | ) 30 | algo = A2C(optim_kwargs=config["optim"], **config["algo"]) 31 | agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"]) 32 | runner = MinibatchRl( 33 | algo=algo, 34 | agent=agent, 35 | sampler=sampler, 36 | affinity=affinity, 37 | **config["runner"] 38 | ) 39 | name = config["env"]["game"] 40 | with logger_context(log_dir, run_ID, name, config): 41 | runner.train() 42 | 43 | 44 | if __name__ == "__main__": 45 | build_and_train(*sys.argv[1:]) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/atari/pg/train/atari_lstm_ppo_gpu.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.gpu.parallel_sampler import GpuParallelSampler 6 | from rlpyt.samplers.gpu.collectors import WaitResetCollector 7 | from rlpyt.envs.atari.atari_env import AtariEnv, AtariTrajInfo 8 | from rlpyt.algos.pg.ppo import PPO 9 | from rlpyt.agents.pg.atari import AtariLstmAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | 
from rlpyt.experiments.configs.atari.pg.atari_lstm_ppo import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | 23 | sampler = GpuParallelSampler( 24 | EnvCls=AtariEnv, 25 | env_kwargs=config["env"], 26 | CollectorCls=WaitResetCollector, 27 | TrajInfoCls=AtariTrajInfo, 28 | **config["sampler"] 29 | ) 30 | algo = PPO(optim_kwargs=config["optim"], **config["algo"]) 31 | agent = AtariLstmAgent(model_kwargs=config["model"], **config["agent"]) 32 | runner = MinibatchRl( 33 | algo=algo, 34 | agent=agent, 35 | sampler=sampler, 36 | affinity=affinity, 37 | **config["runner"] 38 | ) 39 | name = config["env"]["game"] 40 | with logger_context(log_dir, run_ID, name, config): 41 | runner.train() 42 | 43 | 44 | if __name__ == "__main__": 45 | build_and_train(*sys.argv[1:]) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/cloth_env_overview.txt: -------------------------------------------------------------------------------- 1 | Cloth_v0: The agent can actuate all four corners at the same time 2 | Cloth_v7: The agent picks both the location and the force; the location is chosen on a 1x1 grid and then mapped to the nearest joint 3 | Cloth_v8: The environment picks a random joint to activate, which is incorporated into the state observation 4 | 5 | cloth_sim_state: new cloth environment, state observations + random joint position to perturb 6 | cloth_sim: new cloth environment, pixel observations + perturb random point on image -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_pixels_cloth_sim.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | contexts_per_gpu=1, 11 | ) 12 | 13 | runs_per_setting = 2 14 | default_config_key = "sac_pixels_cloth_sim" 15 | experiment_title = "sac_dm_control_pixels_cloth_sim" 16 | variant_levels = list() 17 | 18 | domains = ['cloth_sim'] 19 | tasks = ['easy'] 20 | values = list(zip(domains, tasks)) 21 | dir_names = ['domain_{}_task_{}'.format(*v) for v in values] 22 | keys = [('env', 'domain'), ('env', 'task')] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_pixels_clothv0.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from
rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | contexts_per_gpu=1, 11 | n_socket=2, 12 | ) 13 | 14 | runs_per_setting = 2 15 | default_config_key = "sac_pixels_clothv0" 16 | experiment_title = "sac_dm_control_pixels_clothv0" 17 | variant_levels = list() 18 | 19 | domain = ['cloth_v0'] 20 | task = ['easy'] 21 | values = list(zip(domain, task)) 22 | dir_names = ["env_{}_{}".format(*v) for v in values] 23 | keys = [('env', 'domain'), ('env', 'task')] 24 | variant_levels.append(VariantLevel(keys, values, dir_names)) 25 | 26 | batch_B = [2, 32] 27 | batch_size = [256, 1024] 28 | learning_rate = [3e-4, 6e-4] 29 | replay_ratio = [128, 128] 30 | values = list(zip(batch_B, batch_size, learning_rate, replay_ratio)) 31 | dir_names = ["batch_B{}_bs{}_lr{}_ratio{}".format(*v) for v in values] 32 | keys = [('sampler', 'batch_B'), ('algo', 'batch_size'), ('algo', 'learning_rate'), ('algo', 'replay_ratio')] 33 | variant_levels.append(VariantLevel(keys, values, dir_names)) 34 | 35 | variants, log_dirs = make_variants(*variant_levels) 36 | 37 | run_experiments( 38 | script=script, 39 | affinity_code=affinity_code, 40 | experiment_title=experiment_title, 41 | runs_per_setting=runs_per_setting, 42 | variants=variants, 43 | log_dirs=log_dirs, 44 | common_args=(default_config_key,), 45 | ) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_pixels_clothv8.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | contexts_per_gpu=1, 11 | n_socket=2, 12 | ) 13 | 14 | runs_per_setting = 1 15 | default_config_key = "sac_pixels_clothv8" 16 | experiment_title = "sac_dm_control_pixels_clothv8" 17 | variant_levels = list() 18 | 19 | domain = ['cloth_v8'] 20 | task = ['easy'] 21 | values = list(zip(domain, task)) 22 | dir_names = ["env_{}_{}".format(*v) for v in values] 23 | keys = [('env', 'domain'), ('env', 'task')] 24 | variant_levels.append(VariantLevel(keys, values, dir_names)) 25 | 26 | modes = ['corners', 'border', '3x3', '9x9'] 27 | values = list(zip(modes)) 28 | dir_names = ['mode_{}'.format(*v) for v in values] 29 | keys = [('env', 'task_kwargs', 'mode')] 30 | variant_levels.append(VariantLevel(keys, values, dir_names)) 31 | 32 | #distance_weight = [0.0, 2.0] 33 | #values = list(zip(distance_weight)) 34 | #dir_names = ['distance_weight_{}'.format(*v) for v in values] 35 | #keys = [('env', 'task_kwargs', 'distance_weight')] 36 | #variant_levels.append(VariantLevel(keys, values, dir_names)) 37 | 38 | variants, log_dirs = make_variants(*variant_levels) 39 | 40 | run_experiments( 41 | script=script, 42 | affinity_code=affinity_code, 43 | experiment_title=experiment_title, 44 | runs_per_setting=runs_per_setting, 45 | variants=variants, 46 | log_dirs=log_dirs, 47 | common_args=(default_config_key,), 48 | ) 49 | -------------------------------------------------------------------------------- 
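The clothv8 launcher just above stacks two VariantLevels: within a level the lists are zipped element-wise, while make_variants is assumed to cross levels combinatorially, so the single (domain, task) setting times four modes yields four variants and four log dirs (re-enabling the commented-out distance_weight level would double that to eight). A minimal sketch of that combination, reusing the values from the file above:

from itertools import product

env_values = [("cloth_v8", "easy")]  # level 1: zip(domain, task)
mode_values = [("corners",), ("border",), ("3x3",), ("9x9",)]  # level 2: zip(modes)

# Cartesian product across levels, concatenating the zipped tuples.
combined = [e + m for e, m in product(env_values, mode_values)]
assert len(combined) == 4  # 1 env setting x 4 modes
print(combined[0])  # ('cloth_v8', 'easy', 'corners')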
/rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_pixels_rope.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=4, 10 | contexts_per_gpu=1, 11 | ) 12 | 13 | runs_per_setting = 4 14 | default_config_key = "sac_pixels_rope" 15 | experiment_title = "pixels_rope" 16 | variant_levels = list() 17 | 18 | domain = ['rope_sac'] 19 | task = ['easy'] 20 | values = list(zip(domain, task)) 21 | dir_names = ["env_{}_{}".format(*v) for v in values] 22 | keys = [('env', 'domain'), ('env', 'task')] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | model_cls = ['PiConvModel']#, 'GumbelPiConvModel'] 26 | sac_module = ['sac_v2']#, 'sac_v2_generic'] 27 | sac_agent_module = ['sac_agent_v2']#, 'sac_agent_v2_generic'] 28 | state_keys= [['location', 'pixels']]#, ['pixels']] 29 | values = list(zip(model_cls, sac_module, sac_agent_module, state_keys)) 30 | dir_names = ["model_cls_{}".format(*v) for v in values] 31 | keys = [('agent', 'ModelCls'), ('sac_module',), 32 | ('sac_agent_module',), ('state_keys',)] 33 | variant_levels.append(VariantLevel(keys, values, dir_names)) 34 | 35 | variants, log_dirs = make_variants(*variant_levels) 36 | 37 | run_experiments( 38 | script=script, 39 | affinity_code=affinity_code, 40 | experiment_title=experiment_title, 41 | runs_per_setting=runs_per_setting, 42 | variants=variants, 43 | log_dirs=log_dirs, 44 | common_args=(default_config_key,), 45 | ) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_pixels_rope_two_hand.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | contexts_per_gpu=1, 11 | ) 12 | 13 | runs_per_setting = 4 14 | default_config_key = "sac_pixels_rope" 15 | experiment_title = "pixels_rope_two_hand_fixed_geom" 16 | variant_levels = list() 17 | 18 | domain = ['rope_two_hand'] 19 | task = ['easy'] 20 | values = list(zip(domain, task)) 21 | dir_names = ["env_{}_{}".format(*v) for v in values] 22 | keys = [('env', 'domain'), ('env', 'task')] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | model_cls = ['PiConvModel']#, 'GumbelPiConvModel'] 26 | sac_module = ['sac_v2']#, 'sac_v2_generic'] 27 | sac_agent_module = ['sac_agent_v2']#, 'sac_agent_v2_generic'] 28 | state_keys= [['location', 'pixels']]#, ['pixels']] 29 | values = list(zip(model_cls, sac_module, sac_agent_module, state_keys)) 30 | dir_names = ["model_cls_{}".format(*v) for v in values] 31 | keys = [('agent', 'ModelCls'), ('sac_module',), 32 | ('sac_agent_module',), ('state_keys',)] 33 | variant_levels.append(VariantLevel(keys, values, dir_names)) 34 | 35 | variants, log_dirs = make_variants(*variant_levels) 36 | 37 | run_experiments( 38 | 
script=script, 39 | affinity_code=affinity_code, 40 | experiment_title=experiment_title, 41 | runs_per_setting=runs_per_setting, 42 | variants=variants, 43 | log_dirs=log_dirs, 44 | common_args=(default_config_key,), 45 | ) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_pixels_rope_v1.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=4, 10 | contexts_per_gpu=1, 11 | ) 12 | 13 | runs_per_setting = 4 14 | default_config_key = "sac_pixels_ropev1" 15 | experiment_title = "sac_dm_control_pixels_ropev1" 16 | variant_levels = list() 17 | 18 | domain = ['rope_v1'] 19 | task = ['easy'] 20 | values = list(zip(domain, task)) 21 | dir_names = ["env_{}_{}".format(*v) for v in values] 22 | keys = [('env', 'domain'), ('env', 'task')] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_state_cloth_corner.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | contexts_per_gpu=1, 11 | ) 12 | 13 | runs_per_setting = 4 14 | default_config_key = "sac_state_cloth_corner" 15 | experiment_title = "sac_dm_control_state_cloth_corner" 16 | variant_levels = list() 17 | 18 | domain = ['cloth_corner'] 19 | task = ['easy'] 20 | values = list(zip(domain, task)) 21 | dir_names = ["env_{}_{}".format(*v) for v in values] 22 | keys = [('env', 'domain'), ('env', 'task')] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | model_cls = ['PiMlpModel'] #['PiMlpModel', 'GumbelPiMlpModel'] 26 | random_location = [True] #[True, False] 27 | sac_module = ['sac_v2'] #['sac_v2', 'sac_v2_generic'] 28 | sac_agent_module = ['sac_agent_v2'] #['sac_agent_v2', 'sac_agent_v2_generic'] 29 | values = list(zip(model_cls, random_location, sac_module, sac_agent_module)) 30 | dir_names = ["cloth_corner".format(*v) for v in values] 31 | keys = [('agent', 'ModelCls'), ('env', 'task_kwargs', 'random_location'), 32 | ('sac_module',), ('sac_agent_module',)] 33 | variant_levels.append(VariantLevel(keys, values, dir_names)) 34 | 35 | variants, log_dirs = make_variants(*variant_levels) 36 | 37 | run_experiments( 38 | script=script, 39 | affinity_code=affinity_code, 40 | experiment_title=experiment_title, 41 | runs_per_setting=runs_per_setting, 42 | 
variants=variants, 43 | log_dirs=log_dirs, 44 | common_args=(default_config_key,), 45 | ) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_state_cloth_point.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | contexts_per_gpu=1, 11 | ) 12 | 13 | runs_per_setting = 4 14 | default_config_key = "sac_state_cloth_corner" 15 | experiment_title = "sac_dm_control_state_cloth_point" 16 | variant_levels = list() 17 | 18 | domain = ['cloth_point_state'] 19 | task = ['easy'] 20 | values = list(zip(domain, task)) 21 | dir_names = ["env_{}_{}".format(*v) for v in values] 22 | keys = [('env', 'domain'), ('env', 'task')] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | model_cls = ['PiMlpModel'] 26 | random_location = [True] 27 | sac_module = ['sac_v2'] 28 | sac_agent_module = ['sac_agent_v2'] 29 | values = list(zip(model_cls, random_location, sac_module, sac_agent_module)) 30 | dir_names = ["model_cls_{}_rnd_loc_{}".format(*v) for v in values] 31 | keys = [('agent', 'ModelCls'), ('env', 'task_kwargs', 'random_location'), 32 | ('sac_module',), ('sac_agent_module',)] 33 | variant_levels.append(VariantLevel(keys, values, dir_names)) 34 | 35 | variants, log_dirs = make_variants(*variant_levels) 36 | 37 | run_experiments( 38 | script=script, 39 | affinity_code=affinity_code, 40 | experiment_title=experiment_title, 41 | runs_per_setting=runs_per_setting, 42 | variants=variants, 43 | log_dirs=log_dirs, 44 | common_args=(default_config_key,), 45 | ) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_state_cloth_script.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=8, 10 | contexts_per_gpu=1, 11 | ) 12 | 13 | runs_per_setting = 4 14 | default_config_key = "sac_state_cloth_corner" 15 | experiment_title = "sac_state_cloth_corner_point_condition_and_rnd_loc" 16 | variant_levels = list() 17 | 18 | domain = ['cloth_corner', 'cloth_corner', 'cloth_point_state', 'cloth_point_state'] 19 | task = ['easy'] * 4 20 | model_cls = ['PiMlpModel', 'GumbelPiMlpModel', 'PiMlpModel', 'PiMlpModel'] 21 | random_location = [True, False, True, False] 22 | sac_module = ['sac_v2', 'sac_v2_generic', 'sac_v2', 'sac_v2'] 23 | sac_agent_module = ['sac_agent_v2', 'sac_agent_v2_generic', 'sac_agent_v2', 'sac_agent_v2'] 24 | values = list(zip(domain, task, model_cls, random_location, sac_module, sac_agent_module)) 25 | dir_names = ["env_{}_{}_modelcls_{}_rnd_loc_{}".format(*v) for v in values] 26 | keys = [('env', 'domain'), ('env', 'task'), ('agent', 'ModelCls'), ('env', 'task_kwargs', 'random_location'), 27 | 
('sac_module',), ('sac_agent_module',)] 28 | variant_levels.append(VariantLevel(keys, values, dir_names)) 29 | variants, log_dirs = make_variants(*variant_levels) 30 | 31 | run_experiments( 32 | script=script, 33 | affinity_code=affinity_code, 34 | experiment_title=experiment_title, 35 | runs_per_setting=runs_per_setting, 36 | variants=variants, 37 | log_dirs=log_dirs, 38 | common_args=(default_config_key,), 39 | ) 40 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_state_cloth_sim.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=20, 9 | n_gpu=4, 10 | contexts_per_gpu=1, 11 | ) 12 | 13 | runs_per_setting = 2 14 | default_config_key = "sac_state_cloth_sim" 15 | experiment_title = "sac_dm_control_state_cloth_sim" 16 | variant_levels = list() 17 | 18 | modes = ['corners', 'border', 'inner_border', '3x3', '5x5', '9x9'] 19 | values = list(zip(modes)) 20 | dir_names = ['mode_{}'.format(*v) for v in values] 21 | keys = [('env', 'task_kwargs', 'mode')] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | max_path_lengths = [15, 30] 25 | values = list(zip(max_path_lengths)) 26 | dir_names = ['mpl_{}'.format(*v) for v in values] 27 | keys = [('env', 'max_path_length')] 28 | variant_levels.append(VariantLevel(keys, values, dir_names)) 29 | 30 | variants, log_dirs = make_variants(*variant_levels) 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_state_clothv0.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=8, 10 | contexts_per_gpu=1, 11 | n_socket=2, 12 | ) 13 | 14 | runs_per_setting = 2 15 | default_config_key = "sac_state_clothv0" 16 | experiment_title = "sac_dm_control_state_clothv0" 17 | variant_levels = list() 18 | 19 | domain = ['cloth_v0'] 20 | task = ['easy'] 21 | values = list(zip(domain, task)) 22 | dir_names = ["env_{}_{}".format(*v) for v in values] 23 | keys = [('env', 'domain'), ('env', 'task')] 24 | variant_levels.append(VariantLevel(keys, values, dir_names)) 25 | 26 | batch_B = [32, 32, 32, 32] 27 | batch_size = [256, 256, 1024, 1024] 28 | learning_rate = [3e-4, 3e-4, 6e-4, 6e-4] 29 | replay_ratio = [32, 128, 32, 128] 30 | values = list(zip(batch_B, batch_size, learning_rate, replay_ratio)) 31 | dir_names = ["batch_B{}_bs{}_lr{}_ratio{}".format(*v) for v in values] 32 | keys = [('sampler', 'batch_B'), ('algo', 'batch_size'), 
('algo', 'learning_rate'), ('algo', 'replay_ratio')] 33 | variant_levels.append(VariantLevel(keys, values, dir_names)) 34 | 35 | variants, log_dirs = make_variants(*variant_levels) 36 | 37 | run_experiments( 38 | script=script, 39 | affinity_code=affinity_code, 40 | experiment_title=experiment_title, 41 | runs_per_setting=runs_per_setting, 42 | variants=variants, 43 | log_dirs=log_dirs, 44 | common_args=(default_config_key,), 45 | ) 46 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_state_clothv7.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac_autoreg.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=16, 9 | n_gpu=4, 10 | contexts_per_gpu=2, 11 | ) 12 | 13 | runs_per_setting = 2 14 | default_config_key = "sac_state_clothv7" 15 | experiment_title = "sac_dm_control_state_clothv7" 16 | variant_levels = list() 17 | 18 | modes = ['corners', 'border', 'inner_border', '3x3', '5x5', '9x9'] 19 | values = list(zip(modes)) 20 | dir_names = ['mode_{}'.format(*v) for v in values] 21 | keys = [('env', 'task_kwargs', 'mode')] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | distance_weight = [0.0, 2.0] 25 | values = list(zip(distance_weight)) 26 | dir_names = ['distance_weight_{}'.format(*v) for v in values] 27 | keys = [('env', 'task_kwargs', 'distance_weight')] 28 | variant_levels.append(VariantLevel(keys, values, dir_names)) 29 | 30 | variants, log_dirs = make_variants(*variant_levels) 31 | 32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_state_clothv8.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=16, 9 | n_gpu=4, 10 | contexts_per_gpu=2, 11 | ) 12 | 13 | runs_per_setting = 2 14 | default_config_key = "sac_state_clothv8" 15 | experiment_title = "sac_dm_control_state_clothv8" 16 | variant_levels = list() 17 | 18 | modes = ['corners', 'border', 'inner_border', '3x3', '5x5', '9x9'] 19 | values = list(zip(modes)) 20 | dir_names = ['mode_{}'.format(*v) for v in values] 21 | keys = [('env', 'task_kwargs', 'mode')] 22 | variant_levels.append(VariantLevel(keys, values, dir_names)) 23 | 24 | distance_weight = [0.0, 2.0] 25 | values = list(zip(distance_weight)) 26 | dir_names = ['distance_weight_{}'.format(*v) for v in values] 27 | keys = [('env', 'task_kwargs', 'distance_weight')] 28 | variant_levels.append(VariantLevel(keys, values, dir_names)) 29 | 30 | variants, log_dirs = make_variants(*variant_levels) 31 | 
32 | run_experiments( 33 | script=script, 34 | affinity_code=affinity_code, 35 | experiment_title=experiment_title, 36 | runs_per_setting=runs_per_setting, 37 | variants=variants, 38 | log_dirs=log_dirs, 39 | common_args=(default_config_key,), 40 | ) 41 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_state_rope.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=4, 10 | contexts_per_gpu=1, 11 | ) 12 | 13 | runs_per_setting = 4 14 | default_config_key = "sac_state_rope_v2" 15 | experiment_title = "sac_dm_control_state_ropev2" 16 | variant_levels = list() 17 | 18 | domain = ['rope_v2'] 19 | task = ['easy'] 20 | values = list(zip(domain, task)) 21 | dir_names = ["env_{}_{}".format(*v) for v in values] 22 | keys = [('env', 'domain'), ('env', 'task')] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/launch/launch_dm_control_sac_state_rope_script.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=24, 9 | n_gpu=8, 10 | contexts_per_gpu=1, 11 | ) 12 | 13 | runs_per_setting = 4 14 | default_config_key = "sac_state_cloth_corner" 15 | experiment_title = "sac_state_cloth_point_all" 16 | variant_levels = list() 17 | 18 | # exp 1: simultaneous pick / place 19 | # exp 2: conditional pick / place 20 | # exp 3: random loc, learned place 21 | 22 | domain = ['cloth_point_state'] * 3 23 | task = ['easy'] * 3 24 | model_cls = ['PiMlpModel', 'AutoregPiMlpModel', 'PiMlpModel'] 25 | random_location = [False, False, True] 26 | sac_module = ['sac_v2'] * 3 27 | sac_agent_module = ['sac_agent_v2', 'sac_agent_autoreg_v2', 'sac_agent_v2'] 28 | name = ['simultaneous', 'conditional', 'random_pick'] 29 | values = list(zip(domain, task, model_cls, random_location, sac_module, sac_agent_module, name)) 30 | dir_names = ['simultaneous', 'conditional', 'random_pick'] 31 | keys = [('env', 'domain'), ('env', 'task'), ('agent', 'ModelCls'), ('env', 'task_kwargs', 'random_location'), 32 | ('sac_module',), ('sac_agent_module',), ('name',)] 33 | variant_levels.append(VariantLevel(keys, values, dir_names)) 34 | variants, log_dirs = make_variants(*variant_levels) 35 | 36 | run_experiments( 37 | script=script, 38 | affinity_code=affinity_code, 39 | experiment_title=experiment_title, 40 | 
runs_per_setting=runs_per_setting, 41 | variants=variants, 42 | log_dirs=log_dirs, 43 | common_args=(default_config_key,), 44 | ) 45 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/old/dm_control_sac_serial.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.serial.sampler import SerialSampler 6 | from rlpyt.samplers.parallel.cpu.collectors import CpuResetCollector 7 | from rlpyt.envs.dm_control_env import DMControlEnv 8 | from rlpyt.algos.qpg.sac import SAC 9 | from rlpyt.agents.qpg.sac_agent import SacAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRlEval 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.dm_control.qpg.sac.dm_control_sac import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | print('Variant', variant) 22 | config = update_config(config, variant) 23 | 24 | sampler = SerialSampler( 25 | EnvCls=DMControlEnv, 26 | env_kwargs=config["env"], 27 | CollectorCls=CpuResetCollector, 28 | eval_env_kwargs=config["eval_env"], 29 | **config["sampler"] 30 | ) 31 | algo = SAC(optim_kwargs=config["optim"], **config["algo"]) 32 | agent = SacAgent(**config["agent"]) 33 | runner = MinibatchRlEval( 34 | algo=algo, 35 | agent=agent, 36 | sampler=sampler, 37 | affinity=affinity, 38 | **config["runner"] 39 | ) 40 | name = "sac_{}_{}".format(config['env']['domain'], 41 | config['env']['task']) 42 | with logger_context(log_dir, run_ID, name, config, snapshot_mode='last'): 43 | runner.train() 44 | 45 | 46 | if __name__ == "__main__": 47 | build_and_train(*sys.argv[1:]) 48 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/dm_control/qpg/sac/old/launch_dm_control_sac_serial.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac_serial.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=2, 9 | n_gpu=1, 10 | hyperthread_offset=2, 11 | n_socket=1, 12 | cpu_per_run=1, 13 | ) 14 | runs_per_setting = 1 15 | default_config_key = "sac_1M_serial" 16 | experiment_title = "sac_dm_control_serial" 17 | variant_levels = list() 18 | 19 | domain = ['cloth_v0'] 20 | task = ['easy'] 21 | values = list(zip(domain, task)) 22 | dir_names = ["env_{}_{}".format(*v) for v in values] 23 | keys = [('env', 'domain'), ('env', 'task')] 24 | variant_levels.append(VariantLevel(keys, values, dir_names)) 25 | 26 | variants, log_dirs = make_variants(*variant_levels) 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- 
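Aside: each launcher above hands off to run_experiments, which assigns every (variant, run) pair a hardware slot from affinity_code, builds the nested log dir, and spawns the train script with positional arguments (slot_affinity_code, log_dir, run_ID, *common_args) -- exactly what build_and_train(*sys.argv[1:]) unpacks in the train scripts. A minimal sketch of one spawned command for the serial launcher above, with placeholders for values filled in at launch time:

python rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac_serial.py \
    <slot_affinity_code> <exp_dir>/sac_dm_control_serial/env_cloth_v0_easy <run_ID> sac_1M_serial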
/rlpyt/experiments/scripts/dm_control/qpg/sac/old/launch_dm_control_sac_serial_v2.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/dm_control/qpg/sac/train/dm_control_sac_serial_v2.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=2, 9 | n_gpu=1, 10 | hyperthread_offset=2, 11 | n_socket=1, 12 | cpu_per_run=1, 13 | ) 14 | runs_per_setting = 2 15 | default_config_key = "sac_1M_serial_v2" 16 | experiment_title = "sac_dm_control_serial_v2" 17 | variant_levels = list() 18 | 19 | domain = ['cloth_v0'] 20 | task = ['easy'] 21 | values = list(zip(domain, task)) 22 | dir_names = ["env_{}_{}".format(*v) for v in values] 23 | keys = [('env', 'domain'), ('env', 'task')] 24 | variant_levels.append(VariantLevel(keys, values, dir_names)) 25 | 26 | variants, log_dirs = make_variants(*variant_levels) 27 | 28 | run_experiments( 29 | script=script, 30 | affinity_code=affinity_code, 31 | experiment_title=experiment_title, 32 | runs_per_setting=runs_per_setting, 33 | variants=variants, 34 | log_dirs=log_dirs, 35 | common_args=(default_config_key,), 36 | ) 37 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/pg/launch/launch_mujoco_a2c_cpu.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_a2c_cpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=2, 9 | n_gpu=0, 10 | hyperthread_offset=2, 11 | n_socket=1, 12 | cpu_per_run=2, 13 | ) 14 | runs_per_setting = 1 15 | default_config_key = "a2c_1M" 16 | experiment_title = "first_test_mujoco" 17 | variant_levels = list() 18 | 19 | env_ids = ["Hopper-v2"] # , "Swimmer-v3"] 20 | values = list(zip(env_ids)) 21 | dir_names = ["env_{}".format(*v) for v in values] 22 | keys = [("env", "id")] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/pg/launch/launch_mujoco_ppo_cpu.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_ppo_cpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=2, 9 | n_gpu=0, 10 | hyperthread_offset=2, 11 | n_socket=1, 12 | cpu_per_run=2, 13 | ) 14 | runs_per_setting = 1 15 | default_config_key = "ppo_1M" 16 | experiment_title = "first_test_mujoco" 17 | variant_levels = list() 18 | 19 | env_ids = ["Hopper-v2"] # , "Swimmer-v3"] 20 
| values = list(zip(env_ids)) 21 | dir_names = ["env_{}".format(*v) for v in values] 22 | keys = [("env", "id")] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/pg/launch/launch_mujoco_ppo_gpu.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_ppo_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=2, 9 | n_gpu=0, 10 | hyperthread_offset=2, 11 | n_socket=1, 12 | cpu_per_run=2, 13 | ) 14 | runs_per_setting = 1 15 | default_config_key = "ppo_1M" 16 | experiment_title = "first_test_mujoco" 17 | variant_levels = list() 18 | 19 | env_ids = ["Hopper-v2"] # , "Swimmer-v3"] 20 | values = list(zip(env_ids)) 21 | dir_names = ["env_{}".format(*v) for v in values] 22 | keys = [("env", "id")] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/pg/launch/launch_mujoco_ppo_serial.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_ppo_serial.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=2, 9 | n_gpu=0, 10 | hyperthread_offset=2, 11 | n_socket=1, 12 | cpu_per_run=2, 13 | ) 14 | runs_per_setting = 1 15 | default_config_key = "ppo_1M_serial" 16 | experiment_title = "first_test_mujoco" 17 | variant_levels = list() 18 | 19 | env_ids = ["Hopper-v3"] # , "Swimmer-v3"] 20 | values = list(zip(env_ids)) 21 | dir_names = ["env_{}".format(*v) for v in values] 22 | keys = [("env", "id")] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/pg/launch/pabti/launch_mujoco_ppo_serial.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from 
rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_ppo_serial.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=16, 9 | n_gpu=8, 10 | contexts_per_gpu=2, 11 | hyperthread_offset=24, 12 | n_socket=2, 13 | # cpu_per_run=2, 14 | ) 15 | runs_per_setting = 4 16 | default_config_key = "ppo_1M_serial" 17 | experiment_title = "ppo_mujoco_serial" 18 | variant_levels = list() 19 | 20 | env_ids = ["Hopper-v3", "Swimmer-v3", "HalfCheetah-v3", 21 | "Walker2d-v3", "Ant-v3", "Humanoid-v3"] 22 | values = list(zip(env_ids)) 23 | dir_names = ["env_{}".format(*v) for v in values] 24 | keys = [("env", "id")] 25 | variant_levels.append(VariantLevel(keys, values, dir_names)) 26 | 27 | variants, log_dirs = make_variants(*variant_levels) 28 | 29 | run_experiments( 30 | script=script, 31 | affinity_code=affinity_code, 32 | experiment_title=experiment_title, 33 | runs_per_setting=runs_per_setting, 34 | variants=variants, 35 | log_dirs=log_dirs, 36 | common_args=(default_config_key,), 37 | ) 38 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_a2c_cpu.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.cpu.parallel_sampler import CpuParallelSampler 6 | from rlpyt.samplers.cpu.collectors import ResetCollector 7 | from rlpyt.envs.gym import make as gym_make 8 | from rlpyt.algos.pg.a2c import A2C 9 | from rlpyt.agents.pg.mujoco import MujocoFfAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.mujoco.pg.mujoco_a2c import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | 23 | sampler = CpuParallelSampler( 24 | EnvCls=gym_make, 25 | env_kwargs=config["env"], 26 | CollectorCls=ResetCollector, 27 | **config["sampler"] 28 | ) 29 | algo = A2C(optim_kwargs=config["optim"], **config["algo"]) 30 | agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"]) 31 | runner = MinibatchRl( 32 | algo=algo, 33 | agent=agent, 34 | sampler=sampler, 35 | affinity=affinity, 36 | **config["runner"] 37 | ) 38 | name = config["env"]["id"] 39 | with logger_context(log_dir, run_ID, name, config): 40 | runner.train() 41 | 42 | 43 | if __name__ == "__main__": 44 | build_and_train(*sys.argv[1:]) 45 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_ppo_cpu.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.parallel.cpu.sampler import CpuSampler 6 | from rlpyt.samplers.parallel.cpu.collectors import CpuResetCollector 7 | from rlpyt.envs.gym import make as gym_make 8 | from rlpyt.algos.pg.ppo import PPO 9 | from rlpyt.agents.pg.mujoco import MujocoFfAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from 
rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.mujoco.pg.mujoco_ppo import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | 23 | sampler = CpuSampler( 24 | EnvCls=gym_make, 25 | env_kwargs=config["env"], 26 | CollectorCls=CpuResetCollector, 27 | **config["sampler"] 28 | ) 29 | algo = PPO(optim_kwargs=config["optim"], **config["algo"]) 30 | agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"]) 31 | runner = MinibatchRl( 32 | algo=algo, 33 | agent=agent, 34 | sampler=sampler, 35 | affinity=affinity, 36 | **config["runner"] 37 | ) 38 | name = config["env"]["id"] 39 | with logger_context(log_dir, run_ID, name, config): 40 | runner.train() 41 | 42 | 43 | if __name__ == "__main__": 44 | build_and_train(*sys.argv[1:]) 45 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ff_ppo_gpu.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.gpu.parallel_sampler import GpuParallelSampler 6 | from rlpyt.samplers.gpu.collectors import ResetCollector 7 | from rlpyt.envs.gym import make as gym_make 8 | from rlpyt.algos.pg.ppo import PPO 9 | from rlpyt.agents.pg.mujoco import MujocoFfAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.mujoco.pg.mujoco_ppo import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | 23 | sampler = GpuParallelSampler( 24 | EnvCls=gym_make, 25 | env_kwargs=config["env"], 26 | CollectorCls=ResetCollector, 27 | **config["sampler"] 28 | ) 29 | algo = PPO(optim_kwargs=config["optim"], **config["algo"]) 30 | agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"]) 31 | runner = MinibatchRl( 32 | algo=algo, 33 | agent=agent, 34 | sampler=sampler, 35 | affinity=affinity, 36 | **config["runner"] 37 | ) 38 | name = config["env"]["id"] 39 | with logger_context(log_dir, run_ID, name, config): 40 | runner.train() 41 | 42 | 43 | if __name__ == "__main__": 44 | build_and_train(*sys.argv[1:]) 45 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/pg/train/mujoco_ppo_serial.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.serial.sampler import SerialSampler 6 | from rlpyt.samplers.parallel.cpu.collectors import CpuResetCollector 7 | from rlpyt.envs.gym import make as gym_make 8 | from rlpyt.algos.pg.ppo import PPO 9 | from rlpyt.agents.pg.mujoco import MujocoFfAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from 
rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.mujoco.pg.mujoco_ppo import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | 23 | sampler = SerialSampler( 24 | EnvCls=gym_make, 25 | env_kwargs=config["env"], 26 | CollectorCls=CpuResetCollector, 27 | **config["sampler"] 28 | ) 29 | algo = PPO(optim_kwargs=config["optim"], **config["algo"]) 30 | agent = MujocoFfAgent(model_kwargs=config["model"], **config["agent"]) 31 | runner = MinibatchRl( 32 | algo=algo, 33 | agent=agent, 34 | sampler=sampler, 35 | affinity=affinity, 36 | **config["runner"] 37 | ) 38 | name = "ppo_" + config["env"]["id"] 39 | with logger_context(log_dir, run_ID, name, config): 40 | runner.train() 41 | 42 | 43 | if __name__ == "__main__": 44 | build_and_train(*sys.argv[1:]) 45 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/qpg/launch/got/launch_mujoco_ddpg_async_serial.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_ddpg_async_serial.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=16, 9 | n_gpu=4, 10 | # contexts_per_gpu=2, 11 | async_sample=True, 12 | # hyperthread_offset=2, 13 | # n_socket=1, 14 | # cpu_per_run=1, 15 | ) 16 | runs_per_setting = 2 17 | default_config_key = "async_serial" 18 | experiment_title = "ddpg_mujoco_async" 19 | variant_levels = list() 20 | 21 | env_ids = ["Hopper-v3", "HalfCheetah-v3"] # , "Swimmer-v3"] 22 | values = list(zip(env_ids)) 23 | dir_names = ["env_{}".format(*v) for v in values] 24 | keys = [("env", "id")] 25 | variant_levels.append(VariantLevel(keys, values, dir_names)) 26 | 27 | variants, log_dirs = make_variants(*variant_levels) 28 | 29 | run_experiments( 30 | script=script, 31 | affinity_code=affinity_code, 32 | experiment_title=experiment_title, 33 | runs_per_setting=runs_per_setting, 34 | variants=variants, 35 | log_dirs=log_dirs, 36 | common_args=(default_config_key,), 37 | ) 38 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/qpg/launch/got/launch_mujoco_sac_async_gpu.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_sac_async_gpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=16, 9 | n_gpu=4, 10 | # contexts_per_gpu=2, 11 | async_sample=True, 12 | sample_gpu_per_run=1, 13 | # hyperthread_offset=2, 14 | # n_socket=1, 15 | # cpu_per_run=1, 16 | ) 17 | runs_per_setting = 2 18 | default_config_key = "async_gpu" 19 | experiment_title = "sac_mujoco_async" 20 | variant_levels = list() 21 | 22 | env_ids = ["Hopper-v3", "HalfCheetah-v3"] # , "Swimmer-v3"] 23 | values = list(zip(env_ids)) 24 | dir_names = ["env_{}".format(*v) for v in values] 25 | 
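# e.g. dir_names == ["env_Hopper-v3", "env_HalfCheetah-v3"], one log dir per env variant.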
keys = [("env", "id")] 26 | variant_levels.append(VariantLevel(keys, values, dir_names)) 27 | 28 | variants, log_dirs = make_variants(*variant_levels) 29 | 30 | run_experiments( 31 | script=script, 32 | affinity_code=affinity_code, 33 | experiment_title=experiment_title, 34 | runs_per_setting=runs_per_setting, 35 | variants=variants, 36 | log_dirs=log_dirs, 37 | common_args=(default_config_key,), 38 | ) 39 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/qpg/launch/got/launch_mujoco_td3_async_cpu.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_td3_async_cpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_core=16, 9 | n_gpu=4, 10 | # contexts_per_gpu=2, 11 | async_sample=True, 12 | # hyperthread_offset=2, 13 | # n_socket=1, 14 | # cpu_per_run=1, 15 | ) 16 | runs_per_setting = 2 17 | default_config_key = "async_cpu" 18 | experiment_title = "td3_mujoco_async" 19 | variant_levels = list() 20 | 21 | env_ids = ["Hopper-v3", "HalfCheetah-v3"] # , "Swimmer-v3"] 22 | values = list(zip(env_ids)) 23 | dir_names = ["env_{}".format(*v) for v in values] 24 | keys = [("env", "id")] 25 | variant_levels.append(VariantLevel(keys, values, dir_names)) 26 | 27 | variants, log_dirs = make_variants(*variant_levels) 28 | 29 | run_experiments( 30 | script=script, 31 | affinity_code=affinity_code, 32 | experiment_title=experiment_title, 33 | runs_per_setting=runs_per_setting, 34 | variants=variants, 35 | log_dirs=log_dirs, 36 | common_args=(default_config_key,), 37 | ) 38 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/qpg/launch/launch_mujoco_ddpg_cpu.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_ddpg_cpu.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=2, 9 | n_gpu=0, 10 | hyperthread_offset=2, 11 | n_socket=1, 12 | cpu_per_run=2, 13 | ) 14 | runs_per_setting = 1 15 | default_config_key = "ddpg_from_td3_1M" 16 | experiment_title = "first_test_mujoco" 17 | variant_levels = list() 18 | 19 | env_ids = ["Hopper-v2"] # , "Swimmer-v3"] 20 | values = list(zip(env_ids)) 21 | dir_names = ["env_{}".format(*v) for v in values] 22 | keys = [("env", "id")] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/qpg/launch/launch_mujoco_ddpg_serial.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from 
rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_ddpg_serial.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=2, 9 | n_gpu=0, 10 | hyperthread_offset=2, 11 | n_socket=1, 12 | cpu_per_run=1, 13 | ) 14 | runs_per_setting = 2 15 | default_config_key = "ddpg_from_td3_1M_serial" 16 | experiment_title = "ddpg_mujoco" 17 | variant_levels = list() 18 | 19 | env_ids = ["Hopper-v2"] # , "Swimmer-v3"] 20 | values = list(zip(env_ids)) 21 | dir_names = ["env_{}".format(*v) for v in values] 22 | keys = [("env", "id")] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/qpg/launch/launch_mujoco_sac_serial.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_sac_serial.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=2, 9 | n_gpu=0, 10 | hyperthread_offset=2, 11 | n_socket=1, 12 | cpu_per_run=1, 13 | ) 14 | runs_per_setting = 2 15 | default_config_key = "sac_1M_serial" 16 | experiment_title = "sac_mujoco" 17 | variant_levels = list() 18 | 19 | env_ids = ["Hopper-v2"] # , "Swimmer-v3"] 20 | values = list(zip(env_ids)) 21 | dir_names = ["env_{}".format(*v) for v in values] 22 | keys = [("env", "id")] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | run_experiments( 28 | script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/qpg/launch/launch_mujoco_td3_serial.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.utils.launching.affinity import encode_affinity 3 | from rlpyt.utils.launching.exp_launcher import run_experiments 4 | from rlpyt.utils.launching.variant import make_variants, VariantLevel 5 | 6 | script = "rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_td3_serial.py" 7 | affinity_code = encode_affinity( 8 | n_cpu_cores=2, 9 | n_gpu=0, 10 | hyperthread_offset=2, 11 | n_socket=1, 12 | cpu_per_run=1, 13 | ) 14 | runs_per_setting = 2 15 | default_config_key = "td3_1M_serial" 16 | experiment_title = "td3_mujoco" 17 | variant_levels = list() 18 | 19 | env_ids = ["Hopper-v2"] # , "Swimmer-v3"] 20 | values = list(zip(env_ids)) 21 | dir_names = ["env_{}".format(*v) for v in values] 22 | keys = [("env", "id")] 23 | variant_levels.append(VariantLevel(keys, values, dir_names)) 24 | 25 | variants, log_dirs = make_variants(*variant_levels) 26 | 27 | run_experiments( 28 | 
script=script, 29 | affinity_code=affinity_code, 30 | experiment_title=experiment_title, 31 | runs_per_setting=runs_per_setting, 32 | variants=variants, 33 | log_dirs=log_dirs, 34 | common_args=(default_config_key,), 35 | ) 36 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_ddpg_cpu.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.cpu.parallel_sampler import CpuParallelSampler 6 | from rlpyt.samplers.cpu.collectors import ResetCollector 7 | from rlpyt.envs.gym import make as gym_make 8 | from rlpyt.algos.qpg.ddpg import DDPG 9 | from rlpyt.agents.qpg.ddpg_agent import DdpgAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRl 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.mujoco.qpg.mujoco_ddpg import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | 23 | sampler = CpuParallelSampler( 24 | EnvCls=gym_make, 25 | env_kwargs=config["env"], 26 | CollectorCls=ResetCollector, 27 | **config["sampler"] 28 | ) 29 | algo = DDPG(optim_kwargs=config["optim"], **config["algo"]) 30 | agent = DdpgAgent(**config["agent"]) 31 | runner = MinibatchRl( 32 | algo=algo, 33 | agent=agent, 34 | sampler=sampler, 35 | affinity=affinity, 36 | **config["runner"] 37 | ) 38 | name = config["env"]["id"] 39 | with logger_context(log_dir, run_ID, name, config): 40 | runner.train() 41 | 42 | 43 | if __name__ == "__main__": 44 | build_and_train(*sys.argv[1:]) 45 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_ddpg_serial.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.serial.sampler import SerialSampler 6 | from rlpyt.samplers.parallel.cpu.collectors import CpuResetCollector 7 | from rlpyt.envs.gym import make as gym_make 8 | from rlpyt.algos.qpg.ddpg import DDPG 9 | from rlpyt.agents.qpg.ddpg_agent import DdpgAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRlEval 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.mujoco.qpg.mujoco_ddpg import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | # config["eval_env"] = config["env"] 23 | 24 | sampler = SerialSampler( 25 | EnvCls=gym_make, 26 | env_kwargs=config["env"], 27 | CollectorCls=CpuResetCollector, 28 | eval_env_kwargs=config["env"], 29 | **config["sampler"] 30 | ) 31 | algo = DDPG(optim_kwargs=config["optim"], **config["algo"]) 32 | agent = DdpgAgent(**config["agent"]) 33 | runner = MinibatchRlEval( 34 | algo=algo, 35 | agent=agent, 36 | sampler=sampler, 37 | affinity=affinity, 38 | **config["runner"] 39 | ) 40 |
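# Note on output layout (an assumption from rlpyt's logging defaults):
# logger_context below creates <log_dir>/run_<run_ID>/ and saves the merged
# config there (e.g. params.json), so each variant/run pair logs separately.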
name = "ddpg_" + config["env"]["id"] 41 | with logger_context(log_dir, run_ID, name, config): 42 | runner.train() 43 | 44 | 45 | if __name__ == "__main__": 46 | build_and_train(*sys.argv[1:]) 47 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_sac_serial.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.serial.sampler import SerialSampler 6 | from rlpyt.samplers.parallel.cpu.collectors import CpuResetCollector 7 | from rlpyt.envs.gym import make as gym_make 8 | from rlpyt.algos.qpg.sac import SAC 9 | from rlpyt.agents.qpg.sac_agent import SacAgent 10 | from rlpyt.runners.minibatch_rl import MinibatchRlEval 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.mujoco.qpg.mujoco_sac import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | # config["eval_env"]["id"] = config["env"]["id"] 23 | 24 | sampler = SerialSampler( 25 | EnvCls=gym_make, 26 | env_kwargs=config["env"], 27 | CollectorCls=CpuResetCollector, 28 | eval_env_kwargs=config["env"], 29 | **config["sampler"] 30 | ) 31 | algo = SAC(optim_kwargs=config["optim"], **config["algo"]) 32 | agent = SacAgent(**config["agent"]) 33 | runner = MinibatchRlEval( 34 | algo=algo, 35 | agent=agent, 36 | sampler=sampler, 37 | affinity=affinity, 38 | **config["runner"] 39 | ) 40 | name = "sac_" + config["env"]["id"] 41 | with logger_context(log_dir, run_ID, name, config): 42 | runner.train() 43 | 44 | 45 | if __name__ == "__main__": 46 | build_and_train(*sys.argv[1:]) 47 | -------------------------------------------------------------------------------- /rlpyt/experiments/scripts/mujoco/qpg/train/mujoco_td3_serial.py: -------------------------------------------------------------------------------- 1 | 2 | import sys 3 | 4 | from rlpyt.utils.launching.affinity import affinity_from_code 5 | from rlpyt.samplers.serial.sampler import SerialSampler 6 | from rlpyt.samplers.parallel.cpu.collectors import CpuResetCollector 7 | from rlpyt.envs.gym import make as gym_make 8 | from rlpyt.algos.qpg.td3 import TD3 9 | from rlpyt.agents.qpg.td3_agent import Td3Agent 10 | from rlpyt.runners.minibatch_rl import MinibatchRlEval 11 | from rlpyt.utils.logging.context import logger_context 12 | from rlpyt.utils.launching.variant import load_variant, update_config 13 | 14 | from rlpyt.experiments.configs.mujoco.qpg.mujoco_td3 import configs 15 | 16 | 17 | def build_and_train(slot_affinity_code, log_dir, run_ID, config_key): 18 | affinity = affinity_from_code(slot_affinity_code) 19 | config = configs[config_key] 20 | variant = load_variant(log_dir) 21 | config = update_config(config, variant) 22 | # config["eval_env"]["id"] = config["env"]["id"] 23 | 24 | sampler = SerialSampler( 25 | EnvCls=gym_make, 26 | env_kwargs=config["env"], 27 | CollectorCls=CpuResetCollector, 28 | eval_env_kwargs=config["env"], 29 | **config["sampler"] 30 | ) 31 | algo = TD3(optim_kwargs=config["optim"], **config["algo"]) 32 | agent = Td3Agent(**config["agent"]) 33 | runner = MinibatchRlEval( 34 | algo=algo, 35 | agent=agent, 36 | 
sampler=sampler, 37 | affinity=affinity, 38 | **config["runner"] 39 | ) 40 | name = "td3_" + config["env"]["id"] 41 | with logger_context(log_dir, run_ID, name, config): 42 | runner.train() 43 | 44 | 45 | if __name__ == "__main__": 46 | build_and_train(*sys.argv[1:]) 47 | -------------------------------------------------------------------------------- /rlpyt/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/models/__init__.py -------------------------------------------------------------------------------- /rlpyt/models/dqn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/models/dqn/__init__.py -------------------------------------------------------------------------------- /rlpyt/models/mlp.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | 4 | 5 | class MlpModel(torch.nn.Module): 6 | """Multilayer Perceptron with last layer linear.""" 7 | 8 | def __init__( 9 | self, 10 | input_size, 11 | hidden_sizes, # Can be empty list for none. 12 | output_size=None, # if None, last layer has nonlinearity applied. 13 | nonlinearity=torch.nn.ReLU, # Module, not Functional. 14 | ): 15 | super().__init__() 16 | if isinstance(hidden_sizes, int): 17 | hidden_sizes = [hidden_sizes] 18 | hidden_layers = [torch.nn.Linear(n_in, n_out) for n_in, n_out in 19 | zip([input_size] + hidden_sizes[:-1], hidden_sizes)] 20 | sequence = list() 21 | for layer in hidden_layers: 22 | sequence.extend([layer, nonlinearity()]) 23 | if output_size is not None: 24 | last_size = hidden_sizes[-1] if hidden_sizes else input_size 25 | sequence.append(torch.nn.Linear(last_size, output_size)) 26 | self.model = torch.nn.Sequential(*sequence) 27 | self._output_size = (hidden_sizes[-1] if output_size is None 28 | else output_size) 29 | 30 | def forward(self, input): 31 | return self.model(input) 32 | 33 | @property 34 | def output_size(self): 35 | return self._output_size 36 | -------------------------------------------------------------------------------- /rlpyt/models/pg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/models/pg/__init__.py -------------------------------------------------------------------------------- /rlpyt/models/preprocessor.py: -------------------------------------------------------------------------------- 1 | 2 | def get_preprocessor(type): 3 | if type is None: 4 | return lambda x: x # Identity 5 | elif type == 'image': 6 | def image_preprocess(x): 7 | x = x.permute(0, 3, 1, 2).contiguous() 8 | x /= 255 # to [0, 1] 9 | x = 2 * x - 1 # to [-1, 1] 10 | return x 11 | return image_preprocess 12 | else: 13 | raise ValueError(type) 14 | 15 | -------------------------------------------------------------------------------- /rlpyt/models/qpg/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/models/qpg/__init__.py -------------------------------------------------------------------------------- /rlpyt/replays/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/replays/__init__.py -------------------------------------------------------------------------------- /rlpyt/replays/async_.py: -------------------------------------------------------------------------------- 1 | 2 | import multiprocessing as mp 3 | import ctypes 4 | 5 | from rlpyt.utils.synchronize import RWLock 6 | 7 | 8 | class AsyncReplayBufferMixin: 9 | 10 | async_ = True 11 | 12 | def __init__(self, *args, **kwargs): 13 | super().__init__(*args, **kwargs) 14 | self.async_t = mp.RawValue("l") # Type c_long. 15 | self.rw_lock = RWLock() 16 | self._async_buffer_full = mp.RawValue(ctypes.c_bool, False) 17 | 18 | def append_samples(self, *args, **kwargs): 19 | with self.rw_lock.write_lock: 20 | self._async_pull() # Updates from other writers. 21 | ret = super().append_samples(*args, **kwargs) 22 | self._async_push() # Updates to other writers + readers. 23 | return ret 24 | 25 | def sample_batch(self, *args, **kwargs): 26 | with self.rw_lock: # Read lock. 27 | self._async_pull() # Updates from writers. 28 | return super().sample_batch(*args, **kwargs) 29 | 30 | def update_batch_priorities(self, *args, **kwargs): 31 | with self.rw_lock.write_lock: 32 | return super().update_batch_priorities(*args, **kwargs) 33 | 34 | def _async_pull(self): 35 | self.t = self.async_t.value 36 | self._buffer_full = self._async_buffer_full.value 37 | 38 | def _async_push(self): 39 | self.async_t.value = self.t 40 | self._async_buffer_full.value = self._buffer_full 41 | -------------------------------------------------------------------------------- /rlpyt/replays/base.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class BaseReplayBuffer: 4 | 5 | async_ = False 6 | 7 | def append_samples(self, samples): 8 | """Add new data to the replay buffer, possibly ejecting old data.""" 9 | raise NotImplementedError 10 | 11 | def sample_batch(self, batch_B, batch_T=None): 12 | """Returns a data batch, e.g. for training.""" 13 | raise NotImplementedError 14 | -------------------------------------------------------------------------------- /rlpyt/replays/non_sequence/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/replays/non_sequence/__init__.py -------------------------------------------------------------------------------- /rlpyt/replays/non_sequence/frame.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from rlpyt.replays.non_sequence.n_step import NStepReturnBuffer 5 | from rlpyt.replays.frame import FrameBufferMixin 6 | from rlpyt.replays.non_sequence.uniform import UniformReplay 7 | from rlpyt.replays.non_sequence.prioritized import PrioritizedReplay 8 | from rlpyt.replays.async_ import AsyncReplayBufferMixin 9 | 10 | 11 | class NStepFrameBuffer(FrameBufferMixin, NStepReturnBuffer): 12 | 13 | def extract_observation(self, T_idxs, B_idxs): 14 | """Frames are returned OLDEST to NEWEST.""" 15 | # Begin/end frames duplicated in samples_frames so no wrapping here. 
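# (Context: the frame buffer keeps each unique frame only once, so an
#  n_frames observation is rebuilt as the slice [t : t + n_frames], using
#  roughly 1/n_frames the memory of storing fully stacked observations.)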
16 | # return np.stack([self.samples_frames[t:t + self.n_frames, b] 17 | # for t, b in zip(T_idxs, B_idxs)], axis=0) # [B,C,H,W] 18 | observation = np.stack([self.samples_frames[t:t + self.n_frames, b] 19 | for t, b in zip(T_idxs, B_idxs)], axis=0) # [B,C,H,W] 20 | # Populate empty (zero) frames after environment done. 21 | for f in range(1, self.n_frames): 22 | # e.g. if done 1 step prior, all but newest frame go blank. 23 | b_blanks = np.where(self.samples.done[T_idxs - f, B_idxs])[0] 24 | observation[b_blanks, :self.n_frames - f] = 0 25 | return observation 26 | 27 | 28 | class UniformReplayFrameBuffer(UniformReplay, NStepFrameBuffer): 29 | pass 30 | 31 | 32 | class PrioritizedReplayFrameBuffer(PrioritizedReplay, NStepFrameBuffer): 33 | pass 34 | 35 | 36 | class AsyncUniformReplayFrameBuffer(AsyncReplayBufferMixin, 37 | UniformReplayFrameBuffer): 38 | pass 39 | 40 | 41 | class AsyncPrioritizedReplayFrameBuffer(AsyncReplayBufferMixin, 42 | PrioritizedReplayFrameBuffer): 43 | pass 44 | -------------------------------------------------------------------------------- /rlpyt/replays/non_sequence/n_step.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from rlpyt.replays.n_step import BaseNStepReturnBuffer 5 | from rlpyt.agents.base import AgentInputs 6 | from rlpyt.utils.collections import namedarraytuple 7 | from rlpyt.utils.buffer import torchify_buffer 8 | 9 | SamplesFromReplay = namedarraytuple("SamplesFromReplay", 10 | ["agent_inputs", "action", "return_", "done", "done_n", "target_inputs"]) 11 | 12 | 13 | class NStepReturnBuffer(BaseNStepReturnBuffer): 14 | 15 | def extract_batch(self, T_idxs, B_idxs): 16 | s = self.samples 17 | target_T_idxs = (T_idxs + self.n_step_return) % self.T 18 | batch = SamplesFromReplay( 19 | agent_inputs=AgentInputs( 20 | observation=self.extract_observation(T_idxs, B_idxs), 21 | prev_action=s.action[T_idxs - 1, B_idxs], 22 | prev_reward=s.reward[T_idxs - 1, B_idxs], 23 | ), 24 | action=s.action[T_idxs, B_idxs], 25 | return_=self.samples_return_[T_idxs, B_idxs], 26 | done=self.samples.done[T_idxs, B_idxs], 27 | done_n=self.samples_done_n[T_idxs, B_idxs], 28 | target_inputs=AgentInputs( 29 | observation=self.extract_observation(target_T_idxs, B_idxs), 30 | prev_action=s.action[target_T_idxs - 1, B_idxs], 31 | prev_reward=s.reward[target_T_idxs - 1, B_idxs], 32 | ), 33 | ) 34 | t_news = np.where(s.done[T_idxs - 1, B_idxs])[0] 35 | batch.agent_inputs.prev_action[t_news] = 0 36 | batch.agent_inputs.prev_reward[t_news] = 0 37 | return torchify_buffer(batch) 38 | 39 | def extract_observation(self, T_idxs, B_idxs): 40 | """Generalization anticipating frame-based buffer.""" 41 | return self.samples.observation[T_idxs, B_idxs] 42 | -------------------------------------------------------------------------------- /rlpyt/replays/non_sequence/uniform.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from rlpyt.replays.non_sequence.n_step import NStepReturnBuffer 5 | from rlpyt.replays.async_ import AsyncReplayBufferMixin 6 | 7 | 8 | class UniformReplay: 9 | 10 | def sample_batch(self, batch_B): 11 | T_idxs, B_idxs = self.sample_idxs(batch_B) 12 | return self.extract_batch(T_idxs, B_idxs) 13 | 14 | def sample_idxs(self, batch_B): 15 | t, b, f = self.t, self.off_backward, self.off_forward 16 | high = self.T - b - f if self._buffer_full else t - b 17 | low = 0 if self._buffer_full else f 18 | T_idxs = np.random.randint(low=low, 
high=high, size=(batch_B,)) 19 | T_idxs[T_idxs >= t - b] += min(t, b) + f # min for invalid high t. 20 | B_idxs = np.random.randint(low=0, high=self.B, size=(batch_B,)) 21 | return T_idxs, B_idxs 22 | 23 | 24 | class UniformReplayBuffer(UniformReplay, NStepReturnBuffer): 25 | pass 26 | 27 | 28 | class AsyncUniformReplayBuffer(AsyncReplayBufferMixin, UniformReplayBuffer): 29 | pass 30 | -------------------------------------------------------------------------------- /rlpyt/replays/sequence/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/replays/sequence/__init__.py -------------------------------------------------------------------------------- /rlpyt/replays/sequence/uniform.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from rlpyt.replays.sequence.n_step import SequenceNStepReturnBuffer 5 | from rlpyt.replays.async_ import AsyncReplayBufferMixin 6 | 7 | 8 | class UniformSequenceReplay: 9 | 10 | def set_batch_T(self, batch_T): 11 | self.batch_T = batch_T # Can set dynamically. 12 | 13 | def sample_batch(self, batch_B, batch_T=None): 14 | T_idxs, B_idxs = self.sample_idxs(batch_B, batch_T) 15 | return self.extract_batch(T_idxs, B_idxs, batch_T) 16 | 17 | def sample_idxs(self, batch_B, batch_T=None): 18 | batch_T = self.batch_T if batch_T is None else batch_T 19 | t, b, f = self.t, self.off_backward + batch_T, self.off_forward 20 | high = self.T - b - f if self._buffer_full else t - b - f 21 | T_idxs = np.random.randint(low=0, high=high, size=(batch_B,)) 22 | T_idxs[T_idxs >= t - b] += min(t, b) + f 23 | if self.rnn_state_interval > 0: # Some rnn states stored; only sample those. 
24 | T_idxs = (T_idxs // self.rnn_state_interval) * self.rnn_state_interval 25 | B_idxs = np.random.randint(low=0, high=self.B, size=(batch_B,)) 26 | return T_idxs, B_idxs 27 | 28 | 29 | class UniformSequenceReplayBuffer(UniformSequenceReplay, 30 | SequenceNStepReturnBuffer): 31 | pass 32 | 33 | 34 | class AsyncUniformSequenceReplayBuffer(AsyncReplayBufferMixin, 35 | UniformSequenceReplayBuffer): 36 | pass 37 | -------------------------------------------------------------------------------- /rlpyt/runners/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/runners/__init__.py -------------------------------------------------------------------------------- /rlpyt/runners/base.py: -------------------------------------------------------------------------------- 1 | 2 | class BaseRunner: 3 | 4 | def train(self): 5 | raise NotImplementedError 6 | -------------------------------------------------------------------------------- /rlpyt/samplers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/samplers/__init__.py -------------------------------------------------------------------------------- /rlpyt/samplers/async_/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/samplers/async_/__init__.py -------------------------------------------------------------------------------- /rlpyt/samplers/async_/collectors.py: -------------------------------------------------------------------------------- 1 | 2 | from rlpyt.samplers.parallel.cpu.collectors import (CpuResetCollector, 3 | CpuWaitResetCollector) 4 | from rlpyt.samplers.parallel.gpu.collectors import (GpuResetCollector, 5 | GpuWaitResetCollector) 6 | 7 | 8 | class DoubleBufferCollectorMixin: 9 | 10 | def __init__(self, *args, **kwargs): 11 | super().__init__(*args, **kwargs) 12 | self.double_buffer = self.samples_np 13 | self.samples_np = self.double_buffer[0] 14 | 15 | def collect_batch(self, *args, **kwargs): 16 | """Swap in the called-for double buffer to record samples into.""" 17 | self.samples_np = self.double_buffer[self.sync.db_idx.value] 18 | return super().collect_batch(*args, **kwargs) 19 | 20 | 21 | class DbCpuResetCollector(DoubleBufferCollectorMixin, CpuResetCollector): 22 | pass 23 | 24 | 25 | class DbCpuWaitResetCollector(DoubleBufferCollectorMixin, CpuWaitResetCollector): 26 | pass 27 | 28 | 29 | class DbGpuResetCollector(DoubleBufferCollectorMixin, GpuResetCollector): 30 | pass 31 | 32 | 33 | class DbGpuWaitResetCollector(DoubleBufferCollectorMixin, GpuWaitResetCollector): 34 | pass 35 | -------------------------------------------------------------------------------- /rlpyt/samplers/base.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from rlpyt.samplers.collections import BatchSpec, TrajInfo 4 | from rlpyt.utils.quick_args import save__init__args 5 | 6 | 7 | class BaseSampler: 8 | """Class which interfaces with the Runner, in master process only.""" 9 | 10 | alternating = False 11 | 12 | def __init__( 13 | self, 14 | EnvCls, 15 | env_kwargs, 16 | batch_T, 17 | batch_B, 18 | CollectorCls, 19 | max_decorrelation_steps=100, 20 | TrajInfoCls=TrajInfo, 21 | eval_n_envs=0, # 
0 for no eval setup. 22 | eval_CollectorCls=None, # Must supply if doing eval. 23 | eval_env_kwargs=None, 24 | eval_max_steps=None, # int if using evaluation. 25 | eval_max_trajectories=None, # Optional earlier cutoff. 26 | is_pixel=False, # Special if using pixel render on dm_control 27 | ): 28 | eval_max_steps = None if eval_max_steps is None else int(eval_max_steps) 29 | eval_max_trajectories = (None if eval_max_trajectories is None else 30 | int(eval_max_trajectories)) 31 | save__init__args(locals()) 32 | self.batch_spec = BatchSpec(batch_T, batch_B) 33 | self.mid_batch_reset = CollectorCls.mid_batch_reset 34 | 35 | def initialize(self, *args, **kwargs): 36 | raise NotImplementedError 37 | 38 | def obtain_samples(self, itr): 39 | raise NotImplementedError # type: Samples 40 | 41 | def evaluate_agent(self, itr): 42 | raise NotImplementedError 43 | 44 | def shutdown(self): 45 | pass 46 | 47 | @property 48 | def batch_size(self): 49 | return self.batch_spec.size # For logging at least. 50 | -------------------------------------------------------------------------------- /rlpyt/samplers/parallel/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/samplers/parallel/__init__.py -------------------------------------------------------------------------------- /rlpyt/samplers/parallel/cpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/samplers/parallel/cpu/__init__.py -------------------------------------------------------------------------------- /rlpyt/samplers/parallel/cpu/sampler.py: -------------------------------------------------------------------------------- 1 | 2 | import multiprocessing as mp 3 | import time 4 | 5 | 6 | from rlpyt.samplers.parallel.base import ParallelSamplerBase 7 | from rlpyt.samplers.parallel.cpu.collectors import (CpuResetCollector, 8 | CpuEvalCollector) 9 | 10 | 11 | class CpuSampler(ParallelSamplerBase): 12 | 13 | def __init__(self, *args, CollectorCls=CpuResetCollector, 14 | eval_CollectorCls=CpuEvalCollector, **kwargs): 15 | # e.g. or use CpuWaitResetCollector, etc... 16 | super().__init__(*args, CollectorCls=CollectorCls, 17 | eval_CollectorCls=eval_CollectorCls, **kwargs) 18 | 19 | def obtain_samples(self, itr): 20 | self.agent.sync_shared_memory() # New weights in workers, if needed. 
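# (The CPU worker processes act from a shared-memory copy of the model;
#  sync_shared_memory publishes the master agent's latest parameters to
#  that copy, so the batch below is collected with current weights.)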
21 | return super().obtain_samples(itr) 22 | 23 | def evaluate_agent(self, itr): 24 | self.agent.sync_shared_memory() 25 | return super().evaluate_agent(itr) 26 | -------------------------------------------------------------------------------- /rlpyt/samplers/parallel/gpu/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/samplers/parallel/gpu/__init__.py -------------------------------------------------------------------------------- /rlpyt/samplers/serial/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/samplers/serial/__init__.py -------------------------------------------------------------------------------- /rlpyt/spaces/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/spaces/__init__.py -------------------------------------------------------------------------------- /rlpyt/spaces/base.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | class Space: 4 | """ 5 | Common definitions for observations and actions. 6 | """ 7 | 8 | def sample(self): 9 | """ 10 | Uniformly sample a random element of this space. 11 | """ 12 | raise NotImplementedError 13 | 14 | def null_value(self): 15 | """ 16 | Return a null value used to fill in for an absent element. 17 | """ 18 | raise NotImplementedError 19 | -------------------------------------------------------------------------------- /rlpyt/spaces/box.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from rlpyt.spaces.float_box import FloatBox 4 | from rlpyt.spaces.int_box import IntBox 5 | from rlpyt.spaces.base import Space 6 | 7 | 8 | class Box(Space): 9 | """A box in R^n, with specifiable bounds and dtype.""" 10 | 11 | def __init__(self, low, high, shape=None, dtype="float32", null_value=None): 12 | """ 13 | low and high are scalars, applied across all dimensions of shape. 14 | """ 15 | dtype = np.dtype(dtype) 16 | if dtype.kind == 'i' or dtype.kind == 'u': 17 | self.box = IntBox(low, high, shape=shape, dtype=dtype, null_value=null_value) 18 | elif dtype.kind == 'f': 19 | self.box = FloatBox(low, high, shape=shape, dtype=dtype, null_value=null_value) 20 | else: 21 | raise NotImplementedError(dtype) 22 | 23 | def sample(self): 24 | return self.box.sample() 25 | 26 | def null_value(self): 27 | return self.box.null_value() 28 | 29 | def __repr__(self): 30 | return f"Box(low={self.box.low}, high={self.box.high}, shape={self.box.shape}, dtype={self.box.dtype})" 31 | 32 | @property 33 | def shape(self): 34 | return self.box.shape 35 | 36 | @property 37 | def bounds(self): 38 | return self.box.bounds 39 | -------------------------------------------------------------------------------- /rlpyt/spaces/composite.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | from rlpyt.spaces.base import Space 4 | 5 | 6 | class Composite(Space): 7 | 8 | def __init__(self, spaces, NamedTupleCls): 9 | self._spaces = spaces 10 | # Should define NamedTupleCls in the module creating this space.
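# (Defining the namedtuple class at module level, rather than inline here, keeps sampled values picklable when composite observations or actions cross process boundaries.)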
11 | self._NamedTupleCls = NamedTupleCls 12 | 13 | def sample(self): 14 | return self._NamedTupleCls(*(s.sample() for s in self._spaces)) 15 | 16 | def null_value(self): 17 | return self._NamedTupleCls(*(s.null_value() for s in self._spaces)) 18 | 19 | @property 20 | def shape(self): 21 | return self._NamedTupleCls(*(s.shape for s in self._spaces)) 22 | 23 | @property 24 | def names(self): 25 | return self._NamedTupleCls._fields 26 | 27 | @property 28 | def spaces(self): 29 | return self._spaces 30 | 31 | def __repr__(self): 32 | return ", ".join(space.__repr__() for space in self._spaces) 33 | -------------------------------------------------------------------------------- /rlpyt/spaces/int_box.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | from rlpyt.spaces.base import Space 5 | 6 | 7 | class IntBox(Space): 8 | """A box in J^n, with specifiable bounds and dtype.""" 9 | 10 | def __init__(self, low, high, shape=None, dtype="int32", null_value=None): 11 | """ 12 | low and high are scalars, applied across all dimensions of shape. 13 | """ 14 | assert np.isscalar(low) and np.isscalar(high) 15 | self.low = low 16 | self.high = high 17 | self.shape = shape if shape is not None else () # Empty shape -> 0-d np.ndarray sample. 18 | self.dtype = np.dtype(dtype) 19 | assert np.issubdtype(self.dtype, np.integer) 20 | null_value = low if null_value is None else null_value 21 | assert null_value >= low and null_value < high 22 | self._null_value = null_value 23 | 24 | def sample(self): 25 | return np.random.randint(low=self.low, high=self.high, 26 | size=self.shape, dtype=self.dtype) 27 | 28 | def null_value(self): 29 | null = np.zeros(self.shape, dtype=self.dtype) 30 | if self._null_value is not None: 31 | try: 32 | null[:] = self._null_value 33 | except IndexError: 34 | null.fill(self._null_value) 35 | return null 36 | 37 | @property 38 | def bounds(self): 39 | return self.low, self.high 40 | 41 | @property 42 | def n(self): 43 | return self.high - self.low 44 | 45 | def __repr__(self): 46 | return f"IntBox({self.low}-{self.high - 1} shape={self.shape})" 47 | -------------------------------------------------------------------------------- /rlpyt/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/utils/__init__.py -------------------------------------------------------------------------------- /rlpyt/utils/array.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | 4 | 5 | def select_at_indexes(indexes, array): 6 | """Leading dimensions of array must match dimensions of indexes.""" 7 | dim = len(indexes.shape) 8 | assert indexes.shape == array.shape[:dim] 9 | num = int(np.prod(indexes.shape)) 10 | a_flat = array.reshape((num,) + array.shape[dim:]) 11 | s_flat = a_flat[np.arange(num), indexes.reshape(-1)] 12 | selected = s_flat.reshape(array.shape[:dim] + array.shape[dim + 1:]) 13 | return selected 14 | 15 | 16 | def to_onehot(indexes, dim, dtype=None): 17 | dtype = indexes.dtype if dtype is None else dtype 18 | onehot = np.zeros((indexes.size, dim), dtype=dtype) 19 | onehot[np.arange(indexes.size), indexes.reshape(-1)] = 1 20 | return onehot.reshape(indexes.shape + (dim,)) 21 | 22 | 23 | def from_onehot(onehot, dtype=None): 24 | return np.asarray(np.argmax(onehot, axis=-1), dtype=dtype) 25 | 26 | 27 | def valid_mean(array, valid=None, axis=None):
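"""Mean of array over elements where valid is nonzero; a plain mean over axis when valid is None."""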
28 | if valid is None: 29 | return array.mean(axis=axis) 30 | return (array * valid).sum(axis=axis) / valid.sum(axis=axis) 31 | 32 | 33 | def infer_leading_dims(array, dim): 34 | """Param 'dim': number of data dimensions; checks for [B] or [T,B] leading dims.""" 35 | assert array.ndim in (dim, dim + 1, dim + 2) 36 | shape = array.shape[-dim:] 37 | T = B = 1 38 | has_T = has_B = False 39 | if array.ndim == dim + 2: 40 | T, B = array.shape[:2] 41 | has_T = has_B = True # Might have T=1 or B=1. 42 | elif array.ndim == dim + 1: 43 | B = array.shape[0] 44 | has_B = True 45 | return T, B, shape, has_T, has_B 46 | -------------------------------------------------------------------------------- /rlpyt/utils/launching/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wilson1yan/rlpyt/51ed14dc7855ce1c6b9d6c8a3a89e9e3f3916b47/rlpyt/utils/launching/__init__.py -------------------------------------------------------------------------------- /rlpyt/utils/logging/__init__.py: -------------------------------------------------------------------------------- 1 | """Logger mostly unchanged from rllab.""" -------------------------------------------------------------------------------- /rlpyt/utils/prog_bar.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import pyprind 4 | from rlpyt.utils.logging import logger 5 | 6 | 7 | class ProgBarCounter: 8 | 9 | def __init__(self, total_count): 10 | self.total_count = total_count 11 | self.max_progress = 1000000 12 | self.cur_progress = 0 13 | self.cur_count = 0 14 | if not logger.get_log_tabular_only(): 15 | self.pbar = pyprind.ProgBar(self.max_progress) 16 | else: 17 | self.pbar = None 18 | 19 | def update(self, current_count): 20 | if self.pbar is not None and not logger.get_log_tabular_only(): 21 | self.cur_count = current_count 22 | new_progress = self.cur_count * self.max_progress / self.total_count 23 | if new_progress < self.max_progress: 24 | self.pbar.update(new_progress - self.cur_progress) 25 | self.cur_progress = new_progress 26 | 27 | def stop(self): 28 | if self.pbar is not None and self.pbar.active: 29 | self.pbar.stop() 30 | -------------------------------------------------------------------------------- /rlpyt/utils/quick_args.py: -------------------------------------------------------------------------------- 1 | 2 | from inspect import getfullargspec 3 | 4 | 5 | def save__init__args(values, underscore=False, overwrite=False, subclass_only=False): 6 | """ 7 | Use in __init__() only; assigns all args/kwargs to instance attributes. 8 | To maintain precedence of args provided to subclasses, call this in the 9 | subclass before super().__init__() if save__init__args() also appears in 10 | the base class, or use overwrite=True. With subclass_only==True, only 11 | args/kwargs listed in the current subclass apply.
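Illustrative example (hypothetical class):
    class MySampler:
        def __init__(self, batch_T, batch_B=8):
            save__init__args(locals())  # MySampler(4) then has .batch_T == 4 and .batch_B == 8.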
12 | """ 13 | prefix = "_" if underscore else "" 14 | self = values['self'] 15 | args = list() 16 | Classes = type(self).mro() 17 | if subclass_only: 18 | Classes = Classes[:1] 19 | for Cls in Classes: # class inheritances 20 | if '__init__' in vars(Cls): 21 | args += getfullargspec(Cls.__init__).args[1:] 22 | for arg in args: 23 | attr = prefix + arg 24 | if arg in values and (not hasattr(self, attr) or overwrite): 25 | setattr(self, attr, values[arg]) 26 | -------------------------------------------------------------------------------- /rlpyt/utils/seed.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import time 4 | 5 | from rlpyt.utils.logging.console import colorize 6 | 7 | seed_ = None 8 | 9 | 10 | def set_seed(seed): 11 | seed %= 4294967294 12 | global seed_ 13 | seed_ = seed 14 | import random 15 | random.seed(seed) 16 | np.random.seed(seed) 17 | import torch 18 | torch.manual_seed(seed) 19 | torch.cuda.manual_seed(seed) 20 | print(colorize(f"using seed {seed}", "green")) 21 | 22 | 23 | def get_seed(): 24 | return seed_ 25 | 26 | 27 | def make_seed(): 28 | """ 29 | Returns a random number between [0, 10000], using timing jitter. 30 | 31 | This has a white noise spectrum and gives unique values for multiple 32 | simultaneous processes...some simpler attempts did not achieve that, but 33 | there's probably a better way. 34 | """ 35 | d = 10000 36 | t = time.time() 37 | sub1 = int(t * d) % d 38 | sub2 = int(t * d ** 2) % d 39 | s = 1e-3 40 | s_inv = 1. / s 41 | time.sleep(s * sub2 / d) 42 | t2 = time.time() 43 | t2 = t2 - int(t2) 44 | t2 = int(t2 * d * s_inv) % d 45 | time.sleep(s * sub1 / d) 46 | t3 = time.time() 47 | t3 = t3 - int(t3) 48 | t3 = int(t3 * d * s_inv * 10) % d 49 | return (t3 - t2) % d 50 | -------------------------------------------------------------------------------- /scratch/README.txt: -------------------------------------------------------------------------------- 1 | Write local tests / dev in this directory (in .gitignore). -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from setuptools import find_packages 3 | 4 | setup( 5 | name='rlpyt', 6 | version='0.1.0dev', 7 | packages=find_packages(), 8 | license='MIT License', 9 | long_description=open('README.md').read(), 10 | ) 11 | --------------------------------------------------------------------------------