├── LICENSE ├── README.md ├── hw1 ├── BehavioralCloning.png ├── DAgger.png ├── README.md ├── demo.bash ├── experts │ ├── Ant-v2.pkl │ ├── HalfCheetah-v2.pkl │ ├── Hopper-v2.pkl │ ├── Humanoid-v2.pkl │ ├── Reacher-v2.pkl │ └── Walker2d-v2.pkl ├── load_policy.py ├── main.py ├── model.py ├── plot.py ├── report.md ├── requirements.txt ├── run_expert.py └── train.py ├── hw2 ├── README.md ├── data │ ├── hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ 
├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 
│ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── lb_rtg_na_CartPole-v0_20-09-2018_09-26-24 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json 
│ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ ├── sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22 │ │ ├── 1 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ ├── 11 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ │ └── 21 │ │ │ ├── hyperparams.json │ │ │ ├── log.txt │ │ │ └── model.pkl │ └── sb_rtg_na_CartPole-v0_20-09-2018_09-08-07 │ │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── hw2_instructions.pdf ├── logz.py ├── lunar_lander.py ├── plot.py ├── report.md ├── requirements.txt ├── result │ ├── result_CartPole_lb.png │ ├── result_CartPole_sb.png │ ├── result_HalfCheetah_1.png │ ├── result_HalfCheetah_2.png │ ├── result_InvertedPendulum.png │ └── result_LunarLander.png └── train_pg_f18.py └── hw3 ├── README.md ├── atari_wrappers.py ├── data ├── ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── 
log.txt │ │ └── model.pkl ├── ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ 
├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── 
model.pkl ├── ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl ├── dqn_Asterix_double_dqn_AsterixNoFrameskip-v4_04-10-2018_03-16-57 │ └── 2333 │ │ ├── hyperparams.json │ │ └── log.txt ├── dqn_Asterix_double_dqn_singlebias_AsterixNoFrameskip-v4_08-10-2018_09-14-56 │ └── 2333 │ │ ├── hyperparams.json │ │ └── log.txt ├── dqn_Asterix_vanilla_dqn_AsterixNoFrameskip-v4_04-10-2018_03-16-01 │ └── 2333 │ │ ├── hyperparams.json │ │ └── log.txt ├── dqn_Bowling_double_dqn_BowlingNoFrameskip-v4_04-10-2018_15-13-45 │ └── 2333 │ │ ├── hyperparams.json │ │ └── log.txt ├── dqn_Bowling_double_dqn_singlebias_BowlingNoFrameskip-v4_10-10-2018_16-14-12 │ └── 2333 │ │ ├── hyperparams.json │ │ └── log.txt ├── dqn_Bowling_vanilla_dqn_BowlingNoFrameskip-v4_04-10-2018_15-14-21 │ └── 2333 │ │ ├── hyperparams.json │ │ └── log.txt ├── dqn_KungFuMaster_double_dqn_KungFuMasterNoFrameskip-v4_04-10-2018_11-14-51 │ └── 2333 │ │ ├── hyperparams.json │ │ └── log.txt ├── dqn_KungFuMaster_double_dqn_singlebias_KungFuMasterNoFrameskip-v4_10-10-2018_16-14-59 │ └── 2333 │ │ ├── hyperparams.json │ │ └── log.txt ├── dqn_KungFuMaster_vanilla_dqn_KungFuMasterNoFrameskip-v4_04-10-2018_11-15-11 │ └── 2333 │ │ ├── hyperparams.json │ │ └── log.txt ├── dqn_Pong_double_dqn_PongNoFrameskip-v4_04-10-2018_23-10-47 │ └── 2333 │ │ ├── hyperparams.json │ │ └── log.txt ├── dqn_Pong_double_dqn_singlebias_PongNoFrameskip-v4_08-10-2018_09-12-30 │ └── 2333 │ │ ├── hyperparams.json │ │ └── log.txt ├── dqn_Pong_vanilla_dqn_PongNoFrameskip-v4_04-10-2018_23-11-56 │ └── 2333 │ │ ├── hyperparams.json │ │ └── log.txt ├── hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04 │ ├── 1 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ ├── 11 │ │ ├── hyperparams.json │ │ ├── log.txt │ │ └── model.pkl │ └── 21 │ │ ├── 
hyperparams.json │ │ ├── log.txt │ │ └── model.pkl └── ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46 │ ├── 1 │ ├── hyperparams.json │ ├── log.txt │ └── model.pkl │ ├── 11 │ ├── hyperparams.json │ ├── log.txt │ └── model.pkl │ └── 21 │ ├── hyperparams.json │ ├── log.txt │ └── model.pkl ├── dqn.py ├── dqn_utils.py ├── logz.py ├── lunar_lander.py ├── plot.py ├── report.md ├── requirements.txt ├── result ├── result_Asterix.png ├── result_Bowling.png ├── result_KungFuMaster.png ├── result_Pong.png ├── result_cp.png ├── result_hc_1.png ├── result_hc_2.png ├── result_hc_3.png ├── result_hc_4.png ├── result_hc_4_old.png ├── result_ip_1.png ├── result_ip_2.png ├── result_ip_3.png └── result_ip_4.png ├── run_dqn_atari.py ├── run_dqn_lander.py ├── run_dqn_ram.py └── train_ac_f18.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 KuNya 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Berkeley DeepRLcourse Homework Solutions in Pytorch 2 | 3 | Solutions for [CS294-112 Fall2018 assignments in Pytorch](https://github.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch) 4 | -------------------------------------------------------------------------------- /hw1/BehavioralCloning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/BehavioralCloning.png -------------------------------------------------------------------------------- /hw1/DAgger.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/DAgger.png -------------------------------------------------------------------------------- /hw1/README.md: -------------------------------------------------------------------------------- 1 | #### This folder contains the code and report for HW1. 2 | 3 | 4 | 5 | ## Code 6 | 7 | The purpose of each file: 8 | 9 | - ###### main.py 10 | 11 | Hyperparameters are stored here; you can modify them for different experiments. 12 | 13 | - ###### train.py 14 | 15 | The Behavioral Cloning and DAgger algorithms are implemented here. 16 | 17 | - ###### model.py 18 | 19 | The code used for defining the neural network. 
20 | 21 | - ###### plot.py 22 | 23 | The code used for generating figure in report. 24 | 25 | ​ 26 | 27 | 28 | 29 | If you want to run the experiment, use the command below 30 | 31 | ```shell 32 | python main.py 33 | ``` 34 | 35 | 36 | 37 | ## Report 38 | 39 | Please see [report.md](./report.md) -------------------------------------------------------------------------------- /hw1/demo.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -eux 3 | for e in Hopper-v2 Ant-v2 HalfCheetah-v2 Humanoid-v2 Reacher-v2 Walker2d-v2 4 | do 5 | python run_expert.py experts/$e.pkl $e --render --num_rollouts=1 6 | done 7 | -------------------------------------------------------------------------------- /hw1/experts/Ant-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/experts/Ant-v2.pkl -------------------------------------------------------------------------------- /hw1/experts/HalfCheetah-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/experts/HalfCheetah-v2.pkl -------------------------------------------------------------------------------- /hw1/experts/Hopper-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/experts/Hopper-v2.pkl -------------------------------------------------------------------------------- /hw1/experts/Humanoid-v2.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/experts/Humanoid-v2.pkl -------------------------------------------------------------------------------- /hw1/experts/Reacher-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/experts/Reacher-v2.pkl -------------------------------------------------------------------------------- /hw1/experts/Walker2d-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/experts/Walker2d-v2.pkl -------------------------------------------------------------------------------- /hw1/load_policy.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import numpy as np 3 | from functools import reduce 4 | 5 | 6 | def load_policy(filename): 7 | def read_layer(l): 8 | assert list(l.keys()) == ['AffineLayer'] 9 | assert sorted(l['AffineLayer'].keys()) == ['W', 'b'] 10 | W, b = l['AffineLayer']['W'].astype(np.float32), l['AffineLayer']['b'].astype(np.float32) 11 | return lambda x: np.matmul(x, W) + b 12 | 13 | def build_nonlin_fn(nonlin_type): 14 | if nonlin_type == 'lrelu': 15 | leak = 0.01 # openai/imitation nn.py:233 16 | return lambda x: 0.5 * (1 + leak) * x + 0.5 * (1 - leak) * np.abs(x) 17 | elif nonlin_type == 'tanh': 18 | return lambda x: np.tanh(x) 19 | else: 20 | raise NotImplementedError(nonlin_type) 21 | 22 | with open(filename, 'rb') as f: 23 | data = pickle.loads(f.read()) 24 | 25 | # assert len(data.keys()) == 2 26 | nonlin_type = data['nonlin_type'] 27 | nonlin_fn = build_nonlin_fn(nonlin_type) 28 | policy_type = [k for k in data.keys() if k != 'nonlin_type'][0] 
29 | 30 | assert policy_type == 'GaussianPolicy', 'Policy type {} not supported'.format(policy_type) 31 | policy_params = data[policy_type] 32 | 33 | assert set(policy_params.keys()) == {'logstdevs_1_Da', 'hidden', 'obsnorm', 'out'} 34 | 35 | # Build observation normalization layer 36 | assert list(policy_params['obsnorm'].keys()) == ['Standardizer'] 37 | obsnorm_mean = policy_params['obsnorm']['Standardizer']['mean_1_D'] 38 | obsnorm_meansq = policy_params['obsnorm']['Standardizer']['meansq_1_D'] 39 | obsnorm_stdev = np.sqrt(np.maximum(0, obsnorm_meansq - np.square(obsnorm_mean))) 40 | #print('obs', obsnorm_mean.shape, obsnorm_stdev.shape) 41 | 42 | 43 | # Build hidden layers 44 | assert list(policy_params['hidden'].keys()) == ['FeedforwardNet'] 45 | layer_params = policy_params['hidden']['FeedforwardNet'] 46 | layers = [] 47 | for layer_name in sorted(layer_params.keys()): 48 | l = layer_params[layer_name] 49 | fc_layer = read_layer(l) 50 | layers += [fc_layer, nonlin_fn] 51 | 52 | # Build output layer 53 | fc_layer = read_layer(policy_params['out']) 54 | layers += [fc_layer] 55 | layers_forward = lambda inp: reduce(lambda x, fn: fn(x), [inp] + layers) 56 | 57 | 58 | def forward_pass(obs): 59 | ''' Build the forward pass for policy net. 60 | 61 | Input: batched observation. (shape: [batch_size, obs_dim]) 62 | 63 | Output: batched action. 
class Config():
    """Hyperparameters and shared objects for one HW1 imitation-learning run.

    Every setting is a class attribute, so the gym environment below is
    created once, when this class body is executed.
    """
    # Seed handed to env.seed() and torch.manual_seed() in main(); None disables seeding.
    seed = 3
    envname = 'Humanoid-v2'
    env = gym.make(envname)
    method = 'DA'  # BC: Behavioral Cloning DA: DAgger
    # Use the GPU when one is present; otherwise fall back to the CPU instead
    # of failing later when the agent is moved to the device.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    expert_path = './experts/'
    model_save_path = './models/'
    n_expert_rollouts = 30  # number of rollouts from expert
    n_dagger_rollouts = 10  # number of new rollouts from learned model for a DAgger iteration
    n_dagger_iter = 10  # number of DAgger iterations
    n_eval_rollouts = 10  # number of rollouts for evaluating a policy
    L2 = 0.00001
    lr = 0.0001
    epochs = 20
    batch_size = 64

    eval_steps = 500


def main():
    """Train an agent with the configured method, then evaluate expert and agent.

    Reads all settings from ``Config``, trains with Behavioral Cloning
    (``'BC'``) or DAgger (``'DA'``), and prints the mean and standard
    deviation of the returns for the expert and for the trained agent.

    Raises:
        NotImplementedError: if ``Config.method`` is neither ``'BC'`` nor ``'DA'``.
    """
    config = Config()
    print('*' * 20, config.envname, config.method, '*' * 20)
    env = config.env
    # Compare against None so that a seed of 0 is honoured rather than
    # being treated as "no seed".
    if config.seed is not None:
        env.seed(config.seed)
        torch.manual_seed(config.seed)
    agent = Agent(env.observation_space.shape[0], env.action_space.shape[0]).to(config.device)
    expert = load_policy(config.expert_path + config.envname + '.pkl')
    method = config.method

    if method == 'BC':
        agent = BehavioralCloning(config, agent, expert)
    elif method == 'DA':
        agent = DAgger(config, agent, expert)
    else:
        # The exception must be raised, not merely constructed; otherwise the
        # untrained torch module would be handed to Eval below.
        raise NotImplementedError(method)

    avrg_mean, avrg_std = Eval(config, expert)
    print('[expert] avrg_mean:{:.2f} avrg_std:{:.2f}'.format(avrg_mean, avrg_std))

    avrg_mean, avrg_std = Eval(config, agent)
    print('[agent] avrg_mean:{:.2f} avrg_std:{:.2f}'.format(avrg_mean, avrg_std))


if __name__ == '__main__':
    main()
| n_iters = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 30 | x_length = len(n_iters) 31 | r_means = [544.86, 629.28, 1081.33, 1087.13, 1909.09, 4150.72, 6840.35, 7865.56, 9568.10, 8978.60] 32 | r_stds = [129.61, 200.03, 819.98, 423.87, 1193.86, 2163.90, 3266.94, 3581.33, 2205.40, 2832.79] 33 | expert_mean = [10438.35] * x_length 34 | expert_std = [39.50] * x_length 35 | BC_mean = [954.69] * x_length 36 | BC_std = [490.10] * x_length 37 | 38 | plt.errorbar(n_iters, r_means, yerr=r_stds, marker='o', capsize=8, linestyle='--', label='DAgger') 39 | plt.errorbar(n_iters, expert_mean, yerr=expert_std, marker='o', capsize=8, linestyle='--', label='Expert') 40 | plt.errorbar(n_iters, BC_mean, yerr=BC_std, marker='o', capsize=8, linestyle='--', label='Behavioral Cloning') 41 | plt.xlabel('Number of DAgger Iterations', fontsize=18) 42 | plt.ylabel('Average Return', fontsize=18) 43 | plt.legend(fontsize=16) 44 | plt.savefig('./DAgger.png', format='png') 45 | #plt.show() 46 | 47 | 48 | -------------------------------------------------------------------------------- /hw1/report.md: -------------------------------------------------------------------------------- 1 | # CS294-112 HW 1: Imitation Learning 2 | 3 | In all of experiments below, we use the same network architecture as the expert. 4 | 5 | ## Problem 2.2 6 | 7 | At the beginning, we sample 50 rollouts from the expert, then we let our model do Behavioral Cloning on these data. 8 | 9 | In the learning process, we do 50 epochs learning with Adam optimizer (batch size = 64, lr = 0.0001, weight_decay = 0.00001). 10 | 11 | Finally, we pick the model that perform best in the training process to be our final model. 12 | 13 | To evaluate a model, we run model 10 times in the environment, and calculate the mean and std for total rewards. 14 | 15 | The performance of final model are reported as below. 
16 | 17 | | Task name | learned model | expert | 18 | | :------------: | :---------------: | :--------------: | 19 | | Ant-v2 | 4802.01 ± 151.23 | 4851.54 ± 134.09 | 20 | | HalfCheetah-v2 | 4085.17 ± 73.67 | 4095.67 ± 122.68 | 21 | | Hopper-v2 | 1368.64 ± 215.29 | 3779.60 ± 4.13 | 22 | | Humanoid-v2 | 954.69 ± 490.10 | 10438.35 ± 39.50 | 23 | | Reacher-v2 | -10.12 ± 3.23 | -4.03 ± 2.37 | 24 | | Walker2d-v2 | 3305.00 ± 1514.62 | 5515.11 ± 42.41 | 25 | 26 | 27 | 28 | ## Problem 2.3 29 | 30 | In this part, we only change the total number of expert rollouts for the Behavioral Cloning algorithm. 31 | 32 | For the other hyperparameters, we use the same setup as in the experiments above. 33 | 34 | The performance of the final learned model with different numbers of expert rollouts is reported below. 35 | 36 | ![BehavioralCloning](./BehavioralCloning.png) 37 | 38 | ## Problem 3.2 39 | 40 | Before the DAgger iterations, we first construct our dataset from 30 expert rollouts. 41 | 42 | After that, we train for 20 epochs to fit the dataset (using the same hyperparameters as above). 43 | 44 | Then, we run our model in the environment and collect 10 rollouts labeled with the expert's gold actions. 45 | 46 | Finally, we add these new rollouts to our dataset and repeat the above process. 47 | 48 | The performance of the learned model at each iteration is reported below. 49 | 50 | ![DAgger](./DAgger.png) -------------------------------------------------------------------------------- /hw1/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.10.5 2 | mujoco-py==1.50.1.56 3 | numpy 4 | seaborn 5 | -------------------------------------------------------------------------------- /hw1/run_expert.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | Code to load an expert policy and generate roll-out data for behavioral cloning. 
4 | Example usage: 5 | python run_expert.py experts/Humanoid-v1.pkl Humanoid-v1 --render \ 6 | --num_rollouts 20 7 | 8 | Modified from the script written by Jonathan Ho (hoj@openai.com) 9 | """ 10 | 11 | import os 12 | import argparse 13 | import pickle 14 | import numpy as np 15 | import gym 16 | import load_policy 17 | 18 | def main(): 19 | parser = argparse.ArgumentParser() 20 | parser.add_argument('expert_policy_file', type=str) 21 | parser.add_argument('envname', type=str) 22 | parser.add_argument('--render', action='store_true') 23 | parser.add_argument("--max_timesteps", type=int) 24 | parser.add_argument('--num_rollouts', type=int, default=20, 25 | help='Number of expert roll outs') 26 | args = parser.parse_args() 27 | 28 | print('loading and building expert policy') 29 | policy_net = load_policy.load_policy(args.expert_policy_file) 30 | print('loaded and built') 31 | 32 | env = gym.make(args.envname) 33 | max_steps = args.max_timesteps or env.spec.timestep_limit 34 | 35 | returns = [] 36 | observations = [] 37 | actions = [] 38 | for i in range(args.num_rollouts): 39 | print('iter', i) 40 | obs = env.reset() 41 | done = False 42 | totalr = 0. 
def agent_wapper(config, agent):
    """Adapt a torch policy to the numpy-in / numpy-out policy interface.

    Args:
        config: object providing ``device``, the torch device to run on.
        agent: callable torch module mapping an observation batch to actions.

    Returns:
        A function that takes a 2-D numpy observation array and returns the
        agent's output as a numpy array, computed without gradient tracking.
    """
    def fn(obs):
        with torch.no_grad():
            obs = obs.astype(np.float32)
            assert len(obs.shape) == 2
            obs = torch.from_numpy(obs).to(config.device)
            action = agent(obs)
            return action.cpu().numpy()
    return fn


def fit_dataset(config, agent, dataset, n_epochs):
    """Regress the agent onto ``dataset`` and restore the best checkpoint.

    Trains with Adam and an MSE loss. Every ``config.eval_steps`` optimizer
    steps (including step 0) the agent is rolled out with ``Eval``; the
    checkpoint with the highest ``mean - std`` return seen in this call is
    saved, and it is loaded back into ``agent`` before returning.

    Args:
        config: object providing ``lr``, ``L2``, ``batch_size``,
            ``eval_steps``, ``model_save_path`` and whatever ``Eval``,
            ``save_model`` and ``load_model`` read.
        agent: torch module being trained in place.
        dataset: torch dataset yielding ``(observation, action)`` pairs.
        n_epochs: number of passes over ``dataset``.
    """
    optimizer = optim.Adam(agent.parameters(), lr=config.lr, weight_decay=config.L2)
    loss_fn = nn.MSELoss()
    dataloader = DataLoader(dataset, batch_size=config.batch_size, shuffle=True)

    step = 0
    best_reward = None
    loss_his = []

    for k in range(n_epochs):
        for batch in dataloader:
            obs, gold_actions = batch
            pred_actions = agent(obs)
            loss = loss_fn(pred_actions, gold_actions)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            loss_his.append(loss.item())

            if step % config.eval_steps == 0:
                avrg_mean, avrg_std = Eval(config, agent_wapper(config, agent))
                avrg_loss = np.mean(loss_his)
                loss_his = []
                print('[epoch {} step {}] loss: {:.4f} r_mean: {:.2f} r_std: {:.2f}'.format(
                    k + 1, step, avrg_loss, avrg_mean, avrg_std))

                # Model selection score: penalise high-variance policies.
                avrg_reward = avrg_mean - avrg_std
                if best_reward is None or best_reward < avrg_reward:
                    best_reward = avrg_reward
                    save_model(config, agent, config.model_save_path)

            step += 1

    load_model(config, agent, config.model_save_path)


def _to_dataset(config, obs, actions):
    """Pair numpy observations and actions as a TensorDataset on ``config.device``."""
    obs = torch.from_numpy(obs).to(config.device)
    actions = torch.from_numpy(actions).to(config.device)
    return TensorDataset(obs, actions)


def BehavioralCloning(config, agent, expert):
    """Train ``agent`` by supervised regression on expert rollouts.

    Args:
        config: run settings; ``n_expert_rollouts`` and ``epochs`` are read
            here, the rest by the helpers called.
        agent: torch module to train in place.
        expert: numpy-in / numpy-out expert policy.

    Returns:
        The trained agent wrapped as a numpy-in / numpy-out policy.
    """
    # get expert demonstration
    expert_obs, expert_actions, *_ = run_agent(config, expert, config.n_expert_rollouts)
    dataset = _to_dataset(config, expert_obs, expert_actions)

    # training agent
    fit_dataset(config, agent, dataset, config.epochs)

    return agent_wapper(config, agent)


def DAgger(config, agent, expert):
    """Train ``agent`` with DAgger.

    Starts from expert rollouts, then for ``config.n_dagger_iter`` iterations:
    fits the agent on the current dataset, rolls the agent out, relabels the
    visited observations with the expert's actions, appends them to the
    dataset, and prints the agent's evaluation return.

    Args:
        config: run settings; ``n_expert_rollouts``, ``n_dagger_iter``,
            ``n_dagger_rollouts`` and ``epochs`` are read here.
        agent: torch module to train in place.
        expert: numpy-in / numpy-out expert policy.

    Returns:
        The trained agent wrapped as a numpy-in / numpy-out policy.
    """
    # get expert demonstration
    expert_obs, expert_actions, *_ = run_agent(config, expert, config.n_expert_rollouts)
    dataset = _to_dataset(config, expert_obs, expert_actions)

    for k in range(config.n_dagger_iter):
        # training agent
        fit_dataset(config, agent, dataset, config.epochs)

        # run agent to get new on-policy observations
        new_obs, *_ = run_agent(config, agent_wapper(config, agent), config.n_dagger_rollouts)
        # Cast so the labels match the float32 observations and network
        # output even if the expert returns another float dtype.
        new_actions = np.asarray(expert(new_obs), dtype=np.float32)

        # add new data to dataset
        dataset = ConcatDataset([dataset, _to_dataset(config, new_obs, new_actions)])

        avrg_mean, avrg_std = Eval(config, agent_wapper(config, agent))
        print('[DAgger iter {}] r_mean: {:.2f} r_std: {:.2f}'.format(k + 1, avrg_mean, avrg_std))

    return agent_wapper(config, agent)


def run_agent(config, agent, num_rollouts):
    """Roll out ``agent`` in ``config.env`` and collect the visited data.

    Each rollout ends when the environment reports ``done`` or after
    ``env.spec.timestep_limit`` steps.

    Args:
        config: object providing ``env``.
        agent: policy called with a ``(1, obs_dim)`` numpy array; its output
            is flattened before being passed to ``env.step``.
        num_rollouts: number of episodes to run.

    Returns:
        ``(observations, actions, mean_return, std_return)``, where the first
        two are float32 arrays stacked over all steps of all rollouts.
    """
    env = config.env
    max_steps = env.spec.timestep_limit

    returns = []
    observations = []
    actions = []
    for _ in range(num_rollouts):
        obs = env.reset()
        done = False
        totalr = 0
        steps = 0
        while not done:
            action = agent(obs[None, :])
            action = action.reshape(-1)
            observations.append(obs)
            actions.append(action)
            obs, r, done, _ = env.step(action)
            totalr += r
            steps += 1
            if steps >= max_steps:
                break
        returns.append(totalr)

    avrg_mean, avrg_std = np.mean(returns), np.std(returns)
    observations = np.array(observations).astype(np.float32)
    actions = np.array(actions).astype(np.float32)

    return observations, actions, avrg_mean, avrg_std


def Eval(config, agent):
    """Return ``(mean, std)`` of the return over ``config.n_eval_rollouts`` rollouts."""
    *_, avrg_mean, avrg_std = run_agent(config, agent, config.n_eval_rollouts)

    return avrg_mean, avrg_std


def _model_file(config, PATH):
    """Return the checkpoint file for ``config.envname`` inside directory ``PATH``.

    ``os.path.join`` is used so the file lands inside ``PATH`` whether or not
    ``PATH`` ends with a path separator.
    """
    return os.path.join(PATH, config.envname + '-' + 'parameters.tar')


def save_model(config, model, PATH):
    """Save ``model``'s state dict under directory ``PATH``, creating it if needed."""
    os.makedirs(PATH, exist_ok=True)
    torch.save(model.state_dict(), _model_file(config, PATH))
    print('model saved.')


def load_model(config, model, PATH):
    """Load the state dict written by ``save_model`` into ``model``.

    Tensors are mapped onto ``config.device`` so a checkpoint written on one
    device can be restored on another.
    """
    state = torch.load(_model_file(config, PATH), map_location=config.device)
    model.load_state_dict(state)
    print('model loaded.')
the code, experiment results, and report for HW2. 2 | 3 | 4 | 5 | ## Code 6 | 7 | - ###### train_pg_f18.py 8 | 9 | The entire model used for the different experiments. 10 | 11 | - ###### logz.py 12 | 13 | The code used for logging. 14 | 15 | - ###### plot.py 16 | 17 | The code used for generating the figures in the report. 18 | 19 | 20 | 21 | ## Folder 22 | 23 | - ###### data 24 | 25 | Contains the log files generated by logz.py. 26 | 27 | - ###### result 28 | 29 | Contains the learning curves for the different experiments. 30 | 31 | 32 | 33 | ## Report 34 | 35 | Please see [report.md](./report.md) -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b10000_r0.005", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 10000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | 
"env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b10000_r0.005", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 10000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b10000_r0.005", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 10000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/21/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b10000_r0.01", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 10000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b10000_r0.01", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 10000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : 
true, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b10000_r0.01", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 10000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b10000_r0.02", 4 | "gamma" : 0.9, 5 | "learning_rate" : 
0.02, 6 | "logdir" : "data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 10000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b10000_r0.02", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 10000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/11/model.pkl -------------------------------------------------------------------------------- 
/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b10000_r0.02", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 10000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "InvertedPendulum-v2", 3 | "exp_name" : "hc_b10000_r0.05", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.05, 6 | "logdir" : "data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/1", 7 | "max_path_length" : 1000.0, 8 | "min_timesteps_per_batch" : 10000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/1/model.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "InvertedPendulum-v2", 3 | "exp_name" : "hc_b10000_r0.05", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.05, 6 | "logdir" : "data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/11", 7 | "max_path_length" : 1000.0, 8 | "min_timesteps_per_batch" : 10000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "InvertedPendulum-v2", 3 | "exp_name" : "hc_b10000_r0.05", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.05, 6 | "logdir" : "data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/21", 7 | "max_path_length" : 1000.0, 8 | "min_timesteps_per_batch" : 
10000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.005", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/11/hyperparams.json: 
-------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.005", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.005", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/21/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.01", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.01", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : 
true, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.01", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02", 4 | "gamma" : 0.9, 5 | "learning_rate" : 
0.02, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/11/model.pkl -------------------------------------------------------------------------------- 
/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02_no_rtg_bl", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-27-06/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : false, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/1/model.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02_no_rtg_bl", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-27-06/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : false, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02_no_rtg_bl", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-27-06/21", 7 | "max_path_length" : 150.0, 8 | 
"min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : false, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02_no_rtg_no_bl", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.002, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-26-08/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : false, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/1/model.pkl -------------------------------------------------------------------------------- 
/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02_no_rtg_no_bl", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.002, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-26-08/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : false, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02_no_rtg_no_bl", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.002, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-26-08/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : false, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- 
/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02_rtg_bl", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-03-08/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02_rtg_bl", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.02, 6 | "logdir" : 
"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-03-08/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02_rtg_bl", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-03-08/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/21/model.pkl 
-------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02_rtg_no_bl", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-26-44/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02_rtg_no_bl", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-26-44/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- 
/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b30000_r0.02_rtg_no_bl", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-26-44/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "InvertedPendulum-v2", 3 | "exp_name" : "hc_b300_r0.01", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.01, 6 | "logdir" : 
"data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/1", 7 | "max_path_length" : 1000.0, 8 | "min_timesteps_per_batch" : 300, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "InvertedPendulum-v2", 3 | "exp_name" : "hc_b300_r0.01", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/11", 7 | "max_path_length" : 1000.0, 8 | "min_timesteps_per_batch" : 300, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/11/model.pkl -------------------------------------------------------------------------------- 
/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "InvertedPendulum-v2", 3 | "exp_name" : "hc_b300_r0.01", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/21", 7 | "max_path_length" : 1000.0, 8 | "min_timesteps_per_batch" : 300, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b50000_r0.005", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 50000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/1/model.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b50000_r0.005", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 50000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b50000_r0.005", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 50000, 9 | "n_iter" : 100, 10 | 
"n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b50000_r0.01", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 50000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | 
"env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b50000_r0.01", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 50000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b50000_r0.01", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 50000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/21/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b50000_r0.02", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 50000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b50000_r0.02", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 50000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : 
true, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_b50000_r0.02", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 50000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "lb_no_rtg_dna", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 
| "logdir" : "data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/1", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 5000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : false, 13 | "reward_to_go" : false, 14 | "seed" : 1, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "lb_no_rtg_dna", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/11", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 5000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : false, 13 | "reward_to_go" : false, 14 | "seed" : 11, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/11/model.pkl -------------------------------------------------------------------------------- 
/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "lb_no_rtg_dna", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/21", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 5000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : false, 13 | "reward_to_go" : false, 14 | "seed" : 21, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "lb_rtg_dna", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/1", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 5000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : false, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/1/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "lb_rtg_dna", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/11", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 5000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : false, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "lb_rtg_dna", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/21", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 5000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : false, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 64} 
-------------------------------------------------------------------------------- /hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "lb_rtg_na", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/1", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 5000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "lb_rtg_na", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/11", 7 | "max_path_length" : null, 8 | 
"min_timesteps_per_batch" : 5000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "lb_rtg_na", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/21", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 5000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 
| "env_name" : "LunarLanderContinuous-v2", 3 | "exp_name" : "ll_b40000_r0.005", 4 | "gamma" : 0.99, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/1", 7 | "max_path_length" : 1000.0, 8 | "min_timesteps_per_batch" : 40000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "LunarLanderContinuous-v2", 3 | "exp_name" : "ll_b40000_r0.005", 4 | "gamma" : 0.99, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/11", 7 | "max_path_length" : 1000.0, 8 | "min_timesteps_per_batch" : 40000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/11/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "LunarLanderContinuous-v2", 3 | "exp_name" : "ll_b40000_r0.005", 4 | "gamma" : 0.99, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/21", 7 | "max_path_length" : 1000.0, 8 | "min_timesteps_per_batch" : 40000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "sb_no_rtg_dna", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/1", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 1000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | 
"normalize_advantages" : false, 13 | "reward_to_go" : false, 14 | "seed" : 1, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "sb_no_rtg_dna", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/11", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 1000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : false, 13 | "reward_to_go" : false, 14 | "seed" : 11, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "sb_no_rtg_dna", 4 | "gamma" : 1.0, 5 | 
"learning_rate" : 0.005, 6 | "logdir" : "data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/21", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 1000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : false, 13 | "reward_to_go" : false, 14 | "seed" : 21, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "sb_rtg_dna", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/1", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 1000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : false, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/1/model.pkl -------------------------------------------------------------------------------- 
/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "sb_rtg_dna", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/11", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 1000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : false, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "sb_rtg_dna", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/21", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 1000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : false, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/21/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/21/model.pkl -------------------------------------------------------------------------------- /hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "sb_rtg_na", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/1", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 1000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/1/model.pkl -------------------------------------------------------------------------------- /hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "sb_rtg_na", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/11", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 1000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 64} 
-------------------------------------------------------------------------------- /hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/11/model.pkl -------------------------------------------------------------------------------- /hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "CartPole-v0", 3 | "exp_name" : "sb_rtg_na", 4 | "gamma" : 1.0, 5 | "learning_rate" : 0.005, 6 | "logdir" : "data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/21", 7 | "max_path_length" : null, 8 | "min_timesteps_per_batch" : 1000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : false, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/21/model.pkl -------------------------------------------------------------------------------- /hw2/hw2_instructions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/hw2_instructions.pdf -------------------------------------------------------------------------------- /hw2/logz.py: 
"""Simple tabular logging utilities, inspired by rllab's logging.

Assumes that each diagnostic gets logged each iteration.

Call logz.configure_output_dir() to start logging to a
tab-separated-values file (some_folder_name/log.txt).

To load the learning curves, you can do, for example

    A = np.genfromtxt('/tmp/expt_1468984536/log.txt', delimiter='\\t', dtype=None, names=True)
    A['EpRewMean']
"""

import atexit
import json
import os
import os.path as osp
import pickle
import shutil
import subprocess
import time

import torch

# ANSI foreground colour codes; colorize() adds 10 to get the background code.
color2num = dict(
    gray=30,
    red=31,
    green=32,
    yellow=33,
    blue=34,
    magenta=35,
    cyan=36,
    white=37,
    crimson=38
)


def colorize(string, color, bold=False, highlight=False):
    """Wrap ``string`` in ANSI escape codes.

    Args:
        string: Text to colour.
        color: A key of ``color2num``; an unknown name raises ``KeyError``.
        bold: Append the bold attribute (``1``) when true.
        highlight: Use the background variant of the colour (code + 10).

    Returns:
        The text surrounded by the escape sequence and a trailing reset.
    """
    num = color2num[color]
    if highlight:
        num += 10
    attr = [str(num)]
    if bold:
        attr.append('1')
    return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string)


class G:
    """Module-wide logger state (used as a namespace, never instantiated)."""
    output_dir = None      # directory that receives log.txt / hyperparams.json / model.pkl
    output_file = None     # open handle on log.txt, or None before configuration
    first_row = True       # True until the first dump_tabular() has written a row
    log_headers = []       # diagnostic keys, in the order first logged
    log_current_row = {}   # key -> value for the iteration being assembled


def configure_output_dir(d=None):
    """
    Set output directory to d, or to /tmp/experiments/<unix time> if d is None.

    The directory must not exist yet; it is created and ``log.txt`` is opened
    inside it for writing. The file is closed automatically at interpreter exit.
    """
    G.output_dir = d or "/tmp/experiments/%i" % int(time.time())
    assert not osp.exists(G.output_dir), "Log dir %s already exists! Delete it first or use a different dir" % G.output_dir
    os.makedirs(G.output_dir)
    G.output_file = open(osp.join(G.output_dir, "log.txt"), 'w')
    atexit.register(G.output_file.close)
    print(colorize("Logging data to %s" % G.output_file.name, 'green', bold=True))


def log_tabular(key, val):
    """
    Log a value of some diagnostic.

    Call this once for each diagnostic quantity, each iteration. Keys seen
    during the first iteration define the table header; later iterations may
    only reuse those keys, and each key may be set once per iteration.
    """
    if G.first_row:
        G.log_headers.append(key)
    else:
        assert key in G.log_headers, "Trying to introduce a new key %s that you didn't include in the first iteration" % key
    assert key not in G.log_current_row, "You already set %s this iteration. Maybe you forgot to call dump_tabular()" % key
    G.log_current_row[key] = val


def save_hyperparams(params):
    """Write ``params`` as sorted JSON to ``hyperparams.json`` in the output dir."""
    with open(osp.join(G.output_dir, "hyperparams.json"), 'w') as out:
        out.write(json.dumps(params, separators=(',\n', '\t:\t'), sort_keys=True))


def save_pytorch_model(model):
    """
    Saves the entire pytorch Module to ``model.pkl`` in the output dir.
    """
    torch.save(model, osp.join(G.output_dir, "model.pkl"))


def dump_tabular():
    """
    Write all of the diagnostics from the current iteration.

    Prints a formatted table to stdout and, when an output file is configured,
    appends one tab-separated row (preceded by the header row on the first
    call). Does nothing if no diagnostic has been logged yet.
    """
    if not G.log_headers:
        # Nothing was ever logged: max() over the key lengths would raise
        # ValueError. Leave first_row untouched so headers can still be
        # registered by a later log_tabular() call.
        return

    max_key_len = max(15, max(len(key) for key in G.log_headers))
    fmt = "| %" + "%d" % max_key_len + "s | %15s |"
    n_slashes = 22 + max_key_len

    vals = []
    print("-" * n_slashes)
    for key in G.log_headers:
        val = G.log_current_row.get(key, "")
        # Anything convertible to float is shown with 3 significant digits.
        valstr = "%8.3g" % val if hasattr(val, "__float__") else val
        print(fmt % (key, valstr))
        vals.append(val)
    print("-" * n_slashes)

    if G.output_file is not None:
        if G.first_row:
            G.output_file.write("\t".join(G.log_headers))
            G.output_file.write("\n")
        G.output_file.write("\t".join(map(str, vals)))
        G.output_file.write("\n")
        G.output_file.flush()
    G.log_current_row.clear()
    G.first_row = False
def plot_data(data, value="AverageReturn"):
    """Plot ``value`` against ``Iteration`` for every condition and save the figure.

    Args:
        data: A DataFrame, or a list of DataFrames that is concatenated first.
            The seaborn call reads the columns ``Iteration``, ``Unit``,
            ``Condition`` and the column named by ``value``.
        value: Name of the log column to draw.

    The figure is written to ``result.png`` in the current directory and then
    shown interactively.
    """
    if isinstance(data, list):
        data = pd.concat(data, ignore_index=True)
    plt.figure(figsize=(16, 9))
    sns.set(style="darkgrid", font_scale=1.5)
    # NOTE(review): sns.tsplot and Legend.draggable() appear to be deprecated
    # in later seaborn/matplotlib releases -- confirm the pinned versions
    # before upgrading either library.
    sns.tsplot(data=data, time="Iteration", value=value, unit="Unit", condition="Condition")
    plt.legend(loc='best').draggable()
    plt.savefig('result.png', bbox_inches='tight')
    plt.show()


def get_datasets(fpath, condition=None):
    """Load every run found below ``fpath`` into a list of DataFrames.

    A run is any directory containing ``log.txt``; its ``hyperparams.json``
    must sit next to it and provide ``exp_name``.

    Args:
        fpath: Root directory to search recursively.
        condition: Legend label for all runs found; falls back to each run's
            ``exp_name`` when empty or None.

    Returns:
        One DataFrame per run, each extended with a ``Unit`` column (running
        index of the run, starting at 0) and a ``Condition`` column.
    """
    datasets = []
    unit = 0
    for root, dirnames, files in os.walk(fpath):
        # Sorting in place makes the traversal (and so the unit numbering)
        # deterministic across platforms.
        dirnames.sort()
        if 'log.txt' not in files:
            continue

        # Close the handle explicitly instead of leaking it.
        with open(os.path.join(root, 'hyperparams.json')) as param_file:
            params = json.load(param_file)
        exp_name = params['exp_name']

        experiment_data = pd.read_table(os.path.join(root, 'log.txt'))
        experiment_data.insert(len(experiment_data.columns), 'Unit', unit)
        experiment_data.insert(
            len(experiment_data.columns),
            'Condition',
            condition or exp_name
        )

        datasets.append(experiment_data)
        unit += 1

    return datasets


def main():
    """Command-line entry point: plot one figure per requested ``--value``."""
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('logdir', nargs='*')
    parser.add_argument('--legend', nargs='*')
    parser.add_argument('--value', default='AverageReturn', nargs='*')
    args = parser.parse_args()

    if args.legend is None:
        legends = [None] * len(args.logdir)
    else:
        # Report bad usage through argparse rather than an assert, which
        # would be stripped under ``python -O``.
        if len(args.legend) != len(args.logdir):
            parser.error("Must give a legend title for each set of experiments.")
        legends = args.legend

    data = []
    for logdir, legend_title in zip(args.logdir, legends):
        data += get_datasets(logdir, legend_title)
    if not data:
        # pd.concat([]) would otherwise fail with an unhelpful error.
        parser.error("No log.txt files found under the given logdir(s).")

    # --value is a plain string when defaulted and a list when given.
    values = args.value if isinstance(args.value, list) else [args.value]
    for value in values:
        plot_data(data, value=value)


if __name__ == "__main__":
    main()
28 | 29 | 30 | 31 | ## Problem 7: LunarLander 32 | 33 | Learning curve for the LunarLander: 34 | 35 | ![result_LunarLander](./result/result_LunarLander.png) 36 | 37 | 38 | 39 | ## Problem 8: HalfCheetah 40 | 41 | Learning curves for different batch size and learning rate settings: 42 | 43 | ![result_HalfCheetah_1](./result/result_HalfCheetah_1.png) 44 | 45 | In general, as the batch size increases, the agent achieves a higher average performance and a relatively more stable learning process. But in some special cases (batch size 30000/50000, learning rate 0.02), a smaller batch size can achieve a better final performance. My guess is that a larger batch size makes it easier for the agent to get stuck at a saddle point when the learning rate is large, which might hurt the final performance. However, it may also simply be due to the instability of the learning algorithm, which makes this special case happen occasionally. 46 | 47 | For the learning rate, a larger learning rate can accelerate the learning process, but might also increase the instability of the learning process. When the learning rate is too large, although learning can be sped up, the agent will finally converge to a lower performance. 48 | 49 | 50 | 51 | Learning curves for different gradient estimator and baseline settings: 52 | 53 | ![result_HalfCheetah_2](./result/result_HalfCheetah_2.png) 54 | 55 | In this problem, using the reward-to-go estimator is the key to learning a good policy. Moreover, the neural network baseline might hurt the performance when the wrong gradient estimator is used. But with the correct gradient estimator, the neural network baseline does help the agent learn a far better policy than before.
-------------------------------------------------------------------------------- /hw2/requirements.txt: -------------------------------------------------------------------------------- 1 | mujoco-py==1.50.1.56 2 | gym==0.10.5 3 | tensorflow==1.10.0 4 | numpy==1.14.5 5 | seaborn 6 | Box2D==2.3.2 7 | -------------------------------------------------------------------------------- /hw2/result/result_CartPole_lb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/result/result_CartPole_lb.png -------------------------------------------------------------------------------- /hw2/result/result_CartPole_sb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/result/result_CartPole_sb.png -------------------------------------------------------------------------------- /hw2/result/result_HalfCheetah_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/result/result_HalfCheetah_1.png -------------------------------------------------------------------------------- /hw2/result/result_HalfCheetah_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/result/result_HalfCheetah_2.png -------------------------------------------------------------------------------- /hw2/result/result_InvertedPendulum.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/result/result_InvertedPendulum.png -------------------------------------------------------------------------------- /hw2/result/result_LunarLander.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/result/result_LunarLander.png -------------------------------------------------------------------------------- /hw3/README.md: -------------------------------------------------------------------------------- 1 | #### This folder contains the code, experiment results and report for HW3. 2 | 3 | 4 | 5 | ## Code 6 | 7 | - ###### dqn_utils.py atari_wrappers.py 8 | 9 | The helper functions for DQN experiments. 10 | 11 | - ###### run_dqn_atari.py run_dqn_ram.py run_dqn_lander.py 12 | 13 | The neural network models and hyperparameters for the different environment settings. 14 | 15 | - ###### dqn.py 16 | 17 | The main model of the DQN experiments. 18 | 19 | - ###### train_ac_f18.py 20 | 21 | The main model of the Actor-Critic experiments. 22 | 23 | - ###### train_pg_f18.py 24 | 25 | The Policy Gradient model. (copied from HW2) 26 | 27 | - ###### logz.py 28 | 29 | The code used for logging. 30 | 31 | - ###### plot.py 32 | 33 | The code used for generating the figures in the report. 34 | 35 | 36 | 37 | ## Folder 38 | 39 | - ###### data 40 | 41 | Contains logging files generated by logz.py 42 | 43 | - ###### result 44 | 45 | Contains learning curves for different experiments.
"""Gym wrappers that preprocess Atari environments for the DQN experiments."""

import cv2
import numpy as np
from collections import deque
import gym
from gym import spaces


class NoopResetEnv(gym.Wrapper):
    def __init__(self, env=None, noop_max=30):
        """Sample initial states by taking random number of no-ops on reset.
        No-op is assumed to be action 0.
        """
        super(NoopResetEnv, self).__init__(env)
        self.noop_max = noop_max
        assert env.unwrapped.get_action_meanings()[0] == 'NOOP'

    def _reset(self):
        """ Do no-op action for a number of steps in [1, noop_max]."""
        self.env.reset()
        noops = np.random.randint(1, self.noop_max + 1)
        obs = None
        for _ in range(noops):
            obs, _, done, _ = self.env.step(0)
            if done:
                # The episode ended during the no-ops; start over so the
                # returned observation belongs to a live episode.
                obs = self.env.reset()
        return obs


class FireResetEnv(gym.Wrapper):
    def __init__(self, env=None):
        """Take action on reset for environments that are fixed until firing."""
        super(FireResetEnv, self).__init__(env)
        assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
        assert len(env.unwrapped.get_action_meanings()) >= 3

    def _reset(self):
        """Reset, then take action 1 (FIRE) followed by action 2."""
        self.env.reset()
        obs, _, done, _ = self.env.step(1)
        if done:
            # Do not keep stepping a finished episode.
            self.env.reset()
        obs, _, done, _ = self.env.step(2)
        if done:
            obs = self.env.reset()
        return obs


class EpisodicLifeEnv(gym.Wrapper):
    def __init__(self, env=None):
        """Make end-of-life == end-of-episode, but only reset on true game over.
        Done by DeepMind for the DQN and co. since it helps value estimation.
        """
        super(EpisodicLifeEnv, self).__init__(env)
        self.lives = 0
        self.was_real_done = True
        self.was_real_reset = False

    def _step(self, action):
        """Step the inner env and report ``done`` whenever a life is lost."""
        obs, reward, done, info = self.env.step(action)
        self.was_real_done = done
        # check current lives, make loss of life terminal,
        # then update lives to handle bonus lives
        lives = self.env.unwrapped.ale.lives()
        if lives < self.lives and lives > 0:
            # for Qbert sometimes we stay in lives == 0 condition for a few frames
            # so it's important to keep lives > 0, so that we only reset once
            # the environment advertises done.
            done = True
        self.lives = lives
        return obs, reward, done, info

    def _reset(self):
        """Reset only when lives are exhausted.
        This way all states are still reachable even though lives are episodic,
        and the learner need not know about any of this behind-the-scenes.
        """
        if self.was_real_done:
            obs = self.env.reset()
            self.was_real_reset = True
        else:
            # no-op step to advance from terminal/lost life state
            obs, _, _, _ = self.env.step(0)
            self.was_real_reset = False
        self.lives = self.env.unwrapped.ale.lives()
        return obs


class MaxAndSkipEnv(gym.Wrapper):
    def __init__(self, env=None, skip=4):
        """Return only every `skip`-th frame"""
        super(MaxAndSkipEnv, self).__init__(env)
        # most recent raw observations (for max pooling across time steps)
        self._obs_buffer = deque(maxlen=2)
        self._skip = skip

    def _step(self, action):
        """Repeat ``action`` up to ``skip`` times, summing the rewards.

        Returns the element-wise maximum over the buffered (at most two most
        recent) observations, the summed reward, and the last ``done``/``info``.
        """
        total_reward = 0.0
        done = None
        for _ in range(self._skip):
            obs, reward, done, info = self.env.step(action)
            self._obs_buffer.append(obs)
            total_reward += reward
            if done:
                break

        max_frame = np.max(np.stack(self._obs_buffer), axis=0)

        return max_frame, total_reward, done, info

    def _reset(self):
        """Clear past frame buffer and init. to first obs. from inner env."""
        self._obs_buffer.clear()
        obs = self.env.reset()
        self._obs_buffer.append(obs)
        return obs


def _process_frame84(frame):
    """Convert a 210x160x3 frame into an 84x84x1 uint8 grayscale image."""
    img = np.reshape(frame, [210, 160, 3]).astype(np.float32)
    # Weighted sum of the three colour channels.
    img = img[:, :, 0] * 0.299 + img[:, :, 1] * 0.587 + img[:, :, 2] * 0.114
    # cv2.resize takes (width, height): the result is 110 rows by 84 columns.
    resized_screen = cv2.resize(img, (84, 110), interpolation=cv2.INTER_LINEAR)
    # Keep rows 18..101 to obtain a square 84x84 crop.
    x_t = resized_screen[18:102, :]
    x_t = np.reshape(x_t, [84, 84, 1])
    return x_t.astype(np.uint8)


class ProcessFrame84(gym.Wrapper):
    """Apply ``_process_frame84`` to every observation of the wrapped env."""

    def __init__(self, env=None):
        super(ProcessFrame84, self).__init__(env)
        self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 1))

    def _step(self, action):
        obs, reward, done, info = self.env.step(action)
        return _process_frame84(obs), reward, done, info

    def _reset(self):
        return _process_frame84(self.env.reset())


class ClippedRewardsWrapper(gym.Wrapper):
    """Replace every reward by its sign (-1, 0 or +1)."""

    def _step(self, action):
        obs, reward, done, info = self.env.step(action)
        return obs, np.sign(reward), done, info


def wrap_deepmind_ram(env):
    """Apply the wrapper stack without frame processing and return the env."""
    env = EpisodicLifeEnv(env)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = ClippedRewardsWrapper(env)
    return env


def wrap_deepmind(env):
    """Apply the wrapper stack, including 84x84 frame processing.

    Requires an environment whose spec id contains ``NoFrameskip``, since
    frame skipping is done here by ``MaxAndSkipEnv``.
    """
    assert 'NoFrameskip' in env.spec.id
    env = EpisodicLifeEnv(env)
    env = NoopResetEnv(env, noop_max=30)
    env = MaxAndSkipEnv(env, skip=4)
    if 'FIRE' in env.unwrapped.get_action_meanings():
        env = FireResetEnv(env)
    env = ProcessFrame84(env)
    env = ClippedRewardsWrapper(env)
    return env
-------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.005, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.005, 4 | "env_name" : "CartPole-v0", 5 | "exp_name" : "cp_100_1", 6 | "gamma" : 1.0, 7 | "logdir" : "data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/1", 8 | "max_path_length" : null, 9 | "min_timesteps_per_batch" : 1000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 1, 14 | "num_target_updates" : 100, 15 | "seed" : 1, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.005, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.005, 4 | "env_name" : "CartPole-v0", 5 | "exp_name" : "cp_100_1", 6 | "gamma" : 1.0, 7 | "logdir" : "data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/11", 8 | "max_path_length" : null, 9 | "min_timesteps_per_batch" : 1000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 1, 14 | "num_target_updates" : 100, 15 | "seed" : 11, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/11/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.005, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.005, 4 | "env_name" : "CartPole-v0", 5 | "exp_name" : "cp_100_1", 6 | "gamma" : 1.0, 7 | "logdir" : "data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/21", 8 | "max_path_length" : null, 9 | "min_timesteps_per_batch" : 1000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 1, 14 | "num_target_updates" : 100, 15 | "seed" : 21, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.005, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.005, 4 | "env_name" : "CartPole-v0", 5 | "exp_name" : "cp_10_10", 6 | "gamma" : 1.0, 7 | "logdir" : "data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/1", 8 | "max_path_length" : null, 9 | "min_timesteps_per_batch" : 1000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | 
"num_grad_steps_per_target_update" : 10, 14 | "num_target_updates" : 10, 15 | "seed" : 1, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.005, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.005, 4 | "env_name" : "CartPole-v0", 5 | "exp_name" : "cp_10_10", 6 | "gamma" : 1.0, 7 | "logdir" : "data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/11", 8 | "max_path_length" : null, 9 | "min_timesteps_per_batch" : 1000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 10, 14 | "num_target_updates" : 10, 15 | "seed" : 11, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.005, 2 | "animate" : false, 3 | 
"critic_learning_rate" : 0.005, 4 | "env_name" : "CartPole-v0", 5 | "exp_name" : "cp_10_10", 6 | "gamma" : 1.0, 7 | "logdir" : "data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/21", 8 | "max_path_length" : null, 9 | "min_timesteps_per_batch" : 1000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 10, 14 | "num_target_updates" : 10, 15 | "seed" : 21, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.005, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.005, 4 | "env_name" : "CartPole-v0", 5 | "exp_name" : "cp_1_100", 6 | "gamma" : 1.0, 7 | "logdir" : "data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/1", 8 | "max_path_length" : null, 9 | "min_timesteps_per_batch" : 1000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 100, 14 | "num_target_updates" : 1, 15 | "seed" : 1, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/1/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.005, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.005, 4 | "env_name" : "CartPole-v0", 5 | "exp_name" : "cp_1_100", 6 | "gamma" : 1.0, 7 | "logdir" : "data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/11", 8 | "max_path_length" : null, 9 | "min_timesteps_per_batch" : 1000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 100, 14 | "num_target_updates" : 1, 15 | "seed" : 11, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.005, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.005, 4 | "env_name" : "CartPole-v0", 5 | "exp_name" : "cp_1_100", 6 | "gamma" : 1.0, 7 | "logdir" : "data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/21", 8 | "max_path_length" : null, 9 | "min_timesteps_per_batch" : 1000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | 
"num_grad_steps_per_target_update" : 100, 14 | "num_target_updates" : 1, 15 | "seed" : 21, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.005, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.005, 4 | "env_name" : "CartPole-v0", 5 | "exp_name" : "cp_1_1", 6 | "gamma" : 1.0, 7 | "logdir" : "data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/1", 8 | "max_path_length" : null, 9 | "min_timesteps_per_batch" : 1000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 1, 14 | "num_target_updates" : 1, 15 | "seed" : 1, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.005, 2 | "animate" : false, 3 | 
"critic_learning_rate" : 0.005, 4 | "env_name" : "CartPole-v0", 5 | "exp_name" : "cp_1_1", 6 | "gamma" : 1.0, 7 | "logdir" : "data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/11", 8 | "max_path_length" : null, 9 | "min_timesteps_per_batch" : 1000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 1, 14 | "num_target_updates" : 1, 15 | "seed" : 11, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.005, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.005, 4 | "env_name" : "CartPole-v0", 5 | "exp_name" : "cp_1_1", 6 | "gamma" : 1.0, 7 | "logdir" : "data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/21", 8 | "max_path_length" : null, 9 | "min_timesteps_per_batch" : 1000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 1, 14 | "num_target_updates" : 1, 15 | "seed" : 21, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/21/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.02, 4 | "env_name" : "HalfCheetah-v2", 5 | "exp_name" : "hc_critic", 6 | "gamma" : 0.9, 7 | "logdir" : "data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/1", 8 | "max_path_length" : 150.0, 9 | "min_timesteps_per_batch" : 30000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 10, 14 | "num_target_updates" : 10, 15 | "seed" : 1, 16 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.02, 4 | "env_name" : "HalfCheetah-v2", 5 | "exp_name" : "hc_critic", 6 | "gamma" : 0.9, 7 | "logdir" : "data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/11", 8 | "max_path_length" : 150.0, 9 | "min_timesteps_per_batch" : 30000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | 
"normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 10, 14 | "num_target_updates" : 10, 15 | "seed" : 11, 16 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.02, 4 | "env_name" : "HalfCheetah-v2", 5 | "exp_name" : "hc_critic", 6 | "gamma" : 0.9, 7 | "logdir" : "data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/21", 8 | "max_path_length" : 150.0, 9 | "min_timesteps_per_batch" : 30000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 10, 14 | "num_target_updates" : 10, 15 | "seed" : 21, 16 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/1/hyperparams.json: 
-------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.02, 5 | "critic_n_layers" : 3, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_cl3", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/1", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.02, 5 | "critic_n_layers" : 3, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_cl3", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/11", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 32} -------------------------------------------------------------------------------- 
/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.02, 5 | "critic_n_layers" : 3, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_cl3", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/21", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.004, 5 | "critic_n_layers" : 2, 6 
| "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.004", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/1", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.004, 5 | "critic_n_layers" : 2, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.004", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/11", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/11/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.004, 5 | "critic_n_layers" : 2, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.004", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/21", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.004, 5 | "critic_n_layers" : 3, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.004_cl3", 8 | "gamma" : 0.9, 9 | "logdir" : 
"data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/1", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.004, 5 | "critic_n_layers" : 3, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.004_cl3", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/11", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/11/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.004, 5 | "critic_n_layers" : 3, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.004_cl3", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/21", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.01, 5 | "critic_n_layers" : 2, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.01", 8 | "gamma" : 0.9, 9 | "logdir" : 
"data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/1", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.01, 5 | "critic_n_layers" : 2, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.01", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/11", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/11/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.01, 5 | "critic_n_layers" : 2, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.01", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/21", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.01, 5 | "critic_n_layers" : 3, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.01_cl3", 8 | "gamma" : 0.9, 9 | "logdir" : 
"data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/1", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.01, 5 | "critic_n_layers" : 3, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.01_cl3", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/11", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/11/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.01, 5 | "critic_n_layers" : 3, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.01_cl3", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/21", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.04, 5 | "critic_n_layers" : 2, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.04", 8 | "gamma" : 0.9, 9 | "logdir" : 
"data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/1", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.04, 5 | "critic_n_layers" : 2, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.04", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/11", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/11/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.04, 5 | "critic_n_layers" : 2, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.04", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/21", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.04, 5 | "critic_n_layers" : 3, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.04_cl3", 8 | "gamma" : 0.9, 9 | "logdir" : 
"data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/1", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.04, 5 | "critic_n_layers" : 3, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.04_cl3", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/11", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/11/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.02, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.04, 5 | "critic_n_layers" : 3, 6 | "env_name" : "HalfCheetah-v2", 7 | "exp_name" : "hc_critic_clr0.04_cl3", 8 | "gamma" : 0.9, 9 | "logdir" : "data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/21", 10 | "max_path_length" : 150.0, 11 | "min_timesteps_per_batch" : 30000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.01, 4 | "env_name" : "InvertedPendulum-v2", 5 | "exp_name" : "ip_critic", 6 | "gamma" : 0.95, 7 | "logdir" : "data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/1", 8 
| "max_path_length" : 1000.0, 9 | "min_timesteps_per_batch" : 5000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 10, 14 | "num_target_updates" : 10, 15 | "seed" : 1, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.01, 4 | "env_name" : "InvertedPendulum-v2", 5 | "exp_name" : "ip_critic", 6 | "gamma" : 0.95, 7 | "logdir" : "data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/11", 8 | "max_path_length" : 1000.0, 9 | "min_timesteps_per_batch" : 5000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 10, 14 | "num_target_updates" : 10, 15 | "seed" : 11, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/11/model.pkl -------------------------------------------------------------------------------- 
/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "animate" : false, 3 | "critic_learning_rate" : 0.01, 4 | "env_name" : "InvertedPendulum-v2", 5 | "exp_name" : "ip_critic", 6 | "gamma" : 0.95, 7 | "logdir" : "data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/21", 8 | "max_path_length" : 1000.0, 9 | "min_timesteps_per_batch" : 5000, 10 | "n_iter" : 100, 11 | "n_layers" : 2, 12 | "normalize_advantages" : true, 13 | "num_grad_steps_per_target_update" : 10, 14 | "num_target_updates" : 10, 15 | "seed" : 21, 16 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.01, 5 | "critic_n_layers" : 3, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_cl3", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/1", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 64} 
-------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.01, 5 | "critic_n_layers" : 3, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_cl3", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/11", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 
0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.01, 5 | "critic_n_layers" : 3, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_cl3", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/21", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.002, 5 | "critic_n_layers" : 2, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.002", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/1", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/1/model.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.002, 5 | "critic_n_layers" : 2, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.002", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/11", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.002, 5 | "critic_n_layers" : 2, 6 | 
"env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.002", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/21", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.002, 5 | "critic_n_layers" : 3, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.002_cl3", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/1", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/1/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.002, 5 | "critic_n_layers" : 3, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.002_cl3", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/11", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.002, 5 | "critic_n_layers" : 3, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : 
"ip_critic_clr0.002_cl3", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/21", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.005, 5 | "critic_n_layers" : 2, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.005", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/1", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/1/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.005, 5 | "critic_n_layers" : 2, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.005", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/11", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.005, 5 | "critic_n_layers" : 2, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.005", 8 | "gamma" : 
0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/21", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.005, 5 | "critic_n_layers" : 3, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.005_cl3", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/1", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/1/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.005, 5 | "critic_n_layers" : 3, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.005_cl3", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/11", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.005, 5 | "critic_n_layers" : 3, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : 
"ip_critic_clr0.005_cl3", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/21", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.02, 5 | "critic_n_layers" : 2, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.02", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/1", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/1/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.02, 5 | "critic_n_layers" : 2, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.02", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/11", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.02, 5 | "critic_n_layers" : 2, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.02", 8 | "gamma" : 0.95, 9 | 
"logdir" : "data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/21", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.02, 5 | "critic_n_layers" : 3, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.02_cl3", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/1", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 1, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/1/model.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.02, 5 | "critic_n_layers" : 3, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : "ip_critic_clr0.02_cl3", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/11", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 11, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"actor_learning_rate" : 0.01, 2 | "actor_n_layers" : 2, 3 | "animate" : false, 4 | "critic_learning_rate" : 0.02, 5 | "critic_n_layers" : 3, 6 | "env_name" : "InvertedPendulum-v2", 7 | "exp_name" : 
"ip_critic_clr0.02_cl3", 8 | "gamma" : 0.95, 9 | "logdir" : "data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/21", 10 | "max_path_length" : 1000.0, 11 | "min_timesteps_per_batch" : 5000, 12 | "n_iter" : 100, 13 | "normalize_advantages" : true, 14 | "num_grad_steps_per_target_update" : 10, 15 | "num_target_updates" : 10, 16 | "seed" : 21, 17 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/dqn_Asterix_double_dqn_AsterixNoFrameskip-v4_04-10-2018_03-16-57/2333/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"env_name" : "AsterixNoFrameskip-v4", 2 | "exp_name" : "Asterix_double_dqn"} -------------------------------------------------------------------------------- /hw3/data/dqn_Asterix_double_dqn_singlebias_AsterixNoFrameskip-v4_08-10-2018_09-14-56/2333/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"env_name" : "AsterixNoFrameskip-v4", 2 | "exp_name" : "Asterix_double_dqn_singlebias"} -------------------------------------------------------------------------------- /hw3/data/dqn_Asterix_vanilla_dqn_AsterixNoFrameskip-v4_04-10-2018_03-16-01/2333/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"env_name" : "AsterixNoFrameskip-v4", 2 | "exp_name" : "Asterix_vanilla_dqn"} -------------------------------------------------------------------------------- 
/hw3/data/dqn_Bowling_double_dqn_BowlingNoFrameskip-v4_04-10-2018_15-13-45/2333/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"env_name" : "BowlingNoFrameskip-v4", 2 | "exp_name" : "Bowling_double_dqn"} -------------------------------------------------------------------------------- /hw3/data/dqn_Bowling_double_dqn_singlebias_BowlingNoFrameskip-v4_10-10-2018_16-14-12/2333/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"env_name" : "BowlingNoFrameskip-v4", 2 | "exp_name" : "Bowling_double_dqn_singlebias"} -------------------------------------------------------------------------------- /hw3/data/dqn_Bowling_vanilla_dqn_BowlingNoFrameskip-v4_04-10-2018_15-14-21/2333/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"env_name" : "BowlingNoFrameskip-v4", 2 | "exp_name" : "Bowling_vanilla_dqn"} -------------------------------------------------------------------------------- /hw3/data/dqn_KungFuMaster_double_dqn_KungFuMasterNoFrameskip-v4_04-10-2018_11-14-51/2333/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"env_name" : "KungFuMasterNoFrameskip-v4", 2 | "exp_name" : "KungFuMaster_double_dqn"} -------------------------------------------------------------------------------- /hw3/data/dqn_KungFuMaster_double_dqn_singlebias_KungFuMasterNoFrameskip-v4_10-10-2018_16-14-59/2333/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"env_name" : "KungFuMasterNoFrameskip-v4", 2 | "exp_name" : "KungFuMaster_double_dqn_singlebias"} -------------------------------------------------------------------------------- /hw3/data/dqn_KungFuMaster_vanilla_dqn_KungFuMasterNoFrameskip-v4_04-10-2018_11-15-11/2333/hyperparams.json: -------------------------------------------------------------------------------- 1 | 
{"env_name" : "KungFuMasterNoFrameskip-v4", 2 | "exp_name" : "KungFuMaster_vanilla_dqn"} -------------------------------------------------------------------------------- /hw3/data/dqn_Pong_double_dqn_PongNoFrameskip-v4_04-10-2018_23-10-47/2333/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"env_name" : "PongNoFrameskip-v4", 2 | "exp_name" : "Pong_double_dqn"} -------------------------------------------------------------------------------- /hw3/data/dqn_Pong_double_dqn_singlebias_PongNoFrameskip-v4_08-10-2018_09-12-30/2333/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"env_name" : "PongNoFrameskip-v4", 2 | "exp_name" : "Pong_double_dqn_singlebias"} -------------------------------------------------------------------------------- /hw3/data/dqn_Pong_vanilla_dqn_PongNoFrameskip-v4_04-10-2018_23-11-56/2333/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"env_name" : "PongNoFrameskip-v4", 2 | "exp_name" : "Pong_vanilla_dqn"} -------------------------------------------------------------------------------- /hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_no_critic", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/1", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/1/model.pkl: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_no_critic", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/11", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "HalfCheetah-v2", 3 | "exp_name" : "hc_no_critic", 4 | "gamma" : 0.9, 5 | "learning_rate" : 0.02, 6 | "logdir" : "data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/21", 7 | "max_path_length" : 150.0, 8 | "min_timesteps_per_batch" : 30000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 
| "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 32} -------------------------------------------------------------------------------- /hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/21/model.pkl -------------------------------------------------------------------------------- /hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/1/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "InvertedPendulum-v2", 3 | "exp_name" : "ip_no_critic", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/1", 7 | "max_path_length" : 1000.0, 8 | "min_timesteps_per_batch" : 5000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 1, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/1/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/1/model.pkl -------------------------------------------------------------------------------- /hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/11/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "InvertedPendulum-v2", 3 | 
"exp_name" : "ip_no_critic", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/11", 7 | "max_path_length" : 1000.0, 8 | "min_timesteps_per_batch" : 5000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 11, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/11/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/11/model.pkl -------------------------------------------------------------------------------- /hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/21/hyperparams.json: -------------------------------------------------------------------------------- 1 | {"animate" : false, 2 | "env_name" : "InvertedPendulum-v2", 3 | "exp_name" : "ip_no_critic", 4 | "gamma" : 0.95, 5 | "learning_rate" : 0.01, 6 | "logdir" : "data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/21", 7 | "max_path_length" : 1000.0, 8 | "min_timesteps_per_batch" : 5000, 9 | "n_iter" : 100, 10 | "n_layers" : 2, 11 | "nn_baseline" : true, 12 | "normalize_advantages" : true, 13 | "reward_to_go" : true, 14 | "seed" : 21, 15 | "size" : 64} -------------------------------------------------------------------------------- /hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/21/model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/21/model.pkl 
-------------------------------------------------------------------------------- /hw3/logz.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | """ 4 | 5 | Some simple logging functionality, inspired by rllab's logging. 6 | Assumes that each diagnostic gets logged each iteration 7 | 8 | Call logz.configure_output_dir() to start logging to a 9 | tab-separated-values file (some_folder_name/log.txt) 10 | 11 | To load the learning curves, you can do, for example 12 | 13 | A = np.genfromtxt('/tmp/expt_1468984536/log.txt',delimiter='\t',dtype=None, names=True) 14 | A['EpRewMean'] 15 | 16 | """ 17 | 18 | import os.path as osp, shutil, time, atexit, os, subprocess 19 | import pickle 20 | import torch 21 | 22 | color2num = dict( 23 | gray=30, 24 | red=31, 25 | green=32, 26 | yellow=33, 27 | blue=34, 28 | magenta=35, 29 | cyan=36, 30 | white=37, 31 | crimson=38 32 | ) 33 | 34 | def colorize(string, color, bold=False, highlight=False): 35 | attr = [] 36 | num = color2num[color] 37 | if highlight: num += 10 38 | attr.append(str(num)) 39 | if bold: attr.append('1') 40 | return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string) 41 | 42 | class G: 43 | output_dir = None 44 | output_file = None 45 | first_row = True 46 | log_headers = [] 47 | log_current_row = {} 48 | 49 | def configure_output_dir(d=None): 50 | """ 51 | Set output directory to d, or to /tmp/somerandomnumber if d is None 52 | """ 53 | G.output_dir = d or "/tmp/experiments/%i"%int(time.time()) 54 | assert not osp.exists(G.output_dir), "Log dir %s already exists! 
Delete it first or use a different dir"%G.output_dir 55 | os.makedirs(G.output_dir) 56 | G.output_file = open(osp.join(G.output_dir, "log.txt"), 'w') 57 | atexit.register(G.output_file.close) 58 | print(colorize("Logging data to %s"%G.output_file.name, 'green', bold=True)) 59 | 60 | def log_tabular(key, val): 61 | """ 62 | Log a value of some diagnostic 63 | Call this once for each diagnostic quantity, each iteration 64 | """ 65 | if G.first_row: 66 | G.log_headers.append(key) 67 | else: 68 | assert key in G.log_headers, "Trying to introduce a new key %s that you didn't include in the first iteration"%key 69 | assert key not in G.log_current_row, "You already set %s this iteration. Maybe you forgot to call dump_tabular()"%key 70 | G.log_current_row[key] = val 71 | 72 | def save_hyperparams(params): 73 | with open(osp.join(G.output_dir, "hyperparams.json"), 'w') as out: 74 | out.write(json.dumps(params, separators=(',\n','\t:\t'), sort_keys=True)) 75 | 76 | def save_pytorch_model(model): 77 | """ 78 | Saves the entire pytorch Module 79 | """ 80 | torch.save(model, osp.join(G.output_dir, "model.pkl")) 81 | 82 | 83 | def dump_tabular(): 84 | """ 85 | Write all of the diagnostics from the current iteration 86 | """ 87 | vals = [] 88 | key_lens = [len(key) for key in G.log_headers] 89 | max_key_len = max(15,max(key_lens)) 90 | keystr = '%'+'%d'%max_key_len 91 | fmt = "| " + keystr + "s | %15s |" 92 | n_slashes = 22 + max_key_len 93 | print("-"*n_slashes) 94 | for key in G.log_headers: 95 | val = G.log_current_row.get(key, "") 96 | if hasattr(val, "__float__"): valstr = "%8.3g"%val 97 | else: valstr = val 98 | print(fmt%(key, valstr)) 99 | vals.append(val) 100 | print("-"*n_slashes) 101 | if G.output_file is not None: 102 | if G.first_row: 103 | G.output_file.write("\t".join(G.log_headers)) 104 | G.output_file.write("\n") 105 | G.output_file.write("\t".join(map(str,vals))) 106 | G.output_file.write("\n") 107 | G.output_file.flush() 108 | G.log_current_row.clear() 109 | 
G.first_row=False 110 | -------------------------------------------------------------------------------- /hw3/plot.py: -------------------------------------------------------------------------------- 1 | import seaborn as sns 2 | import pandas as pd 3 | import matplotlib.pyplot as plt 4 | import json 5 | import os 6 | 7 | """ 8 | Using the plotter: 9 | 10 | Call it from the command line, and supply it with logdirs to experiments. 11 | Suppose you ran an experiment with name 'test', and you ran 'test' for 10 12 | random seeds. The runner code stored it in the directory structure 13 | 14 | data 15 | L test_EnvName_DateTime 16 | L 0 17 | L log.txt 18 | L params.json 19 | L 1 20 | L log.txt 21 | L params.json 22 | . 23 | . 24 | . 25 | L 9 26 | L log.txt 27 | L params.json 28 | 29 | To plot learning curves from the experiment, averaged over all random 30 | seeds, call 31 | 32 | python plot.py data/test_EnvName_DateTime --value AverageReturn 33 | 34 | and voila. To see a different statistics, change what you put in for 35 | the keyword --value. You can also enter /multiple/ values, and it will 36 | make all of them in order. 37 | 38 | 39 | Suppose you ran two experiments: 'test1' and 'test2'. In 'test2' you tried 40 | a different set of hyperparameters from 'test1', and now you would like 41 | to compare them -- see their learning curves side-by-side. Just call 42 | 43 | python plot.py data/test1 data/test2 44 | 45 | and it will plot them both! They will be given titles in the legend according 46 | to their exp_name parameters. If you want to use custom legend titles, use 47 | the --legend flag and then provide a title for each logdir. 
48 | 49 | """ 50 | 51 | def plot_data(data, time="Iteration", value="AverageReturn", combine=False): 52 | if isinstance(data, list): 53 | data = pd.concat(data, ignore_index=True) 54 | plt.figure(figsize=(16, 9)) 55 | sns.set(style="darkgrid", font_scale=1.5) 56 | if not combine: 57 | sns.tsplot(data=data, time=time, value=value, unit="Unit", condition="Condition") 58 | else: 59 | df1 = data.loc[:, [time, value[0], 'Condition']] 60 | df1['Statistics'] = value[0] 61 | df1.rename(columns={value[0]:'Value', 'Condition':'ExpName'}, inplace = True) 62 | df2 = data.loc[:, [time, value[1], 'Condition']] 63 | df2['Statistics'] = value[1] 64 | df2.rename(columns={value[1]:'Value', 'Condition':'ExpName'}, inplace = True) 65 | data = pd.concat([df1, df2], ignore_index=True) 66 | sns.lineplot(x=time, y='Value', hue='ExpName', style='Statistics', data=data) 67 | 68 | plt.legend(loc='best').draggable() 69 | plt.savefig('result.png', bbox_inches='tight') 70 | plt.show() 71 | 72 | 73 | def get_datasets(fpath, condition=None): 74 | unit = 0 75 | datasets = [] 76 | for root, dir, files in os.walk(fpath): 77 | if 'log.txt' in files: 78 | param_path = open(os.path.join(root,'hyperparams.json')) 79 | params = json.load(param_path) 80 | exp_name = params['exp_name'] 81 | 82 | log_path = os.path.join(root,'log.txt') 83 | experiment_data = pd.read_table(log_path) 84 | 85 | experiment_data.insert( 86 | len(experiment_data.columns), 87 | 'Unit', 88 | unit 89 | ) 90 | experiment_data.insert( 91 | len(experiment_data.columns), 92 | 'Condition', 93 | condition or exp_name 94 | ) 95 | 96 | datasets.append(experiment_data) 97 | unit += 1 98 | 99 | return datasets 100 | 101 | 102 | def main(): 103 | import argparse 104 | parser = argparse.ArgumentParser() 105 | parser.add_argument('logdir', nargs='*') 106 | parser.add_argument('--legend', nargs='*') 107 | parser.add_argument('--time', type=str, default='Iteration') 108 | parser.add_argument('--value', default='AverageReturn', nargs='*') 109 | 
parser.add_argument('--combine', action='store_true') 110 | args = parser.parse_args() 111 | 112 | use_legend = False 113 | if args.legend is not None: 114 | assert len(args.legend) == len(args.logdir), \ 115 | "Must give a legend title for each set of experiments." 116 | use_legend = True 117 | 118 | data = [] 119 | if use_legend: 120 | for logdir, legend_title in zip(args.logdir, args.legend): 121 | data += get_datasets(logdir, legend_title) 122 | else: 123 | for logdir in args.logdir: 124 | data += get_datasets(logdir) 125 | 126 | time = args.time 127 | 128 | if isinstance(args.value, list): 129 | values = args.value 130 | else: 131 | values = [args.value] 132 | 133 | if args.combine and len(values) == 2: 134 | plot_data(data, time=time, value=values, combine=True) 135 | else: 136 | for value in values: 137 | plot_data(data, time=time, value=value, combine=False) 138 | 139 | if __name__ == "__main__": 140 | main() 141 | -------------------------------------------------------------------------------- /hw3/report.md: -------------------------------------------------------------------------------- 1 | # CS294-112 HW 3: Q-Learning 2 | 3 | ## Deep Q-learning 4 | 5 | In this part, we run our vanilla DQN and double DQN in 4 different Atari environments, and we also experiment with the single-bias architecture mentioned in the original Double DQN paper. 6 | 7 | Here are the results: 8 | 9 | ### Pong 10 | 11 | ![result_Pong](./result/result_Pong.png) 12 | 13 | ### Kung-Fu Master 14 | 15 | ![result_KungFuMaster](./result/result_KungFuMaster.png) 16 | 17 | ### Bowling 18 | 19 | ![result_Bowling](./result/result_Bowling.png) 20 | 21 | ### Asterix 22 | 23 | ![result_Asterix](./result/result_Asterix.png) 24 | 25 | Surprisingly, in almost all 4 environments above, the vanilla DQN model is superior to the double DQN model. Even with the single-bias variant, the double DQN model only beats the vanilla DQN model in the **Bowling** game.
In my view, this might be because the hyperparameters are not well tuned for the double DQN model. Also, the final performance of the vanilla DQN in the **Asterix** game is far better than the results reported in the original Double DQN paper, which might be another reason why we cannot observe a performance improvement from the double DQN model. 26 | 27 | 28 | 29 | 30 | 31 | ## Actor-Critic 32 | 33 | ### Cartpole 34 | 35 | We run our model with different settings of the critic target update frequency and the number of critic gradient steps: 36 | 37 | ![result_cp](./result/result_cp.png) 38 | 39 | From the results above, we can observe that the actor fails to learn a good policy when the number of critic updates is insufficient. Even if the critic receives enough updates, the target update frequency can significantly affect the performance. 40 | 41 | We can treat the target update frequency as a trade-off between learning stability and learning speed. When the target is updated too frequently, the learning process becomes unstable. This is just like the issue that occurs in Deep Q-learning, which is partially fixed by the target network trick. On the other hand, if the target is updated too slowly, the learning process slows down, which is unacceptable to us, and there is also a risk of overfitting to the currently sampled mini-batch. All in all, to get a good result, we have to tune this hyperparameter carefully. 42 | 43 | 44 | 45 | ### HalfCheetah 46 | 47 | At first, we run our Actor-Critic model with the default hyperparameters and compare it with the Policy Gradient model (with the reward-to-go gradient estimator and neural network baseline) that we implemented in HW2. 48 | 49 | Here is the result: 50 | 51 | ![result_hc_1](./result/result_hc_1.png) 52 | 53 | Generally, we find that the critic network helps but does not significantly improve the final performance.
As the instructions say, the critic network might need a different learning rate and more hidden layers or units. So we tried different learning rates for the critic network: 54 | 55 | ![result_hc_2](./result/result_hc_2.png) 56 | 57 | It did help! After that, we also tried adding a hidden layer to the critic network (with the same number of hidden units), and here are the results: 58 | 59 | ![result_hc_3](./result/result_hc_3.png) 60 | 61 | In the best setting, the performance of the actor does show a nontrivial improvement. To make the comparison clearer, we pick the best setting for the 2-hidden-layer critic and the 3-hidden-layer critic respectively, and plot them with the baseline model: 62 | 63 | ![result_hc_4](./result/result_hc_4.png) 64 | 65 | We can clearly see the improvement. 66 | 67 | Finally, these are the commands for the experiments: 68 | 69 | ```shell 70 | python train_pg_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 \ 71 | -lr 0.02 -rtg --nn_baseline --exp_name hc_no_critic 72 | 73 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 --exp_name hc_critic -ntu 10 -ngsptu 10 74 | 75 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -clr 0.04 --exp_name hc_critic_clr0.04 -ntu 10 -ngsptu 10 76 | 77 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -clr 0.01 --exp_name hc_critic_clr0.01 -ntu 10 -ngsptu 10 78 | 79 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -clr 0.004 --exp_name hc_critic_clr0.004 -ntu 10 -ngsptu 10 80 | 81 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -cl 3 --exp_name hc_critic_cl3 -ntu 10 -ngsptu 10 82 | 83 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -clr 0.04 -cl 3 --exp_name 
hc_critic_clr0.04_cl3 -ntu 10 -ngsptu 10 84 | 85 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -clr 0.01 -cl 3 --exp_name hc_critic_clr0.01_cl3 -ntu 10 -ngsptu 10 86 | 87 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -clr 0.004 -cl 3 --exp_name hc_critic_clr0.004_cl3 -ntu 10 -ngsptu 10 88 | ``` 89 | 90 | 91 | 92 | ### InvertedPendulum 93 | 94 | In this problem, we generally did the same thing as above. 95 | 96 | Firstly, the baseline models: 97 | 98 | ![result_ip_1](./result/result_ip_1.png) 99 | 100 | Secondly, the different learning rate settings for the critic network: 101 | 102 | ![result_ip_2](./result/result_ip_2.png) 103 | 104 | Also, with one more hidden layer: 105 | 106 | ![result_ip_3](./result/result_ip_3.png) 107 | 108 | Finally, the comparison between the baseline and the best model: 109 | 110 | ![result_ip_4](./result/result_ip_4.png) 111 | 112 | With the tuning of the critic's learning rate, the learning curve becomes more stable and we get a small performance improvement. However, the extra layer doesn't help as much as it did in the former problem. This might be because this problem is relatively easy to solve (the baseline model can already reach the max score), or because we have yet to find the appropriate learning rate for the critic. 
113 | 114 | Here are commands for the experiments : 115 | 116 | ```shell 117 | python train_pg_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -rtg --nn_baseline --exp_name ip_no_critic 118 | 119 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 --exp_name ip_critic -ntu 10 -ngsptu 10 120 | 121 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -clr 0.02 --exp_name ip_critic_clr0.02 -ntu 10 -ngsptu 10 122 | 123 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -clr 0.005 --exp_name ip_critic_clr0.005 -ntu 10 -ngsptu 10 124 | 125 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -clr 0.002 --exp_name ip_critic_clr0.002 -ntu 10 -ngsptu 10 126 | 127 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -cl 3 --exp_name ip_critic_cl3 -ntu 10 -ngsptu 10 128 | 129 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -clr 0.02 -cl 3 --exp_name ip_critic_clr0.02_cl3 -ntu 10 -ngsptu 10 130 | 131 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -clr 0.005 -cl 3 --exp_name ip_critic_clr0.005_cl3 -ntu 10 -ngsptu 10 132 | 133 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -clr 0.002 -cl 3 --exp_name ip_critic_clr0.002_cl3 -ntu 10 -ngsptu 10 134 | ``` 135 | 136 | 137 | 138 | -------------------------------------------------------------------------------- /hw3/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.10.5 2 | gym[atari] 3 | box2d 4 | mujoco-py==1.50.1.56 5 | torch==0.4.0 6 | numpy 7 | seaborn 8 | 
opencv-python 9 | -------------------------------------------------------------------------------- /hw3/result/result_Asterix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_Asterix.png -------------------------------------------------------------------------------- /hw3/result/result_Bowling.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_Bowling.png -------------------------------------------------------------------------------- /hw3/result/result_KungFuMaster.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_KungFuMaster.png -------------------------------------------------------------------------------- /hw3/result/result_Pong.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_Pong.png -------------------------------------------------------------------------------- /hw3/result/result_cp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_cp.png -------------------------------------------------------------------------------- /hw3/result/result_hc_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_hc_1.png -------------------------------------------------------------------------------- /hw3/result/result_hc_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_hc_2.png -------------------------------------------------------------------------------- /hw3/result/result_hc_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_hc_3.png -------------------------------------------------------------------------------- /hw3/result/result_hc_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_hc_4.png -------------------------------------------------------------------------------- /hw3/result/result_hc_4_old.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_hc_4_old.png -------------------------------------------------------------------------------- /hw3/result/result_ip_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_ip_1.png -------------------------------------------------------------------------------- 
/hw3/result/result_ip_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_ip_2.png -------------------------------------------------------------------------------- /hw3/result/result_ip_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_ip_3.png -------------------------------------------------------------------------------- /hw3/result/result_ip_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_ip_4.png -------------------------------------------------------------------------------- /hw3/run_dqn_atari.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import gym 3 | from gym import wrappers 4 | import time 5 | import logz 6 | import os.path as osp 7 | import random 8 | import numpy as np 9 | import torch 10 | from torch import nn 11 | 12 | import dqn 13 | from dqn_utils import PiecewiseSchedule, get_wrapper_by_name 14 | from atari_wrappers import wrap_deepmind 15 | 16 | def weights_init(m): 17 | if hasattr(m, 'weight'): 18 | nn.init.xavier_normal_(m.weight) 19 | if hasattr(m, 'bias') and m.bias is not None: 20 | nn.init.constant_(m.bias, 0) 21 | 22 | class DQN(nn.Module): # for atari 23 | def __init__(self, in_channels, num_actions): 24 | # as described in https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf 25 | super(DQN, self).__init__() 26 | self.convnet = nn.Sequential( 27 | nn.Conv2d(in_channels, out_channels=32, 
kernel_size=8, stride=4), 28 | nn.ReLU(True), 29 | nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=2), 30 | nn.ReLU(True), 31 | nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1), 32 | nn.ReLU(True), 33 | ) 34 | self.classifier = nn.Sequential( 35 | nn.Linear(in_features=7 * 7 * 64, out_features=512), 36 | nn.ReLU(True), 37 | nn.Linear(in_features=512, out_features=num_actions), 38 | ) 39 | 40 | self.apply(weights_init) 41 | 42 | 43 | def forward(self, obs): 44 | out = obs.float() / 255 # convert 8-bits RGB color to float in [0, 1] 45 | out = out.permute(0, 3, 1, 2) # reshape to [batch_size, img_c * frames, img_h, img_w] 46 | out = self.convnet(out) 47 | out = out.view(out.size(0), -1) # flatten feature maps to a big vector 48 | out = self.classifier(out) 49 | return out 50 | 51 | def atari_learn(env, 52 | num_timesteps): 53 | # This is just a rough estimate 54 | num_iterations = float(num_timesteps) / 4.0 55 | 56 | lr_multiplier = 1.0 57 | lr_schedule = PiecewiseSchedule( 58 | [ 59 | (0, 1e-4 * lr_multiplier), 60 | (num_iterations / 10, 1e-4 * lr_multiplier), 61 | (num_iterations / 2, 5e-5 * lr_multiplier), 62 | ], 63 | outside_value=5e-5 * lr_multiplier 64 | ) 65 | lr_lambda = lambda t: lr_schedule.value(t) 66 | 67 | optimizer = dqn.OptimizerSpec( 68 | constructor=torch.optim.Adam, 69 | kwargs=dict(eps=1e-4), 70 | lr_lambda=lr_lambda 71 | ) 72 | 73 | def stopping_criterion(env, t): 74 | # notice that here t is the number of steps of the wrapped env, 75 | # which is different from the number of steps in the underlying env 76 | return get_wrapper_by_name(env, "Monitor").get_total_steps() >= num_timesteps 77 | 78 | exploration_schedule = PiecewiseSchedule( 79 | [ 80 | (0, 1.0), 81 | (1e6, 0.1), 82 | (num_iterations / 2, 0.01), 83 | ], 84 | outside_value=0.01 85 | ) 86 | 87 | dqn.learn( 88 | env=env, 89 | q_func=DQN, 90 | optimizer_spec=optimizer, 91 | exploration=exploration_schedule, 92 | stopping_criterion=stopping_criterion, 93 | 
def set_global_seeds(i):
    """Seed every random number generator used by this script.

    Seeds PyTorch (CPU, plus the current CUDA device when one is usable),
    NumPy's global generator and Python's ``random`` module with the same
    value so that runs are repeatable.

    Args:
        i: integer seed shared by all generators.
    """
    torch.manual_seed(i)
    # `is_available` has to be *called*: the bare function object is always
    # truthy, which made the original guard a no-op.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(i)
    np.random.seed(i)
    random.seed(i)
def weights_init(m):
    """Initialise a single sub-module; intended for ``nn.Module.apply``.

    Modules with a ``weight`` tensor get an orthogonal initialisation and
    modules with a ``bias`` tensor get that bias set to zero. Attributes
    that are missing *or* ``None`` are skipped, so layers created with
    ``bias=False`` no longer make the initialiser crash.

    Args:
        m: the module handed over by ``apply``.
    """
    weight = getattr(m, 'weight', None)
    if weight is not None:
        nn.init.orthogonal_(weight)
    # A layer built with bias=False still has a `bias` attribute, set to None.
    bias = getattr(m, 'bias', None)
    if bias is not None:
        nn.init.constant_(bias, 0)
def lander_learn(env,
                 num_timesteps):
    """Train a double-Q DQN agent on the lander environment, then close it.

    The exploration schedule and the stopping criterion are derived from
    ``num_timesteps``; all remaining settings (optimizer spec, network
    class, replay buffer size, ...) come from ``lander_kwargs()``.

    Args:
        env: the (already wrapped) environment to train on.
        num_timesteps: step budget passed to the exploration schedule and
            the stopping criterion.
    """
    # Build each helper object exactly once. The original also created an
    # unused optimizer and then rebuilt the schedule and criterion inline.
    exploration_schedule = lander_exploration_schedule(num_timesteps)
    stopping_criterion = lander_stopping_criterion(num_timesteps)

    dqn.learn(
        env=env,
        exploration=exploration_schedule,
        stopping_criterion=stopping_criterion,
        double_q=True,
        **lander_kwargs()
    )
    env.close()
def weights_init(m):
    """Initialise a single sub-module; intended for ``nn.Module.apply``.

    Only weights are touched (Xavier-uniform). Biases keep whatever
    initialisation the layer constructor gave them.
    """
    if not hasattr(m, 'weight'):
        return
    nn.init.xavier_uniform_(m.weight)


class DQN(nn.Module):  # for atari ram
    """Fully connected Q-network for RAM observations.

    Args:
        in_features: length of the flat observation vector.
        num_actions: size of the output layer (one Q-value per action).
    """

    def __init__(self, in_features, num_actions):
        super().__init__()

        # Hidden stack in_features -> 256 -> 128 -> 64, each followed by ReLU.
        widths = [in_features, 256, 128, 64]
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, out_features=fan_out))
            layers.append(nn.ReLU(True))
        layers.append(nn.Linear(in_features=64, out_features=num_actions))
        self.classifier = nn.Sequential(*layers)

        self.apply(weights_init)

    def forward(self, obs):
        """Return one Q-value per action for each observation in ``obs``."""
        # Scale the 8-bit RAM state into [0, 1].
        scaled = obs.float() / 255
        return self.classifier(scaled)
def main():
    """Run one DQN training session on the Pong RAM environment.

    Prints the fixed seed, builds the environment via ``get_env`` and hands
    it to ``atari_learn`` with a budget of 4e7 timesteps.
    """
    # Fixed seed for repeatability (you may want to randomize it).
    seed = 0
    print('random seed = %d' % seed)

    # Game to play and a free-form tag used to tell experiments apart.
    game, tag = 'Pong-ram-v0', 'default'

    atari_learn(get_env(game, tag, seed), num_timesteps=int(4e7))


if __name__ == "__main__":
    main()