├── LICENSE
├── README.md
├── hw1
    ├── BehavioralCloning.png
    ├── DAgger.png
    ├── README.md
    ├── demo.bash
    ├── experts
    │   ├── Ant-v2.pkl
    │   ├── HalfCheetah-v2.pkl
    │   ├── Hopper-v2.pkl
    │   ├── Humanoid-v2.pkl
    │   ├── Reacher-v2.pkl
    │   └── Walker2d-v2.pkl
    ├── load_policy.py
    ├── main.py
    ├── model.py
    ├── plot.py
    ├── report.md
    ├── requirements.txt
    ├── run_expert.py
    └── train.py
├── hw2
    ├── README.md
    ├── data
    │   ├── hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── lb_rtg_na_CartPole-v0_20-09-2018_09-26-24
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   ├── sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22
    │   │   ├── 1
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   ├── 11
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   │   └── 21
    │   │   │   ├── hyperparams.json
    │   │   │   ├── log.txt
    │   │   │   └── model.pkl
    │   └── sb_rtg_na_CartPole-v0_20-09-2018_09-08-07
    │   │   ├── 1
    │   │       ├── hyperparams.json
    │   │       ├── log.txt
    │   │       └── model.pkl
    │   │   ├── 11
    │   │       ├── hyperparams.json
    │   │       ├── log.txt
    │   │       └── model.pkl
    │   │   └── 21
    │   │       ├── hyperparams.json
    │   │       ├── log.txt
    │   │       └── model.pkl
    ├── hw2_instructions.pdf
    ├── logz.py
    ├── lunar_lander.py
    ├── plot.py
    ├── report.md
    ├── requirements.txt
    ├── result
    │   ├── result_CartPole_lb.png
    │   ├── result_CartPole_sb.png
    │   ├── result_HalfCheetah_1.png
    │   ├── result_HalfCheetah_2.png
    │   ├── result_InvertedPendulum.png
    │   └── result_LunarLander.png
    └── train_pg_f18.py
└── hw3
    ├── README.md
    ├── atari_wrappers.py
    ├── data
        ├── ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        ├── dqn_Asterix_double_dqn_AsterixNoFrameskip-v4_04-10-2018_03-16-57
        │   └── 2333
        │   │   ├── hyperparams.json
        │   │   └── log.txt
        ├── dqn_Asterix_double_dqn_singlebias_AsterixNoFrameskip-v4_08-10-2018_09-14-56
        │   └── 2333
        │   │   ├── hyperparams.json
        │   │   └── log.txt
        ├── dqn_Asterix_vanilla_dqn_AsterixNoFrameskip-v4_04-10-2018_03-16-01
        │   └── 2333
        │   │   ├── hyperparams.json
        │   │   └── log.txt
        ├── dqn_Bowling_double_dqn_BowlingNoFrameskip-v4_04-10-2018_15-13-45
        │   └── 2333
        │   │   ├── hyperparams.json
        │   │   └── log.txt
        ├── dqn_Bowling_double_dqn_singlebias_BowlingNoFrameskip-v4_10-10-2018_16-14-12
        │   └── 2333
        │   │   ├── hyperparams.json
        │   │   └── log.txt
        ├── dqn_Bowling_vanilla_dqn_BowlingNoFrameskip-v4_04-10-2018_15-14-21
        │   └── 2333
        │   │   ├── hyperparams.json
        │   │   └── log.txt
        ├── dqn_KungFuMaster_double_dqn_KungFuMasterNoFrameskip-v4_04-10-2018_11-14-51
        │   └── 2333
        │   │   ├── hyperparams.json
        │   │   └── log.txt
        ├── dqn_KungFuMaster_double_dqn_singlebias_KungFuMasterNoFrameskip-v4_10-10-2018_16-14-59
        │   └── 2333
        │   │   ├── hyperparams.json
        │   │   └── log.txt
        ├── dqn_KungFuMaster_vanilla_dqn_KungFuMasterNoFrameskip-v4_04-10-2018_11-15-11
        │   └── 2333
        │   │   ├── hyperparams.json
        │   │   └── log.txt
        ├── dqn_Pong_double_dqn_PongNoFrameskip-v4_04-10-2018_23-10-47
        │   └── 2333
        │   │   ├── hyperparams.json
        │   │   └── log.txt
        ├── dqn_Pong_double_dqn_singlebias_PongNoFrameskip-v4_08-10-2018_09-12-30
        │   └── 2333
        │   │   ├── hyperparams.json
        │   │   └── log.txt
        ├── dqn_Pong_vanilla_dqn_PongNoFrameskip-v4_04-10-2018_23-11-56
        │   └── 2333
        │   │   ├── hyperparams.json
        │   │   └── log.txt
        ├── hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04
        │   ├── 1
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   ├── 11
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        │   └── 21
        │   │   ├── hyperparams.json
        │   │   ├── log.txt
        │   │   └── model.pkl
        └── ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46
        │   ├── 1
        │       ├── hyperparams.json
        │       ├── log.txt
        │       └── model.pkl
        │   ├── 11
        │       ├── hyperparams.json
        │       ├── log.txt
        │       └── model.pkl
        │   └── 21
        │       ├── hyperparams.json
        │       ├── log.txt
        │       └── model.pkl
    ├── dqn.py
    ├── dqn_utils.py
    ├── logz.py
    ├── lunar_lander.py
    ├── plot.py
    ├── report.md
    ├── requirements.txt
    ├── result
        ├── result_Asterix.png
        ├── result_Bowling.png
        ├── result_KungFuMaster.png
        ├── result_Pong.png
        ├── result_cp.png
        ├── result_hc_1.png
        ├── result_hc_2.png
        ├── result_hc_3.png
        ├── result_hc_4.png
        ├── result_hc_4_old.png
        ├── result_ip_1.png
        ├── result_ip_2.png
        ├── result_ip_3.png
        └── result_ip_4.png
    ├── run_dqn_atari.py
    ├── run_dqn_lander.py
    ├── run_dqn_ram.py
    └── train_ac_f18.py


/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2018 KuNya
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Berkeley Deep RL Course Homework Solutions in PyTorch
2 | 
3 | Solutions for the [CS294-112 Fall 2018 assignments in PyTorch](https://github.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch).
4 | 


--------------------------------------------------------------------------------
/hw1/BehavioralCloning.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/BehavioralCloning.png


--------------------------------------------------------------------------------
/hw1/DAgger.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/DAgger.png


--------------------------------------------------------------------------------
/hw1/README.md:
--------------------------------------------------------------------------------
 1 | #### This folder contains the code and report for HW1.
 2 | 
 3 | 
 4 | 
 5 | ## Code 
 6 | 
 7 | What each file does:
 8 | 
 9 | - ###### main.py    
10 | 
11 |   Hyperparameters are stored here; modify them to run different experiments.
12 | 
13 | - ###### train.py
14 | 
15 |   The Behavioral Cloning and DAgger algorithms are implemented here.
16 | 
17 | - ###### model.py
18 | 
19 |   The code that defines the neural network.
20 | 
21 | - ###### plot.py
22 | 
23 |   The code that generates the figures in the report.
24 | 
25 |   ​
26 | 
27 | 
28 | 
29 | To run the experiment, use the command below:
30 | 
31 | ```shell
32 | python main.py
33 | ```
34 | 
35 | 
36 | 
37 | ## Report
38 | 
39 | Please see [report.md](./report.md)


--------------------------------------------------------------------------------
/hw1/demo.bash:
--------------------------------------------------------------------------------
#!/bin/bash
# Run one rendered rollout for each expert policy stored under experts/.
#
# -e: stop at the first failing command
# -u: treat unset variables as errors
# -x: echo each command before it runs
set -eux
for e in Hopper-v2 Ant-v2 HalfCheetah-v2 Humanoid-v2 Reacher-v2 Walker2d-v2
do
    # Expansions are quoted so a name containing spaces or glob characters
    # would still reach run_expert.py as a single argument.
    python run_expert.py "experts/$e.pkl" "$e" --render --num_rollouts=1
done
7 | 


--------------------------------------------------------------------------------
/hw1/experts/Ant-v2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/experts/Ant-v2.pkl


--------------------------------------------------------------------------------
/hw1/experts/HalfCheetah-v2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/experts/HalfCheetah-v2.pkl


--------------------------------------------------------------------------------
/hw1/experts/Hopper-v2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/experts/Hopper-v2.pkl


--------------------------------------------------------------------------------
/hw1/experts/Humanoid-v2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/experts/Humanoid-v2.pkl


--------------------------------------------------------------------------------
/hw1/experts/Reacher-v2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/experts/Reacher-v2.pkl


--------------------------------------------------------------------------------
/hw1/experts/Walker2d-v2.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw1/experts/Walker2d-v2.pkl


--------------------------------------------------------------------------------
/hw1/load_policy.py:
--------------------------------------------------------------------------------
 1 | import pickle
 2 | import numpy as np
 3 | from functools import reduce
 4 | 
 5 | 
 6 | def load_policy(filename):
 7 |     def read_layer(l):
 8 |         assert list(l.keys()) == ['AffineLayer']
 9 |         assert sorted(l['AffineLayer'].keys()) == ['W', 'b']
10 |         W, b = l['AffineLayer']['W'].astype(np.float32), l['AffineLayer']['b'].astype(np.float32)
11 |         return lambda x: np.matmul(x, W) + b
12 |         
13 |     def build_nonlin_fn(nonlin_type):
14 |         if nonlin_type == 'lrelu':
15 |             leak = 0.01 # openai/imitation nn.py:233
16 |             return lambda x: 0.5 * (1 + leak) * x + 0.5 * (1 - leak) * np.abs(x)
17 |         elif nonlin_type == 'tanh':
18 |             return lambda x: np.tanh(x)
19 |         else:
20 |             raise NotImplementedError(nonlin_type)
21 |     
22 |     with open(filename, 'rb') as f:
23 |         data = pickle.loads(f.read())
24 | 
25 |     # assert len(data.keys()) == 2
26 |     nonlin_type = data['nonlin_type']
27 |     nonlin_fn = build_nonlin_fn(nonlin_type)
28 |     policy_type = [k for k in data.keys() if k != 'nonlin_type'][0]
29 | 
30 |     assert policy_type == 'GaussianPolicy', 'Policy type {} not supported'.format(policy_type)
31 |     policy_params = data[policy_type]
32 | 
33 |     assert set(policy_params.keys()) == {'logstdevs_1_Da', 'hidden', 'obsnorm', 'out'}
34 |     
35 |     # Build observation normalization layer
36 |     assert list(policy_params['obsnorm'].keys()) == ['Standardizer']
37 |     obsnorm_mean = policy_params['obsnorm']['Standardizer']['mean_1_D']
38 |     obsnorm_meansq = policy_params['obsnorm']['Standardizer']['meansq_1_D']
39 |     obsnorm_stdev = np.sqrt(np.maximum(0, obsnorm_meansq - np.square(obsnorm_mean)))
40 |     #print('obs', obsnorm_mean.shape, obsnorm_stdev.shape)
41 | 
42 |     
43 |     # Build hidden layers
44 |     assert list(policy_params['hidden'].keys()) == ['FeedforwardNet']
45 |     layer_params = policy_params['hidden']['FeedforwardNet']
46 |     layers = []
47 |     for layer_name in sorted(layer_params.keys()):
48 |         l = layer_params[layer_name]
49 |         fc_layer = read_layer(l)
50 |         layers += [fc_layer, nonlin_fn]
51 | 
52 |     # Build output layer
53 |     fc_layer = read_layer(policy_params['out'])
54 |     layers += [fc_layer]
55 |     layers_forward = lambda inp: reduce(lambda x, fn: fn(x), [inp] + layers)
56 |     
57 |     
58 |     def forward_pass(obs):
59 |         ''' Build the forward pass for policy net.
60 | 
61 |         Input: batched observation. (shape: [batch_size, obs_dim])
62 | 
63 |         Output: batched action. (shape: [batch_size, action_dim])
64 |         '''
65 |         obs = obs.astype(np.float32)
66 |         normed_obs = (obs - obsnorm_mean) / (obsnorm_stdev + 1e-6) # 1e-6 constant from Standardizer class in nn.py:409 in openai/imitation
67 |         output = layers_forward(normed_obs.astype(np.float32))
68 | 
69 |         return output
70 | 
71 |     return forward_pass
72 | 


--------------------------------------------------------------------------------
/hw1/main.py:
--------------------------------------------------------------------------------
 1 | import gym
 2 | import torch
 3 | from load_policy import load_policy
 4 | from model import Agent
 5 | from train import BehavioralCloning, DAgger, Eval
 6 | 
 7 | class Config():
 8 |     seed = 3
 9 |     envname = 'Humanoid-v2'
10 |     env = gym.make(envname)
11 |     method = 'DA' # BC: Behavioral Cloning   DA: DAgger
12 |     device = torch.device('cuda')
13 |     expert_path = './experts/'
14 |     model_save_path = './models/'
15 |     n_expert_rollouts = 30 # number of rollouts from expert
16 |     n_dagger_rollouts = 10 # number of new rollouts from learned model for a DAgger iteration
17 |     n_dagger_iter = 10 # number of DAgger iterations
18 |     n_eval_rollouts = 10 # number of rollouts for evaluating a policy
19 |     L2 = 0.00001
20 |     lr = 0.0001
21 |     epochs = 20
22 |     batch_size = 64
23 | 
24 |     eval_steps = 500
25 |     
26 | 
27 | 
def main():
    """Train an agent with the configured method and print evaluation stats.

    Builds a ``Config``, optionally seeds the environment and torch, creates
    an ``Agent`` sized from the environment's observation and action spaces,
    loads the expert policy for ``config.envname`` and trains with either
    Behavioral Cloning (``'BC'``) or DAgger (``'DA'``). Afterwards the mean
    and standard deviation returned by ``Eval`` are printed for the expert
    and for the agent.

    Raises:
        NotImplementedError: if ``config.method`` is neither ``'BC'`` nor
            ``'DA'``.
    """
    config = Config()
    print('*' * 20, config.envname, config.method, '*' * 20)
    env = config.env
    # A falsy seed (None or 0) leaves both RNGs unseeded.
    if config.seed:
        env.seed(config.seed)
        torch.manual_seed(config.seed)
    agent = Agent(env.observation_space.shape[0], env.action_space.shape[0]).to(config.device)
    expert = load_policy(config.expert_path + config.envname + '.pkl')
    method = config.method

    if method == 'BC':
        agent = BehavioralCloning(config, agent, expert)
    elif method == 'DA':
        agent = DAgger(config, agent, expert)
    else:
        # The exception must be raised, not merely constructed; otherwise an
        # unknown method silently falls through and evaluates an untrained
        # agent.
        raise NotImplementedError(method)

    avrg_mean, avrg_std = Eval(config, expert)
    print('[expert] avrg_mean:{:.2f}  avrg_std:{:.2f}'.format(avrg_mean, avrg_std))

    avrg_mean, avrg_std = Eval(config, agent)
    print('[agent] avrg_mean:{:.2f}  avrg_std:{:.2f}'.format(avrg_mean, avrg_std))


if __name__ == '__main__':
    main()
55 |     
56 | 


--------------------------------------------------------------------------------
/hw1/model.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from torch import nn
 3 | 
 4 | class Agent(nn.Module):
 5 |     def __init__(self, in_dim, out_dim):
 6 |         super(Agent, self).__init__()
 7 | 
 8 |         self.mlp = nn.Sequential(
 9 |             nn.Linear(in_dim, 64),
10 |             nn.ReLU(True),
11 |             nn.Linear(64, 64),
12 |             nn.ReLU(True),
13 |             nn.Linear(64, out_dim),
14 |         )
15 | 
16 |     def forward(self, obs):
17 |         return self.mlp(obs)
18 | 


--------------------------------------------------------------------------------
/hw1/plot.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import matplotlib.pyplot as plt
 3 | import seaborn as sns
 4 | 
 5 | 
 6 | # plot for BehavioralCloning
 7 | plt.figure(figsize=(12, 8))
 8 | 
 9 | n_rollouts = [10, 20, 30, 40, 50]
10 | x_length = len(n_rollouts)
11 | r_means = [3398.92, 4347.00, 4844.75, 4789.42, 4802.01]
12 | r_stds = [1178.80, 1216.37, 105.57, 130.72, 151.23]
13 | expert_mean = [4851.54] * x_length 
14 | expert_std = [134.09] * x_length 
15 | 
16 | plt.errorbar(n_rollouts, r_means, yerr=r_stds, marker='o', capsize=8, linestyle='--', label='Behavioral Cloning')
17 | plt.errorbar(n_rollouts, expert_mean, yerr=expert_std, marker='o', capsize=8, linestyle='--', label='Expert')
18 | plt.xlabel('Number of Expert Rollouts', fontsize=18)
19 | plt.ylabel('Average Return', fontsize=18)
20 | plt.xlim([5, 55])
21 | plt.ylim([2000, 5600])
22 | plt.legend(loc='lower right', fontsize=16)
23 | plt.savefig('./BehavioralCloning.png', format='png')
24 | #plt.show()
25 | 
26 | # plot for DAgge
27 | plt.figure(figsize=(12, 8))
28 | 
29 | n_iters = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
30 | x_length = len(n_iters)
31 | r_means = [544.86, 629.28, 1081.33, 1087.13, 1909.09, 4150.72, 6840.35, 7865.56, 9568.10, 8978.60]
32 | r_stds = [129.61, 200.03, 819.98, 423.87, 1193.86, 2163.90, 3266.94, 3581.33, 2205.40, 2832.79]
33 | expert_mean = [10438.35] * x_length
34 | expert_std = [39.50] * x_length
35 | BC_mean = [954.69] * x_length
36 | BC_std = [490.10] * x_length
37 | 
38 | plt.errorbar(n_iters, r_means, yerr=r_stds, marker='o', capsize=8, linestyle='--', label='DAgger')
39 | plt.errorbar(n_iters, expert_mean, yerr=expert_std, marker='o', capsize=8, linestyle='--', label='Expert')
40 | plt.errorbar(n_iters, BC_mean, yerr=BC_std, marker='o', capsize=8, linestyle='--', label='Behavioral Cloning')
41 | plt.xlabel('Number of DAgger Iterations', fontsize=18)
42 | plt.ylabel('Average Return', fontsize=18)
43 | plt.legend(fontsize=16)
44 | plt.savefig('./DAgger.png', format='png')
45 | #plt.show()
46 | 
47 | 
48 | 


--------------------------------------------------------------------------------
/hw1/report.md:
--------------------------------------------------------------------------------
 1 | # CS294-112 HW 1: Imitation Learning
 2 | 
 3 | In all of the experiments below, we use the same network architecture as the expert.
 4 | 
 5 | ## Problem 2.2
 6 | 
 7 | First, we sample 50 rollouts from the expert; then we train our model with Behavioral Cloning on this data.
 8 | 
 9 | During learning, we train for 50 epochs with the Adam optimizer (batch size = 64, lr = 0.0001, weight_decay = 0.00001).
10 | 
11 | Finally, we pick the model that performs best during training as our final model.
12 | 
13 | To evaluate a model, we run it 10 times in the environment and calculate the mean and standard deviation of the total rewards.
14 | 
15 | The performance of the final model is reported below.
16 | 
17 | |   Task name    |   learned model   |      expert      |
18 | | :------------: | :---------------: | :--------------: |
19 | |     Ant-v2     | 4802.01 ± 151.23  | 4851.54 ± 134.09 |
20 | | HalfCheetah-v2 |  4085.17 ± 73.67  | 4095.67 ± 122.68 |
21 | |   Hopper-v2    | 1368.64 ± 215.29  |  3779.60 ± 4.13  |
22 | |  Humanoid-v2   |  954.69 ± 490.10  | 10438.35 ± 39.50 |
23 | |   Reacher-v2   |   -10.12 ± 3.23   |   -4.03 ± 2.37   |
24 | |  Walker2d-v2   | 3305.00 ± 1514.62 | 5515.11 ± 42.41  |
25 | 
26 | 
27 | 
28 | ## Problem 2.3
29 | 
30 | In this part, we only change the total number of expert rollouts for the Behavioral Cloning algorithm.
31 | 
32 | For the other hyperparameters, we use the same setup as in the experiments above.
33 | 
34 | The performance of the final learned model with different numbers of expert rollouts is reported below.
35 | 
36 | ![BehavioralCloning](./BehavioralCloning.png)
37 | 
38 | ## Problem 3.2
39 | 
40 | Before the DAgger interactions, we first construct our dataset from 30 expert rollouts.
41 | 
42 | After that, we train for 20 epochs to fit the dataset (using the same hyperparameters as above).
43 | 
44 | Then, we run our model in the environment and collect 10 rollouts labeled with the expert's gold actions.
45 | 
46 | Finally, we add these new rollouts to our dataset and repeat the above process.
47 | 
48 | The performance of the learned model at each iteration is reported below.
49 | 
50 | ![DAgger](./DAgger.png)


--------------------------------------------------------------------------------
/hw1/requirements.txt:
--------------------------------------------------------------------------------
1 | gym==0.10.5
2 | mujoco-py==1.50.1.56
3 | numpy
4 | seaborn
5 | 


--------------------------------------------------------------------------------
/hw1/run_expert.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 | Code to load an expert policy and generate roll-out data for behavioral cloning.
 4 | Example usage:
 5 |     python run_expert.py experts/Humanoid-v2.pkl Humanoid-v2 --render \
 6 |             --num_rollouts 20
 7 | 
 8 | Modified from the script written by Jonathan Ho (hoj@openai.com)
 9 | """
10 | 
11 | import os
12 | import argparse
13 | import pickle
14 | import numpy as np
15 | import gym
16 | import load_policy
17 | 
def main():
    """Roll out an expert policy and pickle the visited observations and actions.

    Parses the command line, runs ``--num_rollouts`` episodes of ``envname``
    with the policy loaded from ``expert_policy_file``, prints the returns and
    their mean/std, and writes ``expert_data/<envname>.pkl`` containing the
    ``'observations'`` and ``'actions'`` arrays.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert roll outs')
    args = parser.parse_args()

    print('loading and building expert policy')
    policy_net = load_policy.load_policy(args.expert_policy_file)
    print('loaded and built')

    env = gym.make(args.envname)
    # An explicit --max_timesteps overrides the environment's own step limit.
    max_steps = args.max_timesteps or env.spec.timestep_limit

    returns = []
    observations = []
    actions = []
    for i in range(args.num_rollouts):
        print('iter', i)
        obs = env.reset()
        done = False
        totalr = 0.
        steps = 0
        while not done:
            # The policy takes a batch, so add a leading batch axis.
            action = policy_net(obs[None, :])
            observations.append(obs)
            actions.append(action)
            obs, r, done, _ = env.step(action)
            totalr += r
            steps += 1
            if args.render:
                env.render()
            if steps % 100 == 0:
                print("%i/%i" % (steps, max_steps))
            if steps >= max_steps:
                break
        returns.append(totalr)

    print('returns', returns)
    print('mean return', np.mean(returns))
    print('std of return', np.std(returns))

    expert_data = {'observations': np.array(observations),
                   'actions': np.array(actions)}

    # exist_ok avoids the check-then-create race of a separate existence test.
    os.makedirs('expert_data', exist_ok=True)

    with open(os.path.join('expert_data', args.envname + '.pkl'), 'wb') as f:
        pickle.dump(expert_data, f, pickle.HIGHEST_PROTOCOL)


if __name__ == '__main__':
    main()
73 | 


--------------------------------------------------------------------------------
/hw1/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import torch
  3 | import numpy as np
  4 | from torch import optim, nn
  5 | from torch.utils.data import TensorDataset, ConcatDataset, DataLoader
  6 | 
  7 | def agent_wapper(config, agent):
  8 |     def fn(obs):
  9 |         with torch.no_grad():
 10 |             obs = obs.astype(np.float32)
 11 |             assert len(obs.shape) == 2
 12 |             obs = torch.from_numpy(obs).to(config.device)
 13 |             action = agent(obs)
 14 |         return action.cpu().numpy()
 15 |     return fn
 16 | 
 17 | def fit_dataset(config, agent, dataset, n_epochs):
 18 |     optimizer = optim.Adam(agent.parameters(), lr=config.lr, weight_decay=config.L2)
 19 |     loss_fn = nn.MSELoss()
 20 |     dataloader = DataLoader(dataset, batch_size=config.batch_size, shuffle=True)
 21 |     
 22 |     step = 0
 23 |     best_reward = None
 24 |     loss_his = []
 25 |     
 26 |     for k in range(n_epochs):
 27 |         for batch in dataloader:
 28 |             obs, gold_actions = batch
 29 |             pred_actions = agent(obs)
 30 |             loss = loss_fn(pred_actions, gold_actions)
 31 | 
 32 |             optimizer.zero_grad()
 33 |             loss.backward()
 34 |             optimizer.step()
 35 | 
 36 |             loss_his.append(loss.item())
 37 | 
 38 |             if step % config.eval_steps == 0:
 39 |                 avrg_mean, avrg_std = Eval(config, agent_wapper(config, agent))
 40 |                 avrg_loss = np.mean(loss_his)
 41 |                 loss_his = []
 42 |                 print('[epoch {}  step {}] loss: {:.4f}  r_mean: {:.2f}  r_std: {:.2f}'.format(
 43 |                     k + 1, step, avrg_loss, avrg_mean, avrg_std))
 44 | 
 45 |                 avrg_reward = avrg_mean - avrg_std
 46 |                 if best_reward is None or best_reward < avrg_reward:
 47 |                     best_reward = avrg_reward
 48 |                     save_model(config, agent, config.model_save_path)
 49 |                 
 50 |             step += 1
 51 |     
 52 |     load_model(config, agent, config.model_save_path)
 53 | 
 54 | def BehavioralCloning(config, agent, expert):
 55 | 
 56 |     # get expert demonstration
 57 |     expert_obs, expert_actions, *_ = run_agent(config, expert, config.n_expert_rollouts)
 58 |     expert_obs = torch.from_numpy(expert_obs).to(config.device)
 59 |     expert_actions = torch.from_numpy(expert_actions).to(config.device)
 60 |     dataset = TensorDataset(expert_obs, expert_actions)
 61 | 
 62 |     # training agent
 63 |     fit_dataset(config, agent, dataset, config.epochs)
 64 | 
 65 |     return agent_wapper(config, agent)
 66 | 
 67 | def DAgger(config, agent, expert):
 68 |     # get expert demonstration
 69 |     expert_obs, expert_actions, *_ = run_agent(config, expert, config.n_expert_rollouts)
 70 |     expert_obs = torch.from_numpy(expert_obs).to(config.device)
 71 |     expert_actions = torch.from_numpy(expert_actions).to(config.device)
 72 |     dataset = TensorDataset(expert_obs, expert_actions)
 73 | 
 74 |     for k in range(config.n_dagger_iter):
 75 |         # training agent
 76 |         fit_dataset(config, agent, dataset, config.epochs)
 77 |         
 78 |         # run agent to get new on-policy observations
 79 |         new_obs, *_ = run_agent(config, agent_wapper(config, agent), config.n_dagger_rollouts)
 80 |         expert_actions = expert(new_obs)
 81 |         
 82 |         new_obs = torch.from_numpy(new_obs).to(config.device)
 83 |         expert_actions = torch.from_numpy(expert_actions).to(config.device)
 84 |         new_data = TensorDataset(new_obs, expert_actions)
 85 |         
 86 |         # add new data to dataset
 87 |         dataset = ConcatDataset([dataset, new_data])
 88 |             
 89 | 
 90 |         avrg_mean, avrg_std = Eval(config, agent_wapper(config, agent))
 91 |         print('[DAgger iter {}] r_mean: {:.2f}  r_std: {:.2f}'.format(k + 1, avrg_mean, avrg_std))
 92 | 
 93 |         
 94 |     return agent_wapper(config, agent)
 95 |     
 96 | def run_agent(config, agent, num_rollouts):
 97 |     env = config.env
 98 |     max_steps = env.spec.timestep_limit
 99 | 
100 |     returns = []
101 |     observations = []
102 |     actions = []
103 |     for _ in range(num_rollouts):
104 |         obs = env.reset()
105 |         done = False
106 |         totalr = 0
107 |         steps = 0
108 |         while not done:
109 |             action = agent(obs[None, :])
110 |             action = action.reshape(-1)
111 |             observations.append(obs)
112 |             actions.append(action)
113 |             obs, r, done, _ = env.step(action)
114 |             totalr += r
115 |             steps += 1
116 |             if steps >= max_steps:
117 |                 break
118 |         returns.append(totalr)
119 | 
120 |     avrg_mean, avrg_std = np.mean(returns), np.std(returns)
121 |     observations = np.array(observations).astype(np.float32)
122 |     actions = np.array(actions).astype(np.float32)
123 | 
124 |     return observations, actions, avrg_mean, avrg_std
125 | 
def Eval(config, agent):
    """Return ``(mean, std)`` of the returns over ``config.n_eval_rollouts`` rollouts."""
    rollout = run_agent(config, agent, config.n_eval_rollouts)
    # The return statistics are the last two entries of the rollout result.
    return rollout[-2], rollout[-1]
130 | 
131 | 
def save_model(config, model, PATH):
    """Save ``model``'s state dict to ``<PATH><envname>-parameters.tar``.

    ``PATH`` is used as a plain string prefix and is created as a directory
    first if it does not exist yet.
    """
    if not os.path.exists(PATH):
        os.makedirs(PATH)
    checkpoint_file = '{}{}-parameters.tar'.format(PATH, config.envname)
    torch.save(model.state_dict(), checkpoint_file)
    print('model saved.')
138 | 
def load_model(config, model, PATH):
    """Load ``<PATH><envname>-parameters.tar`` into ``model`` in place.

    Args:
        config: settings object; reads ``envname`` and, when present,
            ``device``.
        model: module whose parameters are overwritten.
        PATH: string prefix of the checkpoint file, as given to ``save_model``.
    """
    PATH = PATH + config.envname + '-' + 'parameters.tar'
    # Remap stored tensors onto the configured device so a checkpoint written
    # on a GPU can still be restored on a CPU-only machine. A config without
    # `device` gets torch.load's default behaviour.
    state = torch.load(PATH, map_location=getattr(config, 'device', None))
    model.load_state_dict(state)
    print('model loaded.')
143 | 


--------------------------------------------------------------------------------
/hw2/README.md:
--------------------------------------------------------------------------------
 1 | #### This folder contains the code, experiment results, and report for HW2.
 2 | 
 3 | 
 4 | 
 5 | ## Code 
 6 | 
 7 | - ###### train_pg_f18.py    
 8 | 
 9 |   The entire model for different experiments.
10 | 
11 | - ###### logz.py
12 | 
13 |   The code used for logging.
14 | 
15 | - ###### plot.py
16 | 
17 |   The code used for generating the figures in the report.
18 | 
19 |   ​
20 | 
21 | ## Folder
22 | 
23 | - ###### data
24 | 
25 |   Contains logging files generated by logz.py
26 | 
27 | - ###### result
28 | 
29 |   Contains learning curves for different experiments.
30 | 
31 | 
32 | 
33 | ## Report
34 | 
35 | Please see [report.md](./report.md)


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b10000_r0.005",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	10000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b10000_r0.005",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	10000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b10000_r0.005",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	10000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.005_HalfCheetah-v2_20-09-2018_09-52-26/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b10000_r0.01",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	10000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b10000_r0.01",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	10000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b10000_r0.01",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	10000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.01_HalfCheetah-v2_20-09-2018_09-52-50/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b10000_r0.02",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	10000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b10000_r0.02",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	10000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b10000_r0.02",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	10000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.02_HalfCheetah-v2_20-09-2018_09-53-11/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"InvertedPendulum-v2",
 3 | "exp_name"	:	"hc_b10000_r0.05",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.05,
 6 | "logdir"	:	"data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/1",
 7 | "max_path_length"	:	1000.0,
 8 | "min_timesteps_per_batch"	:	10000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"InvertedPendulum-v2",
 3 | "exp_name"	:	"hc_b10000_r0.05",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.05,
 6 | "logdir"	:	"data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/11",
 7 | "max_path_length"	:	1000.0,
 8 | "min_timesteps_per_batch"	:	10000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"InvertedPendulum-v2",
 3 | "exp_name"	:	"hc_b10000_r0.05",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.05,
 6 | "logdir"	:	"data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/21",
 7 | "max_path_length"	:	1000.0,
 8 | "min_timesteps_per_batch"	:	10000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b10000_r0.05_InvertedPendulum-v2_20-09-2018_09-29-20/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.005",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.005",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.005",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.005_HalfCheetah-v2_20-09-2018_10-10-08/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.01",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.01",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.01",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.01_HalfCheetah-v2_20-09-2018_10-10-16/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_10-10-23/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02_no_rtg_bl",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-27-06/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	false,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02_no_rtg_bl",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-27-06/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	false,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02_no_rtg_bl",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-27-06/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	false,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_no_rtg_bl_HalfCheetah-v2_20-09-2018_11-27-06/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02_no_rtg_no_bl",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.002,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-26-08/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	false,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02_no_rtg_no_bl",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.002,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-26-08/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	false,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02_no_rtg_no_bl",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.002,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-26-08/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	false,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_no_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-08/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02_rtg_bl",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-03-08/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02_rtg_bl",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-03-08/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02_rtg_bl",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-03-08/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_rtg_bl_HalfCheetah-v2_20-09-2018_11-03-08/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02_rtg_no_bl",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-26-44/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02_rtg_no_bl",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-26-44/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b30000_r0.02_rtg_no_bl",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b30000_r0.02_HalfCheetah-v2_20-09-2018_11-26-44/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b30000_r0.02_rtg_no_bl_HalfCheetah-v2_20-09-2018_11-26-44/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"InvertedPendulum-v2",
 3 | "exp_name"	:	"hc_b300_r0.01",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/1",
 7 | "max_path_length"	:	1000.0,
 8 | "min_timesteps_per_batch"	:	300,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"InvertedPendulum-v2",
 3 | "exp_name"	:	"hc_b300_r0.01",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/11",
 7 | "max_path_length"	:	1000.0,
 8 | "min_timesteps_per_batch"	:	300,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"InvertedPendulum-v2",
 3 | "exp_name"	:	"hc_b300_r0.01",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/21",
 7 | "max_path_length"	:	1000.0,
 8 | "min_timesteps_per_batch"	:	300,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b300_r0.01_InvertedPendulum-v2_20-09-2018_09-28-52/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b50000_r0.005",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	50000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b50000_r0.005",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	50000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b50000_r0.005",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	50000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.005_HalfCheetah-v2_20-09-2018_10-28-25/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b50000_r0.01",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	50000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b50000_r0.01",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	50000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b50000_r0.01",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	50000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.01_HalfCheetah-v2_20-09-2018_10-28-34/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b50000_r0.02",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	50000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b50000_r0.02",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	50000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_b50000_r0.02",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	50000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/hc_b50000_r0.02_HalfCheetah-v2_20-09-2018_10-28-44/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"lb_no_rtg_dna",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/1",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	5000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	false,
13 | "reward_to_go"	:	false,
14 | "seed"	:	1,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"lb_no_rtg_dna",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/11",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	5000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	false,
13 | "reward_to_go"	:	false,
14 | "seed"	:	11,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"lb_no_rtg_dna",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/21",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	5000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	false,
13 | "reward_to_go"	:	false,
14 | "seed"	:	21,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_no_rtg_dna_CartPole-v0_20-09-2018_09-11-01/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"lb_rtg_dna",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/1",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	5000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	false,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"lb_rtg_dna",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/11",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	5000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	false,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"lb_rtg_dna",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/21",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	5000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	false,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_rtg_dna_CartPole-v0_20-09-2018_09-25-57/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"lb_rtg_na",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/1",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	5000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"lb_rtg_na",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/11",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	5000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"lb_rtg_na",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/21",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	5000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/lb_rtg_na_CartPole-v0_20-09-2018_09-26-24/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"LunarLanderContinuous-v2",
 3 | "exp_name"	:	"ll_b40000_r0.005",
 4 | "gamma"	:	0.99,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/1",
 7 | "max_path_length"	:	1000.0,
 8 | "min_timesteps_per_batch"	:	40000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"LunarLanderContinuous-v2",
 3 | "exp_name"	:	"ll_b40000_r0.005",
 4 | "gamma"	:	0.99,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/11",
 7 | "max_path_length"	:	1000.0,
 8 | "min_timesteps_per_batch"	:	40000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"LunarLanderContinuous-v2",
 3 | "exp_name"	:	"ll_b40000_r0.005",
 4 | "gamma"	:	0.99,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/21",
 7 | "max_path_length"	:	1000.0,
 8 | "min_timesteps_per_batch"	:	40000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/ll_b40000_r0.005_LunarLanderContinuous-v2_20-09-2018_09-40-54/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"sb_no_rtg_dna",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/1",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	1000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	false,
13 | "reward_to_go"	:	false,
14 | "seed"	:	1,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"sb_no_rtg_dna",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/11",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	1000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	false,
13 | "reward_to_go"	:	false,
14 | "seed"	:	11,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"sb_no_rtg_dna",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/21",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	1000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	false,
13 | "reward_to_go"	:	false,
14 | "seed"	:	21,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_no_rtg_dna_CartPole-v0_20-09-2018_09-02-45/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"sb_rtg_dna",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/1",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	1000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	false,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"sb_rtg_dna",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/11",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	1000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	false,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"sb_rtg_dna",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/21",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	1000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	false,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_rtg_dna_CartPole-v0_20-09-2018_09-05-22/21/model.pkl


--------------------------------------------------------------------------------
/hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"sb_rtg_na",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/1",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	1000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/1/model.pkl


--------------------------------------------------------------------------------
/hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"sb_rtg_na",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/11",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	1000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/11/model.pkl


--------------------------------------------------------------------------------
/hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"CartPole-v0",
 3 | "exp_name"	:	"sb_rtg_na",
 4 | "gamma"	:	1.0,
 5 | "learning_rate"	:	0.005,
 6 | "logdir"	:	"data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/21",
 7 | "max_path_length"	:	null,
 8 | "min_timesteps_per_batch"	:	1000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	false,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/data/sb_rtg_na_CartPole-v0_20-09-2018_09-08-07/21/model.pkl


--------------------------------------------------------------------------------
/hw2/hw2_instructions.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/hw2_instructions.pdf


--------------------------------------------------------------------------------
/hw2/logz.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | 
  3 | """
  4 | 
  5 | Some simple logging functionality, inspired by rllab's logging.
  6 | Assumes that each diagnostic gets logged each iteration
  7 | 
  8 | Call logz.configure_output_dir() to start logging to a 
  9 | tab-separated-values file (some_folder_name/log.txt)
 10 | 
 11 | To load the learning curves, you can do, for example
 12 | 
 13 | A = np.genfromtxt('/tmp/expt_1468984536/log.txt',delimiter='\t',dtype=None, names=True)
 14 | A['EpRewMean']
 15 | 
 16 | """
 17 | 
 18 | import os.path as osp, shutil, time, atexit, os, subprocess
 19 | import pickle
 20 | import torch
 21 | 
 22 | color2num = dict(
 23 |     gray=30,
 24 |     red=31,
 25 |     green=32,
 26 |     yellow=33,
 27 |     blue=34,
 28 |     magenta=35,
 29 |     cyan=36,
 30 |     white=37,
 31 |     crimson=38
 32 | )
 33 | 
 34 | def colorize(string, color, bold=False, highlight=False):
 35 |     attr = []
 36 |     num = color2num[color]
 37 |     if highlight: num += 10
 38 |     attr.append(str(num))
 39 |     if bold: attr.append('1')
 40 |     return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string)
 41 | 
 42 | class G:
 43 |     output_dir = None
 44 |     output_file = None
 45 |     first_row = True
 46 |     log_headers = []
 47 |     log_current_row = {}
 48 | 
 49 | def configure_output_dir(d=None):
 50 |     """
 51 |     Set output directory to d, or to /tmp/somerandomnumber if d is None
 52 |     """
 53 |     G.output_dir = d or "/tmp/experiments/%i"%int(time.time())
 54 |     assert not osp.exists(G.output_dir), "Log dir %s already exists! Delete it first or use a different dir"%G.output_dir
 55 |     os.makedirs(G.output_dir)
 56 |     G.output_file = open(osp.join(G.output_dir, "log.txt"), 'w')
 57 |     atexit.register(G.output_file.close)
 58 |     print(colorize("Logging data to %s"%G.output_file.name, 'green', bold=True))
 59 | 
 60 | def log_tabular(key, val):
 61 |     """
 62 |     Log a value of some diagnostic
 63 |     Call this once for each diagnostic quantity, each iteration
 64 |     """
 65 |     if G.first_row:
 66 |         G.log_headers.append(key)
 67 |     else:
 68 |         assert key in G.log_headers, "Trying to introduce a new key %s that you didn't include in the first iteration"%key
 69 |     assert key not in G.log_current_row, "You already set %s this iteration. Maybe you forgot to call dump_tabular()"%key
 70 |     G.log_current_row[key] = val
 71 | 
 72 | def save_hyperparams(params):
 73 |     with open(osp.join(G.output_dir, "hyperparams.json"), 'w') as out:
 74 |         out.write(json.dumps(params, separators=(',\n','\t:\t'), sort_keys=True))
 75 | 
 76 | def save_pytorch_model(model):  
 77 |     """
 78 |     Saves the entire pytorch Module 
 79 |     """
 80 |     torch.save(model, osp.join(G.output_dir, "model.pkl"))
 81 |     
 82 | 
 83 | def dump_tabular():
 84 |     """
 85 |     Write all of the diagnostics from the current iteration
 86 |     """
 87 |     vals = []
 88 |     key_lens = [len(key) for key in G.log_headers]
 89 |     max_key_len = max(15,max(key_lens))
 90 |     keystr = '%'+'%d'%max_key_len
 91 |     fmt = "| " + keystr + "s | %15s |"
 92 |     n_slashes = 22 + max_key_len
 93 |     print("-"*n_slashes)
 94 |     for key in G.log_headers:
 95 |         val = G.log_current_row.get(key, "")
 96 |         if hasattr(val, "__float__"): valstr = "%8.3g"%val
 97 |         else: valstr = val
 98 |         print(fmt%(key, valstr))
 99 |         vals.append(val)
100 |     print("-"*n_slashes)
101 |     if G.output_file is not None:
102 |         if G.first_row:
103 |             G.output_file.write("\t".join(G.log_headers))
104 |             G.output_file.write("\n")
105 |         G.output_file.write("\t".join(map(str,vals)))
106 |         G.output_file.write("\n")
107 |         G.output_file.flush()
108 |     G.log_current_row.clear()
109 |     G.first_row=False
110 | 


--------------------------------------------------------------------------------
/hw2/plot.py:
--------------------------------------------------------------------------------
  1 | import seaborn as sns
  2 | import pandas as pd
  3 | import matplotlib.pyplot as plt
  4 | import json
  5 | import os
  6 | 
  7 | """
  8 | Using the plotter:
  9 | 
 10 | Call it from the command line, and supply it with logdirs to experiments.
 11 | Suppose you ran an experiment with name 'test', and you ran 'test' for 10 
 12 | random seeds. The runner code stored it in the directory structure
 13 | 
    data
    L test_EnvName_DateTime
      L  0
        L log.txt
        L hyperparams.json
      L  1
        L log.txt
        L hyperparams.json
       .
       .
       .
      L  9
        L log.txt
        L hyperparams.json
 28 | 
 29 | To plot learning curves from the experiment, averaged over all random
 30 | seeds, call
 31 | 
 32 |     python plot.py data/test_EnvName_DateTime --value AverageReturn
 33 | 
and voila. To see a different statistic, change what you put in for
the keyword --value. You can also enter /multiple/ values, and it will
plot all of them in order.
 37 | 
 38 | 
 39 | Suppose you ran two experiments: 'test1' and 'test2'. In 'test2' you tried
 40 | a different set of hyperparameters from 'test1', and now you would like 
 41 | to compare them -- see their learning curves side-by-side. Just call
 42 | 
 43 |     python plot.py data/test1 data/test2
 44 | 
 45 | and it will plot them both! They will be given titles in the legend according
 46 | to their exp_name parameters. If you want to use custom legend titles, use
 47 | the --legend flag and then provide a title for each logdir.
 48 | 
 49 | """
 50 | 
 51 | def plot_data(data, value="AverageReturn"):
 52 |     if isinstance(data, list):
 53 |         data = pd.concat(data, ignore_index=True)
 54 |     plt.figure(figsize=(16, 9))
 55 |     sns.set(style="darkgrid", font_scale=1.5)
 56 |     sns.tsplot(data=data, time="Iteration", value=value, unit="Unit", condition="Condition")
 57 |     plt.legend(loc='best').draggable()
 58 |     plt.savefig('result.png', bbox_inches='tight')
 59 |     plt.show()
 60 | 
 61 | 
 62 | def get_datasets(fpath, condition=None):
 63 |     unit = 0
 64 |     datasets = []
 65 |     for root, dir, files in os.walk(fpath):
 66 |         if 'log.txt' in files:
 67 |             param_path = open(os.path.join(root,'hyperparams.json'))
 68 |             params = json.load(param_path)
 69 |             exp_name = params['exp_name']
 70 |             
 71 |             log_path = os.path.join(root,'log.txt')
 72 |             experiment_data = pd.read_table(log_path)
 73 | 
 74 |             experiment_data.insert(
 75 |                 len(experiment_data.columns),
 76 |                 'Unit',
 77 |                 unit
 78 |                 )        
 79 |             experiment_data.insert(
 80 |                 len(experiment_data.columns),
 81 |                 'Condition',
 82 |                 condition or exp_name
 83 |                 )
 84 | 
 85 |             datasets.append(experiment_data)
 86 |             unit += 1
 87 | 
 88 |     return datasets
 89 | 
 90 | 
 91 | def main():
 92 |     import argparse
 93 |     parser = argparse.ArgumentParser()
 94 |     parser.add_argument('logdir', nargs='*')
 95 |     parser.add_argument('--legend', nargs='*')
 96 |     parser.add_argument('--value', default='AverageReturn', nargs='*')
 97 |     args = parser.parse_args()
 98 | 
 99 |     use_legend = False
100 |     if args.legend is not None:
101 |         assert len(args.legend) == len(args.logdir), \
102 |             "Must give a legend title for each set of experiments."
103 |         use_legend = True
104 | 
105 |     data = []
106 |     if use_legend:
107 |         for logdir, legend_title in zip(args.logdir, args.legend):
108 |             data += get_datasets(logdir, legend_title)
109 |     else:
110 |         for logdir in args.logdir:
111 |             data += get_datasets(logdir)
112 | 
113 |     if isinstance(args.value, list):
114 |         values = args.value
115 |     else:
116 |         values = [args.value]
117 |     for value in values:
118 |         plot_data(data, value=value)
119 | 
120 | if __name__ == "__main__":
121 |     main()
122 | 


--------------------------------------------------------------------------------
/hw2/report.md:
--------------------------------------------------------------------------------
 1 | # CS294-112 HW 2: Policy Gradient
 2 | 
 3 | ## Problem 4: CartPole 
 4 | 
 5 | Learning curve for small batch experiments:
 6 | 
 7 | ![result_CartPole_sb](./result/result_CartPole_sb.png)
 8 | 
 9 | Learning curve for large batch experiments:
10 | 
11 | ![result_CartPole_lb](./result/result_CartPole_lb.png)
12 | 
From the results, we can see that the reward-to-go gradient estimator is better than the trajectory-centric one when advantage centering is not applied: the reward-to-go estimator has higher stability, learning speed and average performance.
14 | 
With the help of advantage centering, the reward-to-go gradient estimator became more stable and achieved better performance in the end.
16 | 
In addition, if we compare the results between the small-batch and large-batch experiments, we find that a larger batch size stabilizes and speeds up the learning process, and also yields better performance within the same number of iterations.
18 | 
19 | 
20 | 
21 | ## Problem 5: InvertedPendulum 
22 | 
Learning curves for the smallest batch size setting (blue) and the largest learning rate setting (orange):
24 | 
25 | ![result_InvertedPendulum](./result/result_InvertedPendulum.png)
26 | 
The smallest batch size we found was 300 (with learning rate = 0.01), and the largest learning rate we found was 0.05 (with batch size = 10000).
28 | 
29 | 
30 | 
31 | ## Problem 7: LunarLander 
32 | 
33 | Learning curve for the LunarLander:
34 | 
35 | ![result_LunarLander](./result/result_LunarLander.png)
36 | 
37 | 
38 | 
39 | ## Problem 8: HalfCheetah 
40 | 
41 | Learning curves for different batch size and learning rate setting:
42 | 
43 | ![result_HalfCheetah_1](./result/result_HalfCheetah_1.png)
44 | 
In general, as the batch size increases, the agent achieves a higher average performance and a relatively more stable learning process. But in some special cases (batch size 30000/50000, learning rate 0.02), a smaller batch size can achieve a better final performance. My guess is that, with a large learning rate, a larger batch size makes it easier for the agent to get stuck at a saddle point, which might hurt the final performance. However, it may simply be due to the instability of the learning algorithm, which makes this special case happen occasionally.
46 | 
As for the learning rate, a larger learning rate can accelerate the learning process, but it might also increase the instability of learning. When the learning rate is too large, although learning is sped up, the agent will finally converge to a lower performance.
48 | 
49 | 
50 | 
51 | Learning curves for different gradient estimator and baseline setting:
52 | 
53 | ![result_HalfCheetah_2](./result/result_HalfCheetah_2.png)
54 | 
In this problem, using the reward-to-go estimator is the key to learning a good policy. Moreover, the neural network baseline might hurt the performance when the wrong gradient estimator is used. But with the correct gradient estimator, the neural network baseline does help the agent learn a far better policy than before.


--------------------------------------------------------------------------------
/hw2/requirements.txt:
--------------------------------------------------------------------------------
1 | mujoco-py==1.50.1.56
2 | gym==0.10.5
3 | tensorflow==1.10.0
4 | numpy==1.14.5
5 | seaborn
6 | Box2D==2.3.2
7 | 


--------------------------------------------------------------------------------
/hw2/result/result_CartPole_lb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/result/result_CartPole_lb.png


--------------------------------------------------------------------------------
/hw2/result/result_CartPole_sb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/result/result_CartPole_sb.png


--------------------------------------------------------------------------------
/hw2/result/result_HalfCheetah_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/result/result_HalfCheetah_1.png


--------------------------------------------------------------------------------
/hw2/result/result_HalfCheetah_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/result/result_HalfCheetah_2.png


--------------------------------------------------------------------------------
/hw2/result/result_InvertedPendulum.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/result/result_InvertedPendulum.png


--------------------------------------------------------------------------------
/hw2/result/result_LunarLander.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw2/result/result_LunarLander.png


--------------------------------------------------------------------------------
/hw3/README.md:
--------------------------------------------------------------------------------
#### This folder contains the code, experiment results and report for HW3.
 2 | 
 3 | 
 4 | 
 5 | ## Code
 6 | 
 7 | - ###### dqn_utils.py  atari_wrappers.py
 8 | 
 9 |   The helper functions for DQN experiments.
10 | 
11 | - ###### run_dqn_atari.py  run_dqn_ram.py  run_dqn_lander.py
12 | 
13 |   The neural network models and hyperparameters for different environment setting.
14 | 
15 | - ###### dqn.py
16 | 
17 |   The main model of the DQN experiments.
18 | 
19 | - ###### train_ac_f18.py
20 | 
21 |   The main model of the Actor-Critic experiments.
22 | 
23 | - ###### train_pg_f18.py
24 | 
25 |   The Policy Gradient model. (copied from HW2)
26 | 
27 | - ###### logz.py
28 | 
29 |   The code used for logging.
30 | 
31 | - ###### plot.py
32 | 
  The code used for generating the figures in the report.
34 | 
35 |   ​
36 | 
37 | ## Folder
38 | 
39 | - ###### data
40 | 
41 |   Contains logging files generated by logz.py
42 | 
43 | - ###### result
44 | 
45 |   Contains learning curves for different experiments.
46 | 
47 | ## Report
48 | 
49 | Please see [report.md](./report.md)


--------------------------------------------------------------------------------
/hw3/atari_wrappers.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | from collections import deque
  4 | import gym
  5 | from gym import spaces
  6 | 
  7 | 
  8 | class NoopResetEnv(gym.Wrapper):
  9 |     def __init__(self, env=None, noop_max=30):
 10 |         """Sample initial states by taking random number of no-ops on reset.
 11 |         No-op is assumed to be action 0.
 12 |         """
 13 |         super(NoopResetEnv, self).__init__(env)
 14 |         self.noop_max = noop_max
 15 |         assert env.unwrapped.get_action_meanings()[0] == 'NOOP'
 16 | 
 17 |     def _reset(self):
 18 |         """ Do no-op action for a number of steps in [1, noop_max]."""
 19 |         self.env.reset()
 20 |         noops = np.random.randint(1, self.noop_max + 1)
 21 |         for _ in range(noops):
 22 |             obs, _, _, _ = self.env.step(0)
 23 |         return obs
 24 | 
 25 | class FireResetEnv(gym.Wrapper):
 26 |     def __init__(self, env=None):
 27 |         """Take action on reset for environments that are fixed until firing."""
 28 |         super(FireResetEnv, self).__init__(env)
 29 |         assert env.unwrapped.get_action_meanings()[1] == 'FIRE'
 30 |         assert len(env.unwrapped.get_action_meanings()) >= 3
 31 | 
 32 |     def _reset(self):
 33 |         self.env.reset()
 34 |         obs, _, _, _ = self.env.step(1)
 35 |         obs, _, _, _ = self.env.step(2)
 36 |         return obs
 37 | 
 38 | class EpisodicLifeEnv(gym.Wrapper):
 39 |     def __init__(self, env=None):
 40 |         """Make end-of-life == end-of-episode, but only reset on true game over.
 41 |         Done by DeepMind for the DQN and co. since it helps value estimation.
 42 |         """
 43 |         super(EpisodicLifeEnv, self).__init__(env)
 44 |         self.lives = 0
 45 |         self.was_real_done  = True
 46 |         self.was_real_reset = False
 47 | 
 48 |     def _step(self, action):
 49 |         obs, reward, done, info = self.env.step(action)
 50 |         self.was_real_done = done
 51 |         # check current lives, make loss of life terminal,
 52 |         # then update lives to handle bonus lives
 53 |         lives = self.env.unwrapped.ale.lives()
 54 |         if lives < self.lives and lives > 0:
 55 |             # for Qbert somtimes we stay in lives == 0 condtion for a few frames
 56 |             # so its important to keep lives > 0, so that we only reset once
 57 |             # the environment advertises done.
 58 |             done = True
 59 |         self.lives = lives
 60 |         return obs, reward, done, info
 61 | 
 62 |     def _reset(self):
 63 |         """Reset only when lives are exhausted.
 64 |         This way all states are still reachable even though lives are episodic,
 65 |         and the learner need not know about any of this behind-the-scenes.
 66 |         """
 67 |         if self.was_real_done:
 68 |             obs = self.env.reset()
 69 |             self.was_real_reset = True
 70 |         else:
 71 |             # no-op step to advance from terminal/lost life state
 72 |             obs, _, _, _ = self.env.step(0)
 73 |             self.was_real_reset = False
 74 |         self.lives = self.env.unwrapped.ale.lives()
 75 |         return obs
 76 | 
 77 | class MaxAndSkipEnv(gym.Wrapper):
 78 |     def __init__(self, env=None, skip=4):
 79 |         """Return only every `skip`-th frame"""
 80 |         super(MaxAndSkipEnv, self).__init__(env)
 81 |         # most recent raw observations (for max pooling across time steps)
 82 |         self._obs_buffer = deque(maxlen=2)
 83 |         self._skip       = skip
 84 | 
 85 |     def _step(self, action):
 86 |         total_reward = 0.0
 87 |         done = None
 88 |         for _ in range(self._skip):
 89 |             obs, reward, done, info = self.env.step(action)
 90 |             self._obs_buffer.append(obs)
 91 |             total_reward += reward
 92 |             if done:
 93 |                 break
 94 | 
 95 |         max_frame = np.max(np.stack(self._obs_buffer), axis=0)
 96 | 
 97 |         return max_frame, total_reward, done, info
 98 | 
 99 |     def _reset(self):
100 |         """Clear past frame buffer and init. to first obs. from inner env."""
101 |         self._obs_buffer.clear()
102 |         obs = self.env.reset()
103 |         self._obs_buffer.append(obs)
104 |         return obs
105 | 
def _process_frame84(frame):
    """Convert a 210x160x3 frame to a single-channel 84x84x1 uint8 image.

    The three channels are combined with weights 0.299 / 0.587 / 0.114, the
    result is resized with linear interpolation to 110 rows by 84 columns,
    and rows 18 to 101 are kept.
    """
    rgb = np.reshape(frame, [210, 160, 3]).astype(np.float32)
    gray = rgb[:, :, 0] * 0.299 + rgb[:, :, 1] * 0.587 + rgb[:, :, 2] * 0.114
    # cv2.resize takes the target size as (width, height).
    scaled = cv2.resize(gray, (84, 110),  interpolation=cv2.INTER_LINEAR)
    cropped = scaled[18:102, :]
    return np.reshape(cropped, [84, 84, 1]).astype(np.uint8)
113 | 
class ProcessFrame84(gym.Wrapper):
    """Wrapper that passes every observation through _process_frame84 and
    declares an (84, 84, 1) Box observation space with bounds 0 and 255.
    """

    def __init__(self, env=None):
        super(ProcessFrame84, self).__init__(env)
        self.observation_space = spaces.Box(low=0, high=255, shape=(84, 84, 1))

    def _step(self, action):
        """Step the inner env and return its result with the observation
        replaced by the processed frame.
        """
        raw_obs, reward, done, info = self.env.step(action)
        frame = _process_frame84(raw_obs)
        return frame, reward, done, info

    def _reset(self):
        """Reset the inner env and return the processed first frame."""
        raw_obs = self.env.reset()
        return _process_frame84(raw_obs)
125 | 
class ClippedRewardsWrapper(gym.Wrapper):
    """Wrapper that replaces every reward with its sign (-1, 0 or 1)."""

    def _step(self, action):
        """Step the inner env and return its result with the reward replaced
        by np.sign(reward).
        """
        obs, reward, done, info = self.env.step(action)
        clipped = np.sign(reward)
        return obs, clipped, done, info
130 | 
def wrap_deepmind_ram(env):
    """Wrap `env` with, from innermost to outermost: EpisodicLifeEnv,
    NoopResetEnv (noop_max=30), MaxAndSkipEnv (skip=4), FireResetEnv (only if
    the env has a 'FIRE' action) and ClippedRewardsWrapper.
    """
    wrapped = EpisodicLifeEnv(env)
    wrapped = NoopResetEnv(wrapped, noop_max=30)
    wrapped = MaxAndSkipEnv(wrapped, skip=4)
    has_fire = 'FIRE' in wrapped.unwrapped.get_action_meanings()
    if has_fire:
        wrapped = FireResetEnv(wrapped)
    return ClippedRewardsWrapper(wrapped)
139 | 
def wrap_deepmind(env):
    """Wrap `env` with, from innermost to outermost: EpisodicLifeEnv,
    NoopResetEnv (noop_max=30), MaxAndSkipEnv (skip=4), FireResetEnv (only if
    the env has a 'FIRE' action), ProcessFrame84 and ClippedRewardsWrapper.

    The environment id must contain 'NoFrameskip'.
    """
    assert 'NoFrameskip' in env.spec.id
    wrapped = EpisodicLifeEnv(env)
    wrapped = NoopResetEnv(wrapped, noop_max=30)
    wrapped = MaxAndSkipEnv(wrapped, skip=4)
    has_fire = 'FIRE' in wrapped.unwrapped.get_action_meanings()
    if has_fire:
        wrapped = FireResetEnv(wrapped)
    wrapped = ProcessFrame84(wrapped)
    return ClippedRewardsWrapper(wrapped)
150 | 


--------------------------------------------------------------------------------
/hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.005,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.005,
 4 | "env_name"	:	"CartPole-v0",
 5 | "exp_name"	:	"cp_100_1",
 6 | "gamma"	:	1.0,
 7 | "logdir"	:	"data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/1",
 8 | "max_path_length"	:	null,
 9 | "min_timesteps_per_batch"	:	1000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	1,
14 | "num_target_updates"	:	100,
15 | "seed"	:	1,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.005,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.005,
 4 | "env_name"	:	"CartPole-v0",
 5 | "exp_name"	:	"cp_100_1",
 6 | "gamma"	:	1.0,
 7 | "logdir"	:	"data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/11",
 8 | "max_path_length"	:	null,
 9 | "min_timesteps_per_batch"	:	1000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	1,
14 | "num_target_updates"	:	100,
15 | "seed"	:	11,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.005,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.005,
 4 | "env_name"	:	"CartPole-v0",
 5 | "exp_name"	:	"cp_100_1",
 6 | "gamma"	:	1.0,
 7 | "logdir"	:	"data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/21",
 8 | "max_path_length"	:	null,
 9 | "min_timesteps_per_batch"	:	1000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	1,
14 | "num_target_updates"	:	100,
15 | "seed"	:	21,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_100_1_CartPole-v0_03-10-2018_10-58-54/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.005,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.005,
 4 | "env_name"	:	"CartPole-v0",
 5 | "exp_name"	:	"cp_10_10",
 6 | "gamma"	:	1.0,
 7 | "logdir"	:	"data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/1",
 8 | "max_path_length"	:	null,
 9 | "min_timesteps_per_batch"	:	1000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	10,
14 | "num_target_updates"	:	10,
15 | "seed"	:	1,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.005,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.005,
 4 | "env_name"	:	"CartPole-v0",
 5 | "exp_name"	:	"cp_10_10",
 6 | "gamma"	:	1.0,
 7 | "logdir"	:	"data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/11",
 8 | "max_path_length"	:	null,
 9 | "min_timesteps_per_batch"	:	1000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	10,
14 | "num_target_updates"	:	10,
15 | "seed"	:	11,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.005,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.005,
 4 | "env_name"	:	"CartPole-v0",
 5 | "exp_name"	:	"cp_10_10",
 6 | "gamma"	:	1.0,
 7 | "logdir"	:	"data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/21",
 8 | "max_path_length"	:	null,
 9 | "min_timesteps_per_batch"	:	1000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	10,
14 | "num_target_updates"	:	10,
15 | "seed"	:	21,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_10_10_CartPole-v0_03-10-2018_11-04-15/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.005,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.005,
 4 | "env_name"	:	"CartPole-v0",
 5 | "exp_name"	:	"cp_1_100",
 6 | "gamma"	:	1.0,
 7 | "logdir"	:	"data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/1",
 8 | "max_path_length"	:	null,
 9 | "min_timesteps_per_batch"	:	1000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	100,
14 | "num_target_updates"	:	1,
15 | "seed"	:	1,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.005,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.005,
 4 | "env_name"	:	"CartPole-v0",
 5 | "exp_name"	:	"cp_1_100",
 6 | "gamma"	:	1.0,
 7 | "logdir"	:	"data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/11",
 8 | "max_path_length"	:	null,
 9 | "min_timesteps_per_batch"	:	1000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	100,
14 | "num_target_updates"	:	1,
15 | "seed"	:	11,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.005,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.005,
 4 | "env_name"	:	"CartPole-v0",
 5 | "exp_name"	:	"cp_1_100",
 6 | "gamma"	:	1.0,
 7 | "logdir"	:	"data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/21",
 8 | "max_path_length"	:	null,
 9 | "min_timesteps_per_batch"	:	1000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	100,
14 | "num_target_updates"	:	1,
15 | "seed"	:	21,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_1_100_CartPole-v0_03-10-2018_11-03-27/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.005,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.005,
 4 | "env_name"	:	"CartPole-v0",
 5 | "exp_name"	:	"cp_1_1",
 6 | "gamma"	:	1.0,
 7 | "logdir"	:	"data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/1",
 8 | "max_path_length"	:	null,
 9 | "min_timesteps_per_batch"	:	1000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	1,
14 | "num_target_updates"	:	1,
15 | "seed"	:	1,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.005,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.005,
 4 | "env_name"	:	"CartPole-v0",
 5 | "exp_name"	:	"cp_1_1",
 6 | "gamma"	:	1.0,
 7 | "logdir"	:	"data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/11",
 8 | "max_path_length"	:	null,
 9 | "min_timesteps_per_batch"	:	1000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	1,
14 | "num_target_updates"	:	1,
15 | "seed"	:	11,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.005,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.005,
 4 | "env_name"	:	"CartPole-v0",
 5 | "exp_name"	:	"cp_1_1",
 6 | "gamma"	:	1.0,
 7 | "logdir"	:	"data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/21",
 8 | "max_path_length"	:	null,
 9 | "min_timesteps_per_batch"	:	1000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	1,
14 | "num_target_updates"	:	1,
15 | "seed"	:	21,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_cp_1_1_CartPole-v0_03-10-2018_10-50-10/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.02,
 4 | "env_name"	:	"HalfCheetah-v2",
 5 | "exp_name"	:	"hc_critic",
 6 | "gamma"	:	0.9,
 7 | "logdir"	:	"data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/1",
 8 | "max_path_length"	:	150.0,
 9 | "min_timesteps_per_batch"	:	30000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	10,
14 | "num_target_updates"	:	10,
15 | "seed"	:	1,
16 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.02,
 4 | "env_name"	:	"HalfCheetah-v2",
 5 | "exp_name"	:	"hc_critic",
 6 | "gamma"	:	0.9,
 7 | "logdir"	:	"data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/11",
 8 | "max_path_length"	:	150.0,
 9 | "min_timesteps_per_batch"	:	30000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	10,
14 | "num_target_updates"	:	10,
15 | "seed"	:	11,
16 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.02,
 4 | "env_name"	:	"HalfCheetah-v2",
 5 | "exp_name"	:	"hc_critic",
 6 | "gamma"	:	0.9,
 7 | "logdir"	:	"data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/21",
 8 | "max_path_length"	:	150.0,
 9 | "min_timesteps_per_batch"	:	30000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	10,
14 | "num_target_updates"	:	10,
15 | "seed"	:	21,
16 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_HalfCheetah-v2_03-10-2018_11-12-54/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.02,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_cl3",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/1",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.02,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_cl3",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/11",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.02,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_cl3",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/21",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_cl3_HalfCheetah-v2_04-10-2018_11-32-35/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.004,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.004",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/1",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.004,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.004",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/11",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.004,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.004",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/21",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.004_HalfCheetah-v2_03-10-2018_22-43-59/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.004,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.004_cl3",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/1",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.004,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.004_cl3",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/11",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.004,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.004_cl3",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/21",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.004_cl3_HalfCheetah-v2_04-10-2018_11-33-01/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.01,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.01",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/1",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.01,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.01",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/11",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.01,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.01",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/21",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.01_HalfCheetah-v2_03-10-2018_22-43-49/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.01,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.01_cl3",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/1",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.01,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.01_cl3",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/11",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.01,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.01_cl3",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/21",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.01_cl3_HalfCheetah-v2_04-10-2018_11-32-54/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.04,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.04",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/1",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.04,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.04",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/11",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.04,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.04",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/21",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.04_HalfCheetah-v2_03-10-2018_22-43-38/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.04,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.04_cl3",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/1",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.04,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.04_cl3",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/11",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.02,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.04,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"HalfCheetah-v2",
 7 | "exp_name"	:	"hc_critic_clr0.04_cl3",
 8 | "gamma"	:	0.9,
 9 | "logdir"	:	"data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/21",
10 | "max_path_length"	:	150.0,
11 | "min_timesteps_per_batch"	:	30000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_hc_critic_clr0.04_cl3_HalfCheetah-v2_04-10-2018_11-32-43/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.01,
 4 | "env_name"	:	"InvertedPendulum-v2",
 5 | "exp_name"	:	"ip_critic",
 6 | "gamma"	:	0.95,
 7 | "logdir"	:	"data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/1",
 8 | "max_path_length"	:	1000.0,
 9 | "min_timesteps_per_batch"	:	5000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	10,
14 | "num_target_updates"	:	10,
15 | "seed"	:	1,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.01,
 4 | "env_name"	:	"InvertedPendulum-v2",
 5 | "exp_name"	:	"ip_critic",
 6 | "gamma"	:	0.95,
 7 | "logdir"	:	"data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/11",
 8 | "max_path_length"	:	1000.0,
 9 | "min_timesteps_per_batch"	:	5000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	10,
14 | "num_target_updates"	:	10,
15 | "seed"	:	11,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "animate"	:	false,
 3 | "critic_learning_rate"	:	0.01,
 4 | "env_name"	:	"InvertedPendulum-v2",
 5 | "exp_name"	:	"ip_critic",
 6 | "gamma"	:	0.95,
 7 | "logdir"	:	"data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/21",
 8 | "max_path_length"	:	1000.0,
 9 | "min_timesteps_per_batch"	:	5000,
10 | "n_iter"	:	100,
11 | "n_layers"	:	2,
12 | "normalize_advantages"	:	true,
13 | "num_grad_steps_per_target_update"	:	10,
14 | "num_target_updates"	:	10,
15 | "seed"	:	21,
16 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_InvertedPendulum-v2_03-10-2018_11-11-16/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.01,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_cl3",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/1",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.01,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_cl3",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/11",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.01,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_cl3",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/21",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_cl3_InvertedPendulum-v2_03-10-2018_21-13-04/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.002,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.002",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/1",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.002,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.002",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/11",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.002,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.002",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/21",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.002_InvertedPendulum-v2_03-10-2018_20-26-52/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.002,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.002_cl3",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/1",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.002,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.002_cl3",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/11",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.002,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.002_cl3",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/21",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.002_cl3_InvertedPendulum-v2_03-10-2018_21-13-40/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.005,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.005",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/1",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.005,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.005",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/11",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.005,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.005",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/21",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.005_InvertedPendulum-v2_03-10-2018_20-26-44/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.005,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.005_cl3",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/1",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.005,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.005_cl3",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/11",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.005,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.005_cl3",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/21",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.005_cl3_InvertedPendulum-v2_03-10-2018_21-13-28/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.02,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.02",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/1",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.02,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.02",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/11",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.02,
 5 | "critic_n_layers"	:	2,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.02",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/21",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.02_InvertedPendulum-v2_03-10-2018_20-26-21/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.02,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.02_cl3",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/1",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	1,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.02,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.02_cl3",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/11",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	11,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"actor_learning_rate"	:	0.01,
 2 | "actor_n_layers"	:	2,
 3 | "animate"	:	false,
 4 | "critic_learning_rate"	:	0.02,
 5 | "critic_n_layers"	:	3,
 6 | "env_name"	:	"InvertedPendulum-v2",
 7 | "exp_name"	:	"ip_critic_clr0.02_cl3",
 8 | "gamma"	:	0.95,
 9 | "logdir"	:	"data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/21",
10 | "max_path_length"	:	1000.0,
11 | "min_timesteps_per_batch"	:	5000,
12 | "n_iter"	:	100,
13 | "normalize_advantages"	:	true,
14 | "num_grad_steps_per_target_update"	:	10,
15 | "num_target_updates"	:	10,
16 | "seed"	:	21,
17 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ac_ip_critic_clr0.02_cl3_InvertedPendulum-v2_03-10-2018_21-13-15/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/dqn_Asterix_double_dqn_AsterixNoFrameskip-v4_04-10-2018_03-16-57/2333/hyperparams.json:
--------------------------------------------------------------------------------
1 | {"env_name"	:	"AsterixNoFrameskip-v4",
2 | "exp_name"	:	"Asterix_double_dqn"}


--------------------------------------------------------------------------------
/hw3/data/dqn_Asterix_double_dqn_singlebias_AsterixNoFrameskip-v4_08-10-2018_09-14-56/2333/hyperparams.json:
--------------------------------------------------------------------------------
1 | {"env_name"	:	"AsterixNoFrameskip-v4",
2 | "exp_name"	:	"Asterix_double_dqn_singlebias"}


--------------------------------------------------------------------------------
/hw3/data/dqn_Asterix_vanilla_dqn_AsterixNoFrameskip-v4_04-10-2018_03-16-01/2333/hyperparams.json:
--------------------------------------------------------------------------------
1 | {"env_name"	:	"AsterixNoFrameskip-v4",
2 | "exp_name"	:	"Asterix_vanilla_dqn"}


--------------------------------------------------------------------------------
/hw3/data/dqn_Bowling_double_dqn_BowlingNoFrameskip-v4_04-10-2018_15-13-45/2333/hyperparams.json:
--------------------------------------------------------------------------------
1 | {"env_name"	:	"BowlingNoFrameskip-v4",
2 | "exp_name"	:	"Bowling_double_dqn"}


--------------------------------------------------------------------------------
/hw3/data/dqn_Bowling_double_dqn_singlebias_BowlingNoFrameskip-v4_10-10-2018_16-14-12/2333/hyperparams.json:
--------------------------------------------------------------------------------
1 | {"env_name"	:	"BowlingNoFrameskip-v4",
2 | "exp_name"	:	"Bowling_double_dqn_singlebias"}


--------------------------------------------------------------------------------
/hw3/data/dqn_Bowling_vanilla_dqn_BowlingNoFrameskip-v4_04-10-2018_15-14-21/2333/hyperparams.json:
--------------------------------------------------------------------------------
1 | {"env_name"	:	"BowlingNoFrameskip-v4",
2 | "exp_name"	:	"Bowling_vanilla_dqn"}


--------------------------------------------------------------------------------
/hw3/data/dqn_KungFuMaster_double_dqn_KungFuMasterNoFrameskip-v4_04-10-2018_11-14-51/2333/hyperparams.json:
--------------------------------------------------------------------------------
1 | {"env_name"	:	"KungFuMasterNoFrameskip-v4",
2 | "exp_name"	:	"KungFuMaster_double_dqn"}


--------------------------------------------------------------------------------
/hw3/data/dqn_KungFuMaster_double_dqn_singlebias_KungFuMasterNoFrameskip-v4_10-10-2018_16-14-59/2333/hyperparams.json:
--------------------------------------------------------------------------------
1 | {"env_name"	:	"KungFuMasterNoFrameskip-v4",
2 | "exp_name"	:	"KungFuMaster_double_dqn_singlebias"}


--------------------------------------------------------------------------------
/hw3/data/dqn_KungFuMaster_vanilla_dqn_KungFuMasterNoFrameskip-v4_04-10-2018_11-15-11/2333/hyperparams.json:
--------------------------------------------------------------------------------
1 | {"env_name"	:	"KungFuMasterNoFrameskip-v4",
2 | "exp_name"	:	"KungFuMaster_vanilla_dqn"}


--------------------------------------------------------------------------------
/hw3/data/dqn_Pong_double_dqn_PongNoFrameskip-v4_04-10-2018_23-10-47/2333/hyperparams.json:
--------------------------------------------------------------------------------
1 | {"env_name"	:	"PongNoFrameskip-v4",
2 | "exp_name"	:	"Pong_double_dqn"}


--------------------------------------------------------------------------------
/hw3/data/dqn_Pong_double_dqn_singlebias_PongNoFrameskip-v4_08-10-2018_09-12-30/2333/hyperparams.json:
--------------------------------------------------------------------------------
1 | {"env_name"	:	"PongNoFrameskip-v4",
2 | "exp_name"	:	"Pong_double_dqn_singlebias"}


--------------------------------------------------------------------------------
/hw3/data/dqn_Pong_vanilla_dqn_PongNoFrameskip-v4_04-10-2018_23-11-56/2333/hyperparams.json:
--------------------------------------------------------------------------------
1 | {"env_name"	:	"PongNoFrameskip-v4",
2 | "exp_name"	:	"Pong_vanilla_dqn"}


--------------------------------------------------------------------------------
/hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_no_critic",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/1",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_no_critic",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/11",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"HalfCheetah-v2",
 3 | "exp_name"	:	"hc_no_critic",
 4 | "gamma"	:	0.9,
 5 | "learning_rate"	:	0.02,
 6 | "logdir"	:	"data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/21",
 7 | "max_path_length"	:	150.0,
 8 | "min_timesteps_per_batch"	:	30000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	32}


--------------------------------------------------------------------------------
/hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/hc_no_critic_HalfCheetah-v2_03-10-2018_19-59-04/21/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/1/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"InvertedPendulum-v2",
 3 | "exp_name"	:	"ip_no_critic",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/1",
 7 | "max_path_length"	:	1000.0,
 8 | "min_timesteps_per_batch"	:	5000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	1,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/1/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/1/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/11/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"InvertedPendulum-v2",
 3 | "exp_name"	:	"ip_no_critic",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/11",
 7 | "max_path_length"	:	1000.0,
 8 | "min_timesteps_per_batch"	:	5000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	11,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/11/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/11/model.pkl


--------------------------------------------------------------------------------
/hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/21/hyperparams.json:
--------------------------------------------------------------------------------
 1 | {"animate"	:	false,
 2 | "env_name"	:	"InvertedPendulum-v2",
 3 | "exp_name"	:	"ip_no_critic",
 4 | "gamma"	:	0.95,
 5 | "learning_rate"	:	0.01,
 6 | "logdir"	:	"data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/21",
 7 | "max_path_length"	:	1000.0,
 8 | "min_timesteps_per_batch"	:	5000,
 9 | "n_iter"	:	100,
10 | "n_layers"	:	2,
11 | "nn_baseline"	:	true,
12 | "normalize_advantages"	:	true,
13 | "reward_to_go"	:	true,
14 | "seed"	:	21,
15 | "size"	:	64}


--------------------------------------------------------------------------------
/hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/21/model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/data/ip_no_critic_InvertedPendulum-v2_03-10-2018_19-59-46/21/model.pkl


--------------------------------------------------------------------------------
/hw3/logz.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | 
  3 | """
  4 | 
  5 | Some simple logging functionality, inspired by rllab's logging.
  6 | Assumes that each diagnostic gets logged each iteration
  7 | 
  8 | Call logz.configure_output_dir() to start logging to a 
  9 | tab-separated-values file (some_folder_name/log.txt)
 10 | 
 11 | To load the learning curves, you can do, for example
 12 | 
 13 | A = np.genfromtxt('/tmp/expt_1468984536/log.txt',delimiter='\t',dtype=None, names=True)
 14 | A['EpRewMean']
 15 | 
 16 | """
 17 | 
 18 | import os.path as osp, shutil, time, atexit, os, subprocess
 19 | import pickle
 20 | import torch
 21 | 
 22 | color2num = dict(
 23 |     gray=30,
 24 |     red=31,
 25 |     green=32,
 26 |     yellow=33,
 27 |     blue=34,
 28 |     magenta=35,
 29 |     cyan=36,
 30 |     white=37,
 31 |     crimson=38
 32 | )
 33 | 
 34 | def colorize(string, color, bold=False, highlight=False):
 35 |     attr = []
 36 |     num = color2num[color]
 37 |     if highlight: num += 10
 38 |     attr.append(str(num))
 39 |     if bold: attr.append('1')
 40 |     return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string)
 41 | 
class G:
    # Module-wide logging state, kept as class attributes and accessed as G.<name>.
    output_dir = None  # directory path assigned by configure_output_dir()
    output_file = None  # open handle to <output_dir>/log.txt; None until configured
    first_row = True  # True until the first dump_tabular() call has finished
    log_headers = []  # keys registered by log_tabular() while first_row is True, in call order
    log_current_row = {}  # key -> value for the current iteration; cleared by dump_tabular()
 48 | 
 49 | def configure_output_dir(d=None):
 50 |     """
 51 |     Set output directory to d, or to /tmp/somerandomnumber if d is None
 52 |     """
 53 |     G.output_dir = d or "/tmp/experiments/%i"%int(time.time())
 54 |     assert not osp.exists(G.output_dir), "Log dir %s already exists! Delete it first or use a different dir"%G.output_dir
 55 |     os.makedirs(G.output_dir)
 56 |     G.output_file = open(osp.join(G.output_dir, "log.txt"), 'w')
 57 |     atexit.register(G.output_file.close)
 58 |     print(colorize("Logging data to %s"%G.output_file.name, 'green', bold=True))
 59 | 
 60 | def log_tabular(key, val):
 61 |     """
 62 |     Log a value of some diagnostic
 63 |     Call this once for each diagnostic quantity, each iteration
 64 |     """
 65 |     if G.first_row:
 66 |         G.log_headers.append(key)
 67 |     else:
 68 |         assert key in G.log_headers, "Trying to introduce a new key %s that you didn't include in the first iteration"%key
 69 |     assert key not in G.log_current_row, "You already set %s this iteration. Maybe you forgot to call dump_tabular()"%key
 70 |     G.log_current_row[key] = val
 71 | 
 72 | def save_hyperparams(params):
 73 |     with open(osp.join(G.output_dir, "hyperparams.json"), 'w') as out:
 74 |         out.write(json.dumps(params, separators=(',\n','\t:\t'), sort_keys=True))
 75 | 
 76 | def save_pytorch_model(model):  
 77 |     """
 78 |     Saves the entire pytorch Module 
 79 |     """
 80 |     torch.save(model, osp.join(G.output_dir, "model.pkl"))
 81 |     
 82 | 
 83 | def dump_tabular():
 84 |     """
 85 |     Write all of the diagnostics from the current iteration
 86 |     """
 87 |     vals = []
 88 |     key_lens = [len(key) for key in G.log_headers]
 89 |     max_key_len = max(15,max(key_lens))
 90 |     keystr = '%'+'%d'%max_key_len
 91 |     fmt = "| " + keystr + "s | %15s |"
 92 |     n_slashes = 22 + max_key_len
 93 |     print("-"*n_slashes)
 94 |     for key in G.log_headers:
 95 |         val = G.log_current_row.get(key, "")
 96 |         if hasattr(val, "__float__"): valstr = "%8.3g"%val
 97 |         else: valstr = val
 98 |         print(fmt%(key, valstr))
 99 |         vals.append(val)
100 |     print("-"*n_slashes)
101 |     if G.output_file is not None:
102 |         if G.first_row:
103 |             G.output_file.write("\t".join(G.log_headers))
104 |             G.output_file.write("\n")
105 |         G.output_file.write("\t".join(map(str,vals)))
106 |         G.output_file.write("\n")
107 |         G.output_file.flush()
108 |     G.log_current_row.clear()
109 |     G.first_row=False
110 | 


--------------------------------------------------------------------------------
/hw3/plot.py:
--------------------------------------------------------------------------------
  1 | import seaborn as sns
  2 | import pandas as pd
  3 | import matplotlib.pyplot as plt
  4 | import json
  5 | import os
  6 | 
  7 | """
  8 | Using the plotter:
  9 | 
 10 | Call it from the command line, and supply it with logdirs to experiments.
 11 | Suppose you ran an experiment with name 'test', and you ran 'test' for 10 
 12 | random seeds. The runner code stored it in the directory structure
 13 | 
 14 |     data
 15 |     L test_EnvName_DateTime
 16 |       L  0
 17 |         L log.txt
 18 |         L hyperparams.json
 19 |       L  1
 20 |         L log.txt
 21 |         L hyperparams.json
 22 |        .
 23 |        .
 24 |        .
 25 |       L  9
 26 |         L log.txt
 27 |         L hyperparams.json
 28 | 
 29 | To plot learning curves from the experiment, averaged over all random
 30 | seeds, call
 31 | 
 32 |     python plot.py data/test_EnvName_DateTime --value AverageReturn
 33 | 
 34 | and voila. To see a different statistic, change what you put in for
 35 | the keyword --value. You can also enter /multiple/ values, and it will 
 36 | plot all of them in order.
 37 | 
 38 | 
 39 | Suppose you ran two experiments: 'test1' and 'test2'. In 'test2' you tried
 40 | a different set of hyperparameters from 'test1', and now you would like 
 41 | to compare them -- see their learning curves side-by-side. Just call
 42 | 
 43 |     python plot.py data/test1 data/test2
 44 | 
 45 | and it will plot them both! They will be given titles in the legend according
 46 | to their exp_name parameters. If you want to use custom legend titles, use
 47 | the --legend flag and then provide a title for each logdir.
 48 | 
 49 | """
 50 | 
 51 | def plot_data(data, time="Iteration", value="AverageReturn", combine=False):
 52 |     if isinstance(data, list):
 53 |         data = pd.concat(data, ignore_index=True)
 54 |     plt.figure(figsize=(16, 9))
 55 |     sns.set(style="darkgrid", font_scale=1.5)
 56 |     if not combine:
 57 |         sns.tsplot(data=data, time=time, value=value, unit="Unit", condition="Condition")
 58 |     else:
 59 |         df1 = data.loc[:, [time, value[0], 'Condition']]
 60 |         df1['Statistics'] = value[0]
 61 |         df1.rename(columns={value[0]:'Value', 'Condition':'ExpName'}, inplace = True)
 62 |         df2 = data.loc[:, [time, value[1], 'Condition']]
 63 |         df2['Statistics'] = value[1]
 64 |         df2.rename(columns={value[1]:'Value', 'Condition':'ExpName'}, inplace = True)
 65 |         data = pd.concat([df1, df2], ignore_index=True)
 66 |         sns.lineplot(x=time, y='Value', hue='ExpName', style='Statistics', data=data)
 67 |         
 68 |     plt.legend(loc='best').draggable()
 69 |     plt.savefig('result.png', bbox_inches='tight')
 70 |     plt.show()
 71 | 
 72 | 
 73 | def get_datasets(fpath, condition=None):
 74 |     unit = 0
 75 |     datasets = []
 76 |     for root, dir, files in os.walk(fpath):
 77 |         if 'log.txt' in files:
 78 |             param_path = open(os.path.join(root,'hyperparams.json'))
 79 |             params = json.load(param_path)
 80 |             exp_name = params['exp_name']
 81 |             
 82 |             log_path = os.path.join(root,'log.txt')
 83 |             experiment_data = pd.read_table(log_path)
 84 | 
 85 |             experiment_data.insert(
 86 |                 len(experiment_data.columns),
 87 |                 'Unit',
 88 |                 unit
 89 |                 )        
 90 |             experiment_data.insert(
 91 |                 len(experiment_data.columns),
 92 |                 'Condition',
 93 |                 condition or exp_name
 94 |                 )
 95 | 
 96 |             datasets.append(experiment_data)
 97 |             unit += 1
 98 | 
 99 |     return datasets
100 | 
101 | 
def main():
    """
    Command-line entry point: parse arguments, load every log directory and
    plot the requested statistics.

    With --combine and exactly two --value entries, both statistics are drawn
    in a single figure; otherwise one figure is produced per value.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('logdir', nargs='*')
    parser.add_argument('--legend', nargs='*')
    parser.add_argument('--time', type=str, default='Iteration')
    parser.add_argument('--value', default='AverageReturn', nargs='*')
    parser.add_argument('--combine', action='store_true')
    args = parser.parse_args()

    if args.legend is None:
        # No custom titles: get_datasets falls back to each run's exp_name.
        legend_titles = [None] * len(args.logdir)
    else:
        assert len(args.legend) == len(args.logdir), \
            "Must give a legend title for each set of experiments."
        legend_titles = args.legend

    data = []
    for logdir, legend_title in zip(args.logdir, legend_titles):
        data += get_datasets(logdir, legend_title)

    # --value is a plain string when the default is used, a list otherwise.
    values = args.value if isinstance(args.value, list) else [args.value]

    if args.combine and len(values) == 2:
        plot_data(data, time=args.time, value=values, combine=True)
        return
    for value in values:
        plot_data(data, time=args.time, value=value, combine=False)
138 | 
# Run the plotter only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
141 | 


--------------------------------------------------------------------------------
/hw3/report.md:
--------------------------------------------------------------------------------
  1 | # CS294-112 HW 3: Q-Learning
  2 | 
  3 | ## Deep Q-learning
  4 | 
  5 | In this part, we run our vanilla DQN and double DQN in 4 different Atari environments, and we also experiment with the single-bias architecture mentioned in the original Double DQN paper.
  6 | 
  7 | Here are the results:
  8 | 
  9 | ### Pong
 10 | 
 11 | ![result_Pong](./result/result_Pong.png)
 12 | 
 13 | ### Kung-Fu Master
 14 | 
 15 | ![result_KungFuMaster](./result/result_KungFuMaster.png)
 16 | 
 17 | ### Bowling
 18 | 
 19 | ![result_Bowling](./result/result_Bowling.png)
 20 | 
 21 | ### Asterix
 22 | 
 23 | ![result_Asterix](./result/result_Asterix.png)
 24 | 
 25 | Surprisingly, in almost all 4 environments above, the vanilla DQN model is superior to the double DQN model. Even with the single-bias variant, the double DQN model only beats the vanilla DQN model in the **Bowling** game. From my point of view, this might be because the hyperparameters are not well tuned for the double DQN model. Also, we can see that the final performance of the vanilla DQN in the **Asterix** game is far better than the results reported in the original Double DQN paper, which might be another reason why we cannot observe a performance improvement from the double DQN model.
 26 | 
 27 | 
 28 | 
 29 | 
 30 | 
 31 | ## Actor-Critic
 32 | 
 33 | ### Cartpole 
 34 | 
 35 | We run our model with different settings of the critic target update frequency and the number of critic gradient steps:
 36 | 
 37 | ![result_cp](./result/result_cp.png)
 38 | 
 39 | From the results above, we can observe that the actor fails to learn a good policy when the number of critic updates is not enough. Even if the critic is trained with enough updates, the target update frequency can significantly affect the performance.
 40 | 
 41 | We can treat the target update frequency as a trade-off between learning stability and learning speed. When the target is updated too frequently, the learning process becomes unstable. This is just like the issue that occurs in Deep Q-learning, which is partially fixed by the target network trick. On the other hand, if the target is updated too slowly, the learning process slows down, which is unacceptable to us, and there is also a risk of overfitting the currently sampled mini-batch. All in all, to get a good result, we have to tune this hyperparameter carefully.
 42 | 
 43 | 
 44 | 
 45 | ### HalfCheetah 
 46 | 
 47 | At first, we run our Actor-Critic model with the default hyperparameters, and compare with the Policy Gradient model (with the reward-to-go gradient estimator and neural network baseline) which we implemented in HW2.
 48 | 
 49 | Here is the result:
 50 | 
 51 | ![result_hc_1](./result/result_hc_1.png)
 52 | 
 53 | Generally, we can see that the critic network helps, but it does not bring a significant improvement in the final performance. As the instructions say, the critic network might need a different learning rate and more hidden layers or units. So we tried different learning rates for the critic network:
 54 | 
 55 | ![result_hc_2](./result/result_hc_2.png)
 56 | 
 57 | It did help! After that, we also tried adding a hidden layer to the critic network (with the same number of hidden units), and here are the results:
 58 | 
 59 | ![result_hc_3](./result/result_hc_3.png)
 60 | 
 61 | In the best setting, the performance of the actor does show a nontrivial improvement. To make the comparison clearer, we pick the best setting for the 2-hidden-layer critic and the 3-hidden-layer critic respectively, and plot them with the baseline model:
 62 | 
 63 | ![result_hc_4](./result/result_hc_4.png)
 64 | 
 65 | We can clearly see the improvement.
 66 | 
 67 | Finally, these are commands for the experiments :
 68 | 
 69 | ```shell
 70 | python train_pg_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 \
 71 | -lr 0.02 -rtg --nn_baseline --exp_name hc_no_critic
 72 | 
 73 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 --exp_name hc_critic -ntu 10 -ngsptu 10
 74 | 
 75 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -clr 0.04 --exp_name hc_critic_clr0.04 -ntu 10 -ngsptu 10
 76 | 
 77 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -clr 0.01 --exp_name hc_critic_clr0.01 -ntu 10 -ngsptu 10
 78 | 
 79 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -clr 0.004 --exp_name hc_critic_clr0.004 -ntu 10 -ngsptu 10
 80 | 
 81 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -cl 3 --exp_name hc_critic_cl3 -ntu 10 -ngsptu 10
 82 | 
 83 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -clr 0.04 -cl 3 --exp_name hc_critic_clr0.04_cl3 -ntu 10 -ngsptu 10
 84 | 
 85 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -clr 0.01 -cl 3 --exp_name hc_critic_clr0.01_cl3 -ntu 10 -ngsptu 10
 86 | 
 87 | python train_ac_f18.py HalfCheetah-v2 -ep 150 --discount 0.90 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 -clr 0.004 -cl 3 --exp_name hc_critic_clr0.004_cl3 -ntu 10 -ngsptu 10
 88 | ```
 89 | 
 90 | 
 91 | 
 92 | ### InvertedPendulum 
 93 | 
 94 | In this problem, we generally did the same thing as above.
 95 | 
 96 | Firstly, the baseline models:
 97 | 
 98 | ![result_ip_1](./result/result_ip_1.png)
 99 | 
100 | Secondly, the different learning rate setting for the critic network:
101 | 
102 | ![result_ip_2](./result/result_ip_2.png)
103 | 
104 | Also, with one more hidden layer:
105 | 
106 | ![result_ip_3](./result/result_ip_3.png)
107 | 
108 | Finally, the comparison between baseline and best model:
109 | 
110 | ![result_ip_4](./result/result_ip_4.png)
111 | 
112 | With tuning of the critic's learning rate, the learning curve becomes more stable and we get a small performance improvement. However, the extra layer does not help as much as it did in the former problem. This might be because this problem is relatively easy to solve (the baseline model can already reach the max score), or because we have yet to find the appropriate learning rate for the critic.
113 | 
114 | Here are commands for the experiments :
115 | 
116 | ```shell
117 | python train_pg_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -rtg --nn_baseline --exp_name ip_no_critic
118 | 
119 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 --exp_name ip_critic -ntu 10 -ngsptu 10
120 | 
121 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -clr 0.02 --exp_name ip_critic_clr0.02 -ntu 10 -ngsptu 10
122 | 
123 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -clr 0.005 --exp_name ip_critic_clr0.005 -ntu 10 -ngsptu 10
124 | 
125 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -clr 0.002 --exp_name ip_critic_clr0.002 -ntu 10 -ngsptu 10
126 | 
127 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -cl 3 --exp_name ip_critic_cl3 -ntu 10 -ngsptu 10
128 | 
129 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -clr 0.02 -cl 3 --exp_name ip_critic_clr0.02_cl3 -ntu 10 -ngsptu 10
130 | 
131 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -clr 0.005 -cl 3 --exp_name ip_critic_clr0.005_cl3 -ntu 10 -ngsptu 10
132 | 
133 | python train_ac_f18.py InvertedPendulum-v2 -ep 1000 --discount 0.95 -n 100 -e 3 -l 2 -s 64 -b 5000 -lr 0.01 -clr 0.002 -cl 3 --exp_name ip_critic_clr0.002_cl3 -ntu 10 -ngsptu 10
134 | ```
135 | 
136 | 
137 | 
138 | 


--------------------------------------------------------------------------------
/hw3/requirements.txt:
--------------------------------------------------------------------------------
1 | gym==0.10.5
2 | gym[atari]
3 | box2d
4 | mujoco-py==1.50.1.56
5 | torch==0.4.0
6 | numpy
7 | seaborn
8 | opencv-python
9 | 


--------------------------------------------------------------------------------
/hw3/result/result_Asterix.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_Asterix.png


--------------------------------------------------------------------------------
/hw3/result/result_Bowling.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_Bowling.png


--------------------------------------------------------------------------------
/hw3/result/result_KungFuMaster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_KungFuMaster.png


--------------------------------------------------------------------------------
/hw3/result/result_Pong.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_Pong.png


--------------------------------------------------------------------------------
/hw3/result/result_cp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_cp.png


--------------------------------------------------------------------------------
/hw3/result/result_hc_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_hc_1.png


--------------------------------------------------------------------------------
/hw3/result/result_hc_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_hc_2.png


--------------------------------------------------------------------------------
/hw3/result/result_hc_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_hc_3.png


--------------------------------------------------------------------------------
/hw3/result/result_hc_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_hc_4.png


--------------------------------------------------------------------------------
/hw3/result/result_hc_4_old.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_hc_4_old.png


--------------------------------------------------------------------------------
/hw3/result/result_ip_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_ip_1.png


--------------------------------------------------------------------------------
/hw3/result/result_ip_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_ip_2.png


--------------------------------------------------------------------------------
/hw3/result/result_ip_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_ip_3.png


--------------------------------------------------------------------------------
/hw3/result/result_ip_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/KuNyaa/berkeleydeeprlcourse-homework-pytorch-solution/a7ffcc94d0387aa6c4cee949c47657aaa8176fe0/hw3/result/result_ip_4.png


--------------------------------------------------------------------------------
/hw3/run_dqn_atari.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import gym
  3 | from gym import wrappers
  4 | import time
  5 | import logz
  6 | import os.path as osp
  7 | import random
  8 | import numpy as np
  9 | import torch
 10 | from torch import nn
 11 | 
 12 | import dqn
 13 | from dqn_utils import PiecewiseSchedule, get_wrapper_by_name
 14 | from atari_wrappers import wrap_deepmind
 15 | 
 16 | def weights_init(m):
 17 |     if hasattr(m, 'weight'):
 18 |         nn.init.xavier_normal_(m.weight)
 19 |     if hasattr(m, 'bias') and m.bias is not None:
 20 |         nn.init.constant_(m.bias, 0)
 21 | 
 22 | class DQN(nn.Module): # for atari
 23 |     def __init__(self, in_channels, num_actions):
 24 |         # as described in https://storage.googleapis.com/deepmind-data/assets/papers/DeepMindNature14236Paper.pdf
 25 |         super(DQN, self).__init__()
 26 |         self.convnet = nn.Sequential(
 27 |             nn.Conv2d(in_channels, out_channels=32, kernel_size=8, stride=4),
 28 |             nn.ReLU(True),
 29 |             nn.Conv2d(in_channels=32, out_channels=64, kernel_size=4, stride=2),
 30 |             nn.ReLU(True),
 31 |             nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1),
 32 |             nn.ReLU(True),
 33 |         )
 34 |         self.classifier = nn.Sequential(
 35 |             nn.Linear(in_features=7 * 7 * 64, out_features=512),
 36 |             nn.ReLU(True),
 37 |             nn.Linear(in_features=512, out_features=num_actions),
 38 |         )
 39 | 
 40 |         self.apply(weights_init)
 41 |         
 42 | 
 43 |     def forward(self, obs):
 44 |         out = obs.float() / 255 # convert 8-bits RGB color to float in [0, 1]
 45 |         out = out.permute(0, 3, 1, 2) # reshape to [batch_size, img_c * frames, img_h, img_w]
 46 |         out = self.convnet(out)
 47 |         out = out.view(out.size(0), -1) # flatten feature maps to a big vector
 48 |         out = self.classifier(out)
 49 |         return out
 50 | 
 51 | def atari_learn(env,
 52 |                 num_timesteps):
 53 |     # This is just a rough estimate
 54 |     num_iterations = float(num_timesteps) / 4.0
 55 | 
 56 |     lr_multiplier = 1.0
 57 |     lr_schedule = PiecewiseSchedule(
 58 |         [
 59 |             (0,                   1e-4 * lr_multiplier),
 60 |             (num_iterations / 10, 1e-4 * lr_multiplier),
 61 |             (num_iterations / 2,  5e-5 * lr_multiplier),
 62 |         ],
 63 |         outside_value=5e-5 * lr_multiplier
 64 |     )
 65 |     lr_lambda = lambda t: lr_schedule.value(t)
 66 | 
 67 |     optimizer = dqn.OptimizerSpec(
 68 |         constructor=torch.optim.Adam,
 69 |         kwargs=dict(eps=1e-4),
 70 |         lr_lambda=lr_lambda
 71 |     )
 72 | 
 73 |     def stopping_criterion(env, t):
 74 |         # notice that here t is the number of steps of the wrapped env,
 75 |         # which is different from the number of steps in the underlying env
 76 |         return get_wrapper_by_name(env, "Monitor").get_total_steps() >= num_timesteps
 77 | 
 78 |     exploration_schedule = PiecewiseSchedule(
 79 |         [
 80 |             (0, 1.0),
 81 |             (1e6, 0.1),
 82 |             (num_iterations / 2, 0.01),
 83 |         ],
 84 |         outside_value=0.01
 85 |     )
 86 | 
 87 |     dqn.learn(
 88 |         env=env,
 89 |         q_func=DQN,
 90 |         optimizer_spec=optimizer,
 91 |         exploration=exploration_schedule,
 92 |         stopping_criterion=stopping_criterion,
 93 |         replay_buffer_size=1000000,
 94 |         batch_size=32,
 95 |         gamma=0.99,
 96 |         learning_starts=50000,
 97 |         learning_freq=4,
 98 |         frame_history_len=4,
 99 |         target_update_freq=10000,
100 |         grad_norm_clipping=10,
101 |         double_q=True
102 |     )
103 |     env.close()
104 | 
def set_global_seeds(i):
    """Seed every random number generator this script relies on.

    The same value is given to PyTorch (CPU, plus CUDA when a device is
    available), NumPy's global generator and Python's ``random`` module.

    Args:
        i: Integer seed.
    """
    torch.manual_seed(i)
    # ``is_available`` is a function and has to be called; the bare
    # attribute is always truthy, which turned this guard into a no-op.
    if torch.cuda.is_available():
        torch.cuda.manual_seed(i)
    np.random.seed(i)
    random.seed(i)
111 | 
def get_env(env_name, exp_name, seed):
    """Create the seeded, monitored and DeepMind-wrapped environment.

    Also points ``logz`` at ``data/dqn_<exp>_<env>_<timestamp>/<seed>`` and
    stores the experiment and environment names as hyperparameters.

    Args:
        env_name: Gym environment id passed to ``gym.make``.
        exp_name: Free-form experiment label used in the log directory name.
        seed: Seed applied to the global RNGs and to the environment.

    Returns:
        The environment wrapped in ``Monitor`` and then ``wrap_deepmind``.
    """
    env = gym.make(env_name)

    set_global_seeds(seed)
    env.seed(seed)

    # Set Up Logger
    timestamp = time.strftime("%d-%m-%Y_%H-%M-%S")
    run_name = '_'.join(['dqn', exp_name, env_name, timestamp])
    logdir = osp.join('data', run_name, '%d' % seed)
    logz.configure_output_dir(logdir)
    logz.save_hyperparams({'exp_name': exp_name, 'env_name': env_name})

    # Monitor output goes to a fixed directory; force=True is passed so an
    # existing directory does not stop the run (per gym's Monitor option).
    expt_dir = '/tmp/hw3_vid_dir2/'
    monitored = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    return wrap_deepmind(monitored)
131 | 
def main():
    """Train the double-DQN agent on Pong with a fixed seed."""
    # Environment and experiment label (the label only marks the log dir).
    env_name = 'PongNoFrameskip-v4'
    exp_name = 'Pong_double_dqn'

    # Fixed seed; swap in random.randint(0, 9999) for a random one.
    seed = 2333
    print('random seed = %d' % seed)

    # Run training
    atari_learn(get_env(env_name, exp_name, seed), num_timesteps=2e8)
142 | 
# Start training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
145 | 


--------------------------------------------------------------------------------
/hw3/run_dqn_lander.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import gym
  3 | from gym import wrappers
  4 | import time
  5 | import logz
  6 | import os.path as osp
  7 | import random
  8 | import numpy as np
  9 | import torch
 10 | from torch import nn
 11 | 
 12 | import dqn
 13 | from dqn_utils import ConstantSchedule, PiecewiseSchedule, get_wrapper_by_name
 14 | 
 15 | 
 16 | def weights_init(m):
 17 |     if hasattr(m, 'weight'):
 18 |         nn.init.orthogonal_(m.weight)
 19 |     if hasattr(m, 'bias'):
 20 |         nn.init.constant_(m.bias, 0)
 21 | 
 22 | class DQN(nn.Module): # for lunar lander
 23 |     def __init__(self, in_features, num_actions):
 24 |         super(DQN, self).__init__()
 25 |         self.classifier = nn.Sequential(
 26 |             nn.Linear(in_features, out_features=64),
 27 |             nn.ReLU(True),
 28 |             nn.Linear(in_features=64, out_features=64),
 29 |             nn.ReLU(True),
 30 |             nn.Linear(in_features=64, out_features=num_actions),
 31 |         )
 32 | 
 33 |         self.apply(weights_init)
 34 | 
 35 |     def forward(self, obs):
 36 |         out = self.classifier(obs)
 37 |         return out
 38 | 
 39 | def lander_optimizer():
 40 |     lr_schedule = ConstantSchedule(1e-3)
 41 |     lr_lambda = lambda t: lr_schedule.value(t)
 42 |     return dqn.OptimizerSpec(
 43 |         constructor=torch.optim.Adam,
 44 |         lr_lambda=lr_lambda,
 45 |         kwargs=dict(amsgrad=True)
 46 |     )
 47 | 
 48 | def lander_stopping_criterion(num_timesteps):
 49 |     def stopping_criterion(env, t):
 50 |         # notice that here t is the number of steps of the wrapped env,
 51 |         # which is different from the number of steps in the underlying env
 52 |         return get_wrapper_by_name(env, "Monitor").get_total_steps() >= num_timesteps
 53 |     return stopping_criterion
 54 | 
 55 | def lander_exploration_schedule(num_timesteps):
 56 |     return PiecewiseSchedule(
 57 |         [
 58 |             (0, 1),
 59 |             (num_timesteps * 0.1, 0.02),
 60 |         ], outside_value=0.02
 61 |     )
 62 | 
 63 | def lander_kwargs():
 64 |     return {
 65 |         'optimizer_spec': lander_optimizer(),
 66 |         'q_func': DQN,
 67 |         'replay_buffer_size': 50000,
 68 |         'batch_size': 32,
 69 |         'gamma': 1.00,
 70 |         'learning_starts': 1000,
 71 |         'learning_freq': 1,
 72 |         'frame_history_len': 1,
 73 |         'target_update_freq': 3000,
 74 |         'grad_norm_clipping': 10,
 75 |         'lander': True
 76 |     }
 77 | 
 78 | def lander_learn(env,
 79 |                  num_timesteps):
 80 | 
 81 |     optimizer = lander_optimizer()
 82 |     stopping_criterion = lander_stopping_criterion(num_timesteps)
 83 |     exploration_schedule = lander_exploration_schedule(num_timesteps)
 84 | 
 85 |     dqn.learn(
 86 |         env=env,
 87 |         exploration=lander_exploration_schedule(num_timesteps),
 88 |         stopping_criterion=lander_stopping_criterion(num_timesteps),
 89 |         double_q=True,
 90 |         **lander_kwargs()
 91 |     )
 92 |     env.close()
 93 | 
 94 | def set_global_seeds(i):
 95 |     torch.manual_seed(i)
 96 |     if torch.cuda.is_available:
 97 |         torch.cuda.manual_seed(i)
 98 |     np.random.seed(i)
 99 |     random.seed(i)
100 | 
def get_env(env_name, exp_name, seed):
    """Create the seeded, monitored environment for the lander run.

    Also points ``logz`` at ``data/dqn_<exp>_<env>_<timestamp>/<seed>`` and
    stores the experiment and environment names as hyperparameters.

    Args:
        env_name: Gym environment id passed to ``gym.make``.
        exp_name: Free-form experiment label used in the log directory name.
        seed: Seed applied to the global RNGs and to the environment.

    Returns:
        The environment wrapped in ``Monitor`` (``video_callable=False``).
    """
    env = gym.make(env_name)

    set_global_seeds(seed)
    env.seed(seed)

    # Set Up Logger
    timestamp = time.strftime("%d-%m-%Y_%H-%M-%S")
    run_name = '_'.join(['dqn', exp_name, env_name, timestamp])
    logdir = osp.join('data', run_name, '%d' % seed)
    logz.configure_output_dir(logdir)
    logz.save_hyperparams({'exp_name': exp_name, 'env_name': env_name})

    expt_dir = '/tmp/hw3_vid_dir/'
    monitor_dir = osp.join(expt_dir, "gym")
    return wrappers.Monitor(env, monitor_dir, force=True, video_callable=False)
120 | 
def main():
    """Train the DQN agent on LunarLander with a fixed seed."""
    # Environment and experiment label (the label only marks the log dir).
    env_name = 'LunarLander-v2'
    exp_name = 'orthogonal_init'

    # Fixed seed; you may want to randomize this.
    seed = 4565
    print('random seed = %d' % seed)

    # Run training
    lander_learn(get_env(env_name, exp_name, seed), num_timesteps=500000)
131 | 
# Start training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
134 | 


--------------------------------------------------------------------------------
/hw3/run_dqn_ram.py:
--------------------------------------------------------------------------------
  1 | import argparse
  2 | import gym
  3 | from gym import wrappers
  4 | import time
  5 | import logz
  6 | import os.path as osp
  7 | import random
  8 | import numpy as np
  9 | import torch
 10 | from torch import nn
 11 | 
 12 | import dqn
 13 | from dqn_utils import PiecewiseSchedule, get_wrapper_by_name
 14 | from atari_wrappers import wrap_deepmind_ram
 15 | 
 16 | def weights_init(m):
 17 |     if hasattr(m, 'weight'):
 18 |         nn.init.xavier_uniform_(m.weight)
 19 |     #if hasattr(m, 'bias'):
 20 |     #    nn.init.constant_(m.bias, 0)
 21 |         
 22 | class DQN(nn.Module): # for atari ram
 23 |     def __init__(self, in_features, num_actions):
 24 |         super(DQN, self).__init__()
 25 |         self.classifier = nn.Sequential(
 26 |             nn.Linear(in_features, out_features=256),
 27 |             nn.ReLU(True),
 28 |             nn.Linear(in_features=256, out_features=128),
 29 |             nn.ReLU(True),
 30 |             nn.Linear(in_features=128, out_features=64),
 31 |             nn.ReLU(True),
 32 |             nn.Linear(in_features=64, out_features=num_actions),
 33 |         )
 34 | 
 35 |         self.apply(weights_init)
 36 | 
 37 |     def forward(self, obs):
 38 |         out = obs.float() / 255 # convert 8-bits ram state to float in [0, 1]
 39 |         out = self.classifier(out)
 40 |         return out
 41 | 
 42 | def atari_learn(env,
 43 |                 num_timesteps):
 44 |     # This is just a rough estimate
 45 |     num_iterations = float(num_timesteps) / 4.0
 46 | 
 47 |     lr_multiplier = 1.0
 48 |     lr_schedule = PiecewiseSchedule(
 49 |         [
 50 |             (0,                   1e-4 * lr_multiplier),
 51 |             (num_iterations / 10, 1e-4 * lr_multiplier),
 52 |             (num_iterations / 2,  5e-5 * lr_multiplier),
 53 |         ],
 54 |         outside_value=5e-5 * lr_multiplier
 55 |     )
 56 |     lr_lambda = lambda t: lr_schedule.value(t)
 57 | 
 58 |     optimizer = dqn.OptimizerSpec(
 59 |         constructor=torch.optim.Adam,
 60 |         kwargs=dict(eps=1e-4, amsgrad=True),
 61 |         lr_lambda=lr_lambda
 62 |     )
 63 | 
 64 |     def stopping_criterion(env, t):
 65 |         # notice that here t is the number of steps of the wrapped env,
 66 |         # which is different from the number of steps in the underlying env
 67 |         return get_wrapper_by_name(env, "Monitor").get_total_steps() >= num_timesteps
 68 | 
 69 |     exploration_schedule = PiecewiseSchedule(
 70 |         [
 71 |             (0, 0.2),
 72 |             (1e6, 0.1),
 73 |             (num_iterations / 2, 0.01),
 74 |         ], outside_value=0.01
 75 |     )
 76 | 
 77 |     dqn.learn(
 78 |         env,
 79 |         q_func=DQN,
 80 |         optimizer_spec=optimizer,
 81 |         exploration=exploration_schedule,
 82 |         stopping_criterion=stopping_criterion,
 83 |         replay_buffer_size=1000000,
 84 |         batch_size=32,
 85 |         gamma=0.99,
 86 |         learning_starts=50000,
 87 |         learning_freq=4,
 88 |         frame_history_len=1,
 89 |         target_update_freq=10000,
 90 |         grad_norm_clipping=10
 91 |     )
 92 |     env.close()
 93 | 
 94 | def set_global_seeds(i):
 95 |     torch.manual_seed(i)
 96 |     if torch.cuda.is_available:
 97 |         torch.cuda.manual_seed(i)
 98 |     np.random.seed(i)
 99 |     random.seed(i)
100 | 
def get_env(env_name, exp_name, seed):
    """Create the seeded, monitored and RAM-wrapped environment.

    Also points ``logz`` at ``data/dqn_<exp>_<env>_<timestamp>/<seed>`` and
    stores the experiment and environment names as hyperparameters.

    Args:
        env_name: Gym environment id passed to ``gym.make``.
        exp_name: Free-form experiment label used in the log directory name.
        seed: Seed applied to the global RNGs and to the environment.

    Returns:
        The environment wrapped in ``Monitor`` and then ``wrap_deepmind_ram``.
    """
    env = gym.make(env_name)

    set_global_seeds(seed)
    env.seed(seed)

    # Set Up Logger
    timestamp = time.strftime("%d-%m-%Y_%H-%M-%S")
    run_name = '_'.join(['dqn', exp_name, env_name, timestamp])
    logdir = osp.join('data', run_name, '%d' % seed)
    logz.configure_output_dir(logdir)
    logz.save_hyperparams({'exp_name': exp_name, 'env_name': env_name})

    expt_dir = '/tmp/hw3_vid_dir/'
    monitored = wrappers.Monitor(env, osp.join(expt_dir, "gym"), force=True)
    return wrap_deepmind_ram(monitored)
120 | 
def main():
    """Train the DQN agent on RAM-observation Pong with a fixed seed."""
    # Environment and experiment label (the label only marks the log dir).
    env_name = 'Pong-ram-v0'
    exp_name = 'default'

    # Use a seed of zero (you may want to randomize the seed!)
    seed = 0
    print('random seed = %d' % seed)

    # Run training
    atari_learn(get_env(env_name, exp_name, seed), num_timesteps=int(4e7))
131 | 
# Start training only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
134 | 


--------------------------------------------------------------------------------