├── .gitattributes ├── README.md ├── hw1 ├── README.md ├── README.txt ├── cs285 │ ├── agents │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── base_agent.cpython-37.pyc │ │ │ └── bc_agent.cpython-37.pyc │ │ └── bc_agent.py │ ├── data │ │ ├── Q1-2 │ │ │ ├── bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37 │ │ │ │ ├── events.out.tfevents.1593151299.DESKTOP-U53KV1A.17880.0 │ │ │ │ └── policy_itr_0 │ │ │ ├── bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25 │ │ │ │ ├── events.out.tfevents.1593151346.DESKTOP-U53KV1A.3080.0 │ │ │ │ └── policy_itr_0 │ │ │ ├── bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26 │ │ │ │ ├── events.out.tfevents.1593151407.DESKTOP-U53KV1A.17880.0 │ │ │ │ └── policy_itr_0 │ │ │ ├── bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13 │ │ │ │ ├── events.out.tfevents.1593151514.DESKTOP-U53KV1A.2796.0 │ │ │ │ └── policy_itr_0 │ │ │ └── bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41 │ │ │ │ ├── events.out.tfevents.1593151663.DESKTOP-U53KV1A.14512.0 │ │ │ │ └── policy_itr_0 │ │ ├── Q1-3 │ │ │ └── bc_test_bc_walker2d_Walker2d-v2_26-06-2020_20-57-08 │ │ │ │ └── events.out.tfevents.1593219429.DESKTOP-U53KV1A.388.0 │ │ └── Q2-2 │ │ │ └── dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29 │ │ │ ├── events.out.tfevents.1593231811.DESKTOP-U53KV1A.16276.0 │ │ │ ├── policy_itr_0 │ │ │ ├── policy_itr_1 │ │ │ ├── policy_itr_2 │ │ │ ├── policy_itr_3 │ │ │ ├── policy_itr_4 │ │ │ ├── policy_itr_5 │ │ │ ├── policy_itr_6 │ │ │ ├── policy_itr_7 │ │ │ ├── policy_itr_8 │ │ │ └── policy_itr_9 │ ├── expert_data │ │ ├── expert_data_Ant-v2.pkl │ │ ├── expert_data_HalfCheetah-v2.pkl │ │ ├── expert_data_Hopper-v2.pkl │ │ ├── expert_data_Humanoid-v2.pkl │ │ └── expert_data_Walker2d-v2.pkl │ ├── infrastructure │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── logger.cpython-37.pyc │ │ │ ├── replay_buffer.cpython-37.pyc │ │ │ ├── rl_trainer.cpython-37.pyc │ │ │ ├── tf_utils.cpython-37.pyc │ │ │ └── 
utils.cpython-37.pyc │ │ ├── logger.py │ │ ├── replay_buffer.py │ │ ├── rl_trainer.py │ │ └── utils.py │ ├── policies │ │ ├── MLP_policy.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── MLP_policy.cpython-37.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── base_policy.cpython-37.pyc │ │ │ └── loaded_gaussian_policy.cpython-37.pyc │ │ ├── experts │ │ │ ├── Ant.pkl │ │ │ ├── HalfCheetah.pkl │ │ │ ├── Hopper.pkl │ │ │ ├── Humanoid.pkl │ │ │ └── Walker2d.pkl │ │ └── loaded_gaussian_policy.py │ └── scripts │ │ └── run_hw1_behavior_cloning.py ├── cs285_hw1.pdf ├── downloads │ └── mjpro150 │ │ ├── bin │ │ ├── basic │ │ ├── compile │ │ ├── derivative │ │ ├── libglew.so │ │ ├── libglewegl.so │ │ ├── libglewosmesa.so │ │ ├── libglfw.so.3 │ │ ├── libmujoco150.so │ │ ├── libmujoco150nogl.so │ │ ├── record │ │ ├── simulate │ │ └── test │ │ ├── doc │ │ ├── README.txt │ │ └── REFERENCE.txt │ │ ├── include │ │ ├── glfw3.h │ │ ├── mjdata.h │ │ ├── mjmodel.h │ │ ├── mjrender.h │ │ ├── mjvisualize.h │ │ ├── mjxmacro.h │ │ └── mujoco.h │ │ ├── model │ │ ├── humanoid.xml │ │ └── humanoid100.xml │ │ └── sample │ │ ├── basic.cpp │ │ ├── compile.cpp │ │ ├── derivative.cpp │ │ ├── makefile │ │ ├── record.cpp │ │ ├── simulate.cpp │ │ └── test.cpp ├── requirements.txt ├── results │ ├── Q1-2 │ │ ├── bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37 │ │ │ ├── events.out.tfevents.1593151299.DESKTOP-U53KV1A.17880.0 │ │ │ └── policy_itr_0 │ │ ├── bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25 │ │ │ ├── events.out.tfevents.1593151346.DESKTOP-U53KV1A.3080.0 │ │ │ └── policy_itr_0 │ │ ├── bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26 │ │ │ ├── events.out.tfevents.1593151407.DESKTOP-U53KV1A.17880.0 │ │ │ └── policy_itr_0 │ │ ├── bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13 │ │ │ ├── events.out.tfevents.1593151514.DESKTOP-U53KV1A.2796.0 │ │ │ └── policy_itr_0 │ │ └── bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41 │ │ │ ├── events.out.tfevents.1593151663.DESKTOP-U53KV1A.14512.0 │ │ 
│ └── policy_itr_0 │ ├── Q1-3 │ │ ├── bc-eval-avg.PNG │ │ ├── bc-eval-std.PNG │ │ └── bc_test_bc_walker2d_Walker2d-v2_26-06-2020_20-57-08 │ │ │ └── events.out.tfevents.1593219429.DESKTOP-U53KV1A.388.0 │ └── Q2-2 │ │ ├── dagger-eval-avg.PNG │ │ ├── dagger-eval-std.PNG │ │ └── dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29 │ │ ├── events.out.tfevents.1593231811.DESKTOP-U53KV1A.16276.0 │ │ ├── policy_itr_0 │ │ ├── policy_itr_1 │ │ ├── policy_itr_2 │ │ ├── policy_itr_3 │ │ ├── policy_itr_4 │ │ ├── policy_itr_5 │ │ ├── policy_itr_6 │ │ ├── policy_itr_7 │ │ ├── policy_itr_8 │ │ └── policy_itr_9 └── setup.py ├── hw2 ├── README.md ├── README.txt ├── cs285 │ ├── agents │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── pg_agent.cpython-37.pyc │ │ └── pg_agent.py │ ├── infrastructure │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── logger.cpython-37.pyc │ │ │ ├── replay_buffer.cpython-37.pyc │ │ │ ├── rl_trainer.cpython-37.pyc │ │ │ └── utils.cpython-37.pyc │ │ ├── logger.py │ │ ├── replay_buffer.py │ │ ├── rl_trainer.py │ │ └── utils.py │ ├── policies │ │ ├── MLP_policy.py │ │ ├── __init__.py │ │ └── __pycache__ │ │ │ ├── MLP_policy.cpython-37.pyc │ │ │ └── __init__.cpython-37.pyc │ └── scripts │ │ └── run_hw2_policy_gradient.py ├── cs285_hw2.pdf ├── requirements.txt ├── results │ ├── bonus-gae-cp │ │ ├── gae_cp_comp.png │ │ ├── pg_cp_lambda0.4_CartPole-v0_28-07-2020_00-18-04 │ │ │ └── events.out.tfevents.1595909884.DESKTOP-U53KV1A │ │ ├── pg_cp_lambda0.8_CartPole-v0_28-07-2020_00-17-56 │ │ │ └── events.out.tfevents.1595909876.DESKTOP-U53KV1A │ │ ├── pg_cp_lambda0.95_CartPole-v0_28-07-2020_00-17-50 │ │ │ └── events.out.tfevents.1595909870.DESKTOP-U53KV1A │ │ └── pg_cp_lambda1_CartPole-v0_28-07-2020_00-17-41 │ │ │ └── events.out.tfevents.1595909861.DESKTOP-U53KV1A │ ├── bonus-gae │ │ ├── gae_hc_comp.png │ │ ├── pg_lambda0.4_HalfCheetah-v2_27-07-2020_21-59-54 │ │ │ └── 
events.out.tfevents.1595901594.DESKTOP-U53KV1A │ │ ├── pg_lambda0.8_HalfCheetah-v2_27-07-2020_19-21-50 │ │ │ └── events.out.tfevents.1595892110.DESKTOP-U53KV1A │ │ ├── pg_lambda0.95_HalfCheetah-v2_27-07-2020_19-21-45 │ │ │ └── events.out.tfevents.1595892105.DESKTOP-U53KV1A │ │ ├── pg_lambda0.975_HalfCheetah-v2_27-07-2020_19-21-43 │ │ │ └── events.out.tfevents.1595892103.DESKTOP-U53KV1A │ │ ├── pg_lambda0.99_HalfCheetah-v2_27-07-2020_21-59-27 │ │ │ └── events.out.tfevents.1595901567.DESKTOP-U53KV1A │ │ ├── pg_lambda0.9_HalfCheetah-v2_27-07-2020_21-59-37 │ │ │ └── events.out.tfevents.1595901577.DESKTOP-U53KV1A │ │ ├── pg_lambda0_HalfCheetah-v2_27-07-2020_19-21-48 │ │ │ └── events.out.tfevents.1595892108.DESKTOP-U53KV1A │ │ └── pg_lambda1_HalfCheetah-v2_27-07-2020_21-59-20 │ │ │ └── events.out.tfevents.1595901560.DESKTOP-U53KV1A │ ├── problem-3-lb │ │ ├── eval-avg-cp-lb.png │ │ ├── pg_lb_no_rtg_dna_CartPole-v0_29-06-2020_00-52-35 │ │ │ └── events.out.tfevents.1593406355.DESKTOP-U53KV1A │ │ ├── pg_lb_rtg_dna_CartPole-v0_29-06-2020_00-52-41 │ │ │ └── events.out.tfevents.1593406361.DESKTOP-U53KV1A │ │ └── pg_lb_rtg_na_CartPole-v0_29-06-2020_00-52-47 │ │ │ └── events.out.tfevents.1593406367.DESKTOP-U53KV1A │ ├── problem-3-sb │ │ ├── eval-avg-cp-sb.png │ │ ├── pg_sb_no_rtg_dna_CartPole-v0_29-06-2020_00-45-57 │ │ │ └── events.out.tfevents.1593405957.DESKTOP-U53KV1A │ │ ├── pg_sb_rtg_dna_CartPole-v0_29-06-2020_00-46-05 │ │ │ └── events.out.tfevents.1593405965.DESKTOP-U53KV1A │ │ └── pg_sb_rtg_na_CartPole-v0_29-06-2020_00-46-11 │ │ │ └── events.out.tfevents.1593405971.DESKTOP-U53KV1A │ ├── problem-4 │ │ ├── ideal-params-comparison.png │ │ ├── pg_ip_b100_lr0.06_InvertedPendulum-v2_01-07-2020_00-34-14 │ │ │ └── events.out.tfevents.1593578054.DESKTOP-U53KV1A │ │ ├── pg_ip_b80_lr0.06_InvertedPendulum-v2_01-07-2020_00-33-01 │ │ │ └── events.out.tfevents.1593577981.DESKTOP-U53KV1A │ │ ├── pg_ip_b90_lr0.05_InvertedPendulum-v2_01-07-2020_00-26-36 │ │ │ └── 
events.out.tfevents.1593577596.DESKTOP-U53KV1A │ │ ├── pg_ip_b90_lr0.06_InvertedPendulum-v2_01-07-2020_00-27-32 │ │ │ └── events.out.tfevents.1593577652.DESKTOP-U53KV1A │ │ └── pg_ip_b90_lr0.07_InvertedPendulum-v2_01-07-2020_00-28-39 │ │ │ └── events.out.tfevents.1593577719.DESKTOP-U53KV1A │ ├── problem-6 │ │ ├── eval-avg-ll.png │ │ └── pg_ll_b40000_r0.005_LunarLanderContinuous-v2_29-06-2020_21-30-30 │ │ │ └── events.out.tfevents.1593480630.DESKTOP-U53KV1A │ ├── problem-7-ideal │ │ ├── cheetah-ideal-eval-avg.png │ │ ├── pg_hc_b30000_lr0.02_HalfCheetah-v2_30-06-2020_20-07-51 │ │ │ └── events.out.tfevents.1593562071.DESKTOP-U53KV1A │ │ ├── pg_hc_nnb_b30000_lr0.02_HalfCheetah-v2_30-06-2020_22-15-46 │ │ │ └── events.out.tfevents.1593569746.DESKTOP-U53KV1A │ │ ├── pg_hc_rtg_b30000_lr0.02_HalfCheetah-v2_30-06-2020_20-44-51 │ │ │ └── events.out.tfevents.1593564291.DESKTOP-U53KV1A │ │ └── pg_hc_rtg_nnb_b30000_lr0.02_HalfCheetah-v2_30-06-2020_21-19-20 │ │ │ └── events.out.tfevents.1593566360.DESKTOP-U53KV1A │ └── problem-7-search │ │ ├── cheetah-search-eval-avg.png │ │ ├── pg_hc_b10000_lr0.005_nnbaseline_HalfCheetah-v2_30-06-2020_01-48-46 │ │ └── events.out.tfevents.1593496126.DESKTOP-U53KV1A │ │ ├── pg_hc_b10000_lr0.01_nnbaseline_HalfCheetah-v2_30-06-2020_00-21-43 │ │ └── events.out.tfevents.1593490903.DESKTOP-U53KV1A │ │ ├── pg_hc_b10000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_00-37-16 │ │ └── events.out.tfevents.1593491836.DESKTOP-U53KV1A │ │ ├── pg_hc_b30000_lr0.005_nnbaseline_HalfCheetah-v2_30-06-2020_00-51-38 │ │ └── events.out.tfevents.1593492698.DESKTOP-U53KV1A │ │ ├── pg_hc_b30000_lr0.01_nnbaseline_HalfCheetah-v2_30-06-2020_02-09-33 │ │ └── events.out.tfevents.1593497373.DESKTOP-U53KV1A │ │ └── pg_hc_b30000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_11-45-56 │ │ └── events.out.tfevents.1593531956.DESKTOP-U53KV1A └── setup.py ├── hw3 ├── README.md ├── README.txt ├── cs285 │ ├── agents │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ 
│ ├── ac_agent.cpython-37.pyc │ │ │ └── dqn_agent.cpython-37.pyc │ │ ├── ac_agent.py │ │ ├── dqn_agent.py │ │ └── pg_agent.py │ ├── critics │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── bootstrapped_continuous_critic.cpython-37.pyc │ │ │ └── dqn_critic.cpython-37.pyc │ │ ├── bootstrapped_continuous_critic.py │ │ └── dqn_critic.py │ ├── infrastructure │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── atari_wrappers.cpython-37.pyc │ │ │ ├── dqn_utils.cpython-37.pyc │ │ │ ├── logger.cpython-37.pyc │ │ │ ├── models.cpython-37.pyc │ │ │ ├── replay_buffer.cpython-37.pyc │ │ │ ├── rl_trainer.cpython-37.pyc │ │ │ └── utils.cpython-37.pyc │ │ ├── atari_wrappers.py │ │ ├── dqn_utils.py │ │ ├── logger.py │ │ ├── models.py │ │ ├── replay_buffer.py │ │ ├── rl_trainer.py │ │ └── utils.py │ ├── policies │ │ ├── MLP_policy.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── MLP_policy.cpython-37.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── argmax_policy.cpython-37.pyc │ │ └── argmax_policy.py │ └── scripts │ │ ├── run_hw3_actor_critic.py │ │ └── run_hw3_dqn.py ├── cs285_hw3.pdf ├── lunar_lander.py ├── requirements.txt ├── results │ ├── Q1 │ │ ├── dqn_double_q_test_pong_PongNoFrameskip-v4_06-07-2020_01-47-43 │ │ │ ├── events.out.tfevents.1594014463.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.15504.video004000.mp4 │ │ ├── dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39 │ │ │ ├── events.out.tfevents.1593891399.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.15892.video004000.mp4 │ │ └── pong-comp.png │ ├── Q2 │ │ ├── double_dqn │ │ │ ├── dqn_double_q_double_dqn_ll_seed1_LunarLander-v2_04-07-2020_21-47-22 │ │ │ │ └── events.out.tfevents.1593913642.DESKTOP-U53KV1A │ │ │ ├── dqn_double_q_double_dqn_ll_seed2_LunarLander-v2_04-07-2020_21-47-30 │ │ │ │ └── events.out.tfevents.1593913650.DESKTOP-U53KV1A │ │ │ └── dqn_double_q_double_dqn_ll_seed3_LunarLander-v2_04-07-2020_21-47-38 │ │ │ │ └── 
events.out.tfevents.1593913658.DESKTOP-U53KV1A │ │ ├── double_dqn_eval_ll.png │ │ └── dqn │ │ │ ├── dqn_dqn_ll_seed1_LunarLander-v2_04-07-2020_21-46-58 │ │ │ └── events.out.tfevents.1593913618.DESKTOP-U53KV1A │ │ │ ├── dqn_dqn_ll_seed2_LunarLander-v2_04-07-2020_21-47-06 │ │ │ └── events.out.tfevents.1593913626.DESKTOP-U53KV1A │ │ │ └── dqn_dqn_ll_seed3_LunarLander-v2_04-07-2020_21-47-15 │ │ │ └── events.out.tfevents.1593913635.DESKTOP-U53KV1A │ ├── Q3-init │ │ ├── dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39 │ │ │ ├── events.out.tfevents.1593891399.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.15892.video004000.mp4 │ │ ├── dqn_test_pong_PongNoFrameskip-v4_06-07-2020_01-46-09 │ │ │ ├── events.out.tfevents.1594014369.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.14904.video005000.mp4 │ │ └── init_pong_comp.png │ ├── Q3-lrmult │ │ ├── dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39 │ │ │ ├── events.out.tfevents.1593891399.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.15892.video004000.mp4 │ │ ├── dqn_test_pong_lrmult0.5_PongNoFrameskip-v4_06-07-2020_01-51-13 │ │ │ ├── events.out.tfevents.1594014673.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.12644.video004000.mp4 │ │ ├── dqn_test_pong_lrmult2.0_PongNoFrameskip-v4_06-07-2020_01-51-32 │ │ │ ├── events.out.tfevents.1594014692.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.9872.video004000.mp4 │ │ └── lrmult_pong_comp.png │ ├── Q4-1-100 │ │ ├── 5-seed-1-100.png │ │ ├── ac_1_100_CartPole-v0_05-07-2020_14-59-37 │ │ │ └── events.out.tfevents.1593975577.DESKTOP-U53KV1A │ │ ├── ac_1_100_CartPole-v0_05-07-2020_16-14-01 │ │ │ └── events.out.tfevents.1593980041.DESKTOP-U53KV1A │ │ ├── ac_1_100_CartPole-v0_05-07-2020_16-20-04 │ │ │ └── events.out.tfevents.1593980404.DESKTOP-U53KV1A │ │ ├── ac_1_100_CartPole-v0_05-07-2020_16-23-26 │ │ │ └── events.out.tfevents.1593980606.DESKTOP-U53KV1A │ │ └── ac_1_100_CartPole-v0_05-07-2020_16-27-20 │ │ │ └── events.out.tfevents.1593980840.DESKTOP-U53KV1A │ ├── Q4-10-10 │ │ ├── 
5-seed-10-10.png │ │ ├── ac_10_10_CartPole-v0_05-07-2020_15-12-48 │ │ │ └── events.out.tfevents.1593976368.DESKTOP-U53KV1A │ │ ├── ac_10_10_CartPole-v0_05-07-2020_15-47-19 │ │ │ └── events.out.tfevents.1593978439.DESKTOP-U53KV1A │ │ ├── ac_10_10_CartPole-v0_05-07-2020_15-49-24 │ │ │ └── events.out.tfevents.1593978564.DESKTOP-U53KV1A │ │ ├── ac_10_10_CartPole-v0_05-07-2020_15-59-55 │ │ │ └── events.out.tfevents.1593979195.DESKTOP-U53KV1A │ │ └── ac_10_10_CartPole-v0_05-07-2020_16-02-28 │ │ │ └── events.out.tfevents.1593979348.DESKTOP-U53KV1A │ ├── Q4 │ │ ├── ac_100_1_CartPole-v0_05-07-2020_14-54-21 │ │ │ └── events.out.tfevents.1593975261.DESKTOP-U53KV1A │ │ ├── ac_10_10_CartPole-v0_05-07-2020_15-12-48 │ │ │ └── events.out.tfevents.1593976368.DESKTOP-U53KV1A │ │ ├── ac_1_100_CartPole-v0_05-07-2020_14-59-37 │ │ │ └── events.out.tfevents.1593975577.DESKTOP-U53KV1A │ │ ├── ac_1_1_CartPole-v0_05-07-2020_14-51-47 │ │ │ └── events.out.tfevents.1593975107.DESKTOP-U53KV1A │ │ └── q4-comp.png │ ├── Q5-HC │ │ ├── ac_10_10_HalfCheetah-v2_05-07-2020_18-28-51 │ │ │ └── events.out.tfevents.1593988131.DESKTOP-U53KV1A │ │ ├── hc_comp.png │ │ └── pg_hc_b30000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_11-45-56 │ │ │ └── events.out.tfevents.1593531956.DESKTOP-U53KV1A │ └── Q5-IP │ │ ├── IP_ac_eval.png │ │ ├── ac_10_10_InvertedPendulum-v2_05-07-2020_18-28-19 │ │ └── events.out.tfevents.1593988099.DESKTOP-U53KV1A │ │ └── ac_b30k_10_10_InvertedPendulum-v2_05-07-2020_19-45-39 │ │ └── events.out.tfevents.1593992739.DESKTOP-U53KV1A └── setup.py ├── hw4 ├── README.md ├── README.txt ├── cs285 │ ├── agents │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── mb_agent.cpython-37.pyc │ │ └── mb_agent.py │ ├── envs │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-37.pyc │ │ ├── ant │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-35.pyc │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ ├── ant.cpython-35.pyc │ │ │ │ └── 
ant.cpython-37.pyc │ │ │ └── ant.py │ │ ├── cheetah │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-35.pyc │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ ├── cheetah.cpython-35.pyc │ │ │ │ └── cheetah.cpython-37.pyc │ │ │ └── cheetah.py │ │ ├── obstacles │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-35.pyc │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ ├── obstacles_env.cpython-35.pyc │ │ │ │ └── obstacles_env.cpython-37.pyc │ │ │ └── obstacles_env.py │ │ └── reacher │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── reacher_env.cpython-35.pyc │ │ │ └── reacher_env.cpython-37.pyc │ │ │ ├── assets │ │ │ └── sawyer.xml │ │ │ └── reacher_env.py │ ├── infrastructure │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── logger.cpython-37.pyc │ │ │ ├── replay_buffer.cpython-37.pyc │ │ │ ├── rl_trainer.cpython-37.pyc │ │ │ └── utils.cpython-37.pyc │ │ ├── logger.py │ │ ├── replay_buffer.py │ │ ├── rl_trainer.py │ │ └── utils.py │ ├── models │ │ ├── __pycache__ │ │ │ └── ff_model.cpython-37.pyc │ │ └── ff_model.py │ ├── policies │ │ ├── MPC_policy.py │ │ ├── __init__.py │ │ └── __pycache__ │ │ │ ├── MPC_policy.cpython-37.pyc │ │ │ └── __init__.cpython-37.pyc │ └── scripts │ │ └── run_hw4_mb.py ├── cs285_hw4.pdf ├── results │ ├── problem-1 │ │ ├── n500_arch1x32_losses.png │ │ ├── n500_arch1x32_predictions.png │ │ ├── n500_arch2x250_losses.png │ │ ├── n500_arch2x250_predictions.png │ │ ├── n5_arch2x250_losses.png │ │ └── n5_arch2x250_predictions.png │ ├── problem-2 │ │ └── mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56 │ │ │ ├── events.out.tfevents.1594352996.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ ├── problem-3-cheetah │ │ ├── cheetah_returns.png │ │ └── mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41 │ │ │ ├── 
events.out.tfevents.1594417421.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ ├── problem-3-obstacles │ │ ├── mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49 │ │ │ ├── events.out.tfevents.1594417429.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ │ └── obstacles_returns.png │ ├── problem-3-reacher │ │ ├── mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36 │ │ │ ├── events.out.tfevents.1594417416.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ │ └── reacher_returns.png │ ├── problem-4-ensemble │ │ ├── ensemble_comp.png │ │ ├── mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11 │ │ │ ├── events.out.tfevents.1594364771.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ │ ├── mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21 │ │ │ ├── events.out.tfevents.1594364781.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ │ └── mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33 │ │ │ ├── events.out.tfevents.1594364793.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ ├── problem-4-horizon │ │ ├── horizon_comp.png │ │ ├── mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28 │ │ │ ├── events.out.tfevents.1594364548.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ │ ├── mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36 │ │ │ ├── events.out.tfevents.1594364556.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ │ └── mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52 │ │ │ ├── events.out.tfevents.1594364512.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── 
itr_0_predictions.png │ └── problem-4-numseq │ │ ├── mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23 │ │ ├── events.out.tfevents.1594364663.DESKTOP-U53KV1A │ │ ├── itr_0_losses.npy │ │ ├── itr_0_losses.png │ │ └── itr_0_predictions.png │ │ ├── mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40 │ │ ├── events.out.tfevents.1594364620.DESKTOP-U53KV1A │ │ ├── itr_0_losses.npy │ │ ├── itr_0_losses.png │ │ └── itr_0_predictions.png │ │ └── numseq_comp.png └── setup.py └── hw5 ├── README.md ├── README.txt ├── cs285 ├── agents │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── ac_agent.cpython-37.pyc │ └── ac_agent.py ├── critics │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── bootstrapped_continuous_critic.cpython-37.pyc │ └── bootstrapped_continuous_critic.py ├── envs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── pointmass.cpython-37.pyc │ │ └── sparse_half_cheetah.cpython-37.pyc │ ├── pointmass.py │ └── sparse_half_cheetah.py ├── exploration │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── density_model.cpython-37.pyc │ │ └── exploration.cpython-37.pyc │ ├── density_model.py │ └── exploration.py ├── infrastructure │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── logger.cpython-37.pyc │ │ ├── replay.cpython-37.pyc │ │ ├── replay_buffer.cpython-37.pyc │ │ ├── rl_trainer.cpython-37.pyc │ │ └── utils.cpython-37.pyc │ ├── logger.py │ ├── replay_buffer.py │ ├── rl_trainer.py │ └── utils.py ├── policies │ ├── MLP_policy.py │ ├── __init__.py │ └── __pycache__ │ │ ├── MLP_policy.cpython-37.pyc │ │ └── __init__.cpython-37.pyc └── scripts │ └── train_ac_exploration_f18.py ├── cs285_hw5.pdf ├── requirements.txt ├── results ├── problem-1-hist │ ├── hist_returns.png │ ├── seed11_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-11-30 │ │ └── events.out.tfevents.1595135490.DESKTOP-U53KV1A │ ├── 
seed1_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-06-41 │ │ └── events.out.tfevents.1595135201.DESKTOP-U53KV1A │ └── seed21_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-17-01 │ │ └── events.out.tfevents.1595135821.DESKTOP-U53KV1A ├── problem-1-none │ ├── no_exploration_returns.png │ ├── seed11_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-05-20 │ │ └── events.out.tfevents.1595135120.DESKTOP-U53KV1A │ ├── seed1_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-03-45 │ │ └── events.out.tfevents.1595135025.DESKTOP-U53KV1A │ └── seed21_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-06-56 │ │ └── events.out.tfevents.1595135216.DESKTOP-U53KV1A ├── problem-2 │ ├── rbf_returns.png │ ├── seed11_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-13-38 │ │ └── events.out.tfevents.1594937618.DESKTOP-U53KV1A │ ├── seed1_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-10-03 │ │ └── events.out.tfevents.1594937403.DESKTOP-U53KV1A │ └── seed21_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-17-11 │ │ └── events.out.tfevents.1594937831.DESKTOP-U53KV1A ├── problem-3 │ ├── ex2_returns.png │ ├── seed11_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_02-37-10 │ │ └── events.out.tfevents.1595140630.DESKTOP-U53KV1A │ ├── seed1_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_01-08-12 │ │ └── events.out.tfevents.1595135292.DESKTOP-U53KV1A │ └── seed21_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_03-56-28 │ │ └── events.out.tfevents.1595145388.DESKTOP-U53KV1A └── problem-4 │ ├── bc0.0001 │ ├── seed11_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_19-07-2020_14-18-04 │ │ └── events.out.tfevents.1595182684.DESKTOP-U53KV1A │ ├── seed1_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_19-07-2020_01-08-22 │ │ └── events.out.tfevents.1595135302.DESKTOP-U53KV1A │ └── seed21_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_20-07-2020_05-22-16 │ │ └── events.out.tfevents.1595236936.DESKTOP-U53KV1A │ ├── bc0.001 
│ ├── seed11_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_04-18-46 │ │ └── events.out.tfevents.1595146726.DESKTOP-U53KV1A │ ├── seed1_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_01-08-17 │ │ └── events.out.tfevents.1595135297.DESKTOP-U53KV1A │ └── seed21_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_06-40-41 │ │ └── events.out.tfevents.1595155241.DESKTOP-U53KV1A │ ├── full_comp.png │ └── none │ ├── seed11_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_00-57-30 │ └── events.out.tfevents.1594875450.DESKTOP-U53KV1A │ ├── seed1_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_00-26-39 │ └── events.out.tfevents.1594873599.DESKTOP-U53KV1A │ └── seed21_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_01-28-19 │ └── events.out.tfevents.1594877299.DESKTOP-U53KV1A └── setup.py /.gitattributes: -------------------------------------------------------------------------------- 1 | hw1/downloads/* linguist-detectable=false 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UC Berkeley Deep RL Pytorch Solutions 2 | 3 | Pytorch solutions for [UC Berkeley's CS285 Deep RL course](http://rail.eecs.berkeley.edu/deeprlcourse/). If you wish to complete the assignments yourself, a [pytorch version](https://github.com/mdeib/berkeley-deep-RL-pytorch-starter) of the [official starter code](https://github.com/berkeleydeeprlcourse/homework_fall2019) has also been made. 4 | 5 | While these solutions have produced reasonable results be aware that there may still be small bugs in the code and/or the solutions. 6 | -------------------------------------------------------------------------------- /hw1/README.md: -------------------------------------------------------------------------------- 1 | # Section 1 Behavior Cloning 2 | 3 | Below is the HW1 report. 
All data used can be found in the results folder - videos aren't included to save space. To view the tensorboard for a specific part navigate to that part's folder (not the subfolders) and run 4 | ```commandline 5 | tensorboard --logdir . 6 | ``` 7 | 8 | ## Question 1.2 9 | 10 | The agent was trained on 10,000 steps of expert behavior in each environment. It was then evaluated for >10,000 steps to get an accurate mean performance. The agent itself had an MLP policy consisting of 2 hidden layers of 64 neurons each. 11 | 12 | | Environment | Expert | Behavioral Cloning | Mean Percent Performance | 13 | |-------------|:----------------:|:------------------:|:------------------------:| 14 | | Ant | 4713.65 ± 12.2 | 4696.46 ± 90.39 | 99.64% | 15 | | HalfCheetah | 4205.78 ± 83.04 | 3521.82 ± 181.00 | 83.74% | 16 | | Hopper | 3772.67 ± 1.95 | 660.8 ± 348.67 | 17.52% | 17 | | Humanoid | 10344.52 ± 20.98 | 414.05 ± 105.71 | 4.00% | 18 | | Walker2d | 5566.84 ± 9.24 | 60.96 ± 94.77 | 1.10% | 19 | 20 | It can be seen that the agent achieved >30% performance in both the ant and the half cheetah environment. It failed to reach this benchmark in the other three environments. These environments seem to be harder for behavioral cloning, requiring more training to reach a comparable level of performance. 21 | 22 | ## Question 1.3 23 | 24 | The agent in the Walker2d environment was only able to achieve 1.1% expert performance after 10,000 steps. It seems likely that it could do better with more training. For this question we will graph evaluation performance as a function of training steps. A data point was taken every 2000 training steps, and it was trained for a total of 100,000 steps. The mean returns throughout training are shown below: 25 | 26 | ![Evaluation Average](results/Q1-3/bc-eval-avg.PNG) 27 | 28 | It can be seen that the agent was able to improve greatly with more training updates, reaching almost 90 percent of the expert performance. 
Also notable is the significant initial time it took to actually begin performing. 29 | 30 | ![Evaluation Standard Deviation](results/Q1-3/bc-eval-std.PNG) 31 | 32 | While average performance seems to be quite good, the standard deviation over the course of training is a bit more telling, as are the min/max returns. The agent continues to have trials where it makes a mistake and is unable to recover, resulting in a terrible rollout and a large standard deviation. If the agent were really learning to perform well in the environment we would see the standard deviation fall as it begins to consistently do well. This perfectly illustrates the weaknesses of behavioral cloning, and leads into question 2.2. 33 | 34 | ## Question 2.2 35 | 36 | For this question dagger learning was done on the Walker2d environment used in question 1.3. In the first 10k steps behavioral cloning was done, after which 9 iterations of dagger were carried out. Thus a total of 100k training steps were done, just like in question 1.3. All other things were kept the same. This allows the usage of dagger to be fairly tested. The average returns are below: 37 | 38 | ![Evaluation Average](results/Q2-2/dagger-eval-avg.PNG) 39 | 40 | It can be seen that using dagger instead of training behavioral cloning further yielded better average returns. This is good but the real test is the standard deviation: 41 | 42 | ![Evaluation Standard Deviation](results/Q2-2/dagger-eval-std.PNG) 43 | 44 | Unlike in question 1.3 the standard deviation drops dramatically as more dagger iterations are done. This shows that dagger has taught the agent to actually correct its mistakes, instead of failing as soon as it deviates slightly from the expert's path. Thus dagger is shown to provide agent robustness that pure behavioral cloning fails to give. 
45 | -------------------------------------------------------------------------------- /hw1/cs285/agents/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making folder a package 2 | -------------------------------------------------------------------------------- /hw1/cs285/agents/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/agents/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/agents/__pycache__/base_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/agents/__pycache__/base_agent.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/agents/__pycache__/bc_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/agents/__pycache__/bc_agent.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/agents/bc_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | 4 | from cs285.policies.MLP_policy import * 5 | from cs285.infrastructure.replay_buffer import ReplayBuffer 6 | from cs285.infrastructure.utils import * 7 | 8 | class BCAgent: 9 | def __init__(self, env, agent_params): 10 | # init vars 11 | self.env = env 12 | self.agent_params = agent_params 13 | 14 | # actor/policy 15 | self.actor = 
MLPPolicySL(self.agent_params['ac_dim'], 16 | self.agent_params['ob_dim'], 17 | self.agent_params['n_layers'], 18 | self.agent_params['size'], 19 | self.agent_params['device'], 20 | discrete = self.agent_params['discrete'], 21 | learning_rate = self.agent_params['learning_rate'], 22 | ) ## TODO: look in here and implement this 23 | 24 | # replay buffer 25 | self.replay_buffer = ReplayBuffer(self.agent_params['max_replay_buffer_size']) 26 | 27 | def train(self, ob_no, ac_na, re_n, next_ob_no, terminal_n): 28 | # training a BC agent refers to updating its actor using 29 | # the given observations and corresponding action labels 30 | self.actor.update(ob_no, ac_na) ## TODO: look in here and implement this 31 | 32 | def add_to_replay_buffer(self, paths): 33 | self.replay_buffer.add_rollouts(paths) 34 | 35 | def sample(self, batch_size): 36 | return self.replay_buffer.sample_random_data(batch_size) ## TODO: look in here and implement this 37 | -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/events.out.tfevents.1593151299.DESKTOP-U53KV1A.17880.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/events.out.tfevents.1593151299.DESKTOP-U53KV1A.17880.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/policy_itr_0 -------------------------------------------------------------------------------- 
/hw1/cs285/data/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/events.out.tfevents.1593151346.DESKTOP-U53KV1A.3080.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/events.out.tfevents.1593151346.DESKTOP-U53KV1A.3080.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/policy_itr_0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/events.out.tfevents.1593151407.DESKTOP-U53KV1A.17880.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/events.out.tfevents.1593151407.DESKTOP-U53KV1A.17880.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/policy_itr_0 -------------------------------------------------------------------------------- 
/hw1/cs285/data/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/events.out.tfevents.1593151514.DESKTOP-U53KV1A.2796.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/events.out.tfevents.1593151514.DESKTOP-U53KV1A.2796.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/policy_itr_0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/events.out.tfevents.1593151663.DESKTOP-U53KV1A.14512.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/events.out.tfevents.1593151663.DESKTOP-U53KV1A.14512.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/policy_itr_0 -------------------------------------------------------------------------------- 
/hw1/cs285/data/Q1-3/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_20-57-08/events.out.tfevents.1593219429.DESKTOP-U53KV1A.388.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-3/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_20-57-08/events.out.tfevents.1593219429.DESKTOP-U53KV1A.388.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/events.out.tfevents.1593231811.DESKTOP-U53KV1A.16276.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/events.out.tfevents.1593231811.DESKTOP-U53KV1A.16276.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_1 -------------------------------------------------------------------------------- 
/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_2 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_3 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_4 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_5 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_6: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_6 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_7 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_8 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_9 -------------------------------------------------------------------------------- /hw1/cs285/expert_data/expert_data_Ant-v2.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/expert_data/expert_data_Ant-v2.pkl -------------------------------------------------------------------------------- /hw1/cs285/expert_data/expert_data_HalfCheetah-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/expert_data/expert_data_HalfCheetah-v2.pkl -------------------------------------------------------------------------------- /hw1/cs285/expert_data/expert_data_Hopper-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/expert_data/expert_data_Hopper-v2.pkl -------------------------------------------------------------------------------- /hw1/cs285/expert_data/expert_data_Humanoid-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/expert_data/expert_data_Humanoid-v2.pkl -------------------------------------------------------------------------------- /hw1/cs285/expert_data/expert_data_Walker2d-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/expert_data/expert_data_Walker2d-v2.pkl -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making folder a package 2 | 
-------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/infrastructure/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__pycache__/tf_utils.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/infrastructure/__pycache__/tf_utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/infrastructure/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.tensorboard import SummaryWriter 4 | import numpy as np 5 | 6 | class Logger: 7 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None): 8 | self._log_dir = log_dir 9 | print('########################') 10 | print('logging outputs to ', log_dir) 11 | print('########################') 12 | self._n_logged_samples = n_logged_samples 13 | self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1) 14 | 15 | def log_scalar(self, scalar, name, step_): 16 | self._summ_writer.add_scalar('{}'.format(name), scalar, step_) 17 | 18 | def log_scalars(self, scalar_dict, group_name, step, phase): 19 | """Will log all scalars in the same plot.""" 20 | self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step) 21 | 22 | def log_image(self, image, name, step): 23 | assert(len(image.shape) == 3) # [C, H, W] 24 | self._summ_writer.add_image('{}'.format(name), image, step) 25 | 26 | def log_video(self, video_frames, name, step, fps=10): 27 | assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input tensor for video logging!" 
28 | self._summ_writer.add_video('{}'.format(name), video_frames, step, fps=fps) 29 | 30 | def log_paths_as_videos(self, paths, step, max_videos_to_save=2, fps=10, video_title='video'): 31 | 32 | # reshape the rollouts 33 | videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths] 34 | 35 | # max rollout length 36 | max_videos_to_save = np.min([max_videos_to_save, len(videos)]) 37 | max_length = videos[0].shape[0] 38 | for i in range(max_videos_to_save): 39 | if videos[i].shape[0]>max_length: 40 | max_length = videos[i].shape[0] 41 | 42 | # pad rollouts to all be same length 43 | for i in range(max_videos_to_save): 44 | if videos[i].shape[0] 0, "Figure logging requires input shape [batch x figures]!" 56 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 57 | 58 | def log_figure(self, figure, name, step, phase): 59 | """figure: matplotlib.pyplot figure handle""" 60 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 61 | 62 | def log_graph(self, array, name, step, phase): 63 | """figure: matplotlib.pyplot figure handle""" 64 | im = plot_graph(array) 65 | self._summ_writer.add_image('{}_{}'.format(name, phase), im, step) 66 | 67 | def dump_scalars(self, log_path=None): 68 | log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path 69 | self._summ_writer.export_scalars_to_json(log_path) 70 | 71 | def flush(self): 72 | self._summ_writer.flush() 73 | -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import gym 4 | import os 5 | 6 | from cs285.infrastructure.utils import * 7 | 8 | class ReplayBuffer(object): 9 | 10 | def __init__(self, max_size=1000000): 11 | 12 | self.max_size = max_size 13 | 14 | # store each rollout 15 | self.paths = [] 16 | 17 | # store (concatenated) component 
arrays from each rollout 18 | self.obs = None 19 | self.acs = None 20 | self.rews = None 21 | self.next_obs = None 22 | self.terminals = None 23 | 24 | def __len__(self): 25 | if self.obs is not None: 26 | return self.obs.shape[0] 27 | else: 28 | return 0 29 | 30 | def add_rollouts(self, paths, concat_rew=True): 31 | 32 | # add new rollouts into our list of rollouts 33 | for path in paths: 34 | self.paths.append(path) 35 | 36 | # convert new rollouts into their component arrays, and append them onto our arrays 37 | observations, actions, rewards, next_observations, terminals = convert_listofrollouts(paths, concat_rew) 38 | 39 | if self.obs is None: 40 | self.obs = observations[-self.max_size:] 41 | self.acs = actions[-self.max_size:] 42 | self.rews = rewards[-self.max_size:] 43 | self.next_obs = next_observations[-self.max_size:] 44 | self.terminals = terminals[-self.max_size:] 45 | else: 46 | self.obs = np.concatenate([self.obs, observations])[-self.max_size:] 47 | self.acs = np.concatenate([self.acs, actions])[-self.max_size:] 48 | if concat_rew: 49 | self.rews = np.concatenate([self.rews, rewards])[-self.max_size:] 50 | else: 51 | if isinstance(rewards, list): 52 | self.rews += rewards 53 | else: 54 | self.rews.append(rewards) 55 | self.rews = self.rews[-self.max_size:] 56 | self.next_obs = np.concatenate([self.next_obs, next_observations])[-self.max_size:] 57 | self.terminals = np.concatenate([self.terminals, terminals])[-self.max_size:] 58 | 59 | ######################################## 60 | ######################################## 61 | 62 | def sample_random_data(self, batch_size): 63 | assert self.obs.shape[0] == self.acs.shape[0] == self.rews.shape[0] == self.next_obs.shape[0] == self.terminals.shape[0] 64 | 65 | ## TODO return batch_size number of random entries from each of the 5 component arrays above 66 | ## HINT 1: use np.random.permutation to sample random indices 67 | ## HINT 2: return corresponding data points from each array (i.e., not different 
indices from each array) 68 | ## HINT 3: look at the sample_recent_data function below 69 | 70 | indices = np.random.permutation(len(self))[:batch_size] 71 | return self.obs[indices], self.acs[indices], self.rews[indices], self.next_obs[indices], self.terminals[indices] 72 | 73 | def sample_recent_data(self, batch_size=1): 74 | return self.obs[-batch_size:], self.acs[-batch_size:], self.rews[-batch_size:], self.next_obs[-batch_size:], self.terminals[-batch_size:] 75 | -------------------------------------------------------------------------------- /hw1/cs285/policies/MLP_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | class MLPPolicy(nn.Module): 6 | 7 | def __init__(self, 8 | ac_dim, 9 | ob_dim, 10 | n_layers, 11 | size, 12 | device, 13 | lr = 1e-4, 14 | training=True, 15 | discrete=False, # unused for now 16 | nn_baseline=False, # unused for now 17 | **kwargs): 18 | super().__init__() 19 | 20 | # init vars 21 | self.training = training 22 | self.device = device 23 | 24 | # network architecture 25 | self.mlp = nn.ModuleList() 26 | self.mlp.append(nn.Linear(ob_dim, size))#first hidden layer 27 | self.mlp.append(nn.Tanh()) 28 | 29 | for h in range(n_layers - 1): #additional hidden layers 30 | self.mlp.append(nn.Linear(size, size)) 31 | self.mlp.append(nn.Tanh()) 32 | 33 | self.mlp.append(nn.Linear(size, ac_dim)) #output layer, no activation function 34 | 35 | #loss and optimizer 36 | if self.training: 37 | self.loss_func = nn.MSELoss() 38 | self.optimizer = torch.optim.Adam(self.parameters(), lr) 39 | 40 | self.to(device) 41 | 42 | ################################## 43 | 44 | def forward(self, x): 45 | for layer in self.mlp: 46 | x = layer(x) 47 | return x 48 | 49 | ################################## 50 | 51 | def save(self, filepath): 52 | torch.save(self.state_dict(), filepath) 53 | 54 | def restore(self, filepath): 55 | 
self.load_state_dict(torch.load(filepath)) 56 | 57 | ################################## 58 | 59 | # query this policy with observation(s) to get selected action(s) 60 | def get_action(self, obs): 61 | if len(obs.shape)>1: 62 | observation = obs 63 | else: 64 | observation = obs[None] 65 | 66 | return self(torch.Tensor(observation).to(self.device)).cpu().detach().numpy() 67 | 68 | # update/train this policy 69 | def update(self, observations, actions): 70 | raise NotImplementedError 71 | 72 | ##################################################### 73 | ##################################################### 74 | 75 | class MLPPolicySL(MLPPolicy): 76 | 77 | """ 78 | This class is a special case of MLPPolicy, 79 | which is trained using supervised learning. 80 | The relevant functions to define are included below. 81 | """ 82 | 83 | def update(self, observations, actions): 84 | assert self.training, 'Policy must be created with training = true in order to perform training updates...' 85 | 86 | # TODO define network update 87 | self.optimizer.zero_grad() 88 | predicted_actions = self(torch.Tensor(observations).to(self.device)) 89 | loss = self.loss_func(predicted_actions, torch.Tensor(actions).to(self.device)) 90 | loss.backward() 91 | self.optimizer.step() 92 | 93 | #print("loss:", loss.item()) 94 | -------------------------------------------------------------------------------- /hw1/cs285/policies/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making folder a package 2 | -------------------------------------------------------------------------------- /hw1/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc 
-------------------------------------------------------------------------------- /hw1/cs285/policies/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/policies/__pycache__/base_policy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/__pycache__/base_policy.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/policies/__pycache__/loaded_gaussian_policy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/__pycache__/loaded_gaussian_policy.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/policies/experts/Ant.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/experts/Ant.pkl -------------------------------------------------------------------------------- /hw1/cs285/policies/experts/HalfCheetah.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/experts/HalfCheetah.pkl 
-------------------------------------------------------------------------------- /hw1/cs285/policies/experts/Hopper.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/experts/Hopper.pkl -------------------------------------------------------------------------------- /hw1/cs285/policies/experts/Humanoid.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/experts/Humanoid.pkl -------------------------------------------------------------------------------- /hw1/cs285/policies/experts/Walker2d.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/experts/Walker2d.pkl -------------------------------------------------------------------------------- /hw1/cs285/policies/loaded_gaussian_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import pickle 5 | 6 | class Loaded_Gaussian_Policy(nn.Module): 7 | def __init__(self, filename, **kwargs): 8 | super().__init__() 9 | with open(filename, 'rb') as f: 10 | data = pickle.loads(f.read()) 11 | 12 | self.nonlin_type = data['nonlin_type'] 13 | policy_type = [k for k in data.keys() if k != 'nonlin_type'][0] 14 | 15 | assert policy_type == 'GaussianPolicy', 'Policy type {} not supported'.format(policy_type) 16 | self.policy_params = data[policy_type] 17 | 18 | assert set(self.policy_params.keys()) == {'logstdevs_1_Da', 'hidden', 'obsnorm', 'out'} 19 | 20 | self.obsnorm_mean = 
self.policy_params['obsnorm']['Standardizer']['mean_1_D'] 21 | self.obsnorm_meansq = self.policy_params['obsnorm']['Standardizer']['meansq_1_D'] 22 | layer_params = self.policy_params['hidden']['FeedforwardNet'] 23 | 24 | self.mlp = nn.ModuleList() 25 | for layer_name in sorted(layer_params.keys()): 26 | W = layer_params[layer_name]['AffineLayer']['W'].astype(np.float32) 27 | b = layer_params[layer_name]['AffineLayer']['b'].astype(np.float32) 28 | r, h = W.shape 29 | 30 | layer = nn.Linear(r,h) 31 | layer.weight.data.copy_(torch.from_numpy(W.transpose())) 32 | layer.bias.data.copy_(torch.from_numpy(b.squeeze(0))) 33 | self.mlp.append(layer) 34 | 35 | if self.nonlin_type == 'lrelu': 36 | self.mlp.append(nn.LeakyReLU()) 37 | elif self.nonlin_type == 'tanh': 38 | self.mlp.append(nn.Tanh()) 39 | else: 40 | raise NotImplementedError(self.nonlin_type) 41 | 42 | #output layer 43 | W = self.policy_params['out']['AffineLayer']['W'].astype(np.float32) 44 | b = self.policy_params['out']['AffineLayer']['b'].astype(np.float32) 45 | r, h = W.shape 46 | layer = nn.Linear(r, h) 47 | layer.weight.data.copy_(torch.from_numpy(W.transpose())) 48 | layer.bias.data.copy_(torch.from_numpy(b.squeeze(0))) 49 | self.mlp.append(layer) 50 | 51 | ################################## 52 | 53 | def obs_norm(self, obs_bo, obsnorm_mean, obsnorm_meansq): 54 | obsnorm_stdev = np.sqrt(np.maximum(0, obsnorm_meansq - np.square(obsnorm_mean))) 55 | normedobs_bo = (obs_bo - obsnorm_mean) / (obsnorm_stdev + 1e-6) 56 | return torch.FloatTensor(normedobs_bo).squeeze(0) 57 | 58 | ################################## 59 | 60 | def forward(self, obs): 61 | x = self.obs_norm(obs, self.obsnorm_mean, self.obsnorm_meansq) 62 | for layer in self.mlp: 63 | x = layer(x) 64 | return x 65 | 66 | ################################## 67 | 68 | def update(self, obs_no, acs_na, adv_n=None, acs_labels_na=None): 69 | print("\n\nThis policy class simply loads in a particular type of policy and queries it.") 70 | print("Not training 
procedure has been written, so do not try to train it.\n\n") 71 | raise NotImplementedError 72 | 73 | def get_action(self, obs): 74 | if len(obs.shape) > 1: 75 | observation = obs 76 | else: 77 | observation = obs[None, :] 78 | return self(obs) 79 | -------------------------------------------------------------------------------- /hw1/cs285_hw1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285_hw1.pdf -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/basic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/basic -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/compile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/compile -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/derivative: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/derivative -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/libglew.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/libglew.so 
-------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/libglewegl.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/libglewegl.so -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/libglewosmesa.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/libglewosmesa.so -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/libglfw.so.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/libglfw.so.3 -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/libmujoco150.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/libmujoco150.so -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/libmujoco150nogl.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/libmujoco150nogl.so -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/record: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/record -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/simulate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/simulate -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/test -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/doc/README.txt: -------------------------------------------------------------------------------- 1 | Welcome to MuJoCo Pro version 1.50. 2 | 3 | The full documentation is available at http://www.mujoco.org/book 4 | The most relevant chapters are Overview, MJCF Models, and MuJoCo Pro. 5 | 6 | Here we provide brief notes to get you started: 7 | 8 | 9 | The activation key (which you should have received with your license) is a 10 | plain-text file whose path must be passed to the mj_activate() function. 11 | The code samples assume that it is called mjkey.txt in the bin directory. 12 | 13 | Once you have mjkey.txt in the bin directory, run: 14 | simulate ../model/humanoid.xml (or ./simulate on Linux and OSX) 15 | to see MuJoCo Pro in action. 16 | 17 | On Linux, you can use LD_LIBRARY_PATH to point the dynamic linker to the 18 | .so files, or copy them to a directory that is already in the linker path. 
19 | On OSX, the MuJoCo Pro dynamic library is compiled with @executable_path/ 20 | to avoid the need for installation in a predefined directory. 21 | 22 | In general, the directory structure we have provided is merely a suggestion; 23 | feel free to re-organize it if needed. MuJoCo Pro does not have an installer 24 | and does not write any files outside the executable directory. 25 | 26 | The makefile in the sample directory generates binaries in the bin directory. 27 | These binaries are pre-compiled and included in the software distribution. 28 | 29 | While the software distribution contains only one model (humanoid.xml), 30 | additional models are available at http://www.mujoco.org/forum under Resources. 31 | -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/sample/makefile: -------------------------------------------------------------------------------- 1 | COMMON=-O2 -I../include -L../bin -std=c++11 -mavx 2 | 3 | default: 4 | g++ $(COMMON) test.cpp -lmujoco150nogl -o ../bin/test 5 | g++ $(COMMON) compile.cpp -lmujoco150nogl -o ../bin/compile 6 | g++ $(COMMON) derivative.cpp -lmujoco150nogl -fopenmp -o ../bin/derivative 7 | g++ $(COMMON) simulate.cpp -lmujoco150 -lGL -lglew ../bin/libglfw.so.3 -o ../bin/simulate 8 | g++ $(COMMON) record.cpp -lmujoco150 -lGL -lglew ../bin/libglfw.so.3 -o ../bin/record 9 | g++ $(COMMON) basic.cpp -lmujoco150 -lGL -lglew ../bin/libglfw.so.3 -o ../bin/basic 10 | 11 | egl: 12 | g++ $(COMMON) -DMJ_EGL record.cpp -lmujoco150 -lOpenGL -lEGL -lglewegl -o ../bin/recordegl 13 | 14 | osmesa: 15 | g++ $(COMMON) -DMJ_OSMESA record.cpp -lmujoco150 -lOSMesa -lglewosmesa -o ../bin/recordosmesa 16 | 17 | all: default egl osmesa 18 | -------------------------------------------------------------------------------- /hw1/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.10.11 2 | mujoco-py==1.50.1.35 3 | matplotlib==2.2.2 4 | 
ipython==6.4.0 5 | moviepy==1.0.0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/events.out.tfevents.1593151299.DESKTOP-U53KV1A.17880.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/events.out.tfevents.1593151299.DESKTOP-U53KV1A.17880.0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/policy_itr_0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/events.out.tfevents.1593151346.DESKTOP-U53KV1A.3080.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/events.out.tfevents.1593151346.DESKTOP-U53KV1A.3080.0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/policy_itr_0 
-------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/events.out.tfevents.1593151407.DESKTOP-U53KV1A.17880.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/events.out.tfevents.1593151407.DESKTOP-U53KV1A.17880.0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/policy_itr_0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/events.out.tfevents.1593151514.DESKTOP-U53KV1A.2796.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/events.out.tfevents.1593151514.DESKTOP-U53KV1A.2796.0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/policy_itr_0 
-------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/events.out.tfevents.1593151663.DESKTOP-U53KV1A.14512.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/events.out.tfevents.1593151663.DESKTOP-U53KV1A.14512.0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/policy_itr_0 -------------------------------------------------------------------------------- /hw1/results/Q1-3/bc-eval-avg.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-3/bc-eval-avg.PNG -------------------------------------------------------------------------------- /hw1/results/Q1-3/bc-eval-std.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-3/bc-eval-std.PNG -------------------------------------------------------------------------------- /hw1/results/Q1-3/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_20-57-08/events.out.tfevents.1593219429.DESKTOP-U53KV1A.388.0: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-3/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_20-57-08/events.out.tfevents.1593219429.DESKTOP-U53KV1A.388.0 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger-eval-avg.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger-eval-avg.PNG -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger-eval-std.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger-eval-std.PNG -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/events.out.tfevents.1593231811.DESKTOP-U53KV1A.16276.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/events.out.tfevents.1593231811.DESKTOP-U53KV1A.16276.0 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_0 
-------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_1 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_2 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_3 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_4 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_5: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_5 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_6: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_6 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_7 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_8 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_9: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_9 -------------------------------------------------------------------------------- /hw1/setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup 3 | 4 | setup( 5 | name='cs285', 6 | version='0.1.0', 7 | packages=['cs285'], 8 | ) -------------------------------------------------------------------------------- /hw2/README.txt: -------------------------------------------------------------------------------- 1 | 2 | 1) See hw1 if you'd like to see installation instructions. You do NOT have to redo them. 3 | 4 | 5 | ############################################## 6 | ############################################## 7 | 8 | 9 | 2) Code: 10 | 11 | ------------------------------------------- 12 | 13 | Files to look at, even though there are no explicit 'TODO' markings: 14 | - scripts/run_hw2_policy_gradient.py 15 | 16 | ------------------------------------------- 17 | 18 | Blanks to be filled in by using your code from hw1 are marked with 'TODO: GETTHIS from HW1' 19 | 20 | The following files have these: 21 | - infrastructure/rl_trainer.py 22 | - infrastructure/utils.py 23 | - policies/MLP_policy.py 24 | 25 | ------------------------------------------- 26 | 27 | Blanks to be filled in now (for this assignment) are marked with 'TODO' 28 | 29 | The following files have these: 30 | - agents/pg_agent.py 31 | - policies/MLP_policy.py 32 | 33 | 34 | ############################################## 35 | ############################################## 36 | 37 | 38 | 3) Run code with the following command: 39 | 40 | $ python cs285/scripts/run_hw2_policy_gradient.py --env_name CartPole-v1 --exp_name test_pg_cartpole 41 | $ python cs285/scripts/run_hw2_policy_gradient.py --env_name InvertedPendulum-v2 
--exp_name test_pg_pendulum 42 | 43 | Flags of relevance, when running the commands above (see pdf for more info): 44 | -n number of policy training iterations 45 | -rtg use reward_to_go for the value 46 | -dsa do not standardize the advantage values 47 | 48 | ############################################## 49 | 50 | 51 | 4) Visualize saved tensorboard event file: 52 | 53 | $ cd cs285/data/ 54 | $ tensorboard --logdir . 55 | 56 | Then, navigate to shown url to see scalar summaries as plots (in 'scalar' tab), as well as videos (in 'images' tab) 57 | 58 | -------------------------------------------------------------------------------- /hw2/cs285/agents/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw2/cs285/agents/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/agents/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/agents/__pycache__/pg_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/agents/__pycache__/pg_agent.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/infrastructure/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/infrastructure/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/logger.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from tensorboardX import SummaryWriter 3 | import numpy as np 4 | 5 | class Logger: 6 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None): 7 | self._log_dir = log_dir 8 | print('########################') 9 | print('logging outputs to ', log_dir) 10 | print('########################') 11 | self._n_logged_samples = n_logged_samples 12 | self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1) 13 | 14 | def log_scalar(self, scalar, name, step_): 15 | self._summ_writer.add_scalar('{}'.format(name), scalar, step_) 16 | 17 | def log_scalars(self, scalar_dict, group_name, step, phase): 18 | """Will log all scalars in the same plot.""" 19 | self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step) 20 | 21 | def log_image(self, image, name, step): 22 | assert(len(image.shape) == 3) # [C, H, W] 23 | self._summ_writer.add_image('{}'.format(name), image, step) 24 | 25 | def log_video(self, video_frames, name, step, fps=10): 26 | assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input tensor for video logging!" 27 | self._summ_writer.add_video('{}'.format(name), video_frames, step, fps=fps) 28 | 29 | def log_paths_as_videos(self, paths, step, max_videos_to_save=2, fps=10, video_title='video'): 30 | 31 | # reshape the rollouts 32 | videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths] 33 | 34 | # max rollout length 35 | max_videos_to_save = np.min([max_videos_to_save, len(videos)]) 36 | max_length = videos[0].shape[0] 37 | for i in range(max_videos_to_save): 38 | if videos[i].shape[0]>max_length: 39 | max_length = videos[i].shape[0] 40 | 41 | # pad rollouts to all be same length 42 | for i in range(max_videos_to_save): 43 | if videos[i].shape[0] 0, "Figure logging requires input shape [batch x figures]!" 
54 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 55 | 56 | def log_figure(self, figure, name, step, phase): 57 | """figure: matplotlib.pyplot figure handle""" 58 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 59 | 60 | def log_graph(self, array, name, step, phase): 61 | """figure: matplotlib.pyplot figure handle""" 62 | im = plot_graph(array) 63 | self._summ_writer.add_image('{}_{}'.format(name, phase), im, step) 64 | 65 | def dump_scalars(self, log_path=None): 66 | log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path 67 | self._summ_writer.export_scalars_to_json(log_path) 68 | 69 | def flush(self): 70 | self._summ_writer.flush() 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from cs285.infrastructure.utils import * 4 | 5 | class ReplayBuffer(object): 6 | 7 | def __init__(self, max_size=1000000): 8 | 9 | self.max_size = max_size 10 | self.paths = [] 11 | self.obs = None 12 | self.acs = None 13 | self.concatenated_rews = None 14 | self.unconcatenated_rews = None 15 | self.next_obs = None 16 | self.terminals = None 17 | 18 | def add_rollouts(self, paths): 19 | 20 | # add new rollouts into our list of rollouts 21 | for path in paths: 22 | self.paths.append(path) 23 | 24 | # convert new rollouts into their component arrays, and append them onto our arrays 25 | observations, actions, next_observations, terminals, concatenated_rews, unconcatenated_rews = convert_listofrollouts(paths) 26 | 27 | if self.obs is None: 28 | self.obs = observations[-self.max_size:] 29 | self.acs = actions[-self.max_size:] 30 | self.next_obs = next_observations[-self.max_size:] 31 | self.terminals = terminals[-self.max_size:] 32 | self.concatenated_rews = concatenated_rews[-self.max_size:] 
33 | self.unconcatenated_rews = unconcatenated_rews[-self.max_size:] 34 | else: 35 | self.obs = np.concatenate([self.obs, observations])[-self.max_size:] 36 | self.acs = np.concatenate([self.acs, actions])[-self.max_size:] 37 | self.next_obs = np.concatenate([self.next_obs, next_observations])[-self.max_size:] 38 | self.terminals = np.concatenate([self.terminals, terminals])[-self.max_size:] 39 | self.concatenated_rews = np.concatenate([self.concatenated_rews, concatenated_rews])[-self.max_size:] 40 | if isinstance(unconcatenated_rews, list): 41 | self.unconcatenated_rews += unconcatenated_rews 42 | else: 43 | self.unconcatenated_rews.append(unconcatenated_rews) 44 | 45 | ######################################## 46 | ######################################## 47 | 48 | def sample_random_rollouts(self, num_rollouts): 49 | rand_indices = np.random.permutation(len(self.paths))[:num_rollouts] 50 | return self.paths[rand_indices] 51 | 52 | def sample_recent_rollouts(self, num_rollouts=1): 53 | return self.paths[-num_rollouts:] 54 | 55 | ######################################## 56 | ######################################## 57 | 58 | def sample_random_data(self, batch_size): 59 | 60 | assert self.obs.shape[0] == self.acs.shape[0] == self.concatenated_rews.shape[0] == self.next_obs.shape[0] == self.terminals.shape[0] 61 | rand_indices = np.random.permutation(self.obs.shape[0])[:batch_size] 62 | return self.obs[rand_indices], self.acs[rand_indices], self.concatenated_rews[rand_indices], self.next_obs[rand_indices], self.terminals[rand_indices] 63 | 64 | def sample_recent_data(self, batch_size=1, concat_rew=True): 65 | 66 | if concat_rew: 67 | return self.obs[-batch_size:], self.acs[-batch_size:], self.concatenated_rews[-batch_size:], self.next_obs[-batch_size:], self.terminals[-batch_size:] 68 | else: 69 | num_recent_rollouts_to_return = 0 70 | num_datapoints_so_far = 0 71 | index = -1 72 | while num_datapoints_so_far < batch_size: 73 | recent_rollout = self.paths[index] 74 | 
index -=1 75 | num_recent_rollouts_to_return +=1 76 | num_datapoints_so_far += get_pathlength(recent_rollout) 77 | rollouts_to_return = self.paths[-num_recent_rollouts_to_return:] 78 | observations, actions, next_observations, terminals, concatenated_rews, unconcatenated_rews = convert_listofrollouts(rollouts_to_return) 79 | return observations, actions, unconcatenated_rews, next_observations, terminals -------------------------------------------------------------------------------- /hw2/cs285/policies/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw2/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/policies/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/policies/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285_hw2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285_hw2.pdf -------------------------------------------------------------------------------- /hw2/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.10.11 2 | mujoco-py==1.50.1.35 3 | matplotlib==2.2.2 4 
| ipython==6.4.0 5 | moviepy==1.0.0 -------------------------------------------------------------------------------- /hw2/results/bonus-gae-cp/gae_cp_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae-cp/gae_cp_comp.png -------------------------------------------------------------------------------- /hw2/results/bonus-gae-cp/pg_cp_lambda0.4_CartPole-v0_28-07-2020_00-18-04/events.out.tfevents.1595909884.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae-cp/pg_cp_lambda0.4_CartPole-v0_28-07-2020_00-18-04/events.out.tfevents.1595909884.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae-cp/pg_cp_lambda0.8_CartPole-v0_28-07-2020_00-17-56/events.out.tfevents.1595909876.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae-cp/pg_cp_lambda0.8_CartPole-v0_28-07-2020_00-17-56/events.out.tfevents.1595909876.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae-cp/pg_cp_lambda0.95_CartPole-v0_28-07-2020_00-17-50/events.out.tfevents.1595909870.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae-cp/pg_cp_lambda0.95_CartPole-v0_28-07-2020_00-17-50/events.out.tfevents.1595909870.DESKTOP-U53KV1A 
-------------------------------------------------------------------------------- /hw2/results/bonus-gae-cp/pg_cp_lambda1_CartPole-v0_28-07-2020_00-17-41/events.out.tfevents.1595909861.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae-cp/pg_cp_lambda1_CartPole-v0_28-07-2020_00-17-41/events.out.tfevents.1595909861.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae/gae_hc_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/gae_hc_comp.png -------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda0.4_HalfCheetah-v2_27-07-2020_21-59-54/events.out.tfevents.1595901594.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0.4_HalfCheetah-v2_27-07-2020_21-59-54/events.out.tfevents.1595901594.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda0.8_HalfCheetah-v2_27-07-2020_19-21-50/events.out.tfevents.1595892110.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0.8_HalfCheetah-v2_27-07-2020_19-21-50/events.out.tfevents.1595892110.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw2/results/bonus-gae/pg_lambda0.95_HalfCheetah-v2_27-07-2020_19-21-45/events.out.tfevents.1595892105.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0.95_HalfCheetah-v2_27-07-2020_19-21-45/events.out.tfevents.1595892105.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda0.975_HalfCheetah-v2_27-07-2020_19-21-43/events.out.tfevents.1595892103.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0.975_HalfCheetah-v2_27-07-2020_19-21-43/events.out.tfevents.1595892103.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda0.99_HalfCheetah-v2_27-07-2020_21-59-27/events.out.tfevents.1595901567.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0.99_HalfCheetah-v2_27-07-2020_21-59-27/events.out.tfevents.1595901567.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda0.9_HalfCheetah-v2_27-07-2020_21-59-37/events.out.tfevents.1595901577.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0.9_HalfCheetah-v2_27-07-2020_21-59-37/events.out.tfevents.1595901577.DESKTOP-U53KV1A 
-------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda0_HalfCheetah-v2_27-07-2020_19-21-48/events.out.tfevents.1595892108.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0_HalfCheetah-v2_27-07-2020_19-21-48/events.out.tfevents.1595892108.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda1_HalfCheetah-v2_27-07-2020_21-59-20/events.out.tfevents.1595901560.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda1_HalfCheetah-v2_27-07-2020_21-59-20/events.out.tfevents.1595901560.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-3-lb/eval-avg-cp-lb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-lb/eval-avg-cp-lb.png -------------------------------------------------------------------------------- /hw2/results/problem-3-lb/pg_lb_no_rtg_dna_CartPole-v0_29-06-2020_00-52-35/events.out.tfevents.1593406355.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-lb/pg_lb_no_rtg_dna_CartPole-v0_29-06-2020_00-52-35/events.out.tfevents.1593406355.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw2/results/problem-3-lb/pg_lb_rtg_dna_CartPole-v0_29-06-2020_00-52-41/events.out.tfevents.1593406361.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-lb/pg_lb_rtg_dna_CartPole-v0_29-06-2020_00-52-41/events.out.tfevents.1593406361.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-3-lb/pg_lb_rtg_na_CartPole-v0_29-06-2020_00-52-47/events.out.tfevents.1593406367.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-lb/pg_lb_rtg_na_CartPole-v0_29-06-2020_00-52-47/events.out.tfevents.1593406367.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-3-sb/eval-avg-cp-sb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-sb/eval-avg-cp-sb.png -------------------------------------------------------------------------------- /hw2/results/problem-3-sb/pg_sb_no_rtg_dna_CartPole-v0_29-06-2020_00-45-57/events.out.tfevents.1593405957.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-sb/pg_sb_no_rtg_dna_CartPole-v0_29-06-2020_00-45-57/events.out.tfevents.1593405957.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw2/results/problem-3-sb/pg_sb_rtg_dna_CartPole-v0_29-06-2020_00-46-05/events.out.tfevents.1593405965.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-sb/pg_sb_rtg_dna_CartPole-v0_29-06-2020_00-46-05/events.out.tfevents.1593405965.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-3-sb/pg_sb_rtg_na_CartPole-v0_29-06-2020_00-46-11/events.out.tfevents.1593405971.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-sb/pg_sb_rtg_na_CartPole-v0_29-06-2020_00-46-11/events.out.tfevents.1593405971.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-4/ideal-params-comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-4/ideal-params-comparison.png -------------------------------------------------------------------------------- /hw2/results/problem-4/pg_ip_b100_lr0.06_InvertedPendulum-v2_01-07-2020_00-34-14/events.out.tfevents.1593578054.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-4/pg_ip_b100_lr0.06_InvertedPendulum-v2_01-07-2020_00-34-14/events.out.tfevents.1593578054.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw2/results/problem-4/pg_ip_b80_lr0.06_InvertedPendulum-v2_01-07-2020_00-33-01/events.out.tfevents.1593577981.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-4/pg_ip_b80_lr0.06_InvertedPendulum-v2_01-07-2020_00-33-01/events.out.tfevents.1593577981.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-4/pg_ip_b90_lr0.05_InvertedPendulum-v2_01-07-2020_00-26-36/events.out.tfevents.1593577596.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-4/pg_ip_b90_lr0.05_InvertedPendulum-v2_01-07-2020_00-26-36/events.out.tfevents.1593577596.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-4/pg_ip_b90_lr0.06_InvertedPendulum-v2_01-07-2020_00-27-32/events.out.tfevents.1593577652.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-4/pg_ip_b90_lr0.06_InvertedPendulum-v2_01-07-2020_00-27-32/events.out.tfevents.1593577652.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-4/pg_ip_b90_lr0.07_InvertedPendulum-v2_01-07-2020_00-28-39/events.out.tfevents.1593577719.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-4/pg_ip_b90_lr0.07_InvertedPendulum-v2_01-07-2020_00-28-39/events.out.tfevents.1593577719.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-6/eval-avg-ll.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-6/eval-avg-ll.png -------------------------------------------------------------------------------- /hw2/results/problem-6/pg_ll_b40000_r0.005_LunarLanderContinuous-v2_29-06-2020_21-30-30/events.out.tfevents.1593480630.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-6/pg_ll_b40000_r0.005_LunarLanderContinuous-v2_29-06-2020_21-30-30/events.out.tfevents.1593480630.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-ideal/cheetah-ideal-eval-avg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-ideal/cheetah-ideal-eval-avg.png -------------------------------------------------------------------------------- /hw2/results/problem-7-ideal/pg_hc_b30000_lr0.02_HalfCheetah-v2_30-06-2020_20-07-51/events.out.tfevents.1593562071.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-ideal/pg_hc_b30000_lr0.02_HalfCheetah-v2_30-06-2020_20-07-51/events.out.tfevents.1593562071.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-ideal/pg_hc_nnb_b30000_lr0.02_HalfCheetah-v2_30-06-2020_22-15-46/events.out.tfevents.1593569746.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-ideal/pg_hc_nnb_b30000_lr0.02_HalfCheetah-v2_30-06-2020_22-15-46/events.out.tfevents.1593569746.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-ideal/pg_hc_rtg_b30000_lr0.02_HalfCheetah-v2_30-06-2020_20-44-51/events.out.tfevents.1593564291.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-ideal/pg_hc_rtg_b30000_lr0.02_HalfCheetah-v2_30-06-2020_20-44-51/events.out.tfevents.1593564291.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-ideal/pg_hc_rtg_nnb_b30000_lr0.02_HalfCheetah-v2_30-06-2020_21-19-20/events.out.tfevents.1593566360.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-ideal/pg_hc_rtg_nnb_b30000_lr0.02_HalfCheetah-v2_30-06-2020_21-19-20/events.out.tfevents.1593566360.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw2/results/problem-7-search/cheetah-search-eval-avg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/cheetah-search-eval-avg.png -------------------------------------------------------------------------------- /hw2/results/problem-7-search/pg_hc_b10000_lr0.005_nnbaseline_HalfCheetah-v2_30-06-2020_01-48-46/events.out.tfevents.1593496126.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/pg_hc_b10000_lr0.005_nnbaseline_HalfCheetah-v2_30-06-2020_01-48-46/events.out.tfevents.1593496126.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-search/pg_hc_b10000_lr0.01_nnbaseline_HalfCheetah-v2_30-06-2020_00-21-43/events.out.tfevents.1593490903.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/pg_hc_b10000_lr0.01_nnbaseline_HalfCheetah-v2_30-06-2020_00-21-43/events.out.tfevents.1593490903.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-search/pg_hc_b10000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_00-37-16/events.out.tfevents.1593491836.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/pg_hc_b10000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_00-37-16/events.out.tfevents.1593491836.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-search/pg_hc_b30000_lr0.005_nnbaseline_HalfCheetah-v2_30-06-2020_00-51-38/events.out.tfevents.1593492698.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/pg_hc_b30000_lr0.005_nnbaseline_HalfCheetah-v2_30-06-2020_00-51-38/events.out.tfevents.1593492698.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-search/pg_hc_b30000_lr0.01_nnbaseline_HalfCheetah-v2_30-06-2020_02-09-33/events.out.tfevents.1593497373.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/pg_hc_b30000_lr0.01_nnbaseline_HalfCheetah-v2_30-06-2020_02-09-33/events.out.tfevents.1593497373.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-search/pg_hc_b30000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_11-45-56/events.out.tfevents.1593531956.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/pg_hc_b30000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_11-45-56/events.out.tfevents.1593531956.DESKTOP-U53KV1A 
-------------------------------------------------------------------------------- /hw2/setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup 3 | 4 | setup( 5 | name='cs285', 6 | version='0.1.0', 7 | packages=['cs285'], 8 | ) -------------------------------------------------------------------------------- /hw3/README.txt: -------------------------------------------------------------------------------- 1 | 2 | 1) See hw1 if you'd like to see installation instructions. You do NOT have to redo them. But, you need to install OpenCV for this assignment: 3 | `pip install opencv-python==3.4.0.12` 4 | 5 | You also need to replace `/gym/envs/box2d/lunar_lander.py` with the provided `lunar_lander.py` file. To find the file: 6 | $ locate lunar_lander.py 7 | (or if there are multiple options there): 8 | $ source activate cs285_env 9 | $ ipython 10 | $ import gym 11 | $ gym.__file__ 12 | /gym/__init__.py 13 | ############################################## 14 | ############################################## 15 | 16 | 17 | 2) Code: 18 | 19 | ------------------------------------------- 20 | 21 | Files to look at, even though there are no explicit 'TODO' markings: 22 | - scripts/run_hw3_dqn.py 23 | - scripts/run_hw3_actor_critic.py 24 | - infrastructure/models.py 25 | - policies/MLP_policy.py 26 | 27 | ------------------------------------------- 28 | 29 | Blanks to be filled in now (for this assignment) are marked with 'TODO' 30 | 31 | The following files have these: 32 | - critics/dqn_critic.py 33 | - agents/dqn_agent.py 34 | - policies/argmax_policy.py 35 | - critics/bootstrapped_continuous_critic.py 36 | - agents/ac_agent.py 37 | 38 | ############################################## 39 | ############################################## 40 | 41 | 42 | 3) Run code with the following command: 43 | 44 | $ python cs285/scripts/run_hw3_dqn.py --env_name PongNoFrameskip-v4 --exp_name test_pong 45 | $ python 
cs285/scripts/run_hw3_actor_critic.py --env_name CartPole-v0 -n 100 -b 1000 --exp_name 100_1 -ntu 100 -ngsptu 1 46 | 47 | Flags of relevance, when running the commands above (see pdf for more info): 48 | -double_q Whether to use double Q learning or not. 49 | 50 | ############################################## 51 | 52 | 53 | 4) Visualize saved tensorboard event file: 54 | 55 | $ cd cs285/data/ 56 | $ tensorboard --logdir . 57 | 58 | Then, navigate to shown url to see scalar summaries as plots (in 'scalar' tab), as well as videos (in 'images' tab) 59 | -------------------------------------------------------------------------------- /hw3/cs285/agents/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw3/cs285/agents/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/agents/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/agents/__pycache__/ac_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/agents/__pycache__/ac_agent.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/agents/__pycache__/dqn_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/agents/__pycache__/dqn_agent.cpython-37.pyc 
import numpy as np
import torch
from collections import OrderedDict


class ACAgent:
    """Actor-critic agent pairing an MLP policy (actor) with a bootstrapped
    state-value critic.

    NOTE(review): MLPPolicyAC, BootstrappedContinuousCritic and ReplayBuffer
    are provided by the surrounding cs285 package.
    """

    def __init__(self, env, agent_params):
        self.env = env
        self.agent_params = agent_params
        self.num_critic_updates_per_agent_update = agent_params['num_critic_updates_per_agent_update']
        self.num_actor_updates_per_agent_update = agent_params['num_actor_updates_per_agent_update']
        self.device = agent_params['device']

        self.gamma = self.agent_params['gamma']
        self.standardize_advantages = self.agent_params['standardize_advantages']

        self.actor = MLPPolicyAC(
            self.agent_params['ac_dim'],
            self.agent_params['ob_dim'],
            self.agent_params['n_layers'],
            self.agent_params['size'],
            self.agent_params['device'],
            discrete=self.agent_params['discrete'],
            learning_rate=self.agent_params['learning_rate'],
        )
        self.critic = BootstrappedContinuousCritic(self.agent_params)

        self.replay_buffer = ReplayBuffer()

    def estimate_advantage(self, ob_no, next_ob_no, re_n, terminal_n):
        """Return the advantage A(s, a) = r + gamma * V(s') - V(s).

        Args:
            ob_no:       numpy array, shape (N, ob_dim) — observations.
            next_ob_no:  numpy array, shape (N, ob_dim) — next observations.
            re_n:        numpy array, shape (N,) — per-step rewards.
            terminal_n:  numpy array, shape (N,) — 1 if the episode ended
                         at that step, else 0.

        Returns:
            numpy array of shape (N,): (optionally standardized) advantages.
        """
        ob, next_ob, rew, done = map(
            lambda x: torch.from_numpy(x).to(self.device),
            [ob_no, next_ob_no, re_n, terminal_n],
        )

        # Advantages are treated as constants by the actor, so no graph needed.
        with torch.no_grad():
            # BUG FIX: the critic outputs shape (N, 1); squeeze V(s) to (N,).
            # Without the squeeze, `rew + gamma * next_value - value`
            # broadcast to an (N, N) matrix of meaningless "advantages".
            value = self.critic.value_func(ob).squeeze()
            # Cut the bootstrap off at terminal states (done == 1).
            next_value = self.critic.value_func(next_ob).squeeze() * (1 - done)
            adv_n = (rew + self.gamma * next_value - value).cpu().numpy()

        if self.standardize_advantages:
            # Normalize to zero mean / unit variance for lower-variance updates.
            adv_n = (adv_n - np.mean(adv_n)) / (np.std(adv_n) + 1e-8)
        return adv_n

    def train(self, ob_no, ac_na, re_n, next_ob_no, terminal_n):
        """One agent update: several critic steps, then advantage estimation,
        then several actor steps.

        Returns:
            OrderedDict with the final 'Critic_Loss' and 'Actor_Loss'.
        """
        loss = OrderedDict()

        for _ in range(self.num_critic_updates_per_agent_update):
            loss['Critic_Loss'] = self.critic.update(ob_no, next_ob_no, re_n, terminal_n)

        adv_n = self.estimate_advantage(ob_no, next_ob_no, re_n, terminal_n)

        for _ in range(self.num_actor_updates_per_agent_update):
            loss['Actor_Loss'] = self.actor.update(ob_no, ac_na, adv_n)

        return loss

    def add_to_replay_buffer(self, paths):
        """Store freshly collected rollouts in the replay buffer."""
        self.replay_buffer.add_rollouts(paths)

    def sample(self, batch_size):
        """Sample the most recent `batch_size` transitions (on-policy data)."""
        return self.replay_buffer.sample_recent_data(batch_size)
import torch
from torch import nn


class BootstrappedContinuousCritic:
    """State-value critic V(s) trained on bootstrapped TD targets.

    NOTE(review): MLP comes from cs285.infrastructure.models.
    """

    def __init__(self, hparams):
        self.ob_dim = hparams['ob_dim']
        self.ac_dim = hparams['ac_dim']
        self.size = hparams['size']
        self.n_layers = hparams['n_layers']
        self.device = hparams['device']
        self.learning_rate = hparams['learning_rate']
        self.num_target_updates = hparams['num_target_updates']
        self.num_grad_steps_per_target_update = hparams['num_grad_steps_per_target_update']
        self.gamma = hparams['gamma']

        # Single-output MLP mapping observations to V(s).
        self.value_func = MLP(1, self.ob_dim, self.n_layers, self.size, self.device, True)
        self.optimizer = torch.optim.Adam(self.value_func.parameters(), lr=self.learning_rate)

    def update(self, ob_no, next_ob_no, re_n, terminal_n):
        """Update the critic parameters toward bootstrapped TD targets.

        Every `num_grad_steps_per_target_update` steps (including step 0) the
        target r + gamma * V(s') is recomputed; in between, gradient steps
        minimize MSE(V(s), target).

        Args:
            ob_no:       numpy array, shape (sum_of_path_lengths, ob_dim).
            next_ob_no:  numpy array, shape (sum_of_path_lengths, ob_dim).
            re_n:        numpy array, shape (sum_of_path_lengths,) — rewards.
            terminal_n:  numpy array, shape (sum_of_path_lengths,) — 1 at
                         episode ends, else 0.

        Returns:
            The final MSE loss tensor.
        """
        ob, next_ob, rew, done = map(
            lambda x: torch.from_numpy(x).to(self.device),
            [ob_no, next_ob_no, re_n, terminal_n],
        )

        loss = None
        total_steps = self.num_grad_steps_per_target_update * self.num_target_updates
        for step in range(total_steps):
            if step % self.num_grad_steps_per_target_update == 0:
                # BUG FIX: build the TD target without tracking gradients.
                # The original computed it inside the graph and only called
                # target_value.detach_() AFTER the first backward pass, so the
                # first gradient step after each target refresh also
                # backpropagated through the target, biasing the update.
                with torch.no_grad():
                    # V(s') is cut off (set to 0) at terminal states.
                    next_value = self.value_func(next_ob).squeeze() * (1 - done)
                    target_value = rew + self.gamma * next_value

            self.optimizer.zero_grad()
            loss = nn.functional.mse_loss(self.value_func(ob).squeeze(), target_value)
            loss.backward()
            self.optimizer.step()

        return loss


class DQNCritic:
    """Q-function critic for DQN / double DQN, trained with the Huber loss.

    NOTE(review): LL_DQN and atari_DQN come from cs285.infrastructure.models.
    """

    def __init__(self, hparams, optimizer_spec, **kwargs):
        super().__init__(**kwargs)
        self.env_name = hparams['env_name']
        self.device = hparams['device']
        self.ob_dim = hparams['ob_dim']

        # Flat observations use ob_dim directly; image obs use 'input_shape'.
        if isinstance(self.ob_dim, int):
            self.input_shape = self.ob_dim
        else:
            self.input_shape = hparams['input_shape']

        self.ac_dim = hparams['ac_dim']
        self.double_q = hparams['double_q']
        self.grad_norm_clipping = hparams['grad_norm_clipping']
        self.gamma = hparams['gamma']

        self.optimizer_spec = optimizer_spec

        if self.env_name == 'LunarLander-v2':
            self.Q_func = LL_DQN(self.ac_dim, self.input_shape, self.device)
            self.target_Q_func = LL_DQN(self.ac_dim, self.input_shape, self.device)
        elif self.env_name == 'PongNoFrameskip-v4':
            self.Q_func = atari_DQN(self.ac_dim, self.input_shape, self.device)
            self.target_Q_func = atari_DQN(self.ac_dim, self.input_shape, self.device)
        else:
            raise NotImplementedError

        # lr=1 so the LambdaLR schedule fully controls the learning rate.
        self.optimizer = self.optimizer_spec.constructor(
            self.Q_func.parameters(), lr=1, **self.optimizer_spec.kwargs)
        self.lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
            self.optimizer, self.optimizer_spec.lr_schedule)

    def get_loss(self, ob_no, ac_na, re_n, next_ob_no, terminal_n):
        """Return the Huber (smooth-L1) TD loss between Q(s, a) and
        r + gamma * max_a' Q_target(s', a') (double-DQN action selection
        when `self.double_q`).
        """
        ob, ac, rew, next_ob, done = map(
            lambda x: torch.from_numpy(x).to(self.device),
            [ob_no, ac_na, re_n, next_ob_no, terminal_n],
        )

        # BUG FIX: the whole target computation sits under no_grad().
        # The original evaluated target_Q_func outside it, so backward()
        # accumulated gradients in the (never-stepped, never-zeroed) target
        # network on every update.
        with torch.no_grad():
            if self.double_q:
                # Double DQN: online net picks the action, target net scores it.
                max_ac = self.Q_func(next_ob).argmax(-1, True)
            else:
                max_ac = self.target_Q_func(next_ob).argmax(-1, True)
            best_next_Q = self.target_Q_func(next_ob).gather(-1, max_ac).squeeze()
            # Bootstrap is cut off at terminal states (done == 1).
            calc_Q = rew + self.gamma * best_next_Q * (1 - done)

        curr_Q = self.Q_func(ob).gather(-1, ac.long().view(-1, 1)).squeeze()
        return nn.functional.smooth_l1_loss(curr_Q, calc_Q)  # Huber loss

    def update(self, ob_no, ac_na, re_n, next_ob_no, terminal_n):
        """One gradient step on the Huber TD loss, with gradient-norm
        clipping, followed by a learning-rate-schedule step.

        Returns the loss tensor.
        """
        self.optimizer.zero_grad()

        loss = self.get_loss(ob_no, ac_na, re_n, next_ob_no, terminal_n)
        loss.backward()

        # Clip the gradient norm for stability before stepping.
        nn.utils.clip_grad_norm_(self.Q_func.parameters(), max_norm=self.grad_norm_clipping)
        self.optimizer.step()
        self.lr_scheduler.step()  # advance the learning-rate schedule

        return loss
/hw3/cs285/infrastructure/__pycache__/dqn_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/infrastructure/__pycache__/dqn_utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/infrastructure/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/infrastructure/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/infrastructure/__pycache__/models.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/infrastructure/__pycache__/models.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc 
-------------------------------------------------------------------------------- /hw3/cs285/infrastructure/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/infrastructure/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/infrastructure/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorboardX import SummaryWriter 3 | import numpy as np 4 | 5 | class Logger: 6 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None): 7 | self._log_dir = log_dir 8 | print('########################') 9 | print('logging outputs to ', log_dir) 10 | print('########################') 11 | self._n_logged_samples = n_logged_samples 12 | self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1) 13 | 14 | def log_scalar(self, scalar, name, step_): 15 | self._summ_writer.add_scalar('{}'.format(name), scalar, step_) 16 | 17 | def log_scalars(self, scalar_dict, group_name, step, phase): 18 | """Will log all scalars in the same plot.""" 19 | self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step) 20 | 21 | def log_image(self, image, name, step): 22 | assert(len(image.shape) == 3) # [C, H, W] 23 | self._summ_writer.add_image('{}'.format(name), image, step) 24 | 25 | def log_video(self, video_frames, name, step, fps=10): 26 | assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input tensor for video logging!" 
import torch
from torch import nn

class MLP(nn.Module):
    """Fully connected network used for policies and Q-functions.

    For `discrete=True` the forward pass returns raw outputs (e.g.
    logits / Q-values); otherwise it returns the pair
    `(output, exp(logstd))`, where `logstd` is a learned,
    state-independent parameter of size `ac_dim`.
    """

    def __init__(self,
                 ac_dim,
                 ob_dim,
                 n_layers,
                 size,
                 device,
                 discrete,
                 activation = nn.Tanh()):
        super().__init__()

        self.discrete = discrete

        # Build the stack: input layer, (n_layers - 1) extra hidden
        # layers, then a linear output head with no activation.
        # Layer creation order matters for seeded weight init and for
        # the "mlp.<i>" state_dict keys, so keep it stable.
        stack = nn.ModuleList()
        stack.append(nn.Linear(ob_dim, size))
        stack.append(activation)
        for _ in range(n_layers - 1):
            stack.append(nn.Linear(size, size))
            stack.append(activation)
        stack.append(nn.Linear(size, ac_dim))
        self.mlp = stack

        # Continuous outputs carry a learnable log standard deviation.
        if not self.discrete:
            self.logstd = nn.Parameter(torch.zeros(ac_dim))

        self.to(device)

    def forward(self, x):
        out = x
        for module in self.mlp:
            out = module(out)
        return out if self.discrete else (out, self.logstd.exp())

    def save(self, filepath):
        torch.save(self.state_dict(), filepath)

    def restore(self, filepath):
        self.load_state_dict(torch.load(filepath))

class LL_DQN(MLP):
    """Q-network for LunarLander: a 2x64 ReLU MLP over flat observations."""

    def __init__(self, ac_dim, ob_dim, device):
        super().__init__(ac_dim, ob_dim, 2, 64, device, True, nn.ReLU())

class atari_DQN(nn.Module):
    """Nature-DQN style convolutional Q-network for Atari frames.

    Expects uint8 image batches laid out [batch, height, width, channels];
    `forward` rescales to [0, 1] and permutes to NCHW before the convs.
    """

    def __init__(self, ac_dim, ob_dim, device):
        super().__init__()

        # Three conv layers; the Linear below assumes they reduce the
        # input to a 7x7x64 feature map (as with 84x84 inputs).
        self.convnet = nn.Sequential(
            nn.Conv2d(ob_dim[2], 32, 8, stride = 4),
            nn.ReLU(True),
            nn.Conv2d(32, 64, 4, stride = 2),
            nn.ReLU(True),
            nn.Conv2d(64, 64, 3, stride = 1),
            nn.ReLU(True),
        )
        self.action_value = nn.Sequential(
            nn.Linear(7 * 7 * 64, 512),
            nn.ReLU(True),
            nn.Linear(512, ac_dim),
        )
        self.to(device)

    def forward(self, obs):
        scaled = obs.float() / 255                  # uint8 pixels -> [0, 1]
        scaled = scaled.permute(0, 3, 1, 2)         # NHWC -> NCHW
        features = self.convnet(scaled)
        flat = features.reshape(features.size(0), -1)
        return self.action_value(flat)

    def save(self, filepath):
        torch.save(self.state_dict(), filepath)

    def restore(self, filepath):
        self.load_state_dict(torch.load(filepath))
import numpy as np

# NOTE(review): the original module also does
# `from cs285.infrastructure.utils import *`, which supplies
# convert_listofrollouts and get_pathlength used below.


class ReplayBuffer(object):
    """Replay buffer keeping both whole rollouts and flat per-step arrays.

    The flat component arrays (obs, acs, rewards, next_obs, terminals)
    are truncated to the most recent `max_size` steps on every add.
    """

    def __init__(self, max_size=1000000):

        self.max_size = max_size
        self.paths = []                  # list of rollout dicts
        self.obs = None                  # flat arrays; None until first add
        self.acs = None
        self.concatenated_rews = None
        self.unconcatenated_rews = None  # list of per-rollout reward arrays
        self.next_obs = None
        self.terminals = None

    def add_rollouts(self, paths):
        """Append new rollouts and refresh the flat component arrays."""

        # add new rollouts into our list of rollouts
        for path in paths:
            self.paths.append(path)

        # convert new rollouts into their component arrays, and append them onto our arrays
        observations, actions, next_observations, terminals, \
            concatenated_rews, unconcatenated_rews = convert_listofrollouts(paths)

        if self.obs is None:
            self.obs = observations[-self.max_size:]
            self.acs = actions[-self.max_size:]
            self.next_obs = next_observations[-self.max_size:]
            self.terminals = terminals[-self.max_size:]
            self.concatenated_rews = concatenated_rews[-self.max_size:]
            self.unconcatenated_rews = unconcatenated_rews[-self.max_size:]
        else:
            self.obs = np.concatenate([self.obs, observations])[-self.max_size:]
            self.acs = np.concatenate([self.acs, actions])[-self.max_size:]
            self.next_obs = np.concatenate([self.next_obs, next_observations])[-self.max_size:]
            self.terminals = np.concatenate([self.terminals, terminals])[-self.max_size:]
            self.concatenated_rews = np.concatenate([self.concatenated_rews, concatenated_rews])[-self.max_size:]
            # NOTE(review): unlike the arrays above, the per-rollout reward
            # list is never truncated to max_size here (original behavior kept).
            if isinstance(unconcatenated_rews, list):
                self.unconcatenated_rews += unconcatenated_rews
            else:
                self.unconcatenated_rews.append(unconcatenated_rews)

    ########################################
    ########################################

    def sample_random_rollouts(self, num_rollouts):
        """Sample `num_rollouts` paths uniformly, without replacement."""
        rand_indices = np.random.permutation(len(self.paths))[:num_rollouts]
        # BUG FIX: `self.paths` is a python list and cannot be indexed with
        # an ndarray of indices (TypeError); gather the chosen paths explicitly.
        return [self.paths[i] for i in rand_indices]

    def sample_recent_rollouts(self, num_rollouts=1):
        """Return the most recent `num_rollouts` paths."""
        return self.paths[-num_rollouts:]

    ########################################
    ########################################

    def sample_random_data(self, batch_size):
        """Sample `batch_size` transitions uniformly, without replacement."""
        assert self.obs.shape[0] == self.acs.shape[0] == self.concatenated_rews.shape[0] \
            == self.next_obs.shape[0] == self.terminals.shape[0]
        rand_indices = np.random.permutation(self.obs.shape[0])[:batch_size]
        return self.obs[rand_indices], self.acs[rand_indices], self.concatenated_rews[rand_indices], \
            self.next_obs[rand_indices], self.terminals[rand_indices]

    def sample_recent_data(self, batch_size=1, concat_rew=True):
        """Return the most recent transitions.

        With `concat_rew=True`, returns exactly the last `batch_size` rows
        of each flat array. Otherwise, returns whole recent rollouts
        covering at least `batch_size` steps, with rewards kept per-rollout.
        """
        if concat_rew:
            return self.obs[-batch_size:], self.acs[-batch_size:], self.concatenated_rews[-batch_size:], \
                self.next_obs[-batch_size:], self.terminals[-batch_size:]

        # walk backwards through rollouts until enough datapoints are gathered
        num_recent_rollouts_to_return = 0
        num_datapoints_so_far = 0
        index = -1
        while num_datapoints_so_far < batch_size:
            recent_rollout = self.paths[index]
            index -= 1
            num_recent_rollouts_to_return += 1
            num_datapoints_so_far += get_pathlength(recent_rollout)
        rollouts_to_return = self.paths[-num_recent_rollouts_to_return:]
        observations, actions, next_observations, terminals, \
            concatenated_rews, unconcatenated_rews = convert_listofrollouts(rollouts_to_return)
        return observations, actions, unconcatenated_rews, next_observations, terminals
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/policies/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/policies/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/policies/__pycache__/argmax_policy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/policies/__pycache__/argmax_policy.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/policies/argmax_policy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class ArgMaxPolicy: 4 | 5 | def __init__(self, critic, device): 6 | self.critic = critic 7 | self.device = device 8 | 9 | def get_action(self, obs): 10 | if len(obs.shape) > 1: 11 | observation = torch.tensor(obs).to(self.device) 12 | else: 13 | observation = torch.tensor(obs[None]).to(self.device) 14 | # TODO: pass observation to critic and use argmax of the resulting Q values as the action 15 | return self.critic.Q_func(observation).squeeze().argmax().item() 16 | -------------------------------------------------------------------------------- /hw3/cs285_hw3.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285_hw3.pdf -------------------------------------------------------------------------------- /hw3/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.10.11 2 | mujoco-py==1.50.1.35 3 | matplotlib==2.2.2 4 | ipython==6.4.0 5 | moviepy==1.0.0 6 | box2d-py -------------------------------------------------------------------------------- /hw3/results/Q1/dqn_double_q_test_pong_PongNoFrameskip-v4_06-07-2020_01-47-43/events.out.tfevents.1594014463.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q1/dqn_double_q_test_pong_PongNoFrameskip-v4_06-07-2020_01-47-43/events.out.tfevents.1594014463.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q1/dqn_double_q_test_pong_PongNoFrameskip-v4_06-07-2020_01-47-43/openaigym.video.0.15504.video004000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q1/dqn_double_q_test_pong_PongNoFrameskip-v4_06-07-2020_01-47-43/openaigym.video.0.15504.video004000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q1/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/events.out.tfevents.1593891399.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q1/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/events.out.tfevents.1593891399.DESKTOP-U53KV1A 
-------------------------------------------------------------------------------- /hw3/results/Q1/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/openaigym.video.0.15892.video004000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q1/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/openaigym.video.0.15892.video004000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q1/pong-comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q1/pong-comp.png -------------------------------------------------------------------------------- /hw3/results/Q2/double_dqn/dqn_double_q_double_dqn_ll_seed1_LunarLander-v2_04-07-2020_21-47-22/events.out.tfevents.1593913642.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/double_dqn/dqn_double_q_double_dqn_ll_seed1_LunarLander-v2_04-07-2020_21-47-22/events.out.tfevents.1593913642.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q2/double_dqn/dqn_double_q_double_dqn_ll_seed2_LunarLander-v2_04-07-2020_21-47-30/events.out.tfevents.1593913650.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/double_dqn/dqn_double_q_double_dqn_ll_seed2_LunarLander-v2_04-07-2020_21-47-30/events.out.tfevents.1593913650.DESKTOP-U53KV1A 
-------------------------------------------------------------------------------- /hw3/results/Q2/double_dqn/dqn_double_q_double_dqn_ll_seed3_LunarLander-v2_04-07-2020_21-47-38/events.out.tfevents.1593913658.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/double_dqn/dqn_double_q_double_dqn_ll_seed3_LunarLander-v2_04-07-2020_21-47-38/events.out.tfevents.1593913658.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q2/double_dqn_eval_ll.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/double_dqn_eval_ll.png -------------------------------------------------------------------------------- /hw3/results/Q2/dqn/dqn_dqn_ll_seed1_LunarLander-v2_04-07-2020_21-46-58/events.out.tfevents.1593913618.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/dqn/dqn_dqn_ll_seed1_LunarLander-v2_04-07-2020_21-46-58/events.out.tfevents.1593913618.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q2/dqn/dqn_dqn_ll_seed2_LunarLander-v2_04-07-2020_21-47-06/events.out.tfevents.1593913626.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/dqn/dqn_dqn_ll_seed2_LunarLander-v2_04-07-2020_21-47-06/events.out.tfevents.1593913626.DESKTOP-U53KV1A 
-------------------------------------------------------------------------------- /hw3/results/Q2/dqn/dqn_dqn_ll_seed3_LunarLander-v2_04-07-2020_21-47-15/events.out.tfevents.1593913635.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/dqn/dqn_dqn_ll_seed3_LunarLander-v2_04-07-2020_21-47-15/events.out.tfevents.1593913635.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/events.out.tfevents.1593891399.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/events.out.tfevents.1593891399.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/openaigym.video.0.15892.video004000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/openaigym.video.0.15892.video004000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_06-07-2020_01-46-09/events.out.tfevents.1594014369.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_06-07-2020_01-46-09/events.out.tfevents.1594014369.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_06-07-2020_01-46-09/openaigym.video.0.14904.video005000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_06-07-2020_01-46-09/openaigym.video.0.14904.video005000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q3-init/init_pong_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-init/init_pong_comp.png -------------------------------------------------------------------------------- /hw3/results/Q3-lrmult/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/events.out.tfevents.1593891399.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/events.out.tfevents.1593891399.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q3-lrmult/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/openaigym.video.0.15892.video004000.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/openaigym.video.0.15892.video004000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q3-lrmult/dqn_test_pong_lrmult0.5_PongNoFrameskip-v4_06-07-2020_01-51-13/events.out.tfevents.1594014673.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/dqn_test_pong_lrmult0.5_PongNoFrameskip-v4_06-07-2020_01-51-13/events.out.tfevents.1594014673.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q3-lrmult/dqn_test_pong_lrmult0.5_PongNoFrameskip-v4_06-07-2020_01-51-13/openaigym.video.0.12644.video004000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/dqn_test_pong_lrmult0.5_PongNoFrameskip-v4_06-07-2020_01-51-13/openaigym.video.0.12644.video004000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q3-lrmult/dqn_test_pong_lrmult2.0_PongNoFrameskip-v4_06-07-2020_01-51-32/events.out.tfevents.1594014692.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/dqn_test_pong_lrmult2.0_PongNoFrameskip-v4_06-07-2020_01-51-32/events.out.tfevents.1594014692.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw3/results/Q3-lrmult/dqn_test_pong_lrmult2.0_PongNoFrameskip-v4_06-07-2020_01-51-32/openaigym.video.0.9872.video004000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/dqn_test_pong_lrmult2.0_PongNoFrameskip-v4_06-07-2020_01-51-32/openaigym.video.0.9872.video004000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q3-lrmult/lrmult_pong_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/lrmult_pong_comp.png -------------------------------------------------------------------------------- /hw3/results/Q4-1-100/5-seed-1-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-1-100/5-seed-1-100.png -------------------------------------------------------------------------------- /hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_14-59-37/events.out.tfevents.1593975577.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_14-59-37/events.out.tfevents.1593975577.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-14-01/events.out.tfevents.1593980041.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-14-01/events.out.tfevents.1593980041.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-20-04/events.out.tfevents.1593980404.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-20-04/events.out.tfevents.1593980404.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-23-26/events.out.tfevents.1593980606.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-23-26/events.out.tfevents.1593980606.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-27-20/events.out.tfevents.1593980840.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-27-20/events.out.tfevents.1593980840.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-10-10/5-seed-10-10.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-10-10/5-seed-10-10.png -------------------------------------------------------------------------------- /hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-12-48/events.out.tfevents.1593976368.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-12-48/events.out.tfevents.1593976368.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-47-19/events.out.tfevents.1593978439.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-47-19/events.out.tfevents.1593978439.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-49-24/events.out.tfevents.1593978564.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-49-24/events.out.tfevents.1593978564.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-59-55/events.out.tfevents.1593979195.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-59-55/events.out.tfevents.1593979195.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_16-02-28/events.out.tfevents.1593979348.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_16-02-28/events.out.tfevents.1593979348.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4/ac_100_1_CartPole-v0_05-07-2020_14-54-21/events.out.tfevents.1593975261.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4/ac_100_1_CartPole-v0_05-07-2020_14-54-21/events.out.tfevents.1593975261.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4/ac_10_10_CartPole-v0_05-07-2020_15-12-48/events.out.tfevents.1593976368.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4/ac_10_10_CartPole-v0_05-07-2020_15-12-48/events.out.tfevents.1593976368.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4/ac_1_100_CartPole-v0_05-07-2020_14-59-37/events.out.tfevents.1593975577.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4/ac_1_100_CartPole-v0_05-07-2020_14-59-37/events.out.tfevents.1593975577.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4/ac_1_1_CartPole-v0_05-07-2020_14-51-47/events.out.tfevents.1593975107.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4/ac_1_1_CartPole-v0_05-07-2020_14-51-47/events.out.tfevents.1593975107.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4/q4-comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4/q4-comp.png -------------------------------------------------------------------------------- /hw3/results/Q5-HC/ac_10_10_HalfCheetah-v2_05-07-2020_18-28-51/events.out.tfevents.1593988131.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q5-HC/ac_10_10_HalfCheetah-v2_05-07-2020_18-28-51/events.out.tfevents.1593988131.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q5-HC/hc_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q5-HC/hc_comp.png -------------------------------------------------------------------------------- 
/hw3/results/Q5-HC/pg_hc_b30000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_11-45-56/events.out.tfevents.1593531956.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q5-HC/pg_hc_b30000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_11-45-56/events.out.tfevents.1593531956.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q5-IP/IP_ac_eval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q5-IP/IP_ac_eval.png -------------------------------------------------------------------------------- /hw3/results/Q5-IP/ac_10_10_InvertedPendulum-v2_05-07-2020_18-28-19/events.out.tfevents.1593988099.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q5-IP/ac_10_10_InvertedPendulum-v2_05-07-2020_18-28-19/events.out.tfevents.1593988099.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q5-IP/ac_b30k_10_10_InvertedPendulum-v2_05-07-2020_19-45-39/events.out.tfevents.1593992739.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q5-IP/ac_b30k_10_10_InvertedPendulum-v2_05-07-2020_19-45-39/events.out.tfevents.1593992739.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/setup.py: 
-------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup 3 | 4 | setup( 5 | name='cs285', 6 | version='0.1.0', 7 | packages=['cs285'], 8 | ) -------------------------------------------------------------------------------- /hw4/README.txt: -------------------------------------------------------------------------------- 1 | 1) See hw1 if you'd like to see installation instructions. You do NOT have to redo them. 2 | 3 | ############################################## 4 | ############################################## 5 | 6 | 7 | 2) Code: 8 | 9 | ------------------------------------------- 10 | 11 | Files to look at, even though there are no explicit 'TODO' markings: 12 | - scripts/run_hw4_mb.py 13 | - infrastructure/rl_trainer.py 14 | 15 | ------------------------------------------- 16 | 17 | Blanks to be filled in now (for this assignment) are marked with 'TODO' 18 | 19 | The following files have these: 20 | - agents/mb_agent.py 21 | - models/ff_model.py 22 | - policies/MPC_policy.py 23 | - infrastructure/utils.py 24 | 25 | ############################################## 26 | ############################################## 27 | 28 | 29 | 3) Commands: 30 | 31 | Please refer to the PDF for the specific commands needed for different questions. 32 | 33 | ############################################## 34 | 35 | 36 | 4) Visualize saved tensorboard event file: 37 | 38 | $ cd cs285/data/ 39 | $ tensorboard --logdir . 
40 | 41 | Then, navigate to shown url to see scalar summaries as plots (in 'scalar' tab), as well as videos (in 'images' tab) -------------------------------------------------------------------------------- /hw4/cs285/agents/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw4/cs285/agents/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/agents/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/agents/__pycache__/mb_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/agents/__pycache__/mb_agent.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from cs285.envs import ant 2 | from cs285.envs import cheetah 3 | from cs285.envs import obstacles 4 | from cs285.envs import reacher -------------------------------------------------------------------------------- /hw4/cs285/envs/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/ant/__init__.py: 
-------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='ant-cs285-v0', 5 | entry_point='cs285.envs.ant:AntEnv', 6 | max_episode_steps=1000, 7 | ) 8 | from cs285.envs.ant.ant import AntEnv 9 | -------------------------------------------------------------------------------- /hw4/cs285/envs/ant/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/ant/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/ant/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/ant/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/ant/__pycache__/ant.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/ant/__pycache__/ant.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/ant/__pycache__/ant.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/ant/__pycache__/ant.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/cheetah/__init__.py: 
-------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='cheetah-cs285-v0', 5 | entry_point='cs285.envs.cheetah:HalfCheetahEnv', 6 | max_episode_steps=1000, 7 | ) 8 | from cs285.envs.cheetah.cheetah import HalfCheetahEnv 9 | -------------------------------------------------------------------------------- /hw4/cs285/envs/cheetah/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/cheetah/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/cheetah/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/cheetah/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/cheetah/__pycache__/cheetah.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/cheetah/__pycache__/cheetah.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/cheetah/__pycache__/cheetah.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/cheetah/__pycache__/cheetah.cpython-37.pyc -------------------------------------------------------------------------------- 
/hw4/cs285/envs/obstacles/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='obstacles-cs285-v0', 5 | entry_point='cs285.envs.obstacles:Obstacles', 6 | max_episode_steps=500, 7 | ) 8 | from cs285.envs.obstacles.obstacles_env import Obstacles 9 | -------------------------------------------------------------------------------- /hw4/cs285/envs/obstacles/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/obstacles/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/obstacles/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/obstacles/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/obstacles/__pycache__/obstacles_env.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/obstacles/__pycache__/obstacles_env.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/obstacles/__pycache__/obstacles_env.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/obstacles/__pycache__/obstacles_env.cpython-37.pyc 
-------------------------------------------------------------------------------- /hw4/cs285/envs/reacher/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='reacher-cs285-v0', 5 | entry_point='cs285.envs.reacher:Reacher7DOFEnv', 6 | max_episode_steps=500, 7 | ) 8 | from cs285.envs.reacher.reacher_env import Reacher7DOFEnv 9 | -------------------------------------------------------------------------------- /hw4/cs285/envs/reacher/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/reacher/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/reacher/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/reacher/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/reacher/__pycache__/reacher_env.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/reacher/__pycache__/reacher_env.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/reacher/__pycache__/reacher_env.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/reacher/__pycache__/reacher_env.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/infrastructure/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/infrastructure/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorboardX import SummaryWriter 3 | import numpy as np 4 | 5 | class Logger: 6 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None): 7 | self._log_dir = log_dir 8 | print('########################') 9 | print('logging outputs to ', log_dir) 10 | print('########################') 11 | self._n_logged_samples = n_logged_samples 12 | self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1) 13 | 14 | def log_scalar(self, scalar, name, step_): 15 | self._summ_writer.add_scalar('{}'.format(name), scalar, step_) 16 | 17 | def log_scalars(self, scalar_dict, group_name, step, phase): 18 | """Will log all scalars in the same plot.""" 19 | self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step) 20 | 21 | def log_image(self, image, name, step): 22 | assert(len(image.shape) == 3) # [C, H, W] 23 | self._summ_writer.add_image('{}'.format(name), image, step) 24 | 25 | def log_video(self, video_frames, name, step, fps=10): 26 | assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input tensor for video logging!" 
27 | self._summ_writer.add_video('{}'.format(name), video_frames, step, fps=fps) 28 | 29 | def log_paths_as_videos(self, paths, step, max_videos_to_save=2, fps=10, video_title='video'): 30 | 31 | # reshape the rollouts 32 | videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths] 33 | 34 | # max rollout length 35 | max_videos_to_save = np.min([max_videos_to_save, len(videos)]) 36 | max_length = videos[0].shape[0] 37 | for i in range(max_videos_to_save): 38 | if videos[i].shape[0]>max_length: 39 | max_length = videos[i].shape[0] 40 | 41 | # pad rollouts to all be same length 42 | for i in range(max_videos_to_save): 43 | if videos[i].shape[0] 0, "Figure logging requires input shape [batch x figures]!" 54 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 55 | 56 | def log_figure(self, figure, name, step, phase): 57 | """figure: matplotlib.pyplot figure handle""" 58 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 59 | 60 | def log_graph(self, array, name, step, phase): 61 | """figure: matplotlib.pyplot figure handle""" 62 | im = plot_graph(array) 63 | self._summ_writer.add_image('{}_{}'.format(name, phase), im, step) 64 | 65 | def dump_scalars(self, log_path=None): 66 | log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path 67 | self._summ_writer.export_scalars_to_json(log_path) 68 | 69 | def flush(self): 70 | self._summ_writer.flush() 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /hw4/cs285/models/__pycache__/ff_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/models/__pycache__/ff_model.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/models/ff_model.py: 
-------------------------------------------------------------------------------- 1 | from cs285.infrastructure.utils import normalize, unnormalize, MLP 2 | import numpy as np 3 | import torch 4 | from torch import nn 5 | 6 | class FFModel: 7 | def __init__(self, ac_dim, ob_dim, n_layers, size, device, learning_rate = 0.001): 8 | # init vars 9 | self.device = device 10 | 11 | #TODO - specify ouput dim and input dim of delta func MLP 12 | self.delta_func = MLP(input_dim = ob_dim + ac_dim, 13 | output_dim = ob_dim, 14 | n_layers = n_layers, 15 | size = size, 16 | device = self.device, 17 | discrete = True) 18 | 19 | #TODO - define the delta func optimizer. Adam optimizer will work well. 20 | self.optimizer = torch.optim.Adam(self.delta_func.parameters(), lr = learning_rate) 21 | 22 | ############################# 23 | 24 | def get_prediction(self, obs, acs, data_statistics): 25 | if len(obs.shape) == 1 or len(acs.shape) == 1: 26 | obs = np.squeeze(obs)[None] 27 | acs = np.squeeze(acs)[None] 28 | 29 | norm_obs = normalize(obs, data_statistics['obs_mean'], data_statistics['obs_std']) 30 | norm_acs = normalize(acs, data_statistics['acs_mean'], data_statistics['acs_std']) 31 | 32 | norm_input = torch.Tensor(np.concatenate((norm_obs, norm_acs), axis = 1)).to(self.device) 33 | norm_delta = self.delta_func(norm_input).cpu().detach().numpy() 34 | 35 | delta = unnormalize(norm_delta, data_statistics['delta_mean'], data_statistics['delta_std']) 36 | return obs + delta 37 | 38 | def update(self, observations, actions, next_observations, data_statistics): 39 | 40 | norm_obs = normalize(np.squeeze(observations), data_statistics['obs_mean'], data_statistics['obs_std']) 41 | norm_acs = normalize(np.squeeze(actions), data_statistics['acs_mean'], data_statistics['acs_std']) 42 | 43 | pred_delta = self.delta_func(torch.Tensor(np.concatenate((norm_obs, norm_acs), axis = 1)).to(self.device)) 44 | true_delta = torch.Tensor(normalize(next_observations - observations, 
data_statistics['delta_mean'], data_statistics['delta_std'])).to(self.device) 45 | 46 | # TODO(Q1) Define a loss function that takes as input normalized versions of predicted change in state and true change in state 47 | loss = nn.functional.mse_loss(true_delta, pred_delta) 48 | self.optimizer.zero_grad() 49 | loss.backward() 50 | self.optimizer.step() 51 | 52 | return loss.item() 53 | -------------------------------------------------------------------------------- /hw4/cs285/policies/MPC_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class MPCPolicy: 4 | def __init__(self, 5 | env, 6 | ac_dim, 7 | dyn_models, 8 | horizon, 9 | N, 10 | **kwargs): 11 | super().__init__(**kwargs) 12 | 13 | # init vars 14 | self.env = env 15 | self.dyn_models = dyn_models 16 | self.horizon = horizon 17 | self.N = N 18 | self.data_statistics = None # NOTE must be updated from elsewhere 19 | 20 | self.ob_dim = self.env.observation_space.shape[0] 21 | 22 | # action space 23 | self.ac_space = self.env.action_space 24 | self.ac_dim = ac_dim 25 | self.low = self.ac_space.low 26 | self.high = self.ac_space.high 27 | 28 | def sample_action_sequences(self, num_sequences, horizon): 29 | # TODO(Q1) uniformly sample trajectories and return an array of 30 | # dimensions (num_sequences, horizon, self.ac_dim) 31 | 32 | random_action_sequences = np.random.uniform(self.low, self.high, (num_sequences, horizon, self.ac_dim)) 33 | return random_action_sequences 34 | 35 | def get_action(self, obs): 36 | 37 | if self.data_statistics is None: 38 | #print("WARNING: performing random actions.") 39 | return self.sample_action_sequences(num_sequences = 1, horizon = 1)[0, 0] 40 | 41 | #sample random actions (Nxhorizon) 42 | candidate_action_sequences = self.sample_action_sequences(num_sequences=self.N, horizon=self.horizon) 43 | 44 | # a list you can use for storing the predicted reward for each candidate sequence 45 | 
predicted_rewards_per_ens = [] 46 | 47 | for model in self.dyn_models: 48 | # TODO(Q2) 49 | # for each candidate action sequence, predict a sequence of 50 | # states for each dynamics model in your ensemble 51 | 52 | # once you have a sequence of predicted states from each model in your 53 | # ensemble, calculate the reward for each sequence using self.env.get_reward (See files in envs to see how to call this) 54 | sim_obs = np.tile(obs, (self.N, 1)) 55 | model_rewards = np.zeros(self.N) 56 | 57 | for t in range(self.horizon): 58 | rew, _ = self.env.get_reward(sim_obs, candidate_action_sequences[:, t, :]) 59 | model_rewards += rew 60 | sim_obs = model.get_prediction(sim_obs, candidate_action_sequences[:, t, :], self.data_statistics) 61 | predicted_rewards_per_ens.append(model_rewards) 62 | 63 | # calculate mean_across_ensembles(predicted rewards). 64 | # the matrix dimensions should change as follows: [ens,N] --> N 65 | predicted_rewards = np.mean(predicted_rewards_per_ens, axis = 0) # TODO(Q2) 66 | 67 | # pick the action sequence and return the 1st element of that sequence 68 | best_index = np.argmax(predicted_rewards) #TODO(Q2) 69 | best_action_sequence = candidate_action_sequences[best_index] #TODO(Q2) 70 | action_to_take = best_action_sequence[0] # TODO(Q2) 71 | return action_to_take 72 | -------------------------------------------------------------------------------- /hw4/cs285/policies/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw4/cs285/policies/__pycache__/MPC_policy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/policies/__pycache__/MPC_policy.cpython-37.pyc 
-------------------------------------------------------------------------------- /hw4/cs285/policies/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/policies/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285_hw4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285_hw4.pdf -------------------------------------------------------------------------------- /hw4/results/problem-1/n500_arch1x32_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-1/n500_arch1x32_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-1/n500_arch1x32_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-1/n500_arch1x32_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-1/n500_arch2x250_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-1/n500_arch2x250_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-1/n500_arch2x250_predictions.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-1/n500_arch2x250_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-1/n5_arch2x250_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-1/n5_arch2x250_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-1/n5_arch2x250_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-1/n5_arch2x250_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/events.out.tfevents.1594352996.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/events.out.tfevents.1594352996.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/itr_0_losses.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-3-cheetah/cheetah_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-cheetah/cheetah_returns.png -------------------------------------------------------------------------------- /hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/events.out.tfevents.1594417421.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/events.out.tfevents.1594417421.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/events.out.tfevents.1594417429.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/events.out.tfevents.1594417429.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-3-obstacles/obstacles_returns.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-obstacles/obstacles_returns.png -------------------------------------------------------------------------------- /hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/events.out.tfevents.1594417416.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/events.out.tfevents.1594417416.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/itr_0_predictions.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-3-reacher/reacher_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-reacher/reacher_returns.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/ensemble_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/ensemble_comp.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/events.out.tfevents.1594364771.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/events.out.tfevents.1594364771.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/itr_0_losses.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/events.out.tfevents.1594364781.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/events.out.tfevents.1594364781.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/itr_0_losses.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/events.out.tfevents.1594364793.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/events.out.tfevents.1594364793.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/horizon_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/horizon_comp.png -------------------------------------------------------------------------------- 
/hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/events.out.tfevents.1594364548.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/events.out.tfevents.1594364548.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/itr_0_predictions.png 
-------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/events.out.tfevents.1594364556.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/events.out.tfevents.1594364556.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/itr_0_predictions.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/events.out.tfevents.1594364512.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/events.out.tfevents.1594364512.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/itr_0_predictions.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/events.out.tfevents.1594364663.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/events.out.tfevents.1594364663.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/itr_0_losses.png -------------------------------------------------------------------------------- 
/hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/events.out.tfevents.1594364620.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/events.out.tfevents.1594364620.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/itr_0_losses.png 
-------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/numseq_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/numseq_comp.png -------------------------------------------------------------------------------- /hw4/setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup 3 | 4 | setup( 5 | name='cs285', 6 | version='0.1.0', 7 | packages=['cs285'], 8 | ) -------------------------------------------------------------------------------- /hw5/README.md: -------------------------------------------------------------------------------- 1 | # Section 5 Exploration in RL 2 | 3 | Below is the report for HW5. All data used can be found in the results folder. To view the tensorboard for a specific part navigate to that part's folder and run 4 | ```commandline 5 | tensorboard --logdir . 
6 | ``` 7 | All commands used will be in the README.txt 8 | 9 | ## Problem 1 10 | 11 | First we run our actor-critic on the pointmass environment with no extra exploration tactic in order to establish a baseline performance: 12 | 13 | ![No Exploration](results/problem-1-none/no_exploration_returns.png) 14 | 15 | It can be seen that the agent only managed to find the goal area once out of three tries (with seed 1). It is clear from these trials that the agent would benefit from an exploration bonus. 16 | 17 | First the the histogram method is tested across the same three seeds: 18 | 19 | ![Histogram](results/problem-1-hist/hist_returns.png) 20 | 21 | With a histogram bonus all three runs are able to find the bonus area, and sooner in the case of the 1 seed. 22 | 23 | ## Problem 2 24 | 25 | Now the Radial Basis Function algorithm is tested in the same environment: 26 | 27 | ![RBF](results/problem-2/rbf_returns.png) 28 | 29 | While the RBF exploration performs better than no bonus it seems to perform worse than the histogram method. Since it is also more computationally expensive it seems that the histogram model is preferable when it is able to be used (small state spaces). 30 | 31 | ## Problem 3 32 | 33 | Finally the exemplar model is tested: 34 | 35 | ![Ex2](results/problem-3/ex2_returns.png) 36 | 37 | Once again the exemplar model provides a big boost in performance from no boost, but seems to fall short of the histogram's performance. This makes a lot of sense: the exemplar is really a sort of just an approximation of the histogram. While it is applicable to a far wider range of problems it tends to be less accurate when exact counts can be reasonably kept (when generalization doesn't make sense/is not needed). Interestingly it can be seen that with seed 21 (Red) it found the goal but then forgot about it. 
This seems to suggest that the exploration was a bit too strong in this case - the bonus steered the agent towards states it hadn't been to in a while instead of the known high reward of the goal. 38 | 39 | ## Problem 4 40 | 41 | Now the exemplar model is tested in an environment more suited to its generalization abilities: a sparse Half-Cheetah environment: 42 | 43 | ![Half Cheetah Comparison](results/problem-4/full_comp.png) 44 | 45 | It can be seen that most of the runs failed to develop any sort of rewarding strategies. Interestingly each set of hyperparameters developed only one decent strategy, all on seed 1 (no exploration is in orange, bc0.001 in light blue, bc0.0001 in pink). The two exemplar model runs developed these good strategies much earlier, but also seemed to forget about them. While it is hard to draw conclusions from such a small sample size in an environment with such high variance, it does seem that exploration does not provide a signifigant advantage in this case. 46 | -------------------------------------------------------------------------------- /hw5/README.txt: -------------------------------------------------------------------------------- 1 | 1) The code structure for this homeowrk was heavily modified in order to match the structure of the previous three homeworks. 2 | To this end the PDF does not give the most accurate location instructions but should still be referred to for questions and guidance. 3 | The logging procedure in particular was changed to match the previous assignments. 
4 | 5 | 2) Code: 6 | 7 | Code to look at: 8 | 9 | - scripts/train_ac_exploration_f18.py 10 | - envs/pointmass.py 11 | - infrastructure/rl_trainer.py (Has been changed for this homework) 12 | - infrastructure/utils.py (Has been changed foir this homework) 13 | 14 | Code to fill in as part of HW: 15 | 16 | - agents/ac_agent.py (new Exploratory_ACAgent class added) 17 | - exploration/exploration.py 18 | - exploration/density_model.py 19 | 20 | 3) Commands to run for each problem: 21 | 22 | ########################## 23 | ### P1 Hist PointMass ### 24 | ########################## 25 | 26 | python cs285/scripts/train_ac_exploration_f18.py PointMass-v0 -n 100 -b 1000 -e 3 --density_model none -s 8 --exp_name PM_bc0_s8 27 | python cs285/scripts/train_ac_exploration_f18.py PointMass-v0 -n 100 -b 1000 -e 3 --density_model hist -bc 0.01 -s 8 --exp_name PM_hist_bc0.01_s8 28 | 29 | ########################## 30 | ### P2 RBF PointMass ### 31 | ########################## 32 | 33 | python cs285/scripts/train_ac_exploration_f18.py PointMass-v0 -n 100 -b 1000 -e 3 --density_model rbf -bc 0.01 -s 8 -sig 0.2 --exp_name PM_rbf_bc0.01_s8_sig0.2 34 | 35 | ########################## 36 | ### P3 EX2 PointMass ### 37 | ########################## 38 | 39 | python cs285/scripts/train_ac_exploration_f18.py PointMass-v0 -n 100 -b 1000 -e 3 --density_model ex2 -s 8 -bc 0.05 -kl 0.1 -dlr 0.001 -dh 8 --exp_name PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8 40 | 41 | ########################### 42 | ### P4 HalfCheetah ### 43 | ########################### 44 | 45 | python cs285/scripts/train_ac_exploration_f18.py sparse-cheetah-cs285-v1 -ep 150 --discount 0.9 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 --density_model none --exp_name HC_bc0 46 | python cs285/scripts/train_ac_exploration_f18.py sparse-cheetah-cs285-v1 -ep 150 --discount 0.9 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 --density_model ex2 -bc 0.001 -kl 0.1 -dlr 0.005 -dti 1000 --exp_name HC_bc0.001_kl0.1_dlr0.005_dti1000 47 | python 
cs285/scripts/train_ac_exploration_f18.py sparse-cheetah-cs285-v1 -ep 150 --discount 0.9 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 --density_model ex2 -bc 0.0001 -kl 0.1 -dlr 0.005 -dti 10000 --exp_name HC_bc0.0001_kl0.1_dlr0.005_dti10000 48 | 49 | 4) Visualize saved tensorboard event file: 50 | 51 | $ cd cs285/data/ 52 | $ tensorboard --logdir . 53 | 54 | Then, navigate to shown url to see scalar summaries as plots (in 'scalar' tab), as well as videos (in 'images' tab) -------------------------------------------------------------------------------- /hw5/cs285/agents/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw5/cs285/agents/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/agents/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/agents/__pycache__/ac_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/agents/__pycache__/ac_agent.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/critics/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw5/cs285/critics/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/critics/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/critics/__pycache__/bootstrapped_continuous_critic.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/critics/__pycache__/bootstrapped_continuous_critic.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/critics/bootstrapped_continuous_critic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from cs285.infrastructure.utils import MLP 4 | 5 | class BootstrappedContinuousCritic: 6 | def __init__(self, hparams): 7 | self.ob_dim = hparams['ob_dim'] 8 | self.ac_dim = hparams['ac_dim'] 9 | self.size = hparams['size'] 10 | self.n_layers = hparams['n_layers'] 11 | self.device = hparams['device'] 12 | self.learning_rate = hparams['learning_rate'] 13 | self.num_target_updates = hparams['num_target_updates'] 14 | self.num_grad_steps_per_target_update = hparams['num_grad_steps_per_target_update'] 15 | self.gamma = hparams['gamma'] 16 | 17 | self.value_func = MLP(self.ob_dim, 1, self.n_layers, self.size, self.device, True) 18 | self.optimizer = torch.optim.Adam(self.value_func.parameters(), lr = self.learning_rate) 19 | 20 | def update(self, ob_no, next_ob_no, re_n, terminal_n): 21 | ''' 22 | ts_ob_no, ts_next_ob_no, ts_re_n, ts_terminal_n = map(lambda x: torch.Tensor(x).to(self.device), 23 | [ob_no, next_ob_no, re_n, terminal_n]) 24 | for _ in range(self.num_target_updates): 25 | with torch.no_grad(): 26 | ts_next_V_n = self.value_func(ts_next_ob_no).view(-1) 27 | ts_target_n = ts_re_n + (1 - ts_terminal_n) * 
self.gamma * ts_next_V_n 28 | for _ in range(self.num_grad_steps_per_target_update): 29 | ts_V_n = self.value_func(ts_ob_no).view(-1) 30 | self.optimizer.zero_grad() 31 | loss = nn.functional.mse_loss(ts_V_n, ts_target_n) 32 | loss.backward() 33 | self.optimizer.step() 34 | ''' 35 | ob, next_ob, rew, done = map(lambda x: torch.Tensor(x).to(self.device), [ob_no, next_ob_no, re_n, terminal_n]) 36 | 37 | for update in range(self.num_grad_steps_per_target_update * self.num_target_updates): 38 | if update % self.num_grad_steps_per_target_update == 0: 39 | next_value = self.value_func(next_ob).squeeze() * (1 - done) 40 | target_value = rew + self.gamma * next_value 41 | 42 | self.optimizer.zero_grad() 43 | loss = nn.functional.mse_loss(self.value_func(ob).squeeze(), target_value) 44 | loss.backward() 45 | self.optimizer.step() 46 | target_value.detach_() 47 | #''' 48 | 49 | return loss 50 | -------------------------------------------------------------------------------- /hw5/cs285/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | register( 3 | id='sparse-cheetah-cs285-v1', 4 | entry_point='cs285.envs.sparse_half_cheetah:HalfCheetahEnv', 5 | max_episode_steps=1000, 6 | ) 7 | from cs285.envs.sparse_half_cheetah import HalfCheetahEnv 8 | -------------------------------------------------------------------------------- /hw5/cs285/envs/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/envs/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/envs/__pycache__/pointmass.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/envs/__pycache__/pointmass.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/envs/__pycache__/sparse_half_cheetah.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/envs/__pycache__/sparse_half_cheetah.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/envs/sparse_half_cheetah.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, action): 11 | ################################################# 12 | ctrl = False 13 | relu = False 14 | threshold = 10.0 15 | ################################################# 16 | xposbefore = self.sim.data.qpos[0] 17 | self.do_simulation(action, self.frame_skip) 18 | xposafter = self.sim.data.qpos[0] 19 | ob = self._get_obs() 20 | # reward_ctrl = - 0.1 * np.square(action).sum() 21 | # reward_run = (xposafter - xposbefore)/self.dt 22 | ################################################# 23 | if ctrl: 24 | reward_ctrl = - 0.1 * np.square(action).sum() 25 | else: 26 | reward_ctrl = 0 27 | if abs(xposafter) <= threshold: 28 | reward_run = 0.0 29 | else: 30 | if relu: 31 | reward_run = np.sign(xposafter)*(xposafter - xposbefore)/self.dt 32 | else: 33 | reward_run = 1.0 34 | ################################################# 35 | reward = reward_ctrl + reward_run 36 | done = False 37 | return ob, reward, done, 
dict(reward_run=reward_run, reward_ctrl=reward_ctrl) 38 | 39 | def _get_obs(self): 40 | return np.concatenate([ 41 | self.sim.data.qpos.flat[1:], 42 | self.sim.data.qvel.flat, 43 | ]) 44 | 45 | def reset_model(self): 46 | qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq) 47 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 48 | self.set_state(qpos, qvel) 49 | return self._get_obs() 50 | 51 | def viewer_setup(self): 52 | self.viewer.cam.distance = self.model.stat.extent * 0.5 53 | -------------------------------------------------------------------------------- /hw5/cs285/exploration/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw5/cs285/exploration/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/exploration/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/exploration/__pycache__/density_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/exploration/__pycache__/density_model.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/exploration/__pycache__/exploration.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/exploration/__pycache__/exploration.cpython-37.pyc 
-------------------------------------------------------------------------------- /hw5/cs285/infrastructure/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw5/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/infrastructure/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/infrastructure/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/infrastructure/__pycache__/replay.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/infrastructure/__pycache__/replay.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc -------------------------------------------------------------------------------- 
/hw5/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/infrastructure/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/infrastructure/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/infrastructure/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorboardX import SummaryWriter 3 | import numpy as np 4 | 5 | class Logger: 6 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None): 7 | self._log_dir = log_dir 8 | print('########################') 9 | print('logging outputs to ', log_dir) 10 | print('########################') 11 | self._n_logged_samples = n_logged_samples 12 | self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1) 13 | 14 | def log_scalar(self, scalar, name, step_): 15 | self._summ_writer.add_scalar('{}'.format(name), scalar, step_) 16 | 17 | def log_scalars(self, scalar_dict, group_name, step, phase): 18 | """Will log all scalars in the same plot.""" 19 | self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step) 20 | 21 | def log_image(self, image, name, step): 22 | assert(len(image.shape) == 3) # [C, H, W] 23 | self._summ_writer.add_image('{}'.format(name), image, step) 24 | 25 | def log_video(self, video_frames, name, step, fps=10): 26 | assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input 
tensor for video logging!" 27 | self._summ_writer.add_video('{}'.format(name), video_frames, step, fps=fps) 28 | 29 | def log_paths_as_videos(self, paths, step, max_videos_to_save=2, fps=10, video_title='video'): 30 | 31 | # reshape the rollouts 32 | videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths] 33 | 34 | # max rollout length 35 | max_videos_to_save = np.min([max_videos_to_save, len(videos)]) 36 | max_length = videos[0].shape[0] 37 | for i in range(max_videos_to_save): 38 | if videos[i].shape[0]>max_length: 39 | max_length = videos[i].shape[0] 40 | 41 | # pad rollouts to all be same length 42 | for i in range(max_videos_to_save): 43 | if videos[i].shape[0] 0, "Figure logging requires input shape [batch x figures]!" 54 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 55 | 56 | def log_figure(self, figure, name, step, phase): 57 | """figure: matplotlib.pyplot figure handle""" 58 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 59 | 60 | def log_graph(self, array, name, step, phase): 61 | """figure: matplotlib.pyplot figure handle""" 62 | im = plot_graph(array) 63 | self._summ_writer.add_image('{}_{}'.format(name, phase), im, step) 64 | 65 | def dump_scalars(self, log_path=None): 66 | log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path 67 | self._summ_writer.export_scalars_to_json(log_path) 68 | 69 | def flush(self): 70 | self._summ_writer.flush() 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /hw5/cs285/policies/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw5/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/policies/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/policies/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285_hw5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285_hw5.pdf -------------------------------------------------------------------------------- /hw5/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.10.5 2 | mujoco-py==1.50.1.56 3 | numpy 4 | seaborn 5 | tqdm -------------------------------------------------------------------------------- /hw5/results/problem-1-hist/hist_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-hist/hist_returns.png -------------------------------------------------------------------------------- /hw5/results/problem-1-hist/seed11_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-11-30/events.out.tfevents.1595135490.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-hist/seed11_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-11-30/events.out.tfevents.1595135490.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-1-hist/seed1_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-06-41/events.out.tfevents.1595135201.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-hist/seed1_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-06-41/events.out.tfevents.1595135201.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-1-hist/seed21_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-17-01/events.out.tfevents.1595135821.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-hist/seed21_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-17-01/events.out.tfevents.1595135821.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-1-none/no_exploration_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-none/no_exploration_returns.png -------------------------------------------------------------------------------- /hw5/results/problem-1-none/seed11_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-05-20/events.out.tfevents.1595135120.DESKTOP-U53KV1A: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-none/seed11_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-05-20/events.out.tfevents.1595135120.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-1-none/seed1_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-03-45/events.out.tfevents.1595135025.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-none/seed1_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-03-45/events.out.tfevents.1595135025.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-1-none/seed21_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-06-56/events.out.tfevents.1595135216.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-none/seed21_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-06-56/events.out.tfevents.1595135216.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-2/rbf_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-2/rbf_returns.png -------------------------------------------------------------------------------- /hw5/results/problem-2/seed11_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-13-38/events.out.tfevents.1594937618.DESKTOP-U53KV1A: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-2/seed11_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-13-38/events.out.tfevents.1594937618.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-2/seed1_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-10-03/events.out.tfevents.1594937403.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-2/seed1_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-10-03/events.out.tfevents.1594937403.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-2/seed21_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-17-11/events.out.tfevents.1594937831.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-2/seed21_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-17-11/events.out.tfevents.1594937831.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-3/ex2_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-3/ex2_returns.png -------------------------------------------------------------------------------- 
/hw5/results/problem-3/seed11_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_02-37-10/events.out.tfevents.1595140630.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-3/seed11_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_02-37-10/events.out.tfevents.1595140630.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-3/seed1_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_01-08-12/events.out.tfevents.1595135292.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-3/seed1_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_01-08-12/events.out.tfevents.1595135292.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-3/seed21_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_03-56-28/events.out.tfevents.1595145388.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-3/seed21_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_03-56-28/events.out.tfevents.1595145388.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/bc0.0001/seed11_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_19-07-2020_14-18-04/events.out.tfevents.1595182684.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/bc0.0001/seed11_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_19-07-2020_14-18-04/events.out.tfevents.1595182684.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/bc0.0001/seed1_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_19-07-2020_01-08-22/events.out.tfevents.1595135302.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/bc0.0001/seed1_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_19-07-2020_01-08-22/events.out.tfevents.1595135302.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/bc0.0001/seed21_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_20-07-2020_05-22-16/events.out.tfevents.1595236936.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/bc0.0001/seed21_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_20-07-2020_05-22-16/events.out.tfevents.1595236936.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/bc0.001/seed11_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_04-18-46/events.out.tfevents.1595146726.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/bc0.001/seed11_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_04-18-46/events.out.tfevents.1595146726.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/bc0.001/seed1_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_01-08-17/events.out.tfevents.1595135297.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/bc0.001/seed1_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_01-08-17/events.out.tfevents.1595135297.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/bc0.001/seed21_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_06-40-41/events.out.tfevents.1595155241.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/bc0.001/seed21_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_06-40-41/events.out.tfevents.1595155241.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/full_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/full_comp.png -------------------------------------------------------------------------------- 
/hw5/results/problem-4/none/seed11_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_00-57-30/events.out.tfevents.1594875450.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/none/seed11_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_00-57-30/events.out.tfevents.1594875450.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/none/seed1_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_00-26-39/events.out.tfevents.1594873599.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/none/seed1_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_00-26-39/events.out.tfevents.1594873599.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/none/seed21_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_01-28-19/events.out.tfevents.1594877299.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/none/seed21_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_01-28-19/events.out.tfevents.1594877299.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup 3 | 4 | setup( 5 | name='cs285', 6 | version='0.1.0', 7 | packages=['cs285'], 8 | ) --------------------------------------------------------------------------------