├── .gitattributes ├── README.md ├── hw1 ├── README.md ├── README.txt ├── cs285 │ ├── agents │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── base_agent.cpython-37.pyc │ │ │ └── bc_agent.cpython-37.pyc │ │ └── bc_agent.py │ ├── data │ │ ├── Q1-2 │ │ │ ├── bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37 │ │ │ │ ├── events.out.tfevents.1593151299.DESKTOP-U53KV1A.17880.0 │ │ │ │ └── policy_itr_0 │ │ │ ├── bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25 │ │ │ │ ├── events.out.tfevents.1593151346.DESKTOP-U53KV1A.3080.0 │ │ │ │ └── policy_itr_0 │ │ │ ├── bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26 │ │ │ │ ├── events.out.tfevents.1593151407.DESKTOP-U53KV1A.17880.0 │ │ │ │ └── policy_itr_0 │ │ │ ├── bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13 │ │ │ │ ├── events.out.tfevents.1593151514.DESKTOP-U53KV1A.2796.0 │ │ │ │ └── policy_itr_0 │ │ │ └── bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41 │ │ │ │ ├── events.out.tfevents.1593151663.DESKTOP-U53KV1A.14512.0 │ │ │ │ └── policy_itr_0 │ │ ├── Q1-3 │ │ │ └── bc_test_bc_walker2d_Walker2d-v2_26-06-2020_20-57-08 │ │ │ │ └── events.out.tfevents.1593219429.DESKTOP-U53KV1A.388.0 │ │ └── Q2-2 │ │ │ └── dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29 │ │ │ ├── events.out.tfevents.1593231811.DESKTOP-U53KV1A.16276.0 │ │ │ ├── policy_itr_0 │ │ │ ├── policy_itr_1 │ │ │ ├── policy_itr_2 │ │ │ ├── policy_itr_3 │ │ │ ├── policy_itr_4 │ │ │ ├── policy_itr_5 │ │ │ ├── policy_itr_6 │ │ │ ├── policy_itr_7 │ │ │ ├── policy_itr_8 │ │ │ └── policy_itr_9 │ ├── expert_data │ │ ├── expert_data_Ant-v2.pkl │ │ ├── expert_data_HalfCheetah-v2.pkl │ │ ├── expert_data_Hopper-v2.pkl │ │ ├── expert_data_Humanoid-v2.pkl │ │ └── expert_data_Walker2d-v2.pkl │ ├── infrastructure │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── logger.cpython-37.pyc │ │ │ ├── replay_buffer.cpython-37.pyc │ │ │ ├── rl_trainer.cpython-37.pyc │ │ │ ├── tf_utils.cpython-37.pyc │ │ │ └── 
utils.cpython-37.pyc │ │ ├── logger.py │ │ ├── replay_buffer.py │ │ ├── rl_trainer.py │ │ └── utils.py │ ├── policies │ │ ├── MLP_policy.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── MLP_policy.cpython-37.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── base_policy.cpython-37.pyc │ │ │ └── loaded_gaussian_policy.cpython-37.pyc │ │ ├── experts │ │ │ ├── Ant.pkl │ │ │ ├── HalfCheetah.pkl │ │ │ ├── Hopper.pkl │ │ │ ├── Humanoid.pkl │ │ │ └── Walker2d.pkl │ │ └── loaded_gaussian_policy.py │ └── scripts │ │ └── run_hw1_behavior_cloning.py ├── cs285_hw1.pdf ├── downloads │ └── mjpro150 │ │ ├── bin │ │ ├── basic │ │ ├── compile │ │ ├── derivative │ │ ├── libglew.so │ │ ├── libglewegl.so │ │ ├── libglewosmesa.so │ │ ├── libglfw.so.3 │ │ ├── libmujoco150.so │ │ ├── libmujoco150nogl.so │ │ ├── record │ │ ├── simulate │ │ └── test │ │ ├── doc │ │ ├── README.txt │ │ └── REFERENCE.txt │ │ ├── include │ │ ├── glfw3.h │ │ ├── mjdata.h │ │ ├── mjmodel.h │ │ ├── mjrender.h │ │ ├── mjvisualize.h │ │ ├── mjxmacro.h │ │ └── mujoco.h │ │ ├── model │ │ ├── humanoid.xml │ │ └── humanoid100.xml │ │ └── sample │ │ ├── basic.cpp │ │ ├── compile.cpp │ │ ├── derivative.cpp │ │ ├── makefile │ │ ├── record.cpp │ │ ├── simulate.cpp │ │ └── test.cpp ├── requirements.txt ├── results │ ├── Q1-2 │ │ ├── bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37 │ │ │ ├── events.out.tfevents.1593151299.DESKTOP-U53KV1A.17880.0 │ │ │ └── policy_itr_0 │ │ ├── bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25 │ │ │ ├── events.out.tfevents.1593151346.DESKTOP-U53KV1A.3080.0 │ │ │ └── policy_itr_0 │ │ ├── bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26 │ │ │ ├── events.out.tfevents.1593151407.DESKTOP-U53KV1A.17880.0 │ │ │ └── policy_itr_0 │ │ ├── bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13 │ │ │ ├── events.out.tfevents.1593151514.DESKTOP-U53KV1A.2796.0 │ │ │ └── policy_itr_0 │ │ └── bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41 │ │ │ ├── events.out.tfevents.1593151663.DESKTOP-U53KV1A.14512.0 │ │ 
│ └── policy_itr_0 │ ├── Q1-3 │ │ ├── bc-eval-avg.PNG │ │ ├── bc-eval-std.PNG │ │ └── bc_test_bc_walker2d_Walker2d-v2_26-06-2020_20-57-08 │ │ │ └── events.out.tfevents.1593219429.DESKTOP-U53KV1A.388.0 │ └── Q2-2 │ │ ├── dagger-eval-avg.PNG │ │ ├── dagger-eval-std.PNG │ │ └── dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29 │ │ ├── events.out.tfevents.1593231811.DESKTOP-U53KV1A.16276.0 │ │ ├── policy_itr_0 │ │ ├── policy_itr_1 │ │ ├── policy_itr_2 │ │ ├── policy_itr_3 │ │ ├── policy_itr_4 │ │ ├── policy_itr_5 │ │ ├── policy_itr_6 │ │ ├── policy_itr_7 │ │ ├── policy_itr_8 │ │ └── policy_itr_9 └── setup.py ├── hw2 ├── README.md ├── README.txt ├── cs285 │ ├── agents │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── pg_agent.cpython-37.pyc │ │ └── pg_agent.py │ ├── infrastructure │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── logger.cpython-37.pyc │ │ │ ├── replay_buffer.cpython-37.pyc │ │ │ ├── rl_trainer.cpython-37.pyc │ │ │ └── utils.cpython-37.pyc │ │ ├── logger.py │ │ ├── replay_buffer.py │ │ ├── rl_trainer.py │ │ └── utils.py │ ├── policies │ │ ├── MLP_policy.py │ │ ├── __init__.py │ │ └── __pycache__ │ │ │ ├── MLP_policy.cpython-37.pyc │ │ │ └── __init__.cpython-37.pyc │ └── scripts │ │ └── run_hw2_policy_gradient.py ├── cs285_hw2.pdf ├── requirements.txt ├── results │ ├── bonus-gae-cp │ │ ├── gae_cp_comp.png │ │ ├── pg_cp_lambda0.4_CartPole-v0_28-07-2020_00-18-04 │ │ │ └── events.out.tfevents.1595909884.DESKTOP-U53KV1A │ │ ├── pg_cp_lambda0.8_CartPole-v0_28-07-2020_00-17-56 │ │ │ └── events.out.tfevents.1595909876.DESKTOP-U53KV1A │ │ ├── pg_cp_lambda0.95_CartPole-v0_28-07-2020_00-17-50 │ │ │ └── events.out.tfevents.1595909870.DESKTOP-U53KV1A │ │ └── pg_cp_lambda1_CartPole-v0_28-07-2020_00-17-41 │ │ │ └── events.out.tfevents.1595909861.DESKTOP-U53KV1A │ ├── bonus-gae │ │ ├── gae_hc_comp.png │ │ ├── pg_lambda0.4_HalfCheetah-v2_27-07-2020_21-59-54 │ │ │ └── 
events.out.tfevents.1595901594.DESKTOP-U53KV1A │ │ ├── pg_lambda0.8_HalfCheetah-v2_27-07-2020_19-21-50 │ │ │ └── events.out.tfevents.1595892110.DESKTOP-U53KV1A │ │ ├── pg_lambda0.95_HalfCheetah-v2_27-07-2020_19-21-45 │ │ │ └── events.out.tfevents.1595892105.DESKTOP-U53KV1A │ │ ├── pg_lambda0.975_HalfCheetah-v2_27-07-2020_19-21-43 │ │ │ └── events.out.tfevents.1595892103.DESKTOP-U53KV1A │ │ ├── pg_lambda0.99_HalfCheetah-v2_27-07-2020_21-59-27 │ │ │ └── events.out.tfevents.1595901567.DESKTOP-U53KV1A │ │ ├── pg_lambda0.9_HalfCheetah-v2_27-07-2020_21-59-37 │ │ │ └── events.out.tfevents.1595901577.DESKTOP-U53KV1A │ │ ├── pg_lambda0_HalfCheetah-v2_27-07-2020_19-21-48 │ │ │ └── events.out.tfevents.1595892108.DESKTOP-U53KV1A │ │ └── pg_lambda1_HalfCheetah-v2_27-07-2020_21-59-20 │ │ │ └── events.out.tfevents.1595901560.DESKTOP-U53KV1A │ ├── problem-3-lb │ │ ├── eval-avg-cp-lb.png │ │ ├── pg_lb_no_rtg_dna_CartPole-v0_29-06-2020_00-52-35 │ │ │ └── events.out.tfevents.1593406355.DESKTOP-U53KV1A │ │ ├── pg_lb_rtg_dna_CartPole-v0_29-06-2020_00-52-41 │ │ │ └── events.out.tfevents.1593406361.DESKTOP-U53KV1A │ │ └── pg_lb_rtg_na_CartPole-v0_29-06-2020_00-52-47 │ │ │ └── events.out.tfevents.1593406367.DESKTOP-U53KV1A │ ├── problem-3-sb │ │ ├── eval-avg-cp-sb.png │ │ ├── pg_sb_no_rtg_dna_CartPole-v0_29-06-2020_00-45-57 │ │ │ └── events.out.tfevents.1593405957.DESKTOP-U53KV1A │ │ ├── pg_sb_rtg_dna_CartPole-v0_29-06-2020_00-46-05 │ │ │ └── events.out.tfevents.1593405965.DESKTOP-U53KV1A │ │ └── pg_sb_rtg_na_CartPole-v0_29-06-2020_00-46-11 │ │ │ └── events.out.tfevents.1593405971.DESKTOP-U53KV1A │ ├── problem-4 │ │ ├── ideal-params-comparison.png │ │ ├── pg_ip_b100_lr0.06_InvertedPendulum-v2_01-07-2020_00-34-14 │ │ │ └── events.out.tfevents.1593578054.DESKTOP-U53KV1A │ │ ├── pg_ip_b80_lr0.06_InvertedPendulum-v2_01-07-2020_00-33-01 │ │ │ └── events.out.tfevents.1593577981.DESKTOP-U53KV1A │ │ ├── pg_ip_b90_lr0.05_InvertedPendulum-v2_01-07-2020_00-26-36 │ │ │ └── 
events.out.tfevents.1593577596.DESKTOP-U53KV1A │ │ ├── pg_ip_b90_lr0.06_InvertedPendulum-v2_01-07-2020_00-27-32 │ │ │ └── events.out.tfevents.1593577652.DESKTOP-U53KV1A │ │ └── pg_ip_b90_lr0.07_InvertedPendulum-v2_01-07-2020_00-28-39 │ │ │ └── events.out.tfevents.1593577719.DESKTOP-U53KV1A │ ├── problem-6 │ │ ├── eval-avg-ll.png │ │ └── pg_ll_b40000_r0.005_LunarLanderContinuous-v2_29-06-2020_21-30-30 │ │ │ └── events.out.tfevents.1593480630.DESKTOP-U53KV1A │ ├── problem-7-ideal │ │ ├── cheetah-ideal-eval-avg.png │ │ ├── pg_hc_b30000_lr0.02_HalfCheetah-v2_30-06-2020_20-07-51 │ │ │ └── events.out.tfevents.1593562071.DESKTOP-U53KV1A │ │ ├── pg_hc_nnb_b30000_lr0.02_HalfCheetah-v2_30-06-2020_22-15-46 │ │ │ └── events.out.tfevents.1593569746.DESKTOP-U53KV1A │ │ ├── pg_hc_rtg_b30000_lr0.02_HalfCheetah-v2_30-06-2020_20-44-51 │ │ │ └── events.out.tfevents.1593564291.DESKTOP-U53KV1A │ │ └── pg_hc_rtg_nnb_b30000_lr0.02_HalfCheetah-v2_30-06-2020_21-19-20 │ │ │ └── events.out.tfevents.1593566360.DESKTOP-U53KV1A │ └── problem-7-search │ │ ├── cheetah-search-eval-avg.png │ │ ├── pg_hc_b10000_lr0.005_nnbaseline_HalfCheetah-v2_30-06-2020_01-48-46 │ │ └── events.out.tfevents.1593496126.DESKTOP-U53KV1A │ │ ├── pg_hc_b10000_lr0.01_nnbaseline_HalfCheetah-v2_30-06-2020_00-21-43 │ │ └── events.out.tfevents.1593490903.DESKTOP-U53KV1A │ │ ├── pg_hc_b10000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_00-37-16 │ │ └── events.out.tfevents.1593491836.DESKTOP-U53KV1A │ │ ├── pg_hc_b30000_lr0.005_nnbaseline_HalfCheetah-v2_30-06-2020_00-51-38 │ │ └── events.out.tfevents.1593492698.DESKTOP-U53KV1A │ │ ├── pg_hc_b30000_lr0.01_nnbaseline_HalfCheetah-v2_30-06-2020_02-09-33 │ │ └── events.out.tfevents.1593497373.DESKTOP-U53KV1A │ │ └── pg_hc_b30000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_11-45-56 │ │ └── events.out.tfevents.1593531956.DESKTOP-U53KV1A └── setup.py ├── hw3 ├── README.md ├── README.txt ├── cs285 │ ├── agents │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ 
│ ├── ac_agent.cpython-37.pyc │ │ │ └── dqn_agent.cpython-37.pyc │ │ ├── ac_agent.py │ │ ├── dqn_agent.py │ │ └── pg_agent.py │ ├── critics │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── bootstrapped_continuous_critic.cpython-37.pyc │ │ │ └── dqn_critic.cpython-37.pyc │ │ ├── bootstrapped_continuous_critic.py │ │ └── dqn_critic.py │ ├── infrastructure │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── atari_wrappers.cpython-37.pyc │ │ │ ├── dqn_utils.cpython-37.pyc │ │ │ ├── logger.cpython-37.pyc │ │ │ ├── models.cpython-37.pyc │ │ │ ├── replay_buffer.cpython-37.pyc │ │ │ ├── rl_trainer.cpython-37.pyc │ │ │ └── utils.cpython-37.pyc │ │ ├── atari_wrappers.py │ │ ├── dqn_utils.py │ │ ├── logger.py │ │ ├── models.py │ │ ├── replay_buffer.py │ │ ├── rl_trainer.py │ │ └── utils.py │ ├── policies │ │ ├── MLP_policy.py │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── MLP_policy.cpython-37.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── argmax_policy.cpython-37.pyc │ │ └── argmax_policy.py │ └── scripts │ │ ├── run_hw3_actor_critic.py │ │ └── run_hw3_dqn.py ├── cs285_hw3.pdf ├── lunar_lander.py ├── requirements.txt ├── results │ ├── Q1 │ │ ├── dqn_double_q_test_pong_PongNoFrameskip-v4_06-07-2020_01-47-43 │ │ │ ├── events.out.tfevents.1594014463.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.15504.video004000.mp4 │ │ ├── dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39 │ │ │ ├── events.out.tfevents.1593891399.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.15892.video004000.mp4 │ │ └── pong-comp.png │ ├── Q2 │ │ ├── double_dqn │ │ │ ├── dqn_double_q_double_dqn_ll_seed1_LunarLander-v2_04-07-2020_21-47-22 │ │ │ │ └── events.out.tfevents.1593913642.DESKTOP-U53KV1A │ │ │ ├── dqn_double_q_double_dqn_ll_seed2_LunarLander-v2_04-07-2020_21-47-30 │ │ │ │ └── events.out.tfevents.1593913650.DESKTOP-U53KV1A │ │ │ └── dqn_double_q_double_dqn_ll_seed3_LunarLander-v2_04-07-2020_21-47-38 │ │ │ │ └── 
events.out.tfevents.1593913658.DESKTOP-U53KV1A │ │ ├── double_dqn_eval_ll.png │ │ └── dqn │ │ │ ├── dqn_dqn_ll_seed1_LunarLander-v2_04-07-2020_21-46-58 │ │ │ └── events.out.tfevents.1593913618.DESKTOP-U53KV1A │ │ │ ├── dqn_dqn_ll_seed2_LunarLander-v2_04-07-2020_21-47-06 │ │ │ └── events.out.tfevents.1593913626.DESKTOP-U53KV1A │ │ │ └── dqn_dqn_ll_seed3_LunarLander-v2_04-07-2020_21-47-15 │ │ │ └── events.out.tfevents.1593913635.DESKTOP-U53KV1A │ ├── Q3-init │ │ ├── dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39 │ │ │ ├── events.out.tfevents.1593891399.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.15892.video004000.mp4 │ │ ├── dqn_test_pong_PongNoFrameskip-v4_06-07-2020_01-46-09 │ │ │ ├── events.out.tfevents.1594014369.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.14904.video005000.mp4 │ │ └── init_pong_comp.png │ ├── Q3-lrmult │ │ ├── dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39 │ │ │ ├── events.out.tfevents.1593891399.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.15892.video004000.mp4 │ │ ├── dqn_test_pong_lrmult0.5_PongNoFrameskip-v4_06-07-2020_01-51-13 │ │ │ ├── events.out.tfevents.1594014673.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.12644.video004000.mp4 │ │ ├── dqn_test_pong_lrmult2.0_PongNoFrameskip-v4_06-07-2020_01-51-32 │ │ │ ├── events.out.tfevents.1594014692.DESKTOP-U53KV1A │ │ │ └── openaigym.video.0.9872.video004000.mp4 │ │ └── lrmult_pong_comp.png │ ├── Q4-1-100 │ │ ├── 5-seed-1-100.png │ │ ├── ac_1_100_CartPole-v0_05-07-2020_14-59-37 │ │ │ └── events.out.tfevents.1593975577.DESKTOP-U53KV1A │ │ ├── ac_1_100_CartPole-v0_05-07-2020_16-14-01 │ │ │ └── events.out.tfevents.1593980041.DESKTOP-U53KV1A │ │ ├── ac_1_100_CartPole-v0_05-07-2020_16-20-04 │ │ │ └── events.out.tfevents.1593980404.DESKTOP-U53KV1A │ │ ├── ac_1_100_CartPole-v0_05-07-2020_16-23-26 │ │ │ └── events.out.tfevents.1593980606.DESKTOP-U53KV1A │ │ └── ac_1_100_CartPole-v0_05-07-2020_16-27-20 │ │ │ └── events.out.tfevents.1593980840.DESKTOP-U53KV1A │ ├── Q4-10-10 │ │ ├── 
5-seed-10-10.png │ │ ├── ac_10_10_CartPole-v0_05-07-2020_15-12-48 │ │ │ └── events.out.tfevents.1593976368.DESKTOP-U53KV1A │ │ ├── ac_10_10_CartPole-v0_05-07-2020_15-47-19 │ │ │ └── events.out.tfevents.1593978439.DESKTOP-U53KV1A │ │ ├── ac_10_10_CartPole-v0_05-07-2020_15-49-24 │ │ │ └── events.out.tfevents.1593978564.DESKTOP-U53KV1A │ │ ├── ac_10_10_CartPole-v0_05-07-2020_15-59-55 │ │ │ └── events.out.tfevents.1593979195.DESKTOP-U53KV1A │ │ └── ac_10_10_CartPole-v0_05-07-2020_16-02-28 │ │ │ └── events.out.tfevents.1593979348.DESKTOP-U53KV1A │ ├── Q4 │ │ ├── ac_100_1_CartPole-v0_05-07-2020_14-54-21 │ │ │ └── events.out.tfevents.1593975261.DESKTOP-U53KV1A │ │ ├── ac_10_10_CartPole-v0_05-07-2020_15-12-48 │ │ │ └── events.out.tfevents.1593976368.DESKTOP-U53KV1A │ │ ├── ac_1_100_CartPole-v0_05-07-2020_14-59-37 │ │ │ └── events.out.tfevents.1593975577.DESKTOP-U53KV1A │ │ ├── ac_1_1_CartPole-v0_05-07-2020_14-51-47 │ │ │ └── events.out.tfevents.1593975107.DESKTOP-U53KV1A │ │ └── q4-comp.png │ ├── Q5-HC │ │ ├── ac_10_10_HalfCheetah-v2_05-07-2020_18-28-51 │ │ │ └── events.out.tfevents.1593988131.DESKTOP-U53KV1A │ │ ├── hc_comp.png │ │ └── pg_hc_b30000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_11-45-56 │ │ │ └── events.out.tfevents.1593531956.DESKTOP-U53KV1A │ └── Q5-IP │ │ ├── IP_ac_eval.png │ │ ├── ac_10_10_InvertedPendulum-v2_05-07-2020_18-28-19 │ │ └── events.out.tfevents.1593988099.DESKTOP-U53KV1A │ │ └── ac_b30k_10_10_InvertedPendulum-v2_05-07-2020_19-45-39 │ │ └── events.out.tfevents.1593992739.DESKTOP-U53KV1A └── setup.py ├── hw4 ├── README.md ├── README.txt ├── cs285 │ ├── agents │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ └── mb_agent.cpython-37.pyc │ │ └── mb_agent.py │ ├── envs │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ └── __init__.cpython-37.pyc │ │ ├── ant │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-35.pyc │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ ├── ant.cpython-35.pyc │ │ │ │ └── 
ant.cpython-37.pyc │ │ │ └── ant.py │ │ ├── cheetah │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-35.pyc │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ ├── cheetah.cpython-35.pyc │ │ │ │ └── cheetah.cpython-37.pyc │ │ │ └── cheetah.py │ │ ├── obstacles │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-35.pyc │ │ │ │ ├── __init__.cpython-37.pyc │ │ │ │ ├── obstacles_env.cpython-35.pyc │ │ │ │ └── obstacles_env.cpython-37.pyc │ │ │ └── obstacles_env.py │ │ └── reacher │ │ │ ├── __init__.py │ │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-35.pyc │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── reacher_env.cpython-35.pyc │ │ │ └── reacher_env.cpython-37.pyc │ │ │ ├── assets │ │ │ └── sawyer.xml │ │ │ └── reacher_env.py │ ├── infrastructure │ │ ├── __init__.py │ │ ├── __pycache__ │ │ │ ├── __init__.cpython-37.pyc │ │ │ ├── logger.cpython-37.pyc │ │ │ ├── replay_buffer.cpython-37.pyc │ │ │ ├── rl_trainer.cpython-37.pyc │ │ │ └── utils.cpython-37.pyc │ │ ├── logger.py │ │ ├── replay_buffer.py │ │ ├── rl_trainer.py │ │ └── utils.py │ ├── models │ │ ├── __pycache__ │ │ │ └── ff_model.cpython-37.pyc │ │ └── ff_model.py │ ├── policies │ │ ├── MPC_policy.py │ │ ├── __init__.py │ │ └── __pycache__ │ │ │ ├── MPC_policy.cpython-37.pyc │ │ │ └── __init__.cpython-37.pyc │ └── scripts │ │ └── run_hw4_mb.py ├── cs285_hw4.pdf ├── results │ ├── problem-1 │ │ ├── n500_arch1x32_losses.png │ │ ├── n500_arch1x32_predictions.png │ │ ├── n500_arch2x250_losses.png │ │ ├── n500_arch2x250_predictions.png │ │ ├── n5_arch2x250_losses.png │ │ └── n5_arch2x250_predictions.png │ ├── problem-2 │ │ └── mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56 │ │ │ ├── events.out.tfevents.1594352996.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ ├── problem-3-cheetah │ │ ├── cheetah_returns.png │ │ └── mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41 │ │ │ ├── 
events.out.tfevents.1594417421.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ ├── problem-3-obstacles │ │ ├── mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49 │ │ │ ├── events.out.tfevents.1594417429.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ │ └── obstacles_returns.png │ ├── problem-3-reacher │ │ ├── mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36 │ │ │ ├── events.out.tfevents.1594417416.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ │ └── reacher_returns.png │ ├── problem-4-ensemble │ │ ├── ensemble_comp.png │ │ ├── mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11 │ │ │ ├── events.out.tfevents.1594364771.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ │ ├── mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21 │ │ │ ├── events.out.tfevents.1594364781.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ │ └── mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33 │ │ │ ├── events.out.tfevents.1594364793.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ ├── problem-4-horizon │ │ ├── horizon_comp.png │ │ ├── mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28 │ │ │ ├── events.out.tfevents.1594364548.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ │ ├── mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36 │ │ │ ├── events.out.tfevents.1594364556.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── itr_0_predictions.png │ │ └── mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52 │ │ │ ├── events.out.tfevents.1594364512.DESKTOP-U53KV1A │ │ │ ├── itr_0_losses.npy │ │ │ ├── itr_0_losses.png │ │ │ └── 
itr_0_predictions.png │ └── problem-4-numseq │ │ ├── mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23 │ │ ├── events.out.tfevents.1594364663.DESKTOP-U53KV1A │ │ ├── itr_0_losses.npy │ │ ├── itr_0_losses.png │ │ └── itr_0_predictions.png │ │ ├── mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40 │ │ ├── events.out.tfevents.1594364620.DESKTOP-U53KV1A │ │ ├── itr_0_losses.npy │ │ ├── itr_0_losses.png │ │ └── itr_0_predictions.png │ │ └── numseq_comp.png └── setup.py └── hw5 ├── README.md ├── README.txt ├── cs285 ├── agents │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── ac_agent.cpython-37.pyc │ └── ac_agent.py ├── critics │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ └── bootstrapped_continuous_critic.cpython-37.pyc │ └── bootstrapped_continuous_critic.py ├── envs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── pointmass.cpython-37.pyc │ │ └── sparse_half_cheetah.cpython-37.pyc │ ├── pointmass.py │ └── sparse_half_cheetah.py ├── exploration │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── density_model.cpython-37.pyc │ │ └── exploration.cpython-37.pyc │ ├── density_model.py │ └── exploration.py ├── infrastructure │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-37.pyc │ │ ├── logger.cpython-37.pyc │ │ ├── replay.cpython-37.pyc │ │ ├── replay_buffer.cpython-37.pyc │ │ ├── rl_trainer.cpython-37.pyc │ │ └── utils.cpython-37.pyc │ ├── logger.py │ ├── replay_buffer.py │ ├── rl_trainer.py │ └── utils.py ├── policies │ ├── MLP_policy.py │ ├── __init__.py │ └── __pycache__ │ │ ├── MLP_policy.cpython-37.pyc │ │ └── __init__.cpython-37.pyc └── scripts │ └── train_ac_exploration_f18.py ├── cs285_hw5.pdf ├── requirements.txt ├── results ├── problem-1-hist │ ├── hist_returns.png │ ├── seed11_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-11-30 │ │ └── events.out.tfevents.1595135490.DESKTOP-U53KV1A │ ├── 
seed1_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-06-41 │ │ └── events.out.tfevents.1595135201.DESKTOP-U53KV1A │ └── seed21_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-17-01 │ │ └── events.out.tfevents.1595135821.DESKTOP-U53KV1A ├── problem-1-none │ ├── no_exploration_returns.png │ ├── seed11_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-05-20 │ │ └── events.out.tfevents.1595135120.DESKTOP-U53KV1A │ ├── seed1_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-03-45 │ │ └── events.out.tfevents.1595135025.DESKTOP-U53KV1A │ └── seed21_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-06-56 │ │ └── events.out.tfevents.1595135216.DESKTOP-U53KV1A ├── problem-2 │ ├── rbf_returns.png │ ├── seed11_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-13-38 │ │ └── events.out.tfevents.1594937618.DESKTOP-U53KV1A │ ├── seed1_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-10-03 │ │ └── events.out.tfevents.1594937403.DESKTOP-U53KV1A │ └── seed21_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-17-11 │ │ └── events.out.tfevents.1594937831.DESKTOP-U53KV1A ├── problem-3 │ ├── ex2_returns.png │ ├── seed11_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_02-37-10 │ │ └── events.out.tfevents.1595140630.DESKTOP-U53KV1A │ ├── seed1_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_01-08-12 │ │ └── events.out.tfevents.1595135292.DESKTOP-U53KV1A │ └── seed21_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_03-56-28 │ │ └── events.out.tfevents.1595145388.DESKTOP-U53KV1A └── problem-4 │ ├── bc0.0001 │ ├── seed11_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_19-07-2020_14-18-04 │ │ └── events.out.tfevents.1595182684.DESKTOP-U53KV1A │ ├── seed1_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_19-07-2020_01-08-22 │ │ └── events.out.tfevents.1595135302.DESKTOP-U53KV1A │ └── seed21_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_20-07-2020_05-22-16 │ │ └── events.out.tfevents.1595236936.DESKTOP-U53KV1A │ ├── bc0.001 
│ ├── seed11_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_04-18-46 │ │ └── events.out.tfevents.1595146726.DESKTOP-U53KV1A │ ├── seed1_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_01-08-17 │ │ └── events.out.tfevents.1595135297.DESKTOP-U53KV1A │ └── seed21_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_06-40-41 │ │ └── events.out.tfevents.1595155241.DESKTOP-U53KV1A │ ├── full_comp.png │ └── none │ ├── seed11_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_00-57-30 │ └── events.out.tfevents.1594875450.DESKTOP-U53KV1A │ ├── seed1_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_00-26-39 │ └── events.out.tfevents.1594873599.DESKTOP-U53KV1A │ └── seed21_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_01-28-19 │ └── events.out.tfevents.1594877299.DESKTOP-U53KV1A └── setup.py /.gitattributes: -------------------------------------------------------------------------------- 1 | hw1/downloads/* linguist-detectable=false 2 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UC Berkeley Deep RL Pytorch Solutions 2 | 3 | Pytorch solutions for [UC Berkeley's CS285 Deep RL course](http://rail.eecs.berkeley.edu/deeprlcourse/). If you wish to complete the assignments yourself, a [pytorch version](https://github.com/mdeib/berkeley-deep-RL-pytorch-starter) of the [official starter code](https://github.com/berkeleydeeprlcourse/homework_fall2019) has also been made. 4 | 5 | While these solutions have produced reasonable results be aware that there may still be small bugs in the code and/or the solutions. 6 | -------------------------------------------------------------------------------- /hw1/README.md: -------------------------------------------------------------------------------- 1 | # Section 1 Behavior Cloning 2 | 3 | Below is the HW1 report. 
All data used can be found in the results folder - videos aren't included to save space. To view the tensorboard for a specific part navigate to that part's folder (not the subfolders) and run 4 | ```commandline 5 | tensorboard --logdir . 6 | ``` 7 | 8 | ## Question 1.2 9 | 10 | The agent was trained on 10,000 steps of expert behavior in each environment. It was then evaluated for >10,000 steps to get an accurate mean performance. The agent itself had an MLP policy consisting of 2 hidden layers of 64 neurons each. 11 | 12 | | Environment | Expert | Behavioral Cloning | Mean Percent Performance | 13 | |-------------|:----------------:|:------------------:|:------------------------:| 14 | | Ant | 4713.65 ± 12.2 | 4696.46 ± 90.39 | 99.64% | 15 | | HalfCheetah | 4205.78 ± 83.04 | 3521.82 ± 181.00 | 83.74% | 16 | | Hopper | 3772.67 ± 1.95 | 660.8 ± 348.67 | 17.52% | 17 | | Humanoid | 10344.52 ± 20.98 | 414.05 ± 105.71 | 4.00% | 18 | | Walker2d | 5566.84 ± 9.24 | 60.96 ± 94.77 | 1.10% | 19 | 20 | It can be seen that the agent achieved >30% performance in both the ant and the half cheetah environment. It failed to reach this benchmark in the other three environments. These environments seem to be harder for behavioral cloning, requiring more training to reach a comparable level of performance. 21 | 22 | ## Question 1.3 23 | 24 | The agent in the Walker2d environment was only able to achieve 1.1% expert performance after 10,000 steps. It seems likely that it could do better with more training. For this question we will graph evaluation performance as a function of training steps. A data point was taken every 2000 training steps, and it was trained for a total of 100,000 steps. The mean returns throughout training are shown below: 25 | 26 | ![Evaluation Average](results/Q1-3/bc-eval-avg.PNG) 27 | 28 | It can be seen that the agent was able to improve greatly with more training updates, reaching almost 90 percent of the expert performance. 
Also notable is the significant initial time it took to actually begin performing. 29 | 30 | ![Evaluation Standard Deviation](results/Q1-3/bc-eval-std.PNG) 31 | 32 | While average performance seems to be quite good, the standard deviation over the course of training is a bit more telling, as are the min/max returns. The agent continues to have trials where it makes a mistake and is unable to recover, resulting in a terrible rollout and a large standard deviation. If the agent were really learning to perform well in the environment we would see the standard deviation fall as it begins to consistently do well. This perfectly illustrates the weaknesses of behavioral cloning, and leads into question 2.2. 33 | 34 | ## Question 2.2 35 | 36 | For this question dagger learning was done on the Walker2d environment used in question 1.3. In the first 10k steps behavioral cloning was done, after which 9 iterations of dagger were carried out. Thus a total of 100k training steps were done, just like in question 1.3. All other things were kept the same. This allows the usage of dagger to be fairly tested. The average returns are below: 37 | 38 | ![Evaluation Average](results/Q2-2/dagger-eval-avg.PNG) 39 | 40 | It can be seen that using dagger instead of training behavioral cloning further yielded better average returns. This is good but the real test is the standard deviation: 41 | 42 | ![Evaluation Standard Deviation](results/Q2-2/dagger-eval-std.PNG) 43 | 44 | Unlike in question 1.3 the standard deviation drops dramatically as more dagger iterations are done. This shows that dagger has taught the agent to actually correct its mistakes, instead of failing as soon as it deviates slightly from the expert's path. Thus dagger is shown to provide agent robustness that pure behavioral cloning fails to give. 
45 | -------------------------------------------------------------------------------- /hw1/cs285/agents/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making folder a package 2 | -------------------------------------------------------------------------------- /hw1/cs285/agents/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/agents/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/agents/__pycache__/base_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/agents/__pycache__/base_agent.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/agents/__pycache__/bc_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/agents/__pycache__/bc_agent.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/agents/bc_agent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import time 3 | 4 | from cs285.policies.MLP_policy import * 5 | from cs285.infrastructure.replay_buffer import ReplayBuffer 6 | from cs285.infrastructure.utils import * 7 | 8 | class BCAgent: 9 | def __init__(self, env, agent_params): 10 | # init vars 11 | self.env = env 12 | self.agent_params = agent_params 13 | 14 | # actor/policy 15 | self.actor = 
MLPPolicySL(self.agent_params['ac_dim'], 16 | self.agent_params['ob_dim'], 17 | self.agent_params['n_layers'], 18 | self.agent_params['size'], 19 | self.agent_params['device'], 20 | discrete = self.agent_params['discrete'], 21 | learning_rate = self.agent_params['learning_rate'], 22 | ) ## TODO: look in here and implement this 23 | 24 | # replay buffer 25 | self.replay_buffer = ReplayBuffer(self.agent_params['max_replay_buffer_size']) 26 | 27 | def train(self, ob_no, ac_na, re_n, next_ob_no, terminal_n): 28 | # training a BC agent refers to updating its actor using 29 | # the given observations and corresponding action labels 30 | self.actor.update(ob_no, ac_na) ## TODO: look in here and implement this 31 | 32 | def add_to_replay_buffer(self, paths): 33 | self.replay_buffer.add_rollouts(paths) 34 | 35 | def sample(self, batch_size): 36 | return self.replay_buffer.sample_random_data(batch_size) ## TODO: look in here and implement this 37 | -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/events.out.tfevents.1593151299.DESKTOP-U53KV1A.17880.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/events.out.tfevents.1593151299.DESKTOP-U53KV1A.17880.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/policy_itr_0 -------------------------------------------------------------------------------- 
/hw1/cs285/data/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/events.out.tfevents.1593151346.DESKTOP-U53KV1A.3080.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/events.out.tfevents.1593151346.DESKTOP-U53KV1A.3080.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/policy_itr_0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/events.out.tfevents.1593151407.DESKTOP-U53KV1A.17880.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/events.out.tfevents.1593151407.DESKTOP-U53KV1A.17880.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/policy_itr_0 -------------------------------------------------------------------------------- 
/hw1/cs285/data/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/events.out.tfevents.1593151514.DESKTOP-U53KV1A.2796.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/events.out.tfevents.1593151514.DESKTOP-U53KV1A.2796.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/policy_itr_0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/events.out.tfevents.1593151663.DESKTOP-U53KV1A.14512.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/events.out.tfevents.1593151663.DESKTOP-U53KV1A.14512.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/policy_itr_0 -------------------------------------------------------------------------------- 
/hw1/cs285/data/Q1-3/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_20-57-08/events.out.tfevents.1593219429.DESKTOP-U53KV1A.388.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q1-3/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_20-57-08/events.out.tfevents.1593219429.DESKTOP-U53KV1A.388.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/events.out.tfevents.1593231811.DESKTOP-U53KV1A.16276.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/events.out.tfevents.1593231811.DESKTOP-U53KV1A.16276.0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_0 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_1 -------------------------------------------------------------------------------- 
/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_2 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_3 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_4 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_5 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_6: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_6 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_7 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_8 -------------------------------------------------------------------------------- /hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_9: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/data/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_9 -------------------------------------------------------------------------------- /hw1/cs285/expert_data/expert_data_Ant-v2.pkl: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/expert_data/expert_data_Ant-v2.pkl -------------------------------------------------------------------------------- /hw1/cs285/expert_data/expert_data_HalfCheetah-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/expert_data/expert_data_HalfCheetah-v2.pkl -------------------------------------------------------------------------------- /hw1/cs285/expert_data/expert_data_Hopper-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/expert_data/expert_data_Hopper-v2.pkl -------------------------------------------------------------------------------- /hw1/cs285/expert_data/expert_data_Humanoid-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/expert_data/expert_data_Humanoid-v2.pkl -------------------------------------------------------------------------------- /hw1/cs285/expert_data/expert_data_Walker2d-v2.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/expert_data/expert_data_Walker2d-v2.pkl -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making folder a package 2 | 
-------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/infrastructure/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__pycache__/tf_utils.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/infrastructure/__pycache__/tf_utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/infrastructure/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from torch.utils.tensorboard import SummaryWriter 4 | import numpy as np 5 | 6 | class Logger: 7 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None): 8 | self._log_dir = log_dir 9 | print('########################') 10 | print('logging outputs to ', log_dir) 11 | print('########################') 12 | self._n_logged_samples = n_logged_samples 13 | self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1) 14 | 15 | def log_scalar(self, scalar, name, step_): 16 | self._summ_writer.add_scalar('{}'.format(name), scalar, step_) 17 | 18 | def log_scalars(self, scalar_dict, group_name, step, phase): 19 | """Will log all scalars in the same plot.""" 20 | self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step) 21 | 22 | def log_image(self, image, name, step): 23 | assert(len(image.shape) == 3) # [C, H, W] 24 | self._summ_writer.add_image('{}'.format(name), image, step) 25 | 26 | def log_video(self, video_frames, name, step, fps=10): 27 | assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input tensor for video logging!" 
28 | self._summ_writer.add_video('{}'.format(name), video_frames, step, fps=fps) 29 | 30 | def log_paths_as_videos(self, paths, step, max_videos_to_save=2, fps=10, video_title='video'): 31 | 32 | # reshape the rollouts 33 | videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths] 34 | 35 | # max rollout length 36 | max_videos_to_save = np.min([max_videos_to_save, len(videos)]) 37 | max_length = videos[0].shape[0] 38 | for i in range(max_videos_to_save): 39 | if videos[i].shape[0]>max_length: 40 | max_length = videos[i].shape[0] 41 | 42 | # pad rollouts to all be same length 43 | for i in range(max_videos_to_save): 44 | if videos[i].shape[0] 0, "Figure logging requires input shape [batch x figures]!" 56 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 57 | 58 | def log_figure(self, figure, name, step, phase): 59 | """figure: matplotlib.pyplot figure handle""" 60 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 61 | 62 | def log_graph(self, array, name, step, phase): 63 | """figure: matplotlib.pyplot figure handle""" 64 | im = plot_graph(array) 65 | self._summ_writer.add_image('{}_{}'.format(name, phase), im, step) 66 | 67 | def dump_scalars(self, log_path=None): 68 | log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path 69 | self._summ_writer.export_scalars_to_json(log_path) 70 | 71 | def flush(self): 72 | self._summ_writer.flush() 73 | -------------------------------------------------------------------------------- /hw1/cs285/infrastructure/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import gym 4 | import os 5 | 6 | from cs285.infrastructure.utils import * 7 | 8 | class ReplayBuffer(object): 9 | 10 | def __init__(self, max_size=1000000): 11 | 12 | self.max_size = max_size 13 | 14 | # store each rollout 15 | self.paths = [] 16 | 17 | # store (concatenated) component 
arrays from each rollout 18 | self.obs = None 19 | self.acs = None 20 | self.rews = None 21 | self.next_obs = None 22 | self.terminals = None 23 | 24 | def __len__(self): 25 | if self.obs is not None: 26 | return self.obs.shape[0] 27 | else: 28 | return 0 29 | 30 | def add_rollouts(self, paths, concat_rew=True): 31 | 32 | # add new rollouts into our list of rollouts 33 | for path in paths: 34 | self.paths.append(path) 35 | 36 | # convert new rollouts into their component arrays, and append them onto our arrays 37 | observations, actions, rewards, next_observations, terminals = convert_listofrollouts(paths, concat_rew) 38 | 39 | if self.obs is None: 40 | self.obs = observations[-self.max_size:] 41 | self.acs = actions[-self.max_size:] 42 | self.rews = rewards[-self.max_size:] 43 | self.next_obs = next_observations[-self.max_size:] 44 | self.terminals = terminals[-self.max_size:] 45 | else: 46 | self.obs = np.concatenate([self.obs, observations])[-self.max_size:] 47 | self.acs = np.concatenate([self.acs, actions])[-self.max_size:] 48 | if concat_rew: 49 | self.rews = np.concatenate([self.rews, rewards])[-self.max_size:] 50 | else: 51 | if isinstance(rewards, list): 52 | self.rews += rewards 53 | else: 54 | self.rews.append(rewards) 55 | self.rews = self.rews[-self.max_size:] 56 | self.next_obs = np.concatenate([self.next_obs, next_observations])[-self.max_size:] 57 | self.terminals = np.concatenate([self.terminals, terminals])[-self.max_size:] 58 | 59 | ######################################## 60 | ######################################## 61 | 62 | def sample_random_data(self, batch_size): 63 | assert self.obs.shape[0] == self.acs.shape[0] == self.rews.shape[0] == self.next_obs.shape[0] == self.terminals.shape[0] 64 | 65 | ## TODO return batch_size number of random entries from each of the 5 component arrays above 66 | ## HINT 1: use np.random.permutation to sample random indices 67 | ## HINT 2: return corresponding data points from each array (i.e., not different 
indices from each array) 68 | ## HINT 3: look at the sample_recent_data function below 69 | 70 | indices = np.random.permutation(len(self))[:batch_size] 71 | return self.obs[indices], self.acs[indices], self.rews[indices], self.next_obs[indices], self.terminals[indices] 72 | 73 | def sample_recent_data(self, batch_size=1): 74 | return self.obs[-batch_size:], self.acs[-batch_size:], self.rews[-batch_size:], self.next_obs[-batch_size:], self.terminals[-batch_size:] 75 | -------------------------------------------------------------------------------- /hw1/cs285/policies/MLP_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | class MLPPolicy(nn.Module): 6 | 7 | def __init__(self, 8 | ac_dim, 9 | ob_dim, 10 | n_layers, 11 | size, 12 | device, 13 | lr = 1e-4, 14 | training=True, 15 | discrete=False, # unused for now 16 | nn_baseline=False, # unused for now 17 | **kwargs): 18 | super().__init__() 19 | 20 | # init vars 21 | self.training = training 22 | self.device = device 23 | 24 | # network architecture 25 | self.mlp = nn.ModuleList() 26 | self.mlp.append(nn.Linear(ob_dim, size))#first hidden layer 27 | self.mlp.append(nn.Tanh()) 28 | 29 | for h in range(n_layers - 1): #additional hidden layers 30 | self.mlp.append(nn.Linear(size, size)) 31 | self.mlp.append(nn.Tanh()) 32 | 33 | self.mlp.append(nn.Linear(size, ac_dim)) #output layer, no activation function 34 | 35 | #loss and optimizer 36 | if self.training: 37 | self.loss_func = nn.MSELoss() 38 | self.optimizer = torch.optim.Adam(self.parameters(), lr) 39 | 40 | self.to(device) 41 | 42 | ################################## 43 | 44 | def forward(self, x): 45 | for layer in self.mlp: 46 | x = layer(x) 47 | return x 48 | 49 | ################################## 50 | 51 | def save(self, filepath): 52 | torch.save(self.state_dict(), filepath) 53 | 54 | def restore(self, filepath): 55 | 
self.load_state_dict(torch.load(filepath)) 56 | 57 | ################################## 58 | 59 | # query this policy with observation(s) to get selected action(s) 60 | def get_action(self, obs): 61 | if len(obs.shape)>1: 62 | observation = obs 63 | else: 64 | observation = obs[None] 65 | 66 | return self(torch.Tensor(observation).to(self.device)).cpu().detach().numpy() 67 | 68 | # update/train this policy 69 | def update(self, observations, actions): 70 | raise NotImplementedError 71 | 72 | ##################################################### 73 | ##################################################### 74 | 75 | class MLPPolicySL(MLPPolicy): 76 | 77 | """ 78 | This class is a special case of MLPPolicy, 79 | which is trained using supervised learning. 80 | The relevant functions to define are included below. 81 | """ 82 | 83 | def update(self, observations, actions): 84 | assert self.training, 'Policy must be created with training = true in order to perform training updates...' 85 | 86 | # TODO define network update 87 | self.optimizer.zero_grad() 88 | predicted_actions = self(torch.Tensor(observations).to(self.device)) 89 | loss = self.loss_func(predicted_actions, torch.Tensor(actions).to(self.device)) 90 | loss.backward() 91 | self.optimizer.step() 92 | 93 | #print("loss:", loss.item()) 94 | -------------------------------------------------------------------------------- /hw1/cs285/policies/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making folder a package 2 | -------------------------------------------------------------------------------- /hw1/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc 
-------------------------------------------------------------------------------- /hw1/cs285/policies/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/policies/__pycache__/base_policy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/__pycache__/base_policy.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/policies/__pycache__/loaded_gaussian_policy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/__pycache__/loaded_gaussian_policy.cpython-37.pyc -------------------------------------------------------------------------------- /hw1/cs285/policies/experts/Ant.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/experts/Ant.pkl -------------------------------------------------------------------------------- /hw1/cs285/policies/experts/HalfCheetah.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/experts/HalfCheetah.pkl 
-------------------------------------------------------------------------------- /hw1/cs285/policies/experts/Hopper.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/experts/Hopper.pkl -------------------------------------------------------------------------------- /hw1/cs285/policies/experts/Humanoid.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/experts/Humanoid.pkl -------------------------------------------------------------------------------- /hw1/cs285/policies/experts/Walker2d.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285/policies/experts/Walker2d.pkl -------------------------------------------------------------------------------- /hw1/cs285/policies/loaded_gaussian_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | import pickle 5 | 6 | class Loaded_Gaussian_Policy(nn.Module): 7 | def __init__(self, filename, **kwargs): 8 | super().__init__() 9 | with open(filename, 'rb') as f: 10 | data = pickle.loads(f.read()) 11 | 12 | self.nonlin_type = data['nonlin_type'] 13 | policy_type = [k for k in data.keys() if k != 'nonlin_type'][0] 14 | 15 | assert policy_type == 'GaussianPolicy', 'Policy type {} not supported'.format(policy_type) 16 | self.policy_params = data[policy_type] 17 | 18 | assert set(self.policy_params.keys()) == {'logstdevs_1_Da', 'hidden', 'obsnorm', 'out'} 19 | 20 | self.obsnorm_mean = 
self.policy_params['obsnorm']['Standardizer']['mean_1_D'] 21 | self.obsnorm_meansq = self.policy_params['obsnorm']['Standardizer']['meansq_1_D'] 22 | layer_params = self.policy_params['hidden']['FeedforwardNet'] 23 | 24 | self.mlp = nn.ModuleList() 25 | for layer_name in sorted(layer_params.keys()): 26 | W = layer_params[layer_name]['AffineLayer']['W'].astype(np.float32) 27 | b = layer_params[layer_name]['AffineLayer']['b'].astype(np.float32) 28 | r, h = W.shape 29 | 30 | layer = nn.Linear(r,h) 31 | layer.weight.data.copy_(torch.from_numpy(W.transpose())) 32 | layer.bias.data.copy_(torch.from_numpy(b.squeeze(0))) 33 | self.mlp.append(layer) 34 | 35 | if self.nonlin_type == 'lrelu': 36 | self.mlp.append(nn.LeakyReLU()) 37 | elif self.nonlin_type == 'tanh': 38 | self.mlp.append(nn.Tanh()) 39 | else: 40 | raise NotImplementedError(self.nonlin_type) 41 | 42 | #output layer 43 | W = self.policy_params['out']['AffineLayer']['W'].astype(np.float32) 44 | b = self.policy_params['out']['AffineLayer']['b'].astype(np.float32) 45 | r, h = W.shape 46 | layer = nn.Linear(r, h) 47 | layer.weight.data.copy_(torch.from_numpy(W.transpose())) 48 | layer.bias.data.copy_(torch.from_numpy(b.squeeze(0))) 49 | self.mlp.append(layer) 50 | 51 | ################################## 52 | 53 | def obs_norm(self, obs_bo, obsnorm_mean, obsnorm_meansq): 54 | obsnorm_stdev = np.sqrt(np.maximum(0, obsnorm_meansq - np.square(obsnorm_mean))) 55 | normedobs_bo = (obs_bo - obsnorm_mean) / (obsnorm_stdev + 1e-6) 56 | return torch.FloatTensor(normedobs_bo).squeeze(0) 57 | 58 | ################################## 59 | 60 | def forward(self, obs): 61 | x = self.obs_norm(obs, self.obsnorm_mean, self.obsnorm_meansq) 62 | for layer in self.mlp: 63 | x = layer(x) 64 | return x 65 | 66 | ################################## 67 | 68 | def update(self, obs_no, acs_na, adv_n=None, acs_labels_na=None): 69 | print("\n\nThis policy class simply loads in a particular type of policy and queries it.") 70 | print("Not training 
procedure has been written, so do not try to train it.\n\n") 71 | raise NotImplementedError 72 | 73 | def get_action(self, obs): 74 | if len(obs.shape) > 1: 75 | observation = obs 76 | else: 77 | observation = obs[None, :] 78 | return self(obs) 79 | -------------------------------------------------------------------------------- /hw1/cs285_hw1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/cs285_hw1.pdf -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/basic: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/basic -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/compile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/compile -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/derivative: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/derivative -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/libglew.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/libglew.so 
-------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/libglewegl.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/libglewegl.so -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/libglewosmesa.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/libglewosmesa.so -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/libglfw.so.3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/libglfw.so.3 -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/libmujoco150.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/libmujoco150.so -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/libmujoco150nogl.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/libmujoco150nogl.so -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/record: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/record -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/simulate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/simulate -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/bin/test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/downloads/mjpro150/bin/test -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/doc/README.txt: -------------------------------------------------------------------------------- 1 | Welcome to MuJoCo Pro version 1.50. 2 | 3 | The full documentation is available at http://www.mujoco.org/book 4 | The most relevant chapters are Overview, MJCF Models, and MuJoCo Pro. 5 | 6 | Here we provide brief notes to get you started: 7 | 8 | 9 | The activation key (which you should have received with your license) is a 10 | plain-text file whose path must be passed to the mj_activate() function. 11 | The code samples assume that it is called mjkey.txt in the bin directory. 12 | 13 | Once you have mjkey.txt in the bin directory, run: 14 | simulate ../model/humanoid.xml (or ./simulate on Linux and OSX) 15 | to see MuJoCo Pro in action. 16 | 17 | On Linux, you can use LD_LIBRARY_PATH to point the dynamic linker to the 18 | .so files, or copy them to a directory that is already in the linker path. 
19 | On OSX, the MuJoCo Pro dynamic library is compiled with @executable_path/ 20 | to avoid the need for installation in a predefined directory. 21 | 22 | In general, the directory structure we have provided is merely a suggestion; 23 | feel free to re-organize it if needed. MuJoCo Pro does not have an installer 24 | and does not write any files outside the executable directory. 25 | 26 | The makefile in the sample directory generates binaries in the bin directory. 27 | These binaries are pre-compiled and included in the software distribution. 28 | 29 | While the software distribution contains only one model (humanoid.xml), 30 | additional models are available at http://www.mujoco.org/forum under Resources. 31 | -------------------------------------------------------------------------------- /hw1/downloads/mjpro150/sample/makefile: -------------------------------------------------------------------------------- 1 | COMMON=-O2 -I../include -L../bin -std=c++11 -mavx 2 | 3 | default: 4 | g++ $(COMMON) test.cpp -lmujoco150nogl -o ../bin/test 5 | g++ $(COMMON) compile.cpp -lmujoco150nogl -o ../bin/compile 6 | g++ $(COMMON) derivative.cpp -lmujoco150nogl -fopenmp -o ../bin/derivative 7 | g++ $(COMMON) simulate.cpp -lmujoco150 -lGL -lglew ../bin/libglfw.so.3 -o ../bin/simulate 8 | g++ $(COMMON) record.cpp -lmujoco150 -lGL -lglew ../bin/libglfw.so.3 -o ../bin/record 9 | g++ $(COMMON) basic.cpp -lmujoco150 -lGL -lglew ../bin/libglfw.so.3 -o ../bin/basic 10 | 11 | egl: 12 | g++ $(COMMON) -DMJ_EGL record.cpp -lmujoco150 -lOpenGL -lEGL -lglewegl -o ../bin/recordegl 13 | 14 | osmesa: 15 | g++ $(COMMON) -DMJ_OSMESA record.cpp -lmujoco150 -lOSMesa -lglewosmesa -o ../bin/recordosmesa 16 | 17 | all: default egl osmesa 18 | -------------------------------------------------------------------------------- /hw1/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.10.11 2 | mujoco-py==1.50.1.35 3 | matplotlib==2.2.2 4 | 
ipython==6.4.0 5 | moviepy==1.0.0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/events.out.tfevents.1593151299.DESKTOP-U53KV1A.17880.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/events.out.tfevents.1593151299.DESKTOP-U53KV1A.17880.0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_ant_Ant-v2_26-06-2020_02-01-37/policy_itr_0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/events.out.tfevents.1593151346.DESKTOP-U53KV1A.3080.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/events.out.tfevents.1593151346.DESKTOP-U53KV1A.3080.0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_halfcheetah_HalfCheetah-v2_26-06-2020_02-02-25/policy_itr_0 
-------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/events.out.tfevents.1593151407.DESKTOP-U53KV1A.17880.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/events.out.tfevents.1593151407.DESKTOP-U53KV1A.17880.0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_hopper_Hopper-v2_26-06-2020_02-03-26/policy_itr_0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/events.out.tfevents.1593151514.DESKTOP-U53KV1A.2796.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/events.out.tfevents.1593151514.DESKTOP-U53KV1A.2796.0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_humanoid_Humanoid-v2_26-06-2020_02-05-13/policy_itr_0 
-------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/events.out.tfevents.1593151663.DESKTOP-U53KV1A.14512.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/events.out.tfevents.1593151663.DESKTOP-U53KV1A.14512.0 -------------------------------------------------------------------------------- /hw1/results/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-2/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_02-07-41/policy_itr_0 -------------------------------------------------------------------------------- /hw1/results/Q1-3/bc-eval-avg.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-3/bc-eval-avg.PNG -------------------------------------------------------------------------------- /hw1/results/Q1-3/bc-eval-std.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-3/bc-eval-std.PNG -------------------------------------------------------------------------------- /hw1/results/Q1-3/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_20-57-08/events.out.tfevents.1593219429.DESKTOP-U53KV1A.388.0: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q1-3/bc_test_bc_walker2d_Walker2d-v2_26-06-2020_20-57-08/events.out.tfevents.1593219429.DESKTOP-U53KV1A.388.0 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger-eval-avg.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger-eval-avg.PNG -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger-eval-std.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger-eval-std.PNG -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/events.out.tfevents.1593231811.DESKTOP-U53KV1A.16276.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/events.out.tfevents.1593231811.DESKTOP-U53KV1A.16276.0 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_0 
-------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_1: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_1 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_2 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_3 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_4 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_5: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_5 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_6: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_6 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_7: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_7 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_8: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_8 -------------------------------------------------------------------------------- /hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_9: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw1/results/Q2-2/dagger_test_dagger_walker_Walker2d-v2_27-06-2020_00-23-29/policy_itr_9 -------------------------------------------------------------------------------- /hw1/setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup 3 | 4 | setup( 5 | name='cs285', 6 | version='0.1.0', 7 | packages=['cs285'], 8 | ) -------------------------------------------------------------------------------- /hw2/README.txt: -------------------------------------------------------------------------------- 1 | 2 | 1) See hw1 if you'd like to see installation instructions. You do NOT have to redo them. 3 | 4 | 5 | ############################################## 6 | ############################################## 7 | 8 | 9 | 2) Code: 10 | 11 | ------------------------------------------- 12 | 13 | Files to look at, even though there are no explicit 'TODO' markings: 14 | - scripts/run_hw2_policy_gradient.py 15 | 16 | ------------------------------------------- 17 | 18 | Blanks to be filled in by using your code from hw1 are marked with 'TODO: GETTHIS from HW1' 19 | 20 | The following files have these: 21 | - infrastructure/rl_trainer.py 22 | - infrastructure/utils.py 23 | - policies/MLP_policy.py 24 | 25 | ------------------------------------------- 26 | 27 | Blanks to be filled in now (for this assignment) are marked with 'TODO' 28 | 29 | The following files have these: 30 | - agents/pg_agent.py 31 | - policies/MLP_policy.py 32 | 33 | 34 | ############################################## 35 | ############################################## 36 | 37 | 38 | 3) Run code with the following command: 39 | 40 | $ python cs285/scripts/run_hw2_policy_gradient.py --env_name CartPole-v1 --exp_name test_pg_cartpole 41 | $ python cs285/scripts/run_hw2_policy_gradient.py --env_name InvertedPendulum-v2 
--exp_name test_pg_pendulum 42 | 43 | Flags of relevance, when running the commands above (see pdf for more info): 44 | -n number of policy training iterations 45 | -rtg use reward_to_go for the value 46 | -dsa do not standardize the advantage values 47 | 48 | ############################################## 49 | 50 | 51 | 4) Visualize saved tensorboard event file: 52 | 53 | $ cd cs285/data/ 54 | $ tensorboard --logdir . 55 | 56 | Then, navigate to shown url to see scalar summaries as plots (in 'scalar' tab), as well as videos (in 'images' tab) 57 | 58 | -------------------------------------------------------------------------------- /hw2/cs285/agents/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw2/cs285/agents/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/agents/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/agents/__pycache__/pg_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/agents/__pycache__/pg_agent.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/infrastructure/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/infrastructure/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/logger.py: 
-------------------------------------------------------------------------------- 1 | import os 2 | from tensorboardX import SummaryWriter 3 | import numpy as np 4 | 5 | class Logger: 6 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None): 7 | self._log_dir = log_dir 8 | print('########################') 9 | print('logging outputs to ', log_dir) 10 | print('########################') 11 | self._n_logged_samples = n_logged_samples 12 | self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1) 13 | 14 | def log_scalar(self, scalar, name, step_): 15 | self._summ_writer.add_scalar('{}'.format(name), scalar, step_) 16 | 17 | def log_scalars(self, scalar_dict, group_name, step, phase): 18 | """Will log all scalars in the same plot.""" 19 | self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step) 20 | 21 | def log_image(self, image, name, step): 22 | assert(len(image.shape) == 3) # [C, H, W] 23 | self._summ_writer.add_image('{}'.format(name), image, step) 24 | 25 | def log_video(self, video_frames, name, step, fps=10): 26 | assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input tensor for video logging!" 27 | self._summ_writer.add_video('{}'.format(name), video_frames, step, fps=fps) 28 | 29 | def log_paths_as_videos(self, paths, step, max_videos_to_save=2, fps=10, video_title='video'): 30 | 31 | # reshape the rollouts 32 | videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths] 33 | 34 | # max rollout length 35 | max_videos_to_save = np.min([max_videos_to_save, len(videos)]) 36 | max_length = videos[0].shape[0] 37 | for i in range(max_videos_to_save): 38 | if videos[i].shape[0]>max_length: 39 | max_length = videos[i].shape[0] 40 | 41 | # pad rollouts to all be same length 42 | for i in range(max_videos_to_save): 43 | if videos[i].shape[0] 0, "Figure logging requires input shape [batch x figures]!" 
54 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 55 | 56 | def log_figure(self, figure, name, step, phase): 57 | """figure: matplotlib.pyplot figure handle""" 58 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 59 | 60 | def log_graph(self, array, name, step, phase): 61 | """figure: matplotlib.pyplot figure handle""" 62 | im = plot_graph(array) 63 | self._summ_writer.add_image('{}_{}'.format(name, phase), im, step) 64 | 65 | def dump_scalars(self, log_path=None): 66 | log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path 67 | self._summ_writer.export_scalars_to_json(log_path) 68 | 69 | def flush(self): 70 | self._summ_writer.flush() 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /hw2/cs285/infrastructure/replay_buffer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from cs285.infrastructure.utils import * 4 | 5 | class ReplayBuffer(object): 6 | 7 | def __init__(self, max_size=1000000): 8 | 9 | self.max_size = max_size 10 | self.paths = [] 11 | self.obs = None 12 | self.acs = None 13 | self.concatenated_rews = None 14 | self.unconcatenated_rews = None 15 | self.next_obs = None 16 | self.terminals = None 17 | 18 | def add_rollouts(self, paths): 19 | 20 | # add new rollouts into our list of rollouts 21 | for path in paths: 22 | self.paths.append(path) 23 | 24 | # convert new rollouts into their component arrays, and append them onto our arrays 25 | observations, actions, next_observations, terminals, concatenated_rews, unconcatenated_rews = convert_listofrollouts(paths) 26 | 27 | if self.obs is None: 28 | self.obs = observations[-self.max_size:] 29 | self.acs = actions[-self.max_size:] 30 | self.next_obs = next_observations[-self.max_size:] 31 | self.terminals = terminals[-self.max_size:] 32 | self.concatenated_rews = concatenated_rews[-self.max_size:] 
33 | self.unconcatenated_rews = unconcatenated_rews[-self.max_size:] 34 | else: 35 | self.obs = np.concatenate([self.obs, observations])[-self.max_size:] 36 | self.acs = np.concatenate([self.acs, actions])[-self.max_size:] 37 | self.next_obs = np.concatenate([self.next_obs, next_observations])[-self.max_size:] 38 | self.terminals = np.concatenate([self.terminals, terminals])[-self.max_size:] 39 | self.concatenated_rews = np.concatenate([self.concatenated_rews, concatenated_rews])[-self.max_size:] 40 | if isinstance(unconcatenated_rews, list): 41 | self.unconcatenated_rews += unconcatenated_rews 42 | else: 43 | self.unconcatenated_rews.append(unconcatenated_rews) 44 | 45 | ######################################## 46 | ######################################## 47 | 48 | def sample_random_rollouts(self, num_rollouts): 49 | rand_indices = np.random.permutation(len(self.paths))[:num_rollouts] 50 | return self.paths[rand_indices] 51 | 52 | def sample_recent_rollouts(self, num_rollouts=1): 53 | return self.paths[-num_rollouts:] 54 | 55 | ######################################## 56 | ######################################## 57 | 58 | def sample_random_data(self, batch_size): 59 | 60 | assert self.obs.shape[0] == self.acs.shape[0] == self.concatenated_rews.shape[0] == self.next_obs.shape[0] == self.terminals.shape[0] 61 | rand_indices = np.random.permutation(self.obs.shape[0])[:batch_size] 62 | return self.obs[rand_indices], self.acs[rand_indices], self.concatenated_rews[rand_indices], self.next_obs[rand_indices], self.terminals[rand_indices] 63 | 64 | def sample_recent_data(self, batch_size=1, concat_rew=True): 65 | 66 | if concat_rew: 67 | return self.obs[-batch_size:], self.acs[-batch_size:], self.concatenated_rews[-batch_size:], self.next_obs[-batch_size:], self.terminals[-batch_size:] 68 | else: 69 | num_recent_rollouts_to_return = 0 70 | num_datapoints_so_far = 0 71 | index = -1 72 | while num_datapoints_so_far < batch_size: 73 | recent_rollout = self.paths[index] 74 | 
index -=1 75 | num_recent_rollouts_to_return +=1 76 | num_datapoints_so_far += get_pathlength(recent_rollout) 77 | rollouts_to_return = self.paths[-num_recent_rollouts_to_return:] 78 | observations, actions, next_observations, terminals, concatenated_rews, unconcatenated_rews = convert_listofrollouts(rollouts_to_return) 79 | return observations, actions, unconcatenated_rews, next_observations, terminals -------------------------------------------------------------------------------- /hw2/cs285/policies/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw2/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285/policies/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285/policies/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw2/cs285_hw2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/cs285_hw2.pdf -------------------------------------------------------------------------------- /hw2/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.10.11 2 | mujoco-py==1.50.1.35 3 | matplotlib==2.2.2 4 
| ipython==6.4.0 5 | moviepy==1.0.0 -------------------------------------------------------------------------------- /hw2/results/bonus-gae-cp/gae_cp_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae-cp/gae_cp_comp.png -------------------------------------------------------------------------------- /hw2/results/bonus-gae-cp/pg_cp_lambda0.4_CartPole-v0_28-07-2020_00-18-04/events.out.tfevents.1595909884.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae-cp/pg_cp_lambda0.4_CartPole-v0_28-07-2020_00-18-04/events.out.tfevents.1595909884.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae-cp/pg_cp_lambda0.8_CartPole-v0_28-07-2020_00-17-56/events.out.tfevents.1595909876.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae-cp/pg_cp_lambda0.8_CartPole-v0_28-07-2020_00-17-56/events.out.tfevents.1595909876.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae-cp/pg_cp_lambda0.95_CartPole-v0_28-07-2020_00-17-50/events.out.tfevents.1595909870.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae-cp/pg_cp_lambda0.95_CartPole-v0_28-07-2020_00-17-50/events.out.tfevents.1595909870.DESKTOP-U53KV1A 
-------------------------------------------------------------------------------- /hw2/results/bonus-gae-cp/pg_cp_lambda1_CartPole-v0_28-07-2020_00-17-41/events.out.tfevents.1595909861.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae-cp/pg_cp_lambda1_CartPole-v0_28-07-2020_00-17-41/events.out.tfevents.1595909861.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae/gae_hc_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/gae_hc_comp.png -------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda0.4_HalfCheetah-v2_27-07-2020_21-59-54/events.out.tfevents.1595901594.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0.4_HalfCheetah-v2_27-07-2020_21-59-54/events.out.tfevents.1595901594.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda0.8_HalfCheetah-v2_27-07-2020_19-21-50/events.out.tfevents.1595892110.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0.8_HalfCheetah-v2_27-07-2020_19-21-50/events.out.tfevents.1595892110.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw2/results/bonus-gae/pg_lambda0.95_HalfCheetah-v2_27-07-2020_19-21-45/events.out.tfevents.1595892105.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0.95_HalfCheetah-v2_27-07-2020_19-21-45/events.out.tfevents.1595892105.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda0.975_HalfCheetah-v2_27-07-2020_19-21-43/events.out.tfevents.1595892103.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0.975_HalfCheetah-v2_27-07-2020_19-21-43/events.out.tfevents.1595892103.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda0.99_HalfCheetah-v2_27-07-2020_21-59-27/events.out.tfevents.1595901567.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0.99_HalfCheetah-v2_27-07-2020_21-59-27/events.out.tfevents.1595901567.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda0.9_HalfCheetah-v2_27-07-2020_21-59-37/events.out.tfevents.1595901577.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0.9_HalfCheetah-v2_27-07-2020_21-59-37/events.out.tfevents.1595901577.DESKTOP-U53KV1A 
-------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda0_HalfCheetah-v2_27-07-2020_19-21-48/events.out.tfevents.1595892108.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda0_HalfCheetah-v2_27-07-2020_19-21-48/events.out.tfevents.1595892108.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/bonus-gae/pg_lambda1_HalfCheetah-v2_27-07-2020_21-59-20/events.out.tfevents.1595901560.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/bonus-gae/pg_lambda1_HalfCheetah-v2_27-07-2020_21-59-20/events.out.tfevents.1595901560.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-3-lb/eval-avg-cp-lb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-lb/eval-avg-cp-lb.png -------------------------------------------------------------------------------- /hw2/results/problem-3-lb/pg_lb_no_rtg_dna_CartPole-v0_29-06-2020_00-52-35/events.out.tfevents.1593406355.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-lb/pg_lb_no_rtg_dna_CartPole-v0_29-06-2020_00-52-35/events.out.tfevents.1593406355.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw2/results/problem-3-lb/pg_lb_rtg_dna_CartPole-v0_29-06-2020_00-52-41/events.out.tfevents.1593406361.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-lb/pg_lb_rtg_dna_CartPole-v0_29-06-2020_00-52-41/events.out.tfevents.1593406361.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-3-lb/pg_lb_rtg_na_CartPole-v0_29-06-2020_00-52-47/events.out.tfevents.1593406367.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-lb/pg_lb_rtg_na_CartPole-v0_29-06-2020_00-52-47/events.out.tfevents.1593406367.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-3-sb/eval-avg-cp-sb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-sb/eval-avg-cp-sb.png -------------------------------------------------------------------------------- /hw2/results/problem-3-sb/pg_sb_no_rtg_dna_CartPole-v0_29-06-2020_00-45-57/events.out.tfevents.1593405957.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-sb/pg_sb_no_rtg_dna_CartPole-v0_29-06-2020_00-45-57/events.out.tfevents.1593405957.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw2/results/problem-3-sb/pg_sb_rtg_dna_CartPole-v0_29-06-2020_00-46-05/events.out.tfevents.1593405965.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-sb/pg_sb_rtg_dna_CartPole-v0_29-06-2020_00-46-05/events.out.tfevents.1593405965.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-3-sb/pg_sb_rtg_na_CartPole-v0_29-06-2020_00-46-11/events.out.tfevents.1593405971.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-3-sb/pg_sb_rtg_na_CartPole-v0_29-06-2020_00-46-11/events.out.tfevents.1593405971.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-4/ideal-params-comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-4/ideal-params-comparison.png -------------------------------------------------------------------------------- /hw2/results/problem-4/pg_ip_b100_lr0.06_InvertedPendulum-v2_01-07-2020_00-34-14/events.out.tfevents.1593578054.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-4/pg_ip_b100_lr0.06_InvertedPendulum-v2_01-07-2020_00-34-14/events.out.tfevents.1593578054.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw2/results/problem-4/pg_ip_b80_lr0.06_InvertedPendulum-v2_01-07-2020_00-33-01/events.out.tfevents.1593577981.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-4/pg_ip_b80_lr0.06_InvertedPendulum-v2_01-07-2020_00-33-01/events.out.tfevents.1593577981.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-4/pg_ip_b90_lr0.05_InvertedPendulum-v2_01-07-2020_00-26-36/events.out.tfevents.1593577596.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-4/pg_ip_b90_lr0.05_InvertedPendulum-v2_01-07-2020_00-26-36/events.out.tfevents.1593577596.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-4/pg_ip_b90_lr0.06_InvertedPendulum-v2_01-07-2020_00-27-32/events.out.tfevents.1593577652.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-4/pg_ip_b90_lr0.06_InvertedPendulum-v2_01-07-2020_00-27-32/events.out.tfevents.1593577652.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-4/pg_ip_b90_lr0.07_InvertedPendulum-v2_01-07-2020_00-28-39/events.out.tfevents.1593577719.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-4/pg_ip_b90_lr0.07_InvertedPendulum-v2_01-07-2020_00-28-39/events.out.tfevents.1593577719.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-6/eval-avg-ll.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-6/eval-avg-ll.png -------------------------------------------------------------------------------- /hw2/results/problem-6/pg_ll_b40000_r0.005_LunarLanderContinuous-v2_29-06-2020_21-30-30/events.out.tfevents.1593480630.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-6/pg_ll_b40000_r0.005_LunarLanderContinuous-v2_29-06-2020_21-30-30/events.out.tfevents.1593480630.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-ideal/cheetah-ideal-eval-avg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-ideal/cheetah-ideal-eval-avg.png -------------------------------------------------------------------------------- /hw2/results/problem-7-ideal/pg_hc_b30000_lr0.02_HalfCheetah-v2_30-06-2020_20-07-51/events.out.tfevents.1593562071.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-ideal/pg_hc_b30000_lr0.02_HalfCheetah-v2_30-06-2020_20-07-51/events.out.tfevents.1593562071.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-ideal/pg_hc_nnb_b30000_lr0.02_HalfCheetah-v2_30-06-2020_22-15-46/events.out.tfevents.1593569746.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-ideal/pg_hc_nnb_b30000_lr0.02_HalfCheetah-v2_30-06-2020_22-15-46/events.out.tfevents.1593569746.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-ideal/pg_hc_rtg_b30000_lr0.02_HalfCheetah-v2_30-06-2020_20-44-51/events.out.tfevents.1593564291.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-ideal/pg_hc_rtg_b30000_lr0.02_HalfCheetah-v2_30-06-2020_20-44-51/events.out.tfevents.1593564291.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-ideal/pg_hc_rtg_nnb_b30000_lr0.02_HalfCheetah-v2_30-06-2020_21-19-20/events.out.tfevents.1593566360.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-ideal/pg_hc_rtg_nnb_b30000_lr0.02_HalfCheetah-v2_30-06-2020_21-19-20/events.out.tfevents.1593566360.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw2/results/problem-7-search/cheetah-search-eval-avg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/cheetah-search-eval-avg.png -------------------------------------------------------------------------------- /hw2/results/problem-7-search/pg_hc_b10000_lr0.005_nnbaseline_HalfCheetah-v2_30-06-2020_01-48-46/events.out.tfevents.1593496126.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/pg_hc_b10000_lr0.005_nnbaseline_HalfCheetah-v2_30-06-2020_01-48-46/events.out.tfevents.1593496126.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-search/pg_hc_b10000_lr0.01_nnbaseline_HalfCheetah-v2_30-06-2020_00-21-43/events.out.tfevents.1593490903.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/pg_hc_b10000_lr0.01_nnbaseline_HalfCheetah-v2_30-06-2020_00-21-43/events.out.tfevents.1593490903.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-search/pg_hc_b10000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_00-37-16/events.out.tfevents.1593491836.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/pg_hc_b10000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_00-37-16/events.out.tfevents.1593491836.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-search/pg_hc_b30000_lr0.005_nnbaseline_HalfCheetah-v2_30-06-2020_00-51-38/events.out.tfevents.1593492698.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/pg_hc_b30000_lr0.005_nnbaseline_HalfCheetah-v2_30-06-2020_00-51-38/events.out.tfevents.1593492698.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-search/pg_hc_b30000_lr0.01_nnbaseline_HalfCheetah-v2_30-06-2020_02-09-33/events.out.tfevents.1593497373.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/pg_hc_b30000_lr0.01_nnbaseline_HalfCheetah-v2_30-06-2020_02-09-33/events.out.tfevents.1593497373.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw2/results/problem-7-search/pg_hc_b30000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_11-45-56/events.out.tfevents.1593531956.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw2/results/problem-7-search/pg_hc_b30000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_11-45-56/events.out.tfevents.1593531956.DESKTOP-U53KV1A 
-------------------------------------------------------------------------------- /hw2/setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup 3 | 4 | setup( 5 | name='cs285', 6 | version='0.1.0', 7 | packages=['cs285'], 8 | ) -------------------------------------------------------------------------------- /hw3/README.txt: -------------------------------------------------------------------------------- 1 | 2 | 1) See hw1 if you'd like to see installation instructions. You do NOT have to redo them. But, you need to install OpenCV for this assignment: 3 | `pip install opencv-python==3.4.0.12` 4 | 5 | You also need to replace `/gym/envs/box2d/lunar_lander.py` with the provided `lunar_lander.py` file. To find the file: 6 | $ locate lunar_lander.py 7 | (or if there are multiple options there): 8 | $ source activate cs285_env 9 | $ ipython 10 | $ import gym 11 | $ gym.__file__ 12 | /gym/__init__.py 13 | ############################################## 14 | ############################################## 15 | 16 | 17 | 2) Code: 18 | 19 | ------------------------------------------- 20 | 21 | Files to look at, even though there are no explicit 'TODO' markings: 22 | - scripts/run_hw3_dqn.py 23 | - scripts/run_hw3_actor_critic.py 24 | - infrastructure/models.py 25 | - policies/MLP_policy.py 26 | 27 | ------------------------------------------- 28 | 29 | Blanks to be filled in now (for this assignment) are marked with 'TODO' 30 | 31 | The following files have these: 32 | - critics/dqn_critic.py 33 | - agents/dqn_agent.py 34 | - policies/argmax_policy.py 35 | - critics/bootstrapped_continuous_critic.py 36 | - agents/ac_agent.py 37 | 38 | ############################################## 39 | ############################################## 40 | 41 | 42 | 3) Run code with the following command: 43 | 44 | $ python cs285/scripts/run_hw3_dqn.py --env_name PongNoFrameskip-v4 --exp_name test_pong 45 | $ python 
cs285/scripts/run_hw3_actor_critic.py --env_name CartPole-v0 -n 100 -b 1000 --exp_name 100_1 -ntu 100 -ngsptu 1 46 | 47 | Flags of relevance, when running the commands above (see pdf for more info): 48 | -double_q Whether to use double Q learning or not. 49 | 50 | ############################################## 51 | 52 | 53 | 4) Visualize saved tensorboard event file: 54 | 55 | $ cd cs285/data/ 56 | $ tensorboard --logdir . 57 | 58 | Then, navigate to shown url to see scalar summaries as plots (in 'scalar' tab), as well as videos (in 'images' tab) 59 | -------------------------------------------------------------------------------- /hw3/cs285/agents/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw3/cs285/agents/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/agents/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/agents/__pycache__/ac_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/agents/__pycache__/ac_agent.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/agents/__pycache__/dqn_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/agents/__pycache__/dqn_agent.cpython-37.pyc 
import numpy as np
import torch
from collections import OrderedDict


class ACAgent:
    """Actor-critic agent pairing an MLP policy (actor) with a bootstrapped
    state-value critic.

    NOTE(review): MLPPolicyAC, BootstrappedContinuousCritic and ReplayBuffer
    are provided by the surrounding cs285 package.
    """

    def __init__(self, env, agent_params):
        self.env = env
        self.agent_params = agent_params
        self.num_critic_updates_per_agent_update = agent_params['num_critic_updates_per_agent_update']
        self.num_actor_updates_per_agent_update = agent_params['num_actor_updates_per_agent_update']
        self.device = agent_params['device']

        self.gamma = self.agent_params['gamma']
        self.standardize_advantages = self.agent_params['standardize_advantages']

        self.actor = MLPPolicyAC(
            self.agent_params['ac_dim'],
            self.agent_params['ob_dim'],
            self.agent_params['n_layers'],
            self.agent_params['size'],
            self.agent_params['device'],
            discrete=self.agent_params['discrete'],
            learning_rate=self.agent_params['learning_rate'],
        )
        self.critic = BootstrappedContinuousCritic(self.agent_params)

        self.replay_buffer = ReplayBuffer()

    def estimate_advantage(self, ob_no, next_ob_no, re_n, terminal_n):
        """Return the advantage A(s, a) = r + gamma * V(s') - V(s).

        Args:
            ob_no:       numpy array, shape (N, ob_dim) — observations.
            next_ob_no:  numpy array, shape (N, ob_dim) — next observations.
            re_n:        numpy array, shape (N,) — per-step rewards.
            terminal_n:  numpy array, shape (N,) — 1 if the episode ended
                         at that step, else 0.

        Returns:
            numpy array of shape (N,): (optionally standardized) advantages.
        """
        ob, next_ob, rew, done = map(
            lambda x: torch.from_numpy(x).to(self.device),
            [ob_no, next_ob_no, re_n, terminal_n],
        )

        # Advantages are treated as constants by the actor, so no graph needed.
        with torch.no_grad():
            # BUG FIX: the critic outputs shape (N, 1); squeeze V(s) to (N,).
            # Without the squeeze, `rew + gamma * next_value - value`
            # broadcast to an (N, N) matrix of meaningless "advantages".
            value = self.critic.value_func(ob).squeeze()
            # Cut the bootstrap off at terminal states (done == 1).
            next_value = self.critic.value_func(next_ob).squeeze() * (1 - done)
            adv_n = (rew + self.gamma * next_value - value).cpu().numpy()

        if self.standardize_advantages:
            # Normalize to zero mean / unit variance for lower-variance updates.
            adv_n = (adv_n - np.mean(adv_n)) / (np.std(adv_n) + 1e-8)
        return adv_n

    def train(self, ob_no, ac_na, re_n, next_ob_no, terminal_n):
        """One agent update: several critic steps, then advantage estimation,
        then several actor steps.

        Returns:
            OrderedDict with the final 'Critic_Loss' and 'Actor_Loss'.
        """
        loss = OrderedDict()

        for _ in range(self.num_critic_updates_per_agent_update):
            loss['Critic_Loss'] = self.critic.update(ob_no, next_ob_no, re_n, terminal_n)

        adv_n = self.estimate_advantage(ob_no, next_ob_no, re_n, terminal_n)

        for _ in range(self.num_actor_updates_per_agent_update):
            loss['Actor_Loss'] = self.actor.update(ob_no, ac_na, adv_n)

        return loss

    def add_to_replay_buffer(self, paths):
        """Store freshly collected rollouts in the replay buffer."""
        self.replay_buffer.add_rollouts(paths)

    def sample(self, batch_size):
        """Sample the most recent `batch_size` transitions (on-policy data)."""
        return self.replay_buffer.sample_recent_data(batch_size)
import torch
from torch import nn


class BootstrappedContinuousCritic:
    """State-value critic V(s) trained on bootstrapped TD targets.

    NOTE(review): MLP comes from cs285.infrastructure.models.
    """

    def __init__(self, hparams):
        self.ob_dim = hparams['ob_dim']
        self.ac_dim = hparams['ac_dim']
        self.size = hparams['size']
        self.n_layers = hparams['n_layers']
        self.device = hparams['device']
        self.learning_rate = hparams['learning_rate']
        self.num_target_updates = hparams['num_target_updates']
        self.num_grad_steps_per_target_update = hparams['num_grad_steps_per_target_update']
        self.gamma = hparams['gamma']

        # Single-output MLP mapping observations to V(s).
        self.value_func = MLP(1, self.ob_dim, self.n_layers, self.size, self.device, True)
        self.optimizer = torch.optim.Adam(self.value_func.parameters(), lr=self.learning_rate)

    def update(self, ob_no, next_ob_no, re_n, terminal_n):
        """Update the critic parameters toward bootstrapped TD targets.

        Every `num_grad_steps_per_target_update` steps (including step 0) the
        target r + gamma * V(s') is recomputed; in between, gradient steps
        minimize MSE(V(s), target).

        Args:
            ob_no:       numpy array, shape (sum_of_path_lengths, ob_dim).
            next_ob_no:  numpy array, shape (sum_of_path_lengths, ob_dim).
            re_n:        numpy array, shape (sum_of_path_lengths,) — rewards.
            terminal_n:  numpy array, shape (sum_of_path_lengths,) — 1 at
                         episode ends, else 0.

        Returns:
            The final MSE loss tensor.
        """
        ob, next_ob, rew, done = map(
            lambda x: torch.from_numpy(x).to(self.device),
            [ob_no, next_ob_no, re_n, terminal_n],
        )

        loss = None
        total_steps = self.num_grad_steps_per_target_update * self.num_target_updates
        for step in range(total_steps):
            if step % self.num_grad_steps_per_target_update == 0:
                # BUG FIX: build the TD target without tracking gradients.
                # The original computed it inside the graph and only called
                # target_value.detach_() AFTER the first backward pass, so the
                # first gradient step after each target refresh also
                # backpropagated through the target, biasing the update.
                with torch.no_grad():
                    # V(s') is cut off (set to 0) at terminal states.
                    next_value = self.value_func(next_ob).squeeze() * (1 - done)
                    target_value = rew + self.gamma * next_value

            self.optimizer.zero_grad()
            loss = nn.functional.mse_loss(self.value_func(ob).squeeze(), target_value)
            loss.backward()
            self.optimizer.step()

        return loss


class DQNCritic:
    """Q-function critic for DQN / double DQN, trained with the Huber loss.

    NOTE(review): LL_DQN and atari_DQN come from cs285.infrastructure.models.
    """

    def __init__(self, hparams, optimizer_spec, **kwargs):
        super().__init__(**kwargs)
        self.env_name = hparams['env_name']
        self.device = hparams['device']
        self.ob_dim = hparams['ob_dim']

        # Flat observations use ob_dim directly; image obs use 'input_shape'.
        if isinstance(self.ob_dim, int):
            self.input_shape = self.ob_dim
        else:
            self.input_shape = hparams['input_shape']

        self.ac_dim = hparams['ac_dim']
        self.double_q = hparams['double_q']
        self.grad_norm_clipping = hparams['grad_norm_clipping']
        self.gamma = hparams['gamma']

        self.optimizer_spec = optimizer_spec

        if self.env_name == 'LunarLander-v2':
            self.Q_func = LL_DQN(self.ac_dim, self.input_shape, self.device)
            self.target_Q_func = LL_DQN(self.ac_dim, self.input_shape, self.device)
        elif self.env_name == 'PongNoFrameskip-v4':
            self.Q_func = atari_DQN(self.ac_dim, self.input_shape, self.device)
            self.target_Q_func = atari_DQN(self.ac_dim, self.input_shape, self.device)
        else:
            raise NotImplementedError

        # lr=1 so the LambdaLR schedule fully controls the learning rate.
        self.optimizer = self.optimizer_spec.constructor(
            self.Q_func.parameters(), lr=1, **self.optimizer_spec.kwargs)
        self.lr_scheduler = torch.optim.lr_scheduler.LambdaLR(
            self.optimizer, self.optimizer_spec.lr_schedule)

    def get_loss(self, ob_no, ac_na, re_n, next_ob_no, terminal_n):
        """Return the Huber (smooth-L1) TD loss between Q(s, a) and
        r + gamma * max_a' Q_target(s', a') (double-DQN action selection
        when `self.double_q`).
        """
        ob, ac, rew, next_ob, done = map(
            lambda x: torch.from_numpy(x).to(self.device),
            [ob_no, ac_na, re_n, next_ob_no, terminal_n],
        )

        # BUG FIX: the whole target computation sits under no_grad().
        # The original evaluated target_Q_func outside it, so backward()
        # accumulated gradients in the (never-stepped, never-zeroed) target
        # network on every update.
        with torch.no_grad():
            if self.double_q:
                # Double DQN: online net picks the action, target net scores it.
                max_ac = self.Q_func(next_ob).argmax(-1, True)
            else:
                max_ac = self.target_Q_func(next_ob).argmax(-1, True)
            best_next_Q = self.target_Q_func(next_ob).gather(-1, max_ac).squeeze()
            # Bootstrap is cut off at terminal states (done == 1).
            calc_Q = rew + self.gamma * best_next_Q * (1 - done)

        curr_Q = self.Q_func(ob).gather(-1, ac.long().view(-1, 1)).squeeze()
        return nn.functional.smooth_l1_loss(curr_Q, calc_Q)  # Huber loss

    def update(self, ob_no, ac_na, re_n, next_ob_no, terminal_n):
        """One gradient step on the Huber TD loss, with gradient-norm
        clipping, followed by a learning-rate-schedule step.

        Returns the loss tensor.
        """
        self.optimizer.zero_grad()

        loss = self.get_loss(ob_no, ac_na, re_n, next_ob_no, terminal_n)
        loss.backward()

        # Clip the gradient norm for stability before stepping.
        nn.utils.clip_grad_norm_(self.Q_func.parameters(), max_norm=self.grad_norm_clipping)
        self.optimizer.step()
        self.lr_scheduler.step()  # advance the learning-rate schedule

        return loss
/hw3/cs285/infrastructure/__pycache__/dqn_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/infrastructure/__pycache__/dqn_utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/infrastructure/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/infrastructure/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/infrastructure/__pycache__/models.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/infrastructure/__pycache__/models.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc 
-------------------------------------------------------------------------------- /hw3/cs285/infrastructure/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/infrastructure/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/infrastructure/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorboardX import SummaryWriter 3 | import numpy as np 4 | 5 | class Logger: 6 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None): 7 | self._log_dir = log_dir 8 | print('########################') 9 | print('logging outputs to ', log_dir) 10 | print('########################') 11 | self._n_logged_samples = n_logged_samples 12 | self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1) 13 | 14 | def log_scalar(self, scalar, name, step_): 15 | self._summ_writer.add_scalar('{}'.format(name), scalar, step_) 16 | 17 | def log_scalars(self, scalar_dict, group_name, step, phase): 18 | """Will log all scalars in the same plot.""" 19 | self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step) 20 | 21 | def log_image(self, image, name, step): 22 | assert(len(image.shape) == 3) # [C, H, W] 23 | self._summ_writer.add_image('{}'.format(name), image, step) 24 | 25 | def log_video(self, video_frames, name, step, fps=10): 26 | assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input tensor for video logging!" 
import torch
from torch import nn

class MLP(nn.Module):
    """Fully connected network used for policies and Q-functions.

    For `discrete=True` the forward pass returns raw outputs (e.g.
    logits / Q-values); otherwise it returns the pair
    `(output, exp(logstd))`, where `logstd` is a learned,
    state-independent parameter of size `ac_dim`.
    """

    def __init__(self,
                 ac_dim,
                 ob_dim,
                 n_layers,
                 size,
                 device,
                 discrete,
                 activation = nn.Tanh()):
        super().__init__()

        self.discrete = discrete

        # Build the stack: input layer, (n_layers - 1) extra hidden
        # layers, then a linear output head with no activation.
        # Layer creation order matters for seeded weight init and for
        # the "mlp.<i>" state_dict keys, so keep it stable.
        stack = nn.ModuleList()
        stack.append(nn.Linear(ob_dim, size))
        stack.append(activation)
        for _ in range(n_layers - 1):
            stack.append(nn.Linear(size, size))
            stack.append(activation)
        stack.append(nn.Linear(size, ac_dim))
        self.mlp = stack

        # Continuous outputs carry a learnable log standard deviation.
        if not self.discrete:
            self.logstd = nn.Parameter(torch.zeros(ac_dim))

        self.to(device)

    def forward(self, x):
        out = x
        for module in self.mlp:
            out = module(out)
        return out if self.discrete else (out, self.logstd.exp())

    def save(self, filepath):
        torch.save(self.state_dict(), filepath)

    def restore(self, filepath):
        self.load_state_dict(torch.load(filepath))

class LL_DQN(MLP):
    """Q-network for LunarLander: a 2x64 ReLU MLP over flat observations."""

    def __init__(self, ac_dim, ob_dim, device):
        super().__init__(ac_dim, ob_dim, 2, 64, device, True, nn.ReLU())

class atari_DQN(nn.Module):
    """Nature-DQN style convolutional Q-network for Atari frames.

    Expects uint8 image batches laid out [batch, height, width, channels];
    `forward` rescales to [0, 1] and permutes to NCHW before the convs.
    """

    def __init__(self, ac_dim, ob_dim, device):
        super().__init__()

        # Three conv layers; the Linear below assumes they reduce the
        # input to a 7x7x64 feature map (as with 84x84 inputs).
        self.convnet = nn.Sequential(
            nn.Conv2d(ob_dim[2], 32, 8, stride = 4),
            nn.ReLU(True),
            nn.Conv2d(32, 64, 4, stride = 2),
            nn.ReLU(True),
            nn.Conv2d(64, 64, 3, stride = 1),
            nn.ReLU(True),
        )
        self.action_value = nn.Sequential(
            nn.Linear(7 * 7 * 64, 512),
            nn.ReLU(True),
            nn.Linear(512, ac_dim),
        )
        self.to(device)

    def forward(self, obs):
        scaled = obs.float() / 255                  # uint8 pixels -> [0, 1]
        scaled = scaled.permute(0, 3, 1, 2)         # NHWC -> NCHW
        features = self.convnet(scaled)
        flat = features.reshape(features.size(0), -1)
        return self.action_value(flat)

    def save(self, filepath):
        torch.save(self.state_dict(), filepath)

    def restore(self, filepath):
        self.load_state_dict(torch.load(filepath))
import numpy as np

# NOTE(review): the original module also does
# `from cs285.infrastructure.utils import *`, which supplies
# convert_listofrollouts and get_pathlength used below.


class ReplayBuffer(object):
    """Replay buffer keeping both whole rollouts and flat per-step arrays.

    The flat component arrays (obs, acs, rewards, next_obs, terminals)
    are truncated to the most recent `max_size` steps on every add.
    """

    def __init__(self, max_size=1000000):

        self.max_size = max_size
        self.paths = []                  # list of rollout dicts
        self.obs = None                  # flat arrays; None until first add
        self.acs = None
        self.concatenated_rews = None
        self.unconcatenated_rews = None  # list of per-rollout reward arrays
        self.next_obs = None
        self.terminals = None

    def add_rollouts(self, paths):
        """Append new rollouts and refresh the flat component arrays."""

        # add new rollouts into our list of rollouts
        for path in paths:
            self.paths.append(path)

        # convert new rollouts into their component arrays, and append them onto our arrays
        observations, actions, next_observations, terminals, \
            concatenated_rews, unconcatenated_rews = convert_listofrollouts(paths)

        if self.obs is None:
            self.obs = observations[-self.max_size:]
            self.acs = actions[-self.max_size:]
            self.next_obs = next_observations[-self.max_size:]
            self.terminals = terminals[-self.max_size:]
            self.concatenated_rews = concatenated_rews[-self.max_size:]
            self.unconcatenated_rews = unconcatenated_rews[-self.max_size:]
        else:
            self.obs = np.concatenate([self.obs, observations])[-self.max_size:]
            self.acs = np.concatenate([self.acs, actions])[-self.max_size:]
            self.next_obs = np.concatenate([self.next_obs, next_observations])[-self.max_size:]
            self.terminals = np.concatenate([self.terminals, terminals])[-self.max_size:]
            self.concatenated_rews = np.concatenate([self.concatenated_rews, concatenated_rews])[-self.max_size:]
            # NOTE(review): unlike the arrays above, the per-rollout reward
            # list is never truncated to max_size here (original behavior kept).
            if isinstance(unconcatenated_rews, list):
                self.unconcatenated_rews += unconcatenated_rews
            else:
                self.unconcatenated_rews.append(unconcatenated_rews)

    ########################################
    ########################################

    def sample_random_rollouts(self, num_rollouts):
        """Sample `num_rollouts` paths uniformly, without replacement."""
        rand_indices = np.random.permutation(len(self.paths))[:num_rollouts]
        # BUG FIX: `self.paths` is a python list and cannot be indexed with
        # an ndarray of indices (TypeError); gather the chosen paths explicitly.
        return [self.paths[i] for i in rand_indices]

    def sample_recent_rollouts(self, num_rollouts=1):
        """Return the most recent `num_rollouts` paths."""
        return self.paths[-num_rollouts:]

    ########################################
    ########################################

    def sample_random_data(self, batch_size):
        """Sample `batch_size` transitions uniformly, without replacement."""
        assert self.obs.shape[0] == self.acs.shape[0] == self.concatenated_rews.shape[0] \
            == self.next_obs.shape[0] == self.terminals.shape[0]
        rand_indices = np.random.permutation(self.obs.shape[0])[:batch_size]
        return self.obs[rand_indices], self.acs[rand_indices], self.concatenated_rews[rand_indices], \
            self.next_obs[rand_indices], self.terminals[rand_indices]

    def sample_recent_data(self, batch_size=1, concat_rew=True):
        """Return the most recent transitions.

        With `concat_rew=True`, returns exactly the last `batch_size` rows
        of each flat array. Otherwise, returns whole recent rollouts
        covering at least `batch_size` steps, with rewards kept per-rollout.
        """
        if concat_rew:
            return self.obs[-batch_size:], self.acs[-batch_size:], self.concatenated_rews[-batch_size:], \
                self.next_obs[-batch_size:], self.terminals[-batch_size:]

        # walk backwards through rollouts until enough datapoints are gathered
        num_recent_rollouts_to_return = 0
        num_datapoints_so_far = 0
        index = -1
        while num_datapoints_so_far < batch_size:
            recent_rollout = self.paths[index]
            index -= 1
            num_recent_rollouts_to_return += 1
            num_datapoints_so_far += get_pathlength(recent_rollout)
        rollouts_to_return = self.paths[-num_recent_rollouts_to_return:]
        observations, actions, next_observations, terminals, \
            concatenated_rews, unconcatenated_rews = convert_listofrollouts(rollouts_to_return)
        return observations, actions, unconcatenated_rews, next_observations, terminals
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/policies/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/policies/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/policies/__pycache__/argmax_policy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285/policies/__pycache__/argmax_policy.cpython-37.pyc -------------------------------------------------------------------------------- /hw3/cs285/policies/argmax_policy.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class ArgMaxPolicy: 4 | 5 | def __init__(self, critic, device): 6 | self.critic = critic 7 | self.device = device 8 | 9 | def get_action(self, obs): 10 | if len(obs.shape) > 1: 11 | observation = torch.tensor(obs).to(self.device) 12 | else: 13 | observation = torch.tensor(obs[None]).to(self.device) 14 | # TODO: pass observation to critic and use argmax of the resulting Q values as the action 15 | return self.critic.Q_func(observation).squeeze().argmax().item() 16 | -------------------------------------------------------------------------------- /hw3/cs285_hw3.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/cs285_hw3.pdf -------------------------------------------------------------------------------- /hw3/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.10.11 2 | mujoco-py==1.50.1.35 3 | matplotlib==2.2.2 4 | ipython==6.4.0 5 | moviepy==1.0.0 6 | box2d-py -------------------------------------------------------------------------------- /hw3/results/Q1/dqn_double_q_test_pong_PongNoFrameskip-v4_06-07-2020_01-47-43/events.out.tfevents.1594014463.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q1/dqn_double_q_test_pong_PongNoFrameskip-v4_06-07-2020_01-47-43/events.out.tfevents.1594014463.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q1/dqn_double_q_test_pong_PongNoFrameskip-v4_06-07-2020_01-47-43/openaigym.video.0.15504.video004000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q1/dqn_double_q_test_pong_PongNoFrameskip-v4_06-07-2020_01-47-43/openaigym.video.0.15504.video004000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q1/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/events.out.tfevents.1593891399.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q1/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/events.out.tfevents.1593891399.DESKTOP-U53KV1A 
-------------------------------------------------------------------------------- /hw3/results/Q1/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/openaigym.video.0.15892.video004000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q1/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/openaigym.video.0.15892.video004000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q1/pong-comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q1/pong-comp.png -------------------------------------------------------------------------------- /hw3/results/Q2/double_dqn/dqn_double_q_double_dqn_ll_seed1_LunarLander-v2_04-07-2020_21-47-22/events.out.tfevents.1593913642.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/double_dqn/dqn_double_q_double_dqn_ll_seed1_LunarLander-v2_04-07-2020_21-47-22/events.out.tfevents.1593913642.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q2/double_dqn/dqn_double_q_double_dqn_ll_seed2_LunarLander-v2_04-07-2020_21-47-30/events.out.tfevents.1593913650.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/double_dqn/dqn_double_q_double_dqn_ll_seed2_LunarLander-v2_04-07-2020_21-47-30/events.out.tfevents.1593913650.DESKTOP-U53KV1A 
-------------------------------------------------------------------------------- /hw3/results/Q2/double_dqn/dqn_double_q_double_dqn_ll_seed3_LunarLander-v2_04-07-2020_21-47-38/events.out.tfevents.1593913658.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/double_dqn/dqn_double_q_double_dqn_ll_seed3_LunarLander-v2_04-07-2020_21-47-38/events.out.tfevents.1593913658.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q2/double_dqn_eval_ll.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/double_dqn_eval_ll.png -------------------------------------------------------------------------------- /hw3/results/Q2/dqn/dqn_dqn_ll_seed1_LunarLander-v2_04-07-2020_21-46-58/events.out.tfevents.1593913618.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/dqn/dqn_dqn_ll_seed1_LunarLander-v2_04-07-2020_21-46-58/events.out.tfevents.1593913618.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q2/dqn/dqn_dqn_ll_seed2_LunarLander-v2_04-07-2020_21-47-06/events.out.tfevents.1593913626.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/dqn/dqn_dqn_ll_seed2_LunarLander-v2_04-07-2020_21-47-06/events.out.tfevents.1593913626.DESKTOP-U53KV1A 
-------------------------------------------------------------------------------- /hw3/results/Q2/dqn/dqn_dqn_ll_seed3_LunarLander-v2_04-07-2020_21-47-15/events.out.tfevents.1593913635.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q2/dqn/dqn_dqn_ll_seed3_LunarLander-v2_04-07-2020_21-47-15/events.out.tfevents.1593913635.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/events.out.tfevents.1593891399.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/events.out.tfevents.1593891399.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/openaigym.video.0.15892.video004000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/openaigym.video.0.15892.video004000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_06-07-2020_01-46-09/events.out.tfevents.1594014369.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_06-07-2020_01-46-09/events.out.tfevents.1594014369.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_06-07-2020_01-46-09/openaigym.video.0.14904.video005000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-init/dqn_test_pong_PongNoFrameskip-v4_06-07-2020_01-46-09/openaigym.video.0.14904.video005000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q3-init/init_pong_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-init/init_pong_comp.png -------------------------------------------------------------------------------- /hw3/results/Q3-lrmult/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/events.out.tfevents.1593891399.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/events.out.tfevents.1593891399.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q3-lrmult/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/openaigym.video.0.15892.video004000.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/dqn_test_pong_PongNoFrameskip-v4_04-07-2020_15-36-39/openaigym.video.0.15892.video004000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q3-lrmult/dqn_test_pong_lrmult0.5_PongNoFrameskip-v4_06-07-2020_01-51-13/events.out.tfevents.1594014673.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/dqn_test_pong_lrmult0.5_PongNoFrameskip-v4_06-07-2020_01-51-13/events.out.tfevents.1594014673.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q3-lrmult/dqn_test_pong_lrmult0.5_PongNoFrameskip-v4_06-07-2020_01-51-13/openaigym.video.0.12644.video004000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/dqn_test_pong_lrmult0.5_PongNoFrameskip-v4_06-07-2020_01-51-13/openaigym.video.0.12644.video004000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q3-lrmult/dqn_test_pong_lrmult2.0_PongNoFrameskip-v4_06-07-2020_01-51-32/events.out.tfevents.1594014692.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/dqn_test_pong_lrmult2.0_PongNoFrameskip-v4_06-07-2020_01-51-32/events.out.tfevents.1594014692.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw3/results/Q3-lrmult/dqn_test_pong_lrmult2.0_PongNoFrameskip-v4_06-07-2020_01-51-32/openaigym.video.0.9872.video004000.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/dqn_test_pong_lrmult2.0_PongNoFrameskip-v4_06-07-2020_01-51-32/openaigym.video.0.9872.video004000.mp4 -------------------------------------------------------------------------------- /hw3/results/Q3-lrmult/lrmult_pong_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q3-lrmult/lrmult_pong_comp.png -------------------------------------------------------------------------------- /hw3/results/Q4-1-100/5-seed-1-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-1-100/5-seed-1-100.png -------------------------------------------------------------------------------- /hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_14-59-37/events.out.tfevents.1593975577.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_14-59-37/events.out.tfevents.1593975577.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-14-01/events.out.tfevents.1593980041.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-14-01/events.out.tfevents.1593980041.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-20-04/events.out.tfevents.1593980404.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-20-04/events.out.tfevents.1593980404.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-23-26/events.out.tfevents.1593980606.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-23-26/events.out.tfevents.1593980606.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-27-20/events.out.tfevents.1593980840.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-1-100/ac_1_100_CartPole-v0_05-07-2020_16-27-20/events.out.tfevents.1593980840.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-10-10/5-seed-10-10.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-10-10/5-seed-10-10.png -------------------------------------------------------------------------------- /hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-12-48/events.out.tfevents.1593976368.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-12-48/events.out.tfevents.1593976368.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-47-19/events.out.tfevents.1593978439.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-47-19/events.out.tfevents.1593978439.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-49-24/events.out.tfevents.1593978564.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-49-24/events.out.tfevents.1593978564.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-59-55/events.out.tfevents.1593979195.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_15-59-55/events.out.tfevents.1593979195.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_16-02-28/events.out.tfevents.1593979348.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4-10-10/ac_10_10_CartPole-v0_05-07-2020_16-02-28/events.out.tfevents.1593979348.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4/ac_100_1_CartPole-v0_05-07-2020_14-54-21/events.out.tfevents.1593975261.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4/ac_100_1_CartPole-v0_05-07-2020_14-54-21/events.out.tfevents.1593975261.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4/ac_10_10_CartPole-v0_05-07-2020_15-12-48/events.out.tfevents.1593976368.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4/ac_10_10_CartPole-v0_05-07-2020_15-12-48/events.out.tfevents.1593976368.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4/ac_1_100_CartPole-v0_05-07-2020_14-59-37/events.out.tfevents.1593975577.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4/ac_1_100_CartPole-v0_05-07-2020_14-59-37/events.out.tfevents.1593975577.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4/ac_1_1_CartPole-v0_05-07-2020_14-51-47/events.out.tfevents.1593975107.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4/ac_1_1_CartPole-v0_05-07-2020_14-51-47/events.out.tfevents.1593975107.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q4/q4-comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q4/q4-comp.png -------------------------------------------------------------------------------- /hw3/results/Q5-HC/ac_10_10_HalfCheetah-v2_05-07-2020_18-28-51/events.out.tfevents.1593988131.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q5-HC/ac_10_10_HalfCheetah-v2_05-07-2020_18-28-51/events.out.tfevents.1593988131.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q5-HC/hc_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q5-HC/hc_comp.png -------------------------------------------------------------------------------- 
/hw3/results/Q5-HC/pg_hc_b30000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_11-45-56/events.out.tfevents.1593531956.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q5-HC/pg_hc_b30000_lr0.02_nnbaseline_HalfCheetah-v2_30-06-2020_11-45-56/events.out.tfevents.1593531956.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q5-IP/IP_ac_eval.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q5-IP/IP_ac_eval.png -------------------------------------------------------------------------------- /hw3/results/Q5-IP/ac_10_10_InvertedPendulum-v2_05-07-2020_18-28-19/events.out.tfevents.1593988099.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q5-IP/ac_10_10_InvertedPendulum-v2_05-07-2020_18-28-19/events.out.tfevents.1593988099.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/results/Q5-IP/ac_b30k_10_10_InvertedPendulum-v2_05-07-2020_19-45-39/events.out.tfevents.1593992739.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw3/results/Q5-IP/ac_b30k_10_10_InvertedPendulum-v2_05-07-2020_19-45-39/events.out.tfevents.1593992739.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw3/setup.py: 
-------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup 3 | 4 | setup( 5 | name='cs285', 6 | version='0.1.0', 7 | packages=['cs285'], 8 | ) -------------------------------------------------------------------------------- /hw4/README.txt: -------------------------------------------------------------------------------- 1 | 1) See hw1 if you'd like to see installation instructions. You do NOT have to redo them. 2 | 3 | ############################################## 4 | ############################################## 5 | 6 | 7 | 2) Code: 8 | 9 | ------------------------------------------- 10 | 11 | Files to look at, even though there are no explicit 'TODO' markings: 12 | - scripts/run_hw4_mb.py 13 | - infrastructure/rl_trainer.py 14 | 15 | ------------------------------------------- 16 | 17 | Blanks to be filled in now (for this assignment) are marked with 'TODO' 18 | 19 | The following files have these: 20 | - agents/mb_agent.py 21 | - models/ff_model.py 22 | - policies/MPC_policy.py 23 | - infrastructure/utils.py 24 | 25 | ############################################## 26 | ############################################## 27 | 28 | 29 | 3) Commands: 30 | 31 | Please refer to the PDF for the specific commands needed for different questions. 32 | 33 | ############################################## 34 | 35 | 36 | 4) Visualize saved tensorboard event file: 37 | 38 | $ cd cs285/data/ 39 | $ tensorboard --logdir . 
40 | 41 | Then, navigate to shown url to see scalar summaries as plots (in 'scalar' tab), as well as videos (in 'images' tab) -------------------------------------------------------------------------------- /hw4/cs285/agents/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw4/cs285/agents/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/agents/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/agents/__pycache__/mb_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/agents/__pycache__/mb_agent.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from cs285.envs import ant 2 | from cs285.envs import cheetah 3 | from cs285.envs import obstacles 4 | from cs285.envs import reacher -------------------------------------------------------------------------------- /hw4/cs285/envs/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/ant/__init__.py: 
-------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='ant-cs285-v0', 5 | entry_point='cs285.envs.ant:AntEnv', 6 | max_episode_steps=1000, 7 | ) 8 | from cs285.envs.ant.ant import AntEnv 9 | -------------------------------------------------------------------------------- /hw4/cs285/envs/ant/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/ant/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/ant/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/ant/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/ant/__pycache__/ant.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/ant/__pycache__/ant.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/ant/__pycache__/ant.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/ant/__pycache__/ant.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/cheetah/__init__.py: 
-------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='cheetah-cs285-v0', 5 | entry_point='cs285.envs.cheetah:HalfCheetahEnv', 6 | max_episode_steps=1000, 7 | ) 8 | from cs285.envs.cheetah.cheetah import HalfCheetahEnv 9 | -------------------------------------------------------------------------------- /hw4/cs285/envs/cheetah/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/cheetah/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/cheetah/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/cheetah/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/cheetah/__pycache__/cheetah.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/cheetah/__pycache__/cheetah.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/cheetah/__pycache__/cheetah.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/cheetah/__pycache__/cheetah.cpython-37.pyc -------------------------------------------------------------------------------- 
/hw4/cs285/envs/obstacles/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='obstacles-cs285-v0', 5 | entry_point='cs285.envs.obstacles:Obstacles', 6 | max_episode_steps=500, 7 | ) 8 | from cs285.envs.obstacles.obstacles_env import Obstacles 9 | -------------------------------------------------------------------------------- /hw4/cs285/envs/obstacles/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/obstacles/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/obstacles/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/obstacles/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/obstacles/__pycache__/obstacles_env.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/obstacles/__pycache__/obstacles_env.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/obstacles/__pycache__/obstacles_env.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/obstacles/__pycache__/obstacles_env.cpython-37.pyc 
-------------------------------------------------------------------------------- /hw4/cs285/envs/reacher/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | 3 | register( 4 | id='reacher-cs285-v0', 5 | entry_point='cs285.envs.reacher:Reacher7DOFEnv', 6 | max_episode_steps=500, 7 | ) 8 | from cs285.envs.reacher.reacher_env import Reacher7DOFEnv 9 | -------------------------------------------------------------------------------- /hw4/cs285/envs/reacher/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/reacher/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/reacher/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/reacher/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/reacher/__pycache__/reacher_env.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/reacher/__pycache__/reacher_env.cpython-35.pyc -------------------------------------------------------------------------------- /hw4/cs285/envs/reacher/__pycache__/reacher_env.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/envs/reacher/__pycache__/reacher_env.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/infrastructure/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/infrastructure/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/infrastructure/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorboardX import SummaryWriter 3 | import numpy as np 4 | 5 | class Logger: 6 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None): 7 | self._log_dir = log_dir 8 | print('########################') 9 | print('logging outputs to ', log_dir) 10 | print('########################') 11 | self._n_logged_samples = n_logged_samples 12 | self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1) 13 | 14 | def log_scalar(self, scalar, name, step_): 15 | self._summ_writer.add_scalar('{}'.format(name), scalar, step_) 16 | 17 | def log_scalars(self, scalar_dict, group_name, step, phase): 18 | """Will log all scalars in the same plot.""" 19 | self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step) 20 | 21 | def log_image(self, image, name, step): 22 | assert(len(image.shape) == 3) # [C, H, W] 23 | self._summ_writer.add_image('{}'.format(name), image, step) 24 | 25 | def log_video(self, video_frames, name, step, fps=10): 26 | assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input tensor for video logging!" 
27 | self._summ_writer.add_video('{}'.format(name), video_frames, step, fps=fps) 28 | 29 | def log_paths_as_videos(self, paths, step, max_videos_to_save=2, fps=10, video_title='video'): 30 | 31 | # reshape the rollouts 32 | videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths] 33 | 34 | # max rollout length 35 | max_videos_to_save = np.min([max_videos_to_save, len(videos)]) 36 | max_length = videos[0].shape[0] 37 | for i in range(max_videos_to_save): 38 | if videos[i].shape[0]>max_length: 39 | max_length = videos[i].shape[0] 40 | 41 | # pad rollouts to all be same length 42 | for i in range(max_videos_to_save): 43 | if videos[i].shape[0] 0, "Figure logging requires input shape [batch x figures]!" 54 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 55 | 56 | def log_figure(self, figure, name, step, phase): 57 | """figure: matplotlib.pyplot figure handle""" 58 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 59 | 60 | def log_graph(self, array, name, step, phase): 61 | """figure: matplotlib.pyplot figure handle""" 62 | im = plot_graph(array) 63 | self._summ_writer.add_image('{}_{}'.format(name, phase), im, step) 64 | 65 | def dump_scalars(self, log_path=None): 66 | log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path 67 | self._summ_writer.export_scalars_to_json(log_path) 68 | 69 | def flush(self): 70 | self._summ_writer.flush() 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /hw4/cs285/models/__pycache__/ff_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/models/__pycache__/ff_model.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285/models/ff_model.py: 
-------------------------------------------------------------------------------- 1 | from cs285.infrastructure.utils import normalize, unnormalize, MLP 2 | import numpy as np 3 | import torch 4 | from torch import nn 5 | 6 | class FFModel: 7 | def __init__(self, ac_dim, ob_dim, n_layers, size, device, learning_rate = 0.001): 8 | # init vars 9 | self.device = device 10 | 11 | #TODO - specify ouput dim and input dim of delta func MLP 12 | self.delta_func = MLP(input_dim = ob_dim + ac_dim, 13 | output_dim = ob_dim, 14 | n_layers = n_layers, 15 | size = size, 16 | device = self.device, 17 | discrete = True) 18 | 19 | #TODO - define the delta func optimizer. Adam optimizer will work well. 20 | self.optimizer = torch.optim.Adam(self.delta_func.parameters(), lr = learning_rate) 21 | 22 | ############################# 23 | 24 | def get_prediction(self, obs, acs, data_statistics): 25 | if len(obs.shape) == 1 or len(acs.shape) == 1: 26 | obs = np.squeeze(obs)[None] 27 | acs = np.squeeze(acs)[None] 28 | 29 | norm_obs = normalize(obs, data_statistics['obs_mean'], data_statistics['obs_std']) 30 | norm_acs = normalize(acs, data_statistics['acs_mean'], data_statistics['acs_std']) 31 | 32 | norm_input = torch.Tensor(np.concatenate((norm_obs, norm_acs), axis = 1)).to(self.device) 33 | norm_delta = self.delta_func(norm_input).cpu().detach().numpy() 34 | 35 | delta = unnormalize(norm_delta, data_statistics['delta_mean'], data_statistics['delta_std']) 36 | return obs + delta 37 | 38 | def update(self, observations, actions, next_observations, data_statistics): 39 | 40 | norm_obs = normalize(np.squeeze(observations), data_statistics['obs_mean'], data_statistics['obs_std']) 41 | norm_acs = normalize(np.squeeze(actions), data_statistics['acs_mean'], data_statistics['acs_std']) 42 | 43 | pred_delta = self.delta_func(torch.Tensor(np.concatenate((norm_obs, norm_acs), axis = 1)).to(self.device)) 44 | true_delta = torch.Tensor(normalize(next_observations - observations, 
data_statistics['delta_mean'], data_statistics['delta_std'])).to(self.device) 45 | 46 | # TODO(Q1) Define a loss function that takes as input normalized versions of predicted change in state and true change in state 47 | loss = nn.functional.mse_loss(true_delta, pred_delta) 48 | self.optimizer.zero_grad() 49 | loss.backward() 50 | self.optimizer.step() 51 | 52 | return loss.item() 53 | -------------------------------------------------------------------------------- /hw4/cs285/policies/MPC_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class MPCPolicy: 4 | def __init__(self, 5 | env, 6 | ac_dim, 7 | dyn_models, 8 | horizon, 9 | N, 10 | **kwargs): 11 | super().__init__(**kwargs) 12 | 13 | # init vars 14 | self.env = env 15 | self.dyn_models = dyn_models 16 | self.horizon = horizon 17 | self.N = N 18 | self.data_statistics = None # NOTE must be updated from elsewhere 19 | 20 | self.ob_dim = self.env.observation_space.shape[0] 21 | 22 | # action space 23 | self.ac_space = self.env.action_space 24 | self.ac_dim = ac_dim 25 | self.low = self.ac_space.low 26 | self.high = self.ac_space.high 27 | 28 | def sample_action_sequences(self, num_sequences, horizon): 29 | # TODO(Q1) uniformly sample trajectories and return an array of 30 | # dimensions (num_sequences, horizon, self.ac_dim) 31 | 32 | random_action_sequences = np.random.uniform(self.low, self.high, (num_sequences, horizon, self.ac_dim)) 33 | return random_action_sequences 34 | 35 | def get_action(self, obs): 36 | 37 | if self.data_statistics is None: 38 | #print("WARNING: performing random actions.") 39 | return self.sample_action_sequences(num_sequences = 1, horizon = 1)[0, 0] 40 | 41 | #sample random actions (Nxhorizon) 42 | candidate_action_sequences = self.sample_action_sequences(num_sequences=self.N, horizon=self.horizon) 43 | 44 | # a list you can use for storing the predicted reward for each candidate sequence 45 | 
predicted_rewards_per_ens = [] 46 | 47 | for model in self.dyn_models: 48 | # TODO(Q2) 49 | # for each candidate action sequence, predict a sequence of 50 | # states for each dynamics model in your ensemble 51 | 52 | # once you have a sequence of predicted states from each model in your 53 | # ensemble, calculate the reward for each sequence using self.env.get_reward (See files in envs to see how to call this) 54 | sim_obs = np.tile(obs, (self.N, 1)) 55 | model_rewards = np.zeros(self.N) 56 | 57 | for t in range(self.horizon): 58 | rew, _ = self.env.get_reward(sim_obs, candidate_action_sequences[:, t, :]) 59 | model_rewards += rew 60 | sim_obs = model.get_prediction(sim_obs, candidate_action_sequences[:, t, :], self.data_statistics) 61 | predicted_rewards_per_ens.append(model_rewards) 62 | 63 | # calculate mean_across_ensembles(predicted rewards). 64 | # the matrix dimensions should change as follows: [ens,N] --> N 65 | predicted_rewards = np.mean(predicted_rewards_per_ens, axis = 0) # TODO(Q2) 66 | 67 | # pick the action sequence and return the 1st element of that sequence 68 | best_index = np.argmax(predicted_rewards) #TODO(Q2) 69 | best_action_sequence = candidate_action_sequences[best_index] #TODO(Q2) 70 | action_to_take = best_action_sequence[0] # TODO(Q2) 71 | return action_to_take 72 | -------------------------------------------------------------------------------- /hw4/cs285/policies/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw4/cs285/policies/__pycache__/MPC_policy.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/policies/__pycache__/MPC_policy.cpython-37.pyc 
-------------------------------------------------------------------------------- /hw4/cs285/policies/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285/policies/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw4/cs285_hw4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/cs285_hw4.pdf -------------------------------------------------------------------------------- /hw4/results/problem-1/n500_arch1x32_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-1/n500_arch1x32_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-1/n500_arch1x32_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-1/n500_arch1x32_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-1/n500_arch2x250_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-1/n500_arch2x250_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-1/n500_arch2x250_predictions.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-1/n500_arch2x250_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-1/n5_arch2x250_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-1/n5_arch2x250_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-1/n5_arch2x250_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-1/n5_arch2x250_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/events.out.tfevents.1594352996.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/events.out.tfevents.1594352996.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/itr_0_losses.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-2/mb_obstacles_singleiteration_obstacles-cs285-v0_09-07-2020_23-49-56/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-3-cheetah/cheetah_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-cheetah/cheetah_returns.png -------------------------------------------------------------------------------- /hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/events.out.tfevents.1594417421.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/events.out.tfevents.1594417421.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-cheetah/mb_cheetah_cheetah-cs285-v0_10-07-2020_17-43-41/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/events.out.tfevents.1594417429.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/events.out.tfevents.1594417429.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-obstacles/mb_obstacles_obstacles-cs285-v0_10-07-2020_17-43-49/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-3-obstacles/obstacles_returns.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-obstacles/obstacles_returns.png -------------------------------------------------------------------------------- /hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/events.out.tfevents.1594417416.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/events.out.tfevents.1594417416.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/itr_0_predictions.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-reacher/mb_reacher_reacher-cs285-v0_10-07-2020_17-43-36/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-3-reacher/reacher_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-3-reacher/reacher_returns.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/ensemble_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/ensemble_comp.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/events.out.tfevents.1594364771.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/events.out.tfevents.1594364771.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/itr_0_losses.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble1_reacher-cs285-v0_10-07-2020_03-06-11/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/events.out.tfevents.1594364781.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/events.out.tfevents.1594364781.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/itr_0_losses.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble3_reacher-cs285-v0_10-07-2020_03-06-21/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/events.out.tfevents.1594364793.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/events.out.tfevents.1594364793.DESKTOP-U53KV1A -------------------------------------------------------------------------------- 
/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-ensemble/mb_q5_reacher_ensemble5_reacher-cs285-v0_10-07-2020_03-06-33/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/horizon_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/horizon_comp.png -------------------------------------------------------------------------------- 
/hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/events.out.tfevents.1594364548.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/events.out.tfevents.1594364548.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon15_reacher-cs285-v0_10-07-2020_03-02-28/itr_0_predictions.png 
-------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/events.out.tfevents.1594364556.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/events.out.tfevents.1594364556.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/itr_0_predictions.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon30_reacher-cs285-v0_10-07-2020_03-02-36/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/events.out.tfevents.1594364512.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/events.out.tfevents.1594364512.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/itr_0_losses.png -------------------------------------------------------------------------------- /hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/itr_0_predictions.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-horizon/mb_q5_reacher_horizon5_reacher-cs285-v0_10-07-2020_03-01-52/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/events.out.tfevents.1594364663.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/events.out.tfevents.1594364663.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/itr_0_losses.png -------------------------------------------------------------------------------- 
/hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq1000_reacher-cs285-v0_10-07-2020_03-04-23/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/events.out.tfevents.1594364620.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/events.out.tfevents.1594364620.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/itr_0_losses.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/itr_0_losses.npy -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/itr_0_losses.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/itr_0_losses.png 
-------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/itr_0_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/mb_q5_reacher_numseq100_reacher-cs285-v0_10-07-2020_03-03-40/itr_0_predictions.png -------------------------------------------------------------------------------- /hw4/results/problem-4-numseq/numseq_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw4/results/problem-4-numseq/numseq_comp.png -------------------------------------------------------------------------------- /hw4/setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup 3 | 4 | setup( 5 | name='cs285', 6 | version='0.1.0', 7 | packages=['cs285'], 8 | ) -------------------------------------------------------------------------------- /hw5/README.md: -------------------------------------------------------------------------------- 1 | # Section 5 Exploration in RL 2 | 3 | Below is the report for HW5. All data used can be found in the results folder. To view the tensorboard for a specific part navigate to that part's folder and run 4 | ```commandline 5 | tensorboard --logdir . 
6 | ``` 7 | All commands used will be in the README.txt 8 | 9 | ## Problem 1 10 | 11 | First we run our actor-critic on the pointmass environment with no extra exploration tactic in order to establish a baseline performance: 12 | 13 | ![No Exploration](results/problem-1-none/no_exploration_returns.png) 14 | 15 | It can be seen that the agent only managed to find the goal area once out of three tries (with seed 1). It is clear from these trials that the agent would benefit from an exploration bonus. 16 | 17 | First the the histogram method is tested across the same three seeds: 18 | 19 | ![Histogram](results/problem-1-hist/hist_returns.png) 20 | 21 | With a histogram bonus all three runs are able to find the bonus area, and sooner in the case of the 1 seed. 22 | 23 | ## Problem 2 24 | 25 | Now the Radial Basis Function algorithm is tested in the same environment: 26 | 27 | ![RBF](results/problem-2/rbf_returns.png) 28 | 29 | While the RBF exploration performs better than no bonus it seems to perform worse than the histogram method. Since it is also more computationally expensive it seems that the histogram model is preferable when it is able to be used (small state spaces). 30 | 31 | ## Problem 3 32 | 33 | Finally the exemplar model is tested: 34 | 35 | ![Ex2](results/problem-3/ex2_returns.png) 36 | 37 | Once again the exemplar model provides a big boost in performance from no boost, but seems to fall short of the histogram's performance. This makes a lot of sense: the exemplar is really a sort of just an approximation of the histogram. While it is applicable to a far wider range of problems it tends to be less accurate when exact counts can be reasonably kept (when generalization doesn't make sense/is not needed). Interestingly it can be seen that with seed 21 (Red) it found the goal but then forgot about it. 
This seems to suggest that the exploration was a bit too strong in this case - the bonus steered the agent towards states it hadn't been to in a while instead of the known high reward of the goal. 38 | 39 | ## Problem 4 40 | 41 | Now the exemplar model is tested in an environment more suited to its generalization abilities: a sparse Half-Cheetah environment: 42 | 43 | ![Half Cheetah Comparison](results/problem-4/full_comp.png) 44 | 45 | It can be seen that most of the runs failed to develop any sort of rewarding strategies. Interestingly each set of hyperparameters developed only one decent strategy, all on seed 1 (no exploration is in orange, bc0.001 in light blue, bc0.0001 in pink). The two exemplar model runs developed these good strategies much earlier, but also seemed to forget about them. While it is hard to draw conclusions from such a small sample size in an environment with such high variance, it does seem that exploration does not provide a signifigant advantage in this case. 46 | -------------------------------------------------------------------------------- /hw5/README.txt: -------------------------------------------------------------------------------- 1 | 1) The code structure for this homeowrk was heavily modified in order to match the structure of the previous three homeworks. 2 | To this end the PDF does not give the most accurate location instructions but should still be referred to for questions and guidance. 3 | The logging procedure in particular was changed to match the previous assignments. 
4 | 5 | 2) Code: 6 | 7 | Code to look at: 8 | 9 | - scripts/train_ac_exploration_f18.py 10 | - envs/pointmass.py 11 | - infrastructure/rl_trainer.py (Has been changed for this homework) 12 | - infrastructure/utils.py (Has been changed foir this homework) 13 | 14 | Code to fill in as part of HW: 15 | 16 | - agents/ac_agent.py (new Exploratory_ACAgent class added) 17 | - exploration/exploration.py 18 | - exploration/density_model.py 19 | 20 | 3) Commands to run for each problem: 21 | 22 | ########################## 23 | ### P1 Hist PointMass ### 24 | ########################## 25 | 26 | python cs285/scripts/train_ac_exploration_f18.py PointMass-v0 -n 100 -b 1000 -e 3 --density_model none -s 8 --exp_name PM_bc0_s8 27 | python cs285/scripts/train_ac_exploration_f18.py PointMass-v0 -n 100 -b 1000 -e 3 --density_model hist -bc 0.01 -s 8 --exp_name PM_hist_bc0.01_s8 28 | 29 | ########################## 30 | ### P2 RBF PointMass ### 31 | ########################## 32 | 33 | python cs285/scripts/train_ac_exploration_f18.py PointMass-v0 -n 100 -b 1000 -e 3 --density_model rbf -bc 0.01 -s 8 -sig 0.2 --exp_name PM_rbf_bc0.01_s8_sig0.2 34 | 35 | ########################## 36 | ### P3 EX2 PointMass ### 37 | ########################## 38 | 39 | python cs285/scripts/train_ac_exploration_f18.py PointMass-v0 -n 100 -b 1000 -e 3 --density_model ex2 -s 8 -bc 0.05 -kl 0.1 -dlr 0.001 -dh 8 --exp_name PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8 40 | 41 | ########################### 42 | ### P4 HalfCheetah ### 43 | ########################### 44 | 45 | python cs285/scripts/train_ac_exploration_f18.py sparse-cheetah-cs285-v1 -ep 150 --discount 0.9 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 --density_model none --exp_name HC_bc0 46 | python cs285/scripts/train_ac_exploration_f18.py sparse-cheetah-cs285-v1 -ep 150 --discount 0.9 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 --density_model ex2 -bc 0.001 -kl 0.1 -dlr 0.005 -dti 1000 --exp_name HC_bc0.001_kl0.1_dlr0.005_dti1000 47 | python 
cs285/scripts/train_ac_exploration_f18.py sparse-cheetah-cs285-v1 -ep 150 --discount 0.9 -n 100 -e 3 -l 2 -s 32 -b 30000 -lr 0.02 --density_model ex2 -bc 0.0001 -kl 0.1 -dlr 0.005 -dti 10000 --exp_name HC_bc0.0001_kl0.1_dlr0.005_dti10000 48 | 49 | 4) Visualize saved tensorboard event file: 50 | 51 | $ cd cs285/data/ 52 | $ tensorboard --logdir . 53 | 54 | Then, navigate to shown url to see scalar summaries as plots (in 'scalar' tab), as well as videos (in 'images' tab) -------------------------------------------------------------------------------- /hw5/cs285/agents/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw5/cs285/agents/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/agents/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/agents/__pycache__/ac_agent.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/agents/__pycache__/ac_agent.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/critics/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw5/cs285/critics/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/critics/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/critics/__pycache__/bootstrapped_continuous_critic.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/critics/__pycache__/bootstrapped_continuous_critic.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/critics/bootstrapped_continuous_critic.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from cs285.infrastructure.utils import MLP 4 | 5 | class BootstrappedContinuousCritic: 6 | def __init__(self, hparams): 7 | self.ob_dim = hparams['ob_dim'] 8 | self.ac_dim = hparams['ac_dim'] 9 | self.size = hparams['size'] 10 | self.n_layers = hparams['n_layers'] 11 | self.device = hparams['device'] 12 | self.learning_rate = hparams['learning_rate'] 13 | self.num_target_updates = hparams['num_target_updates'] 14 | self.num_grad_steps_per_target_update = hparams['num_grad_steps_per_target_update'] 15 | self.gamma = hparams['gamma'] 16 | 17 | self.value_func = MLP(self.ob_dim, 1, self.n_layers, self.size, self.device, True) 18 | self.optimizer = torch.optim.Adam(self.value_func.parameters(), lr = self.learning_rate) 19 | 20 | def update(self, ob_no, next_ob_no, re_n, terminal_n): 21 | ''' 22 | ts_ob_no, ts_next_ob_no, ts_re_n, ts_terminal_n = map(lambda x: torch.Tensor(x).to(self.device), 23 | [ob_no, next_ob_no, re_n, terminal_n]) 24 | for _ in range(self.num_target_updates): 25 | with torch.no_grad(): 26 | ts_next_V_n = self.value_func(ts_next_ob_no).view(-1) 27 | ts_target_n = ts_re_n + (1 - ts_terminal_n) * 
self.gamma * ts_next_V_n 28 | for _ in range(self.num_grad_steps_per_target_update): 29 | ts_V_n = self.value_func(ts_ob_no).view(-1) 30 | self.optimizer.zero_grad() 31 | loss = nn.functional.mse_loss(ts_V_n, ts_target_n) 32 | loss.backward() 33 | self.optimizer.step() 34 | ''' 35 | ob, next_ob, rew, done = map(lambda x: torch.Tensor(x).to(self.device), [ob_no, next_ob_no, re_n, terminal_n]) 36 | 37 | for update in range(self.num_grad_steps_per_target_update * self.num_target_updates): 38 | if update % self.num_grad_steps_per_target_update == 0: 39 | next_value = self.value_func(next_ob).squeeze() * (1 - done) 40 | target_value = rew + self.gamma * next_value 41 | 42 | self.optimizer.zero_grad() 43 | loss = nn.functional.mse_loss(self.value_func(ob).squeeze(), target_value) 44 | loss.backward() 45 | self.optimizer.step() 46 | target_value.detach_() 47 | #''' 48 | 49 | return loss 50 | -------------------------------------------------------------------------------- /hw5/cs285/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from gym.envs.registration import register 2 | register( 3 | id='sparse-cheetah-cs285-v1', 4 | entry_point='cs285.envs.sparse_half_cheetah:HalfCheetahEnv', 5 | max_episode_steps=1000, 6 | ) 7 | from cs285.envs.sparse_half_cheetah import HalfCheetahEnv 8 | -------------------------------------------------------------------------------- /hw5/cs285/envs/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/envs/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/envs/__pycache__/pointmass.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/envs/__pycache__/pointmass.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/envs/__pycache__/sparse_half_cheetah.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/envs/__pycache__/sparse_half_cheetah.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/envs/sparse_half_cheetah.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | 5 | class HalfCheetahEnv(mujoco_env.MujocoEnv, utils.EzPickle): 6 | def __init__(self): 7 | mujoco_env.MujocoEnv.__init__(self, 'half_cheetah.xml', 5) 8 | utils.EzPickle.__init__(self) 9 | 10 | def step(self, action): 11 | ################################################# 12 | ctrl = False 13 | relu = False 14 | threshold = 10.0 15 | ################################################# 16 | xposbefore = self.sim.data.qpos[0] 17 | self.do_simulation(action, self.frame_skip) 18 | xposafter = self.sim.data.qpos[0] 19 | ob = self._get_obs() 20 | # reward_ctrl = - 0.1 * np.square(action).sum() 21 | # reward_run = (xposafter - xposbefore)/self.dt 22 | ################################################# 23 | if ctrl: 24 | reward_ctrl = - 0.1 * np.square(action).sum() 25 | else: 26 | reward_ctrl = 0 27 | if abs(xposafter) <= threshold: 28 | reward_run = 0.0 29 | else: 30 | if relu: 31 | reward_run = np.sign(xposafter)*(xposafter - xposbefore)/self.dt 32 | else: 33 | reward_run = 1.0 34 | ################################################# 35 | reward = reward_ctrl + reward_run 36 | done = False 37 | return ob, reward, done, 
dict(reward_run=reward_run, reward_ctrl=reward_ctrl) 38 | 39 | def _get_obs(self): 40 | return np.concatenate([ 41 | self.sim.data.qpos.flat[1:], 42 | self.sim.data.qvel.flat, 43 | ]) 44 | 45 | def reset_model(self): 46 | qpos = self.init_qpos + self.np_random.uniform(low=-.1, high=.1, size=self.model.nq) 47 | qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 48 | self.set_state(qpos, qvel) 49 | return self._get_obs() 50 | 51 | def viewer_setup(self): 52 | self.viewer.cam.distance = self.model.stat.extent * 0.5 53 | -------------------------------------------------------------------------------- /hw5/cs285/exploration/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw5/cs285/exploration/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/exploration/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/exploration/__pycache__/density_model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/exploration/__pycache__/density_model.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/exploration/__pycache__/exploration.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/exploration/__pycache__/exploration.cpython-37.pyc 
-------------------------------------------------------------------------------- /hw5/cs285/infrastructure/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw5/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/infrastructure/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/infrastructure/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/infrastructure/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/infrastructure/__pycache__/replay.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/infrastructure/__pycache__/replay.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/infrastructure/__pycache__/replay_buffer.cpython-37.pyc -------------------------------------------------------------------------------- 
/hw5/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/infrastructure/__pycache__/rl_trainer.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/infrastructure/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/infrastructure/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/infrastructure/logger.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorboardX import SummaryWriter 3 | import numpy as np 4 | 5 | class Logger: 6 | def __init__(self, log_dir, n_logged_samples=10, summary_writer=None): 7 | self._log_dir = log_dir 8 | print('########################') 9 | print('logging outputs to ', log_dir) 10 | print('########################') 11 | self._n_logged_samples = n_logged_samples 12 | self._summ_writer = SummaryWriter(log_dir, flush_secs=1, max_queue=1) 13 | 14 | def log_scalar(self, scalar, name, step_): 15 | self._summ_writer.add_scalar('{}'.format(name), scalar, step_) 16 | 17 | def log_scalars(self, scalar_dict, group_name, step, phase): 18 | """Will log all scalars in the same plot.""" 19 | self._summ_writer.add_scalars('{}_{}'.format(group_name, phase), scalar_dict, step) 20 | 21 | def log_image(self, image, name, step): 22 | assert(len(image.shape) == 3) # [C, H, W] 23 | self._summ_writer.add_image('{}'.format(name), image, step) 24 | 25 | def log_video(self, video_frames, name, step, fps=10): 26 | assert len(video_frames.shape) == 5, "Need [N, T, C, H, W] input 
tensor for video logging!" 27 | self._summ_writer.add_video('{}'.format(name), video_frames, step, fps=fps) 28 | 29 | def log_paths_as_videos(self, paths, step, max_videos_to_save=2, fps=10, video_title='video'): 30 | 31 | # reshape the rollouts 32 | videos = [np.transpose(p['image_obs'], [0, 3, 1, 2]) for p in paths] 33 | 34 | # max rollout length 35 | max_videos_to_save = np.min([max_videos_to_save, len(videos)]) 36 | max_length = videos[0].shape[0] 37 | for i in range(max_videos_to_save): 38 | if videos[i].shape[0]>max_length: 39 | max_length = videos[i].shape[0] 40 | 41 | # pad rollouts to all be same length 42 | for i in range(max_videos_to_save): 43 | if videos[i].shape[0] 0, "Figure logging requires input shape [batch x figures]!" 54 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 55 | 56 | def log_figure(self, figure, name, step, phase): 57 | """figure: matplotlib.pyplot figure handle""" 58 | self._summ_writer.add_figure('{}_{}'.format(name, phase), figure, step) 59 | 60 | def log_graph(self, array, name, step, phase): 61 | """figure: matplotlib.pyplot figure handle""" 62 | im = plot_graph(array) 63 | self._summ_writer.add_image('{}_{}'.format(name, phase), im, step) 64 | 65 | def dump_scalars(self, log_path=None): 66 | log_path = os.path.join(self._log_dir, "scalar_data.json") if log_path is None else log_path 67 | self._summ_writer.export_scalars_to_json(log_path) 68 | 69 | def flush(self): 70 | self._summ_writer.flush() 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /hw5/cs285/policies/__init__.py: -------------------------------------------------------------------------------- 1 | #init for making the folder a package 2 | -------------------------------------------------------------------------------- /hw5/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/policies/__pycache__/MLP_policy.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285/policies/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285/policies/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /hw5/cs285_hw5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/cs285_hw5.pdf -------------------------------------------------------------------------------- /hw5/requirements.txt: -------------------------------------------------------------------------------- 1 | gym==0.10.5 2 | mujoco-py==1.50.1.56 3 | numpy 4 | seaborn 5 | tqdm -------------------------------------------------------------------------------- /hw5/results/problem-1-hist/hist_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-hist/hist_returns.png -------------------------------------------------------------------------------- /hw5/results/problem-1-hist/seed11_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-11-30/events.out.tfevents.1595135490.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-hist/seed11_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-11-30/events.out.tfevents.1595135490.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-1-hist/seed1_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-06-41/events.out.tfevents.1595135201.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-hist/seed1_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-06-41/events.out.tfevents.1595135201.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-1-hist/seed21_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-17-01/events.out.tfevents.1595135821.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-hist/seed21_ac_PM_hist_bc0.01_s8_PointMass-v0_19-07-2020_01-17-01/events.out.tfevents.1595135821.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-1-none/no_exploration_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-none/no_exploration_returns.png -------------------------------------------------------------------------------- /hw5/results/problem-1-none/seed11_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-05-20/events.out.tfevents.1595135120.DESKTOP-U53KV1A: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-none/seed11_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-05-20/events.out.tfevents.1595135120.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-1-none/seed1_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-03-45/events.out.tfevents.1595135025.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-none/seed1_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-03-45/events.out.tfevents.1595135025.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-1-none/seed21_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-06-56/events.out.tfevents.1595135216.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-1-none/seed21_ac_PM_bc0_s8_PointMass-v0_19-07-2020_01-06-56/events.out.tfevents.1595135216.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-2/rbf_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-2/rbf_returns.png -------------------------------------------------------------------------------- /hw5/results/problem-2/seed11_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-13-38/events.out.tfevents.1594937618.DESKTOP-U53KV1A: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-2/seed11_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-13-38/events.out.tfevents.1594937618.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-2/seed1_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-10-03/events.out.tfevents.1594937403.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-2/seed1_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-10-03/events.out.tfevents.1594937403.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-2/seed21_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-17-11/events.out.tfevents.1594937831.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-2/seed21_ac_PM_rbf_bc0.01_s8_sig0.2_PointMass-v0_16-07-2020_18-17-11/events.out.tfevents.1594937831.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-3/ex2_returns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-3/ex2_returns.png -------------------------------------------------------------------------------- 
/hw5/results/problem-3/seed11_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_02-37-10/events.out.tfevents.1595140630.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-3/seed11_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_02-37-10/events.out.tfevents.1595140630.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-3/seed1_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_01-08-12/events.out.tfevents.1595135292.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-3/seed1_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_01-08-12/events.out.tfevents.1595135292.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-3/seed21_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_03-56-28/events.out.tfevents.1595145388.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-3/seed21_ac_PM_ex2_s8_bc0.05_kl0.1_dlr0.001_dh8_PointMass-v0_19-07-2020_03-56-28/events.out.tfevents.1595145388.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/bc0.0001/seed11_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_19-07-2020_14-18-04/events.out.tfevents.1595182684.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/bc0.0001/seed11_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_19-07-2020_14-18-04/events.out.tfevents.1595182684.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/bc0.0001/seed1_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_19-07-2020_01-08-22/events.out.tfevents.1595135302.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/bc0.0001/seed1_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_19-07-2020_01-08-22/events.out.tfevents.1595135302.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/bc0.0001/seed21_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_20-07-2020_05-22-16/events.out.tfevents.1595236936.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/bc0.0001/seed21_ac_HC_bc0.0001_kl0.1_dlr0.005_dti10000_sparse-cheetah-cs285-v1_20-07-2020_05-22-16/events.out.tfevents.1595236936.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/bc0.001/seed11_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_04-18-46/events.out.tfevents.1595146726.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/bc0.001/seed11_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_04-18-46/events.out.tfevents.1595146726.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/bc0.001/seed1_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_01-08-17/events.out.tfevents.1595135297.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/bc0.001/seed1_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_01-08-17/events.out.tfevents.1595135297.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/bc0.001/seed21_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_06-40-41/events.out.tfevents.1595155241.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/bc0.001/seed21_ac_HC_bc0.001_kl0.1_dlr0.005_dti1000_sparse-cheetah-cs285-v1_19-07-2020_06-40-41/events.out.tfevents.1595155241.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/full_comp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/full_comp.png -------------------------------------------------------------------------------- 
/hw5/results/problem-4/none/seed11_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_00-57-30/events.out.tfevents.1594875450.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/none/seed11_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_00-57-30/events.out.tfevents.1594875450.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/none/seed1_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_00-26-39/events.out.tfevents.1594873599.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/none/seed1_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_00-26-39/events.out.tfevents.1594873599.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/results/problem-4/none/seed21_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_01-28-19/events.out.tfevents.1594877299.DESKTOP-U53KV1A: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mdeib/berkeley-deep-RL-pytorch-solutions/47da61101d144e14926975f3732af7ac020382b3/hw5/results/problem-4/none/seed21_ac_HC_bc0_sparse-cheetah-cs285-v1_16-07-2020_01-28-19/events.out.tfevents.1594877299.DESKTOP-U53KV1A -------------------------------------------------------------------------------- /hw5/setup.py: -------------------------------------------------------------------------------- 1 | # setup.py 2 | from setuptools import setup 3 | 4 | setup( 5 | name='cs285', 6 | version='0.1.0', 7 | packages=['cs285'], 8 | ) --------------------------------------------------------------------------------