├── README.md ├── cs287hw1 ├── .idea │ ├── cs287-hw1-code.iml │ ├── inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── workspace.xml ├── .vs │ ├── VSWorkspaceState.json │ ├── cs287-hw1-code │ │ └── v15 │ │ │ └── .suo │ └── slnx.sqlite ├── README.md ├── __pycache__ │ ├── logger.cpython-36.pyc │ └── logger.cpython-37.pyc ├── data │ ├── part1 │ │ ├── GridWorldEnv0 │ │ │ ├── policy_typedeterministic_temperature1.0 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ ├── policy_typemax_ent_temperature0.01 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ ├── policy_typemax_ent_temperature1.0 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ └── policy_typemax_ent_temperature1e-05 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ └── GridWorldEnv1 │ │ │ ├── policy_typedeterministic_temperature1.0 │ │ │ ├── contour.png │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ ├── policy_typemax_ent_temperature0.01 │ │ │ ├── contour.png │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ ├── policy_typemax_ent_temperature1.0 │ │ │ ├── contour.png │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ └── policy_typemax_ent_temperature1e-05 │ │ │ ├── contour.png │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ ├── part2_ab │ │ ├── DoubleIntegratorEnv │ │ │ ├── modelinear_state_discretization151 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ ├── 
modelinear_state_discretization21 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ ├── modelinear_state_discretization51 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ ├── modenn_state_discretization151 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ ├── modenn_state_discretization21 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ └── modenn_state_discretization51 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ └── MountainCarEnv │ │ │ ├── modelinear_state_discretization151 │ │ │ ├── contour.png │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ ├── modelinear_state_discretization21 │ │ │ ├── contour.png │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ ├── modelinear_state_discretization51 │ │ │ ├── contour.png │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ ├── modenn_state_discretization151 │ │ │ ├── contour.png │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ ├── modenn_state_discretization21 │ │ │ ├── contour.png │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ └── modenn_state_discretization51 │ │ │ ├── contour.png │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ ├── part2_c │ │ ├── CartPoleEnv │ │ │ ├── linear │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ └── nn │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ 
│ │ ├── params.json │ │ │ │ └── progress.csv │ │ ├── DoubleIntegratorEnv │ │ │ ├── linear │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ └── nn │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ ├── MountainCarEnv │ │ │ ├── linear │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ └── nn │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ └── SwingUpEnv │ │ │ ├── linear │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ └── nn │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ ├── part2_d │ │ ├── CartPoleEnv │ │ │ ├── policy_typelook_ahead_modelinear_horizon1 │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ ├── policy_typelook_ahead_modelinear_horizon2 │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ └── policy_typelook_ahead_modelinear_horizon3 │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ ├── DoubleIntegratorEnv │ │ │ ├── policy_typelook_ahead_modelinear_horizon1 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ ├── policy_typelook_ahead_modelinear_horizon2 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ └── policy_typelook_ahead_modelinear_horizon3 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ ├── MountainCarEnv │ │ │ ├── 
policy_typelook_ahead_modelinear_horizon1 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ ├── policy_typelook_ahead_modelinear_horizon2 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ └── policy_typelook_ahead_modelinear_horizon3 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ └── SwingUpEnv │ │ │ ├── policy_typelook_ahead_modelinear_horizon1 │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ ├── policy_typelook_ahead_modelinear_horizon2 │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ └── policy_typelook_ahead_modelinear_horizon3 │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ ├── part3_a │ │ ├── DoubleIntegratorEnv │ │ │ ├── contour.png │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ └── MountainCarEnv │ │ │ ├── contour.png │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ ├── part3_b │ │ ├── CartPoleEnv │ │ │ ├── horizon1 │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ ├── horizon10 │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ └── horizon5 │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ ├── DoubleIntegratorEnv │ │ │ ├── horizon1 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ ├── horizon10 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ └── horizon5 │ │ │ │ ├── 
contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ ├── MountainCarEnv │ │ │ ├── horizon1 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ ├── horizon10 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ │ └── horizon5 │ │ │ │ ├── contour.png │ │ │ │ ├── learning_curve.png │ │ │ │ ├── log.txt │ │ │ │ ├── params.json │ │ │ │ └── progress.csv │ │ └── SwingUpEnv │ │ │ ├── horizon1 │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ ├── horizon10 │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ │ │ └── horizon5 │ │ │ ├── learning_curve.png │ │ │ ├── log.txt │ │ │ ├── params.json │ │ │ └── progress.csv │ └── part5 │ │ └── CartPoleEnv │ │ └── modelinear_state_discretization51 │ │ ├── log.txt │ │ ├── params.json │ │ └── progress.csv ├── envs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── cart_pole_env.cpython-36.pyc │ │ ├── double_integrator_env.cpython-36.pyc │ │ ├── grid1d_env.cpython-36.pyc │ │ ├── gridworld_env.cpython-36.pyc │ │ ├── mountain_hill_env.cpython-36.pyc │ │ └── swing_up_env.cpython-36.pyc │ ├── cart_pole_env.py │ ├── double_integrator_env.py │ ├── grid1d_env.py │ ├── gridworld_env.py │ ├── mountain_hill_env.py │ └── swing_up_env.py ├── logger.py ├── part1 │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── tabular_value_iteration.cpython-36.pyc │ ├── run_part1.py │ └── tabular_value_iteration.py ├── part2 │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── discretize.cpython-36.pyc │ │ └── look_ahead_policy.cpython-36.pyc │ ├── discretize.py │ ├── look_ahead_policy.py │ ├── run_part2_ab.py │ ├── run_part2_c.py │ └── run_part2_d.py ├── part3 │ ├── __init__.py │ ├── 
__pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── continous_value_iteration.cpython-36.pyc │ │ └── look_ahead_policy.cpython-36.pyc │ ├── continous_value_iteration.py │ ├── look_ahead_policy.py │ ├── run_part3_a.py │ └── run_part3_b.py ├── part4 │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ └── discretize.cpython-36.pyc │ ├── discretize.py │ └── run_part4.py ├── requirements.txt ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── plot.cpython-36.pyc │ │ ├── utils.cpython-36.pyc │ │ └── value_functions.cpython-36.pyc │ ├── plot.py │ ├── utils.py │ └── value_functions.py └── viskit │ ├── __init__.py │ ├── __pycache__ │ ├── __init__.cpython-36.pyc │ └── core.cpython-36.pyc │ ├── core.py │ ├── frontend.py │ ├── static │ ├── css │ │ ├── bootstrap.min.css │ │ └── dropdowns-enhancement.css │ └── js │ │ ├── bootstrap.min.js │ │ ├── dropdowns-enhancement.js │ │ ├── jquery-1.10.2.min.js │ │ ├── jquery.loadTemplate-1.5.6.js │ │ └── plotly-latest.min.js │ └── templates │ └── main.html ├── cs287hw2 ├── .DS_Store ├── .ipynb_checkpoints │ ├── Untitled-checkpoint.ipynb │ ├── Untitled1-checkpoint.ipynb │ ├── Untitled2-checkpoint.ipynb │ ├── Untitled3-checkpoint.ipynb │ ├── lqr-checkpoint.ipynb │ ├── lqr-sol-checkpoint.ipynb │ └── lqr_nolonger_clean-checkpoint.ipynb ├── __pycache__ │ ├── rot_utils.cpython-37.pyc │ └── simulators.cpython-37.pyc ├── cs287hw2.pdf ├── environment.yml ├── envs │ ├── __pycache__ │ │ ├── cheetah_env.cpython-37.pyc │ │ └── hopper_env.cpython-37.pyc │ ├── cheetah_env.py │ └── hopper_env.py ├── img │ ├── fig_a.png │ ├── ref_a.png │ ├── ref_b_cartpole.png │ └── ref_b_heli.png ├── lqr.ipynb ├── mats │ ├── cartpole_traj.mat │ ├── heli_traj.mat │ ├── p_a_w.mat │ ├── p_b_w.mat │ ├── p_c_heli_starting_states.mat │ └── p_c_w.mat ├── requirements.txt ├── rot_utils.py ├── simulators.py └── vids │ └── visualization_hopper.gif ├── cs287hw3 ├── .DS_Store ├── .idea │ ├── .gitignore │ ├── assignment2.iml │ ├── 
inspectionProfiles │ │ └── profiles_settings.xml │ ├── misc.xml │ ├── modules.xml │ └── vcs.xml ├── .ipynb_checkpoints │ ├── Non-linear Optimization-checkpoint.ipynb │ ├── non_linear_optimization-checkpoint.ipynb │ ├── non_linear_optimization_og-checkpoint.ipynb │ ├── non_linear_optimization_sols-checkpoint.ipynb │ ├── non_linear_optimzation-checkpoint.ipynb │ └── non_linear_optimzation_sols-checkpoint.ipynb ├── __pycache__ │ ├── utils.cpython-36.pyc │ └── utils.cpython-37.pyc ├── envs │ ├── __init__.py │ ├── __pycache__ │ │ ├── __init__.cpython-36.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── cart_pole_env.cpython-36.pyc │ │ ├── cart_pole_env.cpython-37.pyc │ │ ├── cheetah_env.cpython-37.pyc │ │ └── hopper_env.cpython-37.pyc │ ├── cart_pole_env.py │ ├── cheetah_env.py │ └── hopper_env.py ├── non_linear_optimization.ipynb ├── non_linear_optimization.pdf ├── utils.py └── vids │ ├── .DS_Store │ └── rollout.gif ├── cs287hw4 ├── .ipynb_checkpoints │ ├── Homework4_Q-Copy1-checkpoint.ipynb │ ├── Homework4_Q-checkpoint.ipynb │ └── Homework4_Q111-checkpoint.ipynb ├── Homework4_Q.ipynb ├── Homework4_Q_exp.ipynb ├── __MACOSX │ └── ._hw4.pdf ├── hw4.pdf ├── hw4_rubric.pdf ├── p3_a_data_1.npy ├── p3_a_data_2.npy ├── p3_a_data_3.npy ├── p3_a_data_4.npy ├── p6_data_0.npy ├── p6_data_1.npy ├── p6_data_2.npy └── p6_data_3.npy └── cs287hw5 ├── .DS_Store ├── .idea ├── hw5_nov12.iml ├── misc.xml ├── modules.xml └── workspace.xml ├── hw5.pdf ├── hw5_writeup.zip ├── hw5_writeup ├── .DS_Store ├── PS5.tex ├── PS5_template.tex ├── figures │ ├── baseline.png │ ├── clipper.png │ ├── entropy.png │ ├── gae.png │ ├── mbppo.png │ ├── meppo.png │ ├── newplot.png │ ├── pg.png │ ├── pg_cheetah.png │ ├── ph_baseline.png │ ├── ph_clipper.png │ ├── ph_entropy.png │ ├── ph_gae.png │ ├── ph_mbppo.png │ ├── ph_meppo.png │ ├── ph_pg.png │ ├── ph_ppo_obj.png │ ├── ph_sac.png │ ├── ppo_obj.png │ └── sac_cheetah.png └── my_figures │ ├── Ant_3A_12.png │ ├── Ant_3A_3.png │ ├── Cheetah_2A.png │ ├── 
Cheetah_2B_1.png │ ├── Cheetah_2B_2.png │ ├── Cheetah_2C.png │ ├── Cheetah_3A_12.png │ ├── Cheetah_3A_3.png │ ├── HalfCheetah_1A.png │ ├── HalfCheetah_1B.png │ ├── HalfCheetah_1C.png │ ├── HalfCheetah_1D.png │ ├── HalfCheetah_1E.png │ ├── HalfCheetah_1F.png │ ├── HalfCheetah_1G.png │ ├── Hopper_1A.png │ ├── Hopper_1B.png │ ├── Hopper_1C.png │ ├── Hopper_1D.png │ ├── Hopper_1E.png │ ├── Hopper_1F.png │ ├── Hopper_1G.png │ ├── Hopper_2A.png │ ├── Hopper_2B_1.png │ ├── Hopper_2B_2.png │ ├── Hopper_2C.png │ ├── Hopper_3A_12.png │ ├── Hopper_3A_3.png │ ├── Swimmer_1A.png │ ├── Swimmer_1B.png │ ├── Swimmer_1C.png │ ├── Swimmer_1D.png │ ├── Swimmer_1E.png │ ├── Swimmer_1F.png │ ├── Swimmer_1G.png │ ├── Swimmer_2A.png │ ├── Swimmer_2B_1.png │ ├── Swimmer_2B_2.png │ └── Swimmer_2C.png └── sac ├── .DS_Store ├── .idea ├── misc.xml ├── modules.xml ├── sac.iml └── workspace.xml ├── README.md ├── __pycache__ ├── logz.cpython-35.pyc ├── logz.cpython-37.pyc ├── nn.cpython-35.pyc ├── nn.cpython-37.pyc ├── sac.cpython-35.pyc ├── sac.cpython-37.pyc ├── utils.cpython-35.pyc └── utils.cpython-37.pyc ├── data ├── .DS_Store ├── sac_Ant-v2_reinf_02-12-2019_16-46-48 │ ├── 1 │ │ ├── log.txt │ │ └── params.json │ ├── 11 │ │ ├── log.txt │ │ └── params.json │ └── 21 │ │ ├── log.txt │ │ └── params.json ├── sac_Ant-v2_reparam_02-12-2019_16-47-03 │ ├── 1 │ │ ├── log.txt │ │ └── params.json │ ├── 11 │ │ ├── log.txt │ │ └── params.json │ └── 21 │ │ ├── log.txt │ │ └── params.json ├── sac_Ant-v2_reparam_2qf_02-12-2019_16-47-25 │ ├── 1 │ │ ├── log.txt │ │ └── params.json │ ├── 11 │ │ ├── log.txt │ │ └── params.json │ └── 21 │ │ ├── log.txt │ │ └── params.json ├── sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53 │ ├── 1 │ │ ├── log.txt │ │ └── params.json │ ├── 11 │ │ ├── log.txt │ │ └── params.json │ └── 21 │ │ ├── log.txt │ │ └── params.json ├── sac_HalfCheetah-v2_reparam_02-12-2019_12-05-49 │ ├── 1 │ │ ├── log.txt │ │ └── params.json │ ├── 11 │ │ ├── log.txt │ │ └── params.json │ └── 21 │ │ ├── log.txt 
│ │ └── params.json ├── sac_HalfCheetah-v2_reparam_2qf_02-12-2019_12-34-24 │ ├── 1 │ │ ├── log.txt │ │ └── params.json │ ├── 11 │ │ ├── log.txt │ │ └── params.json │ └── 21 │ │ ├── log.txt │ │ └── params.json ├── sac_Hopper-v2_reinf_02-12-2019_21-02-20 │ ├── 1 │ │ ├── log.txt │ │ └── params.json │ ├── 11 │ │ ├── log.txt │ │ └── params.json │ └── 21 │ │ ├── log.txt │ │ └── params.json ├── sac_Hopper-v2_reparam_02-12-2019_21-02-26 │ ├── 1 │ │ ├── log.txt │ │ └── params.json │ ├── 11 │ │ ├── log.txt │ │ └── params.json │ └── 21 │ │ ├── log.txt │ │ └── params.json └── sac_Hopper-v2_reparam_2qf_02-12-2019_21-02-37 │ ├── 1 │ ├── log.txt │ └── params.json │ ├── 11 │ ├── log.txt │ └── params.json │ └── 21 │ ├── log.txt │ └── params.json ├── environment.yml ├── generate_plots.sh ├── logz.py ├── myplot.py ├── nn.py ├── plot.py ├── project_setup.bash ├── run_all.sh ├── sac.py ├── train_mujoco.py └── utils.py /README.md: -------------------------------------------------------------------------------- 1 | # Optimal Control, Reinforcement Learning & Robotics Projects in CS 287: Advanced Robotics (fall 2019) 2 | 3 | **This repository contains past projects I've completed in CS 287, which I took in the Berkeley EECS department under Prof. Pieter Abbeel, in fall 2019.** 4 | (https://people.eecs.berkeley.edu/~pabbeel/cs287-fa19/) 5 | 6 | The following are the projects and the related topics covered/implemented in each project. 7 | 8 | - [**Project 1**](/cs287hw1) 9 | - Value Iteration 10 | - Discretization-based Optimal Control 11 | - Function Approximation Optimal Control 12 | 13 | - [**Project 2**](/cs287hw2) 14 | - LQR, iLQR 15 | - DDP 16 | - Feedback Linearization 17 | 18 | - [**Project 3**](/cs287hw3) 19 | - Convex Optimization 20 | - Sequential Convex Programming 21 | - Motion Planning and Control (w/ convex opt.) 
22 | 23 | - [**Project 4**](/cs287hw4) 24 | - Multivariate Gaussians 25 | - Kalman Filtering 26 | - EM & MLE 27 | - Particle Filtering 28 | - Belief Space Planning 29 | 30 | - [**Project 5**](/cs287hw5) 31 | - Policy Gradient 32 | - Trust Region Policy Optimization (TRPO) 33 | - Proximal Policy Optimization (PPO) 34 | - Deep Q-Learning 35 | - DQN 36 | - Double DQN 37 | - Dueling DQN 38 | -------------------------------------------------------------------------------- /cs287hw1/.idea/cs287-hw1-code.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /cs287hw1/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | -------------------------------------------------------------------------------- /cs287hw1/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /cs287hw1/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /cs287hw1/.idea/workspace.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 11 | 12 | 13 | 14 | 15 | 16 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 1568209473775 34 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /cs287hw1/.vs/VSWorkspaceState.json: -------------------------------------------------------------------------------- 1 | { 2 | "ExpandedNodes": [ 3 | "" 4 | ], 5 | "PreviewInSolutionExplorer": false 6 | } -------------------------------------------------------------------------------- 
/cs287hw1/.vs/cs287-hw1-code/v15/.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/.vs/cs287-hw1-code/v15/.suo -------------------------------------------------------------------------------- /cs287hw1/.vs/slnx.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/.vs/slnx.sqlite -------------------------------------------------------------------------------- /cs287hw1/__pycache__/logger.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/__pycache__/logger.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/__pycache__/logger.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/__pycache__/logger.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/learning_curve.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "GridWorldEnv0", 3 | "policy_type": "deterministic", 4 | "render": false, 5 | "temperature": 1.0 6 | } -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | 0.0,0 3 | 0.0,1 4 | 0.0,2 5 | 0.05,3 6 | 0.0,4 7 | 0.0,5 8 | 0.1,6 9 | 0.1,7 10 | 0.05,8 11 | 0.1,9 12 | 0.15,10 13 | 0.1,11 14 | 0.2,12 15 | 0.1,13 16 | 0.15,14 17 | 0.15,15 18 | 0.3,16 19 | 0.1,17 20 | 0.15,18 21 | 0.25,19 22 | 0.45,20 23 | 0.2,21 24 | 0.55,22 25 | 0.45,23 26 | 0.35,24 27 | 0.7,25 28 | 0.55,26 29 | 0.75,27 30 | 0.75,28 31 | 0.65,29 32 | 0.55,30 33 | 0.85,31 34 | 0.65,32 35 | 0.9,33 36 | 0.85,34 37 | 0.85,35 38 | 0.95,36 39 | 1.0,37 40 | 1.0,38 41 | 1.0,39 42 | 1.0,40 43 | 1.0,41 44 | 1.0,42 45 | 1.0,43 46 | 1.0,44 47 | 1.0,45 48 | 1.0,46 49 | 0.95,47 50 | 1.0,48 51 | 1.0,49 52 | -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/contour.png 
-------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "GridWorldEnv0", 3 | "policy_type": "max_ent", 4 | "render": false, 5 | "temperature": 0.01 6 | } -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,0.0 3 | 1,0.0 4 | 2,0.0 5 | 3,0.15 6 | 4,0.05 7 | 5,0.0 8 | 6,0.05 9 | 7,0.0 10 | 8,0.0 11 | 9,0.0 12 | 10,0.0 13 | 11,0.0 14 | 12,0.0 15 | 13,0.0 16 | 14,0.05 17 | 15,0.0 18 | 16,0.05 19 | 17,0.05 20 | 18,0.0 21 | 19,0.0 22 | 20,0.05 23 | 21,0.05 24 | 22,0.05 25 | 23,0.0 26 | 24,0.0 27 | 25,0.05 28 | 26,0.05 29 | 27,0.05 30 | 28,0.0 31 | 29,0.0 32 | 30,0.0 33 | 31,0.0 34 | 32,0.0 35 | 33,0.0 36 | 34,0.05 37 | 35,0.05 38 | 36,0.05 39 | 37,0.1 40 | 38,0.0 41 | 39,0.0 42 | 40,0.0 43 | 41,0.05 44 | 42,0.05 45 | 43,0.0 46 | 44,0.1 47 | 45,0.05 48 | 46,0.05 49 | 47,0.0 50 | 48,0.05 51 | 49,0.0 52 | -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/contour.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "GridWorldEnv0", 3 | "policy_type": "max_ent", 4 | "render": false, 5 | "temperature": 1.0 6 | } -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,0.0 3 | 1,0.0 4 | 2,0.0 5 | 3,0.15 6 | 4,0.05 7 | 5,0.0 8 | 6,0.1 9 | 7,0.0 10 | 8,0.05 11 | 9,0.0 12 | 10,0.0 13 | 11,0.0 14 | 12,0.05 15 | 13,0.05 16 | 14,0.05 17 | 15,0.05 18 | 16,0.05 19 | 17,0.05 20 | 18,0.0 21 | 19,0.0 22 | 20,0.05 23 | 21,0.05 24 | 22,0.05 25 | 23,0.1 26 | 24,0.1 27 | 25,0.1 28 | 26,0.05 29 | 27,0.05 30 | 28,0.0 31 | 29,0.0 32 | 30,0.0 33 | 31,0.0 34 | 32,0.0 35 | 33,0.0 36 | 34,0.1 37 | 35,0.05 38 | 36,0.05 39 | 37,0.1 40 | 38,0.0 41 | 39,0.0 42 | 40,0.0 43 | 41,0.1 44 | 42,0.05 45 | 43,0.05 46 | 44,0.05 47 | 45,0.1 48 | 46,0.05 49 | 47,0.0 50 | 48,0.05 51 | 49,0.05 52 | -------------------------------------------------------------------------------- 
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "GridWorldEnv0", 3 | "policy_type": "max_ent", 4 | "render": false, 5 | "temperature": 1e-05 6 | } -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | 0.0,0 3 | 0.0,1 4 | 0.0,2 5 | 0.15,3 6 | 0.05,4 7 | 0.0,5 8 | 0.05,6 9 | 0.0,7 10 | 0.0,8 11 | 0.0,9 12 | 0.0,10 13 | 0.0,11 14 | 0.0,12 15 | 0.0,13 16 | 0.05,14 17 | 0.0,15 18 | 0.05,16 19 | 0.05,17 20 | 0.0,18 21 | 0.0,19 22 | 0.0,20 23 | 0.05,21 24 | 0.05,22 25 | 0.0,23 26 | 0.0,24 27 | 0.05,25 28 | 0.05,26 29 | 0.05,27 30 | 0.0,28 31 | 0.0,29 32 | 0.0,30 33 | 0.0,31 34 | 0.0,32 35 | 0.0,33 36 | 0.05,34 37 | 0.05,35 38 | 0.05,36 39 | 0.1,37 40 | 0.0,38 41 | 0.0,39 42 | 0.0,40 43 | 0.05,41 44 | 0.05,42 45 | 0.0,43 46 | 0.1,44 47 | 0.1,45 48 | 
0.05,46 49 | 0.0,47 50 | 0.05,48 51 | 0.0,49 52 | -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "GridWorldEnv1", 3 | "policy_type": "deterministic", 4 | "render": false, 5 | "temperature": 1.0 6 | } -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | 0.0,0 3 | 0.0,1 4 | 0.0,2 5 | 0.0,3 6 | 0.0,4 7 | 0.0,5 8 | 0.0,6 9 | 0.0,7 10 | 0.1,8 11 | 0.15,9 12 | 0.05,10 13 | 0.2,11 14 | 0.3,12 15 | 0.25,13 16 | 0.35,14 17 | 0.4,15 18 | 0.35,16 19 | 0.3,17 20 | 0.45,18 21 | 0.7,19 22 | 0.45,20 23 | 0.55,21 24 | 0.55,22 25 | 0.65,23 26 | 0.75,24 27 | 0.7,25 28 | 0.7,26 29 | 0.85,27 30 | 0.9,28 31 | 0.75,29 32 | 1.0,30 33 | 0.85,31 34 | 0.95,32 35 
| 0.95,33 36 | 1.0,34 37 | 1.0,35 38 | 1.0,36 39 | 0.9,37 40 | 1.0,38 41 | 1.0,39 42 | 1.0,40 43 | 1.0,41 44 | 1.0,42 45 | 0.95,43 46 | 1.0,44 47 | 1.0,45 48 | 0.95,46 49 | 1.0,47 50 | 1.0,48 51 | 1.0,49 52 | -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "GridWorldEnv1", 3 | "policy_type": "max_ent", 4 | "render": false, 5 | "temperature": 0.01 6 | } -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,0.0 3 | 1,0.0 4 | 2,0.0 5 | 3,0.1 6 | 4,0.05 7 | 5,0.0 8 | 6,0.05 9 | 7,0.0 10 | 8,0.05 11 | 9,0.0 12 | 10,0.0 13 | 11,0.0 14 | 12,0.0 15 | 13,0.05 16 | 14,0.0 17 | 15,0.1 18 | 16,0.0 19 | 17,0.0 20 | 18,0.05 21 | 19,0.0 22 | 20,0.1 23 | 21,0.0 24 | 22,0.0 25 | 
23,0.0 26 | 24,0.0 27 | 25,0.05 28 | 26,0.05 29 | 27,0.05 30 | 28,0.0 31 | 29,0.0 32 | 30,0.0 33 | 31,0.0 34 | 32,0.0 35 | 33,0.0 36 | 34,0.05 37 | 35,0.0 38 | 36,0.0 39 | 37,0.05 40 | 38,0.05 41 | 39,0.0 42 | 40,0.05 43 | 41,0.05 44 | 42,0.0 45 | 43,0.0 46 | 44,0.05 47 | 45,0.05 48 | 46,0.05 49 | 47,0.05 50 | 48,0.1 51 | 49,0.05 52 | -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "GridWorldEnv1", 3 | "policy_type": "max_ent", 4 | "render": false, 5 | "temperature": 1.0 6 | } -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,0.0 3 | 1,0.0 4 | 2,0.0 5 | 3,0.1 6 | 4,0.05 7 | 5,0.0 8 | 6,0.1 9 | 7,0.0 10 | 8,0.05 11 | 9,0.05 12 | 10,0.0 13 | 11,0.0 14 | 12,0.0 15 | 
13,0.05 16 | 14,0.0 17 | 15,0.1 18 | 16,0.05 19 | 17,0.0 20 | 18,0.1 21 | 19,0.0 22 | 20,0.1 23 | 21,0.0 24 | 22,0.0 25 | 23,0.0 26 | 24,0.0 27 | 25,0.1 28 | 26,0.05 29 | 27,0.05 30 | 28,0.0 31 | 29,0.0 32 | 30,0.05 33 | 31,0.0 34 | 32,0.0 35 | 33,0.05 36 | 34,0.1 37 | 35,0.0 38 | 36,0.05 39 | 37,0.05 40 | 38,0.05 41 | 39,0.0 42 | 40,0.05 43 | 41,0.05 44 | 42,0.0 45 | 43,0.0 46 | 44,0.05 47 | 45,0.1 48 | 46,0.05 49 | 47,0.1 50 | 48,0.1 51 | 49,0.05 52 | -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "GridWorldEnv1", 3 | "policy_type": "max_ent", 4 | "render": false, 5 | "temperature": 1e-05 6 | } -------------------------------------------------------------------------------- /cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | 0.0,0 3 | 
0.0,1 4 | 0.0,2 5 | 0.1,3 6 | 0.0,4 7 | 0.0,5 8 | 0.05,6 9 | 0.0,7 10 | 0.05,8 11 | 0.0,9 12 | 0.0,10 13 | 0.0,11 14 | 0.0,12 15 | 0.05,13 16 | 0.0,14 17 | 0.1,15 18 | 0.0,16 19 | 0.0,17 20 | 0.05,18 21 | 0.0,19 22 | 0.1,20 23 | 0.0,21 24 | 0.0,22 25 | 0.0,23 26 | 0.0,24 27 | 0.05,25 28 | 0.05,26 29 | 0.05,27 30 | 0.0,28 31 | 0.0,29 32 | 0.0,30 33 | 0.0,31 34 | 0.0,32 35 | 0.0,33 36 | 0.05,34 37 | 0.0,35 38 | 0.0,36 39 | 0.05,37 40 | 0.05,38 41 | 0.0,39 42 | 0.05,40 43 | 0.05,41 44 | 0.0,42 45 | 0.0,43 46 | 0.05,44 47 | 0.05,45 48 | 0.05,46 49 | 0.05,47 50 | 0.05,48 51 | 0.05,49 52 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "DoubleIntegratorEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "tabular", 8 | "render": false, 9 | "state_discretization": 151 10 | } 
-------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1086.2625732421875 3 | 5,-1263.9954833984375 4 | 10,-1262.9053955078125 5 | 15,-661.170166015625 6 | 20,-288.8149108886719 7 | 25,-108.48724365234375 8 | 30,-86.2402114868164 9 | 35,-82.36270141601562 10 | 40,-77.28358459472656 11 | 45,-74.50456237792969 12 | 50,-71.93091583251953 13 | 55,-72.67729187011719 14 | 60,-70.5304946899414 15 | 65,-70.99906921386719 16 | 70,-72.21932220458984 17 | 75,-70.76229095458984 18 | 80,-69.89215087890625 19 | 85,-71.07881164550781 20 | 90,-69.90048217773438 21 | 95,-72.05101776123047 22 | 100,-70.91768646240234 23 | 105,-70.9474868774414 24 | 110,-70.95112609863281 25 | 115,-71.04618072509766 26 | 120,-70.75999450683594 27 | 125,-70.89779663085938 28 | 130,-70.42910766601562 29 | 135,-71.04066467285156 30 | 140,-70.91621398925781 31 | 145,-70.90560913085938 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/learning_curve.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "DoubleIntegratorEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "tabular", 8 | "render": false, 9 | "state_discretization": 21 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | -1086.2625732421875,0 3 | -882.6398315429688,5 4 | -678.9556274414062,10 5 | -449.319580078125,15 6 | -169.34024047851562,20 7 | -189.33172607421875,25 8 | -92.19869232177734,30 9 | -97.56818389892578,35 10 | -80.79776000976562,40 11 | -82.92337036132812,45 12 | -83.22191619873047,50 13 | -76.68871307373047,55 14 | -81.23823547363281,60 15 | -79.61701965332031,65 16 | -85.20960998535156,70 17 | -81.37742614746094,75 18 | -86.22061157226562,80 19 | -80.67992401123047,85 20 | -87.55850219726562,90 21 | -87.11900329589844,95 22 | -84.90445709228516,100 23 | -82.08210754394531,105 24 | -87.08905029296875,110 25 | -84.68000030517578,115 26 | -83.30575561523438,120 27 | -75.68476104736328,125 28 | -80.35721588134766,130 29 | -80.14833068847656,135 30 | -93.36302185058594,140 31 | -81.09378051757812,145 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/contour.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "DoubleIntegratorEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "tabular", 8 | "render": false, 9 | "state_discretization": 51 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | -1086.2625732421875,0 3 | -1144.5582275390625,5 4 | -1067.7918701171875,10 5 | -601.8530883789062,15 6 | -208.37237548828125,20 7 | -170.8474884033203,25 8 | -87.156005859375,30 9 | -82.76526641845703,35 10 | -73.99360656738281,40 11 | -77.13446044921875,45 12 | -71.68731689453125,50 13 | -73.66706848144531,55 14 | -71.9505386352539,60 15 | -84.7891845703125,65 16 | -78.1386489868164,70 17 | -94.64347839355469,75 18 | -91.47573852539062,80 19 | -85.21119689941406,85 20 | 
-88.02317810058594,90 21 | -84.81150817871094,95 22 | -96.04219818115234,100 23 | -90.305419921875,105 24 | -89.38578033447266,110 25 | -91.25402069091797,115 26 | -89.45679473876953,120 27 | -89.90522003173828,125 28 | -91.32474517822266,130 29 | -92.66826629638672,135 30 | -90.96044158935547,140 31 | -74.82160949707031,145 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "DoubleIntegratorEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "nn", 7 | "policy_type": "tabular", 8 | "render": false, 9 | "state_discretization": 151 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 
0,-1086.2625732421875 3 | 5,-1074.3865966796875 4 | 10,-1108.3885498046875 5 | 15,-1194.7117919921875 6 | 20,-1210.2864990234375 7 | 25,-1210.2864990234375 8 | 30,-439.2337341308594 9 | 35,-272.38214111328125 10 | 40,-275.7249755859375 11 | 45,-273.76495361328125 12 | 50,-269.75738525390625 13 | 55,-253.88551330566406 14 | 60,-233.76580810546875 15 | 65,-203.03372192382812 16 | 70,-187.3236083984375 17 | 75,-181.8560028076172 18 | 80,-172.03138732910156 19 | 85,-172.03138732910156 20 | 90,-166.24839782714844 21 | 95,-157.64881896972656 22 | 100,-157.64881896972656 23 | 105,-151.69505310058594 24 | 110,-151.69505310058594 25 | 115,-151.69505310058594 26 | 120,-151.69505310058594 27 | 125,-148.64476013183594 28 | 130,-149.03884887695312 29 | 135,-157.87362670898438 30 | 140,-157.87362670898438 31 | 145,-151.9330596923828 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"action_discretization": 5, 3 | "env": "DoubleIntegratorEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "nn", 7 | "policy_type": "tabular", 8 | "render": false, 9 | "state_discretization": 21 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1086.2625732421875 3 | 5,-1086.2625732421875 4 | 10,-1086.2625732421875 5 | 15,-1086.2625732421875 6 | 20,-1086.2625732421875 7 | 25,-1086.2625732421875 8 | 30,-1086.2625732421875 9 | 35,-1086.2625732421875 10 | 40,-1086.2625732421875 11 | 45,-1086.2625732421875 12 | 50,-1086.2625732421875 13 | 55,-1086.2625732421875 14 | 60,-1086.2625732421875 15 | 65,-1086.2625732421875 16 | 70,-1086.2625732421875 17 | 75,-1086.2625732421875 18 | 80,-1086.2625732421875 19 | 85,-1086.2625732421875 20 | 90,-1086.2625732421875 21 | 95,-1086.2625732421875 22 | 100,-1086.2625732421875 23 | 105,-1086.2625732421875 24 | 110,-1086.2625732421875 25 | 115,-1086.2625732421875 26 | 120,-1086.2625732421875 27 | 125,-1086.2625732421875 28 | 130,-1086.2625732421875 29 | 135,-1086.2625732421875 30 | 140,-1086.2625732421875 31 | 145,-1086.2625732421875 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/learning_curve.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "DoubleIntegratorEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "nn", 7 | "policy_type": "tabular", 8 | "render": false, 9 | "state_discretization": 51 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1086.2625732421875 3 | 5,-1086.2625732421875 4 | 10,-1086.2625732421875 5 | 15,-1086.2625732421875 6 | 20,-1086.2625732421875 7 | 25,-1086.2625732421875 8 | 30,-1086.2625732421875 9 | 35,-1086.2625732421875 10 | 40,-727.3023071289062 11 | 45,-685.129150390625 12 | 50,-563.1832275390625 13 | 55,-550.3751831054688 14 | 60,-584.0700073242188 15 | 65,-597.1317749023438 16 | 70,-594.9307861328125 17 | 75,-427.8874816894531 18 | 80,-472.5625915527344 19 | 85,-472.5625915527344 20 | 90,-472.1018371582031 21 | 95,-472.1018371582031 22 | 100,-516.2792358398438 23 | 105,-478.8886413574219 24 | 110,-493.9183654785156 25 | 115,-493.9183654785156 26 | 120,-493.9183654785156 27 | 125,-355.2012023925781 28 | 130,-394.9552917480469 29 | 135,-394.9552917480469 30 | 140,-394.9552917480469 31 | 145,-394.9552917480469 32 | -------------------------------------------------------------------------------- 
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "MountainCarEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "tabular", 8 | "render": false, 9 | "state_discretization": 151 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-500.0 3 | 5,-500.0 4 | 10,-500.0 5 | 15,-500.0 6 | 20,-500.0 7 | 25,-500.0 8 | 30,-500.0 9 | 35,-500.0 10 | 40,-500.0 11 | 45,-500.0 12 | 50,-500.0 13 | 55,-500.0 14 | 60,-500.0 15 | 65,-500.0 16 | 70,-161.0 17 | 75,-154.0 18 | 80,-147.0 19 | 85,-150.0 20 | 90,-147.0 21 | 95,-149.0 22 | 100,-146.0 23 | 105,-149.0 24 | 110,-145.0 25 | 115,-105.0 26 | 120,-105.0 27 | 125,-105.0 28 | 130,-105.0 29 | 135,-105.0 30 | 140,-105.0 
31 | 145,-105.0 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "MountainCarEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "tabular", 8 | "render": false, 9 | "state_discretization": 21 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | -500.0,0 3 | -500.0,5 4 | -500.0,10 5 | -500.0,15 6 | -238.0,20 7 | -346.0,25 8 | -274.0,30 9 | -496.0,35 10 | -412.0,40 11 | -192.0,45 12 | -138.0,50 13 | -131.0,55 14 | -143.0,60 15 | -147.0,65 16 | -138.0,70 17 | -153.0,75 18 | -150.0,80 19 | -152.0,85 20 | -147.0,90 21 | -145.0,95 22 | -148.0,100 23 | -149.0,105 24 | -146.0,110 25 
| -150.0,115 26 | -150.0,120 27 | -147.0,125 28 | -146.0,130 29 | -147.0,135 30 | -150.0,140 31 | -148.0,145 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "MountainCarEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "tabular", 8 | "render": false, 9 | "state_discretization": 51 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | -500.0,0 3 | -500.0,5 4 | -500.0,10 5 | -500.0,15 6 | -500.0,20 7 | -500.0,25 8 | -500.0,30 9 | -500.0,35 10 | -275.0,40 11 | -174.0,45 12 | -236.0,50 13 | -160.0,55 14 | -157.0,60 15 | -157.0,65 16 | -158.0,70 17 | -160.0,75 18 | -158.0,80 19 
| -153.0,85 20 | -155.0,90 21 | -158.0,95 22 | -159.0,100 23 | -156.0,105 24 | -154.0,110 25 | -154.0,115 26 | -154.0,120 27 | -156.0,125 28 | -156.0,130 29 | -160.0,135 30 | -157.0,140 31 | -106.0,145 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "MountainCarEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "nn", 7 | "policy_type": "tabular", 8 | "render": false, 9 | "state_discretization": 151 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-500.0 3 | 5,-500.0 4 | 10,-500.0 5 | 15,-500.0 6 | 20,-500.0 7 | 25,-500.0 8 | 30,-500.0 9 | 35,-500.0 10 | 40,-500.0 11 | 45,-500.0 12 | 50,-500.0 13 | 55,-500.0 14 | 
60,-500.0 15 | 65,-500.0 16 | 70,-500.0 17 | 75,-500.0 18 | 80,-500.0 19 | 85,-500.0 20 | 90,-491.0 21 | 95,-169.0 22 | 100,-183.0 23 | 105,-183.0 24 | 110,-183.0 25 | 115,-183.0 26 | 120,-183.0 27 | 125,-183.0 28 | 130,-183.0 29 | 135,-183.0 30 | 140,-183.0 31 | 145,-183.0 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "MountainCarEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "nn", 7 | "policy_type": "tabular", 8 | "render": false, 9 | "state_discretization": 21 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-500.0 3 | 5,-500.0 4 | 10,-500.0 5 | 15,-500.0 6 | 20,-500.0 7 | 25,-500.0 8 | 30,-500.0 9 | 35,-500.0 
10 | 40,-500.0 11 | 45,-500.0 12 | 50,-500.0 13 | 55,-500.0 14 | 60,-500.0 15 | 65,-500.0 16 | 70,-500.0 17 | 75,-500.0 18 | 80,-500.0 19 | 85,-500.0 20 | 90,-500.0 21 | 95,-500.0 22 | 100,-500.0 23 | 105,-500.0 24 | 110,-500.0 25 | 115,-500.0 26 | 120,-500.0 27 | 125,-500.0 28 | 130,-500.0 29 | 135,-500.0 30 | 140,-500.0 31 | 145,-500.0 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "MountainCarEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "nn", 7 | "policy_type": "tabular", 8 | "render": false, 9 | "state_discretization": 51 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-500.0 3 | 5,-500.0 4 | 10,-500.0 5 | 
15,-500.0 6 | 20,-500.0 7 | 25,-500.0 8 | 30,-500.0 9 | 35,-500.0 10 | 40,-500.0 11 | 45,-500.0 12 | 50,-500.0 13 | 55,-500.0 14 | 60,-500.0 15 | 65,-500.0 16 | 70,-500.0 17 | 75,-500.0 18 | 80,-500.0 19 | 85,-500.0 20 | 90,-500.0 21 | 95,-500.0 22 | 100,-500.0 23 | 105,-500.0 24 | 110,-500.0 25 | 115,-500.0 26 | 120,-500.0 27 | 125,-500.0 28 | 130,-500.0 29 | 135,-500.0 30 | 140,-500.0 31 | 145,-500.0 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/CartPoleEnv/linear/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/CartPoleEnv/linear/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/CartPoleEnv/linear/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "CartPoleEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "tabular", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/CartPoleEnv/linear/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,10.0 3 | 5,392.0 4 | 10,298.0 5 | 15,500.0 6 | 20,500.0 7 | 25,500.0 8 | 30,500.0 9 | 35,500.0 10 | 40,500.0 11 | 45,500.0 12 | 50,500.0 13 | 55,500.0 14 | 60,500.0 15 | 65,500.0 16 | 70,500.0 17 | 75,500.0 18 | 80,500.0 19 | 85,500.0 20 | 90,500.0 21 | 95,500.0 22 | 100,500.0 23 | 105,500.0 24 | 110,500.0 25 | 115,500.0 26 | 120,500.0 27 | 125,500.0 28 | 130,500.0 29 | 135,500.0 30 | 140,500.0 31 | 145,500.0 32 | -------------------------------------------------------------------------------- 
/cs287hw1/data/part2_c/CartPoleEnv/nn/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/CartPoleEnv/nn/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/CartPoleEnv/nn/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "CartPoleEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "nn", 7 | "policy_type": "tabular", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/CartPoleEnv/nn/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,9.0 3 | 5,13.0 4 | 10,13.0 5 | 15,13.0 6 | 20,13.0 7 | 25,13.0 8 | 30,13.0 9 | 35,13.0 10 | 40,13.0 11 | 45,15.0 12 | 50,15.0 13 | 55,15.0 14 | 60,15.0 15 | 65,15.0 16 | 70,15.0 17 | 75,15.0 18 | 80,15.0 19 | 85,15.0 20 | 90,15.0 21 | 95,15.0 22 | 100,15.0 23 | 105,15.0 24 | 110,15.0 25 | 115,15.0 26 | 120,15.0 27 | 125,15.0 28 | 130,15.0 29 | 135,15.0 30 | 140,15.0 31 | 145,15.0 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/learning_curve.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "DoubleIntegratorEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "tabular", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1086.2625732421875 3 | 5,-1263.9954833984375 4 | 10,-1262.9053955078125 5 | 15,-661.170166015625 6 | 20,-288.8149108886719 7 | 25,-108.48724365234375 8 | 30,-86.2402114868164 9 | 35,-82.36270141601562 10 | 40,-77.28358459472656 11 | 45,-74.50456237792969 12 | 50,-71.93091583251953 13 | 55,-72.67729187011719 14 | 60,-70.5304946899414 15 | 65,-70.99906921386719 16 | 70,-72.21932220458984 17 | 75,-70.76229095458984 18 | 80,-69.89215087890625 19 | 85,-71.07881164550781 20 | 90,-69.90048217773438 21 | 95,-72.05101776123047 22 | 100,-70.91768646240234 23 | 105,-70.9474868774414 24 | 110,-70.95112609863281 25 | 115,-71.04618072509766 26 | 120,-70.75999450683594 27 | 125,-70.89779663085938 28 | 130,-70.42910766601562 29 | 135,-71.04066467285156 30 | 140,-70.91621398925781 31 | 145,-70.90560913085938 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/contour.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "DoubleIntegratorEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "nn", 7 | "policy_type": "tabular", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1086.2625732421875 3 | 5,-1074.3865966796875 4 | 10,-1108.3885498046875 5 | 15,-1194.7117919921875 6 | 20,-1210.2864990234375 7 | 25,-1210.2864990234375 8 | 30,-439.2337341308594 9 | 35,-272.38214111328125 10 | 40,-275.7249755859375 11 | 45,-273.76495361328125 12 | 50,-269.75738525390625 13 | 55,-253.88551330566406 14 | 60,-233.76580810546875 15 | 65,-203.03372192382812 16 | 70,-187.3236083984375 17 | 75,-181.8560028076172 18 | 80,-172.03138732910156 19 | 85,-172.03138732910156 20 | 90,-166.24839782714844 21 | 95,-157.64881896972656 22 | 100,-157.64881896972656 23 | 105,-151.69505310058594 24 | 110,-151.69505310058594 25 | 115,-151.69505310058594 26 | 120,-151.69505310058594 27 | 125,-148.64476013183594 28 | 130,-149.03884887695312 29 | 
135,-157.87362670898438 30 | 140,-157.87362670898438 31 | 145,-151.9330596923828 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/MountainCarEnv/linear/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/MountainCarEnv/linear/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/MountainCarEnv/linear/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/MountainCarEnv/linear/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/MountainCarEnv/linear/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "MountainCarEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "tabular", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/MountainCarEnv/linear/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-500.0 3 | 5,-500.0 4 | 10,-500.0 5 | 15,-500.0 6 | 20,-500.0 7 | 25,-500.0 8 | 30,-500.0 9 | 35,-500.0 10 | 40,-500.0 11 | 45,-500.0 12 | 50,-500.0 13 | 55,-500.0 14 | 60,-500.0 15 | 65,-500.0 16 | 70,-161.0 17 | 75,-154.0 18 | 80,-147.0 19 | 85,-150.0 20 | 90,-147.0 21 | 95,-149.0 22 | 100,-146.0 23 | 105,-149.0 24 | 110,-145.0 25 | 115,-105.0 26 | 120,-105.0 27 | 125,-105.0 28 | 130,-105.0 29 | 135,-105.0 30 | 140,-105.0 31 | 145,-105.0 32 | 
-------------------------------------------------------------------------------- /cs287hw1/data/part2_c/MountainCarEnv/nn/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/MountainCarEnv/nn/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/MountainCarEnv/nn/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/MountainCarEnv/nn/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/MountainCarEnv/nn/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "MountainCarEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "nn", 7 | "policy_type": "tabular", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/MountainCarEnv/nn/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-500.0 3 | 5,-500.0 4 | 10,-500.0 5 | 15,-500.0 6 | 20,-500.0 7 | 25,-500.0 8 | 30,-500.0 9 | 35,-500.0 10 | 40,-500.0 11 | 45,-500.0 12 | 50,-500.0 13 | 55,-500.0 14 | 60,-500.0 15 | 65,-500.0 16 | 70,-500.0 17 | 75,-500.0 18 | 80,-500.0 19 | 85,-500.0 20 | 90,-491.0 21 | 95,-169.0 22 | 100,-183.0 23 | 105,-183.0 24 | 110,-183.0 25 | 115,-183.0 26 | 120,-183.0 27 | 125,-183.0 28 | 130,-183.0 29 | 135,-183.0 30 | 140,-183.0 31 | 145,-183.0 32 | -------------------------------------------------------------------------------- 
/cs287hw1/data/part2_c/SwingUpEnv/linear/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/SwingUpEnv/linear/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/SwingUpEnv/linear/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "SwingUpEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "tabular", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/SwingUpEnv/linear/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1611.0622956168809 3 | 5,-806.5761793759123 4 | 10,-747.522897685818 5 | 15,-540.7433507520074 6 | 20,-825.9231338583917 7 | 25,-944.4935466237056 8 | 30,-991.0924221438501 9 | 35,-1025.7969101684948 10 | 40,-1052.3914612528288 11 | 45,-1073.722422897381 12 | 50,-1139.109125287909 13 | 55,-1105.0172456320088 14 | 60,-1126.5440104057298 15 | 65,-1151.5207311147847 16 | 70,-1143.6126499683105 17 | 75,-1160.0488311303698 18 | 80,-1193.488355727164 19 | 85,-1175.0405064667475 20 | 90,-1163.1958750856556 21 | 95,-1165.6738488734682 22 | 100,-1171.2232022753903 23 | 105,-1186.8571065947756 24 | 110,-1185.2320044494918 25 | 115,-1174.4411249481252 26 | 120,-1170.948044299401 27 | 125,-1157.6196817398613 28 | 130,-1164.1346390379954 29 | 135,-1163.9363993137863 30 | 140,-1172.475828192612 31 | 145,-1233.3813361680384 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/SwingUpEnv/nn/learning_curve.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/SwingUpEnv/nn/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/SwingUpEnv/nn/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "action_discretization": 5, 3 | "env": "SwingUpEnv", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "nn", 7 | "policy_type": "tabular", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_c/SwingUpEnv/nn/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1091.1176999012564 3 | 5,-1091.1176999012564 4 | 10,-1091.1176999012564 5 | 15,-1091.1176999012564 6 | 20,-1091.1176999012564 7 | 25,-1091.1176999012564 8 | 30,-1091.1176999012564 9 | 35,-1091.1176999012564 10 | 40,-1091.1176999012564 11 | 45,-1091.1176999012564 12 | 50,-1091.1176999012564 13 | 55,-1091.1176999012564 14 | 60,-1091.1176999012564 15 | 65,-1091.1176999012564 16 | 70,-1091.1176999012564 17 | 75,-1091.1176999012564 18 | 80,-1091.1176999012564 19 | 85,-1091.1176999012564 20 | 90,-1091.1176999012564 21 | 95,-1091.1176999012564 22 | 100,-1091.1176999012564 23 | 105,-1091.1176999012564 24 | 110,-1091.1176999012564 25 | 115,-1091.1176999012564 26 | 120,-1091.1176999012564 27 | 125,-1091.1176999012564 28 | 130,-1091.1176999012564 29 | 135,-1091.1176999012564 30 | 140,-1091.1176999012564 31 | 145,-1091.1176999012564 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "CartPoleEnv", 3 | "exp_name": "test", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "look_ahead", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon1/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | 6.0,0 3 | 338.0,5 4 | 190.0,10 5 | 364.0,15 6 | 500.0,20 7 | 500.0,25 8 | 500.0,30 9 | 500.0,35 10 | 500.0,40 11 | 500.0,45 12 | 500.0,50 13 | 500.0,55 14 | 500.0,60 15 | 500.0,65 16 | 500.0,70 17 | 500.0,75 18 | 500.0,80 19 | 500.0,85 20 | 500.0,90 21 | 500.0,95 22 | 500.0,100 23 | 500.0,105 24 | 500.0,110 25 | 500.0,115 26 | 500.0,120 27 | 500.0,125 28 | 500.0,130 29 | 500.0,135 30 | 500.0,140 31 | 500.0,145 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon2/params.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "env": "CartPoleEnv", 3 | "exp_name": "test", 4 | "horizon": 2, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "look_ahead", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon2/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | 6.0,0 3 | 154.0,5 4 | 124.0,10 5 | 62.0,15 6 | 364.0,20 7 | 267.0,25 8 | 354.0,30 9 | 500.0,35 10 | 166.0,40 11 | 139.0,45 12 | 500.0,50 13 | 500.0,55 14 | 277.0,60 15 | 500.0,65 16 | 500.0,70 17 | 125.0,75 18 | 42.0,80 19 | 500.0,85 20 | 500.0,90 21 | 500.0,95 22 | 500.0,100 23 | 500.0,105 24 | 110.0,110 25 | 500.0,115 26 | 500.0,120 27 | 500.0,125 28 | 71.0,130 29 | 500.0,135 30 | 500.0,140 31 | 224.0,145 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon3/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "CartPoleEnv", 3 | "exp_name": "test", 4 | "horizon": 3, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "look_ahead", 8 | "render": false 9 | } -------------------------------------------------------------------------------- 
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon3/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,6.0 3 | 5,128.0 4 | 10,226.0 5 | 15,69.0 6 | 20,500.0 7 | 25,500.0 8 | 30,135.0 9 | 35,500.0 10 | 40,500.0 11 | 45,350.0 12 | 50,500.0 13 | 55,88.0 14 | 60,500.0 15 | 65,500.0 16 | 70,97.0 17 | 75,500.0 18 | 80,387.0 19 | 85,216.0 20 | 90,500.0 21 | 95,500.0 22 | 100,500.0 23 | 105,231.0 24 | 110,471.0 25 | 115,500.0 26 | 120,254.0 27 | 125,113.0 28 | 130,500.0 29 | 135,500.0 30 | 140,500.0 31 | 145,500.0 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "DoubleIntegratorEnv", 3 | "exp_name": "test", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "look_ahead", 8 | "render": 
false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | -1086.2625732421875,0 3 | -893.8065795898438,5 4 | -498.8104553222656,10 5 | -210.59446716308594,15 6 | -94.314453125,20 7 | -124.63628387451172,25 8 | -128.93954467773438,30 9 | -146.0016326904297,35 10 | -150.7569122314453,40 11 | -131.7736358642578,45 12 | -119.9066390991211,50 13 | -146.92782592773438,55 14 | -130.0223846435547,60 15 | -187.83953857421875,65 16 | -142.1328582763672,70 17 | -135.27488708496094,75 18 | -111.18167114257812,80 19 | -183.66871643066406,85 20 | -114.97431182861328,90 21 | -220.096923828125,95 22 | -122.19498443603516,100 23 | -154.7129364013672,105 24 | -135.93690490722656,110 25 | -153.4174041748047,115 26 | -130.9731903076172,120 27 | -134.832275390625,125 28 | -179.64395141601562,130 29 | -148.343017578125,135 30 | -231.59364318847656,140 31 | -145.043701171875,145 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "DoubleIntegratorEnv", 3 | "exp_name": "test", 4 | "horizon": 2, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "look_ahead", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | -1183.3167724609375,0 3 | -466.75787353515625,5 4 | -452.248291015625,10 5 | -144.1388397216797,15 6 | -148.9638214111328,20 7 | -131.39715576171875,25 8 | -163.52474975585938,30 9 | -185.48219299316406,35 10 | -154.23744201660156,40 11 | -167.04725646972656,45 12 | -158.9932403564453,50 13 | -185.72344970703125,55 14 | -166.24571228027344,60 15 | -217.92059326171875,65 16 | -161.99917602539062,70 17 | -204.30374145507812,75 18 | -236.45884704589844,80 19 | -151.06915283203125,85 20 | -197.75592041015625,90 21 | -198.5358428955078,95 22 | -182.3708038330078,100 23 | -174.017333984375,105 24 | -154.0111083984375,110 25 | -187.34825134277344,115 26 | -154.50106811523438,120 27 | -178.9189910888672,125 28 | -163.55311584472656,130 29 | -139.14596557617188,135 30 | -193.9720916748047,140 31 | -142.2526092529297,145 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/contour.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "DoubleIntegratorEnv", 3 | "exp_name": "test", 4 | "horizon": 3, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "look_ahead", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1205.2218017578125 3 | 5,-484.97027587890625 4 | 10,-189.61758422851562 5 | 15,-169.90155029296875 6 | 20,-168.10523986816406 7 | 25,-167.66624450683594 8 | 30,-132.1918487548828 9 | 35,-182.9347381591797 10 | 40,-175.6367950439453 11 | 45,-180.9919891357422 12 | 50,-140.72007751464844 13 | 55,-165.1248779296875 14 | 60,-161.497314453125 15 | 65,-167.78457641601562 16 | 70,-156.17080688476562 17 | 75,-153.41282653808594 18 | 80,-188.10372924804688 19 | 85,-162.02178955078125 20 | 
90,-169.79441833496094 21 | 95,-174.7077178955078 22 | 100,-176.8898468017578 23 | 105,-123.10155487060547 24 | 110,-140.86790466308594 25 | 115,-119.35347747802734 26 | 120,-148.01553344726562 27 | 125,-164.94239807128906 28 | 130,-190.43072509765625 29 | 135,-163.91397094726562 30 | 140,-192.2443389892578 31 | 145,-182.40818786621094 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "MountainCarEnv", 3 | "exp_name": "test", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "look_ahead", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | -500.0,0 3 
| -500.0,5 4 | -500.0,10 5 | -500.0,15 6 | -500.0,20 7 | -500.0,25 8 | -500.0,30 9 | -500.0,35 10 | -500.0,40 11 | -500.0,45 12 | -500.0,50 13 | -303.0,55 14 | -236.0,60 15 | -500.0,65 16 | -500.0,70 17 | -426.0,75 18 | -420.0,80 19 | -417.0,85 20 | -500.0,90 21 | -420.0,95 22 | -500.0,100 23 | -414.0,105 24 | -500.0,110 25 | -229.0,115 26 | -238.0,120 27 | -500.0,125 28 | -500.0,130 29 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "MountainCarEnv", 3 | "exp_name": "test", 4 | "horizon": 2, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "look_ahead", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/progress.csv: 
-------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | -500.0,0 3 | -500.0,5 4 | -500.0,10 5 | -500.0,15 6 | -500.0,20 7 | -500.0,25 8 | -500.0,30 9 | -500.0,35 10 | -500.0,40 11 | -500.0,45 12 | -500.0,50 13 | -312.0,55 14 | -228.0,60 15 | -500.0,65 16 | -312.0,70 17 | -237.0,75 18 | -227.0,80 19 | -240.0,85 20 | -500.0,90 21 | -315.0,95 22 | -326.0,100 23 | -500.0,105 24 | -308.0,110 25 | -500.0,115 26 | -498.0,120 27 | -223.0,125 28 | -310.0,130 29 | -500.0,135 30 | -324.0,140 31 | -500.0,145 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "MountainCarEnv", 3 | "exp_name": "test", 4 | "horizon": 3, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "look_ahead", 8 | "render": false 9 | } 
-------------------------------------------------------------------------------- /cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-500.0 3 | 5,-500.0 4 | 10,-500.0 5 | 15,-500.0 6 | 20,-500.0 7 | 25,-500.0 8 | 30,-500.0 9 | 35,-500.0 10 | 40,-500.0 11 | 45,-420.0 12 | 50,-409.0 13 | 55,-303.0 14 | 60,-223.0 15 | 65,-256.0 16 | 70,-228.0 17 | 75,-236.0 18 | 80,-244.0 19 | 85,-301.0 20 | 90,-243.0 21 | 95,-247.0 22 | 100,-228.0 23 | 105,-237.0 24 | 110,-233.0 25 | 115,-231.0 26 | 120,-306.0 27 | 125,-293.0 28 | 130,-230.0 29 | 135,-238.0 30 | 140,-241.0 31 | 145,-234.0 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "SwingUpEnv", 3 | "exp_name": "test", 4 | "horizon": 1, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "look_ahead", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon1/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | -1091.1176999012564,0 3 | -1162.1493360627258,5 4 | -1585.8347053987275,10 5 | 
-1218.8241165219229,15 6 | -1249.7989512873348,20 7 | -1389.2664479944547,25 8 | -1298.771278433243,30 9 | -1683.4040480165327,35 10 | -1507.2040631908412,40 11 | -1422.1633591071438,45 12 | -1505.8400605781235,50 13 | -1531.366199951518,55 14 | -1859.374352334298,60 15 | -1481.806252109952,65 16 | -1226.7502585204463,70 17 | -1268.0980292418817,75 18 | -1446.588749786304,80 19 | -1234.6143376530424,85 20 | -1173.3418445679686,90 21 | -1452.18655778993,95 22 | -1347.0481453039677,100 23 | -1489.2808735833657,105 24 | -1191.7517135659734,110 25 | -1475.9642971133658,115 26 | -1501.2822528696033,120 27 | -1905.84236700956,125 28 | -1442.219550827407,130 29 | -1195.037295457711,135 30 | -1337.1308600611028,140 31 | -1193.1598828731228,145 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon2/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "SwingUpEnv", 3 | "exp_name": "test", 4 | "horizon": 2, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "look_ahead", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon2/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | -1092.7898438420132,0 3 | -1215.7979095044884,5 4 | -900.2297428611507,10 5 
| -1329.5358970230398,15 6 | -1421.1131724637714,20 7 | -1343.603741625047,25 8 | -1032.839109351379,30 9 | -1356.2425212447233,35 10 | -1503.854075071196,40 11 | -1420.590041290277,45 12 | -1581.3682381306112,50 13 | -1513.242610160141,55 14 | -1986.0909965287349,60 15 | -1818.8259128345749,65 16 | -1547.818410397714,70 17 | -1401.8233207141116,75 18 | -2026.578864863387,80 19 | -2098.2875860182835,85 20 | -1648.5499722747932,90 21 | -2156.958773287344,95 22 | -2161.416928709432,100 23 | -2180.018826056157,105 24 | -1830.83594101961,110 25 | -1223.477103579234,115 26 | -1189.1726035364468,120 27 | -1458.5448452906428,125 28 | -1284.0247744743867,130 29 | -1210.7703241828312,135 30 | -1505.6303316533417,140 31 | -1759.0756618354292,145 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon3/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "SwingUpEnv", 3 | "exp_name": "test", 4 | "horizon": 3, 5 | "max_iter": 150, 6 | "mode": "linear", 7 | "policy_type": "look_ahead", 8 | "render": false 9 | } -------------------------------------------------------------------------------- /cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon3/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1116.391526334325 3 | 5,-1747.689019464483 4 | 10,-1670.6732967647536 5 | 
15,-997.3284087020362 6 | 20,-955.9082685133767 7 | 25,-1177.432988580442 8 | 30,-926.5136135262421 9 | 35,-1104.902891456816 10 | 40,-1047.1105995789278 11 | 45,-1675.357489520778 12 | 50,-1120.925762449103 13 | 55,-983.6193496550145 14 | 60,-1178.07379545421 15 | 65,-1152.8209185244793 16 | 70,-1084.3333912294572 17 | 75,-864.4824550887413 18 | 80,-1128.1111239455465 19 | 85,-1877.5212413637996 20 | 90,-1459.4320766081328 21 | 95,-868.6700377542332 22 | 100,-1280.24002899705 23 | 105,-1165.836603981392 24 | 110,-1254.735230603128 25 | 115,-1310.1745190244744 26 | 120,-1038.4402663605304 27 | 125,-1260.488809359114 28 | 130,-1372.7110653444643 29 | 135,-1167.5436737789994 30 | 140,-1228.4214639353308 31 | 145,-1129.5736579851712 32 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_a/DoubleIntegratorEnv/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_a/DoubleIntegratorEnv/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_a/DoubleIntegratorEnv/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_a/DoubleIntegratorEnv/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_a/DoubleIntegratorEnv/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "DoubleIntegratorEnv", 4 | "horizon": 1, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 10, 8 | "policy_type": "rs", 9 | "render": false 10 | } 
-------------------------------------------------------------------------------- /cs287hw1/data/part3_a/DoubleIntegratorEnv/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1122.7823725767166 3 | 5,-1062.223100979545 4 | 10,-1136.400997769638 5 | 15,-1093.1759279603343 6 | 20,-1064.8429670854428 7 | 25,-1155.1049075078602 8 | 30,-1265.864351441458 9 | 35,-678.3290560802258 10 | 40,-381.8164868973546 11 | 45,-265.22922879100435 12 | 50,-169.64047694026792 13 | 55,-155.43764777676293 14 | 60,-97.6076650722121 15 | 65,-84.33627450797492 16 | 70,-93.36829751629186 17 | 75,-78.18198268902506 18 | 80,-76.97710627691222 19 | 85,-73.00620265014103 20 | 90,-78.6135226587545 21 | 95,-80.37329447070243 22 | 100,-78.95941675331389 23 | 105,-79.55617247669808 24 | 110,-83.57970985465705 25 | 115,-82.76591377741207 26 | 120,-85.4703499994012 27 | 125,-82.73176603238862 28 | 130,-86.52114550412108 29 | 135,-89.53232013434773 30 | 140,-93.2105884194899 31 | 145,-86.04294015559864 32 | 150,-88.7641655907943 33 | 155,-89.18217105937 34 | 160,-91.07337037083897 35 | 165,-89.36643780693299 36 | 170,-92.59827615711666 37 | 175,-96.53035937070437 38 | 180,-98.23655640085903 39 | 185,-88.73787475559153 40 | 190,-91.47763141092898 41 | 195,-93.40145281128702 42 | 200,-88.1035242005634 43 | 205,-90.125792591034 44 | 210,-87.15102617961149 45 | 215,-88.15198854500593 46 | 220,-88.5171408243244 47 | 225,-86.40243325822708 48 | 230,-87.99009024200436 49 | 235,-84.80985085614553 50 | 240,-90.16705955320245 51 | 245,-94.08950693010142 52 | 249,-90.98371433618405 53 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_a/MountainCarEnv/contour.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_a/MountainCarEnv/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_a/MountainCarEnv/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_a/MountainCarEnv/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_a/MountainCarEnv/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "MountainCarEnv", 4 | "horizon": 1, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 10, 8 | "policy_type": "rs", 9 | "render": false 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part3_a/MountainCarEnv/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-500.0 3 | 5,-500.0 4 | 10,-500.0 5 | 15,-500.0 6 | 20,-500.0 7 | 25,-500.0 8 | 30,-500.0 9 | 35,-500.0 10 | 40,-500.0 11 | 45,-500.0 12 | 50,-500.0 13 | 55,-500.0 14 | 60,-500.0 15 | 65,-500.0 16 | 70,-500.0 17 | 75,-500.0 18 | 80,-500.0 19 | 85,-500.0 20 | 90,-500.0 21 | 95,-500.0 22 | 100,-500.0 23 | 105,-500.0 24 | 110,-500.0 25 | 115,-500.0 26 | 120,-500.0 27 | 125,-500.0 28 | 130,-500.0 29 | 135,-500.0 30 | 140,-500.0 31 | 145,-500.0 32 | 150,-500.0 33 | 155,-500.0 34 | 160,-500.0 35 | 165,-500.0 36 | 170,-500.0 37 | 175,-500.0 38 | 180,-500.0 39 | 185,-500.0 40 | 190,-500.0 41 | 195,-500.0 42 | 200,-500.0 43 | 205,-500.0 44 | 210,-500.0 45 | 215,-500.0 46 | 220,-500.0 47 | 225,-500.0 48 | 230,-500.0 49 | 235,-500.0 50 | 240,-500.0 51 | 245,-500.0 52 | 249,-500.0 53 | 
-------------------------------------------------------------------------------- /cs287hw1/data/part3_b/CartPoleEnv/horizon1/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/CartPoleEnv/horizon1/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/CartPoleEnv/horizon1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "CartPoleEnv", 4 | "horizon": 1, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 32, 8 | "policy_type": "cem", 9 | "render": false 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/CartPoleEnv/horizon1/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,6.0 3 | 10,6.0 4 | 20,6.0 5 | 30,6.0 6 | 40,6.0 7 | 50,6.0 8 | 60,8.0 9 | 70,38.0 10 | 80,38.0 11 | 90,23.0 12 | 100,40.0 13 | 110,40.0 14 | 120,25.0 15 | 130,34.0 16 | 140,27.0 17 | 150,45.0 18 | 160,46.0 19 | 170,68.0 20 | 180,32.0 21 | 190,34.0 22 | 200,40.0 23 | 210,64.0 24 | 220,32.0 25 | 230,69.0 26 | 240,30.0 27 | 249,50.0 28 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/CartPoleEnv/horizon10/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/CartPoleEnv/horizon10/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/CartPoleEnv/horizon10/params.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "CartPoleEnv", 4 | "horizon": 10, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 32, 8 | "policy_type": "cem", 9 | "render": false 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/CartPoleEnv/horizon10/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | 6.0,0 3 | 6.0,10 4 | 6.0,20 5 | 6.0,30 6 | 6.0,40 7 | 6.0,50 8 | 6.0,60 9 | 6.0,70 10 | 6.0,80 11 | 6.0,90 12 | 6.0,100 13 | 6.0,110 14 | 6.0,120 15 | 6.0,130 16 | 6.0,140 17 | 6.0,150 18 | 6.0,160 19 | 6.0,170 20 | 6.0,180 21 | 8.0,190 22 | 94.0,200 23 | 72.0,210 24 | 108.0,220 25 | 247.0,230 26 | 288.0,240 27 | 160.0,249 28 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/CartPoleEnv/horizon5/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/CartPoleEnv/horizon5/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/CartPoleEnv/horizon5/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "CartPoleEnv", 4 | "horizon": 5, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 32, 8 | "policy_type": "cem", 9 | "render": false 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/CartPoleEnv/horizon5/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,6.0 3 | 10,6.0 4 | 20,6.0 5 | 30,6.0 6 | 40,6.0 7 | 50,6.0 8 | 60,6.0 9 
| 70,6.0 10 | 80,6.0 11 | 90,6.0 12 | 100,12.0 13 | 110,6.0 14 | 120,8.0 15 | 130,12.0 16 | 140,20.0 17 | 150,6.0 18 | 160,6.0 19 | 170,6.0 20 | 180,13.0 21 | 190,63.0 22 | 200,6.0 23 | 210,60.0 24 | 220,134.0 25 | 230,87.0 26 | 240,145.0 27 | 249,140.0 28 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "DoubleIntegratorEnv", 4 | "horizon": 1, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 32, 8 | "policy_type": "cem", 9 | "render": false 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1083.2572626905703 3 | 10,-1144.3036625666957 4 | 20,-70.25937042708051 5 | 30,-65.82760913324954 6 | 40,-69.26650008607427 7 | 50,-71.99475809172492 8 | 60,-73.36248774374332 9 | 70,-75.01519602189407 10 | 80,-76.7247153398074 11 | 
90,-77.0947355563633 12 | 100,-82.09236529859092 13 | 110,-80.32221880918433 14 | 120,-86.06994235658904 15 | 130,-93.97135070453305 16 | 140,-123.41189553061248 17 | 150,-111.09701675206192 18 | 160,-80.99094440230087 19 | 170,-111.82992648461546 20 | 180,-1407.4459451091536 21 | 190,-1466.2611779251909 22 | 200,-1579.0335773941397 23 | 210,-1436.1574085697116 24 | 220,-215.21898344969065 25 | 230,-1478.0767973483007 26 | 240,-1634.5027828698492 27 | 249,-1619.9079515249616 28 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "DoubleIntegratorEnv", 4 | "horizon": 10, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 32, 8 | "policy_type": "cem", 9 | "render": false 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | 
-328.79005020730284,0 3 | -200.98521546340913,10 4 | -64.77700467721864,20 5 | -84.86970171900138,30 6 | -128.60893935069768,40 7 | -147.48401302068382,50 8 | -139.2296066862622,60 9 | -110.77936569105727,70 10 | -114.11320544047803,80 11 | -125.6650792064009,90 12 | -123.18294325299779,100 13 | -138.1127465768214,110 14 | -126.7707613795519,120 15 | -112.07978614737182,130 16 | -99.5483647865851,140 17 | -116.85997383619346,150 18 | -129.06128247897595,160 19 | -104.97593874618848,170 20 | -122.84541554251891,180 21 | -114.77346058769307,190 22 | -107.45986913081748,200 23 | -101.4258130643817,210 24 | -114.49001701841401,220 25 | -104.44739383339761,230 26 | -115.09346632637698,240 27 | -129.87534014553373,249 28 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "DoubleIntegratorEnv", 4 | "horizon": 5, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 32, 8 | "policy_type": "cem", 9 | "render": false 10 | } 
-------------------------------------------------------------------------------- /cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1175.616838534374 3 | 10,-478.01630518122784 4 | 20,-62.36968488577191 5 | 30,-70.35551873127073 6 | 40,-81.91418989124216 7 | 50,-113.87514452834378 8 | 60,-99.39722406411673 9 | 70,-104.74424596209462 10 | 80,-116.43105353091153 11 | 90,-120.61381139602538 12 | 100,-104.22825758830257 13 | 110,-97.51563398276409 14 | 120,-96.10433126328779 15 | 130,-117.98687857144064 16 | 140,-103.6915302113553 17 | 150,-120.80207494162703 18 | 160,-96.17403533006443 19 | 170,-95.24357935829224 20 | 180,-104.7003414784946 21 | 190,-98.02249429487223 22 | 200,-96.91254602252111 23 | 210,-104.4164736636655 24 | 220,-111.60483821355875 25 | 230,-103.64503997519414 26 | 240,-109.3393306564727 27 | 249,-100.84268701592826 28 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/MountainCarEnv/horizon1/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon1/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/MountainCarEnv/horizon1/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon1/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/MountainCarEnv/horizon1/params.json: -------------------------------------------------------------------------------- 1 | 
{ 2 | "batch_size": 256, 3 | "env": "MountainCarEnv", 4 | "horizon": 1, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 32, 8 | "policy_type": "cem", 9 | "render": false 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/MountainCarEnv/horizon1/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-500.0 3 | 10,-500.0 4 | 20,-500.0 5 | 30,-500.0 6 | 40,-500.0 7 | 50,-500.0 8 | 60,-500.0 9 | 70,-500.0 10 | 80,-500.0 11 | 90,-500.0 12 | 100,-500.0 13 | 110,-500.0 14 | 120,-500.0 15 | 130,-500.0 16 | 140,-500.0 17 | 150,-500.0 18 | 160,-500.0 19 | 170,-500.0 20 | 180,-500.0 21 | 190,-500.0 22 | 200,-500.0 23 | 210,-500.0 24 | 220,-500.0 25 | 230,-500.0 26 | 240,-500.0 27 | 249,-500.0 28 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/MountainCarEnv/horizon10/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon10/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/MountainCarEnv/horizon10/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon10/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/MountainCarEnv/horizon10/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "MountainCarEnv", 4 | "horizon": 10, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | 
"num_acts": 32, 8 | "policy_type": "cem", 9 | "render": false 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/MountainCarEnv/horizon10/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | -500.0,0 3 | -500.0,10 4 | -500.0,20 5 | -500.0,30 6 | -500.0,40 7 | -500.0,50 8 | -500.0,60 9 | -500.0,70 10 | -500.0,80 11 | -500.0,90 12 | -500.0,100 13 | -500.0,110 14 | -500.0,120 15 | -500.0,130 16 | -500.0,140 17 | -500.0,150 18 | -500.0,160 19 | -500.0,170 20 | -500.0,180 21 | -500.0,190 22 | -500.0,200 23 | -500.0,210 24 | -500.0,220 25 | -500.0,230 26 | -500.0,240 27 | -500.0,249 28 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/MountainCarEnv/horizon5/contour.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon5/contour.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/MountainCarEnv/horizon5/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon5/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/MountainCarEnv/horizon5/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "MountainCarEnv", 4 | "horizon": 5, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 32, 8 | "policy_type": "cem", 9 | "render": false 10 | } 
-------------------------------------------------------------------------------- /cs287hw1/data/part3_b/MountainCarEnv/horizon5/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-500.0 3 | 10,-500.0 4 | 20,-500.0 5 | 30,-500.0 6 | 40,-500.0 7 | 50,-500.0 8 | 60,-500.0 9 | 70,-500.0 10 | 80,-500.0 11 | 90,-500.0 12 | 100,-500.0 13 | 110,-500.0 14 | 120,-500.0 15 | 130,-500.0 16 | 140,-500.0 17 | 150,-500.0 18 | 160,-500.0 19 | 170,-500.0 20 | 180,-500.0 21 | 190,-500.0 22 | 200,-500.0 23 | 210,-500.0 24 | 220,-500.0 25 | 230,-500.0 26 | 240,-500.0 27 | 249,-500.0 28 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/SwingUpEnv/horizon1/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/SwingUpEnv/horizon1/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/SwingUpEnv/horizon1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "SwingUpEnv", 4 | "horizon": 1, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 32, 8 | "policy_type": "cem", 9 | "render": false 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/SwingUpEnv/horizon1/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1396.667327423777 3 | 10,-1236.5227676190348 4 | 20,-1201.2862992423045 5 | 30,-1264.1382127123386 6 | 40,-1525.1329954184694 7 | 50,-1494.0356861809296 8 | 60,-1180.626836176929 9 | 70,-1115.4489761429518 10 | 80,-2462.797364356984 11 | 90,-1093.9334660251343 12 | 
100,-2196.921839095621 13 | 110,-2146.571425384078 14 | 120,-1093.7293024977773 15 | 130,-1093.9334660251343 16 | 140,-1097.0459684677512 17 | 150,-1357.03826196838 18 | 160,-1095.0031456020984 19 | 170,-1103.5009875248982 20 | 180,-1318.8339192880053 21 | 190,-1103.5009875248982 22 | 200,-2462.797364356984 23 | 210,-2032.1447096031256 24 | 220,-1770.8190385950547 25 | 230,-1831.6227592496257 26 | 240,-1627.0230466828693 27 | 249,-1729.4831263473936 28 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/SwingUpEnv/horizon10/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/SwingUpEnv/horizon10/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/SwingUpEnv/horizon10/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "SwingUpEnv", 4 | "horizon": 10, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 32, 8 | "policy_type": "cem", 9 | "render": false 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/SwingUpEnv/horizon10/progress.csv: -------------------------------------------------------------------------------- 1 | Average Returns,Iteration 2 | -1093.708926777139,0 3 | -1095.0031456020984,10 4 | -1177.3105002993598,20 5 | -2317.7596258376225,30 6 | -3112.9437564743707,40 7 | -2272.4569652126597,50 8 | -2422.264676378556,60 9 | -2353.919829701844,70 10 | -2331.054111508252,80 11 | -2346.653255271173,90 12 | -2387.710052225485,100 13 | -2078.59505659242,110 14 | -2904.856342572041,120 15 | -1352.730304454663,130 16 | -1311.3610187713514,140 17 | -1365.0260867375184,150 18 | -1243.4885705745057,160 19 | 
-1244.0366648294828,170 20 | -1241.9574480293968,180 21 | -1280.8335180998288,190 22 | -1242.429338154825,200 23 | -750.119641550883,210 24 | -1257.9522315148947,220 25 | -1158.085668416221,230 26 | -1226.716938288927,240 27 | -1235.294428793677,249 28 | -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/SwingUpEnv/horizon5/learning_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/SwingUpEnv/horizon5/learning_curve.png -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/SwingUpEnv/horizon5/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "batch_size": 256, 3 | "env": "SwingUpEnv", 4 | "horizon": 5, 5 | "learning_rate": 0.001, 6 | "max_iter": 250, 7 | "num_acts": 32, 8 | "policy_type": "cem", 9 | "render": false 10 | } -------------------------------------------------------------------------------- /cs287hw1/data/part3_b/SwingUpEnv/horizon5/progress.csv: -------------------------------------------------------------------------------- 1 | Iteration,Average Returns 2 | 0,-1092.2915620815575 3 | 10,-1103.5009875248982 4 | 20,-2471.439718954018 5 | 30,-2074.797792846919 6 | 40,-2424.161644445676 7 | 50,-2426.6673415803493 8 | 60,-2449.902157955784 9 | 70,-2617.3939123653045 10 | 80,-2456.9473873794736 11 | 90,-2455.0500879372407 12 | 100,-2438.936672619652 13 | 110,-2633.558000601011 14 | 120,-2517.561124215514 15 | 130,-2455.0688311597387 16 | 140,-2629.0789210394737 17 | 150,-2919.149640766505 18 | 160,-1305.1880741230243 19 | 170,-1168.2989464442921 20 | 180,-1481.954819420085 21 | 190,-760.3012598426423 22 | 200,-1276.914365026002 23 | 210,-1278.1670534512004 24 | 220,-1493.6650277140825 25 | 230,-1293.9963890328356 26 | 
240,-1261.0170201625338 27 | 249,-1248.4889372240405 28 | -------------------------------------------------------------------------------- /cs287hw1/data/part5/CartPoleEnv/modelinear_state_discretization51/log.txt: -------------------------------------------------------------------------------- 1 | Logging to C:\Users\Minjune\Desktop\287\cs287hw1\cs287-hw1-code/data/part5/CartPoleEnv/modelinear_state_discretization51/ 2 | -------------------------------------------------------------------------------- /cs287hw1/data/part5/CartPoleEnv/modelinear_state_discretization51/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "env": "CartPoleEnv", 3 | "max_iter": 150, 4 | "mode": "linear", 5 | "render": false, 6 | "state_discretization": 51 7 | } -------------------------------------------------------------------------------- /cs287hw1/data/part5/CartPoleEnv/modelinear_state_discretization51/progress.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part5/CartPoleEnv/modelinear_state_discretization51/progress.csv -------------------------------------------------------------------------------- /cs287hw1/envs/__init__.py: -------------------------------------------------------------------------------- 1 | from envs.cart_pole_env import CartPoleEnv 2 | from envs.mountain_hill_env import MountainCarEnv 3 | from envs.double_integrator_env import DoubleIntegratorEnv 4 | from envs.swing_up_env import SwingUpEnv 5 | from envs.grid1d_env import Grid1DEnv 6 | from envs.gridworld_env import GridWorldEnv 7 | -------------------------------------------------------------------------------- /cs287hw1/envs/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/envs/__pycache__/cart_pole_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/cart_pole_env.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/envs/__pycache__/double_integrator_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/double_integrator_env.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/envs/__pycache__/grid1d_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/grid1d_env.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/envs/__pycache__/gridworld_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/gridworld_env.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/envs/__pycache__/mountain_hill_env.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/mountain_hill_env.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/envs/__pycache__/swing_up_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/swing_up_env.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/part1/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part1/__init__.py -------------------------------------------------------------------------------- /cs287hw1/part1/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part1/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/part1/__pycache__/tabular_value_iteration.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part1/__pycache__/tabular_value_iteration.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/part1/run_part1.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import logger 4 | import json 5 | 6 | 7 | def main(args): 8 | render = args.render 9 | if not render: 10 | import 
matplotlib 11 | matplotlib.use('Agg') 12 | import matplotlib.pyplot as plt 13 | from utils.utils import TabularPolicy, TabularValueFun 14 | from part1.tabular_value_iteration import ValueIteration 15 | from envs import Grid1DEnv, GridWorldEnv 16 | envs = [GridWorldEnv(seed=0), GridWorldEnv(seed=1)] 17 | 18 | for env in envs: 19 | env_name = env.__name__ 20 | exp_dir = os.getcwd() + '/data/part1/%s/policy_type%s_temperature%s/' % (env_name, args.policy_type, args.temperature) 21 | logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv']) 22 | args_dict = vars(args) 23 | args_dict['env'] = env_name 24 | json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True) 25 | 26 | policy = TabularPolicy(env) 27 | value_fun = TabularValueFun(env) 28 | algo = ValueIteration(env, 29 | value_fun, 30 | policy, 31 | policy_type=args.policy_type, 32 | render=render, 33 | temperature=args.temperature) 34 | algo.train() 35 | 36 | 37 | if __name__ == "__main__": 38 | parser = argparse.ArgumentParser() 39 | parser.add_argument("--policy_type", "-p", type=str, default='deterministic', choices=["deterministic", "max_ent"], 40 | help="Whether to train a deterministic policy or a maximum entropy one") 41 | parser.add_argument("--render", "-r", action='store_true', help="Vizualize the policy and contours when training") 42 | parser.add_argument("--temperature", "-t", type=float, default=1., 43 | help="Temperature parameter for maximum entropy policies") 44 | args = parser.parse_args() 45 | main(args) 46 | -------------------------------------------------------------------------------- /cs287hw1/part2/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part2/__init__.py -------------------------------------------------------------------------------- /cs287hw1/part2/__pycache__/__init__.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part2/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/part2/__pycache__/discretize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part2/__pycache__/discretize.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/part2/__pycache__/look_ahead_policy.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part2/__pycache__/look_ahead_policy.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/part2/look_ahead_policy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import spaces 3 | 4 | 5 | class LookAheadPolicy(object): 6 | """ 7 | Look ahead policy 8 | 9 | -- VARIABLES/FUNCTIONS YOU WILL NEED TO USE -- 10 | * self.horizon (int): Horizon for the look ahead policy 11 | 12 | * act_dim (int): Dimension of the state space 13 | 14 | * value_fun (TabularValueFun): 15 | - get_values(states): if states is None returns the values of all the states. Otherwise, it returns the 16 | values of the specified states 17 | * env (Env): 18 | - vec_set_state(states): vectorized (multiple environments in parallel) version of reseting the 19 | environment to a state for a batch of states. 
20 | - vec_step(actions): vectorized (multiple environments in parallel) version of stepping through the 21 | environment for a batch of actions. Returns the next observations, rewards, dones signals, env infos 22 | (last not used). 23 | """ 24 | def __init__(self, 25 | env, 26 | value_fun, 27 | horizon, 28 | ): 29 | self.env = env 30 | self.discount = env.discount 31 | self._value_fun = value_fun 32 | self.horizon = horizon 33 | 34 | def get_action(self, state): 35 | """ 36 | Get the best action by doing look ahead, covering actions for the specified horizon. 37 | HINT: use np.meshgrid to compute all the possible action sequences. 38 | :param state: 39 | :return: best_action (int) 40 | """ 41 | assert isinstance(self.env.action_space, spaces.Discrete) 42 | act_dim = self.env.action_space.n 43 | """ INSERT YOUR CODE HERE""" 44 | actions = np.arange(act_dim) 45 | sequences = np.array(np.meshgrid(*np.tile(np.arange(act_dim), 46 | (self.horizon, 1)))).T.reshape(-1, self.horizon).T 47 | return sequences[0, np.argmax(self.get_returns(state, sequences))] 48 | 49 | def get_returns(self, state, actions): 50 | """ 51 | :param state: current state of the policy 52 | :param actions: array of actions of shape [horizon, num_acts] 53 | :return: returns for the specified horizon + self.discount ^ H value_fun 54 | HINT: Make sure to take the discounting and done into acount! 
55 | """ 56 | assert self.env.vectorized 57 | """ INSERT YOUR CODE HERE""" 58 | num_acts = actions.shape[1] 59 | returns = np.zeros(num_acts) 60 | # self.env.set_state(state) 61 | # if len(actions.shape) < 3: 62 | # self.env.vec_set_state(np.full(num_acts, state)) 63 | # else: 64 | self.env.vec_set_state(np.tile(state, (num_acts, 1))) 65 | for h in range(self.horizon): 66 | observations, rewards, dones, env_infos = self.env.vec_step(actions[h]) 67 | self.env.vec_set_state(observations) 68 | returns += self.discount ** h * rewards 69 | returns += self.discount ** self.horizon * self._value_fun.get_values(observations) 70 | return returns 71 | 72 | def update(self, actions): 73 | pass 74 | -------------------------------------------------------------------------------- /cs287hw1/part2/run_part2_c.py: -------------------------------------------------------------------------------- 1 | import logger 2 | import argparse 3 | import os 4 | import json 5 | import numpy as np; np.random.seed(0) 6 | 7 | 8 | def main(args): 9 | render = args.render 10 | if not render: 11 | import matplotlib 12 | matplotlib.use('Agg') 13 | import matplotlib.pyplot as plt 14 | from envs import DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv, SwingUpEnv 15 | from utils.utils import TabularPolicy, TabularValueFun 16 | from part1.tabular_value_iteration import ValueIteration 17 | from part2.look_ahead_policy import LookAheadPolicy 18 | from part2.discretize import Discretize 19 | envs = [DoubleIntegratorEnv(), MountainCarEnv(), CartPoleEnv(), SwingUpEnv()] 20 | 21 | for env in envs: 22 | env_name = env.__class__.__name__ 23 | state_discretization = 151 if env_name in ['MountainCarEnv', 'DoubleIntegratorEnv'] else 21 24 | exp_dir = os.getcwd() + '/data/part2_c/%s/%s' % (env_name, args.mode) 25 | logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv']) 26 | args_dict = vars(args) 27 | args_dict['env'] = env_name 28 | json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, 
sort_keys=True) 29 | 30 | env = Discretize(env, 31 | state_discretization=state_discretization, 32 | mode=args.mode 33 | ) 34 | value_fun = TabularValueFun(env) 35 | if args.policy_type == 'tabular': 36 | policy = TabularPolicy(env) 37 | elif args.policy_type == 'look_ahead': 38 | policy = LookAheadPolicy(env, value_fun, args.horizon) 39 | else: 40 | raise NotImplementedError 41 | algo = ValueIteration(env, 42 | value_fun, 43 | policy, 44 | render=render, 45 | max_itr=args.max_iter, 46 | num_rollouts=1, 47 | render_itr=5, 48 | log_itr=5) 49 | algo.train() 50 | 51 | 52 | if __name__ == "__main__": 53 | parser = argparse.ArgumentParser() 54 | parser.add_argument("--render", "-r", action='store_true', 55 | help="Vizualize the policy and contours when training") 56 | parser.add_argument("--action_discretization", "-a", type=int, default=5, 57 | help="Number of points per state dimension to discretize") 58 | parser.add_argument("--mode", "-m", type=str, default='nn', choices=['nn', 'linear'], 59 | help="Mode of interpolate between discrete points") 60 | parser.add_argument("--policy_type", "-p", type=str, default='tabular', choices=['tabular', 'look_ahead'], 61 | help='Type of policy to use. 
Whether to use look ahead policy or tabular') 62 | parser.add_argument("--horizon", "-H", type=int, default=1, 63 | help='Planning horizon for the look ahead policy') 64 | parser.add_argument("--max_iter", "-i", type=int, default=150, 65 | help='Maximum number of iterations for the value iteration algorithm') 66 | args = parser.parse_args() 67 | main(args) 68 | -------------------------------------------------------------------------------- /cs287hw1/part3/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part3/__init__.py -------------------------------------------------------------------------------- /cs287hw1/part3/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part3/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/part3/__pycache__/continous_value_iteration.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part3/__pycache__/continous_value_iteration.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/part3/__pycache__/look_ahead_policy.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part3/__pycache__/look_ahead_policy.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/part3/run_part3_a.py: 
-------------------------------------------------------------------------------- 1 | import logger 2 | import argparse 3 | import os 4 | import json 5 | import numpy as np; np.random.seed(0) 6 | 7 | 8 | def main(args): 9 | render = args.render 10 | if not render: 11 | import matplotlib 12 | matplotlib.use('Agg') 13 | import matplotlib.pyplot as plt 14 | from envs import DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv, SwingUpEnv 15 | from utils.utils import VectorizeMujocoEnv 16 | from part3.look_ahead_policy import LookAheadPolicy 17 | from utils.value_functions import MLPValueFun 18 | from part3.continous_value_iteration import ContinousStateValueIteration 19 | envs = [DoubleIntegratorEnv(), MountainCarEnv()] 20 | 21 | for env in envs: 22 | env_name = env.__class__.__name__ 23 | exp_dir = os.getcwd() + '/data/part3_a/%s' % (env_name) 24 | logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv']) 25 | args_dict = vars(args) 26 | args_dict['env'] = env_name 27 | json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True) 28 | 29 | value_fun = MLPValueFun(env) 30 | policy = LookAheadPolicy(env, 31 | value_fun, 32 | horizon=args.horizon, 33 | look_ahead_type=args.policy_type, 34 | num_acts=args.num_acts) 35 | algo = ContinousStateValueIteration(env, 36 | value_fun, 37 | policy, 38 | learning_rate=args.learning_rate, 39 | batch_size=args.batch_size, 40 | num_acts=args.num_acts, 41 | render=args.render, 42 | max_itr=args.max_iter, 43 | log_itr=5) 44 | algo.train() 45 | 46 | 47 | if __name__ == "__main__": 48 | parser = argparse.ArgumentParser() 49 | parser.add_argument("--render", "-r", action='store_true', 50 | help="Vizualize the policy and contours when training") 51 | parser.add_argument("--policy_type", "-p", type=str, default='rs', choices=['cem', 'rs'], 52 | help='Type of policy to use. 
Whether to use look ahead with cross-entropy \ 53 | method or random shooting') 54 | parser.add_argument("--horizon", "-H", type=int, default=1, 55 | help='Planning horizon for the look ahead policy') 56 | parser.add_argument("--max_iter", "-i", type=int, default=250, 57 | help='Maximum number of iterations for the value iteration algorithm') 58 | parser.add_argument("--learning_rate", "-lr", type=float, default=1e-3, 59 | help='Learning rate for training the value function') 60 | parser.add_argument("--batch_size", "-bs", type=int, default=256, 61 | help='batch size for training the value function') 62 | parser.add_argument("--num_acts", "-a", type=int, default=10, 63 | help='Number of actions sampled for maximizing the value function') 64 | args = parser.parse_args() 65 | main(args) -------------------------------------------------------------------------------- /cs287hw1/part4/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part4/__init__.py -------------------------------------------------------------------------------- /cs287hw1/part4/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part4/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/part4/__pycache__/discretize.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part4/__pycache__/discretize.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/part4/run_part4.py: 
-------------------------------------------------------------------------------- 1 | import logger 2 | import argparse 3 | import os 4 | import json 5 | import numpy as np; np.random.seed(0) 6 | 7 | 8 | def main(args): 9 | render = args.render 10 | if not render: 11 | import matplotlib 12 | matplotlib.use('Agg') 13 | import matplotlib.pyplot as plt 14 | from envs import CartPoleEnv, SwingUpEnv 15 | from utils.utils import TabularPolicy, TabularValueFun 16 | from part1.tabular_value_iteration import ValueIteration 17 | from part4.discretize import Discretize 18 | envs = [CartPoleEnv(), SwingUpEnv()] 19 | 20 | for env in envs: 21 | env_name = env.__class__.__name__ 22 | exp_dir = os.getcwd() + '/data/part5/%s/mode%s_state_discretization%s/' % (env_name, 23 | args.mode, 24 | str(args.state_discretization) 25 | ) 26 | logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv']) 27 | args_dict = vars(args) 28 | args_dict['env'] = env_name 29 | json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True) 30 | 31 | env = Discretize(env, 32 | state_discretization=args.state_discretization, 33 | mode=args.mode 34 | ) 35 | value_fun = TabularValueFun(env) 36 | policy = TabularPolicy(env) 37 | algo = ValueIteration(env, 38 | value_fun, 39 | policy, 40 | render=render, 41 | max_itr=args.max_iter, 42 | num_rollouts=1, 43 | render_itr=5, 44 | log_itr=5) 45 | algo.train() 46 | 47 | 48 | if __name__ == "__main__": 49 | parser = argparse.ArgumentParser() 50 | parser.add_argument("--render", "-r", action='store_true', 51 | help="Vizualize the policy and contours when training") 52 | parser.add_argument("--state_discretization", "-s", type=int, default=21, 53 | help="Number of points per state dimension to discretize") 54 | parser.add_argument("--mode", "-m", type=str, default='nn', choices=['nn', 'linear'], 55 | help="Mode of interpolate between discrete points") 56 | parser.add_argument("--max_iter", "-i", type=int, default=150, 57 | help='Maximum 
number of iterations for the value iteration algorithm') 58 | args = parser.parse_args() 59 | main(args) 60 | -------------------------------------------------------------------------------- /cs287hw1/requirements.txt: -------------------------------------------------------------------------------- 1 | autograd 2 | gym 3 | joblib 4 | matplotlib 5 | moviepy 6 | numpy 7 | Flask==1.0.2 8 | plotly==3.2.0 9 | -------------------------------------------------------------------------------- /cs287hw1/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__init__.py -------------------------------------------------------------------------------- /cs287hw1/utils/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/utils/__pycache__/plot.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__pycache__/plot.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/utils/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/utils/__pycache__/value_functions.cpython-36.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__pycache__/value_functions.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/utils/value_functions.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from collections import OrderedDict 3 | 4 | 5 | class MLPValueFun(object): 6 | _activations = { 7 | 'tanh': np.tanh, 8 | None: lambda x: x, 9 | 'relu': lambda x: np.maximum(x, 0) 10 | } 11 | 12 | def __init__(self, env, hidden_sizes=(256, 256), activation='relu'): 13 | self._env = env 14 | self._params = dict() 15 | self._build(hidden_sizes, activation) 16 | 17 | def _build(self, hidden_sizes=(256, 256), activation='relu', *args, **kwargs): 18 | self._activation = self._activations[activation] 19 | self._hidden_sizes = hidden_sizes 20 | prev_size = self._env.observation_space.shape[0] 21 | for i, hidden_size in enumerate(hidden_sizes): 22 | W = np.random.normal(loc=0, scale=1/prev_size, size=(hidden_size, prev_size)) 23 | b = np.zeros((hidden_size,)) 24 | 25 | self._params['W_%d' % i] = W 26 | self._params['b_%d' % i] = b 27 | 28 | prev_size = hidden_size 29 | 30 | W = np.random.normal(loc=0, scale=1/prev_size, size=(1, prev_size)) 31 | b = np.zeros((1,)) 32 | self._params['W_out'] = W 33 | self._params['b_out'] = b 34 | 35 | def get_values(self, states, params=None): 36 | params = self._params if params is None else params 37 | x = states 38 | for i, hidden_size in enumerate(self._hidden_sizes): 39 | x = np.dot(params['W_%d' % i], x.T).T + params['b_%d' % i] 40 | x = self._activation(x) 41 | values = np.dot(params['W_out'], x.T).T + params['b_out'] 42 | return values[:, 0] 43 | 44 | def update(self, params): 45 | assert set(params.keys()) == set(self._params.keys()) 46 | self._params = params 
47 | 48 | -------------------------------------------------------------------------------- /cs287hw1/viskit/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'dementrock' 2 | -------------------------------------------------------------------------------- /cs287hw1/viskit/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/viskit/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw1/viskit/__pycache__/core.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/viskit/__pycache__/core.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw2/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/.DS_Store -------------------------------------------------------------------------------- /cs287hw2/.ipynb_checkpoints/Untitled-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /cs287hw2/.ipynb_checkpoints/Untitled1-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- 
/cs287hw2/.ipynb_checkpoints/Untitled3-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 2 6 | } 7 | -------------------------------------------------------------------------------- /cs287hw2/__pycache__/rot_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/__pycache__/rot_utils.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw2/__pycache__/simulators.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/__pycache__/simulators.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw2/cs287hw2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/cs287hw2.pdf -------------------------------------------------------------------------------- /cs287hw2/environment.yml: -------------------------------------------------------------------------------- 1 | name: cs287hw2 2 | channels: 3 | - defaults 4 | - conda-forge 5 | dependencies: 6 | - python=3.7.3 7 | - jupyter 8 | - patchelf=0.9 # comment this line out on Mac 9 | - pip>=19.1 10 | - pip: 11 | - -r ./requirements.txt 12 | -------------------------------------------------------------------------------- /cs287hw2/envs/__pycache__/cheetah_env.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/envs/__pycache__/cheetah_env.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw2/envs/__pycache__/hopper_env.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/envs/__pycache__/hopper_env.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw2/envs/cheetah_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import copy 3 | from gym import utils 4 | from gym.envs.mujoco import mujoco_env 5 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv 6 | import os 7 | 8 | class CheetahModEnv(HalfCheetahEnv, mujoco_env.MujocoEnv, utils.EzPickle): 9 | def __init__(self): 10 | 11 | self.perturb_joints = True 12 | self.count = 0 13 | mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 4) 14 | utils.EzPickle.__init__(self) 15 | self.perturb_idx=0 16 | self.init_geom_rgba = self.model.geom_rgba.copy() 17 | 18 | 19 | def f_sim(self, x0, u, dt, rollout=False, perturb=.01): 20 | nq, nv = self.model.nq, self.model.nv 21 | self.sim.reset() 22 | qpos = copy.deepcopy(self.init_qpos) 23 | qvel = copy.deepcopy(self.init_qvel) 24 | 25 | qpos[:] = x0[:nq] 26 | qvel[:] = x0[nq:] 27 | 28 | self.set_state(qpos, qvel) 29 | if rollout: 30 | self.step(u, perturb=perturb) 31 | else: 32 | self.perturb_joints = False 33 | self.step(u) 34 | self.perturb_joints = True 35 | return np.concatenate([ 36 | self.sim.data.qpos.flat[:], 37 | self.sim.data.qvel.flat[:] 38 | ]) 39 | 40 | def step(self, a, perturb=.01): 41 | self.count += 1 42 | if self.perturb_joints and self.count%5==0: 43 | self.perturb_idx = np.random.randint(0,6) 44 | a[self.perturb_idx] += 
np.random.choice(np.array([-1*perturb,perturb])) 45 | model_id = self.model.geom_names.index(self.model.joint_names[self.perturb_idx+3]) 46 | geom_rgba = self.init_geom_rgba.copy() 47 | geom_rgba[model_id] = [0, 1, 1 ,1] 48 | self.model.geom_rgba[:] = geom_rgba 49 | else: 50 | if self.count > 1 and self.count%8==0: 51 | model_id = self.model.geom_names.index(self.model.joint_names[self.perturb_idx+3]) 52 | geom_rgba = self.init_geom_rgba.copy() 53 | self.model.geom_rgba[:] = geom_rgba 54 | xposbefore = self.sim.data.qpos[0] 55 | self.do_simulation(a, self.frame_skip) 56 | xposafter = self.sim.data.qpos[0] 57 | ob = self._get_obs() 58 | reward_ctrl = - 0.1 * np.square(a).sum() 59 | reward_run = (xposafter - xposbefore)/self.dt 60 | reward = reward_ctrl + reward_run 61 | done = False 62 | return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl) 63 | -------------------------------------------------------------------------------- /cs287hw2/img/fig_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/img/fig_a.png -------------------------------------------------------------------------------- /cs287hw2/img/ref_a.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/img/ref_a.png -------------------------------------------------------------------------------- /cs287hw2/img/ref_b_cartpole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/img/ref_b_cartpole.png -------------------------------------------------------------------------------- /cs287hw2/img/ref_b_heli.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/img/ref_b_heli.png -------------------------------------------------------------------------------- /cs287hw2/mats/cartpole_traj.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/cartpole_traj.mat -------------------------------------------------------------------------------- /cs287hw2/mats/heli_traj.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/heli_traj.mat -------------------------------------------------------------------------------- /cs287hw2/mats/p_a_w.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/p_a_w.mat -------------------------------------------------------------------------------- /cs287hw2/mats/p_b_w.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/p_b_w.mat -------------------------------------------------------------------------------- /cs287hw2/mats/p_c_heli_starting_states.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/p_c_heli_starting_states.mat -------------------------------------------------------------------------------- /cs287hw2/mats/p_c_w.mat: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/p_c_w.mat -------------------------------------------------------------------------------- /cs287hw2/requirements.txt: -------------------------------------------------------------------------------- 1 | scipy==1.1.0 2 | moviepy==1.0.0 3 | seaborn==0.9.0 4 | matplotlib==3.0.2 5 | mujoco_py>=1.50.1.56 6 | #mujoco_py==2.0.2.2 7 | #numpy==1.15.4 8 | numpy==1.16.1 9 | gym==0.12.5 10 | -------------------------------------------------------------------------------- /cs287hw2/vids/visualization_hopper.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/vids/visualization_hopper.gif -------------------------------------------------------------------------------- /cs287hw3/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/.DS_Store -------------------------------------------------------------------------------- /cs287hw3/.idea/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Default ignored files 3 | /workspace.xml -------------------------------------------------------------------------------- /cs287hw3/.idea/assignment2.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /cs287hw3/.idea/inspectionProfiles/profiles_settings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 6 | 
-------------------------------------------------------------------------------- /cs287hw3/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | -------------------------------------------------------------------------------- /cs287hw3/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /cs287hw3/.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /cs287hw3/__pycache__/utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/__pycache__/utils.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw3/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw3/envs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__init__.py -------------------------------------------------------------------------------- /cs287hw3/envs/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw3/envs/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw3/envs/__pycache__/cart_pole_env.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/cart_pole_env.cpython-36.pyc -------------------------------------------------------------------------------- /cs287hw3/envs/__pycache__/cart_pole_env.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/cart_pole_env.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw3/envs/__pycache__/cheetah_env.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/cheetah_env.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw3/envs/__pycache__/hopper_env.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/hopper_env.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw3/envs/cheetah_env.py: -------------------------------------------------------------------------------- 1 | import copy 2 | from gym import utils 3 | from gym.envs.mujoco import mujoco_env 4 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv 5 | 6 | 7 | class CheetahModEnv(HalfCheetahEnv, mujoco_env.MujocoEnv, utils.EzPickle): 8 | def __init__(self): 9 | 10 | self.perturb_joints = True 11 | self.count = 0 12 | mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 8) 13 | utils.EzPickle.__init__(self) 14 | self.H = 30 15 | self.du = self.action_space.shape[0] 16 | self.dx = self.observation_space.shape[0] 17 | 18 | def step(self, a): 19 | self.count += 1 20 | xposbefore = self.sim.data.qpos[0] 21 | self.do_simulation(a, self.frame_skip) 22 | xposafter = self.sim.data.qpos[0] 23 | ob = self._get_obs() 24 | reward = (xposafter - xposbefore)/self.dt 25 | done = False 26 | return ob, -reward, done, dict() 27 | 28 | def set_state(self, state): 29 | nq, nv = self.model.nq, self.model.nv 30 | self.sim.reset() 31 | qpos = copy.deepcopy(self.init_qpos) 32 | 33 | qpos[1:nq] = state[:nq - 1] 34 | qvel = state[nq - 1:] 35 | 36 | mujoco_env.MujocoEnv.set_state(self, qpos, qvel) 37 | 38 | def reset_model(self): 39 | mujoco_env.MujocoEnv.set_state(self, self.init_qpos, self.init_qvel) 40 | return self._get_obs() 41 | -------------------------------------------------------------------------------- /cs287hw3/envs/hopper_env.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from gym import utils 3 | import copy 4 | from gym.envs.mujoco import mujoco_env 5 | from gym.envs.mujoco.hopper import HopperEnv 6 | 7 | 8 | class HopperModEnv(HopperEnv, mujoco_env.MujocoEnv, 
utils.EzPickle): 9 | def __init__(self): 10 | 11 | self.perturb_joints = True 12 | self.components = np.array(['thigh', 'leg', 'foot']) 13 | self.affected_part = 'thigh' 14 | self.count = 0 15 | mujoco_env.MujocoEnv.__init__(self, "hopper.xml", 8) 16 | utils.EzPickle.__init__(self) 17 | 18 | self.init_geom_rgba = self.model.geom_rgba.copy() 19 | self.dx = self.observation_space.shape[0] 20 | self.du = self.action_space.shape[0] 21 | self.H = 50 22 | 23 | def step(self, a): 24 | self.count += 1 25 | posbefore = self.sim.data.qpos[0] 26 | self.do_simulation(a, self.frame_skip) 27 | posafter, height, ang = self.sim.data.qpos[0:3] 28 | reward = (posafter - posbefore) / self.dt 29 | done = False 30 | ob = self._get_obs() 31 | return ob, -reward, done, {} 32 | 33 | def set_state(self, state): 34 | nq, nv = self.model.nq, self.model.nv 35 | self.sim.reset() 36 | qpos = copy.deepcopy(self.init_qpos) 37 | qvel = copy.deepcopy(self.init_qvel) 38 | 39 | qpos[1:6] = state[:nq-1] 40 | qvel[:6] = state[nq-1:] 41 | 42 | mujoco_env.MujocoEnv.set_state(self, qpos, qvel) 43 | 44 | def _get_obs(self): 45 | return np.concatenate([ 46 | self.sim.data.qpos.flat[1:6], 47 | np.clip(self.sim.data.qvel.flat, -10, 10) 48 | ]) 49 | 50 | def reset_model(self): 51 | 52 | self.affected_part = self.components[np.random.randint(0,3)] 53 | self.count = 0 54 | qpos = self.init_qpos 55 | qvel = self.init_qvel 56 | mujoco_env.MujocoEnv.set_state(self, qpos, qvel) 57 | 58 | return self._get_obs() 59 | 60 | def viewer_setup(self): 61 | self.viewer.cam.trackbodyid = 2 62 | self.viewer.cam.distance = self.model.stat.extent * 0.75 63 | self.viewer.cam.lookat[2] = 1.15 64 | self.viewer.cam.elevation = -20 65 | -------------------------------------------------------------------------------- /cs287hw3/non_linear_optimization.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/non_linear_optimization.pdf -------------------------------------------------------------------------------- /cs287hw3/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import moviepy.editor as mpy 3 | from scipy.optimize import minimize 4 | 5 | 6 | class NNPolicy(object): 7 | def __init__(self, input_dim, output_dim, hidden_sizes): 8 | self.input_dim = input_dim 9 | self.output_dim = output_dim 10 | self.hidden_sizes = tuple(hidden_sizes) 11 | self.params = None 12 | 13 | def get_action(self, state, timestep=None): 14 | x = state 15 | params = self.params 16 | for i in range(len(self.hidden_sizes)): 17 | x = x.T @ params['W'][i] + params['b'][i] 18 | x = np.tanh(x) 19 | action = x.T @ params['W'][-1] + params['b'][-1] 20 | action = np.tanh(action) 21 | return action 22 | 23 | def set_params(self, params): 24 | sizes = (self.input_dim,) + self.hidden_sizes + (self.output_dim,) 25 | Ws, bs = [], [] 26 | s_id = 0 27 | for i in range(len(self.hidden_sizes) + 1): 28 | w_shape = (sizes[i], sizes[i + 1]) 29 | e_id = s_id + np.prod(w_shape) 30 | W = params[s_id:e_id].reshape(w_shape) 31 | s_id = e_id 32 | e_id = s_id + sizes[i + 1] 33 | b = params[s_id:e_id] 34 | s_id = e_id 35 | Ws.append(W) 36 | bs.append(b) 37 | self.params = dict(W=Ws, b=bs) 38 | 39 | def get_params(self): 40 | params = [] 41 | for W, b in zip(self.params['W'], self.params['b']): 42 | params.extend([W.flatten(), b.flatten()]) 43 | return np.concatenate(params) 44 | 45 | def init_params(self): 46 | sizes = (self.input_dim,) + self.hidden_sizes + (self.output_dim,) 47 | Ws, bs = [], [] 48 | for i in range(len(self.hidden_sizes) + 1): 49 | W = np.random.uniform(size=(sizes[i], sizes[i + 1]))/np.sqrt(sizes[i] + sizes[i+1]) 50 | b = np.zeros(shape=sizes[i + 1]) 51 | Ws.append(W) 52 | bs.append(b) 53 | self.params = 
dict(W=Ws, b=bs) 54 | return dict(W=Ws, b=bs) 55 | 56 | 57 | class ActPolicy(object): 58 | def __init__(self, env, actions): 59 | self._actions = actions.reshape(env.H, env.du) 60 | self.t = 0 61 | 62 | def get_action(self, state, timestep=None): 63 | act = self._actions[self.t] 64 | self.t = (self.t + 1) % len(self._actions) 65 | return act 66 | 67 | def reset(self): 68 | self.t = 0 69 | 70 | 71 | def rollout(env, policy, noise=0., render=False): 72 | np.random.seed(0) 73 | s = env.reset() 74 | states = [] 75 | imgs = [] 76 | cost = 0 77 | for t in range(env.H): 78 | act = policy.get_action(s, t) + np.random.normal(0, scale=noise, size=(env.du,)) 79 | s, c, d, _ = env.step(act) 80 | if render:imgs.append(env.render('rgb_array')) 81 | states.append(s) 82 | cost += c 83 | if d: break 84 | if render: 85 | clip = mpy.ImageSequenceClip(imgs, fps=8) 86 | clip.write_gif('./rollout.gif', verbose=False) 87 | 88 | return cost, states 89 | 90 | -------------------------------------------------------------------------------- /cs287hw3/vids/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/vids/.DS_Store -------------------------------------------------------------------------------- /cs287hw3/vids/rollout.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/vids/rollout.gif -------------------------------------------------------------------------------- /cs287hw4/__MACOSX/._hw4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/__MACOSX/._hw4.pdf 
-------------------------------------------------------------------------------- /cs287hw4/hw4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/hw4.pdf -------------------------------------------------------------------------------- /cs287hw4/hw4_rubric.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/hw4_rubric.pdf -------------------------------------------------------------------------------- /cs287hw4/p3_a_data_1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p3_a_data_1.npy -------------------------------------------------------------------------------- /cs287hw4/p3_a_data_2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p3_a_data_2.npy -------------------------------------------------------------------------------- /cs287hw4/p3_a_data_3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p3_a_data_3.npy -------------------------------------------------------------------------------- /cs287hw4/p3_a_data_4.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p3_a_data_4.npy -------------------------------------------------------------------------------- 
/cs287hw4/p6_data_0.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p6_data_0.npy -------------------------------------------------------------------------------- /cs287hw4/p6_data_1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p6_data_1.npy -------------------------------------------------------------------------------- /cs287hw4/p6_data_2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p6_data_2.npy -------------------------------------------------------------------------------- /cs287hw4/p6_data_3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p6_data_3.npy -------------------------------------------------------------------------------- /cs287hw5/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/.DS_Store -------------------------------------------------------------------------------- /cs287hw5/.idea/hw5_nov12.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /cs287hw5/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 
-------------------------------------------------------------------------------- /cs287hw5/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /cs287hw5/hw5.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5.pdf -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup.zip -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/.DS_Store -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/baseline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/baseline.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/clipper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/clipper.png -------------------------------------------------------------------------------- 
/cs287hw5/hw5_writeup/figures/entropy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/entropy.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/gae.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/gae.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/mbppo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/mbppo.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/meppo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/meppo.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/newplot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/newplot.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/pg.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/pg.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/pg_cheetah.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/pg_cheetah.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/ph_baseline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_baseline.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/ph_clipper.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_clipper.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/ph_entropy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_entropy.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/ph_gae.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_gae.png 
-------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/ph_mbppo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_mbppo.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/ph_meppo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_meppo.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/ph_pg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_pg.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/ph_ppo_obj.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_ppo_obj.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/ph_sac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_sac.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/ppo_obj.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ppo_obj.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/figures/sac_cheetah.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/sac_cheetah.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Ant_3A_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Ant_3A_12.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Ant_3A_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Ant_3A_3.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Cheetah_2A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_2A.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Cheetah_2B_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_2B_1.png 
-------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Cheetah_2B_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_2B_2.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Cheetah_2C.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_2C.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Cheetah_3A_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_3A_12.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Cheetah_3A_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_3A_3.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/HalfCheetah_1A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1A.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/HalfCheetah_1B.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1B.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/HalfCheetah_1C.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1C.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/HalfCheetah_1D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1D.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/HalfCheetah_1E.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1E.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/HalfCheetah_1F.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1F.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/HalfCheetah_1G.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1G.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_1A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1A.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1B.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_1C.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1C.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_1D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1D.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_1E.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1E.png 
-------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_1F.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1F.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_1G.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1G.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_2A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_2A.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_2B_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_2B_1.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_2B_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_2B_2.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_2C.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_2C.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_3A_12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_3A_12.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Hopper_3A_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_3A_3.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Swimmer_1A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1A.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Swimmer_1B.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1B.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Swimmer_1C.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1C.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Swimmer_1D.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1D.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Swimmer_1E.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1E.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Swimmer_1F.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1F.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Swimmer_1G.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1G.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Swimmer_2A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_2A.png 
-------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Swimmer_2B_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_2B_1.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Swimmer_2B_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_2B_2.png -------------------------------------------------------------------------------- /cs287hw5/hw5_writeup/my_figures/Swimmer_2C.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_2C.png -------------------------------------------------------------------------------- /cs287hw5/sac/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/.DS_Store -------------------------------------------------------------------------------- /cs287hw5/sac/.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /cs287hw5/sac/.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /cs287hw5/sac/.idea/sac.iml: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /cs287hw5/sac/README.md: -------------------------------------------------------------------------------- 1 | # CS294-112 HW 5b: Soft Actor Critic 2 | Original code from Tuomas Haarnoja, Soroush Nasiriany, and Aurick Zhou for CS294-112 Fall 2018 3 | 4 | Dependencies: 5 | * Python **3.4.5** 6 | * Numpy version **1.15.2** 7 | * TensorFlow version **1.10.0** 8 | * tensorflow-probability version **0.4.0** 9 | * OpenAI Gym version **0.10.8** 10 | * MuJoCo version **1.50** and mujoco-py **1.50.1.59** 11 | * seaborn version **0.9.0** 12 | 13 | You will implement `sac.py` and `nn.py`. 14 | 15 | See the [HW5 PDF](http://rail.eecs.berkeley.edu/deeprlcourse/static/homeworks/hw5b.pdf) for further instructions. 16 | 17 | 18 | Instructions for Running the Code 19 | 20 | OS Requirement 21 | * Ubuntu 16.04 LTS 22 | 23 | Dependencies: 24 | * Anaconda 25 | 26 | All the specific Python packages are listed in `environment.yml`. 27 | 28 | 29 | Instructions 30 | * Install all the environment dependencies by running `./project_setup.bash setup` 31 | * Load the dependencies with `source project_setup.bash` 32 | * Run `run_all.sh` to run all the experiments. 33 | * Run `generate_plots.sh` to generate all the plots; the plots are located in the plots directory.
34 | -------------------------------------------------------------------------------- /cs287hw5/sac/__pycache__/logz.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/logz.cpython-35.pyc -------------------------------------------------------------------------------- /cs287hw5/sac/__pycache__/logz.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/logz.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw5/sac/__pycache__/nn.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/nn.cpython-35.pyc -------------------------------------------------------------------------------- /cs287hw5/sac/__pycache__/nn.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/nn.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw5/sac/__pycache__/sac.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/sac.cpython-35.pyc -------------------------------------------------------------------------------- /cs287hw5/sac/__pycache__/sac.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/sac.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw5/sac/__pycache__/utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/utils.cpython-35.pyc -------------------------------------------------------------------------------- /cs287hw5/sac/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /cs287hw5/sac/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/data/.DS_Store -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Ant-v2_reinf_02-12-2019_16-46-48/1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.1, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": false, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "Ant-v2", 14 | "exp_name": "reinf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 
27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Ant-v2_reinf_02-12-2019_16-46-48/11/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.1, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": false, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "Ant-v2", 14 | "exp_name": "reinf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Ant-v2_reinf_02-12-2019_16-46-48/21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.1, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": false, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "Ant-v2", 14 | "exp_name": "reinf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | 
}, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Ant-v2_reparam_02-12-2019_16-47-03/1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.1, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "Ant-v2", 14 | "exp_name": "reparam", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Ant-v2_reparam_02-12-2019_16-47-03/11/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.1, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "Ant-v2", 14 | "exp_name": "reparam", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 
1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Ant-v2_reparam_02-12-2019_16-47-03/21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.1, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "Ant-v2", 14 | "exp_name": "reparam", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Ant-v2_reparam_2qf_02-12-2019_16-47-25/1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.1, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": true 12 | }, 13 | "env_name": "Ant-v2", 14 | "exp_name": "reparam_2qf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | 
"value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Ant-v2_reparam_2qf_02-12-2019_16-47-25/11/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.1, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": true 12 | }, 13 | "env_name": "Ant-v2", 14 | "exp_name": "reparam_2qf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Ant-v2_reparam_2qf_02-12-2019_16-47-25/21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.1, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": true 12 | }, 13 | "env_name": "Ant-v2", 14 | "exp_name": "reparam_2qf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | 
"hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53/1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": false, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "HalfCheetah-v2", 14 | "exp_name": "reinf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53/11/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": false, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "HalfCheetah-v2", 14 | "exp_name": "reinf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 
128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53/21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": false, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "HalfCheetah-v2", 14 | "exp_name": "reinf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_02-12-2019_12-05-49/1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "HalfCheetah-v2", 14 | "exp_name": "reparam", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 
40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_02-12-2019_12-05-49/11/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "HalfCheetah-v2", 14 | "exp_name": "reparam", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_02-12-2019_12-05-49/21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "HalfCheetah-v2", 14 | "exp_name": "reparam", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } 
-------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_2qf_02-12-2019_12-34-24/1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": true 12 | }, 13 | "env_name": "HalfCheetah-v2", 14 | "exp_name": "reparam_2qf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_2qf_02-12-2019_12-34-24/11/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": true 12 | }, 13 | "env_name": "HalfCheetah-v2", 14 | "exp_name": "reparam_2qf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } 
-------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_2qf_02-12-2019_12-34-24/21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": true 12 | }, 13 | "env_name": "HalfCheetah-v2", 14 | "exp_name": "reparam_2qf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Hopper-v2_reinf_02-12-2019_21-02-20/1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": false, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "Hopper-v2", 14 | "exp_name": "reinf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } 
-------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Hopper-v2_reinf_02-12-2019_21-02-20/11/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": false, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "Hopper-v2", 14 | "exp_name": "reinf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Hopper-v2_reinf_02-12-2019_21-02-20/21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": false, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "Hopper-v2", 14 | "exp_name": "reinf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } 
-------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Hopper-v2_reparam_02-12-2019_21-02-26/1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "Hopper-v2", 14 | "exp_name": "reparam", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Hopper-v2_reparam_02-12-2019_21-02-26/11/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "Hopper-v2", 14 | "exp_name": "reparam", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } 
-------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Hopper-v2_reparam_02-12-2019_21-02-26/21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": false 12 | }, 13 | "env_name": "Hopper-v2", 14 | "exp_name": "reparam", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Hopper-v2_reparam_2qf_02-12-2019_21-02-37/1/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": true 12 | }, 13 | "env_name": "Hopper-v2", 14 | "exp_name": "reparam_2qf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } 
-------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Hopper-v2_reparam_2qf_02-12-2019_21-02-37/11/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": true 12 | }, 13 | "env_name": "Hopper-v2", 14 | "exp_name": "reparam_2qf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /cs287hw5/sac/data/sac_Hopper-v2_reparam_2qf_02-12-2019_21-02-37/21/params.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm_params": { 3 | "alpha": 0.2, 4 | "batch_size": 256, 5 | "discount": 0.99, 6 | "epoch_length": 1000, 7 | "learning_rate": 0.001, 8 | "n_epochs": 500, 9 | "reparameterize": true, 10 | "tau": 0.01, 11 | "two_qf": true 12 | }, 13 | "env_name": "Hopper-v2", 14 | "exp_name": "reparam_2qf", 15 | "policy_params": { 16 | "hidden_layer_sizes": [ 17 | 128, 18 | 128 19 | ] 20 | }, 21 | "q_function_params": { 22 | "hidden_layer_sizes": [ 23 | 128, 24 | 128 25 | ] 26 | }, 27 | "replay_pool_params": { 28 | "max_size": 1000000.0 29 | }, 30 | "sampler_params": { 31 | "max_episode_length": 1000, 32 | "prefill_steps": 1000 33 | }, 34 | "value_function_params": { 35 | "hidden_layer_sizes": [ 36 | 128, 37 | 128 38 | ] 39 | } 40 | } 
-------------------------------------------------------------------------------- /cs287hw5/sac/environment.yml: -------------------------------------------------------------------------------- 1 | name: cs294drl_hw5_sac 2 | # dependencies: 3 | # - python==3.4.5 4 | # - pip: 5 | # - gym==0.10.8 6 | # - numpy==1.15.2 7 | # - tensorflow-gpu==1.10.0 8 | # - tensorflow-probability==0.4.0 9 | # - mujoco-py==1.50.1.56 10 | # - seaborn==0.9.0 11 | dependencies: 12 | - python=3.5 13 | - numpy=1.14.5 14 | - pandas 15 | - scipy 16 | - matplotlib 17 | - seaborn 18 | - scikit-learn 19 | - jupyter 20 | - patchelf 21 | - pip: 22 | - Cython 23 | - https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.13.1-cp35-cp35m-linux_x86_64.whl 24 | - mujoco-py==1.50.1.56 25 | - box2d==2.3.2 26 | - opencv-python 27 | - gym[atari]==0.10.5 28 | - tensorflow-probability==0.6.0 29 | -------------------------------------------------------------------------------- /cs287hw5/sac/generate_plots.sh: -------------------------------------------------------------------------------- 1 | #! 
/bin/bash
2 |
3 | # Render SAC learning-curve figures with myplot.py.
4 | # Run from the directory that contains data/ and myplot.py; figures are
5 | # written to plots/.
6 |
7 | # Print "data/<name>" for every entry of data/ whose name matches $1.
8 | #   $1 - grep pattern selecting experiment directories
9 | # Entry names are assumed to contain no whitespace: callers word-split
10 | # the output into separate command-line arguments.
11 | function filter_experiment_dirs {
12 |   ls data | grep -e "$1" | sed -e 's/^/data\//'
13 | }
14 |
15 | # Print the name of every entry of data/ that matches $1, truncated at
16 | # the first match of $2 (the match and everything after it is removed).
17 | #   $1 - grep pattern selecting experiment directories
18 | #   $2 - sed *basic* regular expression marking where to cut; it must
19 | #        not contain an unescaped '/'
20 | function filter_experiment_config {
21 |   ls data | grep -e "$1" | sed -e "s/$2.*//"
22 | }
23 |
24 | # NN-NN-NNNN date stamp embedded in the run directory names.
25 | # sed basic regular expressions have no \d digit class and spell
26 | # intervals as \{n\}; the previous '\d{2}-\d{2}-\d{4}' therefore never
27 | # matched and the legends kept the full directory name.
28 | date_re='[0-9]\{2\}-[0-9]\{2\}-[0-9]\{4\}'
29 |
30 | mkdir -p plots
31 |
32 | # The command substitutions below are left unquoted on purpose: every
33 | # output line has to become its own argument to myplot.py.
34 | python myplot.py \
35 |   --legend $(filter_experiment_config 'sac_HalfCheetah' "$date_re") \
36 |   --title 'HalfCheetah SAC' \
37 |   --output plots/HalfCheetah_SAC.png \
38 |   $(filter_experiment_dirs 'sac_HalfCheetah')
39 |
40 | python myplot.py \
41 |   --legend $(filter_experiment_config 'sac_Ant' "$date_re") \
42 |   --title 'Ant SAC' \
43 |   --output plots/Ant_SAC.png \
44 |   $(filter_experiment_dirs 'sac_Ant')
45 |
--------------------------------------------------------------------------------
/cs287hw5/sac/project_setup.bash:
--------------------------------------------------------------------------------
1 | # Project setup script
2 | # Source this file to set up the environment for this project.
3 | #
4 | # Usage:
5 | #   source project_setup.bash setup    create the conda env from environment.yml
6 | #   source project_setup.bash remove   delete the conda env
7 | #   source project_setup.bash          activate the env and export project settings
8 |
9 |
10 | ENV_NAME='cs294drl_hw5_sac'
11 |
12 | if [ "$1" = "setup" ]; then
13 |   echo "Creating conda environment..."
14 |   conda env create -f environment.yml
15 | elif [ "$1" = "remove" ]; then
16 |   conda remove --name "$ENV_NAME" --all --yes
17 | else
18 |
19 |   # The directory this file is sourced from becomes the project root.
20 |   export PROJECT_HOME="$(pwd)"
21 |
22 |   # Single quotes defer expansion to the moment the alias is used, and
23 |   # the inner double quotes keep a path containing spaces in one piece.
24 |   alias ph='cd "$PROJECT_HOME"'
25 |
26 |
27 |   alias set_display="export DISPLAY=':0.0'"
28 |   alias unset_display="unset DISPLAY"
29 |
30 |   export MPLBACKEND='Agg'
31 |
32 |   source activate "$ENV_NAME"
33 |
34 |   # Append the MuJoCo library directory. When LD_LIBRARY_PATH is empty
35 |   # no leading ':' may be left behind, because the dynamic linker reads
36 |   # an empty entry as the current directory.
37 |   # NOTE(review): the path is specific to one user's machine - adjust
38 |   # it to the local MuJoCo install.
39 |   export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}/home/young/.mujoco/mjpro150/bin"
40 | fi
41 |
--------------------------------------------------------------------------------
/cs287hw5/sac/run_all.sh:
--------------------------------------------------------------------------------
1 | #!
/bin/bash 2 | 3 | python train_mujoco.py --env_name HalfCheetah-v2 --exp_name reinf -e 3 4 | 5 | python train_mujoco.py --env_name HalfCheetah-v2 --exp_name reparam -e 3 --reparameterize 6 | 7 | python train_mujoco.py --env_name HalfCheetah-v2 --exp_name reparam_2qf -e 3 --reparameterize --two_qf 8 | 9 | 10 | python train_mujoco.py --env_name Ant-v2 --exp_name reinf -e 3 11 | 12 | python train_mujoco.py --env_name Ant-v2 --exp_name reparam -e 3 --reparameterize 13 | 14 | python train_mujoco.py --env_name Ant-v2 --exp_name reparam_2qf -e 3 --reparameterize --two_qf 15 | 16 | 17 | python train_mujoco.py --env_name Hopper-v2 --exp_name reinf -e 3 18 | 19 | python train_mujoco.py --env_name Hopper-v2 --exp_name reparam -e 3 --reparameterize 20 | 21 | python train_mujoco.py --env_name Hopper-v2 --exp_name reparam_2qf -e 3 --reparameterize --two_qf --------------------------------------------------------------------------------