├── README.md
├── cs287hw1
├── .idea
│ ├── cs287-hw1-code.iml
│ ├── inspectionProfiles
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ └── workspace.xml
├── .vs
│ ├── VSWorkspaceState.json
│ ├── cs287-hw1-code
│ │ └── v15
│ │ │ └── .suo
│ └── slnx.sqlite
├── README.md
├── __pycache__
│ ├── logger.cpython-36.pyc
│ └── logger.cpython-37.pyc
├── data
│ ├── part1
│ │ ├── GridWorldEnv0
│ │ │ ├── policy_typedeterministic_temperature1.0
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ ├── policy_typemax_ent_temperature0.01
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ ├── policy_typemax_ent_temperature1.0
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ └── policy_typemax_ent_temperature1e-05
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ └── GridWorldEnv1
│ │ │ ├── policy_typedeterministic_temperature1.0
│ │ │ ├── contour.png
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ ├── policy_typemax_ent_temperature0.01
│ │ │ ├── contour.png
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ ├── policy_typemax_ent_temperature1.0
│ │ │ ├── contour.png
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ └── policy_typemax_ent_temperature1e-05
│ │ │ ├── contour.png
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ ├── part2_ab
│ │ ├── DoubleIntegratorEnv
│ │ │ ├── modelinear_state_discretization151
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ ├── modelinear_state_discretization21
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ ├── modelinear_state_discretization51
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ ├── modenn_state_discretization151
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ ├── modenn_state_discretization21
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ └── modenn_state_discretization51
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ └── MountainCarEnv
│ │ │ ├── modelinear_state_discretization151
│ │ │ ├── contour.png
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ ├── modelinear_state_discretization21
│ │ │ ├── contour.png
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ ├── modelinear_state_discretization51
│ │ │ ├── contour.png
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ ├── modenn_state_discretization151
│ │ │ ├── contour.png
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ ├── modenn_state_discretization21
│ │ │ ├── contour.png
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ └── modenn_state_discretization51
│ │ │ ├── contour.png
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ ├── part2_c
│ │ ├── CartPoleEnv
│ │ │ ├── linear
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ └── nn
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ ├── DoubleIntegratorEnv
│ │ │ ├── linear
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ └── nn
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ ├── MountainCarEnv
│ │ │ ├── linear
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ └── nn
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ └── SwingUpEnv
│ │ │ ├── linear
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ └── nn
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ ├── part2_d
│ │ ├── CartPoleEnv
│ │ │ ├── policy_typelook_ahead_modelinear_horizon1
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ ├── policy_typelook_ahead_modelinear_horizon2
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ └── policy_typelook_ahead_modelinear_horizon3
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ ├── DoubleIntegratorEnv
│ │ │ ├── policy_typelook_ahead_modelinear_horizon1
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ ├── policy_typelook_ahead_modelinear_horizon2
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ └── policy_typelook_ahead_modelinear_horizon3
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ ├── MountainCarEnv
│ │ │ ├── policy_typelook_ahead_modelinear_horizon1
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ ├── policy_typelook_ahead_modelinear_horizon2
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ └── policy_typelook_ahead_modelinear_horizon3
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ └── SwingUpEnv
│ │ │ ├── policy_typelook_ahead_modelinear_horizon1
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ ├── policy_typelook_ahead_modelinear_horizon2
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ └── policy_typelook_ahead_modelinear_horizon3
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ ├── part3_a
│ │ ├── DoubleIntegratorEnv
│ │ │ ├── contour.png
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ └── MountainCarEnv
│ │ │ ├── contour.png
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ ├── part3_b
│ │ ├── CartPoleEnv
│ │ │ ├── horizon1
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ ├── horizon10
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ └── horizon5
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ ├── DoubleIntegratorEnv
│ │ │ ├── horizon1
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ ├── horizon10
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ └── horizon5
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ ├── MountainCarEnv
│ │ │ ├── horizon1
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ ├── horizon10
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ │ └── horizon5
│ │ │ │ ├── contour.png
│ │ │ │ ├── learning_curve.png
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── progress.csv
│ │ └── SwingUpEnv
│ │ │ ├── horizon1
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ ├── horizon10
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ │ │ └── horizon5
│ │ │ ├── learning_curve.png
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── progress.csv
│ └── part5
│ │ └── CartPoleEnv
│ │ └── modelinear_state_discretization51
│ │ ├── log.txt
│ │ ├── params.json
│ │ └── progress.csv
├── envs
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── cart_pole_env.cpython-36.pyc
│ │ ├── double_integrator_env.cpython-36.pyc
│ │ ├── grid1d_env.cpython-36.pyc
│ │ ├── gridworld_env.cpython-36.pyc
│ │ ├── mountain_hill_env.cpython-36.pyc
│ │ └── swing_up_env.cpython-36.pyc
│ ├── cart_pole_env.py
│ ├── double_integrator_env.py
│ ├── grid1d_env.py
│ ├── gridworld_env.py
│ ├── mountain_hill_env.py
│ └── swing_up_env.py
├── logger.py
├── part1
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ └── tabular_value_iteration.cpython-36.pyc
│ ├── run_part1.py
│ └── tabular_value_iteration.py
├── part2
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── discretize.cpython-36.pyc
│ │ └── look_ahead_policy.cpython-36.pyc
│ ├── discretize.py
│ ├── look_ahead_policy.py
│ ├── run_part2_ab.py
│ ├── run_part2_c.py
│ └── run_part2_d.py
├── part3
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── continous_value_iteration.cpython-36.pyc
│ │ └── look_ahead_policy.cpython-36.pyc
│ ├── continous_value_iteration.py
│ ├── look_ahead_policy.py
│ ├── run_part3_a.py
│ └── run_part3_b.py
├── part4
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ └── discretize.cpython-36.pyc
│ ├── discretize.py
│ └── run_part4.py
├── requirements.txt
├── utils
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── plot.cpython-36.pyc
│ │ ├── utils.cpython-36.pyc
│ │ └── value_functions.cpython-36.pyc
│ ├── plot.py
│ ├── utils.py
│ └── value_functions.py
└── viskit
│ ├── __init__.py
│ ├── __pycache__
│ ├── __init__.cpython-36.pyc
│ └── core.cpython-36.pyc
│ ├── core.py
│ ├── frontend.py
│ ├── static
│ ├── css
│ │ ├── bootstrap.min.css
│ │ └── dropdowns-enhancement.css
│ └── js
│ │ ├── bootstrap.min.js
│ │ ├── dropdowns-enhancement.js
│ │ ├── jquery-1.10.2.min.js
│ │ ├── jquery.loadTemplate-1.5.6.js
│ │ └── plotly-latest.min.js
│ └── templates
│ └── main.html
├── cs287hw2
├── .DS_Store
├── .ipynb_checkpoints
│ ├── Untitled-checkpoint.ipynb
│ ├── Untitled1-checkpoint.ipynb
│ ├── Untitled2-checkpoint.ipynb
│ ├── Untitled3-checkpoint.ipynb
│ ├── lqr-checkpoint.ipynb
│ ├── lqr-sol-checkpoint.ipynb
│ └── lqr_nolonger_clean-checkpoint.ipynb
├── __pycache__
│ ├── rot_utils.cpython-37.pyc
│ └── simulators.cpython-37.pyc
├── cs287hw2.pdf
├── environment.yml
├── envs
│ ├── __pycache__
│ │ ├── cheetah_env.cpython-37.pyc
│ │ └── hopper_env.cpython-37.pyc
│ ├── cheetah_env.py
│ └── hopper_env.py
├── img
│ ├── fig_a.png
│ ├── ref_a.png
│ ├── ref_b_cartpole.png
│ └── ref_b_heli.png
├── lqr.ipynb
├── mats
│ ├── cartpole_traj.mat
│ ├── heli_traj.mat
│ ├── p_a_w.mat
│ ├── p_b_w.mat
│ ├── p_c_heli_starting_states.mat
│ └── p_c_w.mat
├── requirements.txt
├── rot_utils.py
├── simulators.py
└── vids
│ └── visualization_hopper.gif
├── cs287hw3
├── .DS_Store
├── .idea
│ ├── .gitignore
│ ├── assignment2.iml
│ ├── inspectionProfiles
│ │ └── profiles_settings.xml
│ ├── misc.xml
│ ├── modules.xml
│ └── vcs.xml
├── .ipynb_checkpoints
│ ├── Non-linear Optimization-checkpoint.ipynb
│ ├── non_linear_optimization-checkpoint.ipynb
│ ├── non_linear_optimization_og-checkpoint.ipynb
│ ├── non_linear_optimization_sols-checkpoint.ipynb
│ ├── non_linear_optimzation-checkpoint.ipynb
│ └── non_linear_optimzation_sols-checkpoint.ipynb
├── __pycache__
│ ├── utils.cpython-36.pyc
│ └── utils.cpython-37.pyc
├── envs
│ ├── __init__.py
│ ├── __pycache__
│ │ ├── __init__.cpython-36.pyc
│ │ ├── __init__.cpython-37.pyc
│ │ ├── cart_pole_env.cpython-36.pyc
│ │ ├── cart_pole_env.cpython-37.pyc
│ │ ├── cheetah_env.cpython-37.pyc
│ │ └── hopper_env.cpython-37.pyc
│ ├── cart_pole_env.py
│ ├── cheetah_env.py
│ └── hopper_env.py
├── non_linear_optimization.ipynb
├── non_linear_optimization.pdf
├── utils.py
└── vids
│ ├── .DS_Store
│ └── rollout.gif
├── cs287hw4
├── .ipynb_checkpoints
│ ├── Homework4_Q-Copy1-checkpoint.ipynb
│ ├── Homework4_Q-checkpoint.ipynb
│ └── Homework4_Q111-checkpoint.ipynb
├── Homework4_Q.ipynb
├── Homework4_Q_exp.ipynb
├── __MACOSX
│ └── ._hw4.pdf
├── hw4.pdf
├── hw4_rubric.pdf
├── p3_a_data_1.npy
├── p3_a_data_2.npy
├── p3_a_data_3.npy
├── p3_a_data_4.npy
├── p6_data_0.npy
├── p6_data_1.npy
├── p6_data_2.npy
└── p6_data_3.npy
└── cs287hw5
├── .DS_Store
├── .idea
├── hw5_nov12.iml
├── misc.xml
├── modules.xml
└── workspace.xml
├── hw5.pdf
├── hw5_writeup.zip
├── hw5_writeup
├── .DS_Store
├── PS5.tex
├── PS5_template.tex
├── figures
│ ├── baseline.png
│ ├── clipper.png
│ ├── entropy.png
│ ├── gae.png
│ ├── mbppo.png
│ ├── meppo.png
│ ├── newplot.png
│ ├── pg.png
│ ├── pg_cheetah.png
│ ├── ph_baseline.png
│ ├── ph_clipper.png
│ ├── ph_entropy.png
│ ├── ph_gae.png
│ ├── ph_mbppo.png
│ ├── ph_meppo.png
│ ├── ph_pg.png
│ ├── ph_ppo_obj.png
│ ├── ph_sac.png
│ ├── ppo_obj.png
│ └── sac_cheetah.png
└── my_figures
│ ├── Ant_3A_12.png
│ ├── Ant_3A_3.png
│ ├── Cheetah_2A.png
│ ├── Cheetah_2B_1.png
│ ├── Cheetah_2B_2.png
│ ├── Cheetah_2C.png
│ ├── Cheetah_3A_12.png
│ ├── Cheetah_3A_3.png
│ ├── HalfCheetah_1A.png
│ ├── HalfCheetah_1B.png
│ ├── HalfCheetah_1C.png
│ ├── HalfCheetah_1D.png
│ ├── HalfCheetah_1E.png
│ ├── HalfCheetah_1F.png
│ ├── HalfCheetah_1G.png
│ ├── Hopper_1A.png
│ ├── Hopper_1B.png
│ ├── Hopper_1C.png
│ ├── Hopper_1D.png
│ ├── Hopper_1E.png
│ ├── Hopper_1F.png
│ ├── Hopper_1G.png
│ ├── Hopper_2A.png
│ ├── Hopper_2B_1.png
│ ├── Hopper_2B_2.png
│ ├── Hopper_2C.png
│ ├── Hopper_3A_12.png
│ ├── Hopper_3A_3.png
│ ├── Swimmer_1A.png
│ ├── Swimmer_1B.png
│ ├── Swimmer_1C.png
│ ├── Swimmer_1D.png
│ ├── Swimmer_1E.png
│ ├── Swimmer_1F.png
│ ├── Swimmer_1G.png
│ ├── Swimmer_2A.png
│ ├── Swimmer_2B_1.png
│ ├── Swimmer_2B_2.png
│ └── Swimmer_2C.png
└── sac
├── .DS_Store
├── .idea
├── misc.xml
├── modules.xml
├── sac.iml
└── workspace.xml
├── README.md
├── __pycache__
├── logz.cpython-35.pyc
├── logz.cpython-37.pyc
├── nn.cpython-35.pyc
├── nn.cpython-37.pyc
├── sac.cpython-35.pyc
├── sac.cpython-37.pyc
├── utils.cpython-35.pyc
└── utils.cpython-37.pyc
├── data
├── .DS_Store
├── sac_Ant-v2_reinf_02-12-2019_16-46-48
│ ├── 1
│ │ ├── log.txt
│ │ └── params.json
│ ├── 11
│ │ ├── log.txt
│ │ └── params.json
│ └── 21
│ │ ├── log.txt
│ │ └── params.json
├── sac_Ant-v2_reparam_02-12-2019_16-47-03
│ ├── 1
│ │ ├── log.txt
│ │ └── params.json
│ ├── 11
│ │ ├── log.txt
│ │ └── params.json
│ └── 21
│ │ ├── log.txt
│ │ └── params.json
├── sac_Ant-v2_reparam_2qf_02-12-2019_16-47-25
│ ├── 1
│ │ ├── log.txt
│ │ └── params.json
│ ├── 11
│ │ ├── log.txt
│ │ └── params.json
│ └── 21
│ │ ├── log.txt
│ │ └── params.json
├── sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53
│ ├── 1
│ │ ├── log.txt
│ │ └── params.json
│ ├── 11
│ │ ├── log.txt
│ │ └── params.json
│ └── 21
│ │ ├── log.txt
│ │ └── params.json
├── sac_HalfCheetah-v2_reparam_02-12-2019_12-05-49
│ ├── 1
│ │ ├── log.txt
│ │ └── params.json
│ ├── 11
│ │ ├── log.txt
│ │ └── params.json
│ └── 21
│ │ ├── log.txt
│ │ └── params.json
├── sac_HalfCheetah-v2_reparam_2qf_02-12-2019_12-34-24
│ ├── 1
│ │ ├── log.txt
│ │ └── params.json
│ ├── 11
│ │ ├── log.txt
│ │ └── params.json
│ └── 21
│ │ ├── log.txt
│ │ └── params.json
├── sac_Hopper-v2_reinf_02-12-2019_21-02-20
│ ├── 1
│ │ ├── log.txt
│ │ └── params.json
│ ├── 11
│ │ ├── log.txt
│ │ └── params.json
│ └── 21
│ │ ├── log.txt
│ │ └── params.json
├── sac_Hopper-v2_reparam_02-12-2019_21-02-26
│ ├── 1
│ │ ├── log.txt
│ │ └── params.json
│ ├── 11
│ │ ├── log.txt
│ │ └── params.json
│ └── 21
│ │ ├── log.txt
│ │ └── params.json
└── sac_Hopper-v2_reparam_2qf_02-12-2019_21-02-37
│ ├── 1
│ ├── log.txt
│ └── params.json
│ ├── 11
│ ├── log.txt
│ └── params.json
│ └── 21
│ ├── log.txt
│ └── params.json
├── environment.yml
├── generate_plots.sh
├── logz.py
├── myplot.py
├── nn.py
├── plot.py
├── project_setup.bash
├── run_all.sh
├── sac.py
├── train_mujoco.py
└── utils.py
/README.md:
--------------------------------------------------------------------------------
1 | # Optimal Control, Reinforcement Learning & Robotics Projects in CS 287: Advanced Robotics (fall 2019)
2 |
3 | **This repository contains past projects I completed in CS 287, which I took in the Berkeley EECS department under Prof. Pieter Abbeel in fall 2019.**
4 | (https://people.eecs.berkeley.edu/~pabbeel/cs287-fa19/)
5 |
6 | The following are the projects, along with the related material covered/implemented in each one.
7 |
8 | - [**Project 1**](/cs287hw1)
9 | - Value Iteration
10 | - Discretization-based Optimal Control
11 | - Function Approximation Optimal Control
12 |
13 | - [**Project 2**](/cs287hw2)
14 | - LQR, iLQR
15 | - DDP
16 | - Feedback Linearization
17 |
18 | - [**Project 3**](/cs287hw3)
19 | - Convex Optimization
20 | - Sequential Convex Programming
21 | - Motion Planning and Control (w/ convex opt.)
22 |
23 | - [**Project 4**](/cs287hw4)
24 | - Multivariate Gaussians
25 | - Kalman Filtering
26 | - EM & MLE
27 | - Particle Filtering
28 | - Belief Space Planning
29 |
30 | - [**Project 5**](/cs287hw5)
31 | - Policy Gradient
32 | - Trust Region Policy Optimization (TRPO)
33 | - Proximal Policy Optimization (PPO)
34 | - Deep Q-Learning
35 | - DQN
36 | - Double DQN
37 | - Dueling DQN
38 |
--------------------------------------------------------------------------------
/cs287hw1/.idea/cs287-hw1-code.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/cs287hw1/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/cs287hw1/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/cs287hw1/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/cs287hw1/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | 1568209473775
34 |
35 |
36 | 1568209473775
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/cs287hw1/.vs/VSWorkspaceState.json:
--------------------------------------------------------------------------------
1 | {
2 | "ExpandedNodes": [
3 | ""
4 | ],
5 | "PreviewInSolutionExplorer": false
6 | }
--------------------------------------------------------------------------------
/cs287hw1/.vs/cs287-hw1-code/v15/.suo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/.vs/cs287-hw1-code/v15/.suo
--------------------------------------------------------------------------------
/cs287hw1/.vs/slnx.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/.vs/slnx.sqlite
--------------------------------------------------------------------------------
/cs287hw1/__pycache__/logger.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/__pycache__/logger.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/__pycache__/logger.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/__pycache__/logger.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "GridWorldEnv0",
3 | "policy_type": "deterministic",
4 | "render": false,
5 | "temperature": 1.0
6 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | 0.0,0
3 | 0.0,1
4 | 0.0,2
5 | 0.05,3
6 | 0.0,4
7 | 0.0,5
8 | 0.1,6
9 | 0.1,7
10 | 0.05,8
11 | 0.1,9
12 | 0.15,10
13 | 0.1,11
14 | 0.2,12
15 | 0.1,13
16 | 0.15,14
17 | 0.15,15
18 | 0.3,16
19 | 0.1,17
20 | 0.15,18
21 | 0.25,19
22 | 0.45,20
23 | 0.2,21
24 | 0.55,22
25 | 0.45,23
26 | 0.35,24
27 | 0.7,25
28 | 0.55,26
29 | 0.75,27
30 | 0.75,28
31 | 0.65,29
32 | 0.55,30
33 | 0.85,31
34 | 0.65,32
35 | 0.9,33
36 | 0.85,34
37 | 0.85,35
38 | 0.95,36
39 | 1.0,37
40 | 1.0,38
41 | 1.0,39
42 | 1.0,40
43 | 1.0,41
44 | 1.0,42
45 | 1.0,43
46 | 1.0,44
47 | 1.0,45
48 | 1.0,46
49 | 0.95,47
50 | 1.0,48
51 | 1.0,49
52 |
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "GridWorldEnv0",
3 | "policy_type": "max_ent",
4 | "render": false,
5 | "temperature": 0.01
6 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,0.0
3 | 1,0.0
4 | 2,0.0
5 | 3,0.15
6 | 4,0.05
7 | 5,0.0
8 | 6,0.05
9 | 7,0.0
10 | 8,0.0
11 | 9,0.0
12 | 10,0.0
13 | 11,0.0
14 | 12,0.0
15 | 13,0.0
16 | 14,0.05
17 | 15,0.0
18 | 16,0.05
19 | 17,0.05
20 | 18,0.0
21 | 19,0.0
22 | 20,0.05
23 | 21,0.05
24 | 22,0.05
25 | 23,0.0
26 | 24,0.0
27 | 25,0.05
28 | 26,0.05
29 | 27,0.05
30 | 28,0.0
31 | 29,0.0
32 | 30,0.0
33 | 31,0.0
34 | 32,0.0
35 | 33,0.0
36 | 34,0.05
37 | 35,0.05
38 | 36,0.05
39 | 37,0.1
40 | 38,0.0
41 | 39,0.0
42 | 40,0.0
43 | 41,0.05
44 | 42,0.05
45 | 43,0.0
46 | 44,0.1
47 | 45,0.05
48 | 46,0.05
49 | 47,0.0
50 | 48,0.05
51 | 49,0.0
52 |
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "GridWorldEnv0",
3 | "policy_type": "max_ent",
4 | "render": false,
5 | "temperature": 1.0
6 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,0.0
3 | 1,0.0
4 | 2,0.0
5 | 3,0.15
6 | 4,0.05
7 | 5,0.0
8 | 6,0.1
9 | 7,0.0
10 | 8,0.05
11 | 9,0.0
12 | 10,0.0
13 | 11,0.0
14 | 12,0.05
15 | 13,0.05
16 | 14,0.05
17 | 15,0.05
18 | 16,0.05
19 | 17,0.05
20 | 18,0.0
21 | 19,0.0
22 | 20,0.05
23 | 21,0.05
24 | 22,0.05
25 | 23,0.1
26 | 24,0.1
27 | 25,0.1
28 | 26,0.05
29 | 27,0.05
30 | 28,0.0
31 | 29,0.0
32 | 30,0.0
33 | 31,0.0
34 | 32,0.0
35 | 33,0.0
36 | 34,0.1
37 | 35,0.05
38 | 36,0.05
39 | 37,0.1
40 | 38,0.0
41 | 39,0.0
42 | 40,0.0
43 | 41,0.1
44 | 42,0.05
45 | 43,0.05
46 | 44,0.05
47 | 45,0.1
48 | 46,0.05
49 | 47,0.0
50 | 48,0.05
51 | 49,0.05
52 |
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "GridWorldEnv0",
3 | "policy_type": "max_ent",
4 | "render": false,
5 | "temperature": 1e-05
6 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | 0.0,0
3 | 0.0,1
4 | 0.0,2
5 | 0.15,3
6 | 0.05,4
7 | 0.0,5
8 | 0.05,6
9 | 0.0,7
10 | 0.0,8
11 | 0.0,9
12 | 0.0,10
13 | 0.0,11
14 | 0.0,12
15 | 0.0,13
16 | 0.05,14
17 | 0.0,15
18 | 0.05,16
19 | 0.05,17
20 | 0.0,18
21 | 0.0,19
22 | 0.0,20
23 | 0.05,21
24 | 0.05,22
25 | 0.0,23
26 | 0.0,24
27 | 0.05,25
28 | 0.05,26
29 | 0.05,27
30 | 0.0,28
31 | 0.0,29
32 | 0.0,30
33 | 0.0,31
34 | 0.0,32
35 | 0.0,33
36 | 0.05,34
37 | 0.05,35
38 | 0.05,36
39 | 0.1,37
40 | 0.0,38
41 | 0.0,39
42 | 0.0,40
43 | 0.05,41
44 | 0.05,42
45 | 0.0,43
46 | 0.1,44
47 | 0.1,45
48 | 0.05,46
49 | 0.0,47
50 | 0.05,48
51 | 0.0,49
52 |
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "GridWorldEnv1",
3 | "policy_type": "deterministic",
4 | "render": false,
5 | "temperature": 1.0
6 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | 0.0,0
3 | 0.0,1
4 | 0.0,2
5 | 0.0,3
6 | 0.0,4
7 | 0.0,5
8 | 0.0,6
9 | 0.0,7
10 | 0.1,8
11 | 0.15,9
12 | 0.05,10
13 | 0.2,11
14 | 0.3,12
15 | 0.25,13
16 | 0.35,14
17 | 0.4,15
18 | 0.35,16
19 | 0.3,17
20 | 0.45,18
21 | 0.7,19
22 | 0.45,20
23 | 0.55,21
24 | 0.55,22
25 | 0.65,23
26 | 0.75,24
27 | 0.7,25
28 | 0.7,26
29 | 0.85,27
30 | 0.9,28
31 | 0.75,29
32 | 1.0,30
33 | 0.85,31
34 | 0.95,32
35 | 0.95,33
36 | 1.0,34
37 | 1.0,35
38 | 1.0,36
39 | 0.9,37
40 | 1.0,38
41 | 1.0,39
42 | 1.0,40
43 | 1.0,41
44 | 1.0,42
45 | 0.95,43
46 | 1.0,44
47 | 1.0,45
48 | 0.95,46
49 | 1.0,47
50 | 1.0,48
51 | 1.0,49
52 |
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "GridWorldEnv1",
3 | "policy_type": "max_ent",
4 | "render": false,
5 | "temperature": 0.01
6 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,0.0
3 | 1,0.0
4 | 2,0.0
5 | 3,0.1
6 | 4,0.05
7 | 5,0.0
8 | 6,0.05
9 | 7,0.0
10 | 8,0.05
11 | 9,0.0
12 | 10,0.0
13 | 11,0.0
14 | 12,0.0
15 | 13,0.05
16 | 14,0.0
17 | 15,0.1
18 | 16,0.0
19 | 17,0.0
20 | 18,0.05
21 | 19,0.0
22 | 20,0.1
23 | 21,0.0
24 | 22,0.0
25 | 23,0.0
26 | 24,0.0
27 | 25,0.05
28 | 26,0.05
29 | 27,0.05
30 | 28,0.0
31 | 29,0.0
32 | 30,0.0
33 | 31,0.0
34 | 32,0.0
35 | 33,0.0
36 | 34,0.05
37 | 35,0.0
38 | 36,0.0
39 | 37,0.05
40 | 38,0.05
41 | 39,0.0
42 | 40,0.05
43 | 41,0.05
44 | 42,0.0
45 | 43,0.0
46 | 44,0.05
47 | 45,0.05
48 | 46,0.05
49 | 47,0.05
50 | 48,0.1
51 | 49,0.05
52 |
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "GridWorldEnv1",
3 | "policy_type": "max_ent",
4 | "render": false,
5 | "temperature": 1.0
6 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,0.0
3 | 1,0.0
4 | 2,0.0
5 | 3,0.1
6 | 4,0.05
7 | 5,0.0
8 | 6,0.1
9 | 7,0.0
10 | 8,0.05
11 | 9,0.05
12 | 10,0.0
13 | 11,0.0
14 | 12,0.0
15 | 13,0.05
16 | 14,0.0
17 | 15,0.1
18 | 16,0.05
19 | 17,0.0
20 | 18,0.1
21 | 19,0.0
22 | 20,0.1
23 | 21,0.0
24 | 22,0.0
25 | 23,0.0
26 | 24,0.0
27 | 25,0.1
28 | 26,0.05
29 | 27,0.05
30 | 28,0.0
31 | 29,0.0
32 | 30,0.05
33 | 31,0.0
34 | 32,0.0
35 | 33,0.05
36 | 34,0.1
37 | 35,0.0
38 | 36,0.05
39 | 37,0.05
40 | 38,0.05
41 | 39,0.0
42 | 40,0.05
43 | 41,0.05
44 | 42,0.0
45 | 43,0.0
46 | 44,0.05
47 | 45,0.1
48 | 46,0.05
49 | 47,0.1
50 | 48,0.1
51 | 49,0.05
52 |
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "GridWorldEnv1",
3 | "policy_type": "max_ent",
4 | "render": false,
5 | "temperature": 1e-05
6 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | 0.0,0
3 | 0.0,1
4 | 0.0,2
5 | 0.1,3
6 | 0.0,4
7 | 0.0,5
8 | 0.05,6
9 | 0.0,7
10 | 0.05,8
11 | 0.0,9
12 | 0.0,10
13 | 0.0,11
14 | 0.0,12
15 | 0.05,13
16 | 0.0,14
17 | 0.1,15
18 | 0.0,16
19 | 0.0,17
20 | 0.05,18
21 | 0.0,19
22 | 0.1,20
23 | 0.0,21
24 | 0.0,22
25 | 0.0,23
26 | 0.0,24
27 | 0.05,25
28 | 0.05,26
29 | 0.05,27
30 | 0.0,28
31 | 0.0,29
32 | 0.0,30
33 | 0.0,31
34 | 0.0,32
35 | 0.0,33
36 | 0.05,34
37 | 0.0,35
38 | 0.0,36
39 | 0.05,37
40 | 0.05,38
41 | 0.0,39
42 | 0.05,40
43 | 0.05,41
44 | 0.0,42
45 | 0.0,43
46 | 0.05,44
47 | 0.05,45
48 | 0.05,46
49 | 0.05,47
50 | 0.05,48
51 | 0.05,49
52 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "DoubleIntegratorEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "tabular",
8 | "render": false,
9 | "state_discretization": 151
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1086.2625732421875
3 | 5,-1263.9954833984375
4 | 10,-1262.9053955078125
5 | 15,-661.170166015625
6 | 20,-288.8149108886719
7 | 25,-108.48724365234375
8 | 30,-86.2402114868164
9 | 35,-82.36270141601562
10 | 40,-77.28358459472656
11 | 45,-74.50456237792969
12 | 50,-71.93091583251953
13 | 55,-72.67729187011719
14 | 60,-70.5304946899414
15 | 65,-70.99906921386719
16 | 70,-72.21932220458984
17 | 75,-70.76229095458984
18 | 80,-69.89215087890625
19 | 85,-71.07881164550781
20 | 90,-69.90048217773438
21 | 95,-72.05101776123047
22 | 100,-70.91768646240234
23 | 105,-70.9474868774414
24 | 110,-70.95112609863281
25 | 115,-71.04618072509766
26 | 120,-70.75999450683594
27 | 125,-70.89779663085938
28 | 130,-70.42910766601562
29 | 135,-71.04066467285156
30 | 140,-70.91621398925781
31 | 145,-70.90560913085938
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "DoubleIntegratorEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "tabular",
8 | "render": false,
9 | "state_discretization": 21
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -1086.2625732421875,0
3 | -882.6398315429688,5
4 | -678.9556274414062,10
5 | -449.319580078125,15
6 | -169.34024047851562,20
7 | -189.33172607421875,25
8 | -92.19869232177734,30
9 | -97.56818389892578,35
10 | -80.79776000976562,40
11 | -82.92337036132812,45
12 | -83.22191619873047,50
13 | -76.68871307373047,55
14 | -81.23823547363281,60
15 | -79.61701965332031,65
16 | -85.20960998535156,70
17 | -81.37742614746094,75
18 | -86.22061157226562,80
19 | -80.67992401123047,85
20 | -87.55850219726562,90
21 | -87.11900329589844,95
22 | -84.90445709228516,100
23 | -82.08210754394531,105
24 | -87.08905029296875,110
25 | -84.68000030517578,115
26 | -83.30575561523438,120
27 | -75.68476104736328,125
28 | -80.35721588134766,130
29 | -80.14833068847656,135
30 | -93.36302185058594,140
31 | -81.09378051757812,145
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "DoubleIntegratorEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "tabular",
8 | "render": false,
9 | "state_discretization": 51
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -1086.2625732421875,0
3 | -1144.5582275390625,5
4 | -1067.7918701171875,10
5 | -601.8530883789062,15
6 | -208.37237548828125,20
7 | -170.8474884033203,25
8 | -87.156005859375,30
9 | -82.76526641845703,35
10 | -73.99360656738281,40
11 | -77.13446044921875,45
12 | -71.68731689453125,50
13 | -73.66706848144531,55
14 | -71.9505386352539,60
15 | -84.7891845703125,65
16 | -78.1386489868164,70
17 | -94.64347839355469,75
18 | -91.47573852539062,80
19 | -85.21119689941406,85
20 | -88.02317810058594,90
21 | -84.81150817871094,95
22 | -96.04219818115234,100
23 | -90.305419921875,105
24 | -89.38578033447266,110
25 | -91.25402069091797,115
26 | -89.45679473876953,120
27 | -89.90522003173828,125
28 | -91.32474517822266,130
29 | -92.66826629638672,135
30 | -90.96044158935547,140
31 | -74.82160949707031,145
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "DoubleIntegratorEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "nn",
7 | "policy_type": "tabular",
8 | "render": false,
9 | "state_discretization": 151
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1086.2625732421875
3 | 5,-1074.3865966796875
4 | 10,-1108.3885498046875
5 | 15,-1194.7117919921875
6 | 20,-1210.2864990234375
7 | 25,-1210.2864990234375
8 | 30,-439.2337341308594
9 | 35,-272.38214111328125
10 | 40,-275.7249755859375
11 | 45,-273.76495361328125
12 | 50,-269.75738525390625
13 | 55,-253.88551330566406
14 | 60,-233.76580810546875
15 | 65,-203.03372192382812
16 | 70,-187.3236083984375
17 | 75,-181.8560028076172
18 | 80,-172.03138732910156
19 | 85,-172.03138732910156
20 | 90,-166.24839782714844
21 | 95,-157.64881896972656
22 | 100,-157.64881896972656
23 | 105,-151.69505310058594
24 | 110,-151.69505310058594
25 | 115,-151.69505310058594
26 | 120,-151.69505310058594
27 | 125,-148.64476013183594
28 | 130,-149.03884887695312
29 | 135,-157.87362670898438
30 | 140,-157.87362670898438
31 | 145,-151.9330596923828
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "DoubleIntegratorEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "nn",
7 | "policy_type": "tabular",
8 | "render": false,
9 | "state_discretization": 21
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1086.2625732421875
3 | 5,-1086.2625732421875
4 | 10,-1086.2625732421875
5 | 15,-1086.2625732421875
6 | 20,-1086.2625732421875
7 | 25,-1086.2625732421875
8 | 30,-1086.2625732421875
9 | 35,-1086.2625732421875
10 | 40,-1086.2625732421875
11 | 45,-1086.2625732421875
12 | 50,-1086.2625732421875
13 | 55,-1086.2625732421875
14 | 60,-1086.2625732421875
15 | 65,-1086.2625732421875
16 | 70,-1086.2625732421875
17 | 75,-1086.2625732421875
18 | 80,-1086.2625732421875
19 | 85,-1086.2625732421875
20 | 90,-1086.2625732421875
21 | 95,-1086.2625732421875
22 | 100,-1086.2625732421875
23 | 105,-1086.2625732421875
24 | 110,-1086.2625732421875
25 | 115,-1086.2625732421875
26 | 120,-1086.2625732421875
27 | 125,-1086.2625732421875
28 | 130,-1086.2625732421875
29 | 135,-1086.2625732421875
30 | 140,-1086.2625732421875
31 | 145,-1086.2625732421875
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "DoubleIntegratorEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "nn",
7 | "policy_type": "tabular",
8 | "render": false,
9 | "state_discretization": 51
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1086.2625732421875
3 | 5,-1086.2625732421875
4 | 10,-1086.2625732421875
5 | 15,-1086.2625732421875
6 | 20,-1086.2625732421875
7 | 25,-1086.2625732421875
8 | 30,-1086.2625732421875
9 | 35,-1086.2625732421875
10 | 40,-727.3023071289062
11 | 45,-685.129150390625
12 | 50,-563.1832275390625
13 | 55,-550.3751831054688
14 | 60,-584.0700073242188
15 | 65,-597.1317749023438
16 | 70,-594.9307861328125
17 | 75,-427.8874816894531
18 | 80,-472.5625915527344
19 | 85,-472.5625915527344
20 | 90,-472.1018371582031
21 | 95,-472.1018371582031
22 | 100,-516.2792358398438
23 | 105,-478.8886413574219
24 | 110,-493.9183654785156
25 | 115,-493.9183654785156
26 | 120,-493.9183654785156
27 | 125,-355.2012023925781
28 | 130,-394.9552917480469
29 | 135,-394.9552917480469
30 | 140,-394.9552917480469
31 | 145,-394.9552917480469
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "MountainCarEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "tabular",
8 | "render": false,
9 | "state_discretization": 151
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-500.0
3 | 5,-500.0
4 | 10,-500.0
5 | 15,-500.0
6 | 20,-500.0
7 | 25,-500.0
8 | 30,-500.0
9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-161.0
17 | 75,-154.0
18 | 80,-147.0
19 | 85,-150.0
20 | 90,-147.0
21 | 95,-149.0
22 | 100,-146.0
23 | 105,-149.0
24 | 110,-145.0
25 | 115,-105.0
26 | 120,-105.0
27 | 125,-105.0
28 | 130,-105.0
29 | 135,-105.0
30 | 140,-105.0
31 | 145,-105.0
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "MountainCarEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "tabular",
8 | "render": false,
9 | "state_discretization": 21
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -500.0,0
3 | -500.0,5
4 | -500.0,10
5 | -500.0,15
6 | -238.0,20
7 | -346.0,25
8 | -274.0,30
9 | -496.0,35
10 | -412.0,40
11 | -192.0,45
12 | -138.0,50
13 | -131.0,55
14 | -143.0,60
15 | -147.0,65
16 | -138.0,70
17 | -153.0,75
18 | -150.0,80
19 | -152.0,85
20 | -147.0,90
21 | -145.0,95
22 | -148.0,100
23 | -149.0,105
24 | -146.0,110
25 | -150.0,115
26 | -150.0,120
27 | -147.0,125
28 | -146.0,130
29 | -147.0,135
30 | -150.0,140
31 | -148.0,145
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "MountainCarEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "tabular",
8 | "render": false,
9 | "state_discretization": 51
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -500.0,0
3 | -500.0,5
4 | -500.0,10
5 | -500.0,15
6 | -500.0,20
7 | -500.0,25
8 | -500.0,30
9 | -500.0,35
10 | -275.0,40
11 | -174.0,45
12 | -236.0,50
13 | -160.0,55
14 | -157.0,60
15 | -157.0,65
16 | -158.0,70
17 | -160.0,75
18 | -158.0,80
19 | -153.0,85
20 | -155.0,90
21 | -158.0,95
22 | -159.0,100
23 | -156.0,105
24 | -154.0,110
25 | -154.0,115
26 | -154.0,120
27 | -156.0,125
28 | -156.0,130
29 | -160.0,135
30 | -157.0,140
31 | -106.0,145
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "MountainCarEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "nn",
7 | "policy_type": "tabular",
8 | "render": false,
9 | "state_discretization": 151
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-500.0
3 | 5,-500.0
4 | 10,-500.0
5 | 15,-500.0
6 | 20,-500.0
7 | 25,-500.0
8 | 30,-500.0
9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-500.0
17 | 75,-500.0
18 | 80,-500.0
19 | 85,-500.0
20 | 90,-491.0
21 | 95,-169.0
22 | 100,-183.0
23 | 105,-183.0
24 | 110,-183.0
25 | 115,-183.0
26 | 120,-183.0
27 | 125,-183.0
28 | 130,-183.0
29 | 135,-183.0
30 | 140,-183.0
31 | 145,-183.0
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "MountainCarEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "nn",
7 | "policy_type": "tabular",
8 | "render": false,
9 | "state_discretization": 21
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-500.0
3 | 5,-500.0
4 | 10,-500.0
5 | 15,-500.0
6 | 20,-500.0
7 | 25,-500.0
8 | 30,-500.0
9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-500.0
17 | 75,-500.0
18 | 80,-500.0
19 | 85,-500.0
20 | 90,-500.0
21 | 95,-500.0
22 | 100,-500.0
23 | 105,-500.0
24 | 110,-500.0
25 | 115,-500.0
26 | 120,-500.0
27 | 125,-500.0
28 | 130,-500.0
29 | 135,-500.0
30 | 140,-500.0
31 | 145,-500.0
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "MountainCarEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "nn",
7 | "policy_type": "tabular",
8 | "render": false,
9 | "state_discretization": 51
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-500.0
3 | 5,-500.0
4 | 10,-500.0
5 | 15,-500.0
6 | 20,-500.0
7 | 25,-500.0
8 | 30,-500.0
9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-500.0
17 | 75,-500.0
18 | 80,-500.0
19 | 85,-500.0
20 | 90,-500.0
21 | 95,-500.0
22 | 100,-500.0
23 | 105,-500.0
24 | 110,-500.0
25 | 115,-500.0
26 | 120,-500.0
27 | 125,-500.0
28 | 130,-500.0
29 | 135,-500.0
30 | 140,-500.0
31 | 145,-500.0
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/CartPoleEnv/linear/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/CartPoleEnv/linear/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/CartPoleEnv/linear/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "CartPoleEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "tabular",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/CartPoleEnv/linear/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,10.0
3 | 5,392.0
4 | 10,298.0
5 | 15,500.0
6 | 20,500.0
7 | 25,500.0
8 | 30,500.0
9 | 35,500.0
10 | 40,500.0
11 | 45,500.0
12 | 50,500.0
13 | 55,500.0
14 | 60,500.0
15 | 65,500.0
16 | 70,500.0
17 | 75,500.0
18 | 80,500.0
19 | 85,500.0
20 | 90,500.0
21 | 95,500.0
22 | 100,500.0
23 | 105,500.0
24 | 110,500.0
25 | 115,500.0
26 | 120,500.0
27 | 125,500.0
28 | 130,500.0
29 | 135,500.0
30 | 140,500.0
31 | 145,500.0
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/CartPoleEnv/nn/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/CartPoleEnv/nn/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/CartPoleEnv/nn/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "CartPoleEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "nn",
7 | "policy_type": "tabular",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/CartPoleEnv/nn/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,9.0
3 | 5,13.0
4 | 10,13.0
5 | 15,13.0
6 | 20,13.0
7 | 25,13.0
8 | 30,13.0
9 | 35,13.0
10 | 40,13.0
11 | 45,15.0
12 | 50,15.0
13 | 55,15.0
14 | 60,15.0
15 | 65,15.0
16 | 70,15.0
17 | 75,15.0
18 | 80,15.0
19 | 85,15.0
20 | 90,15.0
21 | 95,15.0
22 | 100,15.0
23 | 105,15.0
24 | 110,15.0
25 | 115,15.0
26 | 120,15.0
27 | 125,15.0
28 | 130,15.0
29 | 135,15.0
30 | 140,15.0
31 | 145,15.0
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "DoubleIntegratorEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "tabular",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1086.2625732421875
3 | 5,-1263.9954833984375
4 | 10,-1262.9053955078125
5 | 15,-661.170166015625
6 | 20,-288.8149108886719
7 | 25,-108.48724365234375
8 | 30,-86.2402114868164
9 | 35,-82.36270141601562
10 | 40,-77.28358459472656
11 | 45,-74.50456237792969
12 | 50,-71.93091583251953
13 | 55,-72.67729187011719
14 | 60,-70.5304946899414
15 | 65,-70.99906921386719
16 | 70,-72.21932220458984
17 | 75,-70.76229095458984
18 | 80,-69.89215087890625
19 | 85,-71.07881164550781
20 | 90,-69.90048217773438
21 | 95,-72.05101776123047
22 | 100,-70.91768646240234
23 | 105,-70.9474868774414
24 | 110,-70.95112609863281
25 | 115,-71.04618072509766
26 | 120,-70.75999450683594
27 | 125,-70.89779663085938
28 | 130,-70.42910766601562
29 | 135,-71.04066467285156
30 | 140,-70.91621398925781
31 | 145,-70.90560913085938
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "DoubleIntegratorEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "nn",
7 | "policy_type": "tabular",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1086.2625732421875
3 | 5,-1074.3865966796875
4 | 10,-1108.3885498046875
5 | 15,-1194.7117919921875
6 | 20,-1210.2864990234375
7 | 25,-1210.2864990234375
8 | 30,-439.2337341308594
9 | 35,-272.38214111328125
10 | 40,-275.7249755859375
11 | 45,-273.76495361328125
12 | 50,-269.75738525390625
13 | 55,-253.88551330566406
14 | 60,-233.76580810546875
15 | 65,-203.03372192382812
16 | 70,-187.3236083984375
17 | 75,-181.8560028076172
18 | 80,-172.03138732910156
19 | 85,-172.03138732910156
20 | 90,-166.24839782714844
21 | 95,-157.64881896972656
22 | 100,-157.64881896972656
23 | 105,-151.69505310058594
24 | 110,-151.69505310058594
25 | 115,-151.69505310058594
26 | 120,-151.69505310058594
27 | 125,-148.64476013183594
28 | 130,-149.03884887695312
29 | 135,-157.87362670898438
30 | 140,-157.87362670898438
31 | 145,-151.9330596923828
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/linear/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/MountainCarEnv/linear/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/linear/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/MountainCarEnv/linear/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/linear/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "MountainCarEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "tabular",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/linear/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-500.0
3 | 5,-500.0
4 | 10,-500.0
5 | 15,-500.0
6 | 20,-500.0
7 | 25,-500.0
8 | 30,-500.0
9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-161.0
17 | 75,-154.0
18 | 80,-147.0
19 | 85,-150.0
20 | 90,-147.0
21 | 95,-149.0
22 | 100,-146.0
23 | 105,-149.0
24 | 110,-145.0
25 | 115,-105.0
26 | 120,-105.0
27 | 125,-105.0
28 | 130,-105.0
29 | 135,-105.0
30 | 140,-105.0
31 | 145,-105.0
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/nn/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/MountainCarEnv/nn/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/nn/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/MountainCarEnv/nn/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/nn/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "MountainCarEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "nn",
7 | "policy_type": "tabular",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/nn/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-500.0
3 | 5,-500.0
4 | 10,-500.0
5 | 15,-500.0
6 | 20,-500.0
7 | 25,-500.0
8 | 30,-500.0
9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-500.0
17 | 75,-500.0
18 | 80,-500.0
19 | 85,-500.0
20 | 90,-491.0
21 | 95,-169.0
22 | 100,-183.0
23 | 105,-183.0
24 | 110,-183.0
25 | 115,-183.0
26 | 120,-183.0
27 | 125,-183.0
28 | 130,-183.0
29 | 135,-183.0
30 | 140,-183.0
31 | 145,-183.0
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/SwingUpEnv/linear/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/SwingUpEnv/linear/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/SwingUpEnv/linear/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "SwingUpEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "tabular",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/SwingUpEnv/linear/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1611.0622956168809
3 | 5,-806.5761793759123
4 | 10,-747.522897685818
5 | 15,-540.7433507520074
6 | 20,-825.9231338583917
7 | 25,-944.4935466237056
8 | 30,-991.0924221438501
9 | 35,-1025.7969101684948
10 | 40,-1052.3914612528288
11 | 45,-1073.722422897381
12 | 50,-1139.109125287909
13 | 55,-1105.0172456320088
14 | 60,-1126.5440104057298
15 | 65,-1151.5207311147847
16 | 70,-1143.6126499683105
17 | 75,-1160.0488311303698
18 | 80,-1193.488355727164
19 | 85,-1175.0405064667475
20 | 90,-1163.1958750856556
21 | 95,-1165.6738488734682
22 | 100,-1171.2232022753903
23 | 105,-1186.8571065947756
24 | 110,-1185.2320044494918
25 | 115,-1174.4411249481252
26 | 120,-1170.948044299401
27 | 125,-1157.6196817398613
28 | 130,-1164.1346390379954
29 | 135,-1163.9363993137863
30 | 140,-1172.475828192612
31 | 145,-1233.3813361680384
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/SwingUpEnv/nn/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/SwingUpEnv/nn/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/SwingUpEnv/nn/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "action_discretization": 5,
3 | "env": "SwingUpEnv",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "nn",
7 | "policy_type": "tabular",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/SwingUpEnv/nn/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1091.1176999012564
3 | 5,-1091.1176999012564
4 | 10,-1091.1176999012564
5 | 15,-1091.1176999012564
6 | 20,-1091.1176999012564
7 | 25,-1091.1176999012564
8 | 30,-1091.1176999012564
9 | 35,-1091.1176999012564
10 | 40,-1091.1176999012564
11 | 45,-1091.1176999012564
12 | 50,-1091.1176999012564
13 | 55,-1091.1176999012564
14 | 60,-1091.1176999012564
15 | 65,-1091.1176999012564
16 | 70,-1091.1176999012564
17 | 75,-1091.1176999012564
18 | 80,-1091.1176999012564
19 | 85,-1091.1176999012564
20 | 90,-1091.1176999012564
21 | 95,-1091.1176999012564
22 | 100,-1091.1176999012564
23 | 105,-1091.1176999012564
24 | 110,-1091.1176999012564
25 | 115,-1091.1176999012564
26 | 120,-1091.1176999012564
27 | 125,-1091.1176999012564
28 | 130,-1091.1176999012564
29 | 135,-1091.1176999012564
30 | 140,-1091.1176999012564
31 | 145,-1091.1176999012564
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "CartPoleEnv",
3 | "exp_name": "test",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "look_ahead",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon1/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | 6.0,0
3 | 338.0,5
4 | 190.0,10
5 | 364.0,15
6 | 500.0,20
7 | 500.0,25
8 | 500.0,30
9 | 500.0,35
10 | 500.0,40
11 | 500.0,45
12 | 500.0,50
13 | 500.0,55
14 | 500.0,60
15 | 500.0,65
16 | 500.0,70
17 | 500.0,75
18 | 500.0,80
19 | 500.0,85
20 | 500.0,90
21 | 500.0,95
22 | 500.0,100
23 | 500.0,105
24 | 500.0,110
25 | 500.0,115
26 | 500.0,120
27 | 500.0,125
28 | 500.0,130
29 | 500.0,135
30 | 500.0,140
31 | 500.0,145
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon2/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "CartPoleEnv",
3 | "exp_name": "test",
4 | "horizon": 2,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "look_ahead",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon2/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | 6.0,0
3 | 154.0,5
4 | 124.0,10
5 | 62.0,15
6 | 364.0,20
7 | 267.0,25
8 | 354.0,30
9 | 500.0,35
10 | 166.0,40
11 | 139.0,45
12 | 500.0,50
13 | 500.0,55
14 | 277.0,60
15 | 500.0,65
16 | 500.0,70
17 | 125.0,75
18 | 42.0,80
19 | 500.0,85
20 | 500.0,90
21 | 500.0,95
22 | 500.0,100
23 | 500.0,105
24 | 110.0,110
25 | 500.0,115
26 | 500.0,120
27 | 500.0,125
28 | 71.0,130
29 | 500.0,135
30 | 500.0,140
31 | 224.0,145
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon3/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "CartPoleEnv",
3 | "exp_name": "test",
4 | "horizon": 3,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "look_ahead",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon3/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,6.0
3 | 5,128.0
4 | 10,226.0
5 | 15,69.0
6 | 20,500.0
7 | 25,500.0
8 | 30,135.0
9 | 35,500.0
10 | 40,500.0
11 | 45,350.0
12 | 50,500.0
13 | 55,88.0
14 | 60,500.0
15 | 65,500.0
16 | 70,97.0
17 | 75,500.0
18 | 80,387.0
19 | 85,216.0
20 | 90,500.0
21 | 95,500.0
22 | 100,500.0
23 | 105,231.0
24 | 110,471.0
25 | 115,500.0
26 | 120,254.0
27 | 125,113.0
28 | 130,500.0
29 | 135,500.0
30 | 140,500.0
31 | 145,500.0
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "DoubleIntegratorEnv",
3 | "exp_name": "test",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "look_ahead",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -1086.2625732421875,0
3 | -893.8065795898438,5
4 | -498.8104553222656,10
5 | -210.59446716308594,15
6 | -94.314453125,20
7 | -124.63628387451172,25
8 | -128.93954467773438,30
9 | -146.0016326904297,35
10 | -150.7569122314453,40
11 | -131.7736358642578,45
12 | -119.9066390991211,50
13 | -146.92782592773438,55
14 | -130.0223846435547,60
15 | -187.83953857421875,65
16 | -142.1328582763672,70
17 | -135.27488708496094,75
18 | -111.18167114257812,80
19 | -183.66871643066406,85
20 | -114.97431182861328,90
21 | -220.096923828125,95
22 | -122.19498443603516,100
23 | -154.7129364013672,105
24 | -135.93690490722656,110
25 | -153.4174041748047,115
26 | -130.9731903076172,120
27 | -134.832275390625,125
28 | -179.64395141601562,130
29 | -148.343017578125,135
30 | -231.59364318847656,140
31 | -145.043701171875,145
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "DoubleIntegratorEnv",
3 | "exp_name": "test",
4 | "horizon": 2,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "look_ahead",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -1183.3167724609375,0
3 | -466.75787353515625,5
4 | -452.248291015625,10
5 | -144.1388397216797,15
6 | -148.9638214111328,20
7 | -131.39715576171875,25
8 | -163.52474975585938,30
9 | -185.48219299316406,35
10 | -154.23744201660156,40
11 | -167.04725646972656,45
12 | -158.9932403564453,50
13 | -185.72344970703125,55
14 | -166.24571228027344,60
15 | -217.92059326171875,65
16 | -161.99917602539062,70
17 | -204.30374145507812,75
18 | -236.45884704589844,80
19 | -151.06915283203125,85
20 | -197.75592041015625,90
21 | -198.5358428955078,95
22 | -182.3708038330078,100
23 | -174.017333984375,105
24 | -154.0111083984375,110
25 | -187.34825134277344,115
26 | -154.50106811523438,120
27 | -178.9189910888672,125
28 | -163.55311584472656,130
29 | -139.14596557617188,135
30 | -193.9720916748047,140
31 | -142.2526092529297,145
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "DoubleIntegratorEnv",
3 | "exp_name": "test",
4 | "horizon": 3,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "look_ahead",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1205.2218017578125
3 | 5,-484.97027587890625
4 | 10,-189.61758422851562
5 | 15,-169.90155029296875
6 | 20,-168.10523986816406
7 | 25,-167.66624450683594
8 | 30,-132.1918487548828
9 | 35,-182.9347381591797
10 | 40,-175.6367950439453
11 | 45,-180.9919891357422
12 | 50,-140.72007751464844
13 | 55,-165.1248779296875
14 | 60,-161.497314453125
15 | 65,-167.78457641601562
16 | 70,-156.17080688476562
17 | 75,-153.41282653808594
18 | 80,-188.10372924804688
19 | 85,-162.02178955078125
20 | 90,-169.79441833496094
21 | 95,-174.7077178955078
22 | 100,-176.8898468017578
23 | 105,-123.10155487060547
24 | 110,-140.86790466308594
25 | 115,-119.35347747802734
26 | 120,-148.01553344726562
27 | 125,-164.94239807128906
28 | 130,-190.43072509765625
29 | 135,-163.91397094726562
30 | 140,-192.2443389892578
31 | 145,-182.40818786621094
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "MountainCarEnv",
3 | "exp_name": "test",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "look_ahead",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -500.0,0
3 | -500.0,5
4 | -500.0,10
5 | -500.0,15
6 | -500.0,20
7 | -500.0,25
8 | -500.0,30
9 | -500.0,35
10 | -500.0,40
11 | -500.0,45
12 | -500.0,50
13 | -303.0,55
14 | -236.0,60
15 | -500.0,65
16 | -500.0,70
17 | -426.0,75
18 | -420.0,80
19 | -417.0,85
20 | -500.0,90
21 | -420.0,95
22 | -500.0,100
23 | -414.0,105
24 | -500.0,110
25 | -229.0,115
26 | -238.0,120
27 | -500.0,125
28 | -500.0,130
29 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "MountainCarEnv",
3 | "exp_name": "test",
4 | "horizon": 2,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "look_ahead",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -500.0,0
3 | -500.0,5
4 | -500.0,10
5 | -500.0,15
6 | -500.0,20
7 | -500.0,25
8 | -500.0,30
9 | -500.0,35
10 | -500.0,40
11 | -500.0,45
12 | -500.0,50
13 | -312.0,55
14 | -228.0,60
15 | -500.0,65
16 | -312.0,70
17 | -237.0,75
18 | -227.0,80
19 | -240.0,85
20 | -500.0,90
21 | -315.0,95
22 | -326.0,100
23 | -500.0,105
24 | -308.0,110
25 | -500.0,115
26 | -498.0,120
27 | -223.0,125
28 | -310.0,130
29 | -500.0,135
30 | -324.0,140
31 | -500.0,145
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "MountainCarEnv",
3 | "exp_name": "test",
4 | "horizon": 3,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "look_ahead",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-500.0
3 | 5,-500.0
4 | 10,-500.0
5 | 15,-500.0
6 | 20,-500.0
7 | 25,-500.0
8 | 30,-500.0
9 | 35,-500.0
10 | 40,-500.0
11 | 45,-420.0
12 | 50,-409.0
13 | 55,-303.0
14 | 60,-223.0
15 | 65,-256.0
16 | 70,-228.0
17 | 75,-236.0
18 | 80,-244.0
19 | 85,-301.0
20 | 90,-243.0
21 | 95,-247.0
22 | 100,-228.0
23 | 105,-237.0
24 | 110,-233.0
25 | 115,-231.0
26 | 120,-306.0
27 | 125,-293.0
28 | 130,-230.0
29 | 135,-238.0
30 | 140,-241.0
31 | 145,-234.0
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "SwingUpEnv",
3 | "exp_name": "test",
4 | "horizon": 1,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "look_ahead",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon1/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -1091.1176999012564,0
3 | -1162.1493360627258,5
4 | -1585.8347053987275,10
5 | -1218.8241165219229,15
6 | -1249.7989512873348,20
7 | -1389.2664479944547,25
8 | -1298.771278433243,30
9 | -1683.4040480165327,35
10 | -1507.2040631908412,40
11 | -1422.1633591071438,45
12 | -1505.8400605781235,50
13 | -1531.366199951518,55
14 | -1859.374352334298,60
15 | -1481.806252109952,65
16 | -1226.7502585204463,70
17 | -1268.0980292418817,75
18 | -1446.588749786304,80
19 | -1234.6143376530424,85
20 | -1173.3418445679686,90
21 | -1452.18655778993,95
22 | -1347.0481453039677,100
23 | -1489.2808735833657,105
24 | -1191.7517135659734,110
25 | -1475.9642971133658,115
26 | -1501.2822528696033,120
27 | -1905.84236700956,125
28 | -1442.219550827407,130
29 | -1195.037295457711,135
30 | -1337.1308600611028,140
31 | -1193.1598828731228,145
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon2/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "SwingUpEnv",
3 | "exp_name": "test",
4 | "horizon": 2,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "look_ahead",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon2/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -1092.7898438420132,0
3 | -1215.7979095044884,5
4 | -900.2297428611507,10
5 | -1329.5358970230398,15
6 | -1421.1131724637714,20
7 | -1343.603741625047,25
8 | -1032.839109351379,30
9 | -1356.2425212447233,35
10 | -1503.854075071196,40
11 | -1420.590041290277,45
12 | -1581.3682381306112,50
13 | -1513.242610160141,55
14 | -1986.0909965287349,60
15 | -1818.8259128345749,65
16 | -1547.818410397714,70
17 | -1401.8233207141116,75
18 | -2026.578864863387,80
19 | -2098.2875860182835,85
20 | -1648.5499722747932,90
21 | -2156.958773287344,95
22 | -2161.416928709432,100
23 | -2180.018826056157,105
24 | -1830.83594101961,110
25 | -1223.477103579234,115
26 | -1189.1726035364468,120
27 | -1458.5448452906428,125
28 | -1284.0247744743867,130
29 | -1210.7703241828312,135
30 | -1505.6303316533417,140
31 | -1759.0756618354292,145
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon3/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "SwingUpEnv",
3 | "exp_name": "test",
4 | "horizon": 3,
5 | "max_iter": 150,
6 | "mode": "linear",
7 | "policy_type": "look_ahead",
8 | "render": false
9 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon3/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1116.391526334325
3 | 5,-1747.689019464483
4 | 10,-1670.6732967647536
5 | 15,-997.3284087020362
6 | 20,-955.9082685133767
7 | 25,-1177.432988580442
8 | 30,-926.5136135262421
9 | 35,-1104.902891456816
10 | 40,-1047.1105995789278
11 | 45,-1675.357489520778
12 | 50,-1120.925762449103
13 | 55,-983.6193496550145
14 | 60,-1178.07379545421
15 | 65,-1152.8209185244793
16 | 70,-1084.3333912294572
17 | 75,-864.4824550887413
18 | 80,-1128.1111239455465
19 | 85,-1877.5212413637996
20 | 90,-1459.4320766081328
21 | 95,-868.6700377542332
22 | 100,-1280.24002899705
23 | 105,-1165.836603981392
24 | 110,-1254.735230603128
25 | 115,-1310.1745190244744
26 | 120,-1038.4402663605304
27 | 125,-1260.488809359114
28 | 130,-1372.7110653444643
29 | 135,-1167.5436737789994
30 | 140,-1228.4214639353308
31 | 145,-1129.5736579851712
32 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/DoubleIntegratorEnv/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_a/DoubleIntegratorEnv/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/DoubleIntegratorEnv/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_a/DoubleIntegratorEnv/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/DoubleIntegratorEnv/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "DoubleIntegratorEnv",
4 | "horizon": 1,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 10,
8 | "policy_type": "rs",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/DoubleIntegratorEnv/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1122.7823725767166
3 | 5,-1062.223100979545
4 | 10,-1136.400997769638
5 | 15,-1093.1759279603343
6 | 20,-1064.8429670854428
7 | 25,-1155.1049075078602
8 | 30,-1265.864351441458
9 | 35,-678.3290560802258
10 | 40,-381.8164868973546
11 | 45,-265.22922879100435
12 | 50,-169.64047694026792
13 | 55,-155.43764777676293
14 | 60,-97.6076650722121
15 | 65,-84.33627450797492
16 | 70,-93.36829751629186
17 | 75,-78.18198268902506
18 | 80,-76.97710627691222
19 | 85,-73.00620265014103
20 | 90,-78.6135226587545
21 | 95,-80.37329447070243
22 | 100,-78.95941675331389
23 | 105,-79.55617247669808
24 | 110,-83.57970985465705
25 | 115,-82.76591377741207
26 | 120,-85.4703499994012
27 | 125,-82.73176603238862
28 | 130,-86.52114550412108
29 | 135,-89.53232013434773
30 | 140,-93.2105884194899
31 | 145,-86.04294015559864
32 | 150,-88.7641655907943
33 | 155,-89.18217105937
34 | 160,-91.07337037083897
35 | 165,-89.36643780693299
36 | 170,-92.59827615711666
37 | 175,-96.53035937070437
38 | 180,-98.23655640085903
39 | 185,-88.73787475559153
40 | 190,-91.47763141092898
41 | 195,-93.40145281128702
42 | 200,-88.1035242005634
43 | 205,-90.125792591034
44 | 210,-87.15102617961149
45 | 215,-88.15198854500593
46 | 220,-88.5171408243244
47 | 225,-86.40243325822708
48 | 230,-87.99009024200436
49 | 235,-84.80985085614553
50 | 240,-90.16705955320245
51 | 245,-94.08950693010142
52 | 249,-90.98371433618405
53 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/MountainCarEnv/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_a/MountainCarEnv/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/MountainCarEnv/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_a/MountainCarEnv/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/MountainCarEnv/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "MountainCarEnv",
4 | "horizon": 1,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 10,
8 | "policy_type": "rs",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/MountainCarEnv/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-500.0
3 | 5,-500.0
4 | 10,-500.0
5 | 15,-500.0
6 | 20,-500.0
7 | 25,-500.0
8 | 30,-500.0
9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-500.0
17 | 75,-500.0
18 | 80,-500.0
19 | 85,-500.0
20 | 90,-500.0
21 | 95,-500.0
22 | 100,-500.0
23 | 105,-500.0
24 | 110,-500.0
25 | 115,-500.0
26 | 120,-500.0
27 | 125,-500.0
28 | 130,-500.0
29 | 135,-500.0
30 | 140,-500.0
31 | 145,-500.0
32 | 150,-500.0
33 | 155,-500.0
34 | 160,-500.0
35 | 165,-500.0
36 | 170,-500.0
37 | 175,-500.0
38 | 180,-500.0
39 | 185,-500.0
40 | 190,-500.0
41 | 195,-500.0
42 | 200,-500.0
43 | 205,-500.0
44 | 210,-500.0
45 | 215,-500.0
46 | 220,-500.0
47 | 225,-500.0
48 | 230,-500.0
49 | 235,-500.0
50 | 240,-500.0
51 | 245,-500.0
52 | 249,-500.0
53 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/CartPoleEnv/horizon1/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "CartPoleEnv",
4 | "horizon": 1,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 32,
8 | "policy_type": "cem",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon1/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,6.0
3 | 10,6.0
4 | 20,6.0
5 | 30,6.0
6 | 40,6.0
7 | 50,6.0
8 | 60,8.0
9 | 70,38.0
10 | 80,38.0
11 | 90,23.0
12 | 100,40.0
13 | 110,40.0
14 | 120,25.0
15 | 130,34.0
16 | 140,27.0
17 | 150,45.0
18 | 160,46.0
19 | 170,68.0
20 | 180,32.0
21 | 190,34.0
22 | 200,40.0
23 | 210,64.0
24 | 220,32.0
25 | 230,69.0
26 | 240,30.0
27 | 249,50.0
28 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon10/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/CartPoleEnv/horizon10/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon10/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "CartPoleEnv",
4 | "horizon": 10,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 32,
8 | "policy_type": "cem",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon10/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | 6.0,0
3 | 6.0,10
4 | 6.0,20
5 | 6.0,30
6 | 6.0,40
7 | 6.0,50
8 | 6.0,60
9 | 6.0,70
10 | 6.0,80
11 | 6.0,90
12 | 6.0,100
13 | 6.0,110
14 | 6.0,120
15 | 6.0,130
16 | 6.0,140
17 | 6.0,150
18 | 6.0,160
19 | 6.0,170
20 | 6.0,180
21 | 8.0,190
22 | 94.0,200
23 | 72.0,210
24 | 108.0,220
25 | 247.0,230
26 | 288.0,240
27 | 160.0,249
28 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon5/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/CartPoleEnv/horizon5/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon5/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "CartPoleEnv",
4 | "horizon": 5,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 32,
8 | "policy_type": "cem",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon5/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,6.0
3 | 10,6.0
4 | 20,6.0
5 | 30,6.0
6 | 40,6.0
7 | 50,6.0
8 | 60,6.0
9 | 70,6.0
10 | 80,6.0
11 | 90,6.0
12 | 100,12.0
13 | 110,6.0
14 | 120,8.0
15 | 130,12.0
16 | 140,20.0
17 | 150,6.0
18 | 160,6.0
19 | 170,6.0
20 | 180,13.0
21 | 190,63.0
22 | 200,6.0
23 | 210,60.0
24 | 220,134.0
25 | 230,87.0
26 | 240,145.0
27 | 249,140.0
28 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "DoubleIntegratorEnv",
4 | "horizon": 1,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 32,
8 | "policy_type": "cem",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1083.2572626905703
3 | 10,-1144.3036625666957
4 | 20,-70.25937042708051
5 | 30,-65.82760913324954
6 | 40,-69.26650008607427
7 | 50,-71.99475809172492
8 | 60,-73.36248774374332
9 | 70,-75.01519602189407
10 | 80,-76.7247153398074
11 | 90,-77.0947355563633
12 | 100,-82.09236529859092
13 | 110,-80.32221880918433
14 | 120,-86.06994235658904
15 | 130,-93.97135070453305
16 | 140,-123.41189553061248
17 | 150,-111.09701675206192
18 | 160,-80.99094440230087
19 | 170,-111.82992648461546
20 | 180,-1407.4459451091536
21 | 190,-1466.2611779251909
22 | 200,-1579.0335773941397
23 | 210,-1436.1574085697116
24 | 220,-215.21898344969065
25 | 230,-1478.0767973483007
26 | 240,-1634.5027828698492
27 | 249,-1619.9079515249616
28 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "DoubleIntegratorEnv",
4 | "horizon": 10,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 32,
8 | "policy_type": "cem",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -328.79005020730284,0
3 | -200.98521546340913,10
4 | -64.77700467721864,20
5 | -84.86970171900138,30
6 | -128.60893935069768,40
7 | -147.48401302068382,50
8 | -139.2296066862622,60
9 | -110.77936569105727,70
10 | -114.11320544047803,80
11 | -125.6650792064009,90
12 | -123.18294325299779,100
13 | -138.1127465768214,110
14 | -126.7707613795519,120
15 | -112.07978614737182,130
16 | -99.5483647865851,140
17 | -116.85997383619346,150
18 | -129.06128247897595,160
19 | -104.97593874618848,170
20 | -122.84541554251891,180
21 | -114.77346058769307,190
22 | -107.45986913081748,200
23 | -101.4258130643817,210
24 | -114.49001701841401,220
25 | -104.44739383339761,230
26 | -115.09346632637698,240
27 | -129.87534014553373,249
28 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "DoubleIntegratorEnv",
4 | "horizon": 5,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 32,
8 | "policy_type": "cem",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1175.616838534374
3 | 10,-478.01630518122784
4 | 20,-62.36968488577191
5 | 30,-70.35551873127073
6 | 40,-81.91418989124216
7 | 50,-113.87514452834378
8 | 60,-99.39722406411673
9 | 70,-104.74424596209462
10 | 80,-116.43105353091153
11 | 90,-120.61381139602538
12 | 100,-104.22825758830257
13 | 110,-97.51563398276409
14 | 120,-96.10433126328779
15 | 130,-117.98687857144064
16 | 140,-103.6915302113553
17 | 150,-120.80207494162703
18 | 160,-96.17403533006443
19 | 170,-95.24357935829224
20 | 180,-104.7003414784946
21 | 190,-98.02249429487223
22 | 200,-96.91254602252111
23 | 210,-104.4164736636655
24 | 220,-111.60483821355875
25 | 230,-103.64503997519414
26 | 240,-109.3393306564727
27 | 249,-100.84268701592826
28 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon1/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon1/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon1/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "MountainCarEnv",
4 | "horizon": 1,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 32,
8 | "policy_type": "cem",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon1/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-500.0
3 | 10,-500.0
4 | 20,-500.0
5 | 30,-500.0
6 | 40,-500.0
7 | 50,-500.0
8 | 60,-500.0
9 | 70,-500.0
10 | 80,-500.0
11 | 90,-500.0
12 | 100,-500.0
13 | 110,-500.0
14 | 120,-500.0
15 | 130,-500.0
16 | 140,-500.0
17 | 150,-500.0
18 | 160,-500.0
19 | 170,-500.0
20 | 180,-500.0
21 | 190,-500.0
22 | 200,-500.0
23 | 210,-500.0
24 | 220,-500.0
25 | 230,-500.0
26 | 240,-500.0
27 | 249,-500.0
28 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon10/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon10/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon10/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon10/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon10/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "MountainCarEnv",
4 | "horizon": 10,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 32,
8 | "policy_type": "cem",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon10/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -500.0,0
3 | -500.0,10
4 | -500.0,20
5 | -500.0,30
6 | -500.0,40
7 | -500.0,50
8 | -500.0,60
9 | -500.0,70
10 | -500.0,80
11 | -500.0,90
12 | -500.0,100
13 | -500.0,110
14 | -500.0,120
15 | -500.0,130
16 | -500.0,140
17 | -500.0,150
18 | -500.0,160
19 | -500.0,170
20 | -500.0,180
21 | -500.0,190
22 | -500.0,200
23 | -500.0,210
24 | -500.0,220
25 | -500.0,230
26 | -500.0,240
27 | -500.0,249
28 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon5/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon5/contour.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon5/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon5/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon5/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "MountainCarEnv",
4 | "horizon": 5,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 32,
8 | "policy_type": "cem",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon5/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-500.0
3 | 10,-500.0
4 | 20,-500.0
5 | 30,-500.0
6 | 40,-500.0
7 | 50,-500.0
8 | 60,-500.0
9 | 70,-500.0
10 | 80,-500.0
11 | 90,-500.0
12 | 100,-500.0
13 | 110,-500.0
14 | 120,-500.0
15 | 130,-500.0
16 | 140,-500.0
17 | 150,-500.0
18 | 160,-500.0
19 | 170,-500.0
20 | 180,-500.0
21 | 190,-500.0
22 | 200,-500.0
23 | 210,-500.0
24 | 220,-500.0
25 | 230,-500.0
26 | 240,-500.0
27 | 249,-500.0
28 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/SwingUpEnv/horizon1/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "SwingUpEnv",
4 | "horizon": 1,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 32,
8 | "policy_type": "cem",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon1/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1396.667327423777
3 | 10,-1236.5227676190348
4 | 20,-1201.2862992423045
5 | 30,-1264.1382127123386
6 | 40,-1525.1329954184694
7 | 50,-1494.0356861809296
8 | 60,-1180.626836176929
9 | 70,-1115.4489761429518
10 | 80,-2462.797364356984
11 | 90,-1093.9334660251343
12 | 100,-2196.921839095621
13 | 110,-2146.571425384078
14 | 120,-1093.7293024977773
15 | 130,-1093.9334660251343
16 | 140,-1097.0459684677512
17 | 150,-1357.03826196838
18 | 160,-1095.0031456020984
19 | 170,-1103.5009875248982
20 | 180,-1318.8339192880053
21 | 190,-1103.5009875248982
22 | 200,-2462.797364356984
23 | 210,-2032.1447096031256
24 | 220,-1770.8190385950547
25 | 230,-1831.6227592496257
26 | 240,-1627.0230466828693
27 | 249,-1729.4831263473936
28 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon10/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/SwingUpEnv/horizon10/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon10/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "SwingUpEnv",
4 | "horizon": 10,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 32,
8 | "policy_type": "cem",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon10/progress.csv:
--------------------------------------------------------------------------------
1 | Average Returns,Iteration
2 | -1093.708926777139,0
3 | -1095.0031456020984,10
4 | -1177.3105002993598,20
5 | -2317.7596258376225,30
6 | -3112.9437564743707,40
7 | -2272.4569652126597,50
8 | -2422.264676378556,60
9 | -2353.919829701844,70
10 | -2331.054111508252,80
11 | -2346.653255271173,90
12 | -2387.710052225485,100
13 | -2078.59505659242,110
14 | -2904.856342572041,120
15 | -1352.730304454663,130
16 | -1311.3610187713514,140
17 | -1365.0260867375184,150
18 | -1243.4885705745057,160
19 | -1244.0366648294828,170
20 | -1241.9574480293968,180
21 | -1280.8335180998288,190
22 | -1242.429338154825,200
23 | -750.119641550883,210
24 | -1257.9522315148947,220
25 | -1158.085668416221,230
26 | -1226.716938288927,240
27 | -1235.294428793677,249
28 |
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon5/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/SwingUpEnv/horizon5/learning_curve.png
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon5/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "batch_size": 256,
3 | "env": "SwingUpEnv",
4 | "horizon": 5,
5 | "learning_rate": 0.001,
6 | "max_iter": 250,
7 | "num_acts": 32,
8 | "policy_type": "cem",
9 | "render": false
10 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon5/progress.csv:
--------------------------------------------------------------------------------
1 | Iteration,Average Returns
2 | 0,-1092.2915620815575
3 | 10,-1103.5009875248982
4 | 20,-2471.439718954018
5 | 30,-2074.797792846919
6 | 40,-2424.161644445676
7 | 50,-2426.6673415803493
8 | 60,-2449.902157955784
9 | 70,-2617.3939123653045
10 | 80,-2456.9473873794736
11 | 90,-2455.0500879372407
12 | 100,-2438.936672619652
13 | 110,-2633.558000601011
14 | 120,-2517.561124215514
15 | 130,-2455.0688311597387
16 | 140,-2629.0789210394737
17 | 150,-2919.149640766505
18 | 160,-1305.1880741230243
19 | 170,-1168.2989464442921
20 | 180,-1481.954819420085
21 | 190,-760.3012598426423
22 | 200,-1276.914365026002
23 | 210,-1278.1670534512004
24 | 220,-1493.6650277140825
25 | 230,-1293.9963890328356
26 | 240,-1261.0170201625338
27 | 249,-1248.4889372240405
28 |
--------------------------------------------------------------------------------
/cs287hw1/data/part5/CartPoleEnv/modelinear_state_discretization51/log.txt:
--------------------------------------------------------------------------------
1 | Logging to C:\Users\Minjune\Desktop\287\cs287hw1\cs287-hw1-code/data/part5/CartPoleEnv/modelinear_state_discretization51/
2 |
--------------------------------------------------------------------------------
/cs287hw1/data/part5/CartPoleEnv/modelinear_state_discretization51/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "env": "CartPoleEnv",
3 | "max_iter": 150,
4 | "mode": "linear",
5 | "render": false,
6 | "state_discretization": 51
7 | }
--------------------------------------------------------------------------------
/cs287hw1/data/part5/CartPoleEnv/modelinear_state_discretization51/progress.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part5/CartPoleEnv/modelinear_state_discretization51/progress.csv
--------------------------------------------------------------------------------
/cs287hw1/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from envs.cart_pole_env import CartPoleEnv
2 | from envs.mountain_hill_env import MountainCarEnv
3 | from envs.double_integrator_env import DoubleIntegratorEnv
4 | from envs.swing_up_env import SwingUpEnv
5 | from envs.grid1d_env import Grid1DEnv
6 | from envs.gridworld_env import GridWorldEnv
7 |
--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/cart_pole_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/cart_pole_env.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/double_integrator_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/double_integrator_env.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/grid1d_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/grid1d_env.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/gridworld_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/gridworld_env.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/mountain_hill_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/mountain_hill_env.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/swing_up_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/swing_up_env.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/part1/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part1/__init__.py
--------------------------------------------------------------------------------
/cs287hw1/part1/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part1/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/part1/__pycache__/tabular_value_iteration.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part1/__pycache__/tabular_value_iteration.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/part1/run_part1.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import logger
4 | import json
5 |
6 |
def main(args):
    """
    Run tabular value iteration on two GridWorldEnv instances (seeds 0 and 1).

    For each environment this configures the logger under
    ``data/part1/<env_name>/policy_type<...>_temperature<...>/``, dumps the
    command-line parameters to ``params.json`` and trains a ValueIteration agent.

    :param args: parsed command-line namespace; reads ``render``, ``policy_type``
                 and ``temperature`` and adds an ``env`` entry before dumping.
    """
    render = args.render
    if not render:
        # Select the non-interactive backend before pyplot is imported.
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from envs import Grid1DEnv, GridWorldEnv
    envs = [GridWorldEnv(seed=0), GridWorldEnv(seed=1)]

    for env in envs:
        # NOTE(review): relies on the env instance exposing a `__name__` attribute — confirm in envs.
        env_name = env.__name__
        exp_dir = os.getcwd() + '/data/part1/%s/policy_type%s_temperature%s/' % (env_name, args.policy_type, args.temperature)
        # Presumably logger.configure creates exp_dir; the open() below needs it to exist.
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
        args_dict = vars(args)  # same dict object as the namespace, so 'env' is visible via args too
        args_dict['env'] = env_name
        # Use a context manager so the params file is flushed and closed deterministically.
        with open(exp_dir + '/params.json', 'w') as params_file:
            json.dump(vars(args), params_file, indent=2, sort_keys=True)

        policy = TabularPolicy(env)
        value_fun = TabularValueFun(env)
        algo = ValueIteration(env,
                              value_fun,
                              policy,
                              policy_type=args.policy_type,
                              render=render,
                              temperature=args.temperature)
        algo.train()
35 |
36 |
if __name__ == "__main__":
    # Command-line entry point: declare the options as a table, register them,
    # then hand the parsed namespace to main().
    cli = argparse.ArgumentParser()
    option_specs = [
        (("--policy_type", "-p"),
         dict(type=str, default='deterministic', choices=["deterministic", "max_ent"],
              help="Whether to train a deterministic policy or a maximum entropy one")),
        (("--render", "-r"),
         dict(action='store_true', help="Vizualize the policy and contours when training")),
        (("--temperature", "-t"),
         dict(type=float, default=1.,
              help="Temperature parameter for maximum entropy policies")),
    ]
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)
    main(cli.parse_args())
46 |
--------------------------------------------------------------------------------
/cs287hw1/part2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part2/__init__.py
--------------------------------------------------------------------------------
/cs287hw1/part2/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part2/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/part2/__pycache__/discretize.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part2/__pycache__/discretize.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/part2/__pycache__/look_ahead_policy.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part2/__pycache__/look_ahead_policy.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/part2/look_ahead_policy.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import spaces
3 |
4 |
class LookAheadPolicy(object):
    """
    Look ahead policy

    -- VARIABLES/FUNCTIONS YOU WILL NEED TO USE --
    * self.horizon (int): Horizon for the look ahead policy

    * act_dim (int): Number of discrete actions (``env.action_space.n``)

    * value_fun (TabularValueFun):
        - get_values(states): if states is None returns the values of all the states. Otherwise, it returns the
        values of the specified states
    * env (Env):
        - vec_set_state(states): vectorized (multiple environments in parallel) version of resetting the
        environment to a state for a batch of states.
        - vec_step(actions): vectorized (multiple environments in parallel) version of stepping through the
        environment for a batch of actions. Returns the next observations, rewards, dones signals, env infos
        (last not used).
    """
    def __init__(self,
                 env,
                 value_fun,
                 horizon,
                 ):
        self.env = env
        self.discount = env.discount
        self._value_fun = value_fun
        self.horizon = horizon

    def get_action(self, state):
        """
        Get the best action by doing look ahead, covering actions for the specified horizon.
        Every possible action sequence of length ``self.horizon`` is enumerated with np.meshgrid
        and scored with ``get_returns``; the first action of the best sequence is returned.
        :param state: current state to plan from
        :return: best_action (int)
        """
        assert isinstance(self.env.action_space, spaces.Discrete)
        act_dim = self.env.action_space.n
        # Rows index the time step, columns index the candidate sequence: shape [horizon, act_dim ** horizon].
        sequences = np.array(np.meshgrid(*np.tile(np.arange(act_dim),
                                                  (self.horizon, 1)))).T.reshape(-1, self.horizon).T
        return sequences[0, np.argmax(self.get_returns(state, sequences))]

    def get_returns(self, state, actions):
        """
        Score a batch of action sequences starting from ``state``.

        :param state: current state of the policy
        :param actions: array of actions of shape [horizon, num_acts]
        :return: array of shape [num_acts] with the discounted sum of rewards over the horizon
                 plus self.discount ^ H * value_fun of the final observation. Rewards received
                 after a sequence has terminated, and the bootstrap value of a terminated
                 sequence, are not counted.
        """
        assert self.env.vectorized
        num_acts = actions.shape[1]
        returns = np.zeros(num_acts)
        # alive[i] is True while sequence i has not yet produced a done signal.
        alive = np.ones(num_acts, dtype=bool)
        self.env.vec_set_state(np.tile(state, (num_acts, 1)))
        for h in range(self.horizon):
            observations, rewards, dones, env_infos = self.env.vec_step(actions[h])
            self.env.vec_set_state(observations)
            # The reward of the transition that ends an episode still counts; later ones do not.
            returns += self.discount ** h * np.where(alive, rewards, 0.0)
            alive = alive & ~np.asarray(dones, dtype=bool)
        # Only bootstrap from the value function for sequences that are still running.
        terminal_values = self._value_fun.get_values(observations)
        returns += self.discount ** self.horizon * np.where(alive, terminal_values, 0.0)
        return returns

    def update(self, actions):
        pass
74 |
--------------------------------------------------------------------------------
/cs287hw1/part2/run_part2_c.py:
--------------------------------------------------------------------------------
1 | import logger
2 | import argparse
3 | import os
4 | import json
5 | import numpy as np; np.random.seed(0)
6 |
7 |
def main(args):
    """
    Run tabular value iteration on discretized continuous-control environments.

    Iterates over DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv and SwingUpEnv. Each is
    wrapped in Discretize (151 points per state dimension for MountainCarEnv and
    DoubleIntegratorEnv, 21 otherwise) and trained with either a tabular or a look-ahead policy.

    :param args: parsed command-line namespace; reads ``render``, ``mode``, ``policy_type``,
                 ``horizon`` and ``max_iter`` and adds an ``env`` entry before dumping.
    :raises NotImplementedError: if ``args.policy_type`` is neither 'tabular' nor 'look_ahead'.
    """
    render = args.render
    if not render:
        # Select the non-interactive backend before pyplot is imported.
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from envs import DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv, SwingUpEnv
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from part2.look_ahead_policy import LookAheadPolicy
    from part2.discretize import Discretize
    envs = [DoubleIntegratorEnv(), MountainCarEnv(), CartPoleEnv(), SwingUpEnv()]

    for env in envs:
        env_name = env.__class__.__name__
        state_discretization = 151 if env_name in ['MountainCarEnv', 'DoubleIntegratorEnv'] else 21
        exp_dir = os.getcwd() + '/data/part2_c/%s/%s' % (env_name, args.mode)
        # Presumably logger.configure creates exp_dir; the open() below needs it to exist.
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
        args_dict = vars(args)  # same dict object as the namespace
        args_dict['env'] = env_name
        # Use a context manager so the params file is flushed and closed deterministically.
        with open(exp_dir + '/params.json', 'w') as params_file:
            json.dump(vars(args), params_file, indent=2, sort_keys=True)

        # From here on `env` refers to the discretized wrapper.
        env = Discretize(env,
                         state_discretization=state_discretization,
                         mode=args.mode
                         )
        value_fun = TabularValueFun(env)
        if args.policy_type == 'tabular':
            policy = TabularPolicy(env)
        elif args.policy_type == 'look_ahead':
            policy = LookAheadPolicy(env, value_fun, args.horizon)
        else:
            raise NotImplementedError
        algo = ValueIteration(env,
                              value_fun,
                              policy,
                              render=render,
                              max_itr=args.max_iter,
                              num_rollouts=1,
                              render_itr=5,
                              log_itr=5)
        algo.train()
50 |
51 |
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument("--render", "-r", action='store_true',
                        help="Vizualize the policy and contours when training")
    # NOTE(review): main() in this file does not read this option; it is only written to params.json.
    parser.add_argument("--action_discretization", "-a", type=int, default=5,
                        help="Number of points per action dimension to discretize")
    parser.add_argument("--mode", "-m", type=str, default='nn', choices=['nn', 'linear'],
                        help="Mode of interpolate between discrete points")
    parser.add_argument("--policy_type", "-p", type=str, default='tabular', choices=['tabular', 'look_ahead'],
                        help='Type of policy to use. Whether to use look ahead policy or tabular')
    parser.add_argument("--horizon", "-H", type=int, default=1,
                        help='Planning horizon for the look ahead policy')
    parser.add_argument("--max_iter", "-i", type=int, default=150,
                        help='Maximum number of iterations for the value iteration algorithm')
    args = parser.parse_args()
    main(args)
68 |
--------------------------------------------------------------------------------
/cs287hw1/part3/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part3/__init__.py
--------------------------------------------------------------------------------
/cs287hw1/part3/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part3/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/part3/__pycache__/continous_value_iteration.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part3/__pycache__/continous_value_iteration.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/part3/__pycache__/look_ahead_policy.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part3/__pycache__/look_ahead_policy.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/part3/run_part3_a.py:
--------------------------------------------------------------------------------
1 | import logger
2 | import argparse
3 | import os
4 | import json
5 | import numpy as np; np.random.seed(0)
6 |
7 |
def main(args):
    """
    Run continuous-state value iteration with an MLP value function.

    Iterates over DoubleIntegratorEnv and MountainCarEnv. For each one it configures the logger
    under ``data/part3_a/<env_name>``, dumps the command-line parameters to ``params.json`` and
    trains ContinousStateValueIteration with a LookAheadPolicy.

    :param args: parsed command-line namespace; reads ``render``, ``horizon``, ``policy_type``,
                 ``num_acts``, ``learning_rate``, ``batch_size`` and ``max_iter`` and adds an
                 ``env`` entry before dumping.
    """
    render = args.render
    if not render:
        # Select the non-interactive backend before pyplot is imported.
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from envs import DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv, SwingUpEnv
    from utils.utils import VectorizeMujocoEnv
    from part3.look_ahead_policy import LookAheadPolicy
    from utils.value_functions import MLPValueFun
    from part3.continous_value_iteration import ContinousStateValueIteration
    envs = [DoubleIntegratorEnv(), MountainCarEnv()]

    for env in envs:
        env_name = env.__class__.__name__
        exp_dir = os.getcwd() + '/data/part3_a/%s' % (env_name)
        # Presumably logger.configure creates exp_dir; the open() below needs it to exist.
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
        args_dict = vars(args)  # same dict object as the namespace
        args_dict['env'] = env_name
        # Use a context manager so the params file is flushed and closed deterministically.
        with open(exp_dir + '/params.json', 'w') as params_file:
            json.dump(vars(args), params_file, indent=2, sort_keys=True)

        value_fun = MLPValueFun(env)
        policy = LookAheadPolicy(env,
                                 value_fun,
                                 horizon=args.horizon,
                                 look_ahead_type=args.policy_type,
                                 num_acts=args.num_acts)
        algo = ContinousStateValueIteration(env,
                                            value_fun,
                                            policy,
                                            learning_rate=args.learning_rate,
                                            batch_size=args.batch_size,
                                            num_acts=args.num_acts,
                                            render=args.render,
                                            max_itr=args.max_iter,
                                            log_itr=5)
        algo.train()
45 |
46 |
if __name__ == "__main__":
    # Command-line entry point: parse the options and hand them to main().
    parser = argparse.ArgumentParser()
    parser.add_argument("--render", "-r", action='store_true',
                        help="Vizualize the policy and contours when training")
    # Adjacent string literals are used instead of a backslash continuation inside the
    # literal, so the help text no longer depends on how the source line is indented.
    parser.add_argument("--policy_type", "-p", type=str, default='rs', choices=['cem', 'rs'],
                        help='Type of policy to use. Whether to use look ahead with cross-entropy '
                             'method or random shooting')
    parser.add_argument("--horizon", "-H", type=int, default=1,
                        help='Planning horizon for the look ahead policy')
    parser.add_argument("--max_iter", "-i", type=int, default=250,
                        help='Maximum number of iterations for the value iteration algorithm')
    parser.add_argument("--learning_rate", "-lr", type=float, default=1e-3,
                        help='Learning rate for training the value function')
    parser.add_argument("--batch_size", "-bs", type=int, default=256,
                        help='batch size for training the value function')
    parser.add_argument("--num_acts", "-a", type=int, default=10,
                        help='Number of actions sampled for maximizing the value function')
    args = parser.parse_args()
    main(args)
--------------------------------------------------------------------------------
/cs287hw1/part4/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part4/__init__.py
--------------------------------------------------------------------------------
/cs287hw1/part4/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part4/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/part4/__pycache__/discretize.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part4/__pycache__/discretize.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/part4/run_part4.py:
--------------------------------------------------------------------------------
1 | import logger
2 | import argparse
3 | import os
4 | import json
5 | import numpy as np; np.random.seed(0)
6 |
7 |
def main(args):
    """
    Run tabular value iteration on discretized CartPoleEnv and SwingUpEnv.

    For each environment this configures the logger under
    ``data/part5/<env_name>/mode<...>_state_discretization<...>/``, dumps the command-line
    parameters to ``params.json``, wraps the env in Discretize and trains a ValueIteration agent.

    :param args: parsed command-line namespace; reads ``render``, ``mode``,
                 ``state_discretization`` and ``max_iter`` and adds an ``env`` entry before dumping.
    """
    render = args.render
    if not render:
        # Select the non-interactive backend before pyplot is imported.
        import matplotlib
        matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    from envs import CartPoleEnv, SwingUpEnv
    from utils.utils import TabularPolicy, TabularValueFun
    from part1.tabular_value_iteration import ValueIteration
    from part4.discretize import Discretize
    envs = [CartPoleEnv(), SwingUpEnv()]

    for env in envs:
        env_name = env.__class__.__name__
        # NOTE(review): results are written under data/part5 although this script lives in part4 — confirm intended.
        exp_dir = os.getcwd() + '/data/part5/%s/mode%s_state_discretization%s/' % (env_name,
                                                                                   args.mode,
                                                                                   str(args.state_discretization)
                                                                                   )
        # Presumably logger.configure creates exp_dir; the open() below needs it to exist.
        logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
        args_dict = vars(args)  # same dict object as the namespace
        args_dict['env'] = env_name
        # Use a context manager so the params file is flushed and closed deterministically.
        with open(exp_dir + '/params.json', 'w') as params_file:
            json.dump(vars(args), params_file, indent=2, sort_keys=True)

        # From here on `env` refers to the discretized wrapper.
        env = Discretize(env,
                         state_discretization=args.state_discretization,
                         mode=args.mode
                         )
        value_fun = TabularValueFun(env)
        policy = TabularPolicy(env)
        algo = ValueIteration(env,
                              value_fun,
                              policy,
                              render=render,
                              max_itr=args.max_iter,
                              num_rollouts=1,
                              render_itr=5,
                              log_itr=5)
        algo.train()
46 |
47 |
if __name__ == "__main__":
    # Command-line entry point: declare the options as a table, register them,
    # then hand the parsed namespace to main().
    cli = argparse.ArgumentParser()
    option_specs = [
        (("--render", "-r"),
         dict(action='store_true',
              help="Vizualize the policy and contours when training")),
        (("--state_discretization", "-s"),
         dict(type=int, default=21,
              help="Number of points per state dimension to discretize")),
        (("--mode", "-m"),
         dict(type=str, default='nn', choices=['nn', 'linear'],
              help="Mode of interpolate between discrete points")),
        (("--max_iter", "-i"),
         dict(type=int, default=150,
              help='Maximum number of iterations for the value iteration algorithm')),
    ]
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)
    main(cli.parse_args())
60 |
--------------------------------------------------------------------------------
/cs287hw1/requirements.txt:
--------------------------------------------------------------------------------
1 | autograd
2 | gym
3 | joblib
4 | matplotlib
5 | moviepy
6 | numpy
7 | Flask==1.0.2
8 | plotly==3.2.0
9 |
--------------------------------------------------------------------------------
/cs287hw1/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__init__.py
--------------------------------------------------------------------------------
/cs287hw1/utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/utils/__pycache__/plot.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__pycache__/plot.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/utils/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__pycache__/utils.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/utils/__pycache__/value_functions.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__pycache__/value_functions.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/utils/value_functions.py:
--------------------------------------------------------------------------------
1 | import autograd.numpy as np
2 | from collections import OrderedDict
3 |
4 |
class MLPValueFun(object):
    """
    Fully connected value function whose weights live in a plain dict.

    Parameters are stored under the keys ``W_<i>`` / ``b_<i>`` for each hidden layer and
    ``W_out`` / ``b_out`` for the scalar output layer.
    """
    _activations = {
        'tanh': np.tanh,
        None: lambda x: x,
        'relu': lambda x: np.maximum(x, 0)
    }

    def __init__(self, env, hidden_sizes=(256, 256), activation='relu'):
        """
        :param env: environment; only ``observation_space.shape[0]`` is read here
        :param hidden_sizes: width of each hidden layer
        :param activation: key into ``_activations`` ('tanh', 'relu' or None)
        """
        self._env = env
        self._params = dict()
        self._build(hidden_sizes, activation)

    def _build(self, hidden_sizes=(256, 256), activation='relu', *args, **kwargs):
        """Create the weight matrices (normal, std 1/fan_in) and zero biases for every layer."""
        self._activation = self._activations[activation]
        self._hidden_sizes = hidden_sizes
        fan_in = self._env.observation_space.shape[0]
        layer_idx = 0
        for fan_out in hidden_sizes:
            weight = np.random.normal(loc=0, scale=1/fan_in, size=(fan_out, fan_in))
            bias = np.zeros((fan_out,))
            self._params['W_%d' % layer_idx] = weight
            self._params['b_%d' % layer_idx] = bias
            fan_in = fan_out
            layer_idx += 1

        # Output layer maps the last hidden width to a single value.
        self._params['W_out'] = np.random.normal(loc=0, scale=1/fan_in, size=(1, fan_in))
        self._params['b_out'] = np.zeros((1,))

    def get_values(self, states, params=None):
        """
        Evaluate the network on a batch of states.

        :param states: array whose rows are states
        :param params: optional parameter dict to use instead of the stored one
        :return: 1-D array with one value per row of ``states``
        """
        if params is None:
            params = self._params
        hidden = states
        for layer_idx in range(len(self._hidden_sizes)):
            weight = params['W_%d' % layer_idx]
            bias = params['b_%d' % layer_idx]
            pre_activation = np.dot(weight, hidden.T).T + bias
            hidden = self._activation(pre_activation)
        out = np.dot(params['W_out'], hidden.T).T + params['b_out']
        return out[:, 0]

    def update(self, params):
        """Replace the stored parameters; the new dict must have exactly the same keys."""
        assert set(params.keys()) == set(self._params.keys())
        self._params = params
48 |
--------------------------------------------------------------------------------
/cs287hw1/viskit/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'dementrock'
2 |
--------------------------------------------------------------------------------
/cs287hw1/viskit/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/viskit/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw1/viskit/__pycache__/core.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/viskit/__pycache__/core.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw2/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/.DS_Store
--------------------------------------------------------------------------------
/cs287hw2/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 2
6 | }
7 |
--------------------------------------------------------------------------------
/cs287hw2/.ipynb_checkpoints/Untitled1-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 2
6 | }
7 |
--------------------------------------------------------------------------------
/cs287hw2/.ipynb_checkpoints/Untitled3-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 2
6 | }
7 |
--------------------------------------------------------------------------------
/cs287hw2/__pycache__/rot_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/__pycache__/rot_utils.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw2/__pycache__/simulators.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/__pycache__/simulators.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw2/cs287hw2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/cs287hw2.pdf
--------------------------------------------------------------------------------
/cs287hw2/environment.yml:
--------------------------------------------------------------------------------
1 | name: cs287hw2
2 | channels:
3 | - defaults
4 | - conda-forge
5 | dependencies:
6 | - python=3.7.3
7 | - jupyter
8 | - patchelf=0.9 # comment this line out on Mac
9 | - pip>=19.1
10 | - pip:
11 | - -r ./requirements.txt
12 |
--------------------------------------------------------------------------------
/cs287hw2/envs/__pycache__/cheetah_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/envs/__pycache__/cheetah_env.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw2/envs/__pycache__/hopper_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/envs/__pycache__/hopper_env.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw2/envs/cheetah_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import copy
3 | from gym import utils
4 | from gym.envs.mujoco import mujoco_env
5 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
6 | import os
7 |
class CheetahModEnv(HalfCheetahEnv, mujoco_env.MujocoEnv, utils.EzPickle):
    """Half-cheetah environment whose action is randomly perturbed on every fifth step.

    While ``perturb_joints`` is True, every fifth call to ``step`` adds
    ``+perturb`` or ``-perturb`` to one randomly chosen action component and
    recolors the corresponding geom; ``f_sim`` exposes a single simulator
    step as a state-transition function on ``[qpos, qvel]`` vectors.
    """

    def __init__(self):
        # These attributes are read by step(), so they are created before the
        # base constructor runs (presumably because MujocoEnv.__init__ calls
        # step() once -- confirm against the installed gym version).
        self.perturb_joints = True
        self.count = 0
        mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 4)
        utils.EzPickle.__init__(self)
        self.perturb_idx = 0
        # Snapshot of the unmodified geom colors, used to undo highlighting.
        self.init_geom_rgba = self.model.geom_rgba.copy()

    def f_sim(self, x0, u, dt, rollout=False, perturb=.01):
        """Advance the simulator one step from state ``x0`` under control ``u``.

        Args:
            x0: state vector; the first ``model.nq`` entries are written to
                qpos and the remaining entries to qvel.
            u: action forwarded to ``step``.
            dt: unused by this method (kept for interface compatibility).
            rollout: when True the step may be perturbed (subject to the
                ``perturb_joints`` / step-count rule in ``step``); when False
                perturbation is disabled for this step.
            perturb: perturbation magnitude forwarded to ``step`` when
                ``rollout`` is True.

        Returns:
            1-D array: qpos followed by qvel after the step.
        """
        nq = self.model.nq
        self.sim.reset()
        qpos = copy.deepcopy(self.init_qpos)
        qvel = copy.deepcopy(self.init_qvel)

        qpos[:] = x0[:nq]
        qvel[:] = x0[nq:]

        self.set_state(qpos, qvel)
        if rollout:
            self.step(u, perturb=perturb)
        else:
            self.perturb_joints = False
            try:
                self.step(u)
            finally:
                # Re-enable perturbation even if the simulator step raises,
                # so one failed query cannot silently disable it for good.
                self.perturb_joints = True
        return np.concatenate([
            self.sim.data.qpos.flat[:],
            self.sim.data.qvel.flat[:]
        ])

    def step(self, a, perturb=.01):
        """Apply action ``a`` (possibly perturbed) and return the gym step tuple.

        Args:
            a: action; it is copied, so the caller's array is never modified.
            perturb: magnitude added to, or subtracted from, one randomly
                chosen action component on perturbed steps.

        Returns:
            ``(ob, reward, done, info)`` where ``reward`` is
            ``reward_run + reward_ctrl``, ``done`` is always False and
            ``info`` carries both reward terms.
        """
        # Work on a private float copy: the perturbation below must not leak
        # into the caller's action buffer (and must also work for int/list input).
        a = np.array(a, dtype=float)
        self.count += 1
        if self.perturb_joints and self.count % 5 == 0:
            self.perturb_idx = np.random.randint(0, 6)
            a[self.perturb_idx] += np.random.choice(np.array([-1 * perturb, perturb]))
            # Highlight the geom whose name matches the perturbed joint's name
            # (joint index is offset by 3 relative to the action index).
            model_id = self.model.geom_names.index(self.model.joint_names[self.perturb_idx + 3])
            geom_rgba = self.init_geom_rgba.copy()
            geom_rgba[model_id] = [0, 1, 1, 1]
            self.model.geom_rgba[:] = geom_rgba
        elif self.count > 1 and self.count % 8 == 0:
            # Restore the original colors on every eighth (non-perturbed) step.
            self.model.geom_rgba[:] = self.init_geom_rgba
        xposbefore = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        xposafter = self.sim.data.qpos[0]
        ob = self._get_obs()
        reward_ctrl = - 0.1 * np.square(a).sum()
        reward_run = (xposafter - xposbefore) / self.dt
        reward = reward_ctrl + reward_run
        done = False
        return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
63 |
--------------------------------------------------------------------------------
/cs287hw2/img/fig_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/img/fig_a.png
--------------------------------------------------------------------------------
/cs287hw2/img/ref_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/img/ref_a.png
--------------------------------------------------------------------------------
/cs287hw2/img/ref_b_cartpole.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/img/ref_b_cartpole.png
--------------------------------------------------------------------------------
/cs287hw2/img/ref_b_heli.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/img/ref_b_heli.png
--------------------------------------------------------------------------------
/cs287hw2/mats/cartpole_traj.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/cartpole_traj.mat
--------------------------------------------------------------------------------
/cs287hw2/mats/heli_traj.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/heli_traj.mat
--------------------------------------------------------------------------------
/cs287hw2/mats/p_a_w.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/p_a_w.mat
--------------------------------------------------------------------------------
/cs287hw2/mats/p_b_w.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/p_b_w.mat
--------------------------------------------------------------------------------
/cs287hw2/mats/p_c_heli_starting_states.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/p_c_heli_starting_states.mat
--------------------------------------------------------------------------------
/cs287hw2/mats/p_c_w.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/p_c_w.mat
--------------------------------------------------------------------------------
/cs287hw2/requirements.txt:
--------------------------------------------------------------------------------
1 | scipy==1.1.0
2 | moviepy==1.0.0
3 | seaborn==0.9.0
4 | matplotlib==3.0.2
5 | mujoco_py>=1.50.1.56
6 | #mujoco_py==2.0.2.2
7 | #numpy==1.15.4
8 | numpy==1.16.1
9 | gym==0.12.5
10 |
--------------------------------------------------------------------------------
/cs287hw2/vids/visualization_hopper.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/vids/visualization_hopper.gif
--------------------------------------------------------------------------------
/cs287hw3/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/.DS_Store
--------------------------------------------------------------------------------
/cs287hw3/.idea/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | # Default ignored files
3 | /workspace.xml
--------------------------------------------------------------------------------
/cs287hw3/.idea/assignment2.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/cs287hw3/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/cs287hw3/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/cs287hw3/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/cs287hw3/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/cs287hw3/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/__pycache__/utils.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw3/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw3/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__init__.py
--------------------------------------------------------------------------------
/cs287hw3/envs/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw3/envs/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw3/envs/__pycache__/cart_pole_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/cart_pole_env.cpython-36.pyc
--------------------------------------------------------------------------------
/cs287hw3/envs/__pycache__/cart_pole_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/cart_pole_env.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw3/envs/__pycache__/cheetah_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/cheetah_env.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw3/envs/__pycache__/hopper_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/hopper_env.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw3/envs/cheetah_env.py:
--------------------------------------------------------------------------------
1 | import copy
2 | from gym import utils
3 | from gym.envs.mujoco import mujoco_env
4 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
5 |
6 |
class CheetahModEnv(HalfCheetahEnv, mujoco_env.MujocoEnv, utils.EzPickle):
    """Half-cheetah environment that reports a cost (negated forward velocity).

    Exposes ``H`` (fixed to 30 here), ``du`` and ``dx`` (first dimension of
    the action and observation spaces) for code that consumes the environment.
    """

    def __init__(self):
        # Created before the base constructor because step() updates `count`.
        self.perturb_joints = True
        self.count = 0
        mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 8)
        utils.EzPickle.__init__(self)
        self.H = 30
        self.du = self.action_space.shape[0]
        self.dx = self.observation_space.shape[0]

    def step(self, a):
        """Simulate one step and return ``(ob, cost, done, info)``.

        The cost is the negative of the x-displacement divided by ``self.dt``;
        ``done`` is always False and ``info`` is an empty dict.
        """
        self.count += 1
        x_start = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        x_end = self.sim.data.qpos[0]
        observation = self._get_obs()
        forward_velocity = (x_end - x_start) / self.dt
        return observation, -forward_velocity, False, dict()

    def set_state(self, state):
        """Reset the simulator and load ``state`` into it.

        ``state`` omits the first qpos entry: its first ``nq - 1`` values fill
        ``qpos[1:nq]`` (``qpos[0]`` keeps its initial value) and the remaining
        values are used as qvel.
        """
        n_pos = self.model.nq
        split = n_pos - 1
        self.sim.reset()
        full_qpos = copy.deepcopy(self.init_qpos)
        full_qpos[1:n_pos] = state[:split]
        mujoco_env.MujocoEnv.set_state(self, full_qpos, state[split:])

    def reset_model(self):
        """Restore the initial qpos/qvel and return the resulting observation."""
        mujoco_env.MujocoEnv.set_state(self, self.init_qpos, self.init_qvel)
        return self._get_obs()
41 |
--------------------------------------------------------------------------------
/cs287hw3/envs/hopper_env.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from gym import utils
3 | import copy
4 | from gym.envs.mujoco import mujoco_env
5 | from gym.envs.mujoco.hopper import HopperEnv
6 |
7 |
class HopperModEnv(HopperEnv, mujoco_env.MujocoEnv, utils.EzPickle):
    """Hopper environment that reports a cost (negated forward velocity).

    Exposes ``dx``/``du`` (first dimension of the observation and action
    spaces) and ``H`` (fixed to 50 here) for code that consumes the environment.
    """

    def __init__(self):
        # Created before the base constructor because step() updates `count`.
        self.perturb_joints = True
        self.components = np.array(['thigh', 'leg', 'foot'])
        self.affected_part = 'thigh'
        self.count = 0
        mujoco_env.MujocoEnv.__init__(self, "hopper.xml", 8)
        utils.EzPickle.__init__(self)

        self.init_geom_rgba = self.model.geom_rgba.copy()
        self.dx = self.observation_space.shape[0]
        self.du = self.action_space.shape[0]
        self.H = 50

    def step(self, a):
        """Simulate one step and return ``(ob, cost, done, info)``.

        The cost is the negative of the x-displacement divided by ``self.dt``;
        ``done`` is always False and ``info`` is an empty dict.
        """
        self.count += 1
        x_before = self.sim.data.qpos[0]
        self.do_simulation(a, self.frame_skip)
        # Only the first of these three values is used below.
        x_after, _height, _angle = self.sim.data.qpos[0:3]
        forward_velocity = (x_after - x_before) / self.dt
        observation = self._get_obs()
        return observation, -forward_velocity, False, {}

    def set_state(self, state):
        """Reset the simulator and load ``state`` into it.

        The first ``nq - 1`` entries of ``state`` are written to ``qpos[1:6]``
        (``qpos[0]`` keeps its initial value) and the rest to ``qvel[:6]``.
        """
        split = self.model.nq - 1
        self.sim.reset()
        new_qpos = copy.deepcopy(self.init_qpos)
        new_qvel = copy.deepcopy(self.init_qvel)

        new_qpos[1:6] = state[:split]
        new_qvel[:6] = state[split:]

        mujoco_env.MujocoEnv.set_state(self, new_qpos, new_qvel)

    def _get_obs(self):
        """Return ``qpos[1:6]`` followed by qvel clipped to ``[-10, 10]``."""
        positions = self.sim.data.qpos.flat[1:6]
        velocities = np.clip(self.sim.data.qvel.flat, -10, 10)
        return np.concatenate([positions, velocities])

    def reset_model(self):
        """Pick a random affected part, zero the step counter, restore the initial state."""
        self.affected_part = self.components[np.random.randint(0, 3)]
        self.count = 0
        mujoco_env.MujocoEnv.set_state(self, self.init_qpos, self.init_qvel)
        return self._get_obs()

    def viewer_setup(self):
        """Configure the viewer camera (tracked body, distance, look-at height, elevation)."""
        camera = self.viewer.cam
        camera.trackbodyid = 2
        camera.distance = self.model.stat.extent * 0.75
        camera.lookat[2] = 1.15
        camera.elevation = -20
65 |
--------------------------------------------------------------------------------
/cs287hw3/non_linear_optimization.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/non_linear_optimization.pdf
--------------------------------------------------------------------------------
/cs287hw3/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import moviepy.editor as mpy
3 | from scipy.optimize import minimize
4 |
5 |
class NNPolicy(object):
    """Fully connected tanh network mapping a state vector to an action.

    Parameters live in ``self.params = {'W': [...], 'b': [...]}`` with one
    weight matrix and one bias vector per layer. ``set_params`` and
    ``get_params`` convert to and from a single flat vector laid out as
    ``W0, b0, W1, b1, ...`` (each ``W`` flattened in row-major order).
    """

    def __init__(self, input_dim, output_dim, hidden_sizes):
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.hidden_sizes = tuple(hidden_sizes)
        # Populated by init_params() or set_params(); None until then.
        self.params = None

    def _layer_sizes(self):
        """Return the layer widths ``(input, *hidden, output)``."""
        return (self.input_dim,) + self.hidden_sizes + (self.output_dim,)

    def get_action(self, state, timestep=None):
        """Evaluate the network on ``state``.

        Every layer, including the output layer, is followed by ``tanh``, so
        each action component lies in ``[-1, 1]``. ``timestep`` is accepted
        for interface compatibility and ignored.
        """
        x = state
        params = self.params
        for i in range(len(self.hidden_sizes)):
            x = np.tanh(x.T @ params['W'][i] + params['b'][i])
        return np.tanh(x.T @ params['W'][-1] + params['b'][-1])

    def set_params(self, params):
        """Load weights and biases from a flat parameter vector.

        Args:
            params: array-like holding exactly as many values as the network
                has parameters, ordered ``W0, b0, W1, b1, ...``. The input is
                flattened first, so any shape with the right number of
                elements is accepted.

        Raises:
            ValueError: if the number of values does not match the network.
                Without this check a short vector yields a truncated bias
                (which may broadcast silently) and the tail of a long vector
                is ignored.
        """
        sizes = self._layer_sizes()
        n_layers = len(sizes) - 1
        # No copy is made when `params` is already a flat ndarray, so the
        # stored matrices are then views into the caller's array.
        flat = np.asarray(params).reshape(-1)
        expected = sum(sizes[i] * sizes[i + 1] + sizes[i + 1] for i in range(n_layers))
        if flat.size != expected:
            raise ValueError(
                "expected %d parameters for layer sizes %s, got %d"
                % (expected, sizes, flat.size)
            )

        Ws, bs = [], []
        start = 0
        for i in range(n_layers):
            w_shape = (sizes[i], sizes[i + 1])
            stop = start + sizes[i] * sizes[i + 1]
            Ws.append(flat[start:stop].reshape(w_shape))
            start = stop
            stop = start + sizes[i + 1]
            bs.append(flat[start:stop])
            start = stop
        self.params = dict(W=Ws, b=bs)

    def get_params(self):
        """Return all parameters as one flat vector (``W0, b0, W1, b1, ...``)."""
        params = []
        for W, b in zip(self.params['W'], self.params['b']):
            params.extend([W.flatten(), b.flatten()])
        return np.concatenate(params)

    def init_params(self):
        """Randomly initialise the network, store the parameters and return them.

        Each weight is drawn from ``np.random.uniform`` (default range
        ``[0, 1)``) and divided by ``sqrt(fan_in + fan_out)``; biases start
        at zero. The returned dict is a new dict that shares its ``W``/``b``
        lists with ``self.params``.
        """
        sizes = self._layer_sizes()
        Ws, bs = [], []
        for i in range(len(self.hidden_sizes) + 1):
            W = np.random.uniform(size=(sizes[i], sizes[i + 1])) / np.sqrt(sizes[i] + sizes[i + 1])
            b = np.zeros(shape=sizes[i + 1])
            Ws.append(W)
            bs.append(b)
        self.params = dict(W=Ws, b=bs)
        return dict(W=Ws, b=bs)
55 |
56 |
class ActPolicy(object):
    """Open-loop policy that replays a fixed action sequence, one row per call."""

    def __init__(self, env, actions):
        # One row per time step: the flat action vector becomes (env.H, env.du).
        self._actions = actions.reshape(env.H, env.du)
        self.t = 0

    def get_action(self, state, timestep=None):
        """Return the action at the internal cursor and advance it.

        Both ``state`` and ``timestep`` are ignored; the cursor wraps back to
        the first action after the last one has been returned.
        """
        current = self.t
        chosen = self._actions[current]
        self.t = (current + 1) % len(self._actions)
        return chosen

    def reset(self):
        """Rewind the cursor to the first action."""
        self.t = 0
69 |
70 |
def rollout(env, policy, noise=0., render=False):
    """Run ``policy`` in ``env`` for up to ``env.H`` steps and total the cost.

    The global NumPy RNG is reseeded with 0 on every call, so the Gaussian
    action noise is identical from one rollout to the next.

    Args:
        env: environment providing ``reset()``, ``step(action)``, ``H`` and
            ``du`` (and ``render('rgb_array')`` when ``render`` is True).
        policy: object providing ``get_action(state, timestep)``.
        noise: standard deviation of the zero-mean Gaussian noise added to
            each action.
        render: when True, collect a frame per step and write them to
            ``./rollout.gif``.

    Returns:
        ``(cost, states)``: the sum of the per-step costs and the list of
        states returned by ``env.step``.
    """
    np.random.seed(0)
    obs = env.reset()
    total_cost = 0
    visited = []
    frames = []
    for step_idx in range(env.H):
        # Query the policy before drawing noise so the RNG call order is fixed.
        base_action = policy.get_action(obs, step_idx)
        jitter = np.random.normal(0, scale=noise, size=(env.du,))
        obs, step_cost, finished, _ = env.step(base_action + jitter)
        if render:
            frames.append(env.render('rgb_array'))
        visited.append(obs)
        total_cost += step_cost
        if finished:
            break
    if render:
        animation = mpy.ImageSequenceClip(frames, fps=8)
        animation.write_gif('./rollout.gif', verbose=False)

    return total_cost, visited
89 |
90 |
--------------------------------------------------------------------------------
/cs287hw3/vids/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/vids/.DS_Store
--------------------------------------------------------------------------------
/cs287hw3/vids/rollout.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/vids/rollout.gif
--------------------------------------------------------------------------------
/cs287hw4/__MACOSX/._hw4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/__MACOSX/._hw4.pdf
--------------------------------------------------------------------------------
/cs287hw4/hw4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/hw4.pdf
--------------------------------------------------------------------------------
/cs287hw4/hw4_rubric.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/hw4_rubric.pdf
--------------------------------------------------------------------------------
/cs287hw4/p3_a_data_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p3_a_data_1.npy
--------------------------------------------------------------------------------
/cs287hw4/p3_a_data_2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p3_a_data_2.npy
--------------------------------------------------------------------------------
/cs287hw4/p3_a_data_3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p3_a_data_3.npy
--------------------------------------------------------------------------------
/cs287hw4/p3_a_data_4.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p3_a_data_4.npy
--------------------------------------------------------------------------------
/cs287hw4/p6_data_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p6_data_0.npy
--------------------------------------------------------------------------------
/cs287hw4/p6_data_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p6_data_1.npy
--------------------------------------------------------------------------------
/cs287hw4/p6_data_2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p6_data_2.npy
--------------------------------------------------------------------------------
/cs287hw4/p6_data_3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p6_data_3.npy
--------------------------------------------------------------------------------
/cs287hw5/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/.DS_Store
--------------------------------------------------------------------------------
/cs287hw5/.idea/hw5_nov12.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/cs287hw5/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/cs287hw5/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/cs287hw5/hw5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5.pdf
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup.zip
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/.DS_Store
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/baseline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/baseline.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/clipper.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/clipper.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/entropy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/entropy.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/gae.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/gae.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/mbppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/mbppo.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/meppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/meppo.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/newplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/newplot.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/pg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/pg.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/pg_cheetah.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/pg_cheetah.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_baseline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_baseline.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_clipper.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_clipper.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_entropy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_entropy.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_gae.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_gae.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_mbppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_mbppo.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_meppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_meppo.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_pg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_pg.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_ppo_obj.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_ppo_obj.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_sac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_sac.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ppo_obj.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ppo_obj.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/sac_cheetah.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/sac_cheetah.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Ant_3A_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Ant_3A_12.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Ant_3A_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Ant_3A_3.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Cheetah_2A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_2A.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Cheetah_2B_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_2B_1.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Cheetah_2B_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_2B_2.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Cheetah_2C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_2C.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Cheetah_3A_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_3A_12.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Cheetah_3A_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_3A_3.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1A.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1B.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1C.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1D.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1D.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1E.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1E.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1F.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1F.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1G.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1G.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1A.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1B.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1C.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1D.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1D.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1E.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1E.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1F.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1F.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1G.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1G.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_2A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_2A.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_2B_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_2B_1.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_2B_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_2B_2.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_2C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_2C.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_3A_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_3A_12.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_3A_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_3A_3.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1A.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1B.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1C.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1D.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1D.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1E.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1E.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1F.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1F.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1G.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1G.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_2A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_2A.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_2B_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_2B_1.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_2B_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_2B_2.png
--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_2C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_2C.png
--------------------------------------------------------------------------------
/cs287hw5/sac/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/.DS_Store
--------------------------------------------------------------------------------
/cs287hw5/sac/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/cs287hw5/sac/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/cs287hw5/sac/.idea/sac.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/cs287hw5/sac/README.md:
--------------------------------------------------------------------------------
1 | # CS294-112 HW 5b: Soft Actor Critic
2 | Original code from Tuomas Haarnoja, Soroush Nasiriany, and Aurick Zhou for CS294-112 Fall 2018
3 |
4 | Dependencies:
5 | * Python **3.4.5**
6 | * Numpy version **1.15.2**
7 | * TensorFlow version **1.10.0**
8 | * tensorflow-probability version **0.4.0**
9 | * OpenAI Gym version **0.10.8**
10 | * MuJoCo version **1.50** and mujoco-py **1.50.1.59**
11 | * seaborn version **0.9.0**
12 |
13 | You will implement `sac.py` and `nn.py`.
14 |
15 | See the [HW5 PDF](http://rail.eecs.berkeley.edu/deeprlcourse/static/homeworks/hw5b.pdf) for further instructions.
16 |
17 |
18 | Instructions for Running the Code
19 |
20 | OS Requirement
21 | * Ubuntu 16.04 LTS
22 |
23 | Dependencies:
24 | * Anaconda
25 |
26 | All the specific python packages are listed in `environment.yml`
27 |
28 |
29 | Instructions
30 | * Install all the environment dependencies by running `./project_setup.bash setup`
31 | * Load the dependencies with `source project_setup.bash`
32 | * Run `run_all.sh` to run all the experiments.
33 | * Run `generate_plots.sh` to generate all the plots; the plots are located in the `plots` directory.
34 |
--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/logz.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/logz.cpython-35.pyc
--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/logz.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/logz.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/nn.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/nn.cpython-35.pyc
--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/nn.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/nn.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/sac.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/sac.cpython-35.pyc
--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/sac.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/sac.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/utils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/utils.cpython-35.pyc
--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/utils.cpython-37.pyc
--------------------------------------------------------------------------------
/cs287hw5/sac/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/data/.DS_Store
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reinf_02-12-2019_16-46-48/1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.1,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": false,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "Ant-v2",
14 | "exp_name": "reinf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reinf_02-12-2019_16-46-48/11/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.1,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": false,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "Ant-v2",
14 | "exp_name": "reinf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reinf_02-12-2019_16-46-48/21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.1,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": false,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "Ant-v2",
14 | "exp_name": "reinf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reparam_02-12-2019_16-47-03/1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.1,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "Ant-v2",
14 | "exp_name": "reparam",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reparam_02-12-2019_16-47-03/11/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.1,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "Ant-v2",
14 | "exp_name": "reparam",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reparam_02-12-2019_16-47-03/21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.1,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "Ant-v2",
14 | "exp_name": "reparam",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reparam_2qf_02-12-2019_16-47-25/1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.1,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": true
12 | },
13 | "env_name": "Ant-v2",
14 | "exp_name": "reparam_2qf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reparam_2qf_02-12-2019_16-47-25/11/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.1,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": true
12 | },
13 | "env_name": "Ant-v2",
14 | "exp_name": "reparam_2qf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reparam_2qf_02-12-2019_16-47-25/21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.1,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": true
12 | },
13 | "env_name": "Ant-v2",
14 | "exp_name": "reparam_2qf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53/1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": false,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "HalfCheetah-v2",
14 | "exp_name": "reinf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53/11/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": false,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "HalfCheetah-v2",
14 | "exp_name": "reinf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53/21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": false,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "HalfCheetah-v2",
14 | "exp_name": "reinf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_02-12-2019_12-05-49/1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "HalfCheetah-v2",
14 | "exp_name": "reparam",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_02-12-2019_12-05-49/11/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "HalfCheetah-v2",
14 | "exp_name": "reparam",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_02-12-2019_12-05-49/21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "HalfCheetah-v2",
14 | "exp_name": "reparam",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_2qf_02-12-2019_12-34-24/1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": true
12 | },
13 | "env_name": "HalfCheetah-v2",
14 | "exp_name": "reparam_2qf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_2qf_02-12-2019_12-34-24/11/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": true
12 | },
13 | "env_name": "HalfCheetah-v2",
14 | "exp_name": "reparam_2qf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_2qf_02-12-2019_12-34-24/21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": true
12 | },
13 | "env_name": "HalfCheetah-v2",
14 | "exp_name": "reparam_2qf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reinf_02-12-2019_21-02-20/1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": false,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "Hopper-v2",
14 | "exp_name": "reinf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reinf_02-12-2019_21-02-20/11/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": false,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "Hopper-v2",
14 | "exp_name": "reinf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reinf_02-12-2019_21-02-20/21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": false,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "Hopper-v2",
14 | "exp_name": "reinf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reparam_02-12-2019_21-02-26/1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "Hopper-v2",
14 | "exp_name": "reparam",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reparam_02-12-2019_21-02-26/11/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "Hopper-v2",
14 | "exp_name": "reparam",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reparam_02-12-2019_21-02-26/21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": false
12 | },
13 | "env_name": "Hopper-v2",
14 | "exp_name": "reparam",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reparam_2qf_02-12-2019_21-02-37/1/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": true
12 | },
13 | "env_name": "Hopper-v2",
14 | "exp_name": "reparam_2qf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reparam_2qf_02-12-2019_21-02-37/11/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": true
12 | },
13 | "env_name": "Hopper-v2",
14 | "exp_name": "reparam_2qf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reparam_2qf_02-12-2019_21-02-37/21/params.json:
--------------------------------------------------------------------------------
1 | {
2 | "algorithm_params": {
3 | "alpha": 0.2,
4 | "batch_size": 256,
5 | "discount": 0.99,
6 | "epoch_length": 1000,
7 | "learning_rate": 0.001,
8 | "n_epochs": 500,
9 | "reparameterize": true,
10 | "tau": 0.01,
11 | "two_qf": true
12 | },
13 | "env_name": "Hopper-v2",
14 | "exp_name": "reparam_2qf",
15 | "policy_params": {
16 | "hidden_layer_sizes": [
17 | 128,
18 | 128
19 | ]
20 | },
21 | "q_function_params": {
22 | "hidden_layer_sizes": [
23 | 128,
24 | 128
25 | ]
26 | },
27 | "replay_pool_params": {
28 | "max_size": 1000000.0
29 | },
30 | "sampler_params": {
31 | "max_episode_length": 1000,
32 | "prefill_steps": 1000
33 | },
34 | "value_function_params": {
35 | "hidden_layer_sizes": [
36 | 128,
37 | 128
38 | ]
39 | }
40 | }
--------------------------------------------------------------------------------
/cs287hw5/sac/environment.yml:
--------------------------------------------------------------------------------
1 | name: cs294drl_hw5_sac
2 | # dependencies:
3 | # - python==3.4.5
4 | # - pip:
5 | # - gym==0.10.8
6 | # - numpy==1.15.2
7 | # - tensorflow-gpu==1.10.0
8 | # - tensorflow-probability==0.4.0
9 | # - mujoco-py==1.50.1.56
10 | # - seaborn==0.9.0
11 | dependencies:
12 | - python=3.5
13 | - numpy=1.14.5
14 | - pandas
15 | - scipy
16 | - matplotlib
17 | - seaborn
18 | - scikit-learn
19 | - jupyter
20 | - patchelf
21 | - pip:
22 | - Cython
23 | - https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.13.1-cp35-cp35m-linux_x86_64.whl
24 | - mujoco-py==1.50.1.56
25 | - box2d==2.3.2
26 | - opencv-python
27 | - gym[atari]==0.10.5
28 | - tensorflow-probability==0.6.0
29 |
--------------------------------------------------------------------------------
/cs287hw5/sac/generate_plots.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | #
3 | # Render one figure per environment from the run directories found under
4 | # ./data by calling myplot.py. Run it from the directory that contains data/.
5 |
6 | # Extended regex for the "_NN-NN-NNNN" date stamp embedded in run directory
7 | # names such as sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53. sed has no \d
8 | # digit class, so the digits are spelled [0-9]; the leading "_" is included so
9 | # the separator is removed together with the stamp.
10 | DATE_STAMP='_[0-9]{2}-[0-9]{2}-[0-9]{4}'
11 |
12 | # Print "data/<name>" for every entry of ./data whose name matches regex $1.
13 | function filter_experiment_dirs {
14 |     ls data | grep -e "$1" | sed -e 's/^/data\//'
15 | }
16 |
17 | # Print every entry of ./data whose name matches regex $1, truncated at the
18 | # first match of extended regex $2 (everything from the match on is dropped).
19 | # $2 is spliced into an s/.../ expression and therefore must not contain "/".
20 | function filter_experiment_config {
21 |     ls data | grep -e "$1" | sed -E -e "s/$2.*//"
22 | }
23 |
24 |
25 | mkdir -p plots
26 |
27 | # The $(...) substitutions below are intentionally unquoted: every output
28 | # line has to reach myplot.py as a separate argument.
29 | python myplot.py \
30 |     --legend $(filter_experiment_config 'sac_HalfCheetah' "$DATE_STAMP") \
31 |     --title 'HalfCheetah SAC' \
32 |     --output plots/HalfCheetah_SAC.png \
33 |     $(filter_experiment_dirs 'sac_HalfCheetah')
34 |
35 | python myplot.py \
36 |     --legend $(filter_experiment_config 'sac_Ant' "$DATE_STAMP") \
37 |     --title 'Ant SAC' \
38 |     --output plots/Ant_SAC.png \
39 |     $(filter_experiment_dirs 'sac_Ant')
40 |
--------------------------------------------------------------------------------
/cs287hw5/sac/project_setup.bash:
--------------------------------------------------------------------------------
1 | # Project setup script
2 | # Source this file to set up the environment for this project.
3 | #
4 | # Usage:
5 | #   source project_setup.bash          # export variables, define aliases, activate the env
6 | #   source project_setup.bash setup    # create the conda env from environment.yml
7 | #   source project_setup.bash remove   # delete the conda env
8 |
9 |
10 | ENV_NAME='cs294drl_hw5_sac'
11 |
12 | if [ "$1" = "setup" ]; then
13 |     echo "Creating conda environment..."
14 |     conda env create -f environment.yml
15 | elif [ "$1" = "remove" ]; then
16 |     conda remove --name "$ENV_NAME" --all --yes
17 | else
18 |
19 |     export PROJECT_HOME="$(pwd)"
20 |
21 |     # Single quotes defer the expansion to use time and keep the path quoted,
22 |     # so a project directory containing spaces still works.
23 |     alias ph='cd "$PROJECT_HOME"'
24 |
25 |
26 |     alias set_display="export DISPLAY=':0.0'"
27 |     alias unset_display="unset DISPLAY"
28 |
29 |     export MPLBACKEND='Agg'
30 |
31 |     source activate "$ENV_NAME"
32 |
33 |     # Use the current user's home instead of a fixed /home/<user> path, and
34 |     # only emit the ":" separator when LD_LIBRARY_PATH already has a value so
35 |     # no empty entry ends up in the search path.
36 |     export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$HOME/.mujoco/mjpro150/bin"
37 | fi
38 |
--------------------------------------------------------------------------------
/cs287hw5/sac/run_all.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 |
3 | python train_mujoco.py --env_name HalfCheetah-v2 --exp_name reinf -e 3
4 |
5 | python train_mujoco.py --env_name HalfCheetah-v2 --exp_name reparam -e 3 --reparameterize
6 |
7 | python train_mujoco.py --env_name HalfCheetah-v2 --exp_name reparam_2qf -e 3 --reparameterize --two_qf
8 |
9 |
10 | python train_mujoco.py --env_name Ant-v2 --exp_name reinf -e 3
11 |
12 | python train_mujoco.py --env_name Ant-v2 --exp_name reparam -e 3 --reparameterize
13 |
14 | python train_mujoco.py --env_name Ant-v2 --exp_name reparam_2qf -e 3 --reparameterize --two_qf
15 |
16 |
17 | python train_mujoco.py --env_name Hopper-v2 --exp_name reinf -e 3
18 |
19 | python train_mujoco.py --env_name Hopper-v2 --exp_name reparam -e 3 --reparameterize
20 |
21 | python train_mujoco.py --env_name Hopper-v2 --exp_name reparam_2qf -e 3 --reparameterize --two_qf
--------------------------------------------------------------------------------