├── README.md
├── cs287hw1
    ├── .idea
    │   ├── cs287-hw1-code.iml
    │   ├── inspectionProfiles
    │   │   └── profiles_settings.xml
    │   ├── misc.xml
    │   ├── modules.xml
    │   └── workspace.xml
    ├── .vs
    │   ├── VSWorkspaceState.json
    │   ├── cs287-hw1-code
    │   │   └── v15
    │   │   │   └── .suo
    │   └── slnx.sqlite
    ├── README.md
    ├── __pycache__
    │   ├── logger.cpython-36.pyc
    │   └── logger.cpython-37.pyc
    ├── data
    │   ├── part1
    │   │   ├── GridWorldEnv0
    │   │   │   ├── policy_typedeterministic_temperature1.0
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   ├── policy_typemax_ent_temperature0.01
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   ├── policy_typemax_ent_temperature1.0
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   └── policy_typemax_ent_temperature1e-05
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   └── GridWorldEnv1
    │   │   │   ├── policy_typedeterministic_temperature1.0
    │   │   │       ├── contour.png
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   ├── policy_typemax_ent_temperature0.01
    │   │   │       ├── contour.png
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   ├── policy_typemax_ent_temperature1.0
    │   │   │       ├── contour.png
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   └── policy_typemax_ent_temperature1e-05
    │   │   │       ├── contour.png
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   ├── part2_ab
    │   │   ├── DoubleIntegratorEnv
    │   │   │   ├── modelinear_state_discretization151
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   ├── modelinear_state_discretization21
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   ├── modelinear_state_discretization51
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   ├── modenn_state_discretization151
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   ├── modenn_state_discretization21
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   └── modenn_state_discretization51
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   └── MountainCarEnv
    │   │   │   ├── modelinear_state_discretization151
    │   │   │       ├── contour.png
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   ├── modelinear_state_discretization21
    │   │   │       ├── contour.png
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   ├── modelinear_state_discretization51
    │   │   │       ├── contour.png
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   ├── modenn_state_discretization151
    │   │   │       ├── contour.png
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   ├── modenn_state_discretization21
    │   │   │       ├── contour.png
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   └── modenn_state_discretization51
    │   │   │       ├── contour.png
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   ├── part2_c
    │   │   ├── CartPoleEnv
    │   │   │   ├── linear
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   └── nn
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   ├── DoubleIntegratorEnv
    │   │   │   ├── linear
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   └── nn
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   ├── MountainCarEnv
    │   │   │   ├── linear
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   └── nn
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   └── SwingUpEnv
    │   │   │   ├── linear
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   └── nn
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   ├── part2_d
    │   │   ├── CartPoleEnv
    │   │   │   ├── policy_typelook_ahead_modelinear_horizon1
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   ├── policy_typelook_ahead_modelinear_horizon2
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   └── policy_typelook_ahead_modelinear_horizon3
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   ├── DoubleIntegratorEnv
    │   │   │   ├── policy_typelook_ahead_modelinear_horizon1
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   ├── policy_typelook_ahead_modelinear_horizon2
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   └── policy_typelook_ahead_modelinear_horizon3
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   ├── MountainCarEnv
    │   │   │   ├── policy_typelook_ahead_modelinear_horizon1
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   ├── policy_typelook_ahead_modelinear_horizon2
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   └── policy_typelook_ahead_modelinear_horizon3
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   └── SwingUpEnv
    │   │   │   ├── policy_typelook_ahead_modelinear_horizon1
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   ├── policy_typelook_ahead_modelinear_horizon2
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   └── policy_typelook_ahead_modelinear_horizon3
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   ├── part3_a
    │   │   ├── DoubleIntegratorEnv
    │   │   │   ├── contour.png
    │   │   │   ├── learning_curve.png
    │   │   │   ├── log.txt
    │   │   │   ├── params.json
    │   │   │   └── progress.csv
    │   │   └── MountainCarEnv
    │   │   │   ├── contour.png
    │   │   │   ├── learning_curve.png
    │   │   │   ├── log.txt
    │   │   │   ├── params.json
    │   │   │   └── progress.csv
    │   ├── part3_b
    │   │   ├── CartPoleEnv
    │   │   │   ├── horizon1
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   ├── horizon10
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   └── horizon5
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   ├── DoubleIntegratorEnv
    │   │   │   ├── horizon1
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   ├── horizon10
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   └── horizon5
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   ├── MountainCarEnv
    │   │   │   ├── horizon1
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   ├── horizon10
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   │   └── horizon5
    │   │   │   │   ├── contour.png
    │   │   │   │   ├── learning_curve.png
    │   │   │   │   ├── log.txt
    │   │   │   │   ├── params.json
    │   │   │   │   └── progress.csv
    │   │   └── SwingUpEnv
    │   │   │   ├── horizon1
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   ├── horizon10
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   │   │   └── horizon5
    │   │   │       ├── learning_curve.png
    │   │   │       ├── log.txt
    │   │   │       ├── params.json
    │   │   │       └── progress.csv
    │   └── part5
    │   │   └── CartPoleEnv
    │   │       └── modelinear_state_discretization51
    │   │           ├── log.txt
    │   │           ├── params.json
    │   │           └── progress.csv
    ├── envs
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── cart_pole_env.cpython-36.pyc
    │   │   ├── double_integrator_env.cpython-36.pyc
    │   │   ├── grid1d_env.cpython-36.pyc
    │   │   ├── gridworld_env.cpython-36.pyc
    │   │   ├── mountain_hill_env.cpython-36.pyc
    │   │   └── swing_up_env.cpython-36.pyc
    │   ├── cart_pole_env.py
    │   ├── double_integrator_env.py
    │   ├── grid1d_env.py
    │   ├── gridworld_env.py
    │   ├── mountain_hill_env.py
    │   └── swing_up_env.py
    ├── logger.py
    ├── part1
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   └── tabular_value_iteration.cpython-36.pyc
    │   ├── run_part1.py
    │   └── tabular_value_iteration.py
    ├── part2
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── discretize.cpython-36.pyc
    │   │   └── look_ahead_policy.cpython-36.pyc
    │   ├── discretize.py
    │   ├── look_ahead_policy.py
    │   ├── run_part2_ab.py
    │   ├── run_part2_c.py
    │   └── run_part2_d.py
    ├── part3
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── continous_value_iteration.cpython-36.pyc
    │   │   └── look_ahead_policy.cpython-36.pyc
    │   ├── continous_value_iteration.py
    │   ├── look_ahead_policy.py
    │   ├── run_part3_a.py
    │   └── run_part3_b.py
    ├── part4
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   └── discretize.cpython-36.pyc
    │   ├── discretize.py
    │   └── run_part4.py
    ├── requirements.txt
    ├── utils
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── plot.cpython-36.pyc
    │   │   ├── utils.cpython-36.pyc
    │   │   └── value_functions.cpython-36.pyc
    │   ├── plot.py
    │   ├── utils.py
    │   └── value_functions.py
    └── viskit
    │   ├── __init__.py
    │   ├── __pycache__
    │       ├── __init__.cpython-36.pyc
    │       └── core.cpython-36.pyc
    │   ├── core.py
    │   ├── frontend.py
    │   ├── static
    │       ├── css
    │       │   ├── bootstrap.min.css
    │       │   └── dropdowns-enhancement.css
    │       └── js
    │       │   ├── bootstrap.min.js
    │       │   ├── dropdowns-enhancement.js
    │       │   ├── jquery-1.10.2.min.js
    │       │   ├── jquery.loadTemplate-1.5.6.js
    │       │   └── plotly-latest.min.js
    │   └── templates
    │       └── main.html
├── cs287hw2
    ├── .DS_Store
    ├── .ipynb_checkpoints
    │   ├── Untitled-checkpoint.ipynb
    │   ├── Untitled1-checkpoint.ipynb
    │   ├── Untitled2-checkpoint.ipynb
    │   ├── Untitled3-checkpoint.ipynb
    │   ├── lqr-checkpoint.ipynb
    │   ├── lqr-sol-checkpoint.ipynb
    │   └── lqr_nolonger_clean-checkpoint.ipynb
    ├── __pycache__
    │   ├── rot_utils.cpython-37.pyc
    │   └── simulators.cpython-37.pyc
    ├── cs287hw2.pdf
    ├── environment.yml
    ├── envs
    │   ├── __pycache__
    │   │   ├── cheetah_env.cpython-37.pyc
    │   │   └── hopper_env.cpython-37.pyc
    │   ├── cheetah_env.py
    │   └── hopper_env.py
    ├── img
    │   ├── fig_a.png
    │   ├── ref_a.png
    │   ├── ref_b_cartpole.png
    │   └── ref_b_heli.png
    ├── lqr.ipynb
    ├── mats
    │   ├── cartpole_traj.mat
    │   ├── heli_traj.mat
    │   ├── p_a_w.mat
    │   ├── p_b_w.mat
    │   ├── p_c_heli_starting_states.mat
    │   └── p_c_w.mat
    ├── requirements.txt
    ├── rot_utils.py
    ├── simulators.py
    └── vids
    │   └── visualization_hopper.gif
├── cs287hw3
    ├── .DS_Store
    ├── .idea
    │   ├── .gitignore
    │   ├── assignment2.iml
    │   ├── inspectionProfiles
    │   │   └── profiles_settings.xml
    │   ├── misc.xml
    │   ├── modules.xml
    │   └── vcs.xml
    ├── .ipynb_checkpoints
    │   ├── Non-linear Optimization-checkpoint.ipynb
    │   ├── non_linear_optimization-checkpoint.ipynb
    │   ├── non_linear_optimization_og-checkpoint.ipynb
    │   ├── non_linear_optimization_sols-checkpoint.ipynb
    │   ├── non_linear_optimzation-checkpoint.ipynb
    │   └── non_linear_optimzation_sols-checkpoint.ipynb
    ├── __pycache__
    │   ├── utils.cpython-36.pyc
    │   └── utils.cpython-37.pyc
    ├── envs
    │   ├── __init__.py
    │   ├── __pycache__
    │   │   ├── __init__.cpython-36.pyc
    │   │   ├── __init__.cpython-37.pyc
    │   │   ├── cart_pole_env.cpython-36.pyc
    │   │   ├── cart_pole_env.cpython-37.pyc
    │   │   ├── cheetah_env.cpython-37.pyc
    │   │   └── hopper_env.cpython-37.pyc
    │   ├── cart_pole_env.py
    │   ├── cheetah_env.py
    │   └── hopper_env.py
    ├── non_linear_optimization.ipynb
    ├── non_linear_optimization.pdf
    ├── utils.py
    └── vids
    │   ├── .DS_Store
    │   └── rollout.gif
├── cs287hw4
    ├── .ipynb_checkpoints
    │   ├── Homework4_Q-Copy1-checkpoint.ipynb
    │   ├── Homework4_Q-checkpoint.ipynb
    │   └── Homework4_Q111-checkpoint.ipynb
    ├── Homework4_Q.ipynb
    ├── Homework4_Q_exp.ipynb
    ├── __MACOSX
    │   └── ._hw4.pdf
    ├── hw4.pdf
    ├── hw4_rubric.pdf
    ├── p3_a_data_1.npy
    ├── p3_a_data_2.npy
    ├── p3_a_data_3.npy
    ├── p3_a_data_4.npy
    ├── p6_data_0.npy
    ├── p6_data_1.npy
    ├── p6_data_2.npy
    └── p6_data_3.npy
└── cs287hw5
    ├── .DS_Store
    ├── .idea
        ├── hw5_nov12.iml
        ├── misc.xml
        ├── modules.xml
        └── workspace.xml
    ├── hw5.pdf
    ├── hw5_writeup.zip
    ├── hw5_writeup
        ├── .DS_Store
        ├── PS5.tex
        ├── PS5_template.tex
        ├── figures
        │   ├── baseline.png
        │   ├── clipper.png
        │   ├── entropy.png
        │   ├── gae.png
        │   ├── mbppo.png
        │   ├── meppo.png
        │   ├── newplot.png
        │   ├── pg.png
        │   ├── pg_cheetah.png
        │   ├── ph_baseline.png
        │   ├── ph_clipper.png
        │   ├── ph_entropy.png
        │   ├── ph_gae.png
        │   ├── ph_mbppo.png
        │   ├── ph_meppo.png
        │   ├── ph_pg.png
        │   ├── ph_ppo_obj.png
        │   ├── ph_sac.png
        │   ├── ppo_obj.png
        │   └── sac_cheetah.png
        └── my_figures
        │   ├── Ant_3A_12.png
        │   ├── Ant_3A_3.png
        │   ├── Cheetah_2A.png
        │   ├── Cheetah_2B_1.png
        │   ├── Cheetah_2B_2.png
        │   ├── Cheetah_2C.png
        │   ├── Cheetah_3A_12.png
        │   ├── Cheetah_3A_3.png
        │   ├── HalfCheetah_1A.png
        │   ├── HalfCheetah_1B.png
        │   ├── HalfCheetah_1C.png
        │   ├── HalfCheetah_1D.png
        │   ├── HalfCheetah_1E.png
        │   ├── HalfCheetah_1F.png
        │   ├── HalfCheetah_1G.png
        │   ├── Hopper_1A.png
        │   ├── Hopper_1B.png
        │   ├── Hopper_1C.png
        │   ├── Hopper_1D.png
        │   ├── Hopper_1E.png
        │   ├── Hopper_1F.png
        │   ├── Hopper_1G.png
        │   ├── Hopper_2A.png
        │   ├── Hopper_2B_1.png
        │   ├── Hopper_2B_2.png
        │   ├── Hopper_2C.png
        │   ├── Hopper_3A_12.png
        │   ├── Hopper_3A_3.png
        │   ├── Swimmer_1A.png
        │   ├── Swimmer_1B.png
        │   ├── Swimmer_1C.png
        │   ├── Swimmer_1D.png
        │   ├── Swimmer_1E.png
        │   ├── Swimmer_1F.png
        │   ├── Swimmer_1G.png
        │   ├── Swimmer_2A.png
        │   ├── Swimmer_2B_1.png
        │   ├── Swimmer_2B_2.png
        │   └── Swimmer_2C.png
    └── sac
        ├── .DS_Store
        ├── .idea
            ├── misc.xml
            ├── modules.xml
            ├── sac.iml
            └── workspace.xml
        ├── README.md
        ├── __pycache__
            ├── logz.cpython-35.pyc
            ├── logz.cpython-37.pyc
            ├── nn.cpython-35.pyc
            ├── nn.cpython-37.pyc
            ├── sac.cpython-35.pyc
            ├── sac.cpython-37.pyc
            ├── utils.cpython-35.pyc
            └── utils.cpython-37.pyc
        ├── data
            ├── .DS_Store
            ├── sac_Ant-v2_reinf_02-12-2019_16-46-48
            │   ├── 1
            │   │   ├── log.txt
            │   │   └── params.json
            │   ├── 11
            │   │   ├── log.txt
            │   │   └── params.json
            │   └── 21
            │   │   ├── log.txt
            │   │   └── params.json
            ├── sac_Ant-v2_reparam_02-12-2019_16-47-03
            │   ├── 1
            │   │   ├── log.txt
            │   │   └── params.json
            │   ├── 11
            │   │   ├── log.txt
            │   │   └── params.json
            │   └── 21
            │   │   ├── log.txt
            │   │   └── params.json
            ├── sac_Ant-v2_reparam_2qf_02-12-2019_16-47-25
            │   ├── 1
            │   │   ├── log.txt
            │   │   └── params.json
            │   ├── 11
            │   │   ├── log.txt
            │   │   └── params.json
            │   └── 21
            │   │   ├── log.txt
            │   │   └── params.json
            ├── sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53
            │   ├── 1
            │   │   ├── log.txt
            │   │   └── params.json
            │   ├── 11
            │   │   ├── log.txt
            │   │   └── params.json
            │   └── 21
            │   │   ├── log.txt
            │   │   └── params.json
            ├── sac_HalfCheetah-v2_reparam_02-12-2019_12-05-49
            │   ├── 1
            │   │   ├── log.txt
            │   │   └── params.json
            │   ├── 11
            │   │   ├── log.txt
            │   │   └── params.json
            │   └── 21
            │   │   ├── log.txt
            │   │   └── params.json
            ├── sac_HalfCheetah-v2_reparam_2qf_02-12-2019_12-34-24
            │   ├── 1
            │   │   ├── log.txt
            │   │   └── params.json
            │   ├── 11
            │   │   ├── log.txt
            │   │   └── params.json
            │   └── 21
            │   │   ├── log.txt
            │   │   └── params.json
            ├── sac_Hopper-v2_reinf_02-12-2019_21-02-20
            │   ├── 1
            │   │   ├── log.txt
            │   │   └── params.json
            │   ├── 11
            │   │   ├── log.txt
            │   │   └── params.json
            │   └── 21
            │   │   ├── log.txt
            │   │   └── params.json
            ├── sac_Hopper-v2_reparam_02-12-2019_21-02-26
            │   ├── 1
            │   │   ├── log.txt
            │   │   └── params.json
            │   ├── 11
            │   │   ├── log.txt
            │   │   └── params.json
            │   └── 21
            │   │   ├── log.txt
            │   │   └── params.json
            └── sac_Hopper-v2_reparam_2qf_02-12-2019_21-02-37
            │   ├── 1
            │       ├── log.txt
            │       └── params.json
            │   ├── 11
            │       ├── log.txt
            │       └── params.json
            │   └── 21
            │       ├── log.txt
            │       └── params.json
        ├── environment.yml
        ├── generate_plots.sh
        ├── logz.py
        ├── myplot.py
        ├── nn.py
        ├── plot.py
        ├── project_setup.bash
        ├── run_all.sh
        ├── sac.py
        ├── train_mujoco.py
        └── utils.py


/README.md:
--------------------------------------------------------------------------------
 1 | # Optimal Control, Reinforcement Learning & Robotics Projects in CS 287: Advanced Robotics (fall 2019)
 2 | 
 3 | **This repository contains past projects I completed in CS 287, which I took in the Berkeley EECS department under Prof. Pieter Abbeel in fall 2019.**
 4 | (https://people.eecs.berkeley.edu/~pabbeel/cs287-fa19/)
 5 | 
 6 | The following are the projects, along with the related materials covered/implemented in each.
 7 | 
 8 | - [**Project 1**](/cs287hw1)
 9 |   - Value Iteration
10 |   - Discretization-based Optimal Control
11 |   - Function Approximation Optimal Control
12 | 
13 | - [**Project 2**](/cs287hw2)
14 |   - LQR, iLQR
15 |   - DDP
16 |   - Feedback Linearization
17 | 
18 | - [**Project 3**](/cs287hw3)
19 |   - Convex Optimization
20 |   - Sequential Convex Programming
21 |   - Motion Planning and Control (w/ convex opt.)
22 |   
23 | - [**Project 4**](/cs287hw4)
24 |   - Multivariate Gaussians
25 |   - Kalman Filtering
26 |   - EM & MLE
27 |   - Particle Filtering
28 |   - Belief Space Planning
29 |   
30 | - [**Project 5**](/cs287hw5)
31 |   - Policy Gradient
32 |     - Trust Region Policy Optimization (TRPO)
33 |     - Proximal Policy Optimization (PPO)
34 |   - Deep Q-Learning
35 |     - DQN
36 |     - Double DQN
37 |     - Dueling DQN
38 | 


--------------------------------------------------------------------------------
/cs287hw1/.idea/cs287-hw1-code.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="inheritedJdk" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/cs287hw1/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 | <component name="InspectionProjectProfileManager">
2 |   <settings>
3 |     <option name="USE_PROJECT_PROFILE" value="false" />
4 |     <version value="1.0" />
5 |   </settings>
6 | </component>


--------------------------------------------------------------------------------
/cs287hw1/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7" project-jdk-type="Python SDK" />
4 | </project>


--------------------------------------------------------------------------------
/cs287hw1/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/cs287-hw1-code.iml" filepath="$PROJECT_DIR$/.idea/cs287-hw1-code.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/cs287hw1/.idea/workspace.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <project version="4">
 3 |   <component name="ChangeListManager">
 4 |     <list default="true" id="5b0bd792-9207-499b-a13e-89e3236679c0" name="Default Changelist" comment="" />
 5 |     <option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
 6 |     <option name="SHOW_DIALOG" value="false" />
 7 |     <option name="HIGHLIGHT_CONFLICTS" value="true" />
 8 |     <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
 9 |     <option name="LAST_RESOLUTION" value="IGNORE" />
10 |   </component>
11 |   <component name="ProjectId" id="1Qh4Pw3ao1U6ESo4mth5JHVocNC" />
12 |   <component name="PropertiesComponent">
13 |     <property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
14 |   </component>
15 |   <component name="RunDashboard">
16 |     <option name="ruleStates">
17 |       <list>
18 |         <RuleState>
19 |           <option name="name" value="ConfigurationTypeDashboardGroupingRule" />
20 |         </RuleState>
21 |         <RuleState>
22 |           <option name="name" value="StatusDashboardGroupingRule" />
23 |         </RuleState>
24 |       </list>
25 |     </option>
26 |   </component>
27 |   <component name="SvnConfiguration">
28 |     <configuration />
29 |   </component>
30 |   <component name="TaskManager">
31 |     <task active="true" id="Default" summary="Default task">
32 |       <changelist id="5b0bd792-9207-499b-a13e-89e3236679c0" name="Default Changelist" comment="" />
33 |       <created>1568209473775</created>
34 |       <option name="number" value="Default" />
35 |       <option name="presentableId" value="Default" />
36 |       <updated>1568209473775</updated>
37 |     </task>
38 |     <servers />
39 |   </component>
40 | </project>


--------------------------------------------------------------------------------
/cs287hw1/.vs/VSWorkspaceState.json:
--------------------------------------------------------------------------------
1 | {
2 |   "ExpandedNodes": [
3 |     ""
4 |   ],
5 |   "PreviewInSolutionExplorer": false
6 | }


--------------------------------------------------------------------------------
/cs287hw1/.vs/cs287-hw1-code/v15/.suo:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/.vs/cs287-hw1-code/v15/.suo


--------------------------------------------------------------------------------
/cs287hw1/.vs/slnx.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/.vs/slnx.sqlite


--------------------------------------------------------------------------------
/cs287hw1/__pycache__/logger.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/__pycache__/logger.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/__pycache__/logger.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/__pycache__/logger.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "GridWorldEnv0",
3 |   "policy_type": "deterministic",
4 |   "render": false,
5 |   "temperature": 1.0
6 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typedeterministic_temperature1.0/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | 0.0,0
 3 | 0.0,1
 4 | 0.0,2
 5 | 0.05,3
 6 | 0.0,4
 7 | 0.0,5
 8 | 0.1,6
 9 | 0.1,7
10 | 0.05,8
11 | 0.1,9
12 | 0.15,10
13 | 0.1,11
14 | 0.2,12
15 | 0.1,13
16 | 0.15,14
17 | 0.15,15
18 | 0.3,16
19 | 0.1,17
20 | 0.15,18
21 | 0.25,19
22 | 0.45,20
23 | 0.2,21
24 | 0.55,22
25 | 0.45,23
26 | 0.35,24
27 | 0.7,25
28 | 0.55,26
29 | 0.75,27
30 | 0.75,28
31 | 0.65,29
32 | 0.55,30
33 | 0.85,31
34 | 0.65,32
35 | 0.9,33
36 | 0.85,34
37 | 0.85,35
38 | 0.95,36
39 | 1.0,37
40 | 1.0,38
41 | 1.0,39
42 | 1.0,40
43 | 1.0,41
44 | 1.0,42
45 | 1.0,43
46 | 1.0,44
47 | 1.0,45
48 | 1.0,46
49 | 0.95,47
50 | 1.0,48
51 | 1.0,49
52 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "GridWorldEnv0",
3 |   "policy_type": "max_ent",
4 |   "render": false,
5 |   "temperature": 0.01
6 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature0.01/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,0.0
 3 | 1,0.0
 4 | 2,0.0
 5 | 3,0.15
 6 | 4,0.05
 7 | 5,0.0
 8 | 6,0.05
 9 | 7,0.0
10 | 8,0.0
11 | 9,0.0
12 | 10,0.0
13 | 11,0.0
14 | 12,0.0
15 | 13,0.0
16 | 14,0.05
17 | 15,0.0
18 | 16,0.05
19 | 17,0.05
20 | 18,0.0
21 | 19,0.0
22 | 20,0.05
23 | 21,0.05
24 | 22,0.05
25 | 23,0.0
26 | 24,0.0
27 | 25,0.05
28 | 26,0.05
29 | 27,0.05
30 | 28,0.0
31 | 29,0.0
32 | 30,0.0
33 | 31,0.0
34 | 32,0.0
35 | 33,0.0
36 | 34,0.05
37 | 35,0.05
38 | 36,0.05
39 | 37,0.1
40 | 38,0.0
41 | 39,0.0
42 | 40,0.0
43 | 41,0.05
44 | 42,0.05
45 | 43,0.0
46 | 44,0.1
47 | 45,0.05
48 | 46,0.05
49 | 47,0.0
50 | 48,0.05
51 | 49,0.0
52 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "GridWorldEnv0",
3 |   "policy_type": "max_ent",
4 |   "render": false,
5 |   "temperature": 1.0
6 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1.0/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,0.0
 3 | 1,0.0
 4 | 2,0.0
 5 | 3,0.15
 6 | 4,0.05
 7 | 5,0.0
 8 | 6,0.1
 9 | 7,0.0
10 | 8,0.05
11 | 9,0.0
12 | 10,0.0
13 | 11,0.0
14 | 12,0.05
15 | 13,0.05
16 | 14,0.05
17 | 15,0.05
18 | 16,0.05
19 | 17,0.05
20 | 18,0.0
21 | 19,0.0
22 | 20,0.05
23 | 21,0.05
24 | 22,0.05
25 | 23,0.1
26 | 24,0.1
27 | 25,0.1
28 | 26,0.05
29 | 27,0.05
30 | 28,0.0
31 | 29,0.0
32 | 30,0.0
33 | 31,0.0
34 | 32,0.0
35 | 33,0.0
36 | 34,0.1
37 | 35,0.05
38 | 36,0.05
39 | 37,0.1
40 | 38,0.0
41 | 39,0.0
42 | 40,0.0
43 | 41,0.1
44 | 42,0.05
45 | 43,0.05
46 | 44,0.05
47 | 45,0.1
48 | 46,0.05
49 | 47,0.0
50 | 48,0.05
51 | 49,0.05
52 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "GridWorldEnv0",
3 |   "policy_type": "max_ent",
4 |   "render": false,
5 |   "temperature": 1e-05
6 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv0/policy_typemax_ent_temperature1e-05/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | 0.0,0
 3 | 0.0,1
 4 | 0.0,2
 5 | 0.15,3
 6 | 0.05,4
 7 | 0.0,5
 8 | 0.05,6
 9 | 0.0,7
10 | 0.0,8
11 | 0.0,9
12 | 0.0,10
13 | 0.0,11
14 | 0.0,12
15 | 0.0,13
16 | 0.05,14
17 | 0.0,15
18 | 0.05,16
19 | 0.05,17
20 | 0.0,18
21 | 0.0,19
22 | 0.0,20
23 | 0.05,21
24 | 0.05,22
25 | 0.0,23
26 | 0.0,24
27 | 0.05,25
28 | 0.05,26
29 | 0.05,27
30 | 0.0,28
31 | 0.0,29
32 | 0.0,30
33 | 0.0,31
34 | 0.0,32
35 | 0.0,33
36 | 0.05,34
37 | 0.05,35
38 | 0.05,36
39 | 0.1,37
40 | 0.0,38
41 | 0.0,39
42 | 0.0,40
43 | 0.05,41
44 | 0.05,42
45 | 0.0,43
46 | 0.1,44
47 | 0.1,45
48 | 0.05,46
49 | 0.0,47
50 | 0.05,48
51 | 0.0,49
52 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "GridWorldEnv1",
3 |   "policy_type": "deterministic",
4 |   "render": false,
5 |   "temperature": 1.0
6 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typedeterministic_temperature1.0/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | 0.0,0
 3 | 0.0,1
 4 | 0.0,2
 5 | 0.0,3
 6 | 0.0,4
 7 | 0.0,5
 8 | 0.0,6
 9 | 0.0,7
10 | 0.1,8
11 | 0.15,9
12 | 0.05,10
13 | 0.2,11
14 | 0.3,12
15 | 0.25,13
16 | 0.35,14
17 | 0.4,15
18 | 0.35,16
19 | 0.3,17
20 | 0.45,18
21 | 0.7,19
22 | 0.45,20
23 | 0.55,21
24 | 0.55,22
25 | 0.65,23
26 | 0.75,24
27 | 0.7,25
28 | 0.7,26
29 | 0.85,27
30 | 0.9,28
31 | 0.75,29
32 | 1.0,30
33 | 0.85,31
34 | 0.95,32
35 | 0.95,33
36 | 1.0,34
37 | 1.0,35
38 | 1.0,36
39 | 0.9,37
40 | 1.0,38
41 | 1.0,39
42 | 1.0,40
43 | 1.0,41
44 | 1.0,42
45 | 0.95,43
46 | 1.0,44
47 | 1.0,45
48 | 0.95,46
49 | 1.0,47
50 | 1.0,48
51 | 1.0,49
52 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "GridWorldEnv1",
3 |   "policy_type": "max_ent",
4 |   "render": false,
5 |   "temperature": 0.01
6 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature0.01/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,0.0
 3 | 1,0.0
 4 | 2,0.0
 5 | 3,0.1
 6 | 4,0.05
 7 | 5,0.0
 8 | 6,0.05
 9 | 7,0.0
10 | 8,0.05
11 | 9,0.0
12 | 10,0.0
13 | 11,0.0
14 | 12,0.0
15 | 13,0.05
16 | 14,0.0
17 | 15,0.1
18 | 16,0.0
19 | 17,0.0
20 | 18,0.05
21 | 19,0.0
22 | 20,0.1
23 | 21,0.0
24 | 22,0.0
25 | 23,0.0
26 | 24,0.0
27 | 25,0.05
28 | 26,0.05
29 | 27,0.05
30 | 28,0.0
31 | 29,0.0
32 | 30,0.0
33 | 31,0.0
34 | 32,0.0
35 | 33,0.0
36 | 34,0.05
37 | 35,0.0
38 | 36,0.0
39 | 37,0.05
40 | 38,0.05
41 | 39,0.0
42 | 40,0.05
43 | 41,0.05
44 | 42,0.0
45 | 43,0.0
46 | 44,0.05
47 | 45,0.05
48 | 46,0.05
49 | 47,0.05
50 | 48,0.1
51 | 49,0.05
52 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "GridWorldEnv1",
3 |   "policy_type": "max_ent",
4 |   "render": false,
5 |   "temperature": 1.0
6 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1.0/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,0.0
 3 | 1,0.0
 4 | 2,0.0
 5 | 3,0.1
 6 | 4,0.05
 7 | 5,0.0
 8 | 6,0.1
 9 | 7,0.0
10 | 8,0.05
11 | 9,0.05
12 | 10,0.0
13 | 11,0.0
14 | 12,0.0
15 | 13,0.05
16 | 14,0.0
17 | 15,0.1
18 | 16,0.05
19 | 17,0.0
20 | 18,0.1
21 | 19,0.0
22 | 20,0.1
23 | 21,0.0
24 | 22,0.0
25 | 23,0.0
26 | 24,0.0
27 | 25,0.1
28 | 26,0.05
29 | 27,0.05
30 | 28,0.0
31 | 29,0.0
32 | 30,0.05
33 | 31,0.0
34 | 32,0.0
35 | 33,0.05
36 | 34,0.1
37 | 35,0.0
38 | 36,0.05
39 | 37,0.05
40 | 38,0.05
41 | 39,0.0
42 | 40,0.05
43 | 41,0.05
44 | 42,0.0
45 | 43,0.0
46 | 44,0.05
47 | 45,0.1
48 | 46,0.05
49 | 47,0.1
50 | 48,0.1
51 | 49,0.05
52 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "GridWorldEnv1",
3 |   "policy_type": "max_ent",
4 |   "render": false,
5 |   "temperature": 1e-05
6 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part1/GridWorldEnv1/policy_typemax_ent_temperature1e-05/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | 0.0,0
 3 | 0.0,1
 4 | 0.0,2
 5 | 0.1,3
 6 | 0.0,4
 7 | 0.0,5
 8 | 0.05,6
 9 | 0.0,7
10 | 0.05,8
11 | 0.0,9
12 | 0.0,10
13 | 0.0,11
14 | 0.0,12
15 | 0.05,13
16 | 0.0,14
17 | 0.1,15
18 | 0.0,16
19 | 0.0,17
20 | 0.05,18
21 | 0.0,19
22 | 0.1,20
23 | 0.0,21
24 | 0.0,22
25 | 0.0,23
26 | 0.0,24
27 | 0.05,25
28 | 0.05,26
29 | 0.05,27
30 | 0.0,28
31 | 0.0,29
32 | 0.0,30
33 | 0.0,31
34 | 0.0,32
35 | 0.0,33
36 | 0.05,34
37 | 0.0,35
38 | 0.0,36
39 | 0.05,37
40 | 0.05,38
41 | 0.0,39
42 | 0.05,40
43 | 0.05,41
44 | 0.0,42
45 | 0.0,43
46 | 0.05,44
47 | 0.05,45
48 | 0.05,46
49 | 0.05,47
50 | 0.05,48
51 | 0.05,49
52 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action_discretization": 5,
 3 |   "env": "DoubleIntegratorEnv",
 4 |   "horizon": 1,
 5 |   "max_iter": 150,
 6 |   "mode": "linear",
 7 |   "policy_type": "tabular",
 8 |   "render": false,
 9 |   "state_discretization": 151
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization151/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1086.2625732421875
 3 | 5,-1263.9954833984375
 4 | 10,-1262.9053955078125
 5 | 15,-661.170166015625
 6 | 20,-288.8149108886719
 7 | 25,-108.48724365234375
 8 | 30,-86.2402114868164
 9 | 35,-82.36270141601562
10 | 40,-77.28358459472656
11 | 45,-74.50456237792969
12 | 50,-71.93091583251953
13 | 55,-72.67729187011719
14 | 60,-70.5304946899414
15 | 65,-70.99906921386719
16 | 70,-72.21932220458984
17 | 75,-70.76229095458984
18 | 80,-69.89215087890625
19 | 85,-71.07881164550781
20 | 90,-69.90048217773438
21 | 95,-72.05101776123047
22 | 100,-70.91768646240234
23 | 105,-70.9474868774414
24 | 110,-70.95112609863281
25 | 115,-71.04618072509766
26 | 120,-70.75999450683594
27 | 125,-70.89779663085938
28 | 130,-70.42910766601562
29 | 135,-71.04066467285156
30 | 140,-70.91621398925781
31 | 145,-70.90560913085938
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action_discretization": 5,
 3 |   "env": "DoubleIntegratorEnv",
 4 |   "horizon": 1,
 5 |   "max_iter": 150,
 6 |   "mode": "linear",
 7 |   "policy_type": "tabular",
 8 |   "render": false,
 9 |   "state_discretization": 21
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization21/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -1086.2625732421875,0
 3 | -882.6398315429688,5
 4 | -678.9556274414062,10
 5 | -449.319580078125,15
 6 | -169.34024047851562,20
 7 | -189.33172607421875,25
 8 | -92.19869232177734,30
 9 | -97.56818389892578,35
10 | -80.79776000976562,40
11 | -82.92337036132812,45
12 | -83.22191619873047,50
13 | -76.68871307373047,55
14 | -81.23823547363281,60
15 | -79.61701965332031,65
16 | -85.20960998535156,70
17 | -81.37742614746094,75
18 | -86.22061157226562,80
19 | -80.67992401123047,85
20 | -87.55850219726562,90
21 | -87.11900329589844,95
22 | -84.90445709228516,100
23 | -82.08210754394531,105
24 | -87.08905029296875,110
25 | -84.68000030517578,115
26 | -83.30575561523438,120
27 | -75.68476104736328,125
28 | -80.35721588134766,130
29 | -80.14833068847656,135
30 | -93.36302185058594,140
31 | -81.09378051757812,145
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action_discretization": 5,
 3 |   "env": "DoubleIntegratorEnv",
 4 |   "horizon": 1,
 5 |   "max_iter": 150,
 6 |   "mode": "linear",
 7 |   "policy_type": "tabular",
 8 |   "render": false,
 9 |   "state_discretization": 51
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modelinear_state_discretization51/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -1086.2625732421875,0
 3 | -1144.5582275390625,5
 4 | -1067.7918701171875,10
 5 | -601.8530883789062,15
 6 | -208.37237548828125,20
 7 | -170.8474884033203,25
 8 | -87.156005859375,30
 9 | -82.76526641845703,35
10 | -73.99360656738281,40
11 | -77.13446044921875,45
12 | -71.68731689453125,50
13 | -73.66706848144531,55
14 | -71.9505386352539,60
15 | -84.7891845703125,65
16 | -78.1386489868164,70
17 | -94.64347839355469,75
18 | -91.47573852539062,80
19 | -85.21119689941406,85
20 | -88.02317810058594,90
21 | -84.81150817871094,95
22 | -96.04219818115234,100
23 | -90.305419921875,105
24 | -89.38578033447266,110
25 | -91.25402069091797,115
26 | -89.45679473876953,120
27 | -89.90522003173828,125
28 | -91.32474517822266,130
29 | -92.66826629638672,135
30 | -90.96044158935547,140
31 | -74.82160949707031,145
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action_discretization": 5,
 3 |   "env": "DoubleIntegratorEnv",
 4 |   "horizon": 1,
 5 |   "max_iter": 150,
 6 |   "mode": "nn",
 7 |   "policy_type": "tabular",
 8 |   "render": false,
 9 |   "state_discretization": 151
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization151/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1086.2625732421875
 3 | 5,-1074.3865966796875
 4 | 10,-1108.3885498046875
 5 | 15,-1194.7117919921875
 6 | 20,-1210.2864990234375
 7 | 25,-1210.2864990234375
 8 | 30,-439.2337341308594
 9 | 35,-272.38214111328125
10 | 40,-275.7249755859375
11 | 45,-273.76495361328125
12 | 50,-269.75738525390625
13 | 55,-253.88551330566406
14 | 60,-233.76580810546875
15 | 65,-203.03372192382812
16 | 70,-187.3236083984375
17 | 75,-181.8560028076172
18 | 80,-172.03138732910156
19 | 85,-172.03138732910156
20 | 90,-166.24839782714844
21 | 95,-157.64881896972656
22 | 100,-157.64881896972656
23 | 105,-151.69505310058594
24 | 110,-151.69505310058594
25 | 115,-151.69505310058594
26 | 120,-151.69505310058594
27 | 125,-148.64476013183594
28 | 130,-149.03884887695312
29 | 135,-157.87362670898438
30 | 140,-157.87362670898438
31 | 145,-151.9330596923828
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action_discretization": 5,
 3 |   "env": "DoubleIntegratorEnv",
 4 |   "horizon": 1,
 5 |   "max_iter": 150,
 6 |   "mode": "nn",
 7 |   "policy_type": "tabular",
 8 |   "render": false,
 9 |   "state_discretization": 21
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization21/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1086.2625732421875
 3 | 5,-1086.2625732421875
 4 | 10,-1086.2625732421875
 5 | 15,-1086.2625732421875
 6 | 20,-1086.2625732421875
 7 | 25,-1086.2625732421875
 8 | 30,-1086.2625732421875
 9 | 35,-1086.2625732421875
10 | 40,-1086.2625732421875
11 | 45,-1086.2625732421875
12 | 50,-1086.2625732421875
13 | 55,-1086.2625732421875
14 | 60,-1086.2625732421875
15 | 65,-1086.2625732421875
16 | 70,-1086.2625732421875
17 | 75,-1086.2625732421875
18 | 80,-1086.2625732421875
19 | 85,-1086.2625732421875
20 | 90,-1086.2625732421875
21 | 95,-1086.2625732421875
22 | 100,-1086.2625732421875
23 | 105,-1086.2625732421875
24 | 110,-1086.2625732421875
25 | 115,-1086.2625732421875
26 | 120,-1086.2625732421875
27 | 125,-1086.2625732421875
28 | 130,-1086.2625732421875
29 | 135,-1086.2625732421875
30 | 140,-1086.2625732421875
31 | 145,-1086.2625732421875
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action_discretization": 5,
 3 |   "env": "DoubleIntegratorEnv",
 4 |   "horizon": 1,
 5 |   "max_iter": 150,
 6 |   "mode": "nn",
 7 |   "policy_type": "tabular",
 8 |   "render": false,
 9 |   "state_discretization": 51
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/DoubleIntegratorEnv/modenn_state_discretization51/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1086.2625732421875
 3 | 5,-1086.2625732421875
 4 | 10,-1086.2625732421875
 5 | 15,-1086.2625732421875
 6 | 20,-1086.2625732421875
 7 | 25,-1086.2625732421875
 8 | 30,-1086.2625732421875
 9 | 35,-1086.2625732421875
10 | 40,-727.3023071289062
11 | 45,-685.129150390625
12 | 50,-563.1832275390625
13 | 55,-550.3751831054688
14 | 60,-584.0700073242188
15 | 65,-597.1317749023438
16 | 70,-594.9307861328125
17 | 75,-427.8874816894531
18 | 80,-472.5625915527344
19 | 85,-472.5625915527344
20 | 90,-472.1018371582031
21 | 95,-472.1018371582031
22 | 100,-516.2792358398438
23 | 105,-478.8886413574219
24 | 110,-493.9183654785156
25 | 115,-493.9183654785156
26 | 120,-493.9183654785156
27 | 125,-355.2012023925781
28 | 130,-394.9552917480469
29 | 135,-394.9552917480469
30 | 140,-394.9552917480469
31 | 145,-394.9552917480469
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action_discretization": 5,
 3 |   "env": "MountainCarEnv",
 4 |   "horizon": 1,
 5 |   "max_iter": 150,
 6 |   "mode": "linear",
 7 |   "policy_type": "tabular",
 8 |   "render": false,
 9 |   "state_discretization": 151
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization151/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-500.0
 3 | 5,-500.0
 4 | 10,-500.0
 5 | 15,-500.0
 6 | 20,-500.0
 7 | 25,-500.0
 8 | 30,-500.0
 9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-161.0
17 | 75,-154.0
18 | 80,-147.0
19 | 85,-150.0
20 | 90,-147.0
21 | 95,-149.0
22 | 100,-146.0
23 | 105,-149.0
24 | 110,-145.0
25 | 115,-105.0
26 | 120,-105.0
27 | 125,-105.0
28 | 130,-105.0
29 | 135,-105.0
30 | 140,-105.0
31 | 145,-105.0
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action_discretization": 5,
 3 |   "env": "MountainCarEnv",
 4 |   "horizon": 1,
 5 |   "max_iter": 150,
 6 |   "mode": "linear",
 7 |   "policy_type": "tabular",
 8 |   "render": false,
 9 |   "state_discretization": 21
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization21/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -500.0,0
 3 | -500.0,5
 4 | -500.0,10
 5 | -500.0,15
 6 | -238.0,20
 7 | -346.0,25
 8 | -274.0,30
 9 | -496.0,35
10 | -412.0,40
11 | -192.0,45
12 | -138.0,50
13 | -131.0,55
14 | -143.0,60
15 | -147.0,65
16 | -138.0,70
17 | -153.0,75
18 | -150.0,80
19 | -152.0,85
20 | -147.0,90
21 | -145.0,95
22 | -148.0,100
23 | -149.0,105
24 | -146.0,110
25 | -150.0,115
26 | -150.0,120
27 | -147.0,125
28 | -146.0,130
29 | -147.0,135
30 | -150.0,140
31 | -148.0,145
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action_discretization": 5,
 3 |   "env": "MountainCarEnv",
 4 |   "horizon": 1,
 5 |   "max_iter": 150,
 6 |   "mode": "linear",
 7 |   "policy_type": "tabular",
 8 |   "render": false,
 9 |   "state_discretization": 51
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modelinear_state_discretization51/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -500.0,0
 3 | -500.0,5
 4 | -500.0,10
 5 | -500.0,15
 6 | -500.0,20
 7 | -500.0,25
 8 | -500.0,30
 9 | -500.0,35
10 | -275.0,40
11 | -174.0,45
12 | -236.0,50
13 | -160.0,55
14 | -157.0,60
15 | -157.0,65
16 | -158.0,70
17 | -160.0,75
18 | -158.0,80
19 | -153.0,85
20 | -155.0,90
21 | -158.0,95
22 | -159.0,100
23 | -156.0,105
24 | -154.0,110
25 | -154.0,115
26 | -154.0,120
27 | -156.0,125
28 | -156.0,130
29 | -160.0,135
30 | -157.0,140
31 | -106.0,145
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action_discretization": 5,
 3 |   "env": "MountainCarEnv",
 4 |   "horizon": 1,
 5 |   "max_iter": 150,
 6 |   "mode": "nn",
 7 |   "policy_type": "tabular",
 8 |   "render": false,
 9 |   "state_discretization": 151
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization151/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-500.0
 3 | 5,-500.0
 4 | 10,-500.0
 5 | 15,-500.0
 6 | 20,-500.0
 7 | 25,-500.0
 8 | 30,-500.0
 9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-500.0
17 | 75,-500.0
18 | 80,-500.0
19 | 85,-500.0
20 | 90,-491.0
21 | 95,-169.0
22 | 100,-183.0
23 | 105,-183.0
24 | 110,-183.0
25 | 115,-183.0
26 | 120,-183.0
27 | 125,-183.0
28 | 130,-183.0
29 | 135,-183.0
30 | 140,-183.0
31 | 145,-183.0
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action_discretization": 5,
 3 |   "env": "MountainCarEnv",
 4 |   "horizon": 1,
 5 |   "max_iter": 150,
 6 |   "mode": "nn",
 7 |   "policy_type": "tabular",
 8 |   "render": false,
 9 |   "state_discretization": 21
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization21/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-500.0
 3 | 5,-500.0
 4 | 10,-500.0
 5 | 15,-500.0
 6 | 20,-500.0
 7 | 25,-500.0
 8 | 30,-500.0
 9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-500.0
17 | 75,-500.0
18 | 80,-500.0
19 | 85,-500.0
20 | 90,-500.0
21 | 95,-500.0
22 | 100,-500.0
23 | 105,-500.0
24 | 110,-500.0
25 | 115,-500.0
26 | 120,-500.0
27 | 125,-500.0
28 | 130,-500.0
29 | 135,-500.0
30 | 140,-500.0
31 | 145,-500.0
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "action_discretization": 5,
 3 |   "env": "MountainCarEnv",
 4 |   "horizon": 1,
 5 |   "max_iter": 150,
 6 |   "mode": "nn",
 7 |   "policy_type": "tabular",
 8 |   "render": false,
 9 |   "state_discretization": 51
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_ab/MountainCarEnv/modenn_state_discretization51/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-500.0
 3 | 5,-500.0
 4 | 10,-500.0
 5 | 15,-500.0
 6 | 20,-500.0
 7 | 25,-500.0
 8 | 30,-500.0
 9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-500.0
17 | 75,-500.0
18 | 80,-500.0
19 | 85,-500.0
20 | 90,-500.0
21 | 95,-500.0
22 | 100,-500.0
23 | 105,-500.0
24 | 110,-500.0
25 | 115,-500.0
26 | 120,-500.0
27 | 125,-500.0
28 | 130,-500.0
29 | 135,-500.0
30 | 140,-500.0
31 | 145,-500.0
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/CartPoleEnv/linear/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/CartPoleEnv/linear/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/CartPoleEnv/linear/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "action_discretization": 5,
3 |   "env": "CartPoleEnv",
4 |   "horizon": 1,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "tabular",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/CartPoleEnv/linear/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,10.0
 3 | 5,392.0
 4 | 10,298.0
 5 | 15,500.0
 6 | 20,500.0
 7 | 25,500.0
 8 | 30,500.0
 9 | 35,500.0
10 | 40,500.0
11 | 45,500.0
12 | 50,500.0
13 | 55,500.0
14 | 60,500.0
15 | 65,500.0
16 | 70,500.0
17 | 75,500.0
18 | 80,500.0
19 | 85,500.0
20 | 90,500.0
21 | 95,500.0
22 | 100,500.0
23 | 105,500.0
24 | 110,500.0
25 | 115,500.0
26 | 120,500.0
27 | 125,500.0
28 | 130,500.0
29 | 135,500.0
30 | 140,500.0
31 | 145,500.0
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/CartPoleEnv/nn/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/CartPoleEnv/nn/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/CartPoleEnv/nn/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "action_discretization": 5,
3 |   "env": "CartPoleEnv",
4 |   "horizon": 1,
5 |   "max_iter": 150,
6 |   "mode": "nn",
7 |   "policy_type": "tabular",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/CartPoleEnv/nn/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,9.0
 3 | 5,13.0
 4 | 10,13.0
 5 | 15,13.0
 6 | 20,13.0
 7 | 25,13.0
 8 | 30,13.0
 9 | 35,13.0
10 | 40,13.0
11 | 45,15.0
12 | 50,15.0
13 | 55,15.0
14 | 60,15.0
15 | 65,15.0
16 | 70,15.0
17 | 75,15.0
18 | 80,15.0
19 | 85,15.0
20 | 90,15.0
21 | 95,15.0
22 | 100,15.0
23 | 105,15.0
24 | 110,15.0
25 | 115,15.0
26 | 120,15.0
27 | 125,15.0
28 | 130,15.0
29 | 135,15.0
30 | 140,15.0
31 | 145,15.0
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "action_discretization": 5,
3 |   "env": "DoubleIntegratorEnv",
4 |   "horizon": 1,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "tabular",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/linear/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1086.2625732421875
 3 | 5,-1263.9954833984375
 4 | 10,-1262.9053955078125
 5 | 15,-661.170166015625
 6 | 20,-288.8149108886719
 7 | 25,-108.48724365234375
 8 | 30,-86.2402114868164
 9 | 35,-82.36270141601562
10 | 40,-77.28358459472656
11 | 45,-74.50456237792969
12 | 50,-71.93091583251953
13 | 55,-72.67729187011719
14 | 60,-70.5304946899414
15 | 65,-70.99906921386719
16 | 70,-72.21932220458984
17 | 75,-70.76229095458984
18 | 80,-69.89215087890625
19 | 85,-71.07881164550781
20 | 90,-69.90048217773438
21 | 95,-72.05101776123047
22 | 100,-70.91768646240234
23 | 105,-70.9474868774414
24 | 110,-70.95112609863281
25 | 115,-71.04618072509766
26 | 120,-70.75999450683594
27 | 125,-70.89779663085938
28 | 130,-70.42910766601562
29 | 135,-71.04066467285156
30 | 140,-70.91621398925781
31 | 145,-70.90560913085938
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "action_discretization": 5,
3 |   "env": "DoubleIntegratorEnv",
4 |   "horizon": 1,
5 |   "max_iter": 150,
6 |   "mode": "nn",
7 |   "policy_type": "tabular",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/DoubleIntegratorEnv/nn/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1086.2625732421875
 3 | 5,-1074.3865966796875
 4 | 10,-1108.3885498046875
 5 | 15,-1194.7117919921875
 6 | 20,-1210.2864990234375
 7 | 25,-1210.2864990234375
 8 | 30,-439.2337341308594
 9 | 35,-272.38214111328125
10 | 40,-275.7249755859375
11 | 45,-273.76495361328125
12 | 50,-269.75738525390625
13 | 55,-253.88551330566406
14 | 60,-233.76580810546875
15 | 65,-203.03372192382812
16 | 70,-187.3236083984375
17 | 75,-181.8560028076172
18 | 80,-172.03138732910156
19 | 85,-172.03138732910156
20 | 90,-166.24839782714844
21 | 95,-157.64881896972656
22 | 100,-157.64881896972656
23 | 105,-151.69505310058594
24 | 110,-151.69505310058594
25 | 115,-151.69505310058594
26 | 120,-151.69505310058594
27 | 125,-148.64476013183594
28 | 130,-149.03884887695312
29 | 135,-157.87362670898438
30 | 140,-157.87362670898438
31 | 145,-151.9330596923828
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/linear/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/MountainCarEnv/linear/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/linear/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/MountainCarEnv/linear/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/linear/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "action_discretization": 5,
3 |   "env": "MountainCarEnv",
4 |   "horizon": 1,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "tabular",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/linear/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-500.0
 3 | 5,-500.0
 4 | 10,-500.0
 5 | 15,-500.0
 6 | 20,-500.0
 7 | 25,-500.0
 8 | 30,-500.0
 9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-161.0
17 | 75,-154.0
18 | 80,-147.0
19 | 85,-150.0
20 | 90,-147.0
21 | 95,-149.0
22 | 100,-146.0
23 | 105,-149.0
24 | 110,-145.0
25 | 115,-105.0
26 | 120,-105.0
27 | 125,-105.0
28 | 130,-105.0
29 | 135,-105.0
30 | 140,-105.0
31 | 145,-105.0
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/nn/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/MountainCarEnv/nn/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/nn/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/MountainCarEnv/nn/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/nn/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "action_discretization": 5,
3 |   "env": "MountainCarEnv",
4 |   "horizon": 1,
5 |   "max_iter": 150,
6 |   "mode": "nn",
7 |   "policy_type": "tabular",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/MountainCarEnv/nn/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-500.0
 3 | 5,-500.0
 4 | 10,-500.0
 5 | 15,-500.0
 6 | 20,-500.0
 7 | 25,-500.0
 8 | 30,-500.0
 9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-500.0
17 | 75,-500.0
18 | 80,-500.0
19 | 85,-500.0
20 | 90,-491.0
21 | 95,-169.0
22 | 100,-183.0
23 | 105,-183.0
24 | 110,-183.0
25 | 115,-183.0
26 | 120,-183.0
27 | 125,-183.0
28 | 130,-183.0
29 | 135,-183.0
30 | 140,-183.0
31 | 145,-183.0
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/SwingUpEnv/linear/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/SwingUpEnv/linear/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/SwingUpEnv/linear/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "action_discretization": 5,
3 |   "env": "SwingUpEnv",
4 |   "horizon": 1,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "tabular",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/SwingUpEnv/linear/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1611.0622956168809
 3 | 5,-806.5761793759123
 4 | 10,-747.522897685818
 5 | 15,-540.7433507520074
 6 | 20,-825.9231338583917
 7 | 25,-944.4935466237056
 8 | 30,-991.0924221438501
 9 | 35,-1025.7969101684948
10 | 40,-1052.3914612528288
11 | 45,-1073.722422897381
12 | 50,-1139.109125287909
13 | 55,-1105.0172456320088
14 | 60,-1126.5440104057298
15 | 65,-1151.5207311147847
16 | 70,-1143.6126499683105
17 | 75,-1160.0488311303698
18 | 80,-1193.488355727164
19 | 85,-1175.0405064667475
20 | 90,-1163.1958750856556
21 | 95,-1165.6738488734682
22 | 100,-1171.2232022753903
23 | 105,-1186.8571065947756
24 | 110,-1185.2320044494918
25 | 115,-1174.4411249481252
26 | 120,-1170.948044299401
27 | 125,-1157.6196817398613
28 | 130,-1164.1346390379954
29 | 135,-1163.9363993137863
30 | 140,-1172.475828192612
31 | 145,-1233.3813361680384
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/SwingUpEnv/nn/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_c/SwingUpEnv/nn/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/SwingUpEnv/nn/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "action_discretization": 5,
3 |   "env": "SwingUpEnv",
4 |   "horizon": 1,
5 |   "max_iter": 150,
6 |   "mode": "nn",
7 |   "policy_type": "tabular",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_c/SwingUpEnv/nn/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1091.1176999012564
 3 | 5,-1091.1176999012564
 4 | 10,-1091.1176999012564
 5 | 15,-1091.1176999012564
 6 | 20,-1091.1176999012564
 7 | 25,-1091.1176999012564
 8 | 30,-1091.1176999012564
 9 | 35,-1091.1176999012564
10 | 40,-1091.1176999012564
11 | 45,-1091.1176999012564
12 | 50,-1091.1176999012564
13 | 55,-1091.1176999012564
14 | 60,-1091.1176999012564
15 | 65,-1091.1176999012564
16 | 70,-1091.1176999012564
17 | 75,-1091.1176999012564
18 | 80,-1091.1176999012564
19 | 85,-1091.1176999012564
20 | 90,-1091.1176999012564
21 | 95,-1091.1176999012564
22 | 100,-1091.1176999012564
23 | 105,-1091.1176999012564
24 | 110,-1091.1176999012564
25 | 115,-1091.1176999012564
26 | 120,-1091.1176999012564
27 | 125,-1091.1176999012564
28 | 130,-1091.1176999012564
29 | 135,-1091.1176999012564
30 | 140,-1091.1176999012564
31 | 145,-1091.1176999012564
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon1/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "CartPoleEnv",
3 |   "exp_name": "test",
4 |   "horizon": 1,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "look_ahead",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon1/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | 6.0,0
 3 | 338.0,5
 4 | 190.0,10
 5 | 364.0,15
 6 | 500.0,20
 7 | 500.0,25
 8 | 500.0,30
 9 | 500.0,35
10 | 500.0,40
11 | 500.0,45
12 | 500.0,50
13 | 500.0,55
14 | 500.0,60
15 | 500.0,65
16 | 500.0,70
17 | 500.0,75
18 | 500.0,80
19 | 500.0,85
20 | 500.0,90
21 | 500.0,95
22 | 500.0,100
23 | 500.0,105
24 | 500.0,110
25 | 500.0,115
26 | 500.0,120
27 | 500.0,125
28 | 500.0,130
29 | 500.0,135
30 | 500.0,140
31 | 500.0,145
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon2/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "CartPoleEnv",
3 |   "exp_name": "test",
4 |   "horizon": 2,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "look_ahead",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon2/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | 6.0,0
 3 | 154.0,5
 4 | 124.0,10
 5 | 62.0,15
 6 | 364.0,20
 7 | 267.0,25
 8 | 354.0,30
 9 | 500.0,35
10 | 166.0,40
11 | 139.0,45
12 | 500.0,50
13 | 500.0,55
14 | 277.0,60
15 | 500.0,65
16 | 500.0,70
17 | 125.0,75
18 | 42.0,80
19 | 500.0,85
20 | 500.0,90
21 | 500.0,95
22 | 500.0,100
23 | 500.0,105
24 | 110.0,110
25 | 500.0,115
26 | 500.0,120
27 | 500.0,125
28 | 71.0,130
29 | 500.0,135
30 | 500.0,140
31 | 224.0,145
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon3/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "CartPoleEnv",
3 |   "exp_name": "test",
4 |   "horizon": 3,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "look_ahead",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/CartPoleEnv/policy_typelook_ahead_modelinear_horizon3/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,6.0
 3 | 5,128.0
 4 | 10,226.0
 5 | 15,69.0
 6 | 20,500.0
 7 | 25,500.0
 8 | 30,135.0
 9 | 35,500.0
10 | 40,500.0
11 | 45,350.0
12 | 50,500.0
13 | 55,88.0
14 | 60,500.0
15 | 65,500.0
16 | 70,97.0
17 | 75,500.0
18 | 80,387.0
19 | 85,216.0
20 | 90,500.0
21 | 95,500.0
22 | 100,500.0
23 | 105,231.0
24 | 110,471.0
25 | 115,500.0
26 | 120,254.0
27 | 125,113.0
28 | 130,500.0
29 | 135,500.0
30 | 140,500.0
31 | 145,500.0
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "DoubleIntegratorEnv",
3 |   "exp_name": "test",
4 |   "horizon": 1,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "look_ahead",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon1/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -1086.2625732421875,0
 3 | -893.8065795898438,5
 4 | -498.8104553222656,10
 5 | -210.59446716308594,15
 6 | -94.314453125,20
 7 | -124.63628387451172,25
 8 | -128.93954467773438,30
 9 | -146.0016326904297,35
10 | -150.7569122314453,40
11 | -131.7736358642578,45
12 | -119.9066390991211,50
13 | -146.92782592773438,55
14 | -130.0223846435547,60
15 | -187.83953857421875,65
16 | -142.1328582763672,70
17 | -135.27488708496094,75
18 | -111.18167114257812,80
19 | -183.66871643066406,85
20 | -114.97431182861328,90
21 | -220.096923828125,95
22 | -122.19498443603516,100
23 | -154.7129364013672,105
24 | -135.93690490722656,110
25 | -153.4174041748047,115
26 | -130.9731903076172,120
27 | -134.832275390625,125
28 | -179.64395141601562,130
29 | -148.343017578125,135
30 | -231.59364318847656,140
31 | -145.043701171875,145
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "DoubleIntegratorEnv",
3 |   "exp_name": "test",
4 |   "horizon": 2,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "look_ahead",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon2/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -1183.3167724609375,0
 3 | -466.75787353515625,5
 4 | -452.248291015625,10
 5 | -144.1388397216797,15
 6 | -148.9638214111328,20
 7 | -131.39715576171875,25
 8 | -163.52474975585938,30
 9 | -185.48219299316406,35
10 | -154.23744201660156,40
11 | -167.04725646972656,45
12 | -158.9932403564453,50
13 | -185.72344970703125,55
14 | -166.24571228027344,60
15 | -217.92059326171875,65
16 | -161.99917602539062,70
17 | -204.30374145507812,75
18 | -236.45884704589844,80
19 | -151.06915283203125,85
20 | -197.75592041015625,90
21 | -198.5358428955078,95
22 | -182.3708038330078,100
23 | -174.017333984375,105
24 | -154.0111083984375,110
25 | -187.34825134277344,115
26 | -154.50106811523438,120
27 | -178.9189910888672,125
28 | -163.55311584472656,130
29 | -139.14596557617188,135
30 | -193.9720916748047,140
31 | -142.2526092529297,145
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "DoubleIntegratorEnv",
3 |   "exp_name": "test",
4 |   "horizon": 3,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "look_ahead",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/DoubleIntegratorEnv/policy_typelook_ahead_modelinear_horizon3/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1205.2218017578125
 3 | 5,-484.97027587890625
 4 | 10,-189.61758422851562
 5 | 15,-169.90155029296875
 6 | 20,-168.10523986816406
 7 | 25,-167.66624450683594
 8 | 30,-132.1918487548828
 9 | 35,-182.9347381591797
10 | 40,-175.6367950439453
11 | 45,-180.9919891357422
12 | 50,-140.72007751464844
13 | 55,-165.1248779296875
14 | 60,-161.497314453125
15 | 65,-167.78457641601562
16 | 70,-156.17080688476562
17 | 75,-153.41282653808594
18 | 80,-188.10372924804688
19 | 85,-162.02178955078125
20 | 90,-169.79441833496094
21 | 95,-174.7077178955078
22 | 100,-176.8898468017578
23 | 105,-123.10155487060547
24 | 110,-140.86790466308594
25 | 115,-119.35347747802734
26 | 120,-148.01553344726562
27 | 125,-164.94239807128906
28 | 130,-190.43072509765625
29 | 135,-163.91397094726562
30 | 140,-192.2443389892578
31 | 145,-182.40818786621094
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "MountainCarEnv",
3 |   "exp_name": "test",
4 |   "horizon": 1,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "look_ahead",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon1/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -500.0,0
 3 | -500.0,5
 4 | -500.0,10
 5 | -500.0,15
 6 | -500.0,20
 7 | -500.0,25
 8 | -500.0,30
 9 | -500.0,35
10 | -500.0,40
11 | -500.0,45
12 | -500.0,50
13 | -303.0,55
14 | -236.0,60
15 | -500.0,65
16 | -500.0,70
17 | -426.0,75
18 | -420.0,80
19 | -417.0,85
20 | -500.0,90
21 | -420.0,95
22 | -500.0,100
23 | -414.0,105
24 | -500.0,110
25 | -229.0,115
26 | -238.0,120
27 | -500.0,125
28 | -500.0,130
29 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "MountainCarEnv",
3 |   "exp_name": "test",
4 |   "horizon": 2,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "look_ahead",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon2/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -500.0,0
 3 | -500.0,5
 4 | -500.0,10
 5 | -500.0,15
 6 | -500.0,20
 7 | -500.0,25
 8 | -500.0,30
 9 | -500.0,35
10 | -500.0,40
11 | -500.0,45
12 | -500.0,50
13 | -312.0,55
14 | -228.0,60
15 | -500.0,65
16 | -312.0,70
17 | -237.0,75
18 | -227.0,80
19 | -240.0,85
20 | -500.0,90
21 | -315.0,95
22 | -326.0,100
23 | -500.0,105
24 | -308.0,110
25 | -500.0,115
26 | -498.0,120
27 | -223.0,125
28 | -310.0,130
29 | -500.0,135
30 | -324.0,140
31 | -500.0,145
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "MountainCarEnv",
3 |   "exp_name": "test",
4 |   "horizon": 3,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "look_ahead",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/MountainCarEnv/policy_typelook_ahead_modelinear_horizon3/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-500.0
 3 | 5,-500.0
 4 | 10,-500.0
 5 | 15,-500.0
 6 | 20,-500.0
 7 | 25,-500.0
 8 | 30,-500.0
 9 | 35,-500.0
10 | 40,-500.0
11 | 45,-420.0
12 | 50,-409.0
13 | 55,-303.0
14 | 60,-223.0
15 | 65,-256.0
16 | 70,-228.0
17 | 75,-236.0
18 | 80,-244.0
19 | 85,-301.0
20 | 90,-243.0
21 | 95,-247.0
22 | 100,-228.0
23 | 105,-237.0
24 | 110,-233.0
25 | 115,-231.0
26 | 120,-306.0
27 | 125,-293.0
28 | 130,-230.0
29 | 135,-238.0
30 | 140,-241.0
31 | 145,-234.0
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon1/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon1/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "SwingUpEnv",
3 |   "exp_name": "test",
4 |   "horizon": 1,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "look_ahead",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon1/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -1091.1176999012564,0
 3 | -1162.1493360627258,5
 4 | -1585.8347053987275,10
 5 | -1218.8241165219229,15
 6 | -1249.7989512873348,20
 7 | -1389.2664479944547,25
 8 | -1298.771278433243,30
 9 | -1683.4040480165327,35
10 | -1507.2040631908412,40
11 | -1422.1633591071438,45
12 | -1505.8400605781235,50
13 | -1531.366199951518,55
14 | -1859.374352334298,60
15 | -1481.806252109952,65
16 | -1226.7502585204463,70
17 | -1268.0980292418817,75
18 | -1446.588749786304,80
19 | -1234.6143376530424,85
20 | -1173.3418445679686,90
21 | -1452.18655778993,95
22 | -1347.0481453039677,100
23 | -1489.2808735833657,105
24 | -1191.7517135659734,110
25 | -1475.9642971133658,115
26 | -1501.2822528696033,120
27 | -1905.84236700956,125
28 | -1442.219550827407,130
29 | -1195.037295457711,135
30 | -1337.1308600611028,140
31 | -1193.1598828731228,145
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon2/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon2/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "SwingUpEnv",
3 |   "exp_name": "test",
4 |   "horizon": 2,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "look_ahead",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon2/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -1092.7898438420132,0
 3 | -1215.7979095044884,5
 4 | -900.2297428611507,10
 5 | -1329.5358970230398,15
 6 | -1421.1131724637714,20
 7 | -1343.603741625047,25
 8 | -1032.839109351379,30
 9 | -1356.2425212447233,35
10 | -1503.854075071196,40
11 | -1420.590041290277,45
12 | -1581.3682381306112,50
13 | -1513.242610160141,55
14 | -1986.0909965287349,60
15 | -1818.8259128345749,65
16 | -1547.818410397714,70
17 | -1401.8233207141116,75
18 | -2026.578864863387,80
19 | -2098.2875860182835,85
20 | -1648.5499722747932,90
21 | -2156.958773287344,95
22 | -2161.416928709432,100
23 | -2180.018826056157,105
24 | -1830.83594101961,110
25 | -1223.477103579234,115
26 | -1189.1726035364468,120
27 | -1458.5448452906428,125
28 | -1284.0247744743867,130
29 | -1210.7703241828312,135
30 | -1505.6303316533417,140
31 | -1759.0756618354292,145
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon3/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon3/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "SwingUpEnv",
3 |   "exp_name": "test",
4 |   "horizon": 3,
5 |   "max_iter": 150,
6 |   "mode": "linear",
7 |   "policy_type": "look_ahead",
8 |   "render": false
9 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part2_d/SwingUpEnv/policy_typelook_ahead_modelinear_horizon3/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1116.391526334325
 3 | 5,-1747.689019464483
 4 | 10,-1670.6732967647536
 5 | 15,-997.3284087020362
 6 | 20,-955.9082685133767
 7 | 25,-1177.432988580442
 8 | 30,-926.5136135262421
 9 | 35,-1104.902891456816
10 | 40,-1047.1105995789278
11 | 45,-1675.357489520778
12 | 50,-1120.925762449103
13 | 55,-983.6193496550145
14 | 60,-1178.07379545421
15 | 65,-1152.8209185244793
16 | 70,-1084.3333912294572
17 | 75,-864.4824550887413
18 | 80,-1128.1111239455465
19 | 85,-1877.5212413637996
20 | 90,-1459.4320766081328
21 | 95,-868.6700377542332
22 | 100,-1280.24002899705
23 | 105,-1165.836603981392
24 | 110,-1254.735230603128
25 | 115,-1310.1745190244744
26 | 120,-1038.4402663605304
27 | 125,-1260.488809359114
28 | 130,-1372.7110653444643
29 | 135,-1167.5436737789994
30 | 140,-1228.4214639353308
31 | 145,-1129.5736579851712
32 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/DoubleIntegratorEnv/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_a/DoubleIntegratorEnv/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/DoubleIntegratorEnv/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_a/DoubleIntegratorEnv/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/DoubleIntegratorEnv/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "DoubleIntegratorEnv",
 4 |   "horizon": 1,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 10,
 8 |   "policy_type": "rs",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/DoubleIntegratorEnv/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1122.7823725767166
 3 | 5,-1062.223100979545
 4 | 10,-1136.400997769638
 5 | 15,-1093.1759279603343
 6 | 20,-1064.8429670854428
 7 | 25,-1155.1049075078602
 8 | 30,-1265.864351441458
 9 | 35,-678.3290560802258
10 | 40,-381.8164868973546
11 | 45,-265.22922879100435
12 | 50,-169.64047694026792
13 | 55,-155.43764777676293
14 | 60,-97.6076650722121
15 | 65,-84.33627450797492
16 | 70,-93.36829751629186
17 | 75,-78.18198268902506
18 | 80,-76.97710627691222
19 | 85,-73.00620265014103
20 | 90,-78.6135226587545
21 | 95,-80.37329447070243
22 | 100,-78.95941675331389
23 | 105,-79.55617247669808
24 | 110,-83.57970985465705
25 | 115,-82.76591377741207
26 | 120,-85.4703499994012
27 | 125,-82.73176603238862
28 | 130,-86.52114550412108
29 | 135,-89.53232013434773
30 | 140,-93.2105884194899
31 | 145,-86.04294015559864
32 | 150,-88.7641655907943
33 | 155,-89.18217105937
34 | 160,-91.07337037083897
35 | 165,-89.36643780693299
36 | 170,-92.59827615711666
37 | 175,-96.53035937070437
38 | 180,-98.23655640085903
39 | 185,-88.73787475559153
40 | 190,-91.47763141092898
41 | 195,-93.40145281128702
42 | 200,-88.1035242005634
43 | 205,-90.125792591034
44 | 210,-87.15102617961149
45 | 215,-88.15198854500593
46 | 220,-88.5171408243244
47 | 225,-86.40243325822708
48 | 230,-87.99009024200436
49 | 235,-84.80985085614553
50 | 240,-90.16705955320245
51 | 245,-94.08950693010142
52 | 249,-90.98371433618405
53 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/MountainCarEnv/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_a/MountainCarEnv/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/MountainCarEnv/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_a/MountainCarEnv/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/MountainCarEnv/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "MountainCarEnv",
 4 |   "horizon": 1,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 10,
 8 |   "policy_type": "rs",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_a/MountainCarEnv/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-500.0
 3 | 5,-500.0
 4 | 10,-500.0
 5 | 15,-500.0
 6 | 20,-500.0
 7 | 25,-500.0
 8 | 30,-500.0
 9 | 35,-500.0
10 | 40,-500.0
11 | 45,-500.0
12 | 50,-500.0
13 | 55,-500.0
14 | 60,-500.0
15 | 65,-500.0
16 | 70,-500.0
17 | 75,-500.0
18 | 80,-500.0
19 | 85,-500.0
20 | 90,-500.0
21 | 95,-500.0
22 | 100,-500.0
23 | 105,-500.0
24 | 110,-500.0
25 | 115,-500.0
26 | 120,-500.0
27 | 125,-500.0
28 | 130,-500.0
29 | 135,-500.0
30 | 140,-500.0
31 | 145,-500.0
32 | 150,-500.0
33 | 155,-500.0
34 | 160,-500.0
35 | 165,-500.0
36 | 170,-500.0
37 | 175,-500.0
38 | 180,-500.0
39 | 185,-500.0
40 | 190,-500.0
41 | 195,-500.0
42 | 200,-500.0
43 | 205,-500.0
44 | 210,-500.0
45 | 215,-500.0
46 | 220,-500.0
47 | 225,-500.0
48 | 230,-500.0
49 | 235,-500.0
50 | 240,-500.0
51 | 245,-500.0
52 | 249,-500.0
53 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/CartPoleEnv/horizon1/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "CartPoleEnv",
 4 |   "horizon": 1,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 32,
 8 |   "policy_type": "cem",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon1/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,6.0
 3 | 10,6.0
 4 | 20,6.0
 5 | 30,6.0
 6 | 40,6.0
 7 | 50,6.0
 8 | 60,8.0
 9 | 70,38.0
10 | 80,38.0
11 | 90,23.0
12 | 100,40.0
13 | 110,40.0
14 | 120,25.0
15 | 130,34.0
16 | 140,27.0
17 | 150,45.0
18 | 160,46.0
19 | 170,68.0
20 | 180,32.0
21 | 190,34.0
22 | 200,40.0
23 | 210,64.0
24 | 220,32.0
25 | 230,69.0
26 | 240,30.0
27 | 249,50.0
28 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon10/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/CartPoleEnv/horizon10/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon10/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "CartPoleEnv",
 4 |   "horizon": 10,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 32,
 8 |   "policy_type": "cem",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon10/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | 6.0,0
 3 | 6.0,10
 4 | 6.0,20
 5 | 6.0,30
 6 | 6.0,40
 7 | 6.0,50
 8 | 6.0,60
 9 | 6.0,70
10 | 6.0,80
11 | 6.0,90
12 | 6.0,100
13 | 6.0,110
14 | 6.0,120
15 | 6.0,130
16 | 6.0,140
17 | 6.0,150
18 | 6.0,160
19 | 6.0,170
20 | 6.0,180
21 | 8.0,190
22 | 94.0,200
23 | 72.0,210
24 | 108.0,220
25 | 247.0,230
26 | 288.0,240
27 | 160.0,249
28 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon5/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/CartPoleEnv/horizon5/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon5/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "CartPoleEnv",
 4 |   "horizon": 5,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 32,
 8 |   "policy_type": "cem",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/CartPoleEnv/horizon5/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,6.0
 3 | 10,6.0
 4 | 20,6.0
 5 | 30,6.0
 6 | 40,6.0
 7 | 50,6.0
 8 | 60,6.0
 9 | 70,6.0
10 | 80,6.0
11 | 90,6.0
12 | 100,12.0
13 | 110,6.0
14 | 120,8.0
15 | 130,12.0
16 | 140,20.0
17 | 150,6.0
18 | 160,6.0
19 | 170,6.0
20 | 180,13.0
21 | 190,63.0
22 | 200,6.0
23 | 210,60.0
24 | 220,134.0
25 | 230,87.0
26 | 240,145.0
27 | 249,140.0
28 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "DoubleIntegratorEnv",
 4 |   "horizon": 1,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 32,
 8 |   "policy_type": "cem",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon1/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1083.2572626905703
 3 | 10,-1144.3036625666957
 4 | 20,-70.25937042708051
 5 | 30,-65.82760913324954
 6 | 40,-69.26650008607427
 7 | 50,-71.99475809172492
 8 | 60,-73.36248774374332
 9 | 70,-75.01519602189407
10 | 80,-76.7247153398074
11 | 90,-77.0947355563633
12 | 100,-82.09236529859092
13 | 110,-80.32221880918433
14 | 120,-86.06994235658904
15 | 130,-93.97135070453305
16 | 140,-123.41189553061248
17 | 150,-111.09701675206192
18 | 160,-80.99094440230087
19 | 170,-111.82992648461546
20 | 180,-1407.4459451091536
21 | 190,-1466.2611779251909
22 | 200,-1579.0335773941397
23 | 210,-1436.1574085697116
24 | 220,-215.21898344969065
25 | 230,-1478.0767973483007
26 | 240,-1634.5027828698492
27 | 249,-1619.9079515249616
28 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "DoubleIntegratorEnv",
 4 |   "horizon": 10,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 32,
 8 |   "policy_type": "cem",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon10/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -328.79005020730284,0
 3 | -200.98521546340913,10
 4 | -64.77700467721864,20
 5 | -84.86970171900138,30
 6 | -128.60893935069768,40
 7 | -147.48401302068382,50
 8 | -139.2296066862622,60
 9 | -110.77936569105727,70
10 | -114.11320544047803,80
11 | -125.6650792064009,90
12 | -123.18294325299779,100
13 | -138.1127465768214,110
14 | -126.7707613795519,120
15 | -112.07978614737182,130
16 | -99.5483647865851,140
17 | -116.85997383619346,150
18 | -129.06128247897595,160
19 | -104.97593874618848,170
20 | -122.84541554251891,180
21 | -114.77346058769307,190
22 | -107.45986913081748,200
23 | -101.4258130643817,210
24 | -114.49001701841401,220
25 | -104.44739383339761,230
26 | -115.09346632637698,240
27 | -129.87534014553373,249
28 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "DoubleIntegratorEnv",
 4 |   "horizon": 5,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 32,
 8 |   "policy_type": "cem",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/DoubleIntegratorEnv/horizon5/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1175.616838534374
 3 | 10,-478.01630518122784
 4 | 20,-62.36968488577191
 5 | 30,-70.35551873127073
 6 | 40,-81.91418989124216
 7 | 50,-113.87514452834378
 8 | 60,-99.39722406411673
 9 | 70,-104.74424596209462
10 | 80,-116.43105353091153
11 | 90,-120.61381139602538
12 | 100,-104.22825758830257
13 | 110,-97.51563398276409
14 | 120,-96.10433126328779
15 | 130,-117.98687857144064
16 | 140,-103.6915302113553
17 | 150,-120.80207494162703
18 | 160,-96.17403533006443
19 | 170,-95.24357935829224
20 | 180,-104.7003414784946
21 | 190,-98.02249429487223
22 | 200,-96.91254602252111
23 | 210,-104.4164736636655
24 | 220,-111.60483821355875
25 | 230,-103.64503997519414
26 | 240,-109.3393306564727
27 | 249,-100.84268701592826
28 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon1/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon1/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon1/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "MountainCarEnv",
 4 |   "horizon": 1,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 32,
 8 |   "policy_type": "cem",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon1/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-500.0
 3 | 10,-500.0
 4 | 20,-500.0
 5 | 30,-500.0
 6 | 40,-500.0
 7 | 50,-500.0
 8 | 60,-500.0
 9 | 70,-500.0
10 | 80,-500.0
11 | 90,-500.0
12 | 100,-500.0
13 | 110,-500.0
14 | 120,-500.0
15 | 130,-500.0
16 | 140,-500.0
17 | 150,-500.0
18 | 160,-500.0
19 | 170,-500.0
20 | 180,-500.0
21 | 190,-500.0
22 | 200,-500.0
23 | 210,-500.0
24 | 220,-500.0
25 | 230,-500.0
26 | 240,-500.0
27 | 249,-500.0
28 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon10/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon10/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon10/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon10/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon10/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "MountainCarEnv",
 4 |   "horizon": 10,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 32,
 8 |   "policy_type": "cem",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon10/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -500.0,0
 3 | -500.0,10
 4 | -500.0,20
 5 | -500.0,30
 6 | -500.0,40
 7 | -500.0,50
 8 | -500.0,60
 9 | -500.0,70
10 | -500.0,80
11 | -500.0,90
12 | -500.0,100
13 | -500.0,110
14 | -500.0,120
15 | -500.0,130
16 | -500.0,140
17 | -500.0,150
18 | -500.0,160
19 | -500.0,170
20 | -500.0,180
21 | -500.0,190
22 | -500.0,200
23 | -500.0,210
24 | -500.0,220
25 | -500.0,230
26 | -500.0,240
27 | -500.0,249
28 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon5/contour.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon5/contour.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon5/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/MountainCarEnv/horizon5/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon5/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "MountainCarEnv",
 4 |   "horizon": 5,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 32,
 8 |   "policy_type": "cem",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/MountainCarEnv/horizon5/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-500.0
 3 | 10,-500.0
 4 | 20,-500.0
 5 | 30,-500.0
 6 | 40,-500.0
 7 | 50,-500.0
 8 | 60,-500.0
 9 | 70,-500.0
10 | 80,-500.0
11 | 90,-500.0
12 | 100,-500.0
13 | 110,-500.0
14 | 120,-500.0
15 | 130,-500.0
16 | 140,-500.0
17 | 150,-500.0
18 | 160,-500.0
19 | 170,-500.0
20 | 180,-500.0
21 | 190,-500.0
22 | 200,-500.0
23 | 210,-500.0
24 | 220,-500.0
25 | 230,-500.0
26 | 240,-500.0
27 | 249,-500.0
28 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon1/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/SwingUpEnv/horizon1/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "SwingUpEnv",
 4 |   "horizon": 1,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 32,
 8 |   "policy_type": "cem",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon1/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1396.667327423777
 3 | 10,-1236.5227676190348
 4 | 20,-1201.2862992423045
 5 | 30,-1264.1382127123386
 6 | 40,-1525.1329954184694
 7 | 50,-1494.0356861809296
 8 | 60,-1180.626836176929
 9 | 70,-1115.4489761429518
10 | 80,-2462.797364356984
11 | 90,-1093.9334660251343
12 | 100,-2196.921839095621
13 | 110,-2146.571425384078
14 | 120,-1093.7293024977773
15 | 130,-1093.9334660251343
16 | 140,-1097.0459684677512
17 | 150,-1357.03826196838
18 | 160,-1095.0031456020984
19 | 170,-1103.5009875248982
20 | 180,-1318.8339192880053
21 | 190,-1103.5009875248982
22 | 200,-2462.797364356984
23 | 210,-2032.1447096031256
24 | 220,-1770.8190385950547
25 | 230,-1831.6227592496257
26 | 240,-1627.0230466828693
27 | 249,-1729.4831263473936
28 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon10/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/SwingUpEnv/horizon10/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon10/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "SwingUpEnv",
 4 |   "horizon": 10,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 32,
 8 |   "policy_type": "cem",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon10/progress.csv:
--------------------------------------------------------------------------------
 1 | Average Returns,Iteration
 2 | -1093.708926777139,0
 3 | -1095.0031456020984,10
 4 | -1177.3105002993598,20
 5 | -2317.7596258376225,30
 6 | -3112.9437564743707,40
 7 | -2272.4569652126597,50
 8 | -2422.264676378556,60
 9 | -2353.919829701844,70
10 | -2331.054111508252,80
11 | -2346.653255271173,90
12 | -2387.710052225485,100
13 | -2078.59505659242,110
14 | -2904.856342572041,120
15 | -1352.730304454663,130
16 | -1311.3610187713514,140
17 | -1365.0260867375184,150
18 | -1243.4885705745057,160
19 | -1244.0366648294828,170
20 | -1241.9574480293968,180
21 | -1280.8335180998288,190
22 | -1242.429338154825,200
23 | -750.119641550883,210
24 | -1257.9522315148947,220
25 | -1158.085668416221,230
26 | -1226.716938288927,240
27 | -1235.294428793677,249
28 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon5/learning_curve.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part3_b/SwingUpEnv/horizon5/learning_curve.png


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon5/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "batch_size": 256,
 3 |   "env": "SwingUpEnv",
 4 |   "horizon": 5,
 5 |   "learning_rate": 0.001,
 6 |   "max_iter": 250,
 7 |   "num_acts": 32,
 8 |   "policy_type": "cem",
 9 |   "render": false
10 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part3_b/SwingUpEnv/horizon5/progress.csv:
--------------------------------------------------------------------------------
 1 | Iteration,Average Returns
 2 | 0,-1092.2915620815575
 3 | 10,-1103.5009875248982
 4 | 20,-2471.439718954018
 5 | 30,-2074.797792846919
 6 | 40,-2424.161644445676
 7 | 50,-2426.6673415803493
 8 | 60,-2449.902157955784
 9 | 70,-2617.3939123653045
10 | 80,-2456.9473873794736
11 | 90,-2455.0500879372407
12 | 100,-2438.936672619652
13 | 110,-2633.558000601011
14 | 120,-2517.561124215514
15 | 130,-2455.0688311597387
16 | 140,-2629.0789210394737
17 | 150,-2919.149640766505
18 | 160,-1305.1880741230243
19 | 170,-1168.2989464442921
20 | 180,-1481.954819420085
21 | 190,-760.3012598426423
22 | 200,-1276.914365026002
23 | 210,-1278.1670534512004
24 | 220,-1493.6650277140825
25 | 230,-1293.9963890328356
26 | 240,-1261.0170201625338
27 | 249,-1248.4889372240405
28 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part5/CartPoleEnv/modelinear_state_discretization51/log.txt:
--------------------------------------------------------------------------------
1 | Logging to C:\Users\Minjune\Desktop\287\cs287hw1\cs287-hw1-code/data/part5/CartPoleEnv/modelinear_state_discretization51/
2 | 


--------------------------------------------------------------------------------
/cs287hw1/data/part5/CartPoleEnv/modelinear_state_discretization51/params.json:
--------------------------------------------------------------------------------
1 | {
2 |   "env": "CartPoleEnv",
3 |   "max_iter": 150,
4 |   "mode": "linear",
5 |   "render": false,
6 |   "state_discretization": 51
7 | }


--------------------------------------------------------------------------------
/cs287hw1/data/part5/CartPoleEnv/modelinear_state_discretization51/progress.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/data/part5/CartPoleEnv/modelinear_state_discretization51/progress.csv


--------------------------------------------------------------------------------
/cs287hw1/envs/__init__.py:
--------------------------------------------------------------------------------
1 | from envs.cart_pole_env import CartPoleEnv
2 | from envs.mountain_hill_env import MountainCarEnv
3 | from envs.double_integrator_env import DoubleIntegratorEnv
4 | from envs.swing_up_env import SwingUpEnv
5 | from envs.grid1d_env import Grid1DEnv
6 | from envs.gridworld_env import GridWorldEnv
7 | 


--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/cart_pole_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/cart_pole_env.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/double_integrator_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/double_integrator_env.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/grid1d_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/grid1d_env.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/gridworld_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/gridworld_env.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/mountain_hill_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/mountain_hill_env.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/envs/__pycache__/swing_up_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/envs/__pycache__/swing_up_env.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/part1/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part1/__init__.py


--------------------------------------------------------------------------------
/cs287hw1/part1/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part1/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/part1/__pycache__/tabular_value_iteration.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part1/__pycache__/tabular_value_iteration.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/part1/run_part1.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | import os
 3 | import logger
 4 | import json
 5 | 
 6 | 
 7 | def main(args):
 8 |     render = args.render
 9 |     if not render:
10 |         import matplotlib
11 |         matplotlib.use('Agg')
12 |         import matplotlib.pyplot as plt
13 |     from utils.utils import TabularPolicy, TabularValueFun
14 |     from part1.tabular_value_iteration import ValueIteration
15 |     from envs import Grid1DEnv, GridWorldEnv
16 |     envs = [GridWorldEnv(seed=0), GridWorldEnv(seed=1)]
17 | 
18 |     for env in envs:
19 |         env_name = env.__name__
20 |         exp_dir = os.getcwd() + '/data/part1/%s/policy_type%s_temperature%s/' % (env_name, args.policy_type, args.temperature)
21 |         logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
22 |         args_dict = vars(args)
23 |         args_dict['env'] = env_name
24 |         json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True)
25 | 
26 |         policy = TabularPolicy(env)
27 |         value_fun = TabularValueFun(env)
28 |         algo = ValueIteration(env,
29 |                               value_fun,
30 |                               policy,
31 |                               policy_type=args.policy_type,
32 |                               render=render,
33 |                               temperature=args.temperature)
34 |         algo.train()
35 | 
36 | 
37 | if __name__ == "__main__":
38 |     parser = argparse.ArgumentParser()
39 |     parser.add_argument("--policy_type", "-p", type=str, default='deterministic', choices=["deterministic", "max_ent"],
40 |                         help="Whether to train a deterministic policy or a maximum entropy one")
41 |     parser.add_argument("--render", "-r", action='store_true', help="Vizualize the policy and contours when training")
42 |     parser.add_argument("--temperature", "-t", type=float, default=1.,
43 |                         help="Temperature parameter for maximum entropy policies")
44 |     args = parser.parse_args()
45 |     main(args)
46 | 


--------------------------------------------------------------------------------
/cs287hw1/part2/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part2/__init__.py


--------------------------------------------------------------------------------
/cs287hw1/part2/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part2/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/part2/__pycache__/discretize.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part2/__pycache__/discretize.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/part2/__pycache__/look_ahead_policy.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part2/__pycache__/look_ahead_policy.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/part2/look_ahead_policy.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from gym import spaces
 3 | 
 4 | 
 5 | class LookAheadPolicy(object):
 6 |     """
 7 |     Look ahead policy
 8 | 
 9 |     -- VARIABLES/FUNCTIONS YOU WILL NEED TO USE --
10 |     * self.horizon (int): Horizon for the look ahead policy
11 | 
12 |     * act_dim (int): Dimension of the state space
13 | 
14 |     * value_fun (TabularValueFun):
15 |                 - get_values(states): if states is None returns the values of all the states. Otherwise, it returns the
16 |                                       values of the specified states
17 |     * env (Env):
18 |                 - vec_set_state(states): vectorized (multiple environments in parallel) version of reseting the
19 |                 environment to a state for a batch of states.
20 |                 - vec_step(actions): vectorized (multiple environments in parallel) version of stepping through the
21 |                 environment for a batch of actions. Returns the next observations, rewards, dones signals, env infos
22 |                 (last not used).
23 |     """
24 |     def __init__(self,
25 |                  env,
26 |                  value_fun,
27 |                  horizon,
28 |                  ):
29 |         self.env = env
30 |         self.discount = env.discount
31 |         self._value_fun = value_fun
32 |         self.horizon = horizon
33 | 
34 |     def get_action(self, state):
35 |         """
36 |         Get the best action by doing look ahead, covering actions for the specified horizon.
37 |         HINT: use np.meshgrid to compute all the possible action sequences.
38 |         :param state:
39 |         :return: best_action (int)
40 |            """
41 |         assert isinstance(self.env.action_space, spaces.Discrete)
42 |         act_dim = self.env.action_space.n
43 |         """ INSERT YOUR CODE HERE"""
44 |         actions = np.arange(act_dim)
45 |         sequences = np.array(np.meshgrid(*np.tile(np.arange(act_dim), 
46 |                                                   (self.horizon, 1)))).T.reshape(-1, self.horizon).T
47 |         return sequences[0, np.argmax(self.get_returns(state, sequences))]
48 | 
49 |     def get_returns(self, state, actions):
50 |         """
51 |         :param state: current state of the policy
52 |         :param actions: array of actions of shape [horizon, num_acts]
53 |         :return: returns for the specified horizon + self.discount ^ H value_fun
54 |         HINT: Make sure to take the discounting and done into acount!
55 |         """
56 |         assert self.env.vectorized
57 |         """ INSERT YOUR CODE HERE"""
58 |         num_acts = actions.shape[1]
59 |         returns = np.zeros(num_acts)
60 |         # self.env.set_state(state)
61 |         # if len(actions.shape) < 3:
62 |         #     self.env.vec_set_state(np.full(num_acts, state))
63 |         # else:
64 |         self.env.vec_set_state(np.tile(state, (num_acts, 1)))
65 |         for h in range(self.horizon):
66 |             observations, rewards, dones, env_infos = self.env.vec_step(actions[h])
67 |             self.env.vec_set_state(observations)
68 |             returns += self.discount ** h * rewards
69 |         returns += self.discount ** self.horizon * self._value_fun.get_values(observations)
70 |         return returns
71 | 
72 |     def update(self, actions):
73 |         pass
74 | 


--------------------------------------------------------------------------------
/cs287hw1/part2/run_part2_c.py:
--------------------------------------------------------------------------------
 1 | import logger
 2 | import argparse
 3 | import os
 4 | import json
 5 | import numpy as np; np.random.seed(0)
 6 | 
 7 | 
 8 | def main(args):
 9 |     render = args.render
10 |     if not render:
11 |         import matplotlib
12 |         matplotlib.use('Agg')
13 |         import matplotlib.pyplot as plt
14 |     from envs import DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv, SwingUpEnv
15 |     from utils.utils import TabularPolicy, TabularValueFun
16 |     from part1.tabular_value_iteration import ValueIteration
17 |     from part2.look_ahead_policy import LookAheadPolicy
18 |     from part2.discretize import Discretize
19 |     envs = [DoubleIntegratorEnv(), MountainCarEnv(), CartPoleEnv(), SwingUpEnv()]
20 | 
21 |     for env in envs:
22 |         env_name = env.__class__.__name__
23 |         state_discretization = 151 if env_name in ['MountainCarEnv', 'DoubleIntegratorEnv'] else 21
24 |         exp_dir = os.getcwd() + '/data/part2_c/%s/%s' % (env_name, args.mode)
25 |         logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
26 |         args_dict = vars(args)
27 |         args_dict['env'] = env_name
28 |         json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True)
29 | 
30 |         env = Discretize(env,
31 |                          state_discretization=state_discretization,
32 |                          mode=args.mode
33 |                          )
34 |         value_fun = TabularValueFun(env)
35 |         if args.policy_type == 'tabular':
36 |             policy = TabularPolicy(env)
37 |         elif args.policy_type == 'look_ahead':
38 |             policy = LookAheadPolicy(env, value_fun, args.horizon)
39 |         else:
40 |             raise NotImplementedError
41 |         algo = ValueIteration(env,
42 |                               value_fun,
43 |                               policy,
44 |                               render=render,
45 |                               max_itr=args.max_iter,
46 |                               num_rollouts=1,
47 |                               render_itr=5,
48 |                               log_itr=5)
49 |         algo.train()
50 | 
51 | 
52 | if __name__ == "__main__":
53 |     parser = argparse.ArgumentParser()
54 |     parser.add_argument("--render", "-r", action='store_true',
55 |                         help="Vizualize the policy and contours when training")
56 |     parser.add_argument("--action_discretization", "-a", type=int, default=5,
57 |                         help="Number of points per state dimension to discretize")
58 |     parser.add_argument("--mode", "-m", type=str, default='nn', choices=['nn', 'linear'],
59 |                         help="Mode of interpolate between discrete points")
60 |     parser.add_argument("--policy_type", "-p", type=str, default='tabular', choices=['tabular', 'look_ahead'],
61 |                         help='Type of policy to use. Whether to use look ahead policy or tabular')
62 |     parser.add_argument("--horizon", "-H", type=int, default=1,
63 |                         help='Planning horizon for the look ahead policy')
64 |     parser.add_argument("--max_iter", "-i", type=int, default=150,
65 |                         help='Maximum number of iterations for the value iteration algorithm')
66 |     args = parser.parse_args()
67 |     main(args)
68 | 


--------------------------------------------------------------------------------
/cs287hw1/part3/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part3/__init__.py


--------------------------------------------------------------------------------
/cs287hw1/part3/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part3/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/part3/__pycache__/continous_value_iteration.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part3/__pycache__/continous_value_iteration.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/part3/__pycache__/look_ahead_policy.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part3/__pycache__/look_ahead_policy.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/part3/run_part3_a.py:
--------------------------------------------------------------------------------
 1 | import logger
 2 | import argparse
 3 | import os
 4 | import json
 5 | import numpy as np; np.random.seed(0)
 6 | 
 7 | 
 8 | def main(args):
 9 |     render = args.render
10 |     if not render:
11 |         import matplotlib
12 |         matplotlib.use('Agg')
13 |         import matplotlib.pyplot as plt
14 |     from envs import DoubleIntegratorEnv, MountainCarEnv, CartPoleEnv, SwingUpEnv
15 |     from utils.utils import VectorizeMujocoEnv
16 |     from part3.look_ahead_policy import LookAheadPolicy
17 |     from utils.value_functions import MLPValueFun
18 |     from part3.continous_value_iteration import ContinousStateValueIteration
19 |     envs = [DoubleIntegratorEnv(), MountainCarEnv()]
20 | 
21 |     for env in envs:
22 |         env_name = env.__class__.__name__
23 |         exp_dir = os.getcwd() + '/data/part3_a/%s' % (env_name)
24 |         logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
25 |         args_dict = vars(args)
26 |         args_dict['env'] = env_name
27 |         json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True)
28 | 
29 |         value_fun = MLPValueFun(env)
30 |         policy = LookAheadPolicy(env,
31 |                                  value_fun,
32 |                                  horizon=args.horizon,
33 |                                  look_ahead_type=args.policy_type,
34 |                                  num_acts=args.num_acts)
35 |         algo = ContinousStateValueIteration(env,
36 |                                             value_fun,
37 |                                             policy,
38 |                                             learning_rate=args.learning_rate,
39 |                                             batch_size=args.batch_size,
40 |                                             num_acts=args.num_acts,
41 |                                             render=args.render,
42 |                                             max_itr=args.max_iter,
43 |                                             log_itr=5)
44 |         algo.train()
45 | 
46 | 
47 | if __name__ == "__main__":  # script entry point: declare the CLI, parse it, run main()
48 |     parser = argparse.ArgumentParser()
49 |     parser.add_argument("--render", "-r", action='store_true',  # boolean flag, False unless given
50 |                         help="Vizualize the policy and contours when training")
51 |     parser.add_argument("--policy_type", "-p", type=str, default='rs', choices=['cem', 'rs'],
52 |                         help='Type of policy to use. Whether to use look ahead with cross-entropy \
53 |                         method or random shooting')  # backslash continues the literal: this line's indent is part of the text
54 |     parser.add_argument("--horizon", "-H", type=int, default=1,
55 |                         help='Planning horizon for the look ahead policy')
56 |     parser.add_argument("--max_iter", "-i", type=int, default=250,
57 |                         help='Maximum number of iterations for the value iteration algorithm')
58 |     parser.add_argument("--learning_rate", "-lr", type=float, default=1e-3,
59 |                         help='Learning rate for training the value function')
60 |     parser.add_argument("--batch_size", "-bs", type=int, default=256,
61 |                         help='batch size for training the value function')
62 |     parser.add_argument("--num_acts", "-a", type=int, default=10,  # passed to both the policy and the algorithm in main()
63 |                         help='Number of actions sampled for maximizing the value function')
64 |     args = parser.parse_args()
65 |     main(args)  # main() also stores an 'env' entry on this namespace


--------------------------------------------------------------------------------
/cs287hw1/part4/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part4/__init__.py


--------------------------------------------------------------------------------
/cs287hw1/part4/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part4/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/part4/__pycache__/discretize.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/part4/__pycache__/discretize.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/part4/run_part4.py:
--------------------------------------------------------------------------------
 1 | import logger
 2 | import argparse
 3 | import os
 4 | import json
 5 | import numpy as np; np.random.seed(0)
 6 | 
 7 | 
 8 | def main(args):
 9 |     render = args.render
10 |     if not render:
11 |         import matplotlib
12 |         matplotlib.use('Agg')
13 |         import matplotlib.pyplot as plt
14 |     from envs import CartPoleEnv, SwingUpEnv
15 |     from utils.utils import TabularPolicy, TabularValueFun
16 |     from part1.tabular_value_iteration import ValueIteration
17 |     from part4.discretize import Discretize
18 |     envs = [CartPoleEnv(), SwingUpEnv()]
19 | 
20 |     for env in envs:
21 |         env_name = env.__class__.__name__
22 |         exp_dir = os.getcwd() + '/data/part5/%s/mode%s_state_discretization%s/' % (env_name,
23 |                                                                                       args.mode,
24 |                                                                                       str(args.state_discretization)
25 |                                                                                       )
26 |         logger.configure(dir=exp_dir, format_strs=['stdout', 'log', 'csv'])
27 |         args_dict = vars(args)
28 |         args_dict['env'] = env_name
29 |         json.dump(vars(args), open(exp_dir + '/params.json', 'w'), indent=2, sort_keys=True)
30 | 
31 |         env = Discretize(env,
32 |                          state_discretization=args.state_discretization,
33 |                          mode=args.mode
34 |                          )
35 |         value_fun = TabularValueFun(env)
36 |         policy = TabularPolicy(env)
37 |         algo = ValueIteration(env,
38 |                               value_fun,
39 |                               policy,
40 |                               render=render,
41 |                               max_itr=args.max_iter,
42 |                               num_rollouts=1,
43 |                               render_itr=5,
44 |                               log_itr=5)
45 |         algo.train()
46 | 
47 | 
48 | if __name__ == "__main__":
49 |     parser = argparse.ArgumentParser()
50 |     parser.add_argument("--render", "-r", action='store_true',
51 |                         help="Vizualize the policy and contours when training")
52 |     parser.add_argument("--state_discretization", "-s", type=int, default=21,
53 |                         help="Number of points per state dimension to discretize")
54 |     parser.add_argument("--mode", "-m", type=str, default='nn', choices=['nn', 'linear'],
55 |                         help="Mode of interpolate between discrete points")
56 |     parser.add_argument("--max_iter", "-i", type=int, default=150,
57 |                         help='Maximum number of iterations for the value iteration algorithm')
58 |     args = parser.parse_args()
59 |     main(args)
60 | 


--------------------------------------------------------------------------------
/cs287hw1/requirements.txt:
--------------------------------------------------------------------------------
1 | autograd
2 | gym
3 | joblib
4 | matplotlib
5 | moviepy
6 | numpy
7 | Flask==1.0.2
8 | plotly==3.2.0
9 | 


--------------------------------------------------------------------------------
/cs287hw1/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__init__.py


--------------------------------------------------------------------------------
/cs287hw1/utils/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/utils/__pycache__/plot.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__pycache__/plot.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/utils/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__pycache__/utils.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/utils/__pycache__/value_functions.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/utils/__pycache__/value_functions.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/utils/value_functions.py:
--------------------------------------------------------------------------------
 1 | import autograd.numpy as np
 2 | from collections import OrderedDict
 3 | 
 4 | 
 5 | class MLPValueFun(object):
 6 |     _activations = {
 7 |         'tanh': np.tanh,
 8 |         None: lambda x: x,
 9 |         'relu': lambda x: np.maximum(x, 0)
10 |     }
11 | 
12 |     def __init__(self, env, hidden_sizes=(256, 256), activation='relu'):
13 |         self._env = env
14 |         self._params = dict()
15 |         self._build(hidden_sizes, activation)
16 | 
17 |     def _build(self, hidden_sizes=(256, 256), activation='relu', *args, **kwargs):
18 |         self._activation = self._activations[activation]
19 |         self._hidden_sizes = hidden_sizes
20 |         prev_size = self._env.observation_space.shape[0]
21 |         for i, hidden_size in enumerate(hidden_sizes):
22 |             W = np.random.normal(loc=0, scale=1/prev_size, size=(hidden_size, prev_size))
23 |             b = np.zeros((hidden_size,))
24 | 
25 |             self._params['W_%d' % i] = W
26 |             self._params['b_%d' % i] = b
27 | 
28 |             prev_size = hidden_size
29 | 
30 |         W = np.random.normal(loc=0, scale=1/prev_size, size=(1, prev_size))
31 |         b = np.zeros((1,))
32 |         self._params['W_out'] = W
33 |         self._params['b_out'] = b
34 | 
35 |     def get_values(self, states, params=None):
36 |         params = self._params if params is None else params
37 |         x = states
38 |         for i, hidden_size in enumerate(self._hidden_sizes):
39 |             x = np.dot(params['W_%d' % i], x.T).T + params['b_%d' % i]
40 |             x = self._activation(x)
41 |         values = np.dot(params['W_out'], x.T).T + params['b_out']
42 |         return values[:, 0]
43 | 
44 |     def update(self, params):
45 |         assert set(params.keys()) == set(self._params.keys())
46 |         self._params = params
47 | 
48 | 


--------------------------------------------------------------------------------
/cs287hw1/viskit/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'dementrock'
2 | 


--------------------------------------------------------------------------------
/cs287hw1/viskit/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/viskit/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw1/viskit/__pycache__/core.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw1/viskit/__pycache__/core.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw2/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/.DS_Store


--------------------------------------------------------------------------------
/cs287hw2/.ipynb_checkpoints/Untitled-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [],
3 |  "metadata": {},
4 |  "nbformat": 4,
5 |  "nbformat_minor": 2
6 | }
7 | 


--------------------------------------------------------------------------------
/cs287hw2/.ipynb_checkpoints/Untitled1-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [],
3 |  "metadata": {},
4 |  "nbformat": 4,
5 |  "nbformat_minor": 2
6 | }
7 | 


--------------------------------------------------------------------------------
/cs287hw2/.ipynb_checkpoints/Untitled3-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [],
3 |  "metadata": {},
4 |  "nbformat": 4,
5 |  "nbformat_minor": 2
6 | }
7 | 


--------------------------------------------------------------------------------
/cs287hw2/__pycache__/rot_utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/__pycache__/rot_utils.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw2/__pycache__/simulators.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/__pycache__/simulators.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw2/cs287hw2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/cs287hw2.pdf


--------------------------------------------------------------------------------
/cs287hw2/environment.yml:
--------------------------------------------------------------------------------
 1 | name: cs287hw2
 2 | channels:
 3 | - defaults
 4 | - conda-forge
 5 | dependencies:
 6 | - python=3.7.3
 7 | - jupyter
 8 | - patchelf=0.9 # comment this line out on Mac
 9 | - pip>=19.1
10 | - pip:
11 |     - -r ./requirements.txt
12 | 


--------------------------------------------------------------------------------
/cs287hw2/envs/__pycache__/cheetah_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/envs/__pycache__/cheetah_env.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw2/envs/__pycache__/hopper_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/envs/__pycache__/hopper_env.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw2/envs/cheetah_env.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import copy
 3 | from gym import utils
 4 | from gym.envs.mujoco import mujoco_env
 5 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
 6 | import os
 7 | 
 8 | class CheetahModEnv(HalfCheetahEnv, mujoco_env.MujocoEnv, utils.EzPickle):
 9 |     def __init__(self):
10 | 
11 |         self.perturb_joints = True
12 |         self.count = 0
13 |         mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 4)
14 |         utils.EzPickle.__init__(self)
15 |         self.perturb_idx=0
16 |         self.init_geom_rgba = self.model.geom_rgba.copy()
17 | 
18 | 
19 |     def f_sim(self, x0, u, dt, rollout=False, perturb=.01):
20 |         nq, nv = self.model.nq, self.model.nv
21 |         self.sim.reset()
22 |         qpos = copy.deepcopy(self.init_qpos)
23 |         qvel = copy.deepcopy(self.init_qvel)
24 |         
25 |         qpos[:] = x0[:nq]
26 |         qvel[:] = x0[nq:]
27 |       
28 |         self.set_state(qpos, qvel)
29 |         if rollout:
30 |             self.step(u, perturb=perturb)
31 |         else:
32 |             self.perturb_joints = False
33 |             self.step(u)
34 |             self.perturb_joints = True
35 |         return np.concatenate([
36 |             self.sim.data.qpos.flat[:],
37 |             self.sim.data.qvel.flat[:]
38 |         ])
39 |     
40 |     def step(self, a, perturb=.01):
41 |         self.count += 1
42 |         if self.perturb_joints and self.count%5==0:
43 |             self.perturb_idx = np.random.randint(0,6)
44 |             a[self.perturb_idx] += np.random.choice(np.array([-1*perturb,perturb]))
45 |             model_id = self.model.geom_names.index(self.model.joint_names[self.perturb_idx+3])
46 |             geom_rgba = self.init_geom_rgba.copy()
47 |             geom_rgba[model_id] = [0, 1, 1 ,1]
48 |             self.model.geom_rgba[:] = geom_rgba
49 |         else:
50 |             if self.count > 1 and self.count%8==0:
51 |                 model_id = self.model.geom_names.index(self.model.joint_names[self.perturb_idx+3])
52 |                 geom_rgba = self.init_geom_rgba.copy()
53 |                 self.model.geom_rgba[:] = geom_rgba
54 |         xposbefore = self.sim.data.qpos[0]
55 |         self.do_simulation(a, self.frame_skip)
56 |         xposafter = self.sim.data.qpos[0]
57 |         ob = self._get_obs()
58 |         reward_ctrl = - 0.1 * np.square(a).sum()
59 |         reward_run = (xposafter - xposbefore)/self.dt
60 |         reward = reward_ctrl + reward_run
61 |         done = False
62 |         return ob, reward, done, dict(reward_run=reward_run, reward_ctrl=reward_ctrl)
63 | 


--------------------------------------------------------------------------------
/cs287hw2/img/fig_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/img/fig_a.png


--------------------------------------------------------------------------------
/cs287hw2/img/ref_a.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/img/ref_a.png


--------------------------------------------------------------------------------
/cs287hw2/img/ref_b_cartpole.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/img/ref_b_cartpole.png


--------------------------------------------------------------------------------
/cs287hw2/img/ref_b_heli.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/img/ref_b_heli.png


--------------------------------------------------------------------------------
/cs287hw2/mats/cartpole_traj.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/cartpole_traj.mat


--------------------------------------------------------------------------------
/cs287hw2/mats/heli_traj.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/heli_traj.mat


--------------------------------------------------------------------------------
/cs287hw2/mats/p_a_w.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/p_a_w.mat


--------------------------------------------------------------------------------
/cs287hw2/mats/p_b_w.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/p_b_w.mat


--------------------------------------------------------------------------------
/cs287hw2/mats/p_c_heli_starting_states.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/p_c_heli_starting_states.mat


--------------------------------------------------------------------------------
/cs287hw2/mats/p_c_w.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/mats/p_c_w.mat


--------------------------------------------------------------------------------
/cs287hw2/requirements.txt:
--------------------------------------------------------------------------------
 1 | scipy==1.1.0
 2 | moviepy==1.0.0
 3 | seaborn==0.9.0
 4 | matplotlib==3.0.2
 5 | mujoco_py>=1.50.1.56
 6 | #mujoco_py==2.0.2.2
 7 | #numpy==1.15.4
 8 | numpy==1.16.1
 9 | gym==0.12.5
10 | 


--------------------------------------------------------------------------------
/cs287hw2/vids/visualization_hopper.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw2/vids/visualization_hopper.gif


--------------------------------------------------------------------------------
/cs287hw3/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/.DS_Store


--------------------------------------------------------------------------------
/cs287hw3/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | # Default ignored files
3 | /workspace.xml


--------------------------------------------------------------------------------
/cs287hw3/.idea/assignment2.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="inheritedJdk" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/cs287hw3/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 | <component name="InspectionProjectProfileManager">
2 |   <settings>
3 |     <option name="USE_PROJECT_PROFILE" value="false" />
4 |     <version value="1.0" />
5 |   </settings>
6 | </component>


--------------------------------------------------------------------------------
/cs287hw3/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="JavaScriptSettings">
4 |     <option name="languageLevel" value="ES6" />
5 |   </component>
6 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7" project-jdk-type="Python SDK" />
7 | </project>


--------------------------------------------------------------------------------
/cs287hw3/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/assignment2.iml" filepath="$PROJECT_DIR$/.idea/assignment2.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/cs287hw3/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="VcsDirectoryMappings">
4 |     <mapping directory="$PROJECT_DIR$/.." vcs="Git" />
5 |   </component>
6 | </project>


--------------------------------------------------------------------------------
/cs287hw3/__pycache__/utils.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/__pycache__/utils.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw3/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/__pycache__/utils.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw3/envs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__init__.py


--------------------------------------------------------------------------------
/cs287hw3/envs/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/__init__.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw3/envs/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw3/envs/__pycache__/cart_pole_env.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/cart_pole_env.cpython-36.pyc


--------------------------------------------------------------------------------
/cs287hw3/envs/__pycache__/cart_pole_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/cart_pole_env.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw3/envs/__pycache__/cheetah_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/cheetah_env.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw3/envs/__pycache__/hopper_env.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/envs/__pycache__/hopper_env.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw3/envs/cheetah_env.py:
--------------------------------------------------------------------------------
 1 | import copy
 2 | from gym import utils
 3 | from gym.envs.mujoco import mujoco_env
 4 | from gym.envs.mujoco.half_cheetah import HalfCheetahEnv
 5 | 
 6 | 
 7 | class CheetahModEnv(HalfCheetahEnv, mujoco_env.MujocoEnv, utils.EzPickle):
 8 |     def __init__(self):
 9 | 
10 |         self.perturb_joints = True
11 |         self.count = 0
12 |         mujoco_env.MujocoEnv.__init__(self, "half_cheetah.xml", 8)
13 |         utils.EzPickle.__init__(self)
14 |         self.H = 30
15 |         self.du = self.action_space.shape[0]
16 |         self.dx = self.observation_space.shape[0]
17 | 
18 |     def step(self, a):
19 |         self.count += 1
20 |         xposbefore = self.sim.data.qpos[0]
21 |         self.do_simulation(a, self.frame_skip)
22 |         xposafter = self.sim.data.qpos[0]
23 |         ob = self._get_obs()
24 |         reward = (xposafter - xposbefore)/self.dt
25 |         done = False
26 |         return ob, -reward, done, dict()
27 | 
28 |     def set_state(self, state):
29 |         nq, nv = self.model.nq, self.model.nv
30 |         self.sim.reset()
31 |         qpos = copy.deepcopy(self.init_qpos)
32 | 
33 |         qpos[1:nq] = state[:nq - 1]
34 |         qvel = state[nq - 1:]
35 | 
36 |         mujoco_env.MujocoEnv.set_state(self, qpos, qvel)
37 | 
38 |     def reset_model(self):
39 |         mujoco_env.MujocoEnv.set_state(self, self.init_qpos, self.init_qvel)
40 |         return self._get_obs()
41 | 


--------------------------------------------------------------------------------
/cs287hw3/envs/hopper_env.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | from gym import utils
 3 | import copy
 4 | from gym.envs.mujoco import mujoco_env
 5 | from gym.envs.mujoco.hopper import HopperEnv
 6 | 
 7 | 
 8 | class HopperModEnv(HopperEnv, mujoco_env.MujocoEnv, utils.EzPickle):
 9 |     def __init__(self):
10 | 
11 |         self.perturb_joints = True
12 |         self.components = np.array(['thigh', 'leg', 'foot'])
13 |         self.affected_part = 'thigh'
14 |         self.count = 0
15 |         mujoco_env.MujocoEnv.__init__(self, "hopper.xml", 8)
16 |         utils.EzPickle.__init__(self)
17 | 
18 |         self.init_geom_rgba = self.model.geom_rgba.copy()
19 |         self.dx = self.observation_space.shape[0]
20 |         self.du = self.action_space.shape[0]
21 |         self.H = 50
22 | 
23 |     def step(self, a):
24 |         self.count += 1
25 |         posbefore = self.sim.data.qpos[0]
26 |         self.do_simulation(a, self.frame_skip)
27 |         posafter, height, ang = self.sim.data.qpos[0:3]
28 |         reward = (posafter - posbefore) / self.dt
29 |         done = False
30 |         ob = self._get_obs()
31 |         return ob, -reward, done, {}
32 | 
33 |     def set_state(self, state):
34 |         nq, nv = self.model.nq, self.model.nv
35 |         self.sim.reset()
36 |         qpos = copy.deepcopy(self.init_qpos)
37 |         qvel = copy.deepcopy(self.init_qvel)
38 |         
39 |         qpos[1:6] = state[:nq-1]
40 |         qvel[:6] = state[nq-1:]
41 |       
42 |         mujoco_env.MujocoEnv.set_state(self, qpos, qvel)
43 | 
44 |     def _get_obs(self):
45 |         return np.concatenate([
46 |             self.sim.data.qpos.flat[1:6],
47 |             np.clip(self.sim.data.qvel.flat, -10, 10)
48 |         ])
49 | 
50 |     def reset_model(self):
51 | 
52 |         self.affected_part = self.components[np.random.randint(0,3)]
53 |         self.count = 0
54 |         qpos = self.init_qpos
55 |         qvel = self.init_qvel
56 |         mujoco_env.MujocoEnv.set_state(self, qpos, qvel)
57 | 
58 |         return self._get_obs()
59 | 
60 |     def viewer_setup(self):
61 |         self.viewer.cam.trackbodyid = 2
62 |         self.viewer.cam.distance = self.model.stat.extent * 0.75
63 |         self.viewer.cam.lookat[2] = 1.15
64 |         self.viewer.cam.elevation = -20
65 | 


--------------------------------------------------------------------------------
/cs287hw3/non_linear_optimization.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/non_linear_optimization.pdf


--------------------------------------------------------------------------------
/cs287hw3/utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import moviepy.editor as mpy
 3 | from scipy.optimize import minimize
 4 | 
 5 | 
 6 | class NNPolicy(object):
 7 |     def __init__(self, input_dim, output_dim, hidden_sizes):
 8 |         self.input_dim = input_dim
 9 |         self.output_dim = output_dim
10 |         self.hidden_sizes = tuple(hidden_sizes)
11 |         self.params = None
12 | 
13 |     def get_action(self, state, timestep=None):
14 |         x = state
15 |         params = self.params
16 |         for i in range(len(self.hidden_sizes)):
17 |             x = x.T @ params['W'][i] + params['b'][i]
18 |             x = np.tanh(x)
19 |         action = x.T @ params['W'][-1] + params['b'][-1]
20 |         action = np.tanh(action)
21 |         return action
22 | 
23 |     def set_params(self, params):
24 |         sizes = (self.input_dim,) + self.hidden_sizes + (self.output_dim,)
25 |         Ws, bs = [], []
26 |         s_id = 0
27 |         for i in range(len(self.hidden_sizes) + 1):
28 |             w_shape = (sizes[i], sizes[i + 1])
29 |             e_id = s_id + np.prod(w_shape)
30 |             W = params[s_id:e_id].reshape(w_shape)
31 |             s_id = e_id
32 |             e_id = s_id + sizes[i + 1]
33 |             b = params[s_id:e_id]
34 |             s_id = e_id
35 |             Ws.append(W)
36 |             bs.append(b)
37 |         self.params = dict(W=Ws, b=bs)
38 | 
39 |     def get_params(self):
40 |         params = []
41 |         for W, b in zip(self.params['W'], self.params['b']):
42 |             params.extend([W.flatten(), b.flatten()])
43 |         return np.concatenate(params)
44 | 
45 |     def init_params(self):
46 |         sizes = (self.input_dim,) + self.hidden_sizes + (self.output_dim,)
47 |         Ws, bs = [], []
48 |         for i in range(len(self.hidden_sizes) + 1):
49 |             W = np.random.uniform(size=(sizes[i], sizes[i + 1]))/np.sqrt(sizes[i] + sizes[i+1])
50 |             b = np.zeros(shape=sizes[i + 1])
51 |             Ws.append(W)
52 |             bs.append(b)
53 |         self.params = dict(W=Ws, b=bs)
54 |         return dict(W=Ws, b=bs)
55 | 
56 | 
57 | class ActPolicy(object):
58 |     def __init__(self, env, actions):
59 |         self._actions = actions.reshape(env.H, env.du)
60 |         self.t = 0
61 | 
62 |     def get_action(self, state, timestep=None):
63 |         act = self._actions[self.t]
64 |         self.t = (self.t + 1) % len(self._actions)
65 |         return act
66 | 
67 |     def reset(self):
68 |         self.t = 0
69 | 
70 | 
71 | def rollout(env, policy, noise=0., render=False):
72 |     np.random.seed(0)
73 |     s = env.reset()
74 |     states = []
75 |     imgs = []
76 |     cost = 0
77 |     for t in range(env.H):
78 |         act = policy.get_action(s, t) + np.random.normal(0, scale=noise, size=(env.du,))
79 |         s, c, d, _ = env.step(act)
80 |         if render:imgs.append(env.render('rgb_array'))
81 |         states.append(s)
82 |         cost += c
83 |         if d: break
84 |     if render:
85 |         clip = mpy.ImageSequenceClip(imgs, fps=8)
86 |         clip.write_gif('./rollout.gif', verbose=False)
87 | 
88 |     return cost, states
89 | 
90 | 


--------------------------------------------------------------------------------
/cs287hw3/vids/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/vids/.DS_Store


--------------------------------------------------------------------------------
/cs287hw3/vids/rollout.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw3/vids/rollout.gif


--------------------------------------------------------------------------------
/cs287hw4/__MACOSX/._hw4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/__MACOSX/._hw4.pdf


--------------------------------------------------------------------------------
/cs287hw4/hw4.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/hw4.pdf


--------------------------------------------------------------------------------
/cs287hw4/hw4_rubric.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/hw4_rubric.pdf


--------------------------------------------------------------------------------
/cs287hw4/p3_a_data_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p3_a_data_1.npy


--------------------------------------------------------------------------------
/cs287hw4/p3_a_data_2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p3_a_data_2.npy


--------------------------------------------------------------------------------
/cs287hw4/p3_a_data_3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p3_a_data_3.npy


--------------------------------------------------------------------------------
/cs287hw4/p3_a_data_4.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p3_a_data_4.npy


--------------------------------------------------------------------------------
/cs287hw4/p6_data_0.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p6_data_0.npy


--------------------------------------------------------------------------------
/cs287hw4/p6_data_1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p6_data_1.npy


--------------------------------------------------------------------------------
/cs287hw4/p6_data_2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p6_data_2.npy


--------------------------------------------------------------------------------
/cs287hw4/p6_data_3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw4/p6_data_3.npy


--------------------------------------------------------------------------------
/cs287hw5/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/.DS_Store


--------------------------------------------------------------------------------
/cs287hw5/.idea/hw5_nov12.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="inheritedJdk" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/cs287hw5/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7.0 (~/anaconda3/envs/prior_rl/bin/python3.7)" project-jdk-type="Python SDK" />
4 | </project>


--------------------------------------------------------------------------------
/cs287hw5/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/hw5_nov12.iml" filepath="$PROJECT_DIR$/.idea/hw5_nov12.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/cs287hw5/hw5.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5.pdf


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup.zip


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/.DS_Store


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/baseline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/baseline.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/clipper.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/clipper.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/entropy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/entropy.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/gae.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/gae.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/mbppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/mbppo.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/meppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/meppo.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/newplot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/newplot.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/pg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/pg.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/pg_cheetah.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/pg_cheetah.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_baseline.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_baseline.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_clipper.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_clipper.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_entropy.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_entropy.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_gae.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_gae.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_mbppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_mbppo.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_meppo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_meppo.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_pg.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_pg.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_ppo_obj.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_ppo_obj.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ph_sac.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ph_sac.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/ppo_obj.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/ppo_obj.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/figures/sac_cheetah.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/figures/sac_cheetah.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Ant_3A_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Ant_3A_12.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Ant_3A_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Ant_3A_3.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Cheetah_2A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_2A.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Cheetah_2B_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_2B_1.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Cheetah_2B_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_2B_2.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Cheetah_2C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_2C.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Cheetah_3A_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_3A_12.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Cheetah_3A_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Cheetah_3A_3.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1A.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1B.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1C.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1D.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1D.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1E.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1E.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1F.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1F.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1G.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/HalfCheetah_1G.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1A.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1B.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1C.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1D.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1D.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1E.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1E.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1F.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1F.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_1G.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_1G.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_2A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_2A.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_2B_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_2B_1.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_2B_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_2B_2.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_2C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_2C.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_3A_12.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_3A_12.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Hopper_3A_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Hopper_3A_3.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1A.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1B.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1B.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1C.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1D.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1D.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1E.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1E.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1F.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1F.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_1G.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_1G.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_2A.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_2A.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_2B_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_2B_1.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_2B_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_2B_2.png


--------------------------------------------------------------------------------
/cs287hw5/hw5_writeup/my_figures/Swimmer_2C.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/hw5_writeup/my_figures/Swimmer_2C.png


--------------------------------------------------------------------------------
/cs287hw5/sac/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/.DS_Store


--------------------------------------------------------------------------------
/cs287hw5/sac/.idea/misc.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.7.0 (~/anaconda3/envs/cs287hw5/bin/python)" project-jdk-type="Python SDK" />
4 | </project>


--------------------------------------------------------------------------------
/cs287hw5/sac/.idea/modules.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project version="4">
3 |   <component name="ProjectModuleManager">
4 |     <modules>
5 |       <module fileurl="file://$PROJECT_DIR$/.idea/sac.iml" filepath="$PROJECT_DIR$/.idea/sac.iml" />
6 |     </modules>
7 |   </component>
8 | </project>


--------------------------------------------------------------------------------
/cs287hw5/sac/.idea/sac.iml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <module type="PYTHON_MODULE" version="4">
 3 |   <component name="NewModuleRootManager">
 4 |     <content url="file://$MODULE_DIR$" />
 5 |     <orderEntry type="jdk" jdkName="Python 3.7.0 (~/anaconda3/envs/cs287hw5/bin/python)" jdkType="Python SDK" />
 6 |     <orderEntry type="sourceFolder" forTests="false" />
 7 |   </component>
 8 |   <component name="TestRunnerService">
 9 |     <option name="PROJECT_TEST_RUNNER" value="Unittests" />
10 |   </component>
11 | </module>


--------------------------------------------------------------------------------
/cs287hw5/sac/README.md:
--------------------------------------------------------------------------------
 1 | # CS294-112 HW 5b: Soft Actor Critic
 2 | Original code from Tuomas Haarnoja, Soroush Nasiriany, and Aurick Zhou for CS294-112 Fall 2018
 3 | 
 4 | Dependencies:
 5 |  * Python **3.4.5**
 6 |  * Numpy version **1.15.2**
 7 |  * TensorFlow version **1.10.0**
 8 |  * tensorflow-probability version **0.4.0**
 9 |  * OpenAI Gym version **0.10.8**
10 |  * MuJoCo version **1.50** and mujoco-py **1.50.1.59**
11 |  * seaborn version **0.9.0**
12 | 
13 | You will implement `sac.py` and `nn.py`.
14 | 
15 | See the [HW5 PDF](http://rail.eecs.berkeley.edu/deeprlcourse/static/homeworks/hw5b.pdf) for further instructions.
16 | 
17 | 
18 | Instructions for Running the Code
19 | 
20 | OS Requirement
21 |  * Ubuntu 16.04 LTS
22 | 
23 | Dependencies:
24 |  * Anaconda
25 |  
26 | All the specific python packages are listed in `environment.yml`
27 |  
28 | 
29 | Instructions
30 |  * Install all the environment dependencies by running `./project_setup.bash setup`
31 |  * Load the dependencies with `source project_setup.bash`
32 |  * Run `run_all.sh` to run all the experiments.
33 |  * Run `generate_plots.sh` to generate all the plots; the plots are located in the `plots` directory.
34 | 


--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/logz.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/logz.cpython-35.pyc


--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/logz.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/logz.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/nn.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/nn.cpython-35.pyc


--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/nn.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/nn.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/sac.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/sac.cpython-35.pyc


--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/sac.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/sac.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/utils.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/utils.cpython-35.pyc


--------------------------------------------------------------------------------
/cs287hw5/sac/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/__pycache__/utils.cpython-37.pyc


--------------------------------------------------------------------------------
/cs287hw5/sac/data/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mj-hwang/cs287-advanced-robotics/a6f6da0b3fd170ecf2eeb9f7e51f600686d22028/cs287hw5/sac/data/.DS_Store


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reinf_02-12-2019_16-46-48/1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.1,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": false,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "Ant-v2",
14 |   "exp_name": "reinf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reinf_02-12-2019_16-46-48/11/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.1,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": false,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "Ant-v2",
14 |   "exp_name": "reinf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reinf_02-12-2019_16-46-48/21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.1,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": false,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "Ant-v2",
14 |   "exp_name": "reinf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reparam_02-12-2019_16-47-03/1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.1,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "Ant-v2",
14 |   "exp_name": "reparam",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reparam_02-12-2019_16-47-03/11/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.1,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "Ant-v2",
14 |   "exp_name": "reparam",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reparam_02-12-2019_16-47-03/21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.1,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "Ant-v2",
14 |   "exp_name": "reparam",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reparam_2qf_02-12-2019_16-47-25/1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.1,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": true
12 |   },
13 |   "env_name": "Ant-v2",
14 |   "exp_name": "reparam_2qf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reparam_2qf_02-12-2019_16-47-25/11/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.1,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": true
12 |   },
13 |   "env_name": "Ant-v2",
14 |   "exp_name": "reparam_2qf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Ant-v2_reparam_2qf_02-12-2019_16-47-25/21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.1,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": true
12 |   },
13 |   "env_name": "Ant-v2",
14 |   "exp_name": "reparam_2qf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53/1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": false,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "HalfCheetah-v2",
14 |   "exp_name": "reinf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53/11/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": false,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "HalfCheetah-v2",
14 |   "exp_name": "reinf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reinf_02-12-2019_11-48-53/21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": false,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "HalfCheetah-v2",
14 |   "exp_name": "reinf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_02-12-2019_12-05-49/1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "HalfCheetah-v2",
14 |   "exp_name": "reparam",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_02-12-2019_12-05-49/11/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "HalfCheetah-v2",
14 |   "exp_name": "reparam",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_02-12-2019_12-05-49/21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "HalfCheetah-v2",
14 |   "exp_name": "reparam",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_2qf_02-12-2019_12-34-24/1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": true
12 |   },
13 |   "env_name": "HalfCheetah-v2",
14 |   "exp_name": "reparam_2qf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_2qf_02-12-2019_12-34-24/11/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": true
12 |   },
13 |   "env_name": "HalfCheetah-v2",
14 |   "exp_name": "reparam_2qf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_HalfCheetah-v2_reparam_2qf_02-12-2019_12-34-24/21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": true
12 |   },
13 |   "env_name": "HalfCheetah-v2",
14 |   "exp_name": "reparam_2qf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reinf_02-12-2019_21-02-20/1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": false,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "Hopper-v2",
14 |   "exp_name": "reinf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reinf_02-12-2019_21-02-20/11/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": false,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "Hopper-v2",
14 |   "exp_name": "reinf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reinf_02-12-2019_21-02-20/21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": false,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "Hopper-v2",
14 |   "exp_name": "reinf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reparam_02-12-2019_21-02-26/1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "Hopper-v2",
14 |   "exp_name": "reparam",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reparam_02-12-2019_21-02-26/11/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "Hopper-v2",
14 |   "exp_name": "reparam",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reparam_02-12-2019_21-02-26/21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": false
12 |   },
13 |   "env_name": "Hopper-v2",
14 |   "exp_name": "reparam",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reparam_2qf_02-12-2019_21-02-37/1/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": true
12 |   },
13 |   "env_name": "Hopper-v2",
14 |   "exp_name": "reparam_2qf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reparam_2qf_02-12-2019_21-02-37/11/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": true
12 |   },
13 |   "env_name": "Hopper-v2",
14 |   "exp_name": "reparam_2qf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/data/sac_Hopper-v2_reparam_2qf_02-12-2019_21-02-37/21/params.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "algorithm_params": {
 3 |     "alpha": 0.2,
 4 |     "batch_size": 256,
 5 |     "discount": 0.99,
 6 |     "epoch_length": 1000,
 7 |     "learning_rate": 0.001,
 8 |     "n_epochs": 500,
 9 |     "reparameterize": true,
10 |     "tau": 0.01,
11 |     "two_qf": true
12 |   },
13 |   "env_name": "Hopper-v2",
14 |   "exp_name": "reparam_2qf",
15 |   "policy_params": {
16 |     "hidden_layer_sizes": [
17 |       128,
18 |       128
19 |     ]
20 |   },
21 |   "q_function_params": {
22 |     "hidden_layer_sizes": [
23 |       128,
24 |       128
25 |     ]
26 |   },
27 |   "replay_pool_params": {
28 |     "max_size": 1000000.0
29 |   },
30 |   "sampler_params": {
31 |     "max_episode_length": 1000,
32 |     "prefill_steps": 1000
33 |   },
34 |   "value_function_params": {
35 |     "hidden_layer_sizes": [
36 |       128,
37 |       128
38 |     ]
39 |   }
40 | }


--------------------------------------------------------------------------------
/cs287hw5/sac/environment.yml:
--------------------------------------------------------------------------------
 1 | name: cs294drl_hw5_sac  # project_setup.bash uses this same name as ENV_NAME
 2 | # dependencies:
 3 | #     - python==3.4.5
 4 | #     - pip:
 5 | #         - gym==0.10.8
 6 | #         - numpy==1.15.2
 7 | #         - tensorflow-gpu==1.10.0
 8 | #         - tensorflow-probability==0.4.0
 9 | #         - mujoco-py==1.50.1.56
10 | #         - seaborn==0.9.0
11 | dependencies:  # active list; the commented-out block above is ignored by conda
12 |     - python=3.5  # matches the cp35 tag of the TensorFlow wheel below
13 |     - numpy=1.14.5
14 |     - pandas
15 |     - scipy
16 |     - matplotlib
17 |     - seaborn
18 |     - scikit-learn
19 |     - jupyter
20 |     - patchelf
21 |     - pip:  # entries in this nested list are installed with pip
22 |         - Cython
23 |         - https://storage.googleapis.com/tensorflow/linux/gpu/tensorflow_gpu-1.13.1-cp35-cp35m-linux_x86_64.whl  # TensorFlow 1.13.1 GPU build for CPython 3.5, linux x86_64 (per the wheel filename)
24 |         - mujoco-py==1.50.1.56  # NOTE(review): project_setup.bash appends a .mujoco/mjpro150/bin directory to LD_LIBRARY_PATH, presumably for this package - confirm
25 |         - box2d==2.3.2
26 |         - opencv-python
27 |         - gym[atari]==0.10.5
28 |         - tensorflow-probability==0.6.0
29 | 


--------------------------------------------------------------------------------
/cs287hw5/sac/generate_plots.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | function filter_experiment_dirs {
 4 |    ls data | grep -e $1 | sed -e 's/^/data\//'
 5 | }
 6 | 
 7 | function filter_experiment_config {
 8 |    ls data | grep -e $1 | sed -e "s/$2.*//"
 9 | }
10 | 
11 | 
12 | mkdir -p plots
13 | 
14 | python myplot.py  \
15 |     --legend $(filter_experiment_config 'sac_HalfCheetah' '\d{2}-\d{2}-\d{4}') \
16 |     --title 'HalfCheetah SAC' \
17 |     --output plots/HalfCheetah_SAC.png \
18 |     $(filter_experiment_dirs 'sac_HalfCheetah')
19 |     
20 | python myplot.py  \
21 |     --legend $(filter_experiment_config 'sac_Ant' '\d{2}-\d{2}-\d{4}') \
22 |     --title 'Ant SAC' \
23 |     --output plots/Ant_SAC.png \
24 |     $(filter_experiment_dirs 'sac_Ant')
25 |     


--------------------------------------------------------------------------------
/cs287hw5/sac/project_setup.bash:
--------------------------------------------------------------------------------
 1 | # Project setup script
 2 | # Source this file to set up the environment for this project.
 3 | 
 4 | 
 5 | ENV_NAME='cs294drl_hw5_sac'
 6 | 
 7 | if [ "$1" = "setup" ]; then
 8 |     echo "Creating conda environment..."
 9 |     conda env create -f environment.yml
10 | elif [ "$1" = "remove" ]; then
11 |     conda remove --name $ENV_NAME --all --yes
12 | else
13 | 
14 |     export PROJECT_HOME="$(pwd)"
15 |     
16 |     alias ph="cd $PROJECT_HOME"
17 |     
18 |     
19 |     alias set_display="export DISPLAY=':0.0'"
20 |     alias unset_display="unset DISPLAY"
21 |     
22 |     export MPLBACKEND='Agg'
23 |     
24 |     source activate $ENV_NAME
25 |     
26 |     export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/home/young/.mujoco/mjpro150/bin"
27 | fi
28 | 


--------------------------------------------------------------------------------
/cs287hw5/sac/run_all.sh:
--------------------------------------------------------------------------------
 1 | #! /bin/bash
 2 | 
 3 | python train_mujoco.py --env_name HalfCheetah-v2 --exp_name reinf -e 3
 4 | 
 5 | python train_mujoco.py --env_name HalfCheetah-v2 --exp_name reparam -e 3 --reparameterize
 6 | 
 7 | python train_mujoco.py --env_name HalfCheetah-v2 --exp_name reparam_2qf -e 3 --reparameterize --two_qf
 8 | 
 9 | 
10 | python train_mujoco.py --env_name Ant-v2 --exp_name reinf -e 3
11 | 
12 | python train_mujoco.py --env_name Ant-v2 --exp_name reparam -e 3 --reparameterize
13 | 
14 | python train_mujoco.py --env_name Ant-v2 --exp_name reparam_2qf -e 3 --reparameterize --two_qf
15 | 
16 | 
17 | python train_mujoco.py --env_name Hopper-v2 --exp_name reinf -e 3
18 | 
19 | python train_mujoco.py --env_name Hopper-v2 --exp_name reparam -e 3 --reparameterize
20 | 
21 | python train_mujoco.py --env_name Hopper-v2 --exp_name reparam_2qf -e 3 --reparameterize --two_qf


--------------------------------------------------------------------------------