├── ARS_Carla ├── data │ └── old_logs │ │ ├── 2020-12-16_test │ │ ├── log.txt │ │ └── params.json │ │ ├── 2020-12-19_test │ │ ├── log.txt │ │ └── params.json │ │ ├── 2020-12-29_test │ │ ├── log.txt │ │ └── params.json │ │ ├── 2020-12-16_1250steps_part1 │ │ ├── lin_policy_plus.npz │ │ ├── params.json │ │ └── log.txt │ │ ├── 2020-12-16_1250steps_part2 │ │ ├── lin_policy_plus.npz │ │ ├── params.json │ │ └── log.txt │ │ ├── 2020-12-16_1250steps_part3 │ │ ├── lin_policy_plus.npz │ │ ├── params.json │ │ └── log.txt │ │ └── 2020-12-16_1250steps_part4 │ │ ├── lin_policy_plus.npz │ │ ├── params.json │ │ └── log.txt ├── code │ ├── __pycache__ │ │ ├── logz.cpython-37.pyc │ │ ├── filter.cpython-37.pyc │ │ ├── utils.cpython-37.pyc │ │ ├── policies.cpython-37.pyc │ │ ├── optimizers.cpython-37.pyc │ │ └── shared_noise.cpython-37.pyc │ ├── shared_noise.py │ ├── utils.py │ ├── optimizers.py │ ├── lqr_env.py │ ├── run_policy.py │ ├── policies.py │ ├── logz.py │ ├── filter.py │ └── ars_carla.py ├── trained_policies │ ├── Walker2d-v1 │ │ ├── gait1.npz │ │ ├── gait2.npz │ │ ├── gait3.npz │ │ ├── gait4.npz │ │ └── gait5_reward_11200.npz │ ├── Ant-v1 │ │ └── lin_policy_plus.npz │ ├── Hopper-v1 │ │ └── lin_policy_plus.npz │ ├── Swimmer-v1 │ │ └── lin_policy_plus.npz │ ├── HalfCheetah-v1 │ │ └── lin_policy_plus.npz │ └── Humanoid-v1 │ │ ├── spin │ │ ├── 8020 │ │ │ └── lin_policy_plus.npz │ │ ├── 9363 │ │ │ └── lin_policy_plus.npz │ │ └── lin_policy_plus.npz │ │ ├── halfrun │ │ └── lin_policy_plus.npz │ │ ├── jumping │ │ └── lin_policy_plus.npz │ │ ├── tiptoe │ │ └── lin_policy_plus.npz │ │ ├── arm_swing │ │ └── lin_policy_plus.npz │ │ ├── big_steps │ │ └── lin_policy_plus.npz │ │ ├── galloping │ │ └── lin_policy_plus.npz │ │ ├── average_one │ │ └── lin_policy_plus.npz │ │ ├── limp_arms_up │ │ └── lin_policy_plus.npz │ │ ├── humanoid_on_ice │ │ └── lin_policy_plus.npz │ │ ├── stiff_leg_hop │ │ └── lin_policy_plus.npz │ │ ├── almost_fall_over │ │ └── lin_policy_plus.npz │ │ ├── 
humanoid_kick_up │ │ └── lin_policy_plus.npz │ │ ├── hunched_sideways │ │ └── lin_policy_plus.npz │ │ ├── humanoid_grapevine │ │ └── lin_policy_plus.npz │ │ ├── humanoid_hop_one_leg │ │ └── lin_policy_plus.npz │ │ ├── humanoid_hunched_run │ │ └── lin_policy_plus.npz │ │ ├── policy_reward_11600 │ │ └── lin_policy_plus.npz │ │ ├── humanoid_hop_leg_kick │ │ └── lin_policy_plus.npz │ │ ├── humanoid_run_backwards │ │ └── lin_policy_plus.npz │ │ ├── humanoid_run_sideways │ │ └── lin_policy_plus.npz │ │ ├── humanoid_skip_one_leg │ │ └── lin_policy_plus.npz │ │ ├── humanoid_hop_one_leg_sideways │ │ └── lin_policy_plus.npz │ │ └── humanoid_hunched_skip_one_leg │ │ └── lin_policy_plus.npz └── LICENSE ├── images ├── WorkerIO.png ├── test_cam.png ├── training_results.png ├── ars_formula_explained.png └── bipedal_walker_results.png ├── videos └── BipedalWalker-v3 │ ├── openaigym.video.0.15084.video000064.mp4 │ ├── openaigym.video.0.15084.video003314.mp4 │ ├── openaigym.video.0.15084.video006564.mp4 │ ├── openaigym.video.0.15084.video009814.mp4 │ ├── openaigym.video.0.15084.video013064.mp4 │ ├── openaigym.video.0.15084.video016314.mp4 │ ├── openaigym.video.0.15084.video019564.mp4 │ ├── openaigym.video.0.15084.video022814.mp4 │ ├── openaigym.video.0.15084.video026064.mp4 │ ├── openaigym.video.0.15084.video029314.mp4 │ ├── openaigym.video.0.15084.video032564.mp4 │ ├── openaigym.video.0.15084.video035814.mp4 │ ├── openaigym.video.0.15084.video039064.mp4 │ ├── openaigym.video.0.15084.video042314.mp4 │ ├── openaigym.video.0.15084.video045564.mp4 │ ├── openaigym.video.0.15084.video048814.mp4 │ ├── openaigym.video.0.15084.video052064.mp4 │ ├── openaigym.video.0.15084.video055314.mp4 │ ├── openaigym.video.0.15084.video058564.mp4 │ ├── openaigym.video.0.15084.video061814.mp4 │ ├── openaigym.video.0.15084.video000064.meta.json │ ├── openaigym.video.0.15084.video003314.meta.json │ ├── openaigym.video.0.15084.video006564.meta.json │ ├── openaigym.video.0.15084.video009814.meta.json │ ├── 
openaigym.video.0.15084.video013064.meta.json │ ├── openaigym.video.0.15084.video016314.meta.json │ ├── openaigym.video.0.15084.video019564.meta.json │ ├── openaigym.video.0.15084.video022814.meta.json │ ├── openaigym.video.0.15084.video026064.meta.json │ ├── openaigym.video.0.15084.video029314.meta.json │ ├── openaigym.video.0.15084.video032564.meta.json │ ├── openaigym.video.0.15084.video035814.meta.json │ ├── openaigym.video.0.15084.video039064.meta.json │ ├── openaigym.video.0.15084.video042314.meta.json │ ├── openaigym.video.0.15084.video045564.meta.json │ ├── openaigym.video.0.15084.video048814.meta.json │ ├── openaigym.video.0.15084.video052064.meta.json │ ├── openaigym.video.0.15084.video055314.meta.json │ ├── openaigym.video.0.15084.video058564.meta.json │ ├── openaigym.video.0.15084.video061814.meta.json │ └── openaigym.manifest.0.15084.manifest.json ├── environment.yaml ├── .gitignore ├── 2_train_agent.md └── README.md /ARS_Carla/data/old_logs/2020-12-16_test/log.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-19_test/log.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-29_test/log.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /images/WorkerIO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/images/WorkerIO.png -------------------------------------------------------------------------------- /images/test_cam.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/images/test_cam.png -------------------------------------------------------------------------------- /images/training_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/images/training_results.png -------------------------------------------------------------------------------- /images/ars_formula_explained.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/images/ars_formula_explained.png -------------------------------------------------------------------------------- /images/bipedal_walker_results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/images/bipedal_walker_results.png -------------------------------------------------------------------------------- /ARS_Carla/code/__pycache__/logz.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/code/__pycache__/logz.cpython-37.pyc -------------------------------------------------------------------------------- /ARS_Carla/code/__pycache__/filter.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/code/__pycache__/filter.cpython-37.pyc -------------------------------------------------------------------------------- /ARS_Carla/code/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/code/__pycache__/utils.cpython-37.pyc 
-------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Walker2d-v1/gait1.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Walker2d-v1/gait1.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Walker2d-v1/gait2.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Walker2d-v1/gait2.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Walker2d-v1/gait3.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Walker2d-v1/gait3.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Walker2d-v1/gait4.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Walker2d-v1/gait4.npz -------------------------------------------------------------------------------- /ARS_Carla/code/__pycache__/policies.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/code/__pycache__/policies.cpython-37.pyc -------------------------------------------------------------------------------- /ARS_Carla/code/__pycache__/optimizers.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/code/__pycache__/optimizers.cpython-37.pyc 
-------------------------------------------------------------------------------- /ARS_Carla/code/__pycache__/shared_noise.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/code/__pycache__/shared_noise.cpython-37.pyc -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Ant-v1/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Ant-v1/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Hopper-v1/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Hopper-v1/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Swimmer-v1/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Swimmer-v1/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/HalfCheetah-v1/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/HalfCheetah-v1/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Walker2d-v1/gait5_reward_11200.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Walker2d-v1/gait5_reward_11200.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/spin/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/spin/lin_policy_plus.npz -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video000064.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video000064.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video003314.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video003314.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video006564.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video006564.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video009814.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video009814.mp4 -------------------------------------------------------------------------------- 
/videos/BipedalWalker-v3/openaigym.video.0.15084.video013064.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video013064.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video016314.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video016314.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video019564.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video019564.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video022814.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video022814.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video026064.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video026064.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video029314.mp4: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video029314.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video032564.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video032564.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video035814.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video035814.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video039064.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video039064.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video042314.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video042314.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video045564.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video045564.mp4 -------------------------------------------------------------------------------- 
/videos/BipedalWalker-v3/openaigym.video.0.15084.video048814.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video048814.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video052064.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video052064.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video055314.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video055314.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video058564.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video058564.mp4 -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video061814.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/videos/BipedalWalker-v3/openaigym.video.0.15084.video061814.mp4 -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/halfrun/lin_policy_plus.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/halfrun/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/jumping/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/jumping/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/tiptoe/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/tiptoe/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/arm_swing/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/arm_swing/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/big_steps/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/big_steps/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/galloping/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/galloping/lin_policy_plus.npz 
-------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/spin/8020/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/spin/8020/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/spin/9363/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/spin/9363/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-16_1250steps_part1/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/data/old_logs/2020-12-16_1250steps_part1/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-16_1250steps_part2/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/data/old_logs/2020-12-16_1250steps_part2/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-16_1250steps_part3/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/data/old_logs/2020-12-16_1250steps_part3/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-16_1250steps_part4/lin_policy_plus.npz: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/data/old_logs/2020-12-16_1250steps_part4/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/average_one/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/average_one/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/limp_arms_up/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/limp_arms_up/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/humanoid_on_ice/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/humanoid_on_ice/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/stiff_leg_hop/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/stiff_leg_hop/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/almost_fall_over/lin_policy_plus.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/almost_fall_over/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/humanoid_kick_up/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/humanoid_kick_up/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/hunched_sideways/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/hunched_sideways/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/humanoid_grapevine/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/humanoid_grapevine/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/humanoid_hop_one_leg/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/humanoid_hop_one_leg/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/humanoid_hunched_run/lin_policy_plus.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/humanoid_hunched_run/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/policy_reward_11600/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/policy_reward_11600/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/humanoid_hop_leg_kick/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/humanoid_hop_leg_kick/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/humanoid_run_backwards/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/humanoid_run_backwards/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/humanoid_run_sideways/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/humanoid_run_sideways/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/humanoid_skip_one_leg/lin_policy_plus.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/humanoid_skip_one_leg/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/humanoid_hop_one_leg_sideways/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/humanoid_hop_one_leg_sideways/lin_policy_plus.npz -------------------------------------------------------------------------------- /ARS_Carla/trained_policies/Humanoid-v1/humanoid_hunched_skip_one_leg/lin_policy_plus.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FoamoftheSea/mod6project/HEAD/ARS_Carla/trained_policies/Humanoid-v1/humanoid_hunched_skip_one_leg/lin_policy_plus.npz -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: ars_carla 2 | 3 | dependencies: 4 | - python=3.7 5 | - pip=19.2.3 6 | - pip: 7 | - opencv-python==4.5.1.48 8 | - tensorflow==2.4.1 9 | - ray==1.11 10 | - protobuf==3.20.0 11 | - carla==0.9.13 12 | - notebook==6.5.5 13 | - matplotlib==3.3.4 -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-16_1250steps_part1/params.json: -------------------------------------------------------------------------------- 1 | {"delta_std" : 0.03, 2 | "deltas_used" : 16, 3 | "dir_path" : ".\\data\\old_logs\\2020-12-16_1000steps", 4 | "env_name" : "CarEnv", 5 | "filter" : "MeanStdFilter", 6 | "learning_rate" : 0.02, 7 | "lr_decay" : 0.001, 8 | "n_iter" : 1000, 9 | "n_workers" : 4, 10 | "num_deltas" : 32, 11 | "policy_file" : "", 12 | "policy_type" : "linear", 13 | "rollout_length" : 1000, 14 | 
"seconds_per_episode" : 15, 15 | "seed" : 42, 16 | "shift" : 0, 17 | "show_cam" : 0, 18 | "state_filter" : false, 19 | "std_decay" : 0.001} -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-19_test/params.json: -------------------------------------------------------------------------------- 1 | {"delta_std" : 0.05, 2 | "deltas_used" : 16, 3 | "dir_path" : ".\\data\\old_logs\\2020-12-19_test", 4 | "enable_gpu" : false, 5 | "env_name" : "CarEnv", 6 | "eval_rollouts" : null, 7 | "filter" : "MeanStdFilter", 8 | "learning_rate" : 0.005, 9 | "log_every" : 10, 10 | "lr_decay" : 0.001, 11 | "n_iter" : 10, 12 | "n_workers" : 4, 13 | "num_deltas" : 32, 14 | "policy_file" : "", 15 | "policy_type" : "linear", 16 | "rollout_length" : 1000, 17 | "seconds_per_episode" : 15, 18 | "seed" : 237, 19 | "shift" : 0, 20 | "show_cam" : 2, 21 | "state_filter" : false, 22 | "std_decay" : 0.001} -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-16_1250steps_part2/params.json: -------------------------------------------------------------------------------- 1 | {"delta_std" : 0.026873544987117377, 2 | "deltas_used" : 16, 3 | "dir_path" : ".\\data\\old_logs\\2020-12-16_1000steps_part2", 4 | "env_name" : "CarEnv", 5 | "filter" : "MeanStdFilter", 6 | "learning_rate" : 0.017915696658078243, 7 | "lr_decay" : 0.001, 8 | "n_iter" : 1000, 9 | "n_workers" : 4, 10 | "num_deltas" : 32, 11 | "policy_file" : ".\\data\\old_logs\\2020-12-16_1000steps\\lin_policy_plus.npz", 12 | "policy_type" : "linear", 13 | "rollout_length" : 1000, 14 | "seconds_per_episode" : 15, 15 | "seed" : 69, 16 | "shift" : 0, 17 | "show_cam" : 0, 18 | "state_filter" : false, 19 | "std_decay" : 0.001} -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-16_1250steps_part3/params.json: 
-------------------------------------------------------------------------------- 1 | {"delta_std" : 0.0187456593611607, 2 | "deltas_used" : 16, 3 | "dir_path" : ".\\data\\old_logs\\2020-12-16_1000steps_part3", 4 | "env_name" : "CarEnv", 5 | "filter" : "MeanStdFilter", 6 | "learning_rate" : 0.012497106240773818, 7 | "lr_decay" : 0.001, 8 | "n_iter" : 1000, 9 | "n_workers" : 4, 10 | "num_deltas" : 32, 11 | "policy_file" : ".\\data\\old_logs\\2020-12-16_1000steps_part2\\lin_policy_plus.npz", 12 | "policy_type" : "linear", 13 | "rollout_length" : 1000, 14 | "seconds_per_episode" : 15, 15 | "seed" : 7, 16 | "shift" : 0, 17 | "show_cam" : 0, 18 | "state_filter" : false, 19 | "std_decay" : 0.001} -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-16_1250steps_part4/params.json: -------------------------------------------------------------------------------- 1 | {"delta_std" : 0.015042088192787672, 2 | "deltas_used" : 16, 3 | "dir_path" : ".\\data\\old_logs\\2020-12-16_1000steps_part4", 4 | "env_name" : "CarEnv", 5 | "filter" : "MeanStdFilter", 6 | "learning_rate" : 0.010028058795191787, 7 | "lr_decay" : 0.001, 8 | "n_iter" : 1000, 9 | "n_workers" : 4, 10 | "num_deltas" : 32, 11 | "policy_file" : ".\\data\\old_logs\\2020-12-16_1000steps_part3\\lin_policy_plus.npz", 12 | "policy_type" : "linear", 13 | "rollout_length" : 1000, 14 | "seconds_per_episode" : 15, 15 | "seed" : 99, 16 | "shift" : 0, 17 | "show_cam" : 0, 18 | "state_filter" : false, 19 | "std_decay" : 0.001} -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-29_test/params.json: -------------------------------------------------------------------------------- 1 | {"delta_std" : 0.02, 2 | "deltas_used" : 16, 3 | "dir_path" : ".\\data\\old_logs\\2020-12-29_test", 4 | "enable_gpu" : false, 5 | "env_name" : "CarEnv", 6 | "eval_rollouts" : null, 7 | "filter" : "MeanStdFilter", 8 | "learning_rate" 
# Code in this file is copied and adapted from
# https://github.com/ray-project/ray/tree/master/python/ray/rllib/es

import ray
import numpy as np


@ray.remote
def create_shared_noise():
    """Create one large block of Gaussian noise shared by every worker.

    Workers index into this table instead of exchanging perturbation
    vectors, which avoids communicating the random deltas directly.
    """
    seed = 12345
    count = 250000000
    return np.random.RandomState(seed).randn(count).astype(np.float64)


class SharedNoiseTable(object):
    """Read-only view over the shared noise array with seeded index sampling."""

    def __init__(self, noise, seed=11):
        # Each table owns its own RNG so consumers draw independent indices.
        self.rg = np.random.RandomState(seed)
        self.noise = noise
        assert self.noise.dtype == np.float64

    def get(self, i, dim):
        """Return the slice of ``dim`` noise values starting at index ``i``."""
        return self.noise[i:i + dim]

    def sample_index(self, dim):
        """Draw a start index such that a slice of length ``dim`` still fits."""
        return self.rg.randint(0, len(self.noise) - dim + 1)

    def get_delta(self, dim):
        """Sample a random perturbation of length ``dim``; return (index, slice)."""
        idx = self.sample_index(dim)
        return idx, self.get(idx, dim)
21 | def batched_weighted_sum(weights, vecs, batch_size): 22 | total = 0 23 | num_items_summed = 0 24 | for batch_weights, batch_vecs in zip(itergroups(weights, batch_size), 25 | itergroups(vecs, batch_size)): 26 | assert len(batch_weights) == len(batch_vecs) <= batch_size 27 | total += np.dot(np.asarray(batch_weights, dtype=np.float64), 28 | np.asarray(batch_vecs, dtype=np.float64)) 29 | num_items_summed += len(batch_weights) 30 | return total, num_items_summed 31 | -------------------------------------------------------------------------------- /ARS_Carla/code/optimizers.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/openai/evolution-strategies-starter. 3 | 4 | # This code has been modified by Nate Cibik to include functionality for 5 | # learning rate decay parameter 6 | 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | from __future__ import print_function 10 | 11 | import numpy as np 12 | 13 | # OPTIMIZERS FOR MINIMIZING OBJECTIVES 14 | class Optimizer(object): 15 | def __init__(self, w_policy): 16 | self.w_policy = w_policy.flatten() 17 | self.dim = w_policy.size 18 | self.t = 0 19 | 20 | def update(self, globalg): 21 | self.t += 1 22 | step = self._compute_step(globalg) 23 | ratio = np.linalg.norm(step) / (np.linalg.norm(self.w_policy) + 1e-5) 24 | return self.w_policy + step, ratio 25 | 26 | def _compute_step(self, globalg): 27 | raise NotImplementedError 28 | 29 | 30 | class SGD(Optimizer): 31 | def __init__(self, pi, learning_rate, lr_decay): 32 | Optimizer.__init__(self, pi) 33 | self.learning_rate = learning_rate 34 | self.lr_decay = lr_decay 35 | 36 | def _compute_step(self, globalg): 37 | step = -self.learning_rate * globalg 38 | self.learning_rate *= (1 - self.lr_decay) 39 | if self.lr_decay != 0: 40 | print('New learning rate:', self.learning_rate) 41 | return step 42 | 43 | 
-------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-16_1250steps_part1/log.txt: -------------------------------------------------------------------------------- 1 | Time Iteration AverageReward StdRewards MaxRewardRollout MinRewardRollout Timesteps LearningRate DeltaStd 2 | 3381.0791521072388 10 19.28237442286214 9.546175867960368 33.676427282365225 -10.56852453051878 11867 0.019800897604194966 0.029701346406292437 3 | 6717.330669403076 20 22.08048349218211 11.244721333244387 34.43528453578401 -9.552599772564879 22424 0.019603777296590692 0.029405665944886035 4 | 10074.364431619644 30 20.472663898034714 13.626943702736401 33.43520748067809 -16.601752899522793 33114 0.01940861934526171 0.02911292901789256 5 | 13333.280536651611 40 16.987832227765452 14.80887115639854 33.21991038030705 -28.78893820534381 43594 0.01921540421471623 0.028823106322074347 6 | 16649.656021118164 50 17.41766058583616 13.009298648402435 32.019466723474714 -12.20413561799838 54399 0.01902411256394062 0.028536168845910936 7 | 19959.368383169174 60 19.4123030967097 13.411557302263784 34.08040796013981 -11.364055146142794 65020 0.018834725244463356 0.02825208786669504 8 | 23260.872073173523 70 10.413525475915694 14.242396792354205 33.28279046739336 -14.11806503272824 75585 0.018647223298438244 0.02797083494765737 9 | 26586.20476746559 80 14.569677009983613 16.878163554048122 33.91732720910303 -22.87307961259974 86302 0.018461587956746718 0.02769238193512008 10 | 29947.703352451324 90 12.775089540062325 16.16040436945871 35.21772599349047 -17.507052095140775 96965 0.01827780063711903 0.02741670095567855 11 | 33462.91309285164 100 4.182398327277619 11.748881438600181 32.95824700405847 -10.590414161727965 107613 0.01809584294227417 0.027143764413411257 12 | 37044.325439453125 110 0.8564147148762196 5.296329377424257 11.572963490828817 -10.42852229472967 117976 0.017915696658078243 0.026873544987117377 13 | 
import gym
from gym import spaces
from gym.utils import seeding
import numpy as np
from os import path


class LQR_Env(gym.Env):
    """Linear-quadratic regulator environment (legacy gym ``_step`` API).

    Dynamics: ``x' = A x + B u + N(0, 1)`` noise; the reward is the negative
    quadratic cost ``x'Qx + u'Ru``. Episodes terminate after the step counter
    exceeds 300.
    """

    def __init__(self):
        self.viewer = None

        # Slightly unstable 3x3 system matrix with identity actuation.
        self.A = np.array([[1.01, 0.01, 0.0],
                           [0.01, 1.01, 0.01],
                           [0., 0.01, 1.01]])
        self.B = np.eye(3)

        self.d, self.p = self.B.shape

        # Cost weights: full penalty on control, state penalty scaled down.
        self.R = np.eye(self.p)
        self.Q = np.eye(self.d) / 1000

        self.time = 0

        self.action_space = spaces.Box(low=-1e+8, high=1e+8, shape=(self.p,))
        self.observation_space = spaces.Box(low=-float('inf'),
                                            high=float('inf'),
                                            shape=(self.d, ))

        # NOTE(review): this initial draw uses the *global* numpy RNG, not the
        # seeded self.np_random created just below — reproduced as-is.
        self.state = np.random.normal(0, 1, size=self.d)

        self._seed()

    def _seed(self, seed=None):
        """Seed the environment's private RNG; return the seed in a list."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _step(self, u):
        """Advance one step under control ``u``; return (obs, reward, done, info)."""
        x = self.state

        cost = np.dot(x, np.dot(self.Q, x)) + np.dot(u, np.dot(self.R, u))
        next_state = (np.dot(self.A, x) + np.dot(self.B, u)
                      + self.np_random.normal(0, 1, size=self.d))
        self.state = next_state

        # Termination is checked before the counter is advanced.
        terminated = self.time > 300
        self.time += 1

        return self._get_obs(), - cost, terminated, {}

    def _reset(self):
        """Resample the state from N(0, 1) and restart the step counter."""
        self.state = self.np_random.normal(0, 1, size=self.d)
        self.last_u = None
        self.time = 0
        return self._get_obs()

    def _get_obs(self):
        """The observation is the raw state vector."""
        return self.state

    def get_params(self):
        """Return the LQR system matrices ``(A, B, Q, R)``."""
        return self.A, self.B, self.Q, self.R
"content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 
7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video000064.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video003314.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 3314, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 
10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video003314.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video006564.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 6564, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt 
--enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video006564.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video009814.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 9814, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx 
--enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video009814.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video013064.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 13064, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis 
--enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video013064.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video016314.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 16314, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 
--enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video016314.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video019564.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 19564, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine 
--enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video019564.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video022814.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 22814, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray 
--enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 
7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video022814.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video026064.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 26064, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 
10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video026064.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video029314.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 29314, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt 
--enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video029314.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video032564.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 32564, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx 
--enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video032564.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video035814.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 35814, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis 
--enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video035814.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video039064.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 39064, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 
--enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video039064.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video042314.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 42314, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine 
--enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video042314.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video045564.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 45564, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray 
--enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 
7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video045564.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video048814.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 48814, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 
10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video048814.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video052064.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 52064, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt 
--enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video052064.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video055314.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 55314, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx 
--enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video055314.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video058564.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 58564, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis 
--enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video058564.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.video.0.15084.video061814.meta.json: -------------------------------------------------------------------------------- 1 | {"episode_id": 61814, "content_type": "video/mp4", "encoder_version": {"backend": "ffmpeg", "version": "b'ffmpeg version 4.3.1 Copyright (c) 2000-2020 the FFmpeg developers\\r\\nbuilt with gcc 10.2.1 (GCC) 20200726\\r\\nconfiguration: --disable-static --enable-shared --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libsrt --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 
--enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvmaf --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libgsm --enable-librav1e --disable-w32threads --enable-libmfx --enable-ffnvcodec --enable-cuda-llvm --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf\\r\\nlibavutil 56. 51.100 / 56. 51.100\\r\\nlibavcodec 58. 91.100 / 58. 91.100\\r\\nlibavformat 58. 45.100 / 58. 45.100\\r\\nlibavdevice 58. 10.100 / 58. 10.100\\r\\nlibavfilter 7. 85.100 / 7. 85.100\\r\\nlibswscale 5. 7.100 / 5. 7.100\\r\\nlibswresample 3. 7.100 / 3. 7.100\\r\\nlibpostproc 55. 7.100 / 55. 7.100\\r\\n'", "cmdline": ["ffmpeg", "-nostats", "-loglevel", "error", "-y", "-f", "rawvideo", "-s:v", "600x400", "-pix_fmt", "rgb24", "-framerate", "50", "-i", "-", "-vf", "scale=trunc(iw/2)*2:trunc(ih/2)*2", "-vcodec", "libx264", "-pix_fmt", "yuv420p", "-r", "50", "C:\\Users\\indez\\Documents\\Flatiron\\mod6project\\videos\\BipedalWalker-v3\\openaigym.video.0.15084.video061814.mp4"]}} -------------------------------------------------------------------------------- /videos/BipedalWalker-v3/openaigym.manifest.0.15084.manifest.json: -------------------------------------------------------------------------------- 1 | {"stats": "openaigym.episode_batch.0.15084.stats.json", "videos": [["openaigym.video.0.15084.video000064.mp4", "openaigym.video.0.15084.video000064.meta.json"], ["openaigym.video.0.15084.video003314.mp4", "openaigym.video.0.15084.video003314.meta.json"], ["openaigym.video.0.15084.video006564.mp4", "openaigym.video.0.15084.video006564.meta.json"], ["openaigym.video.0.15084.video009814.mp4", "openaigym.video.0.15084.video009814.meta.json"], ["openaigym.video.0.15084.video013064.mp4", "openaigym.video.0.15084.video013064.meta.json"], ["openaigym.video.0.15084.video016314.mp4", 
"openaigym.video.0.15084.video016314.meta.json"], ["openaigym.video.0.15084.video019564.mp4", "openaigym.video.0.15084.video019564.meta.json"], ["openaigym.video.0.15084.video022814.mp4", "openaigym.video.0.15084.video022814.meta.json"], ["openaigym.video.0.15084.video026064.mp4", "openaigym.video.0.15084.video026064.meta.json"], ["openaigym.video.0.15084.video029314.mp4", "openaigym.video.0.15084.video029314.meta.json"], ["openaigym.video.0.15084.video032564.mp4", "openaigym.video.0.15084.video032564.meta.json"], ["openaigym.video.0.15084.video035814.mp4", "openaigym.video.0.15084.video035814.meta.json"], ["openaigym.video.0.15084.video039064.mp4", "openaigym.video.0.15084.video039064.meta.json"], ["openaigym.video.0.15084.video042314.mp4", "openaigym.video.0.15084.video042314.meta.json"], ["openaigym.video.0.15084.video045564.mp4", "openaigym.video.0.15084.video045564.meta.json"], ["openaigym.video.0.15084.video048814.mp4", "openaigym.video.0.15084.video048814.meta.json"], ["openaigym.video.0.15084.video052064.mp4", "openaigym.video.0.15084.video052064.meta.json"], ["openaigym.video.0.15084.video055314.mp4", "openaigym.video.0.15084.video055314.meta.json"], ["openaigym.video.0.15084.video058564.mp4", "openaigym.video.0.15084.video058564.meta.json"], ["openaigym.video.0.15084.video061814.mp4", "openaigym.video.0.15084.video061814.meta.json"]], "env_info": {"gym_version": "0.17.3", "env_id": "BipedalWalker-v3"}} -------------------------------------------------------------------------------- /ARS_Carla/code/run_policy.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Code to load a policy and generate rollout data. Adapted from https://github.com/berkeleydeeprlcourse. 
"""
Code to load a policy and generate rollout data. Adapted from https://github.com/berkeleydeeprlcourse.
Example usage:
    python run_policy.py ../trained_policies/Humanoid-v1/policy_reward_11600/lin_policy_plus.npz Humanoid-v1 --render \
    --num_rollouts 20
"""
import numpy as np
import gym

def main():
    """Load a saved linear ARS policy (.npz) and roll it out in a gym env.

    Prints per-rollout progress and the mean/std of episode returns over
    ``--num_rollouts`` episodes.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert rollouts')
    args = parser.parse_args()

    print('loading and building expert policy')
    # allow_pickle=True: the npz stores a single object array
    # [weights, mean, std] (see LinearPolicy.get_weights_plus_stats),
    # which modern numpy refuses to load by default.
    lin_policy = np.load(args.expert_policy_file, allow_pickle=True)
    # BUG FIX: dict-style .items() is a view in Python 3 and is not
    # subscriptable — wrap in list() before indexing.
    lin_policy = list(lin_policy.items())[0][1]

    M = lin_policy[0]
    # mean and std of state vectors estimated online by ARS.
    mean = lin_policy[1]
    std = lin_policy[2]

    env = gym.make(args.envname)
    # BUG FIX: env.spec.timestep_limit was removed from gym; prefer
    # max_episode_steps and fall back to the legacy attribute (or no limit).
    max_steps = (getattr(env.spec, 'max_episode_steps', None)
                 or getattr(env.spec, 'timestep_limit', None)
                 or float('inf'))

    returns = []
    observations = []
    actions = []
    for i in range(args.num_rollouts):
        print('iter', i)
        obs = env.reset()
        done = False
        totalr = 0.
        steps = 0
        while not done:
            # Linear policy: action = M @ normalized observation.
            action = np.dot(M, (obs - mean) / std)
            observations.append(obs)
            actions.append(action)

            obs, r, done, _ = env.step(action)
            totalr += r
            steps += 1
            if args.render:
                env.render()
            if steps % 100 == 0:
                print("%i/%i" % (steps, max_steps))
            if steps >= max_steps:
                break
        returns.append(totalr)

    print('returns', returns)
    print('mean return', np.mean(returns))
    print('std of return', np.std(returns))

if __name__ == '__main__':
    main()
3 | Horia Mania --- hmania@berkeley.edu 4 | Aurelia Guy 5 | Benjamin Recht 6 | 7 | Modified by Nate Cibik to work with Carla application 8 | ''' 9 | 10 | 11 | import numpy as np 12 | from filter import get_filter 13 | 14 | class Policy(object): 15 | 16 | def __init__(self, policy_params): 17 | 18 | self.ob_dim = policy_params['ob_dim'] 19 | self.ac_dim = policy_params['ac_dim'] 20 | self.weights = np.zeros((self.ac_dim, self.ob_dim), dtype=np.float64) 21 | #self.weights = np.zeros(self.ac_dim) 22 | 23 | # a filter for updating statistics of the observations and normalizing inputs to the policies 24 | self.observation_filter = get_filter(policy_params['ob_filter'], 25 | shape=(self.ob_dim,), 26 | mean=policy_params['initial_mean'], 27 | std=policy_params['initial_std']) 28 | #self.observation_filter = None 29 | self.update_filter = True 30 | 31 | def update_weights(self, new_weights): 32 | self.weights = new_weights 33 | return 34 | 35 | def get_weights(self): 36 | return self.weights 37 | 38 | def get_observation_filter(self): 39 | return self.observation_filter 40 | 41 | def act(self, state): 42 | raise NotImplementedError 43 | 44 | def copy(self): 45 | raise NotImplementedError 46 | 47 | class LinearPolicy(Policy): 48 | """ 49 | Linear policy class that computes action as . 
50 | """ 51 | 52 | def __init__(self, policy_params, initial_weights=None): 53 | Policy.__init__(self, policy_params) 54 | if initial_weights is None: 55 | self.weights = np.zeros((self.ac_dim, self.ob_dim), dtype=np.float64) 56 | else: 57 | self.weights = policy_params['initial_weights'] 58 | 59 | def act(self, state, state_filter=True): 60 | if filter: 61 | state = self.observation_filter(state, update=self.update_filter) 62 | return np.dot(self.weights, state) 63 | 64 | def get_weights_plus_stats(self): 65 | 66 | mu, std = self.observation_filter.get_stats() 67 | aux = np.asarray([self.weights, mu, std]) 68 | return aux 69 | 70 | -------------------------------------------------------------------------------- /ARS_Carla/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, ARS contributors (Horia Mania, Aurelia Guy, Benjamin Recht) 2 | All rights reserved. 3 | 4 | Redistribution and use of ARS in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation and/or 12 | other materials provided with the distribution. 13 | 14 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 15 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 18 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 20 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 21 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 23 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 | 25 | -------------------------------------------------------------------------------- 26 | 27 | Code in code/{utils, optimizers} adapted from 28 | https://github.com/openai (MIT License) 29 | Copyright (c) 2016 OpenAI (http://openai.com) 30 | 31 | Code in code/{logz, run_policy} adapted from 32 | https://github.com/berkeleydeeprlcourse (MIT License) 33 | Copyright (c) 2017 berkeleydeeprlcourse 34 | 35 | Permission is hereby granted, free of charge, to any person obtaining a copy 36 | of this software and associated documentation files (the "Software"), to deal 37 | in the Software without restriction, including without limitation the rights 38 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 39 | copies of the Software, and to permit persons to whom the Software is 40 | furnished to do so, subject to the following conditions: 41 | 42 | The above copyright notice and this permission notice shall be included in 43 | all copies or substantial portions of the Software. 44 | 45 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 46 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 47 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 48 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 49 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 50 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 51 | THE SOFTWARE. 52 | -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-16_1250steps_part3/log.txt: -------------------------------------------------------------------------------- 1 | Time Iteration AverageReward StdRewards MaxRewardRollout MinRewardRollout Timesteps LearningRate DeltaStd 2 | 3480.0037546157837 10 19.102696369240327 14.022763644276939 33.700131487494204 -17.130193928741246 9775 0.012372696051115416 0.018559044076673088 3 | 6906.172424316406 20 26.53319621516443 8.568051039328772 33.86958739743582 1.775128108453414 19387 0.012249524379798187 0.018374286569697244 4 | 10371.688838481903 30 27.892307841196505 6.258869807151486 34.93595498990655 8.993525985496033 29272 0.012127578897223686 0.01819136834583549 5 | 13717.378996133804 40 19.21948601944507 14.14186912191187 34.37711380861537 -10.093574025288365 38633 0.012006847396536093 0.018010271094804107 6 | 17184.14987874031 50 26.913970078334728 5.685966312538745 32.24423565152339 8.36125628287386 48511 0.011887317792400303 0.017830976688600425 7 | 20596.109083890915 60 25.63265951357887 7.5797434449044525 32.22352212834502 6.078081428965852 58254 0.011768978119792165 0.017653467179688218 8 | 24016.045884370804 70 17.846356259417334 12.049798417112328 32.43864514671001 -3.106972347720955 68153 0.011651816532800783 0.017477724799201146 9 | 27461.379663705826 80 17.952585521605005 14.396555035792622 33.41806322573282 -11.386459595606834 78056 0.011535821303442717 0.017303731955164044 10 | 30862.932608366013 90 18.65026948049721 13.589464438435158 33.59735541131505 -17.289644560402525 87650 0.011420980820488008 0.01713147123073198 11 | 34246.897911548615 100 19.720413446513426 
13.712764171891983 33.093936184930655 -12.619431053772791 97391 0.011307283588297882 0.01696092538244679 12 | 37678.76226115227 110 1.4897755087495164 7.279711563918499 10.804064949079123 -12.940242398505097 107300 0.011194718225674027 0.016792077338511013 13 | 41040.97807478905 120 13.150748514469136 16.994612140246947 37.188559445973496 -18.527450082969068 117365 0.011083273464719327 0.016624910197078964 14 | 44641.54324054718 130 8.068482085654662 6.284312431500895 26.914036688993995 -4.7277654370790945 127998 0.010972938149709927 0.016459407224564866 15 | 48170.474724292755 140 7.633418927881386 8.162971387666692 23.31760397967585 -7.78524590068302 138798 0.01086370123597854 0.016295551853967784 16 | 51690.232620716095 150 6.701077516320288 7.702630203027482 30.144020466100216 -11.720926324660145 149560 0.010755551788808862 0.016133327683213274 17 | 55279.75283575058 160 1.93254371368034 3.560583798232062 9.374142484586741 -5.339646405009849 160645 0.010648478982341014 0.015972718473511508 18 | 58821.89023017883 170 3.9883493004437796 5.546563576361692 13.055341097694823 -4.906417647493871 171555 0.01054247209848783 0.015813708147731732 19 | 62360.430225133896 180 1.472141757761697 4.6657478794688005 8.570419465348042 -8.591495707921396 182480 0.010437520525861997 0.01565628078879298 20 | 65898.061825037 190 6.16755192164314 8.574713209337451 39.4713170423322 -9.149058923473198 193303 0.010333613758713828 0.015500420638070727 21 | 69196.91781783104 200 9.639319309865293 15.781376345802357 33.92642034177644 -12.46431435521904 202869 0.010230741395879636 0.015346112093819439 22 | 72556.2358353138 210 12.330361976313483 16.47869652280626 34.60617433642707 -17.46888148391768 212546 0.010128893139740566 0.015193339709610836 23 | 76064.42847561836 220 3.031586726679293 4.810987541439182 10.335264308013159 -6.288815109500139 223142 0.010028058795191787 0.015042088192787672 24 | -------------------------------------------------------------------------------- 
/ARS_Carla/code/logz.py: -------------------------------------------------------------------------------- 1 | # Code in this file is copied and adapted from 2 | # https://github.com/berkeleydeeprlcourse 3 | 4 | import json 5 | 6 | """ 7 | 8 | Some simple logging functionality, inspired by rllab's logging. 9 | Assumes that each diagnostic gets logged each iteration 10 | 11 | Call logz.configure_output_dir() to start logging to a 12 | tab-separated-values file (some_folder_name/log.txt) 13 | 14 | """ 15 | 16 | import os.path as osp, shutil, time, atexit, os, subprocess 17 | 18 | color2num = dict( 19 | gray=30, 20 | red=31, 21 | green=32, 22 | yellow=33, 23 | blue=34, 24 | magenta=35, 25 | cyan=36, 26 | white=37, 27 | crimson=38 28 | ) 29 | 30 | def colorize(string, color, bold=False, highlight=False): 31 | attr = [] 32 | num = color2num[color] 33 | if highlight: num += 10 34 | attr.append(str(num)) 35 | if bold: attr.append('1') 36 | return '\x1b[%sm%s\x1b[0m' % (';'.join(attr), string) 37 | 38 | class G(object): 39 | output_dir = None 40 | output_file = None 41 | first_row = True 42 | log_headers = [] 43 | log_current_row = {} 44 | 45 | def configure_output_dir(d=None): 46 | """ 47 | Set output directory to d, or to /tmp/somerandomnumber if d is None 48 | """ 49 | G.first_row = True 50 | G.log_headers = [] 51 | G.log_current_row = {} 52 | 53 | G.output_dir = d or "/tmp/experiments/%i"%int(time.time()) 54 | if not osp.exists(G.output_dir): 55 | os.makedirs(G.output_dir) 56 | G.output_file = open(osp.join(G.output_dir, "log.txt"), 'w') 57 | atexit.register(G.output_file.close) 58 | print(colorize("Logging data to %s"%G.output_file.name, 'green', bold=True)) 59 | 60 | def log_tabular(key, val): 61 | """ 62 | Log a value of some diagnostic 63 | Call this once for each diagnostic quantity, each iteration 64 | """ 65 | if G.first_row: 66 | G.log_headers.append(key) 67 | else: 68 | assert key in G.log_headers, "Trying to introduce a new key %s that you didn't include in 
the first iteration"%key 69 | assert key not in G.log_current_row, "You already set %s this iteration. Maybe you forgot to call dump_tabular()"%key 70 | G.log_current_row[key] = val 71 | 72 | 73 | def save_params(params): 74 | with open(osp.join(G.output_dir, "params.json"), 'w') as out: 75 | out.write(json.dumps(params, separators=(',\n','\t:\t'), sort_keys=True)) 76 | 77 | 78 | def dump_tabular(): 79 | """ 80 | Write all of the diagnostics from the current iteration 81 | """ 82 | vals = [] 83 | key_lens = [len(key) for key in G.log_headers] 84 | max_key_len = max(15,max(key_lens)) 85 | keystr = '%'+'%d'%max_key_len 86 | fmt = "| " + keystr + "s | %15s |" 87 | n_slashes = 22 + max_key_len 88 | print("-"*n_slashes) 89 | for key in G.log_headers: 90 | val = G.log_current_row.get(key, "") 91 | if hasattr(val, "__float__"): valstr = "%8.3g"%val 92 | else: valstr = val 93 | print(fmt%(key, valstr)) 94 | vals.append(val) 95 | print("-"*n_slashes) 96 | if G.output_file is not None: 97 | if G.first_row: 98 | G.output_file.write("\t".join(G.log_headers)) 99 | G.output_file.write("\n") 100 | G.output_file.write("\t".join(map(str,vals))) 101 | G.output_file.write("\n") 102 | G.output_file.flush() 103 | G.log_current_row.clear() 104 | G.first_row=False 105 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to 
inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | .idea/ 161 | -------------------------------------------------------------------------------- /2_train_agent.md: -------------------------------------------------------------------------------- 1 | # Training the Agent 2 | Instructions for running the parallelized agent training process. Before continuing below, be sure you have already followed the steps in the "Setup" section of the [README file](README.md). 3 | 4 | ## Parallel Training using Ray 5 | Now that we have seen that our environment will work with ARS, it is time to do some serious training. 
Running only one car at a time at 15 seconds per episode would take forever to train, so we want to be able to have multiple cars exploring deltas simultaneously and pooling their results together for the update step. To do this, I have altered [the code made available by Mania, Guy, and Recht to reproduce their research on MuJoCo locomotion tasks](https://github.com/modestyachts/ARS) in order to implement the CarEnv instead. Their ARS framework includes functionality for running on a Ray server. 6 | 7 | All of the code being run to perform this parallel training using Ray can be found in the '[ARS_Carla](ARS_Carla/)' folder of this repository. Some notable changes that I have made to the code include: 8 | - I have wrapped the observation filter functionality which normalizes the inputs using rolling statistics into a boolean parameter 'state_filter' which defaults to False, since the VGG19 outputs are all on a standard scale between 0-10. Experimenting with training using the filter may be appropriate for future work. 9 | - I have added learning rate decay, and delta standard deviation (called noise in Skow's code) decay, which can be adjusted by calling --lr_decay and --std_decay parameters. This allows for more exploration early on, and favoring smaller steps over time after some initial progress has been made. 10 | - I have added functionality which supports initializing the ARS process with a saved policy file, so that training can be resumed at a later time. The user can pass the saved policy file in .csv (weights only) or .npz (weights only or weights plus state filter stats) format into the --policy_file parameter. The code will automatically update the saved policy file 'lin_policy_plus.npz' in the assigned --log_dir location every 10 steps by default, but this can be adjusted by setting the new --log_every parameter on execution. The number of rollouts used in each evaluation step can be set with the new --eval_rollouts parameter. 
11 | - I have added a parameter called --show_cam which takes in an int telling the program how many cameras you would like to display from the group of workers. It defaults to 1, but for long training periods I recommend setting it to 0 in order to save CPU overhead. 12 | 13 | ### Train a New Policy from Scratch 14 | 1. Start CARLA server by running the executable in your CARLA installation. 15 | 2. In a **non-administrator** terminal: 16 | - run `conda activate ars_carla` 17 | - run `ray start --head` 18 | - Append parameters to this command that are appropriate for your machine if needed. The CarEnvs take up a lot of RAM, so it may be necessary to set your --object-store-memory and --memory parameters to as high of values as you can. 19 | - You can call up the list of parameters with 'ray start --help'. 20 | 3. Navigate to the ARS_Carla folder in this repository: `cd ARS_Carla` 21 | 4. Explore list of parameter options for training with `python code/ars_carla.py --help` 22 | 5. In another **non-administrator** terminal: 23 | - run `conda activate ars_carla` 24 | - Train a policy using the following command (example arguments shown) 25 | ``` 26 | python code/ars_carla.py --n_iter 1000 --num_deltas 32 --deltas_used 16 --learning_rate 0.02 --lr_decay 0.001 --delta_std 0.03 --std_decay 0.001 --n_workers 4 --show_cam 0 --seconds_per_episode 15 --log_every 10 --eval_rollouts 100 --seed 42 --dir_path .\data\old_logs\2020-12-21_test 27 | ``` 28 | This will connect to the running Ray cluster, and begin creating workers in the Carla server. The policy will be saved and training details logged every 10 iterations by default into the --dir_path that you set (defaults to ./data if not set). 29 | 30 | ### Training a Pre-Existing Policy 31 | Follow the same steps above, except include the --policy_file parameter in your call to `code/ars_carla.py` pointing to the .csv or .npz file containing the weights you are going to build on. 
32 | - The default name of a saved policy file is 'lin_policy_plus.npz'. 33 | - **Note** that this does not automatically recover the current learning rate or delta std from when the training ended, so you will need either recover that from the log file, or start it wherever you want. 34 | 35 | ``` 36 | python code/ars_carla.py --policy_file ./data/old_logs/2020-12-17_1000steps/lin_policy_plus.npz 37 | ``` -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-16_1250steps_part2/log.txt: -------------------------------------------------------------------------------- 1 | Time Iteration AverageReward StdRewards MaxRewardRollout MinRewardRollout Timesteps LearningRate DeltaStd 2 | 3501.397497653961 10 7.671438823535907 8.12010394931593 39.11166100872704 -2.335766120173103 10486 0.017737343751721264 0.026606015627581903 3 | 6995.146368026733 20 12.652928005769615 14.150959078144364 33.53768261428967 -17.444581732322952 21030 0.017560766369912006 0.02634114955486801 4 | 10500.034389257431 30 3.4031146347401515 4.495490684397994 11.18715993268027 -7.267173535547444 31123 0.01738594683709091 0.026078920255636358 5 | 13993.672963142395 40 2.827000129734696 13.545094603594476 31.668486272140466 -25.676402979636123 40913 0.017212867653660723 0.025819301480491073 6 | 17523.15318083763 50 2.161294215652702 2.9279888782370125 7.319331277944584 -3.24955825717891 51074 0.01704151149423478 0.02556226724135216 7 | 20835.828765153885 60 7.026958125840407 8.654584188989887 30.530776418588044 -17.829508664186633 60608 0.016871861205902724 0.025307791808854067 8 | 24327.380190610886 70 4.855743892268028 6.5493224575321465 19.62641319065838 -6.403067985710903 70506 0.01670389980651346 0.025055849709770168 9 | 27826.480825662613 80 4.868425043707901 16.743148028991527 33.89091267907321 -58.69075575132494 81149 0.016537610482975258 0.024806415724462856 10 | 31322.00183200836 90 5.1893850299919855 6.63765735883717 
19.77259752326366 -8.825031381465624 92661 0.01637297658957272 0.02455946488435904 11 | 34160.24541759491 100 3.6814095428676596 16.73667514597855 36.60953389850356 -25.55965760628325 100802 0.016209981646300532 0.024314972469450757 12 | 39047.943504333496 110 -1.4678242431542259 15.302817808738393 32.13674225673491 -39.71219588296523 110997 0.016048609337213825 0.024072914005820702 13 | 42325.51155424118 120 2.036800456545468 5.767232546438116 11.284143740983607 -15.265060751222036 120512 0.015888843508794912 0.023833265263192318 14 | 45683.58576798439 130 2.680870700093881 16.595977604343787 32.538124756603736 -28.17311163990257 129886 0.01573066816833629 0.02359600225250439 15 | 48998.78708577156 140 7.135430549613234 16.90176187095837 33.48360933084875 -28.845997645616542 139350 0.0155740674823398 0.023361101223509657 16 | 52395.40455698967 150 6.786080350069404 20.94382761415299 33.35027320131576 -46.350269719006846 149116 0.015419025774931643 0.023128538662397426 17 | 55735.65903401375 160 8.573849393354147 17.706846474525086 33.065388123024405 -30.303617867592468 158746 0.015265527526293218 0.022898291289439786 18 | 59142.97088599205 170 7.70037998202929 14.079507574000836 30.563243211519257 -22.798061659325015 168406 0.01511355737110758 0.02267033605666133 19 | 62578.54021835327 180 4.122486507555646 16.40841660054083 30.958512379303116 -39.342606097365554 178286 0.014963100097021361 0.022444650145532003 20 | 65956.58520674706 190 7.0297401653693985 14.781729047050229 29.615264279710438 -25.11198019088713 187968 0.014814140643121986 0.022221210964682943 21 | 69235.04050445557 200 -1.1998549462021422 19.39545379573951 31.552325907527337 -45.10658750641263 197462 0.014666664098430069 0.021999996147645067 22 | 72556.5299937725 210 1.3836886370199246 17.492213761978665 33.891647263397694 -46.45022080738933 207085 0.014520655700406808 0.021780983550610183 23 | 75958.48812556267 220 0.5093522044585436 7.619548202202525 11.05484414062106 -15.107341089066836 216797 
0.014376100833476253 0.02156415125021435 24 | 79515.66375613213 230 7.926693784433167 9.000715509850208 39.356440720866225 -13.220100264309437 227685 0.014232985027562256 0.021349477541343354 25 | 83024.39015007019 240 3.2763865745369536 5.080343784100701 11.392256769482891 -7.003922886650756 237597 0.014091293956640015 0.021136940934959988 26 | 86529.91059803963 250 3.5808425817293537 7.233757898773167 14.013684888319933 -11.009355355157858 248230 0.013951013437302012 0.020926520155952984 27 | 90053.74342870712 260 2.7733204671910556 9.383895927108744 30.232478886842404 -18.07310256730253 258442 0.013812129427338258 0.02071819414100735 28 | 93592.80687451363 270 2.668826238609604 6.15248619022918 14.314605348199606 -17.221810780774756 268597 0.013674628024330643 0.02051194203649593 29 | 97107.37986683846 280 0.4120882377212114 6.392631607261827 11.131917341429848 -9.375146350480124 278844 0.013538495464261297 0.02030774319639191 30 | 100653.61778140068 290 -1.1283080236796281 4.472559649763016 5.356845277287349 -12.015598060280972 289512 0.013403718120134796 0.020105577180202158 31 | 104050.98913860321 300 0.5988635677575356 6.55017270480862 10.741428503164387 -16.071294303768294 299455 0.013270282500614083 0.019905423750921095 32 | 107484.07497215271 310 -10.519035833201805 14.875719418019099 13.735279615050981 -35.051786628619524 309860 0.013138175248669986 0.019707262873004953 33 | 110088.39070010185 320 -4.355172918941315 13.156332189779578 16.612537158169506 -27.665589170865363 316811 0.013007383140244154 0.019511074710366207 34 | 113003.2191889286 330 -10.314033611852937 17.670062639216045 28.408605159422113 -55.43130072194767 325070 0.012877893082925322 0.019316839624387957 35 | 115640.80614733696 340 -6.347116141440447 18.277343835586887 27.350135796529354 -56.52059495603551 332148 0.012749692114638745 0.01912453817195809 36 | 118812.45344877243 350 0.7226358678415874 5.090926718031306 11.673857892370556 -10.00662556217986 341198 0.012622767402348687 
0.018934151103523002 37 | 122278.90013837814 360 -0.46944419622392664 14.959664189010462 25.84898190794901 -40.32171048028982 351028 0.012497106240773818 0.0187456593611607 38 | -------------------------------------------------------------------------------- /ARS_Carla/data/old_logs/2020-12-16_1250steps_part4/log.txt: -------------------------------------------------------------------------------- 1 | Time Iteration AverageReward StdRewards MaxRewardRollout MinRewardRollout Timesteps LearningRate DeltaStd 2 | 3595.369238138199 10 2.483971846006747 6.636370883785107 10.474950871003944 -10.358804576115208 10976 0.009928228268621964 0.014892342402932938 3 | 7204.212927341461 20 0.8106579281563822 4.0865456046071165 7.4539051899259 -7.235943709727749 22136 0.00982939156690287 0.014744087350354295 4 | 10795.156468868256 30 5.237790368802971 5.840685858872214 13.46043280951239 -7.5604510055169465 33206 0.00973153879638906 0.014597308194583578 5 | 14343.599791765213 40 2.5367067431035686 5.743187373576168 15.794858566538544 -8.660802246766778 44068 0.009634660161927525 0.014451990242891276 6 | 17926.64273738861 50 4.8454659853909625 4.4625807279289695 13.278999497605758 -3.9860230859269725 55130 0.00953874596587717 0.014308118948815743 7 | 21559.25502705574 60 4.899596127823429 10.033535836991081 39.33467211382369 -6.382066488378829 66373 0.009443786607138082 0.014165679910707111 8 | 25151.221185684204 70 4.859910362986818 4.933519535168362 12.974110005737083 -7.748874897436277 77415 0.009349772580190447 0.014024658870285657 9 | 28749.39368200302 80 6.454434555853027 8.472752927993735 27.75194865911409 -9.781972020673944 88404 0.009256694474143038 0.013885041711214541 10 | 32352.789509296417 90 4.7182593820303875 7.943424352612805 39.490160571320054 -7.417356889487735 99605 0.009164542971791187 0.013746814457686756 11 | 35899.923395872116 100 3.577800351323839 5.082697949505149 14.575501406964182 -5.45620589533792 110457 0.009073308848684096 0.013609963273026118 12 | 
39470.74469137192 110 4.969706470022993 7.805485895670962 34.2384245094646 -8.366239320061826 121430 0.008982982972201493 0.01347447445830222 13 | 43036.466530799866 120 7.499827451546374 8.067040443919788 25.26241348344617 -9.514670596776465 132343 0.008893556300639439 0.013340334450959132 14 | 46512.43024778366 130 3.446237880626523 6.134253667823204 12.889891187035893 -8.596804075829649 142904 0.008805019882305223 0.013207529823457811 15 | 50020.16182208061 140 1.2723602357155352 11.048651953166214 33.4314766406634 -30.24203942978769 153403 0.008717364854621325 0.013076047281931968 16 | 53517.340678453445 150 2.241216432078537 4.8933489243784525 14.176127584001765 -7.0717401552917 164140 0.008630582443238239 0.01294587366485734 17 | 57057.6213722229 160 4.653181430055686 9.500132369932228 39.58494506508086 -9.399181791912293 175071 0.008544663961156157 0.012816995941734216 18 | 60581.30990147591 170 8.335157560183001 5.771820178024454 23.732092159037354 -0.3684060679554462 185819 0.008459600807855402 0.012689401211783082 19 | 64171.6130797863 180 5.471160565984282 7.611705181993327 36.25274729433023 -6.643481445969301 196844 0.00837538446843549 0.012563076702653216 20 | 67767.77277731895 190 4.776529371180256 9.264676305193445 28.260801466326186 -11.122803284497364 207733 0.0082920065127628 0.012438009769144182 21 | 71309.03654098511 200 1.7399019140356995 6.154972916849342 14.579588108369723 -9.93528791166378 218419 0.008209458594626696 0.012314187891940031 22 | 74820.92268514633 210 2.074766201210002 5.18499922977276 14.550130739100963 -8.656623950790408 229148 0.008127732450904075 0.012191598676356101 23 | 78378.85832619667 220 2.2597663183860486 5.590940462244722 11.913850852927462 -8.954640469900118 240000 0.008046819900732208 0.0120702298510983 24 | 81834.40345621109 230 12.07784501122117 14.877178695769482 33.59677702391711 -18.041955720670988 250308 0.007966712844689838 0.011950069267034744 25 | 85416.94610095024 240 4.739421114534369 5.410998307604792 
17.290637047694684 -10.748046706707042 261252 0.007887403263986414 0.011831104895979605 26 | 88790.2525730133 250 13.54752662516826 13.172739965252275 31.870514483762797 -17.34185837159009 271278 0.007808883219659405 0.011713324829489094 27 | 92274.59388184547 260 5.120750595478142 4.977403336737703 14.305653963168849 -11.052720916245491 281681 0.007731144851779608 0.0115967172776694 28 | 95840.71329641342 270 3.8599164090633717 8.934461398882581 28.340639881931384 -10.833492014784893 292467 0.007654180378664354 0.011481270567996521 29 | 99442.48598265648 280 -0.7942970511887811 6.819958350912569 12.326839469884249 -13.709386276283086 303545 0.007577982096098555 0.011366973144147828 30 | 103044.26683163643 290 7.185590971295367 9.422203953216023 31.395692351187474 -12.580334511058444 314520 0.00750254237656351 0.011253813564845264 31 | 106585.51752305031 300 6.295390595908332 5.888943944165857 15.975936349702996 -11.789461871638716 325298 0.007427853668473379 0.011141780502710068 32 | 110202.73489618301 310 0.25144104027826863 6.284267326855424 11.411589130362602 -12.977718502177963 336338 0.0073539084954192665 0.011030862743128898 33 | 113802.25816082954 320 0.99272823977215 6.667649306216759 13.148615821566658 -10.185671974244219 347406 0.007280699455420816 0.010921049183131224 34 | 117384.3557984829 330 3.5755484356034635 6.0547056505295584 16.041329044092603 -9.31481807977866 358394 0.007208219220185281 0.01081232883027792 35 | 120933.02275252342 340 2.4148675856702453 7.926252888242236 36.97631236852636 -8.165689415910984 369219 0.007136460534373942 0.010704690801560912 36 | 124550.7697558403 350 2.113430961563331 5.034513708812462 11.931875035838829 -10.751468707807888 380479 0.007065416214875844 0.010598124322313768 37 | 128157.1714463234 360 3.6923227770668334 7.51634438395936 16.064856188429616 -11.771609784644255 391644 0.006995079150088768 0.010492618725133155 38 | 131677.69003129005 370 2.804879815562905 12.327284620526568 36.403763827571176 
-18.059140976688834 402447 0.0069254422992073425 0.010388163448811015 39 | 135200.79145860672 380 4.6386933690138035 5.136320831442303 11.405888720220963 -9.887250255523536 413204 0.006856498691518256 0.010284748037277386 40 | 138728.06091570854 390 3.7607922945833128 7.858875470106901 36.21479235030556 -10.868918301920719 424051 0.006788241425702487 0.010182362138553731 41 | 142232.29185843468 400 15.491206702462268 15.166865079645408 32.789860818624994 -24.674587826491184 434628 0.006720663669144469 0.010080995503716706 42 | 145692.64837932587 410 6.924463121574166 19.12514437567291 32.84877298668823 -39.690362031064154 444992 0.006653758657248142 0.009980637985872218 43 | 149171.929363966 420 3.200756274073047 4.273660183704282 14.69616851206086 -10.130798544042785 455134 0.006587519692759813 0.009881279539139724 44 | 152747.10377764702 430 0.16643626969394254 5.079333471958028 9.479589770527554 -9.460320853807717 465756 0.006521940145097747 0.009782910217646624 45 | 156316.14569592476 440 4.227339895314584 5.438936367037851 13.093882004383017 -5.770994738854634 475590 0.0064570134496884455 0.00968552017453267 46 | 159766.8038764 450 7.222859103375848 16.233684286805406 31.305142243251545 -25.355348883040048 485316 0.0063927331073095305 0.0095890996609643 47 | 163159.0805399418 460 13.386645218340274 18.461866536681054 34.68675272558722 -34.5709694787403 495130 0.006329092683439154 0.009493639025158736 48 | 166606.47481751442 470 0.7193441345657394 11.212088516768693 36.72321156854904 -13.343536999136314 505302 0.006266085807611912 0.009399128711417872 49 | 170199.93497347832 480 2.1326742302920594 8.587273018701328 36.325939236206615 -8.494916372781208 516348 0.006203706172781138 0.009305559259171708 50 | 176823.47868919373 490 1.4039761140082565 5.940032501853362 10.91604293032971 -9.298746171540932 526923 0.006141947534687577 0.009212921302031368 51 | 180327.80352258682 500 2.5839619981528377 6.466036566763536 13.491499893292529 -11.960665136172095 537590 
0.006080803711234322 0.009121205566851481 52 | 183925.0451142788 510 3.710776776591912 5.112243321084118 12.599561395814975 -6.667983026168453 548703 0.006020268581867977 0.009030402872801966 53 | 187475.26344299316 520 3.1930729175489243 4.389879162110938 11.773952419551556 -7.977843676725456 559559 0.005960336086965992 0.008940504130448989 54 | 191051.68546581268 530 1.5604244200230823 9.290762607107041 39.45012288818214 -12.104000873017087 570570 0.0059010002272300854 0.00885150034084513 55 | 194603.85883069038 540 4.701893810996976 5.0760521451794345 16.66148426052861 -3.9545737250907185 581409 0.005842255063085708 0.008763382594628564 56 | 198165.82372927666 550 1.1376362139709648 5.954078897665362 11.756771033265235 -13.953963178608888 592272 0.0057840947140874845 0.008676142071131229 57 | 201760.10316181183 560 2.139579159519426 5.166459431147396 16.296597928427225 -8.62593648462421 603433 0.005726513358330582 0.008589770037495875 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Setup 2 | 3 | To run the notebooks or scripts in this repo, take the following steps: 4 | 1. Install CARLA 0.9.13 5 | - Follow instructions in [CARLA quickstart guide](https://carla.readthedocs.io/en/latest/start_quickstart/). 6 | 2. Create conda env 7 | - `conda env create -f environment.yaml` 8 | 3. Activate conda env 9 | - `conda activate ars_carla` 10 | 4. Train a policy in CARLA 11 | - A walkthrough of the code can be found in [1_research.ipynb](1_research.ipynb) 12 | - To train agents in parallel using Ray.io, follow the instruction in [2_train_agent.md](2_train_agent.md) 13 | 14 | *NOTE* 15 | If you have CuDNN installed alongside TensorFlow, it can cause errors with this code. If you experience an issue, temporarily change the name of the folder containing your CuDNN installation so that python can not find it when loading TensorFlow. 
16 | 17 | ## Contents 18 | - [1_research](1_research.ipynb) - contains the abstract for this study, and a concise review of the research which made it possible. The notebook offers an intuitive explanation of the math behind the learning algorithm, and provides a basic coding framework of the process which makes it easy to understand the mechanics involved in training the agent, built off of the [ARS framework provided by Colin Skow](https://github.com/colinskow/move37/tree/master/ars). 19 | 20 | - [2_train_agent](2_train_agent.md) - instructions to run a parallelized training session using the code in the '[ARS_Carla/](ARS_Carla/)' folder. 21 | 22 | - [3_results_analysis](3_results_analysis.ipynb) - reviews and analyzes the results of the 5-day training period of this study, then draws conclusions and lays out what steps should be taken in future research using the training framework. 23 | 24 | - [ARS_Carla/](ARS_Carla) - contains a modified version of the code provided by the authors of the 2018 paper on ARS (Mania, Guy, and Recht) to reproduce their results using the environments in Python's gym module, which employs parallelization through use of the [Ray package](https://docs.ray.io/en/latest/) for Python. Their code has been modified to make use of [Sentdex's CarEnv class](https://pythonprogramming.net/reinforcement-learning-self-driving-autonomous-cars-carla-python/) that he used to train a Deep Q-Learning Network (DQN), which itself has been modified to function in the context of ARS learning. Useful functionality has been added to the ARS code which allows the user to resume training of an existing policy. 25 | 26 | - [ars_practice](ars_practice.ipynb) - contains preliminary research into the ARS algorithm that was done by recreating Skow's ARS example using the BipedalWalker-v3 environment from Python's Gym module. This experiment was helpful to get a baseline concept of how many training episodes the algorithm needed to effectively train an agent. 
Videos of the resulting policy can be found in the '[videos](videos/)' folder. 27 | 28 | 29 | #### Project Links: 30 | - [Blog Post](https://natecibik.medium.com/training-autonomous-vehicles-using-augmented-random-search-in-carla-19fcbe62b697) - Summarizes the research, methodology, and conclusions of this study. 31 | - [Non-technical Presentation](https://youtu.be/ILbmBa5MAtI) - Presents the business insights gained from this study for a non-technical audience. 32 | 33 | # Training Autonomous Vehicles using ARS in Carla 34 | 35 | This repository offers a learning framework for training self-driving cars in the [Carla simulator](https://carla.org/) using [Augmented Random Search (ARS)](https://arxiv.org/pdf/1803.07055.pdf), as well as a study of the first implementation of this learning framework. ARS is an exciting new algorithm in the field of Reinforcement Learning (RL) which has been shown to produce competitive to superior results on benchmark MuJoCo tasks, while offering at least 15x computational efficiency in comparison to other model-free learning methods. This study sought to investigate the application of this algorithm in the context of autonomous vehicles using Carla as a training environment. Below, we can see the math behind the ARS algorithm. 36 | 37 | ![The math behind ARS](images/ars_formula_explained.png) 38 | 39 | For an initial investigation into using ARS to train a self-driving car policy, an attempt was made to train the agent to perform the simple task of driving through empty streets without hitting anything using only the input from a single front-facing RGB camera. This RGB camera data was converted to more generalized edge-case representations by first passing it through a pre-trained VGG19 CNN on its way into the ARS perceptron layer, as shown below: 40 | 41 | ![Worker I/O](images/WorkerIO.png) 42 | 43 | Below is a graphic summarizing the results of the training period of this study, which was just over 5 days (121.43 hours). 
Learning rate and delta std decay was applied, so the changing values of these hyperparameters over time is shown. 44 | 45 | ![Training Results](images/training_results.png) 46 | 47 | The results of this study were inconclusive in demonstrating the ability of ARS to train a self-driving car policy. We can see from the above chart that after 1250 training iterations, substantial progress was not made in the average rewards of each evaluation step. There does appear to be a reduction in standard deviation of rewards over time, but without the desired increase in average reward. There is a slow decline in average reward in the early period of the training, then a spike upwards around 500 training iterations, after which there is another decline. This may be an indication that the learning rate is set too high for this task. It may also be an indication that the delta std was set too high as well. More testing with different hyperparameter values, tweaking of the reward system, and longer duration of training is warranted. Several insights about how to improve these results in the future were obtained, and are covered in detail in the [third notebook](3_results_analysis.ipynb). The learning framework created in this repository can be used to facilitate these future investigations. 48 | 49 | One important point of consideration is that workers were spawned in random locations for each episode, and it is likely that this led to unfair comparisons of the rewards produced by the positive and negative additions of each delta used to make each update step, causing less meaningful alterations to the policy weights. Starting a worker in the same location for the positive and negative addition of a given delta to the weights would be a great start for fixing this issue. 
In future work, it would also be better to include more rollouts in each policy evaluation step, since this would evaluate the current state of the policy on a wider array of random starting positions and give a more generalized perspective on its performance by reducing the influence of luck on the policy evaluation process. This would increase the amount of time needed for training, which is already quite large, so in the future training should be scaled up onto bigger hardware, using more parallel workers and thus reducing the time required to make each update step. 50 | 51 | The task at hand in this study was relatively simple for this domain: drive around empty streets without smashing into anything, using edge depictions generated by a CNN from RGB camera sensor input. In reality, there are many individual tasks within the context of autonomous driving that researchers seek to accomplish using machine learning and sensory input, including object detection/classification, determination of drivable area, trajectory planning, localization, and many more. The ARS algorithm is used to train agents to perform continuous control locomotion tasks using a single-layer perceptron on a given set of inputs, and it is likely that this capability would be facilitated by including among these inputs the predictions of models pre-trained to perform these specific driving-related tasks, and more research is warranted to explore such possible input configurations to the algorithm. 52 | 53 | The safety and scalability of learning in a simulated environment such as Carla provides the opportunity to assess the potential of various combinations of learning algorithms and sensory input configurations to perform a given task or set of tasks. Once models are trained to effectively perform these individual tasks using the sensor data, their outputs may then be combined and passed as inputs into RL algorithms such as ARS to train autonomous navigation policies. 
The computational simplicity of ARS makes it possible to perform more simultaneous virtual training operations using a given set of inputs for any given amount of available computational resources, allowing for the efficacy of an input schema to be evaluated in a timely fashion. This means that ARS has the potential to expedite the discovery of powerful ways to combine the outputs of task-specific models to facilitate policy training. The learning framework constructed in this study offers a structural foundation on which to explore these possibilities.

--------------------------------------------------------------------------------
/ARS_Carla/code/filter.py:
--------------------------------------------------------------------------------

# Code in this file is copied and adapted from
# https://github.com/ray-project/ray/blob/master/python/ray/rllib/utils/filter.py

# This code has been modified by Nate Cibik in order to accept previous distribution
# information to use in the event of resuming paused/recovered training session.


from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np


class Filter(object):
    """Processes input, possibly statefully."""

    def update(self, other, *args, **kwargs):
        """Updates self with "new state" from other filter."""
        raise NotImplementedError

    def copy(self):
        """Creates a new object with same state as self.

        Returns:
            copy (Filter): Copy of self"""
        raise NotImplementedError

    def sync(self, other):
        """Copies all state from other filter to self."""
        raise NotImplementedError


class NoFilter(Filter):
    """Pass-through filter: returns observations unchanged (as float64) and
    reports identity statistics (mean 0, var/std 1)."""

    def __init__(self, *args):
        pass

    def __call__(self, x, update=True):
        # `update` is accepted only for interface compatibility; it is ignored.
        return np.asarray(x, dtype = np.float64)

    def update(self, other, *args, **kwargs):
        pass

    def copy(self):
        # Stateless, so returning self is safe.
        return self

    def sync(self, other):
        pass

    def stats_increment(self):
        pass

    def clear_buffer(self):
        pass

    def get_stats(self):
        # (mean, std) of the identity normalization.
        return 0, 1

    @property
    def mean(self):
        return 0

    @property
    def var(self):
        return 1

    @property
    def std(self):
        return 1



# http://www.johndcook.com/blog/standard_deviation/
class RunningStat(object):
    # Welford-style running mean/variance accumulator.
    # _n: number of samples pushed; _M: running mean; _S: running sum of
    # squared deviations (the variance numerator used by `var`).

    def __init__(self, shape=None, mean=None, std=None):
        self._n = 0
        # NOTE(review): `mean or np.zeros(...)` raises ValueError ("truth value
        # of an array with more than one element is ambiguous") whenever a
        # multi-element ndarray is passed in to resume training; should be
        # `mean if mean is not None else np.zeros(...)` — confirm and fix.
        self._M = mean or np.zeros(shape, dtype = np.float64)
        # NOTE(review): this seeds _S with a *std* array, but _S is treated as
        # the sum of squared deviations everywhere else (see `var`), so resumed
        # statistics would be inconsistent; same truthiness problem as above.
        self._S = std or np.zeros(shape, dtype = np.float64)
        self._M2 = np.zeros(shape, dtype = np.float64)

    def copy(self):
        # NOTE(review): _M2 is not copied, and RunningStat() is constructed
        # with shape=None here — verify np.zeros(None) behaves as intended
        # before relying on the copy's _M2/_S shapes.
        other = RunningStat()
        other._n = self._n
        other._M = np.copy(self._M)
        other._S = np.copy(self._S)
        return other

    def push(self, x):
        """Fold one observation into the running mean/variance."""
        x = np.asarray(x)
        # Unvectorized update of the running statistics.
        assert x.shape == self._M.shape, ("x.shape = {}, self.shape = {}"
                                          .format(x.shape, self._M.shape))
        n1 = self._n
        self._n += 1
        if self._n == 1:
            self._M[...] = x
        else:
            delta = x - self._M
            # NOTE(review): deltaM2 is computed but never used, and _M2 is
            # never updated after __init__ — this looks like dead code.
            deltaM2 = np.square(x) - self._M2
            self._M[...] += delta / self._n
            self._S[...] += delta * delta * n1 / self._n


    def update(self, other):
        """Merge another RunningStat into this one (parallel-variance merge
        of two accumulators over disjoint sample sets)."""
        n1 = self._n
        n2 = other._n
        n = n1 + n2
        delta = self._M - other._M
        delta2 = delta * delta
        M = (n1 * self._M + n2 * other._M) / n
        S = self._S + other._S + delta2 * n1 * n2 / n
        self._n = n
        self._M = M
        self._S = S

    def __repr__(self):
        return '(n={}, mean_mean={}, mean_std={})'.format(
            self.n, np.mean(self.mean), np.mean(self.std))

    @property
    def n(self):
        return self._n

    @property
    def mean(self):
        return self._M

    @property
    def var(self):
        # Sample variance (ddof=1); with <2 samples falls back to mean².
        return self._S / (self._n - 1) if self._n > 1 else np.square(self._M)

    @property
    def std(self):
        return np.sqrt(self.var)

    @property
    def shape(self):
        return self._M.shape


class MeanStdFilter(Filter):
    """Keeps track of a running mean for seen states"""

    def __init__(self, shape, demean=True, destd=True, mean=None, std=None):
        self.shape = shape
        self.demean = demean
        self.destd = destd
        # NOTE(review): passing ndarray mean/std through here hits the
        # `mean or np.zeros(...)` truthiness bug in RunningStat.__init__.
        self.rs = RunningStat(shape, mean=mean, std=std)
        # In distributed rollouts, each worker sees different states.
        # The buffer is used to keep track of deltas amongst all the
        # observation filters.

        self.buffer = RunningStat(shape)

        # self.mean / self.std are the *frozen* normalization stats actually
        # applied in __call__; they are refreshed from `rs` by stats_increment.
        if mean is None:
            self.mean = np.zeros(shape, dtype = np.float64)
        else:
            self.mean = mean

        if std is None:
            self.std = np.ones(shape, dtype = np.float64)
        else:
            self.std = std

    def clear_buffer(self):
        self.buffer = RunningStat(self.shape)
        return

    def update(self, other, copy_buffer=False):
        """Takes another filter and only applies the information from the
        buffer.

        Using notation `F(state, buffer)`
        Given `Filter1(x1, y1)` and `Filter2(x2, yt)`,
        `update` modifies `Filter1` to `Filter1(x1 + yt, y1)`
        If `copy_buffer`, then `Filter1` is modified to
        `Filter1(x1 + yt, yt)`.
        """
        self.rs.update(other.buffer)
        if copy_buffer:
            self.buffer = other.buffer.copy()
        return

    def copy(self):
        """Returns a copy of Filter."""
        other = MeanStdFilter(self.shape)
        other.demean = self.demean
        other.destd = self.destd
        other.rs = self.rs.copy()
        other.buffer = self.buffer.copy()
        return other

    def sync(self, other):
        """Syncs all fields together from other filter.

        Using notation `F(state, buffer)`
        Given `Filter1(x1, y1)` and `Filter2(x2, yt)`,
        `sync` modifies `Filter1` to `Filter1(x2, yt)`
        """
        assert other.shape == self.shape, "Shapes don't match!"
        self.demean = other.demean
        self.destd = other.destd
        self.rs = other.rs.copy()
        self.buffer = other.buffer.copy()
        return

    def __call__(self, x, update=True):
        """Normalize x with the frozen mean/std; optionally record it into
        the running statistics (rs and buffer) first."""
        x = np.asarray(x, dtype = np.float64)
        if update:
            if len(x.shape) == len(self.rs.shape) + 1:
                # The vectorized case.
                for i in range(x.shape[0]):
                    self.rs.push(x[i])
                    self.buffer.push(x[i])
            else:
                # The unvectorized case.
                self.rs.push(x)
                self.buffer.push(x)
        if self.demean:
            x = x - self.mean
        if self.destd:
            x = x / (self.std + 1e-8)
        return x

    def stats_increment(self):
        # Refresh the frozen normalization stats from the accumulator.
        self.mean = self.rs.mean
        self.std = self.rs.std

        # Set values for std less than 1e-7 to +inf to avoid
        # dividing by zero. State elements with zero variance
        # are set to zero as a result.
        self.std[self.std < 1e-7] = float("inf")
        return

    def get_stats(self):
        return self.rs.mean, (self.rs.std + 1e-8)

    def __repr__(self):
        # NOTE(review): six placeholders but only four arguments — calling
        # repr() on a MeanStdFilter raises IndexError; drop two placeholders.
        return 'MeanStdFilter({}, {}, {}, {}, {}, {})'.format(
            self.shape, self.demean,
            self.rs, self.buffer)


def get_filter(filter_config, shape=None, mean=None, std=None):
    """Factory: build the observation filter named by filter_config."""
    if filter_config == "MeanStdFilter":
        return MeanStdFilter(shape, mean=mean, std=std)
    elif filter_config == "NoFilter":
        return NoFilter()
    else:
        raise Exception("Unknown observation_filter: " +
                        str(filter_config))


def test_running_stat():
    # Sanity check: RunningStat matches np.mean / np.var(ddof=1).
    for shp in ((), (3,), (3, 4)):
        li = []
        rs = RunningStat(shp)
        for _ in range(5):
            val = np.random.randn(*shp)
            rs.push(val)
            li.append(val)
            m = np.mean(li, axis=0)
            assert np.allclose(rs.mean, m)
            v = np.square(m) if (len(li) == 1) else np.var(li, ddof=1, axis=0)
            assert np.allclose(rs.var, v)


def test_combining_stat():
    # Sanity check: merging two accumulators equals one combined accumulator.
    for shape in [(), (3,), (3, 4)]:
        li = []
        rs1 = RunningStat(shape)
        rs2 = RunningStat(shape)
        rs = RunningStat(shape)
        for _ in range(5):
            val = np.random.randn(*shape)
            rs1.push(val)
            rs.push(val)
            li.append(val)
        for _ in range(9):
            rs2.push(val)
            rs.push(val)
            li.append(val)
        rs1.update(rs2)
        assert np.allclose(rs.mean, rs1.mean)
        assert np.allclose(rs.std, rs1.std)


# NOTE(review): these self-tests run on every import of this module.
test_running_stat()
test_combining_stat()

--------------------------------------------------------------------------------
/ARS_Carla/code/ars_carla.py:
--------------------------------------------------------------------------------

"""
Parallel implementation of the Augmented Random Search method.
Horia Mania --- hmania@berkeley.edu
Aurelia Guy
Benjamin Recht

!!!This code has been modified by Nate Cibik in order to work with a Carla
Client Environment.

The CarEnv() Class used is highly based off of that made
by Sentdex in his Deep Q Learning tutorial series using Carla.

Multiple arguments have been added to the argparser at the bottom of this
code to increase the functionality with Carla, as well as to add the ability
to resume training from a pre-existing policy.

Optional Learning rate and Delta Standard Deviation Decay functionality have
been added.
"""

import os
import cv2
import random
import math
import sys
import glob
import time
from typing import Union, List

import logz
import ray
import utils
import optimizers
from policies import *
import socket
from shared_noise import *

from tensorflow.keras.applications import VGG19
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession

# carla should be installed via conda, or use with a different version
try:
    import carla
except ImportError:
    sys.path.append(
        glob.glob(
            r"D:\CARLA_0.9.13\PythonAPI\carla\dist\carla-*%d.%d-%s.egg"
            % (
                sys.version_info.major,
                sys.version_info.minor,
                "win-amd64" if os.name == "nt" else "linux-x86_64",
            )
        )[0]
    )
    import carla

from carla import ActorBlueprint, BlueprintLibrary, World, Actor

# gpus = tf.config.experimental.list_physical_devices('GPU')
# gpu_memory_limit = [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=5044)]
# try:
#     tf.config.experimental.set_virtual_device_configuration(gpus[0], gpu_memory_limit)
# except Exception as e:
#     raise e
# config = ConfigProto()
# config.gpu_options.allow_growth = true
# session = InteractiveSession(config=config)

os.environ["MKL_NUM_THREADS"] = "1"
# os.environ["TF_USE_CUDNN"] = "0"
# os.environ["CUDA_VISIBLE_DEVICES"]="-1"


# Carla Car Env based on work of Sentdex
class CarEnv:
    """Gym-like wrapper around a Carla client: spawns a vehicle with an RGB
    camera and a collision sensor, and exposes reset()/step()."""

    def __init__(
        self,
        img_width=224,
        img_height=224,
        show_cam=False,
        seconds_per_episode=15,
        control_type="continuous",
        car_model="mustang",
    ):
        self.img_width: int = img_width
        self.img_height: int = img_height
        # Connects to a Carla server assumed to be running locally on 2000.
        self.client: carla.Client = carla.Client("localhost", 2000)
        self.client.set_timeout(5.0)
        self.world: World = self.client.get_world()
        self.blueprint_library: BlueprintLibrary = self.world.get_blueprint_library()
        self.car: ActorBlueprint = self.blueprint_library.filter(car_model)[0]
        self.show_cam: bool = show_cam
        self.control_type: str = control_type
        # Latest camera frame; written asynchronously by process_img().
        self.front_camera: Union[np.ndarray, None] = None
        self.actor_list: List[Actor] = []
        self.seconds_per_episode: int = seconds_per_episode
        self.collision_hist: list = []
        self.steering_cache: list = []

        if self.control_type == "continuous":
            self.action_space = np.array(["throttle", "steer", "brake"])

    def reset(self) -> np.ndarray:
        """Destroy previous actors, respawn the vehicle and sensors, and
        return the first camera frame."""
        self.collision_hist = []
        self.steering_cache = []

        if len(self.actor_list) > 0:
            for actor in self.actor_list:
                actor.destroy()
            self.actor_list = []

        try:
            self.transform = random.choice(self.world.get_map().get_spawn_points())
            self.vehicle = self.world.spawn_actor(self.car, self.transform)
            self.actor_list.append(self.vehicle)
        # NOTE(review): bare except, and the recursive retry is not returned —
        # after a successful retry this frame continues and attaches a second
        # set of sensors to the vehicle; should be `return self.reset()` with a
        # narrower exception type. Confirm against Carla spawn failure modes.
        except:
            self.reset()

        # Attach RGB Camera
        self.rgb_cam: ActorBlueprint = self.blueprint_library.find("sensor.camera.rgb")
        self.rgb_cam.set_attribute("image_size_x", f"{self.img_width}")
        self.rgb_cam.set_attribute("image_size_y", f"{self.img_height}")
        self.rgb_cam.set_attribute("fov", f"110")

        transform = carla.Transform(carla.Location(x=2.5, z=0.7))
        self.rgb_sensor: Actor = self.world.spawn_actor(self.rgb_cam, transform, attach_to=self.vehicle)
        self.actor_list.append(self.rgb_sensor)
        self.rgb_sensor.listen(lambda data: self.process_img(data))

        self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0))
        # Give the simulator time to settle the spawn before the episode timer.
        time.sleep(4)

        collision_sensor: ActorBlueprint = self.blueprint_library.find("sensor.other.collision")
        self.collision_sensor: Actor = self.world.spawn_actor(collision_sensor, transform, attach_to=self.vehicle)
        self.actor_list.append(self.collision_sensor)
        self.collision_sensor.listen(lambda event: self.collision_data(event))

        # Block until the camera callback has delivered the first frame.
        while self.front_camera is None:
            time.sleep(0.01)

        self.episode_start = time.time()
        self.vehicle.apply_control(carla.VehicleControl(throttle=0.0, brake=0.0))

        return self.front_camera

    def collision_data(self, event):
        # Sensor callback: record every collision event for step() to inspect.
        self.collision_hist.append(event)

    def process_img(self, image):
        # Camera callback: convert BGRA raw bytes to an HxWx3 array.
        img = np.array(image.raw_data).reshape((self.img_height, self.img_width, 4))[..., :3]
        if self.show_cam:
            cv2.imshow("", img)
            cv2.waitKey(1)
        self.front_camera = img

    def step(self, action, steps):
        """Apply one control action, then compute (observation, reward, done, info).

        action: [throttle, steer, brake] in the continuous case.
        steps:  1-based step counter within the episode (used to forgive
                collisions on the very first step after a rough spawn).
        """
        if self.control_type == "continuous":
            self.vehicle.apply_control(
                carla.VehicleControl(
                    throttle=np.clip(action[0], 0.0, 1.0),
                    steer=np.clip(action[1], -1.0, 1.0),
                    brake=np.clip(action[2], 0.0, 1.0),
                )
            )
            self.steering_cache.append(action[1])
        # elif self.control_type == 'action':
        #     if action == 0:
        #         self.vehicle.apply_control(carla.VehicleControl(throttle=1.0,
        #                                                         steer=-1*self.STEER_AMT))
        #     elif action == 1:
        #         self.vehicle.apply_control(carla.VehicleControl(throttle=1.0, steer= 0))
        #     elif action == 2:
        #         self.vehicle.apply_control(carla.VehicleControl(throttle=1.0,
        #                                                         steer=1*self.STEER_AMT))

        v = self.vehicle.get_velocity()
        kmh = int(3.6 * math.sqrt(v.x ** 2 + v.y ** 2 + v.z ** 2))

        # Reward System:
        if len(self.collision_hist) != 0:
            # Check to see if on first step (for rough spawns by Carla)
            if steps == 1:
                self.collision_hist = []
                done = False
                reward = 0
            else:
                done = True
                print("Collision!")
                reward = -200
        # NOTE(review): `&` binds tighter than `<`/`>`, so this parses as
        # `kmh < (60 & kmh) and (60 & kmh) > 0.2` — almost certainly meant
        # `0.2 < kmh < 60` (a chained comparison, or `and`). Confirm and fix.
        elif kmh < 60 & kmh > 0.2:
            done = False
            reward = 1  # -1
            # Reward lighter steering when moving
            if np.abs(action[1]) < 0.3:
                reward += 9
            elif 0.5 < np.abs(action[1]) < 0.9:
                reward -= 1
            elif np.abs(action[1]) >= 0.9:
                reward -= 6
        elif kmh <= 0.2:
            # Standing (nearly) still is penalized.
            done = False
            reward = -10
        else:
            done = False
            reward = 20  # 2
            # Reward lighter steering when moving
            if np.abs(action[1]) < 0.3:
                reward += 20
            # Reduce score for heavy steering
            if 0.5 < np.abs(action[1]) < 0.9:
                reward -= 10
            elif np.abs(action[1]) >= 0.9:
                reward -= 20

        # Penalize consistent and heavily directional steering
        reward -= (np.abs(np.mean(self.steering_cache)) + np.abs(action[1])) * 10 / 2

        # Episodes are time-limited.
        if self.episode_start + self.seconds_per_episode < time.time():
            done = True
        if done:
            self.steering_cache = []

        return self.front_camera, reward, done, None


@ray.remote
class Worker(object):
    """
    Object class for parallel rollout generation.
    """

    def __init__(
        self,
        env_seed,
        policy_params=None,
        deltas=None,
        delta_std=0.02,
        seconds_per_episode=15,
        # initial_weights=None,
        # initial_mean=None,
        # initial_std=None,
        show_cam=False,
        enable_gpu=False,
    ):

        # create environment for each worker
        self.env = CarEnv(show_cam=show_cam, seconds_per_episode=seconds_per_episode)
        # self.env.seed(env_seed)
        if enable_gpu:
            self.config = ConfigProto()
            self.config.gpu_options.allow_growth = True
            # if num_workers is not None:
            #     self.config.gpu_options.per_process_gpu_memory_fraction = (1 / num_workers)
            self.session = InteractiveSession(config=self.config)

        # Create base CNN for finding edges
        self.base_model = VGG19(
            weights="imagenet",
            include_top=False,
            input_shape=(self.env.img_height, self.env.img_width, 3),
        )

        # each worker gets access to the shared noise table
        # with independent random streams for sampling
        # from the shared noise table.
        self.deltas = SharedNoiseTable(deltas, env_seed + 7)
        self.policy_params = policy_params
        if policy_params["type"] == "linear":
            self.policy = LinearPolicy(policy_params)
        else:
            raise NotImplementedError

        self.delta_std = delta_std

    def get_weights_plus_stats(self):
        """
        Get current policy weights and current statistics of past states.
        """
        assert self.policy_params["type"] == "linear"
        return self.policy.get_weights_plus_stats()

    # @ray.remote(num_gpus= .5 / 2)
    def rollout(self, shift=0.0, state_filter=False):
        """
        At each time-step it subtracts shift from the reward.

        Returns (mean reward per step, number of steps) — note the reward is
        averaged over steps, not the raw episode total.
        """

        total_reward = 0.0
        steps = 0

        self.env.reset()
        done = False
        while not done:
            steps += 1
            # NOTE(review): image size hard-coded to 224x224 here even though
            # CarEnv takes img_width/img_height parameters — keep these in sync.
            rgb_img = self.env.front_camera.reshape((1, 224, 224, 3)) / 255.0
            # VGG19 features, scaled down, form the observation for the policy.
            state = self.base_model.predict(rgb_img).flatten() / 10
            action = self.policy.act(state, state_filter=state_filter)
            ob, reward, done, _ = self.env.step(action, steps)
            # Clips step reward between -1 and +1
            # reward = max(min(reward, 1), -1)
            total_reward += reward - shift

        adjusted_reward = total_reward / steps

        print("Worker saw {} steps".format(steps))
        return adjusted_reward, steps

    def do_rollouts(
        self,
        w_policy,
        num_rollouts=1,
        shift=1,
        delta_std=None,
        evaluate=False,
        state_filter=False,
    ):
        """
        Generate multiple rollouts with a policy parametrized by w_policy.

        In training mode each iteration samples one delta and runs a positive
        and a negative perturbation rollout; in evaluate mode the unperturbed
        policy is rolled out and step counts are not accumulated.
        """

        rollout_rewards, deltas_idx = [], []
        steps = 0

        for i in range(num_rollouts):

            if evaluate:
                self.policy.update_weights(w_policy)
                deltas_idx.append(-1)

                # set to false so that evaluation rollouts are not used for updating state statistics
                self.policy.update_filter = False

                # for evaluation, we do not shift the rewards (shift = 0) and we use the
                # default rollout length (1000 for the MuJoCo locomotion tasks)
                reward, r_steps = self.rollout(shift=0.0, state_filter=state_filter)
                rollout_rewards.append(reward)

            else:
                idx, delta = self.deltas.get_delta(w_policy.size)

                if delta_std is None:
                    delta_std = self.delta_std
                delta = (delta_std * delta).reshape(w_policy.shape)
                deltas_idx.append(idx)

                # set to true so that state statistics are updated
                self.policy.update_filter = True

                # compute reward and number of timesteps used for positive perturbation rollout
                self.policy.update_weights(w_policy + delta)
                pos_reward, pos_steps = self.rollout(shift=shift, state_filter=state_filter)

                # compute reward and number of timesteps used for negative perturbation rollout
                self.policy.update_weights(w_policy - delta)
                neg_reward, neg_steps = self.rollout(shift=shift, state_filter=state_filter)
                steps += pos_steps + neg_steps

                rollout_rewards.append([pos_reward, neg_reward])

        return {"deltas_idx": deltas_idx, "rollout_rewards": rollout_rewards, "steps": steps}

    def stats_increment(self):
        # Refresh the worker's frozen filter statistics.
        self.policy.observation_filter.stats_increment()
        return

    def get_weights(self):
        return self.policy.get_weights()

    def get_filter(self):
        return self.policy.observation_filter

    def sync_filter(self, other):
        # Overwrite this worker's filter state from the master's filter.
        self.policy.observation_filter.sync(other)
        return

    def clean_up(self):
        # Destroy all Carla actors this worker spawned.
        for actor in self.env.actor_list:
            actor.destroy()


class ARSLearner(object):
    """
    Object class implementing the ARS algorithm.
    """

    def __init__(
        self,
        policy_params=None,
        num_workers=32,
        num_deltas=320,
        deltas_used=320,
        delta_std=0.02,
        std_decay=0.0,
        logdir=None,
        learning_rate=0.01,
        lr_decay=0.0,
        shift="constant zero",
        params=None,
        seed=123,
        seconds_per_episode=15,
        log_every=10,
        show_cam=1,
        enable_gpu=False,
    ):

        logz.configure_output_dir(logdir)
        logz.save_params(params)

        # A throwaway env is created just to read the action-space size.
        env = CarEnv()

        # Create base CNN for finding edges
        # base_model = VGG19(weights='imagenet',
        #                    include_top=False,
        #                    input_shape=(env.img_height,
        #                                 env.img_width,
        #                                 3
        #                                 )
        #                    )

        self.timesteps = 0
        self.action_size = env.action_space.shape[0]
        self.num_deltas = num_deltas
        self.deltas_used = deltas_used
        self.learning_rate = learning_rate
        self.lr_decay = lr_decay
        self.delta_std = delta_std
        self.std_decay = std_decay
        self.logdir = logdir
        self.shift = shift
        self.params = params
        self.max_past_avg_reward = float("-inf")
        self.num_episodes_used = float("inf")
        self.log_every = log_every

        # create shared table for storing noise
        print("Creating deltas table.")
        deltas_id = create_shared_noise.remote()
        self.deltas = SharedNoiseTable(ray.get(deltas_id), seed=seed + 3)
        print("Created deltas table.")

        # initialize workers with different random seeds
        print("Initializing workers.")
        self.num_workers = num_workers
        self.workers = [
            Worker.remote(
                seed + 7 * i,
                policy_params=policy_params,
                deltas=deltas_id,
                delta_std=delta_std,
                seconds_per_episode=seconds_per_episode,
                show_cam=False,
                enable_gpu=enable_gpu
                # initial_weights=initial_weights,
                # initial_mean=initial_mean,
                # initial_std=initial_std,
            )
            for i in range(num_workers - show_cam)
        ]
        # Show the number of desired worker cams
        # NOTE(review): these camera workers reuse seeds `seed + 7*i` for
        # i in range(show_cam), duplicating the seeds of the first workers in
        # the list above — probably unintended.
        for i in range(show_cam):
            self.workers.append(
                Worker.remote(
                    seed + 7 * i,
                    policy_params=policy_params,
                    deltas=deltas_id,
                    delta_std=delta_std,
                    seconds_per_episode=seconds_per_episode,
                    show_cam=True,
                    enable_gpu=enable_gpu,
                    # initial_weights=initial_weights,
                    # initial_mean=initial_mean,
                    # initial_std=initial_std,
                )
            )

        # initialize policy
        if policy_params["type"] == "linear":
            self.policy = LinearPolicy(policy_params)
            self.w_policy = self.policy.get_weights()
        else:
            raise NotImplementedError

        # initialize optimization algorithm
        self.optimizer = optimizers.SGD(self.w_policy, self.learning_rate, self.lr_decay)
        print("Initialization of ARS complete.")

    def aggregate_rollouts(self, num_rollouts=None, evaluate=False, state_filter=False):
        """
        Aggregate update step from rollouts generated in parallel.

        Returns rollout_rewards when evaluate=True, otherwise
        (g_hat, rollout_rewards) where g_hat is the update direction.
        """

        if num_rollouts is None:
            num_deltas = self.num_deltas
        else:
            num_deltas = num_rollouts

        # put policy weights in the object store
        policy_id = ray.put(self.w_policy)

        t1 = time.time()
        # NOTE(review): this rebinds the `num_rollouts` parameter to the
        # per-worker count — consider a new local name for clarity.
        num_rollouts = int(num_deltas / self.num_workers)

        # parallel generation of rollouts
        rollout_ids_one = [
            worker.do_rollouts.remote(
                policy_id,
                num_rollouts=num_rollouts,
                shift=self.shift,
                delta_std=self.delta_std,
                state_filter=state_filter,
                evaluate=evaluate,
            )
            for worker in self.workers
        ]

        # Remainder rollouts (num_deltas % num_workers) get one extra each.
        rollout_ids_two = [
            worker.do_rollouts.remote(
                policy_id,
                num_rollouts=1,
                shift=self.shift,
                delta_std=self.delta_std,
                state_filter=state_filter,
                evaluate=evaluate,
            )
            for worker in self.workers[: (num_deltas % self.num_workers)]
        ]

        # gather results
        results_one = ray.get(rollout_ids_one)
        results_two = ray.get(rollout_ids_two)

        rollout_rewards, deltas_idx = [], []

        for result in results_one:
            if not evaluate:
                self.timesteps += result["steps"]
            deltas_idx += result["deltas_idx"]
            rollout_rewards += result["rollout_rewards"]

        for result in results_two:
            if not evaluate:
                self.timesteps += result["steps"]
            deltas_idx += result["deltas_idx"]
            rollout_rewards += result["rollout_rewards"]

        deltas_idx = np.array(deltas_idx)
        rollout_rewards = np.array(rollout_rewards, dtype=np.float64)

        # print('Maximum reward of collected rollouts:', rollout_rewards.max())
        t2 = time.time()

        print("Time to generate rollouts:", t2 - t1)

        if evaluate:
            return rollout_rewards

        # select top performing directions if deltas_used < num_deltas
        max_rewards = np.max(rollout_rewards, axis=1)
        if self.deltas_used > self.num_deltas:
            self.deltas_used = self.num_deltas

        idx = np.arange(max_rewards.size)[
            max_rewards >= np.percentile(max_rewards, 100 * (1 - (self.deltas_used / self.num_deltas)))
        ]
        deltas_idx = deltas_idx[idx]
        top_rewards = rollout_rewards[idx, :]

        # normalize rewards by their standard deviation
        top_rewards /= np.std(top_rewards)

        t1 = time.time()
        # aggregate rollouts to form g_hat, the gradient used to compute SGD step
        g_hat, count = utils.batched_weighted_sum(
            top_rewards[:, 0] - top_rewards[:, 1],
            (self.deltas.get(idx, self.w_policy.size) for idx in deltas_idx),
            batch_size=500,
        )
        g_hat /= deltas_idx.size
        t2 = time.time()
        print("time to aggregate rollouts", t2 - t1)
        return g_hat, rollout_rewards

    def train_step(self, state_filter=False):
        """
        Perform one update step of the policy weights.
        """

        g_hat, rewards = self.aggregate_rollouts(state_filter=state_filter)
        print("Euclidean norm of update step:", np.linalg.norm(g_hat))
        self.w_policy -= self.optimizer._compute_step(g_hat).reshape(self.w_policy.shape)
        # Optional exponential decay of the exploration noise scale.
        if self.std_decay != 0:
            self.delta_std *= 1 - self.std_decay
            print("New delta std:", self.delta_std)
        return rewards

    def train(self, num_iter, state_filter=False):
        """Main training loop: num_iter update steps, with periodic
        evaluation/checkpointing and (optionally) filter synchronization."""

        start = time.time()
        for i in range(num_iter):

            t1 = time.time()
            rewards = self.train_step(state_filter=state_filter)
            t2 = time.time()
            print("total time of one step", t2 - t1)
            print("Iteration", i + 1, "done")
            print("AverageReward:", np.mean(rewards))
            print("StdRewards:", np.std(rewards))
            print("MaxRewardRollout:", np.max(rewards))
            print("MinRewardRollout:", np.min(rewards))

            # record weights and stats every n iterations
            if (i + 1) % self.log_every == 0:
                # `rewards` is rebound to the evaluation rollouts here, so the
                # tabular log below reports evaluation (unperturbed) rewards.
                rewards = self.aggregate_rollouts(num_rollouts=self.num_deltas, evaluate=True)
                # w = ray.get(self.workers[0].get_weights.remote())
                if state_filter:
                    w = ray.get(self.workers[0].get_weights_plus_stats.remote())
                else:
                    w = ray.get(self.workers[0].get_weights.remote())
                np.savez(self.logdir + "/lin_policy_plus", w)

                # print(sorted(self.params.items()))
                logz.log_tabular("Time", time.time() - start)
                logz.log_tabular("Iteration", i + 1)
                logz.log_tabular("AverageReward", np.mean(rewards))
                logz.log_tabular("StdRewards", np.std(rewards))
                logz.log_tabular("MaxRewardRollout", np.max(rewards))
                logz.log_tabular("MinRewardRollout", np.min(rewards))
                logz.log_tabular("Timesteps", self.timesteps)
                logz.log_tabular("LearningRate", self.optimizer.learning_rate)
                logz.log_tabular("DeltaStd", self.delta_std)
                logz.dump_tabular()

            if state_filter:
                t1 = time.time()
                # get statistics from all workers
                for j in range(self.num_workers):
                    self.policy.observation_filter.update(ray.get(self.workers[j].get_filter.remote()))
                self.policy.observation_filter.stats_increment()

                # make sure master filter buffer is clear
                self.policy.observation_filter.clear_buffer()
                # sync all workers
                filter_id = ray.put(self.policy.observation_filter)
                setting_filters_ids = [worker.sync_filter.remote(filter_id) for worker in self.workers]
                # waiting for sync of all workers
                ray.get(setting_filters_ids)

                increment_filters_ids = [worker.stats_increment.remote() for worker in self.workers]
                # waiting for increment of all workers
                ray.get(increment_filters_ids)
                t2 = time.time()
                print("Time to sync statistics:", t2 - t1)

        return


def run_ars(params):
    """Build the environment/CNN, load any initial policy, run ARS training,
    and save the final weights."""
    dir_path = params["dir_path"]

    if not (os.path.exists(dir_path)):
        os.makedirs(dir_path)
    logdir = dir_path
    if not (os.path.exists(logdir)):
        os.makedirs(logdir)

    # Disables TensorFlow GPU use for compatibility reasons.
    # To try and use GPU, set --enable_gpu to True on execution
    if not params["enable_gpu"]:
        # os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
        config = ConfigProto(device_count={"GPU": 0})
        # config.gpu_options.allow_growth = True
        session = InteractiveSession(config=config)
    else:
        config = ConfigProto()
        config.gpu_options.allow_growth = True
        session = InteractiveSession(config=config)

    env = CarEnv()
    # VGG19 feature extractor over the camera image; the flattened size of its
    # output feature map is the policy's observation dimension.
    base_model = VGG19(
        weights="imagenet",
        include_top=False,
        input_shape=(env.img_height, env.img_width, 3),
    )
    shape = 1
    for num in base_model.output_shape:
        if num is not None:  # skip the batch dimension (None)
            shape *= num
    ob_dim = shape  # base_model.input_shape
    ac_dim = env.action_space.shape[0]

    # Set global variable for num workers
    # global worker_count
    # worker_count = params["n_workers"]

    # Get initial weights if directory given. Can be csv or numpy
    if len(params["policy_file"]) > 0:
        try:
            initial_policy = np.load(params["policy_file"])
            initial_weights = initial_policy["arr_0"]
            print("Found .npz policy file at {}".format(params["policy_file"]))
            print("Loaded policy weights.")
            try:
                initial_mean = initial_policy["arr_1"]
                initial_std = initial_policy["arr_2"]
                print("Loaded policy stats.")
            except:  # NOTE(review): bare except — should be narrowed to KeyError
                initial_mean = None
                initial_std = None
        except:  # NOTE(review): bare except — falls back to CSV on any np.load failure
            initial_weights = np.genfromtxt(params["policy_file"], delimiter=",")
            print("Found policy file at {}".format(params["policy_file"]))
            print("Loaded weights")
            initial_mean = None
            initial_std = None
    else:
        print("Initializing new policy.")
        initial_weights = None
        initial_mean = None
        initial_std = None

    # set policy parameters. Possible filters: 'MeanStdFilter' for v2, 'NoFilter' for v1.
    policy_params = {
        "type": "linear",
        "ob_filter": params["filter"],
        "ob_dim": ob_dim,
        "ac_dim": ac_dim,
        "initial_weights": initial_weights,
        "initial_mean": initial_mean,
        "initial_std": initial_std,
    }

    ars = ARSLearner(
        policy_params=policy_params,
        num_workers=params["n_workers"],
        num_deltas=params["num_deltas"],
        deltas_used=params["deltas_used"],
        learning_rate=params["learning_rate"],
        lr_decay=params["lr_decay"],
        delta_std=params["delta_std"],
        std_decay=params["std_decay"],
        logdir=logdir,
        shift=params["shift"],
        params=params,
        seed=params["seed"],
        seconds_per_episode=params["seconds_per_episode"],
        show_cam=params["show_cam"],
        log_every=params["log_every"],
        enable_gpu=params["enable_gpu"],
    )

    ars.train(params["n_iter"], state_filter=params["state_filter"])

    # Save final weights as CSV next to the policy file that was loaded.
    # NOTE(review): with an empty --policy_file this writes to "/recent_weights.csv"
    # (filesystem root) — verify intended behavior.
    save_file = "/".join(params["policy_file"].split("/")[:-1])
    np.savetxt(save_file + "/recent_weights.csv", ars.w_policy, delimiter=",")

    # Ask each remote worker to tear down its CARLA actors/resources.
    for worker in ars.workers:
        worker.clean_up.remote()

    return


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--n_iter", "-n", type=int, default=1000, help="Total number of update steps.")
    parser.add_argument(
        "--num_deltas",
        "-nd",
        type=int,
        default=32,
        help="Number of deltas to explore between each update step.",
    )
    parser.add_argument(
        "--deltas_used",
        "-du",
        type=int,
        default=16,
        help="Number of top performing deltas to use in udate step.",
    )
    parser.add_argument(
        "--learning_rate",
        "-lr",
        type=float,
        default=0.02,
        help="Learning rate to start training at.",
    )
parser.add_argument("--lr_decay", "-lrd", type=float, default=0.0, help="Learning rate decay") 790 | parser.add_argument( 791 | "--delta_std", 792 | "-std", 793 | type=float, 794 | default=0.03, 795 | help="The amount of noise to add to weights during exploration.", 796 | ) 797 | parser.add_argument( 798 | "--std_decay", 799 | "-stdd", 800 | type=float, 801 | default=0.0, 802 | help="Decay in the amount of noise to add to policy weights.", 803 | ) 804 | parser.add_argument( 805 | "--n_workers", 806 | "-e", 807 | type=int, 808 | default=4, 809 | help="Number of driver agents to run in parallel. Set based on hardware capability.", 810 | ) 811 | parser.add_argument( 812 | "--show_cam", 813 | "-sc", 814 | type=int, 815 | default=1, 816 | help="Number of cameras to display to user during training. Set to zero for best performance.", 817 | ) 818 | parser.add_argument( 819 | "--policy_file", 820 | "-pf", 821 | type=str, 822 | default="", 823 | help="File containing weights for resuming training.", 824 | ) 825 | parser.add_argument( 826 | "--seconds_per_episode", 827 | "-se", 828 | type=int, 829 | default=15, 830 | help="Maximum number of seconds for each driving episode.", 831 | ) 832 | parser.add_argument( 833 | "--state_filter", 834 | "-sf", 835 | type=bool, 836 | default=False, 837 | help="Turns on rolling statistics normalization of inputs. Best to leave False.", 838 | ) 839 | parser.add_argument( 840 | "--log_every", 841 | "-le", 842 | type=int, 843 | default=10, 844 | help="Number of update steps to complete between each logging event.", 845 | ) 846 | parser.add_argument( 847 | "--enable_gpu", 848 | "-gpu", 849 | type=bool, 850 | default=False, 851 | help="Whether to enable tensorflow gpu access. 
Leave to False.", 852 | ) 853 | 854 | # for Swimmer-v1 and HalfCheetah-v1 use shift = 0 855 | # for Hopper-v1, Walker2d-v1, and Ant-v1 use shift = 1 856 | # for Humanoid-v1 used shift = 5 857 | parser.add_argument("--shift", type=float, default=0) 858 | parser.add_argument("--seed", type=int, default=237) 859 | parser.add_argument("--policy_type", type=str, default="linear") 860 | parser.add_argument("--dir_path", type=str, default="data") 861 | 862 | # for ARS V1 use filter = 'NoFilter' 863 | parser.add_argument("--filter", type=str, default="MeanStdFilter") 864 | 865 | local_ip = socket.gethostbyname(socket.gethostname()) 866 | ray.init( # object_store_memory=1.5e+10, 867 | # _memory=1.5e+10, 868 | # num_cpus=7, 869 | # address= local_ip + ':6379', 870 | # redis_password='' 871 | # address = '127.0.0.1:6379', 872 | # local_mode 873 | # node_ip_address=local_ip 874 | ) 875 | 876 | args = parser.parse_args() 877 | params = vars(args) 878 | 879 | # with session.as_default(): 880 | run_ars(params) 881 | --------------------------------------------------------------------------------