├── .gitignore ├── Alpha zero-2018.pdf ├── AlphaGeometry.pdf ├── AlphaStar.pdf ├── Alpha_go_zero.PDF ├── Code ├── Lapan │ ├── Chapter02 │ │ ├── 01_agent_anatomy.py │ │ ├── 02_cartpole_random.py │ │ ├── 03_random_action_wrapper.py │ │ └── 04_cartpole_random_monitor.py │ ├── Chapter03 │ │ ├── .gitignore │ │ ├── 01_modules.py │ │ ├── 02_tensorboard.py │ │ ├── 03_atari_gan.py │ │ └── 04_atari_gan_ignite.py │ ├── Chapter04 │ │ ├── 01_cartpole.py │ │ ├── 02_frozenlake_naive.ipynb │ │ ├── 02_frozenlake_naive.py │ │ ├── 03_frozenlake_tweaked.py │ │ ├── 04_frozenlake_nonslippery.py │ │ └── runs │ │ │ ├── Jun12_15-03-05_Tesla-cartpole │ │ │ └── events.out.tfevents.1686592985.Tesla │ │ │ ├── Jun12_15-06-38_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593198.Tesla │ │ │ ├── Jun12_15-07-38_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593258.Tesla │ │ │ ├── Jun12_15-08-04_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593284.Tesla │ │ │ ├── Jun12_15-08-20_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593300.Tesla │ │ │ ├── Jun12_15-08-45_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593325.Tesla │ │ │ ├── Jun12_15-10-13_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593413.Tesla │ │ │ ├── Jun12_15-11-17_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593477.Tesla │ │ │ ├── Jun12_15-12-48_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593568.Tesla │ │ │ ├── Jun12_15-13-54_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593634.Tesla │ │ │ ├── Jun12_15-15-35_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593735.Tesla │ │ │ ├── Jun12_15-15-45_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593745.Tesla │ │ │ ├── Jun12_15-16-16_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593776.Tesla │ │ │ ├── Jun12_15-16-24_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593784.Tesla │ │ │ ├── Jun12_15-16-37_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593797.Tesla │ │ │ ├── Jun12_15-18-45_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593925.Tesla │ │ │ ├── Jun12_15-19-13_Tesla-cartpole │ │ │ └── events.out.tfevents.1686593953.Tesla │ │ │ ├── Jun12_15-20-27_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594027.Tesla │ │ │ ├── Jun12_15-23-26_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594206.Tesla │ │ │ ├── Jun12_15-27-06_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594426.Tesla │ │ │ ├── Jun12_15-29-23_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594563.Tesla │ │ │ ├── Jun12_15-30-21_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594621.Tesla │ │ │ ├── Jun12_15-30-37_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594637.Tesla │ │ │ ├── Jun12_15-30-50_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594650.Tesla │ │ │ ├── Jun12_15-31-13_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594673.Tesla │ │ │ ├── Jun12_15-32-02_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594722.Tesla │ │ │ ├── Jun12_15-33-09_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594789.Tesla │ │ │ ├── Jun12_15-33-58_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594838.Tesla │ │ │ ├── Jun12_15-34-14_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594854.Tesla │ │ │ ├── Jun12_15-35-26_Tesla-cartpole │ │ │ └── events.out.tfevents.1686594926.Tesla │ │ │ ├── Jun12_15-36-57_Tesla-frozenlake-naive │ │ │ └── events.out.tfevents.1686595017.Tesla │ │ │ ├── Jun12_15-37-34_Tesla-frozenlake-naive │ │ │ └── events.out.tfevents.1686595054.Tesla │ │ │ ├── Jun12_15-38-04_Tesla-frozenlake-naive │ │ │ └── events.out.tfevents.1686595084.Tesla │ │ │ ├── Jun12_15-41-03_Tesla-frozenlake-tweaked │ │ │ └── events.out.tfevents.1686595263.Tesla │ │ │ └── Jun12_15-43-04_Tesla-frozenlake-nonslippery │ │ │ └── events.out.tfevents.1686595384.Tesla │ ├── Chapter05 │ │ ├── 01_frozenlake_v_iteration.py │ │ └── 02_frozenlake_q_iteration.py │ ├── Chapter06 │ │ ├── 01_frozenlake_q_learning.py │ │ ├── 02_dqn_pong.py │ │ ├── 03_dqn_play.py │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── dqn_model.py │ │ │ └── wrappers.py │ │ └── runs │ │ │ ├── Jul31_14-41-47_Tesla-q-learning │ │ │ └── events.out.tfevents.1690825307.Tesla │ │ │ ├── Jul31_14-46-11_Tesla-q-learning │ │ │ └── events.out.tfevents.1690825571.Tesla │ │ │ ├── Jul31_14-47-27_Tesla-q-learning │ │ │ └── events.out.tfevents.1690825647.Tesla │ │ │ ├── Jul31_14-49-33_Tesla-q-learning │ │ │ └── events.out.tfevents.1690825773.Tesla │ │ │ ├── Jul31_14-49-59_Tesla-q-learning │ │ │ └── events.out.tfevents.1690825799.Tesla │ │ │ ├── Jul31_14-50-38_Tesla-q-learning │ │ │ └── events.out.tfevents.1690825838.Tesla │ │ │ ├── Jul31_14-54-08_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826048.Tesla │ │ │ ├── Jul31_14-54-29_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826069.Tesla │ │ │ ├── Jul31_14-54-44_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826084.Tesla │ │ │ ├── Jul31_14-54-59_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826099.Tesla │ │ │ ├── Jul31_14-56-00_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826160.Tesla │ │ │ ├── Jul31_14-56-19_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826179.Tesla │ │ │ ├── Jul31_14-58-01_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826281.Tesla │ │ │ ├── Jul31_14-58-25_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826305.Tesla │ │ │ ├── Jul31_15-00-12_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826412.Tesla │ │ │ ├── Jul31_15-00-44_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826444.Tesla │ │ │ ├── Jul31_15-01-00_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826460.Tesla │ │ │ ├── Jul31_15-01-08_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826468.Tesla │ │ │ ├── Jul31_15-02-49_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826569.Tesla │ │ │ ├── Jul31_15-03-08_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826588.Tesla │ │ │ └── Jul31_15-03-43_Tesla-q-learning │ │ │ └── events.out.tfevents.1690826623.Tesla │ ├── Chapter07 │ │ ├── 01_actions.py │ │ ├── 02_agents.py │ │ ├── 03_exp_sources.py │ │ ├── 04_replay_buf.py │ │ ├── 05_target_net.py │ │ └── 06_cartpole.py │ ├── Chapter08 │ │ ├── 01_dqn_basic.py │ │ ├── 02_dqn_n_steps.py │ │ ├── 03_dqn_double.py │ │ ├── 04_dqn_noisy_net.py │ │ ├── 05_dqn_prio_replay.py │ │ ├── 06_dqn_dueling.py │ │ ├── 07_dqn_distrib.py │ │ ├── 07_dqn_distrib_plots.py │ │ ├── 08_dqn_rainbow.py │ │ ├── adhoc │ │ │ ├── commute.py │ │ │ └── distr_test.py │ │ ├── bench │ │ │ ├── prio_buffer_bench.py │ │ │ └── simple_buffer_bench.py │ │ └── lib │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── dqn_extra.py │ │ │ └── dqn_model.py │ ├── Chapter09 │ │ ├── .gitignore │ │ ├── 00_slow_grads.py │ │ ├── 01_baseline.py │ │ ├── 02_n_envs.py │ │ ├── 03_parallel.py │ │ ├── 04_new_wrappers_n_env.py │ │ ├── 04_new_wrappers_parallel.py │ │ ├── 05_cule.py │ │ ├── attic │ │ │ ├── 02_env_steps.py │ │ │ └── 03_parallel_orig.py │ │ ├── img │ │ │ ├── 01_orig_tb.png │ │ │ ├── 02_steps-tb.png │ │ │ ├── 03-serial-blocks.png │ │ │ ├── 03_serial.png │ │ │ └── 04_parallel.png │ │ └── lib │ │ │ ├── __init__.py │ │ │ ├── atari_wrappers.py │ │ │ ├── common.py │ │ │ └── dqn_model.py │ ├── Chapter10 │ │ ├── .gitignore │ │ ├── data │ │ │ ├── ch08-small-quotes.tgz │ │ │ └── unpack_data.sh │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── data.py │ │ │ ├── environ.py │ │ │ ├── models.py │ │ │ └── validation.py │ │ ├── run_model.py │ │ ├── tests │ │ │ ├── test_data.py │ │ │ └── test_environ.py │ │ ├── train_model.py │ │ └── train_model_conv.py │ ├── Chapter11 │ │ ├── 01_cartpole_dqn.py │ │ ├── 02_cartpole_reinforce.py │ │ ├── 03_cartpole_reinforce_baseline.py │ │ ├── 04_cartpole_pg.py │ │ ├── 05_pong_pg.py │ │ └── lib │ │ │ ├── __init__.py │ │ │ └── common.py │ ├── Chapter12 │ │ ├── .gitignore │ │ ├── 01_cartpole_pg.py │ │ ├── 02_pong_a2c.py │ │ ├── 03_pong_a2c_rollouts.py │ │ ├── 04_pong_r2.py │ │ ├── log.md │ │ └── torch_perf │ │ │ └── t1.py │ ├── Chapter13 │ │ ├── .gitignore │ │ ├── 01_a3c_data.py │ │ ├── 02_a3c_grad.py │ │ ├── adhoc │ │ │ ├── distr_grad.py │ │ │ ├── distr_grad2.py │ │ │ └── sync_bench.py │ │ └── lib │ │ │ ├── __init__.py │ │ │ └── common.py │ ├── Chapter14 │ │ ├── .gitignore │ │ ├── cor_reader.py │ │ ├── data │ │ │ ├── .gitignore │ │ │ └── get_data.sh │ │ ├── data_test.py │ │ ├── libbots │ │ │ ├── __init__.py │ │ │ ├── cornell.py │ │ │ ├── data.py │ │ │ ├── model.py │ │ │ └── utils.py │ │ ├── telegram_bot.py │ │ ├── tests │ │ │ ├── test_data.py │ │ │ └── test_subtitles.py │ │ ├── train_crossent.py │ │ ├── train_scst.py │ │ └── use_model.py │ ├── Chapter15 │ │ ├── README.md │ │ ├── games │ │ │ ├── .gitignore │ │ │ └── make_games.sh │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── model.py │ │ │ └── preproc.py │ │ ├── requirements.txt │ │ ├── train_basic.py │ │ └── train_lm.py │ ├── Chapter16 │ │ ├── .gitignore │ │ ├── adhoc │ │ │ ├── demo_dump.py │ │ │ ├── fbs_dump.py │ │ │ ├── fbs_join.py │ │ │ ├── fbs_read.py │ │ │ ├── start_docker.sh │ │ │ ├── start_docker_demo.sh │ │ │ ├── wd_tests.py │ │ │ ├── wob_clicks.py │ │ │ ├── wob_create.py │ │ │ └── wob_test.py │ │ ├── demos │ │ │ ├── demo-ClickButton.tgz │ │ │ ├── demo-CountSides.tar.gz │ │ │ └── demo-TicTacToe.tgz │ │ ├── environment.yml │ │ ├── ksy │ │ │ ├── fbs.ksy │ │ │ ├── rfp_client.ksy │ │ │ └── rfp_server.ksy │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── ksy │ │ │ │ ├── __init__.py │ │ │ │ ├── fbs.py │ │ │ │ ├── rfp_client.py │ │ │ │ └── rfp_server.py │ │ │ ├── model_vnc.py │ │ │ ├── vnc_demo.py │ │ │ └── wob_vnc.py │ │ ├── wob_click_mm_play.py │ │ ├── wob_click_mm_train.py │ │ ├── wob_click_play.py │ │ ├── wob_click_train.py │ │ └── wob_fixes │ │ │ ├── 01_wob_crash-fix.patch │ │ │ ├── 02_reward_proxy_append_rewards.patch │ │ │ ├── autopatch.sh │ │ │ └── readme.md │ ├── Chapter17 │ │ ├── .gitignore │ │ ├── 01_check_env.py │ │ ├── 02_train_a2c.py │ │ ├── 03_play_a2c.py │ │ ├── 04_train_ddpg.py │ │ ├── 05_play_ddpg.py │ │ ├── 06_train_d4pg.py │ │ ├── adhoc │ │ │ ├── record_a2c.sh │ │ │ └── record_ddpg.sh │ │ └── lib │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ └── model.py │ ├── Chapter18 │ │ ├── export_model.py │ │ ├── hw │ │ │ ├── bench.py │ │ │ ├── libhw │ │ │ │ ├── __init__.py │ │ │ │ ├── hw_sensors │ │ │ │ │ ├── __init__.py │ │ │ │ │ ├── l3g4200d.py │ │ │ │ │ ├── lis331dlh.py │ │ │ │ │ ├── lis3mdl.py │ │ │ │ │ └── st_family.py │ │ │ │ ├── nn.py │ │ │ │ ├── postproc.py │ │ │ │ ├── sensor_buffer.py │ │ │ │ ├── sensors.py │ │ │ │ ├── servo.py │ │ │ │ ├── t1.py │ │ │ │ ├── t1zyh.py │ │ │ │ └── t1zyho.py │ │ │ ├── obs.py │ │ │ ├── run.py │ │ │ └── zero.py │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── ddpg.py │ │ │ └── microtaur.py │ │ ├── micropython │ │ │ ├── PYB11_my.diff │ │ │ ├── PYBV11_my │ │ │ │ ├── mpconfigboard.h │ │ │ │ ├── mpconfigboard.mk │ │ │ │ ├── pins.csv │ │ │ │ └── stm32f4xx_hal_conf.h │ │ │ └── README.md │ │ ├── models │ │ │ ├── four_short_legs.xml │ │ │ └── four_short_legs.xml.tmpl │ │ ├── show_model.py │ │ ├── stl │ │ │ ├── 01_trivial │ │ │ │ ├── Frame02.stl │ │ │ │ ├── Leg_Needle.stl │ │ │ │ └── SensorHolder.stl │ │ │ ├── frame_short_legs.scad │ │ │ ├── frame_short_legs_pyboard.scad │ │ │ └── frame_short_legs_pyboard.stl │ │ ├── tests │ │ │ ├── __init__.py │ │ │ └── test_hw_nn.py │ │ └── train_ddpg.py │ ├── Chapter19 │ │ ├── .gitignore │ │ ├── 01_train_a2c.py │ │ ├── 02_play.py │ │ ├── 03_train_trpo.py │ │ ├── 04_train_ppo.py │ │ ├── 05_train_acktr.py │ │ ├── 06_train_sac.py │ │ ├── README.md │ │ └── lib │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── kfac.py │ │ │ ├── model.py │ │ │ └── trpo.py │ ├── Chapter20 │ │ ├── .gitignore │ │ ├── 01_cartpole_es.py │ │ ├── 02_cheetah_es.py │ │ ├── 03_cartpole_ga.py │ │ ├── 04_cheetah_ga.py │ │ ├── 05_cheetah_ga_batch.py │ │ └── not_converging │ │ │ ├── 02_breakout_es.py │ │ │ └── 02_hyper.py │ ├── Chapter21 │ │ ├── atari_dqn.py │ │ ├── atari_ppo.py │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── atari_wrappers.py │ │ │ ├── common.py │ │ │ ├── dqn_extra.py │ │ │ └── ppo.py │ │ ├── mcar_dqn.py │ │ ├── mcar_ppo.py │ │ ├── riverswim.py │ │ └── tests │ │ │ ├── __init__.py │ │ │ └── test_ppo.py │ ├── Chapter22 │ │ ├── .gitignore │ │ ├── 01_a2c.py │ │ ├── 02_imag.py │ │ ├── 03_i2a.py │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ └── i2a.py │ │ └── play.py │ ├── Chapter23 │ │ ├── .gitignore │ │ ├── lib │ │ │ ├── __init__.py │ │ │ ├── game.py │ │ │ ├── mcts.py │ │ │ └── model.py │ │ ├── play.py │ │ ├── semi-final.sh │ │ ├── telegram-bot.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── test_game.py │ │ │ └── test_model.py │ │ ├── tournament │ │ │ ├── charts.ipynb │ │ │ ├── final-short.png │ │ │ ├── final.csv │ │ │ ├── final.svg │ │ │ ├── final.txt │ │ │ ├── final_plot.ipynb │ │ │ ├── semi-0.txt │ │ │ ├── semi-1.txt │ │ │ ├── semi-2.txt │ │ │ ├── semi-3.txt │ │ │ ├── semi-4.txt │ │ │ ├── semi-5.txt │ │ │ ├── semi-6.txt │ │ │ ├── semi-7.txt │ │ │ ├── semi-8.txt │ │ │ ├── semi-9.txt │ │ │ ├── semi-common.png │ │ │ └── semi-scores.png │ │ └── train.py │ ├── Chapter24 │ │ ├── .gitignore │ │ ├── csvs │ │ │ ├── README.md │ │ │ ├── c2x2-paper-d200-t1-v2.csv │ │ │ ├── c2x2-paper-d200-t1.csv │ │ │ ├── c2x2-zero-goal-d200-t1-v2.csv │ │ │ ├── c2x2-zero-goal-d200-t1.csv │ │ │ ├── c3x3-paper-d200-t1-v2.csv │ │ │ ├── c3x3-paper-d200-t1.csv │ │ │ ├── c3x3-zero-goal-d200-no-decay-v2.csv │ │ │ ├── c3x3-zero-goal-d200-no-decay.csv │ │ │ ├── c3x3-zero-goal-d200-t1-v2.csv │ │ │ ├── c3x3-zero-goal-d200-t1.csv │ │ │ ├── c3x3 │ │ │ │ ├── c3-paper-d20-1.93e-1.csv │ │ │ │ ├── c3-zg-d20-noweight-no-decay=5.501e-1.csv │ │ │ │ ├── c3-zg-d20-noweight-no-decay=5.61e-1.csv │ │ │ │ ├── c3-zg-d20-noweight-no-decay=6.43e-1.csv │ │ │ │ ├── c3-zg-d20-noweight-no-decay=7.29e-1.csv │ │ │ │ ├── c3-zg-d20-noweight-no-decay=chp100k.csv │ │ │ │ ├── c3-zg-d20-noweight-no-decay=chp64k.csv │ │ │ │ ├── c3-zg-d20-noweight.csv │ │ │ │ └── c3-zg-d20.csv │ │ │ ├── t3-c2x2-mcts-c=0.01.csv │ │ │ ├── t3-c2x2-mcts-c=0.1.csv │ │ │ ├── t3-c2x2-mcts-c=1.csv │ │ │ ├── t3-c2x2-mcts-c=10.csv │ │ │ ├── t3-c2x2-mcts-c=100.csv │ │ │ ├── t3-c2x2-mcts-c=1000.csv │ │ │ ├── t3-c2x2-mcts-c=100000.csv │ │ │ ├── t3.1-c2x2-mcts-c=1.csv │ │ │ ├── t3.1-c2x2-mcts-c=10.csv │ │ │ ├── t3.1-c2x2-mcts-c=100-steps=100k.csv │ │ │ ├── t3.1-c2x2-mcts-c=100-steps=60k.csv │ │ │ ├── t3.1-c2x2-mcts-c=100.csv │ │ │ ├── t3.1-c2x2-mcts-c=1000.csv │ │ │ ├── t3.1-c2x2-mcts-c=10000.csv │ │ │ ├── t4-c2x2-mcts-c=10-steps=100k.csv │ │ │ ├── t4-c2x2-mcts-c=10-steps=200k.csv │ │ │ ├── t4-c2x2-mcts-c=10-steps=500k.csv │ │ │ ├── t4-c2x2-mcts-c=100-steps=100k-b10.csv │ │ │ ├── t4-c2x2-mcts-c=100-steps=100k-b100.csv │ │ │ ├── t4-c2x2-mcts-c=100-steps=100k.csv │ │ │ ├── t5-c2x2-1.0366e-01.csv │ │ │ ├── t5-c2x2-3.0742e-02.csv │ │ │ ├── t5-c2x2-6.0737e-02.csv │ │ │ ├── t6-c2x2-nu=1.csv │ │ │ ├── t6-c2x2-nu=10.csv │ │ │ ├── t6-c2x2-nu=1000.csv │ │ │ └── t7-best-paper-1.8184e-1.csv │ │ ├── cubes_tests │ │ │ ├── cube2x2_d3.txt │ │ │ ├── cube2x2_d4.txt │ │ │ ├── cube2x2_d5.txt │ │ │ ├── cube2x2_d6.txt │ │ │ ├── cube3x3_d10.txt │ │ │ ├── cube3x3_d1000.txt │ │ │ ├── cube3x3_d15.txt │ │ │ ├── cube3x3_d3.txt │ │ │ └── cube3x3_d3_norepeat.txt │ │ ├── docs │ │ │ └── Notes.md │ │ ├── gen_cubes.py │ │ ├── ini │ │ │ ├── README.md │ │ │ ├── cube2x2-paper-d200.ini │ │ │ ├── cube2x2-zero-goal-d200.ini │ │ │ ├── cube3x3-paper-d20.ini │ │ │ ├── cube3x3-paper-d200.ini │ │ │ ├── cube3x3-zero-goal-d20-noweight.ini │ │ │ ├── cube3x3-zero-goal-d20.ini │ │ │ └── cube3x3-zero-goal-d200.ini │ │ ├── libcube │ │ │ ├── conf.py │ │ │ ├── cubes │ │ │ │ ├── __init__.py │ │ │ │ ├── _common.py │ │ │ │ ├── _env.py │ │ │ │ ├── cube2x2.py │ │ │ │ └── cube3x3.py │ │ │ ├── mcts.py │ │ │ └── model.py │ │ ├── models │ │ │ ├── .gitattributes │ │ │ └── cube2x2 │ │ │ │ └── .gitattributes │ │ ├── nbs │ │ │ ├── 01_paper-vs-zero_goal.ipynb │ │ │ ├── 02_fix_steps_limit.ipynb │ │ │ ├── 03_mcts_tuning.ipynb │ │ │ ├── 04_mcts_C-extra-data.ipynb │ │ │ ├── 05_batch_search.ipynb │ │ │ ├── 06_compare_models.ipynb │ │ │ └── 07_article_figs.ipynb │ │ ├── requirements.txt │ │ ├── run_tests.sh │ │ ├── solver.py │ │ ├── train.py │ │ └── train_debug.py │ ├── Chapter25 │ │ ├── .gitignore │ │ ├── README.md │ │ ├── battle_dqn.py │ │ ├── battle_play.py │ │ ├── forest_both_dqn.py │ │ ├── forest_both_play.py │ │ ├── forest_random.py │ │ ├── forest_tigers_dqn.py │ │ ├── forest_tigers_play.py │ │ └── lib │ │ │ ├── __init__.py │ │ │ ├── common.py │ │ │ ├── data.py │ │ │ └── model.py │ ├── formulas │ │ ├── .gitignore │ │ ├── _template.tex │ │ ├── ch01.tex │ │ ├── ch04.tex │ │ ├── ch05.tex │ │ ├── ch06.tex │ │ ├── ch07.tex │ │ ├── ch08.tex │ │ ├── ch11.tex │ │ ├── ch12.tex │ │ ├── ch14.tex │ │ ├── ch17.tex │ │ ├── ch19.tex │ │ ├── ch20.tex │ │ ├── ch21.tex │ │ ├── ch21 │ │ │ ├── ch21-001.eps │ │ │ ├── ch21-002.eps │ │ │ ├── ch21-003.eps │ │ │ └── ch21.pdf │ │ ├── ch23.tex │ │ ├── ch23 │ │ │ ├── ch23-001.eps │ │ │ ├── ch23-002.eps │ │ │ ├── ch23-003.eps │ │ │ ├── ch23-004.eps │ │ │ ├── ch23-005.eps │ │ │ ├── ch23-006.eps │ │ │ ├── ch23-007.eps │ │ │ ├── ch23-008.eps │ │ │ ├── ch23-009.eps │ │ │ └── ch23.pdf │ │ ├── ch24.tex │ │ ├── ch24 │ │ │ ├── ch24-001.eps │ │ │ ├── ch24-002.eps │ │ │ ├── ch24-003.eps │ │ │ ├── ch24-004.eps │ │ │ ├── ch24-005.eps │ │ │ ├── ch24-006.eps │ │ │ ├── ch24-007.eps │ │ │ ├── ch24-008.eps │ │ │ ├── ch24-009.eps │ │ │ ├── ch24-010.eps │ │ │ ├── ch24-011.eps │ │ │ ├── ch24-012.eps │ │ │ ├── ch24-013.eps │ │ │ ├── ch24-014.eps │ │ │ ├── ch24-015.eps │ │ │ ├── ch24-016.eps │ │ │ ├── ch24-017.eps │ │ │ ├── ch24-018.eps │ │ │ ├── ch24-019.eps │ │ │ ├── ch24-020.eps │ │ │ ├── ch24-021.eps │ │ │ ├── ch24-022.eps │ │ │ ├── ch24-023.eps │ │ │ ├── ch24-024.eps │ │ │ ├── ch24-025.eps │ │ │ ├── ch24-026.eps │ │ │ └── ch24.pdf │ │ ├── clean.sh │ │ └── make.sh │ └── plots │ │ ├── ch04 │ │ ├── cp-loss.csv │ │ ├── cp-loss.svg │ │ ├── cp-reward.csv │ │ ├── cp-reward.svg │ │ ├── cp-rw_bound.csv │ │ ├── cp-rw_bound.svg │ │ ├── fln-loss.csv │ │ ├── fln-loss.svg │ │ ├── fln-reward.csv │ │ ├── fln-reward.svg │ │ ├── fln-rw_bound.csv │ │ ├── fln-rw_bound.svg │ │ ├── flns-loss.csv │ │ ├── flns-loss.svg │ │ ├── flns-reward.csv │ │ ├── flns-reward.svg │ │ ├── flns-rw_bound.csv │ │ ├── flns-rw_bound.svg │ │ ├── flt-loss.csv │ │ ├── flt-loss.svg │ │ ├── flt-reward.csv │ │ ├── flt-reward.svg │ │ ├── flt-rw_bound.csv │ │ ├── flt-rw_bound.svg │ │ └── make.sh │ │ ├── ch05 │ │ ├── make.sh │ │ ├── viter-4x4-reward.csv │ │ ├── viter-4x4-reward.svg │ │ ├── viter-8x8-reward.csv │ │ └── viter-8x8-reward.svg │ │ ├── ch06 │ │ ├── 01_reward.csv │ │ ├── 01_reward.svg │ │ ├── 02_avg_reward.csv │ │ ├── 02_avg_reward.svg │ │ └── make.sh │ │ ├── ch08 │ │ ├── 01_base │ │ │ ├── avg_fps.csv │ │ │ ├── avg_loss.csv │ │ │ ├── avg_reward.csv │ │ │ └── steps.csv │ │ ├── 01_fps.svg │ │ ├── 01_loss.svg │ │ ├── 01_reward.svg │ │ ├── 01_steps.svg │ │ ├── 02_n_steps │ │ │ ├── 2_avg_reward.csv │ │ │ ├── 2_steps.csv │ │ │ ├── 3_avg_reward.csv │ │ │ ├── 3_steps.csv │ │ │ ├── 4_avg_reward.csv │ │ │ ├── 5_avg_reward.csv │ │ │ └── 6_avg_reward.csv │ │ ├── 02_reward_3456.svg │ │ ├── 02_reward_b23.svg │ │ ├── 02_steps_b3.svg │ │ ├── 03_double │ │ │ ├── false_avg_reward.csv │ │ │ ├── false_values.csv │ │ │ ├── true_avg_reward.csv │ │ │ └── true_values.csv │ │ ├── 03_reward.png │ │ ├── 03_values.png │ │ ├── 04_noisy │ │ │ ├── avg_reward.csv │ │ │ ├── snr_1.csv │ │ │ ├── snr_2.csv │ │ │ └── steps.csv │ │ ├── 04_reward.svg │ │ ├── 04_snr_1.svg │ │ ├── 04_snr_2.svg │ │ ├── 04_steps.svg │ │ ├── 05_loss.svg │ │ ├── 05_prio │ │ │ ├── avg_loss.csv │ │ │ └── avg_reward.csv │ │ ├── 05_reward_baseline.svg │ │ ├── 05_reward_prio.svg │ │ ├── 06_adv.svg │ │ ├── 06_dueling │ │ │ ├── adv.csv │ │ │ ├── avg_loss.csv │ │ │ ├── avg_reward.csv │ │ │ ├── steps.csv │ │ │ └── val.csv │ │ ├── 06_loss.svg │ │ ├── 06_reward.svg │ │ ├── 06_steps.svg │ │ ├── 06_val.svg │ │ ├── 07_distrib │ │ │ ├── avg_loss.csv │ │ │ └── avg_reward.csv │ │ ├── 07_loss.svg │ │ ├── 07_reward.svg │ │ ├── 08_fps.svg │ │ ├── 08_rainbow │ │ │ ├── avg_fps.csv │ │ │ ├── avg_loss.csv │ │ │ ├── avg_reward.csv │ │ │ └── steps.csv │ │ ├── 08_reward_comp.svg │ │ ├── 08_reward_only.svg │ │ ├── 08_steps.svg │ │ └── make.sh │ │ ├── ch09 │ │ ├── 00_slow_grads │ │ │ ├── avg_fps.csv │ │ │ └── avg_reward.csv │ │ ├── 01_original │ │ │ ├── avg_fps.csv │ │ │ ├── avg_loss.csv │ │ │ ├── avg_reward.csv │ │ │ └── steps.csv │ │ ├── 02_n_envs │ │ │ ├── 2_avg_fps.csv │ │ │ ├── 2_avg_reward.csv │ │ │ ├── 3_avg_fps.csv │ │ │ ├── 3_avg_reward.csv │ │ │ ├── 4_avg_fps.csv │ │ │ ├── 4_avg_reward.csv │ │ │ ├── 5_avg_fps.csv │ │ │ ├── 5_avg_reward.csv │ │ │ ├── 6_avg_fps.csv │ │ │ └── 6_avg_reward.csv │ │ ├── 03_parallel │ │ │ ├── avg_fps.csv │ │ │ └── avg_reward.csv │ │ ├── 04_new_wrappers │ │ │ ├── env_avg_fps.csv │ │ │ ├── env_avg_reward.csv │ │ │ ├── par_avg_fps.csv │ │ │ └── par_avg_reward.csv │ │ ├── make.sh │ │ ├── s00_01_reward.svg │ │ ├── s00_02_avg_fps.svg │ │ ├── s01_01_reward.svg │ │ ├── s01_02_steps.svg │ │ ├── s01_03_loss.svg │ │ ├── s01_04_avg_fps.svg │ │ ├── s02_01_reward_b23.svg │ │ ├── s02_01_reward_b456.svg │ │ ├── s02_02_avg_fps_b23.svg │ │ ├── s02_02_avg_fps_b456.svg │ │ ├── s03_01_reward.svg │ │ ├── s03_02_avg_fps.svg │ │ ├── s04_01_env_reward.svg │ │ ├── s04_02_env_fps.svg │ │ ├── s04_03_env_reward.svg │ │ └── s04_04_env_fps.svg │ │ ├── ch10 │ │ ├── cv-reward_train.csv │ │ ├── cv-reward_train.svg │ │ ├── cv-reward_val.csv │ │ ├── cv-reward_val.svg │ │ ├── ff-reward_test.csv │ │ ├── ff-reward_test.svg │ │ ├── ff-reward_train.csv │ │ ├── ff-reward_train.svg │ │ ├── ff-reward_val.csv │ │ ├── ff-reward_val.svg │ │ ├── ff-steps_train.csv │ │ ├── ff-steps_train.svg │ │ ├── ff-values.svg │ │ ├── ff-values_train.csv │ │ ├── make.sh │ │ └── trades │ │ │ ├── rewards-YNDX-2016.png │ │ │ └── rewards-YNDX_2015.png │ │ ├── ch11 │ │ ├── 01_dqn_rf_episodes.svg │ │ ├── 02_dqn_rf_rewards100.svg │ │ ├── 03_pg_reward.svg │ │ ├── 04_pg_baseline.svg │ │ ├── 05_pg_batch_scales.svg │ │ ├── 06_pg_entropy.svg │ │ ├── 07_pg_l_entropy.svg │ │ ├── 08_pg_l_policy.svg │ │ ├── 09_pg_l_total.svg │ │ ├── 10_pg_grad_l2.svg │ │ ├── 11_pg_grad_max.svg │ │ ├── 12_pg_kl.svg │ │ ├── dqn_episodes.csv │ │ ├── dqn_rewards100.csv │ │ ├── make.sh │ │ ├── pg_baseline.csv │ │ ├── pg_batch_scales.csv │ │ ├── pg_entropy.csv │ │ ├── pg_grad_l2.csv │ │ ├── pg_grad_max.csv │ │ ├── pg_kl.csv │ │ ├── pg_l_entropy.csv │ │ ├── pg_l_policy.csv │ │ ├── pg_l_total.csv │ │ ├── pg_reward100.csv │ │ ├── rf_episodes.csv │ │ └── rf_rewards100.csv │ │ ├── ch12 │ │ ├── 08-01-batch_rewards.svg │ │ ├── 08-02-reward.svg │ │ ├── 09-01-loss_ent.svg │ │ ├── 09-02-loss_policy.svg │ │ ├── 10-01-loss_value.svg │ │ ├── 10-02-loss_total.svg │ │ ├── 11-01-advanatage.svg │ │ ├── 11-02-grad-l2.svg │ │ ├── 12-01-grad_max.svg │ │ ├── 12-02-grad_var.svg │ │ ├── a2c-advantage.csv │ │ ├── a2c-batch_rewards.csv │ │ ├── a2c-grad_l2.csv │ │ ├── a2c-grad_max.csv │ │ ├── a2c-grad_var.csv │ │ ├── a2c-loss_entropy.csv │ │ ├── a2c-loss_policy.csv │ │ ├── a2c-loss_total.csv │ │ ├── a2c-loss_value.csv │ │ ├── a2c-reward.csv │ │ ├── cp-l2-bl.csv │ │ ├── cp-l2-nobl.csv │ │ ├── cp-l2.svg │ │ ├── cp-max-bl.csv │ │ ├── cp-max-nobl.csv │ │ ├── cp-max.svg │ │ ├── cp-reward-bl.svg │ │ ├── cp-reward-nobl.svg │ │ ├── cp-reward100.svg │ │ ├── cp-reward100_bl.csv │ │ ├── cp-reward100_nobl.csv │ │ ├── cp-reward_bl.csv │ │ ├── cp-reward_nobl.csv │ │ ├── cp-var-bl.csv │ │ ├── cp-var-nobl.csv │ │ ├── cp-var.svg │ │ └── make.sh │ │ ├── ch13 │ │ ├── data_adv.csv │ │ ├── data_adv.svg │ │ ├── data_loss.csv │ │ ├── data_loss.svg │ │ ├── data_reward.csv │ │ ├── data_reward.svg │ │ ├── grad_adv.svg │ │ ├── grad_adv_0.csv │ │ ├── grad_adv_1.csv │ │ ├── grad_adv_2.csv │ │ ├── grad_adv_3.csv │ │ ├── grad_loss.svg │ │ ├── grad_loss_0.csv │ │ ├── grad_loss_1.csv │ │ ├── grad_loss_2.csv │ │ ├── grad_loss_3.csv │ │ ├── grad_reward.svg │ │ ├── grad_reward_0.csv │ │ ├── grad_reward_1.csv │ │ ├── grad_reward_2.csv │ │ ├── grad_reward_3.csv │ │ └── make.sh │ │ ├── ch14 │ │ ├── make.sh │ │ ├── sc1-bleu-argmax.csv │ │ ├── sc1-bleu-argmax.svg │ │ ├── sc1-bleu-sample.csv │ │ ├── sc1-bleu-sample.svg │ │ ├── sc1-bleu-test.csv │ │ ├── sc1-bleu-test.svg │ │ ├── sc1-skipped-samples.csv │ │ ├── sc1-skipped-samples.svg │ │ ├── sc2-bleu-argmax.csv │ │ ├── sc2-bleu-argmax.svg │ │ ├── sc2-bleu-sample.csv │ │ ├── sc2-bleu-sample.svg │ │ ├── sc2-bleu-test.csv │ │ ├── sc2-bleu-test.svg │ │ ├── xe-bleu-test.csv │ │ ├── xe-bleu-test.svg │ │ ├── xe-bleu.csv │ │ ├── xe-bleu.svg │ │ ├── xe-loss.csv │ │ └── xe-loss.svg │ │ ├── ch15 │ │ ├── 01_basic_1_reward.svg │ │ ├── 02_basic_1_steps.svg │ │ ├── 03_basic_1_val.svg │ │ ├── 04_basic_10_reward.svg │ │ ├── 05_basic_10_steps.svg │ │ ├── 06_basic_10_val.svg │ │ ├── 07_basic_25_reward.svg │ │ ├── 08_basic_25_steps.svg │ │ ├── 09_basic_25_val.svg │ │ ├── 10_basic_200_reward.svg │ │ ├── 11_basic_200_steps.svg │ │ ├── 12_basic_200_val.svg │ │ ├── 13_lm_pre_1_small_reward.svg │ │ ├── 14_lm_pre_1_small_loss.svg │ │ ├── 15_lm_pre_1_med_reward.svg │ │ ├── 16_lm_pre_1_med_loss.svg │ │ ├── 17_lm_pre_5_reward.svg │ │ ├── 18_lm_pre_5_steps.svg │ │ ├── 19_lm_dqn_1_small_reward.svg │ │ ├── 20_lm_dqn_1_small_loss.svg │ │ ├── 21_lm_dqn_1_med_reward.svg │ │ ├── 22_lm_dqn_1_med_loss.svg │ │ ├── 23_lm_dqn_1_val.svg │ │ ├── basic_10_reward.csv │ │ ├── basic_10_steps.csv │ │ ├── basic_10_val.csv │ │ ├── basic_1_reward.csv │ │ ├── basic_1_steps.csv │ │ ├── basic_1_valreward.csv │ │ ├── basic_200_reward.csv │ │ ├── basic_200_steps.csv │ │ ├── basic_200_val.csv │ │ ├── basic_25_med_reward.csv │ │ ├── basic_25_med_steps.csv │ │ ├── basic_25_med_val.csv │ │ ├── basic_25_small_reward.csv │ │ ├── basic_25_small_steps.csv │ │ ├── basic_25_small_val.csv │ │ ├── lm_dqn_1_med_loss.csv │ │ ├── lm_dqn_1_med_reward.csv │ │ ├── lm_dqn_1_med_val.csv │ │ ├── lm_dqn_1_small_loss.csv │ │ ├── lm_dqn_1_small_reward.csv │ │ ├── lm_dqn_1_small_val.csv │ │ ├── lm_pre_1_med_loss.csv │ │ ├── lm_pre_1_med_reward.csv │ │ ├── lm_pre_1_small_loss.csv │ │ ├── lm_pre_1_small_reward.csv │ │ ├── lm_pre_5_med_reward.csv │ │ ├── lm_pre_5_med_steps.csv │ │ ├── lm_pre_5_small_reward.csv │ │ ├── lm_pre_5_small_steps.csv │ │ └── make.sh │ │ ├── ch16 │ │ ├── 05-reward100.svg │ │ ├── 06-01-loss-total.svg │ │ ├── 06-02-loss-ent.svg │ │ ├── 07-episode-steps.svg │ │ ├── 15-reward100.svg │ │ ├── 16-01-loss-total.svg │ │ ├── 16-02-loss-ent.svg │ │ ├── 17-episode-steps.svg │ │ ├── 18-reward100.svg │ │ ├── 19-01-loss-total.svg │ │ ├── 19-02-loss-ent.svg │ │ ├── 20-episode-steps.svg │ │ ├── 21-reward100.svg │ │ ├── 22-01-loss-total.svg │ │ ├── 22-02-loss-ent.svg │ │ ├── 22-episode-steps.svg │ │ ├── 23-reward100.svg │ │ ├── 24-01-loss-total.svg │ │ ├── 24-02-loss-ent.svg │ │ ├── 25-episode-steps.svg │ │ ├── 32-reward100.svg │ │ ├── 33-01-loss-total.svg │ │ ├── 33-02-loss-ent.svg │ │ ├── 34-episode-steps.svg │ │ ├── 35-reward100.svg │ │ ├── 36-01-loss-total.svg │ │ ├── 36-02-loss-ent.svg │ │ ├── 37-episode-steps.svg │ │ ├── cb-mm-episode-steps.csv │ │ ├── cb-mm-loss-ent.csv │ │ ├── cb-mm-loss-total.csv │ │ ├── cb-mm-reward100.csv │ │ ├── cb-nomm-episode-steps.csv │ │ ├── cb-nomm-loss-ent.csv │ │ ├── cb-nomm-loss-total.csv │ │ ├── cb-nomm-reward100.csv │ │ ├── cd-episode-steps.csv │ │ ├── cd-loss-ent.csv │ │ ├── cd-loss-total.csv │ │ ├── cd-reward100.csv │ │ ├── make.sh │ │ ├── s-demo-episode-steps.csv │ │ ├── s-demo-loss-ent.csv │ │ ├── s-demo-loss-total.csv │ │ ├── s-demo-reward100.csv │ │ ├── s-nodemo-episode-steps.csv │ │ ├── s-nodemo-loss-ent.csv │ │ ├── s-nodemo-loss-total.csv │ │ ├── s-nodemo-reward100.csv │ │ ├── t3-episode-steps.csv │ │ ├── t3-loss-ent.csv │ │ ├── t3-loss-total.csv │ │ └── t3-reward100.csv │ │ ├── ch17 │ │ ├── 02-01-reward100.svg │ │ ├── 02-02-episode-steps.svg │ │ ├── 04-01-test-reward.svg │ │ ├── 04-02-test-steps.svg │ │ ├── 06-01-reward100.svg │ │ ├── 06-02-episode-steps.svg │ │ ├── 07-01-loss-actor.svg │ │ ├── 07-02-loss-critic.svg │ │ ├── 08-01-test-reward.svg │ │ ├── 08-02-test-steps.svg │ │ ├── 09-01-reward100.svg │ │ ├── 09-02-episode-steps.svg │ │ ├── 10-01-loss-actor.svg │ │ ├── 10-02-loss-critic.svg │ │ ├── 11-01-test-reward.svg │ │ ├── 11-02-test-steps.svg │ │ ├── a2c-episode-steps.csv │ │ ├── a2c-reward100.csv │ │ ├── a2c-test-reward.csv │ │ ├── a2c-test-steps.csv │ │ ├── d4-episode-steps.csv │ │ ├── d4-loss-actor.csv │ │ ├── d4-loss-critic.csv │ │ ├── d4-reward100.csv │ │ ├── d4-test-reward.csv │ │ ├── d4-test-steps.csv │ │ ├── dd-episode-steps.csv │ │ ├── dd-loss-actor.csv │ │ ├── dd-loss-critic.csv │ │ ├── dd-reward100.csv │ │ ├── dd-test-reward.csv │ │ ├── dd-test-steps.csv │ │ └── make.sh │ │ ├── ch18 │ │ ├── 01_h_critic_ref.csv │ │ ├── 01_h_critic_ref.svg │ │ ├── 01_h_loss_actor.csv │ │ ├── 01_h_loss_critic.csv │ │ ├── 01_h_reward.csv │ │ ├── 01_h_test_reward.csv │ │ ├── 02_h_loss_actor.svg │ │ ├── 03_h_loss_critic.svg │ │ ├── 04_h_reward.svg │ │ ├── 05_h_test_reward.svg │ │ └── make.sh │ │ ├── ch19 │ │ ├── 02-01-reward100.svg │ │ ├── 02-02-steps.svg │ │ ├── 03-01-loss-policy.svg │ │ ├── 03-02-loss-value.svg │ │ ├── 04-loss-entropy.svg │ │ ├── 05-01-test-reward.svg │ │ ├── 05-02-test-steps.svg │ │ ├── 06-01-reward100.svg │ │ ├── 06-02-steps.svg │ │ ├── 07-01-loss-policy.svg │ │ ├── 07-02-loss-value.svg │ │ ├── 08-loss-entropy.svg │ │ ├── 09-01-test-reward.svg │ │ ├── 09-02-test-steps.svg │ │ ├── 10-01-reward100.svg │ │ ├── 10-02-steps.svg │ │ ├── 11-01-test-reward.svg │ │ ├── 11-02-test-steps.svg │ │ ├── 12-01-reward100.svg │ │ ├── 12-02-test-reward.svg │ │ ├── 13-01-reward-a2c-ppo.svg │ │ ├── 13-02-test-reward-a2c-ppo.svg │ │ ├── 14-01-reward100.svg │ │ ├── 14-02-test-reward.svg │ │ ├── 15-01-reward-trpo-ppo.svg │ │ ├── 15-02-test-reward-a2c-ppo.svg │ │ ├── 16-01-reward100.svg │ │ ├── 16-02-test-reward.svg │ │ ├── 17-01-reward-trpo-ppo.svg │ │ ├── 17-02-test-reward-trpo-ppo.svg │ │ ├── 18-01-reward100.svg │ │ ├── 18-02-test-reward.svg │ │ ├── 19-01-reward100.svg │ │ ├── 19-02-test-reward.svg │ │ ├── 20-01-reward100.svg │ │ ├── 20-02-test-reward.svg │ │ ├── 21-01-reward-sac-ppo.svg │ │ ├── 21-02-test-reward-sac-ppo.svg │ │ ├── 22-01-reward100.svg │ │ ├── 22-02-test-reward.svg │ │ ├── 23-01-reward-sac-ppo.svg │ │ ├── 23-02-test-reward-sac-ppo.svg │ │ ├── aa-loss-entropy.csv │ │ ├── aa-loss-policy.csv │ │ ├── aa-loss-value.csv │ │ ├── aa-reward.csv │ │ ├── aa-steps.csv │ │ ├── aa-test-reward.csv │ │ ├── aa-test-steps.csv │ │ ├── ac-loss-entropy.csv │ │ ├── ac-loss-policy.csv │ │ ├── ac-loss-value.csv │ │ ├── ac-reward.csv │ │ ├── ac-steps.csv │ │ ├── ac-test-reward.csv │ │ ├── ac-test-steps.csv │ │ ├── ka-reward.csv │ │ ├── ka-test-reward.csv │ │ ├── kc-reward.csv │ │ ├── kc-test-reward.csv │ │ ├── make.sh │ │ ├── pa-reward.csv │ │ ├── pa-test-reward.csv │ │ ├── pc-reward.csv │ │ ├── pc-steps.csv │ │ ├── pc-test-reward.csv │ │ ├── pc-test-steps.csv │ │ ├── sa-reward.csv │ │ ├── sa-test-reward.csv │ │ ├── sc-reward.csv │ │ ├── sc-test-reward.csv │ │ ├── ta-reward.csv │ │ ├── ta-test-reward.csv │ │ ├── tc-reward.csv │ │ └── tc-test-reward.csv │ │ ├── ch20 │ │ ├── 01-01-reward-max.svg │ │ ├── 01-02-reward-mean.svg │ │ ├── 02-01-reward-std.svg │ │ ├── 02-02-update-l2.svg │ │ ├── 03-01-reward-max.svg │ │ ├── 03-02-reward-mean.svg │ │ ├── 04-01-reward-std.svg │ │ ├── 04-02-update-l2.svg │ │ ├── 05-01-reward-max.svg │ │ ├── 05-02-reward-mean.svg │ │ ├── 06-reward-std.svg │ │ ├── 07-01-reward-max.svg │ │ ├── 07-02-reward-mean.svg │ │ ├── 08-reward-std.svg │ │ ├── escp-reward-max.csv │ │ ├── escp-reward-mean.csv │ │ ├── escp-reward-std.csv │ │ ├── escp-update-l2.csv │ │ ├── eshc-reward-max.csv │ │ ├── eshc-reward-mean.csv │ │ ├── eshc-reward-std.csv │ │ ├── eshc-update-l2.csv │ │ ├── gacp-reward-max.csv │ │ ├── gacp-reward-mean.csv │ │ ├── gacp-reward-std.csv │ │ ├── gahc-reward-max.csv │ │ ├── gahc-reward-mean.csv │ │ ├── gahc-reward-std.csv │ │ └── make.sh │ │ ├── ch21 │ │ ├── atari │ │ │ ├── dqn_egreedy_reward.csv │ │ │ ├── dqn_egreedy_steps.csv │ │ │ ├── ppo_distill_ref_ext.csv │ │ │ ├── ppo_distill_ref_int.csv │ │ │ ├── ppo_distill_reward.csv │ │ │ ├── ppo_distill_steps.csv │ │ │ ├── ppo_distill_test_reward.csv │ │ │ ├── ppo_loss.csv │ │ │ ├── ppo_loss_entropy.csv │ │ │ ├── ppo_loss_policy.csv │ │ │ ├── ppo_loss_value.csv │ │ │ ├── ppo_nn_loss_entropy.csv │ │ │ ├── ppo_nn_reward.csv │ │ │ ├── ppo_nn_steps.csv │ │ │ ├── ppo_reward.csv │ │ │ └── ppo_steps.csv │ │ ├── atari_dqn_egreedy_01_reward.svg │ │ ├── atari_dqn_egreedy_02_steps.svg │ │ ├── atari_ppo_01_reward.svg │ │ ├── atari_ppo_02_steps.svg │ │ ├── atari_ppo_03_loss.svg │ │ ├── atari_ppo_04_loss_policy.svg │ │ ├── atari_ppo_05_loss_value.svg │ │ ├── atari_ppo_06_loss_entropy.svg │ │ ├── atari_ppo_distill_01_reward.svg │ │ ├── atari_ppo_distill_02_steps.svg │ │ ├── atari_ppo_distill_03_test_reward.svg │ │ ├── atari_ppo_distill_04_ref_ext.svg │ │ ├── atari_ppo_distill_05_ref_int.svg │ │ ├── atari_ppo_nn_01_reward.svg │ │ ├── atari_ppo_nn_02_steps.svg │ │ ├── atari_ppo_nn_03_loss_entropy.svg │ │ ├── dqn │ │ │ ├── counts_loss.csv │ │ │ ├── counts_reward.csv │ │ │ ├── counts_steps.csv │ │ │ ├── counts_test_reward.csv │ │ │ ├── counts_test_steps.csv │ │ │ ├── egreedy_epsilon.csv │ │ │ ├── egreedy_loss.csv │ │ │ ├── egreedy_reward.csv │ │ │ ├── egreedy_steps.csv │ │ │ ├── nn_loss.csv │ │ │ ├── nn_reward.csv │ │ │ └── nn_steps.csv │ │ ├── dqn_counts_01_reward.svg │ │ ├── dqn_counts_02_steps.svg │ │ ├── dqn_counts_04_loss.svg │ │ ├── dqn_counts_05_test_reward.svg │ │ ├── dqn_counts_06_test_steps.svg │ │ ├── dqn_egreedy_01_reward.svg │ │ ├── dqn_egreedy_02_steps.svg │ │ ├── dqn_egreedy_03_epsilon.svg │ │ ├── dqn_egreedy_04_loss.svg │ │ ├── dqn_nn_01_reward.svg │ │ ├── dqn_nn_02_steps.svg │ │ ├── dqn_nn_04_loss.svg │ │ ├── make.sh │ │ ├── ppo │ │ │ ├── basic_loss.csv │ │ │ ├── basic_loss_entropy.csv │ │ │ ├── basic_reward.csv │ │ │ ├── basic_steps.csv │ │ │ ├── basic_test_reward.csv │ │ │ ├── counts_loss.csv │ │ │ ├── counts_loss_entropy.csv │ │ │ ├── counts_reward.csv │ │ │ ├── counts_steps.csv │ │ │ ├── counts_test_reward.csv │ │ │ ├── dist_loss.csv │ │ │ ├── dist_loss_dist.csv │ │ │ ├── dist_reward.csv │ │ │ ├── dist_steps.csv │ │ │ ├── dist_test_reward.csv │ │ │ ├── nn_loss.csv │ │ │ ├── nn_loss_entropy.csv │ │ │ ├── nn_reward.csv │ │ │ ├── nn_steps.csv │ │ │ └── nn_test_reward.csv │ │ ├── ppo_basic_01_reward.svg │ │ ├── ppo_basic_02_steps.svg │ │ ├── ppo_basic_03_loss.svg │ │ ├── ppo_basic_04_loss_ent.svg │ │ ├── ppo_basic_05_test_reward.svg │ │ ├── ppo_counts_01_reward.svg │ │ ├── ppo_counts_02_steps.svg │ │ ├── ppo_counts_03_loss.svg │ │ ├── ppo_counts_04_loss_ent.svg │ │ ├── ppo_counts_05_test_reward.svg │ │ ├── ppo_dist_01_reward.svg │ │ ├── ppo_dist_02_steps.svg │ │ ├── ppo_dist_03_loss.svg │ │ ├── ppo_dist_04_loss_dist.svg │ │ ├── ppo_dist_05_test_reward.svg │ │ ├── ppo_nn_01_reward.svg │ │ ├── ppo_nn_02_steps.svg │ │ ├── ppo_nn_03_loss.svg │ │ ├── ppo_nn_04_loss_ent.svg │ │ └── ppo_nn_05_test_reward.svg │ │ ├── ch22 │ │ ├── 05-01-test-reward.svg │ │ ├── 05-02-test-steps.svg │ │ ├── 06-01-total-reward.svg │ │ ├── 06-02-total-steps.svg │ │ ├── 07-01-adv.svg │ │ ├── 07-02-loss-ent.svg │ │ ├── 08-01-loss-policy.svg │ │ ├── 08-02-loss-value.svg │ │ ├── 09-01-loss-obs.svg │ │ ├── 09-02-loss-reward.svg │ │ ├── 10-01-test-reward.svg │ │ ├── 10-02-test-steps.svg │ │ ├── 11-01-total-reward.svg │ │ ├── 11-02-total-steps.svg │ │ ├── 12-loss-distill.svg │ │ ├── 13-01-test-reward-cmp.svg │ │ ├── 13-02-test-steps-cmp.svg │ │ ├── a2c-adv.csv │ │ ├── a2c-loss-ent.csv │ │ ├── a2c-loss-policy.csv │ │ ├── a2c-loss-value.csv │ │ ├── a2c-test-reward.csv │ │ ├── a2c-test-steps.csv │ │ ├── a2c-total-reward.csv │ │ ├── a2c-total-steps.csv │ │ ├── em-loss-obs.csv │ │ ├── em-loss-reward.csv │ │ ├── i2a-loss-distill.csv │ │ ├── i2a-test-reward.csv │ │ ├── i2a-test-steps.csv │ │ ├── i2a-total-reward.csv │ │ ├── i2a-total-steps.csv │ │ └── make.sh │ │ ├── ch23 │ │ ├── 03-win-ratio.svg │ │ ├── 04-01-loss-policy.svg │ │ ├── 04-02-loss-value.svg │ │ ├── loss-policy.csv │ │ ├── loss-value.csv │ │ ├── make.sh │ │ └── win-ratio.csv │ │ ├── ch25 │ │ ├── both-deer-loss.csv │ │ ├── both-deer-loss.svg │ │ ├── both-deer-reward.svg │ │ ├── both-reward-deer.csv │ │ ├── both-reward-tiger.csv │ │ ├── both-reward.svg │ │ ├── both-steps.svg │ │ ├── both-tiger-loss.csv │ │ ├── both-tiger-loss.svg │ │ ├── both-tiger-reward.svg │ │ ├── both_reward.csv │ │ ├── both_steps.csv │ │ ├── double-dqn-epsilon.csv │ │ ├── double-dqn-epsilon.svg │ │ ├── double-dqn-loss.csv │ │ ├── double-dqn-loss.svg │ │ ├── double-dqn-reward.csv │ │ ├── double-dqn-reward.svg │ │ ├── double-dqn-steps.csv │ │ ├── double-dqn-steps.svg │ │ ├── double-dqn-test-reward.csv │ │ ├── double-dqn-test-reward.svg │ │ ├── double-dqn-test-steps.csv │ │ ├── double-dqn-test-steps.svg │ │ ├── make.sh │ │ ├── tigers-dqn-epsilon.csv │ │ ├── tigers-dqn-epsilon.svg │ │ ├── tigers-dqn-loss.csv │ │ ├── tigers-dqn-loss.svg │ │ ├── tigers-dqn-reward.csv │ │ ├── tigers-dqn-reward.svg │ │ ├── tigers-dqn-steps.csv │ │ ├── tigers-dqn-steps.svg │ │ ├── tigers-dqn-test-reward.csv │ │ ├── tigers-dqn-test-reward.svg │ │ ├── tigers-dqn-test-steps.csv │ │ └── tigers-dqn-test-steps.svg │ │ └── plot.py ├── Multi-agent │ ├── Langchain.py │ ├── LeelaChess.py │ ├── Pistonball.py │ ├── Pystonball_play.py │ └── tictactoe.py ├── Sutton_Barto │ ├── chapter01 │ │ ├── policy_first.bin │ │ ├── policy_second.bin │ │ └── tic_tac_toe.py │ ├── chapter02 │ │ ├── ten_armed_testbed.ipynb │ │ └── ten_armed_testbed.py │ ├── chapter03 │ │ ├── grid_world.ipynb │ │ ├── grid_world.py │ │ └── gridworld.png │ ├── chapter04 │ │ ├── car_rental.ipynb │ │ ├── car_rental.py │ │ ├── car_rental_synchronous.ipynb │ │ ├── car_rental_synchronous.py │ │ ├── gamblers_problem.ipynb │ │ ├── gamblers_problem.py │ │ ├── grid_world.ipynb │ │ └── grid_world.py │ ├── chapter05 │ │ ├── blackjack.ipynb │ │ ├── blackjack.py │ │ ├── infinite_variance.ipynb │ │ └── infinite_variance.py │ ├── chapter06 │ │ ├── cliff_walking.ipynb │ │ ├── cliff_walking.py │ │ ├── maximization_bias.ipynb │ │ ├── maximization_bias.py │ │ ├── random_walk.ipynb │ │ ├── random_walk.py │ │ ├── windy_grid.png │ │ ├── windy_grid_world.ipynb │ │ └── windy_grid_world.py │ ├── chapter07 │ │ ├── random_walk.ipynb │ │ └── random_walk.py │ ├── chapter08 │ │ ├── expectation_vs_sample.ipynb │ │ ├── expectation_vs_sample.py │ │ ├── maze.ipynb │ │ ├── maze.py │ │ ├── trajectory_sampling.ipynb │ │ └── trajectory_sampling.py │ ├── chapter09 │ │ ├── random_walk.ipynb │ │ ├── random_walk.py │ │ ├── square_wave.ipynb │ │ └── square_wave.py │ ├── chapter10 │ │ ├── Mountain_car_continuous_gym.py │ │ ├── Mountain_car_gym.py │ │ ├── access_control.py │ │ ├── cartpole_gym.py │ │ ├── mountain_car.ipynb │ │ └── mountain_car.py │ ├── chapter11 │ │ └── counterexample.py │ ├── chapter12 │ │ ├── lambda_effect.py │ │ ├── mountain_car.py │ │ └── random_walk.py │ ├── chapter13 │ │ └── short_corridor.py │ └── images │ │ ├── example_13_1.png │ │ ├── example_6_2.png │ │ ├── figure_2_1.png │ │ ├── figure_2_2.png │ │ ├── figure_2_3.png │ │ ├── figure_2_4.png │ │ ├── figure_2_5.png │ │ ├── figure_2_6.png │ │ ├── figure_3_2.png │ │ ├── figure_3_2_linear_system.png │ │ ├── figure_3_5.png │ │ ├── figure_3_5_policy.png │ │ ├── figure_4_1.png │ │ ├── figure_4_2.png │ │ ├── figure_4_3.png │ │ ├── figure_6_2.png │ │ ├── figure_6_4.png │ │ └── figure_6_6.png └── textworld │ └── =5.9.0 ├── Exercises ├── BlackJack_QL.ipynb ├── Cliff_Walking_render.ipynb ├── Frozenlake_VI.ipynb ├── Frozenlake_q_learning.ipynb ├── continuous_mountain_car.py └── mountain_car.py ├── Guia_apresentacao.md ├── LICENSE ├── Lectures ├── .ipynb_checkpoints │ └── Appendix - Cross-entropy-checkpoint.ipynb ├── Appendix - Cross-entropy.ipynb ├── Appendix 2 - Monte carlo tree search.ipynb ├── Example - AlphaGo Zero.ipynb ├── Example - AlphaGo.ipynb ├── Example - The reinforce algorithm.ipynb ├── Lecture 1 - Introduction to RL.ipynb ├── Lecture 2 - Finite MDPs.ipynb ├── Lecture 3 - Q-learning and SARSA.ipynb ├── Lecture 4 - Temporal Difference Methods.ipynb ├── Lecture 5 - Approximate Solution Methods.ipynb ├── Lecture 6 - Multi-agent RL.ipynb ├── MN_Lecture 1 - Introduction to RL.py ├── chapter7.pdf ├── chapter7.tex ├── dreadlock_neurons2.jpeg ├── mujoco_reinforce_fig2.png ├── public │ ├── 600px-Markov_Decision_Process.png │ ├── Reinforcement_learning_diagram.png │ ├── n-step-offp.png │ ├── n-step-return.png │ ├── n-step-sarsa-alg.png │ ├── n-step-td-prediction-alg.png │ ├── reinforce_invpend_gym_v26_fig1.gif │ ├── reinforce_invpend_gym_v26_fig2.png │ ├── reinforce_invpend_gym_v26_fig3.jpeg │ ├── reinforce_invpend_gym_v26_fig4.png │ ├── rl-loop.jpg │ └── tictactoe.png ├── reinforce_invpend_gym_v26_fig4.png ├── vector_envs_tutorial.ipynb └── vector_envs_tutorial.py ├── MuZero.pdf ├── Projects ├── 2024 │ ├── 0-Descrição do Projeto.md │ ├── 1-Definição do Problema.md │ ├── 2-Agentes.md │ ├── 3-Ambiente.md │ ├── 4-Fluxo.md │ ├── Avaliação.md │ ├── README.md │ └── Solution template │ │ ├── agentes.py │ │ ├── ambiente.py │ │ └── main.py └── 2025 │ ├── 0-Descrição.md │ ├── 1 - Projeto 1.md │ └── Projeto Final │ ├── README.md │ ├── main.py │ ├── models │ └── MATD3 │ │ ├── MATD3_trained_agent.pt │ │ ├── training_scores_evolution.png │ │ └── training_scores_history.npy │ ├── pyproject.toml │ ├── replay.py │ └── videos │ └── speaker_listener.gif ├── README.md ├── Reinforcement Learning_ An Introduction, 2nd Edition-(2018).pdf ├── Temas_dos_Trabalhos_2023_old.md ├── alphago.pdf ├── grading.md └── pyproject.toml /.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints 2 | .aider* 3 | -------------------------------------------------------------------------------- /Alpha zero-2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Alpha zero-2018.pdf -------------------------------------------------------------------------------- /AlphaGeometry.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/AlphaGeometry.pdf -------------------------------------------------------------------------------- /AlphaStar.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/AlphaStar.pdf -------------------------------------------------------------------------------- /Alpha_go_zero.PDF: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Alpha_go_zero.PDF -------------------------------------------------------------------------------- /Code/Lapan/Chapter02/01_agent_anatomy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter02/01_agent_anatomy.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter02/02_cartpole_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter02/02_cartpole_random.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter03/.gitignore: -------------------------------------------------------------------------------- 1 | runs 2 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter03/01_modules.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter03/01_modules.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter03/02_tensorboard.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter03/02_tensorboard.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter03/03_atari_gan.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter03/03_atari_gan.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter04/01_cartpole.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter04/01_cartpole.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter06/02_dqn_pong.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter06/02_dqn_pong.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter06/03_dqn_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter06/03_dqn_play.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter06/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter06/lib/dqn_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter06/lib/dqn_model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter06/lib/wrappers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter06/lib/wrappers.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter07/01_actions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter07/01_actions.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter07/02_agents.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter07/02_agents.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter07/03_exp_sources.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter07/03_exp_sources.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter07/04_replay_buf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter07/04_replay_buf.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter07/05_target_net.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter07/05_target_net.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter07/06_cartpole.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter07/06_cartpole.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/01_dqn_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/01_dqn_basic.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/02_dqn_n_steps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/02_dqn_n_steps.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/03_dqn_double.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/03_dqn_double.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/04_dqn_noisy_net.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/04_dqn_noisy_net.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/05_dqn_prio_replay.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/05_dqn_prio_replay.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/06_dqn_dueling.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/06_dqn_dueling.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/07_dqn_distrib.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/07_dqn_distrib.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/08_dqn_rainbow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/08_dqn_rainbow.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/adhoc/commute.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/adhoc/commute.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/adhoc/distr_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/adhoc/distr_test.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/lib/common.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/lib/dqn_extra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/lib/dqn_extra.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter08/lib/dqn_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter08/lib/dqn_model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/.gitignore: -------------------------------------------------------------------------------- 1 | runs 2 | res 3 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/00_slow_grads.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/00_slow_grads.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/01_baseline.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/01_baseline.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/02_n_envs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/02_n_envs.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/03_parallel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/03_parallel.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/05_cule.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/05_cule.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/attic/02_env_steps.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/attic/02_env_steps.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/img/01_orig_tb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/img/01_orig_tb.png -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/img/02_steps-tb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/img/02_steps-tb.png -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/img/03_serial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/img/03_serial.png -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/img/04_parallel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/img/04_parallel.png -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/lib/atari_wrappers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/lib/atari_wrappers.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/lib/common.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter09/lib/dqn_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter09/lib/dqn_model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/.gitignore: -------------------------------------------------------------------------------- 1 | *.csv 2 | saves 3 | res 4 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/data/unpack_data.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | tar xvf ch08-small-quotes.tgz 3 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter10/lib/common.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/lib/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter10/lib/data.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/lib/environ.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter10/lib/environ.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/lib/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter10/lib/models.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/lib/validation.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter10/lib/validation.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/run_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter10/run_model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter10/tests/test_data.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/tests/test_environ.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter10/tests/test_environ.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/train_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter10/train_model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter10/train_model_conv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter10/train_model_conv.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter11/01_cartpole_dqn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter11/01_cartpole_dqn.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter11/04_cartpole_pg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter11/04_cartpole_pg.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter11/05_pong_pg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter11/05_pong_pg.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter11/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter11/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter11/lib/common.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter12/.gitignore: -------------------------------------------------------------------------------- 1 | runs_arch 2 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter12/01_cartpole_pg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter12/01_cartpole_pg.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter12/02_pong_a2c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter12/02_pong_a2c.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter12/04_pong_r2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter12/04_pong_r2.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter12/log.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter12/log.md -------------------------------------------------------------------------------- /Code/Lapan/Chapter12/torch_perf/t1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter12/torch_perf/t1.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter13/.gitignore: -------------------------------------------------------------------------------- 1 | res 2 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter13/01_a3c_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter13/01_a3c_data.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter13/02_a3c_grad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter13/02_a3c_grad.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter13/adhoc/distr_grad.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter13/adhoc/distr_grad.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter13/adhoc/distr_grad2.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter13/adhoc/distr_grad2.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter13/adhoc/sync_bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter13/adhoc/sync_bench.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter13/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter13/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter13/lib/common.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/.gitignore -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/cor_reader.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/cor_reader.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/data/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/data/.gitignore -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/data/get_data.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/data/get_data.sh -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/data_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/data_test.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/libbots/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/libbots/cornell.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/libbots/cornell.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/libbots/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/libbots/data.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/libbots/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/libbots/model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/libbots/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/libbots/utils.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/telegram_bot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/telegram_bot.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/tests/test_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/tests/test_data.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/train_crossent.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/train_crossent.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/train_scst.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/train_scst.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter14/use_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter14/use_model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter15/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter15/README.md -------------------------------------------------------------------------------- /Code/Lapan/Chapter15/games/.gitignore: -------------------------------------------------------------------------------- 1 | *.json 2 | *.ni 3 | *.ulx 4 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter15/games/make_games.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter15/games/make_games.sh -------------------------------------------------------------------------------- /Code/Lapan/Chapter15/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter15/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter15/lib/common.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter15/lib/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter15/lib/model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter15/lib/preproc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter15/lib/preproc.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter15/requirements.txt: -------------------------------------------------------------------------------- 1 | textworld==1.4.0 2 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter15/train_basic.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter15/train_basic.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter15/train_lm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter15/train_lm.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/.gitignore: -------------------------------------------------------------------------------- 1 | saves 2 | data 3 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/adhoc/demo_dump.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/adhoc/demo_dump.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/adhoc/fbs_dump.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/adhoc/fbs_dump.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/adhoc/fbs_join.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/adhoc/fbs_join.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/adhoc/fbs_read.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/adhoc/fbs_read.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/adhoc/start_docker.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/adhoc/start_docker.sh -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/adhoc/wd_tests.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/adhoc/wd_tests.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/adhoc/wob_clicks.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/adhoc/wob_clicks.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/adhoc/wob_create.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/adhoc/wob_create.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/adhoc/wob_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/adhoc/wob_test.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/environment.yml -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/ksy/fbs.ksy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/ksy/fbs.ksy -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/ksy/rfp_client.ksy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/ksy/rfp_client.ksy -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/ksy/rfp_server.ksy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/ksy/rfp_server.ksy -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/lib/common.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/lib/ksy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/lib/ksy/fbs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/lib/ksy/fbs.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/lib/ksy/rfp_client.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/lib/ksy/rfp_client.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/lib/ksy/rfp_server.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/lib/ksy/rfp_server.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/lib/model_vnc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/lib/model_vnc.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/lib/vnc_demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/lib/vnc_demo.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/lib/wob_vnc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/lib/wob_vnc.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/wob_click_mm_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/wob_click_mm_play.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/wob_click_mm_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/wob_click_mm_train.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/wob_click_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/wob_click_play.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/wob_click_train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/wob_click_train.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter16/wob_fixes/readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter16/wob_fixes/readme.md -------------------------------------------------------------------------------- /Code/Lapan/Chapter17/.gitignore: -------------------------------------------------------------------------------- 1 | res 2 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter17/01_check_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter17/01_check_env.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter17/02_train_a2c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter17/02_train_a2c.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter17/03_play_a2c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter17/03_play_a2c.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter17/04_train_ddpg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter17/04_train_ddpg.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter17/05_play_ddpg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter17/05_play_ddpg.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter17/06_train_d4pg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter17/06_train_d4pg.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter17/adhoc/record_a2c.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter17/adhoc/record_a2c.sh -------------------------------------------------------------------------------- /Code/Lapan/Chapter17/adhoc/record_ddpg.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | xvfb-run -s "-screen 0 640x480x24 +extension GLX" ./05_play_ddpg.py -m $1 -r $2 3 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter17/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter17/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter17/lib/common.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter17/lib/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter17/lib/model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/export_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/export_model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/hw/bench.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/hw/bench.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/hw/libhw/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/hw/libhw/nn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/hw/libhw/nn.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/hw/libhw/postproc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/hw/libhw/postproc.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/hw/libhw/sensors.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/hw/libhw/sensors.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/hw/libhw/servo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/hw/libhw/servo.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/hw/libhw/t1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/hw/libhw/t1.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/hw/libhw/t1zyh.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/hw/libhw/t1zyh.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/hw/libhw/t1zyho.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/hw/libhw/t1zyho.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/hw/obs.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/hw/obs.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/hw/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/hw/run.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/hw/zero.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/hw/zero.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/lib/ddpg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/lib/ddpg.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/lib/microtaur.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/lib/microtaur.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/micropython/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/micropython/README.md -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/show_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/show_model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/tests/test_hw_nn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/tests/test_hw_nn.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter18/train_ddpg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter18/train_ddpg.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/.gitignore: -------------------------------------------------------------------------------- 1 | res 2 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/01_train_a2c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter19/01_train_a2c.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/02_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter19/02_play.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/03_train_trpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter19/03_train_trpo.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/04_train_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter19/04_train_ppo.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/05_train_acktr.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter19/05_train_acktr.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/06_train_sac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter19/06_train_sac.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter19/README.md -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter19/lib/__init__.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter19/lib/common.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/lib/kfac.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter19/lib/kfac.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/lib/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter19/lib/model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter19/lib/trpo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter19/lib/trpo.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter20/.gitignore: -------------------------------------------------------------------------------- 1 | res 2 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter20/01_cartpole_es.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter20/01_cartpole_es.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter20/02_cheetah_es.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter20/02_cheetah_es.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter20/03_cartpole_ga.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter20/03_cartpole_ga.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter20/04_cheetah_ga.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter20/04_cheetah_ga.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter21/atari_dqn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter21/atari_dqn.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter21/atari_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter21/atari_ppo.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter21/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter21/lib/atari_wrappers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter21/lib/atari_wrappers.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter21/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter21/lib/common.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter21/lib/dqn_extra.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter21/lib/dqn_extra.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter21/lib/ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter21/lib/ppo.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter21/mcar_dqn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter21/mcar_dqn.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter21/mcar_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter21/mcar_ppo.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter21/riverswim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter21/riverswim.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter21/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter21/tests/test_ppo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter21/tests/test_ppo.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter22/.gitignore: -------------------------------------------------------------------------------- 1 | res 2 | saves 3 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter22/01_a2c.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter22/01_a2c.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter22/02_imag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter22/02_imag.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter22/03_i2a.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter22/03_i2a.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter22/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter22/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter22/lib/common.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter22/lib/i2a.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter22/lib/i2a.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter22/play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter22/play.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/.gitignore: -------------------------------------------------------------------------------- 1 | res 2 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/lib/game.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/lib/game.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/lib/mcts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/lib/mcts.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/lib/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/lib/model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/play.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/semi-final.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/semi-final.sh -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/telegram-bot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/telegram-bot.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tests/test_game.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tests/test_game.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tests/test_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tests/test_model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/final.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/final.csv -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/final.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/final.svg -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/final.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/final.txt -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/semi-0.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/semi-0.txt -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/semi-1.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/semi-1.txt -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/semi-2.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/semi-2.txt -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/semi-3.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/semi-3.txt -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/semi-4.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/semi-4.txt -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/semi-5.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/semi-5.txt -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/semi-6.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/semi-6.txt -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/semi-7.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/semi-7.txt -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/semi-8.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/semi-8.txt -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/tournament/semi-9.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/tournament/semi-9.txt -------------------------------------------------------------------------------- /Code/Lapan/Chapter23/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter23/train.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | runs 3 | saves 4 | *.png 5 | .ipynb_checkpoints 6 | *.log 7 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/csvs/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/csvs/README.md -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/csvs/t6-c2x2-nu=1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/csvs/t6-c2x2-nu=1.csv -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/docs/Notes.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/docs/Notes.md -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/gen_cubes.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/gen_cubes.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/ini/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/ini/README.md -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/libcube/conf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/libcube/conf.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/libcube/cubes/_env.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/libcube/cubes/_env.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/libcube/mcts.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/libcube/mcts.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/libcube/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/libcube/model.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/models/.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/models/.gitattributes -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/requirements.txt -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/run_tests.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/run_tests.sh -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/solver.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/solver.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/train.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/train.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter24/train_debug.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter24/train_debug.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/.gitignore: -------------------------------------------------------------------------------- 1 | MAgent 2 | render 3 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter25/README.md -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/battle_dqn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter25/battle_dqn.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/battle_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter25/battle_play.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/forest_both_dqn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter25/forest_both_dqn.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/forest_both_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter25/forest_both_play.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/forest_random.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter25/forest_random.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/forest_tigers_dqn.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter25/forest_tigers_dqn.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/forest_tigers_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter25/forest_tigers_play.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/lib/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/lib/common.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter25/lib/common.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/lib/data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter25/lib/data.py -------------------------------------------------------------------------------- /Code/Lapan/Chapter25/lib/model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/Chapter25/lib/model.py -------------------------------------------------------------------------------- /Code/Lapan/formulas/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/.gitignore -------------------------------------------------------------------------------- /Code/Lapan/formulas/_template.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/_template.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch01.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch01.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch04.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch04.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch05.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch05.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch06.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch06.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch07.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch07.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch08.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch08.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch11.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch11.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch12.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch12.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch14.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch14.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch17.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch17.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch19.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch19.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch20.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch20.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch21.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch21.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch21/ch21-001.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch21/ch21-001.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch21/ch21-002.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch21/ch21-002.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch21/ch21-003.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch21/ch21-003.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch21/ch21.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch21/ch21.pdf -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch23.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch23.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch23/ch23-001.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch23/ch23-001.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch23/ch23-002.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch23/ch23-002.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch23/ch23-003.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch23/ch23-003.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch23/ch23-004.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch23/ch23-004.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch23/ch23-005.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch23/ch23-005.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch23/ch23-006.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch23/ch23-006.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch23/ch23-007.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch23/ch23-007.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch23/ch23-008.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch23/ch23-008.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch23/ch23-009.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch23/ch23-009.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch23/ch23.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch23/ch23.pdf -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24.tex -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-001.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-001.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-002.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-002.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-003.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-003.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-004.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-004.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-005.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-005.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-006.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-006.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-007.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-007.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-008.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-008.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-009.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-009.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-010.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-010.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-011.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-011.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-012.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-012.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-013.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-013.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-014.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-014.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-015.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-015.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-016.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-016.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-017.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-017.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-018.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-018.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-019.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-019.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-020.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-020.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-021.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-021.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-022.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-022.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-023.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-023.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-024.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-024.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-025.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-025.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24-026.eps: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24-026.eps -------------------------------------------------------------------------------- /Code/Lapan/formulas/ch24/ch24.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/ch24/ch24.pdf -------------------------------------------------------------------------------- /Code/Lapan/formulas/clean.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/clean.sh -------------------------------------------------------------------------------- /Code/Lapan/formulas/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/formulas/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/cp-loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/cp-loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/cp-loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/cp-loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/cp-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/cp-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/cp-reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/cp-reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/cp-rw_bound.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/cp-rw_bound.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/cp-rw_bound.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/cp-rw_bound.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/fln-loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/fln-loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/fln-loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/fln-loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/fln-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/fln-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/fln-reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/fln-reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/fln-rw_bound.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/fln-rw_bound.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/fln-rw_bound.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/fln-rw_bound.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/flns-loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/flns-loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/flns-loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/flns-loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/flns-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/flns-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/flns-reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/flns-reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/flns-rw_bound.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/flns-rw_bound.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/flns-rw_bound.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/flns-rw_bound.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/flt-loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/flt-loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/flt-loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/flt-loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/flt-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/flt-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/flt-reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/flt-reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/flt-rw_bound.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/flt-rw_bound.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/flt-rw_bound.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/flt-rw_bound.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch04/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch04/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch05/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch05/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch05/viter-4x4-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch05/viter-4x4-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch05/viter-4x4-reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch05/viter-4x4-reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch05/viter-8x8-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch05/viter-8x8-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch05/viter-8x8-reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch05/viter-8x8-reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch06/01_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch06/01_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch06/01_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch06/01_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch06/02_avg_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch06/02_avg_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch06/02_avg_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch06/02_avg_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch06/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch06/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/01_base/avg_fps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/01_base/avg_fps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/01_base/avg_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/01_base/avg_loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/01_base/steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/01_base/steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/01_fps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/01_fps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/01_loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/01_loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/01_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/01_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/01_steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/01_steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/02_reward_3456.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/02_reward_3456.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/02_reward_b23.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/02_reward_b23.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/02_steps_b3.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/02_steps_b3.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/03_reward.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/03_reward.png -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/03_values.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/03_values.png -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/04_noisy/snr_1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/04_noisy/snr_1.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/04_noisy/snr_2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/04_noisy/snr_2.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/04_noisy/steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/04_noisy/steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/04_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/04_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/04_snr_1.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/04_snr_1.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/04_snr_2.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/04_snr_2.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/04_steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/04_steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/05_loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/05_loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/05_prio/avg_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/05_prio/avg_loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/05_reward_prio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/05_reward_prio.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/06_adv.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/06_adv.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/06_dueling/adv.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/06_dueling/adv.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/06_dueling/steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/06_dueling/steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/06_dueling/val.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/06_dueling/val.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/06_loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/06_loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/06_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/06_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/06_steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/06_steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/06_val.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/06_val.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/07_loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/07_loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/07_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/07_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/08_fps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/08_fps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/08_rainbow/steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/08_rainbow/steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/08_reward_comp.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/08_reward_comp.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/08_reward_only.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/08_reward_only.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/08_steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/08_steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch08/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch08/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch09/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch09/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch09/s00_01_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch09/s00_01_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch09/s00_02_avg_fps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch09/s00_02_avg_fps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch09/s01_01_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch09/s01_01_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch09/s01_02_steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch09/s01_02_steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch09/s01_03_loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch09/s01_03_loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch09/s01_04_avg_fps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch09/s01_04_avg_fps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch09/s03_01_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch09/s03_01_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch09/s03_02_avg_fps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch09/s03_02_avg_fps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch09/s04_02_env_fps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch09/s04_02_env_fps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch09/s04_04_env_fps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch09/s04_04_env_fps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/cv-reward_train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/cv-reward_train.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/cv-reward_train.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/cv-reward_train.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/cv-reward_val.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/cv-reward_val.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/cv-reward_val.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/cv-reward_val.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/ff-reward_test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/ff-reward_test.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/ff-reward_test.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/ff-reward_test.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/ff-reward_train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/ff-reward_train.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/ff-reward_train.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/ff-reward_train.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/ff-reward_val.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/ff-reward_val.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/ff-reward_val.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/ff-reward_val.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/ff-steps_train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/ff-steps_train.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/ff-steps_train.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/ff-steps_train.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/ff-values.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/ff-values.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/ff-values_train.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/ff-values_train.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch10/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch10/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/03_pg_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/03_pg_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/04_pg_baseline.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/04_pg_baseline.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/06_pg_entropy.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/06_pg_entropy.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/07_pg_l_entropy.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/07_pg_l_entropy.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/08_pg_l_policy.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/08_pg_l_policy.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/09_pg_l_total.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/09_pg_l_total.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/10_pg_grad_l2.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/10_pg_grad_l2.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/11_pg_grad_max.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/11_pg_grad_max.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/12_pg_kl.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/12_pg_kl.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/dqn_episodes.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/dqn_episodes.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/dqn_rewards100.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/dqn_rewards100.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/pg_baseline.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/pg_baseline.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/pg_batch_scales.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/pg_batch_scales.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/pg_entropy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/pg_entropy.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/pg_grad_l2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/pg_grad_l2.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/pg_grad_max.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/pg_grad_max.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/pg_kl.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/pg_kl.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/pg_l_entropy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/pg_l_entropy.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/pg_l_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/pg_l_policy.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/pg_l_total.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/pg_l_total.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/pg_reward100.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/pg_reward100.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/rf_episodes.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/rf_episodes.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch11/rf_rewards100.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch11/rf_rewards100.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/08-02-reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/08-02-reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/09-01-loss_ent.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/09-01-loss_ent.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/10-01-loss_value.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/10-01-loss_value.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/10-02-loss_total.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/10-02-loss_total.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/11-01-advanatage.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/11-01-advanatage.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/11-02-grad-l2.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/11-02-grad-l2.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/12-01-grad_max.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/12-01-grad_max.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/12-02-grad_var.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/12-02-grad_var.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/a2c-advantage.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/a2c-advantage.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/a2c-grad_l2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/a2c-grad_l2.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/a2c-grad_max.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/a2c-grad_max.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/a2c-grad_var.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/a2c-grad_var.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/a2c-loss_entropy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/a2c-loss_entropy.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/a2c-loss_policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/a2c-loss_policy.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/a2c-loss_total.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/a2c-loss_total.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/a2c-loss_value.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/a2c-loss_value.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/a2c-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/a2c-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-l2-bl.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-l2-bl.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-l2-nobl.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-l2-nobl.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-l2.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-l2.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-max-bl.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-max-bl.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-max-nobl.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-max-nobl.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-max.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-max.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-reward-bl.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-reward-bl.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-reward-nobl.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-reward-nobl.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-reward100_bl.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-reward100_bl.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-reward_bl.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-reward_bl.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-reward_nobl.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-reward_nobl.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-var-bl.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-var-bl.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-var-nobl.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-var-nobl.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/cp-var.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/cp-var.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch12/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch12/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/data_adv.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/data_adv.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/data_adv.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/data_adv.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/data_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/data_loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/data_loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/data_loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/data_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/data_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/data_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/data_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_adv.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_adv.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_adv_0.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_adv_0.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_adv_1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_adv_1.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_adv_2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_adv_2.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_adv_3.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_adv_3.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_loss_0.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_loss_0.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_loss_1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_loss_1.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_loss_2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_loss_2.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_loss_3.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_loss_3.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_reward_0.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_reward_0.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_reward_1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_reward_1.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_reward_2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_reward_2.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/grad_reward_3.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/grad_reward_3.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch13/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch13/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/sc1-bleu-argmax.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/sc1-bleu-argmax.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/sc1-bleu-argmax.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/sc1-bleu-argmax.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/sc1-bleu-sample.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/sc1-bleu-sample.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/sc1-bleu-sample.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/sc1-bleu-sample.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/sc1-bleu-test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/sc1-bleu-test.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/sc1-bleu-test.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/sc1-bleu-test.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/sc2-bleu-argmax.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/sc2-bleu-argmax.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/sc2-bleu-argmax.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/sc2-bleu-argmax.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/sc2-bleu-sample.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/sc2-bleu-sample.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/sc2-bleu-sample.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/sc2-bleu-sample.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/sc2-bleu-test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/sc2-bleu-test.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/sc2-bleu-test.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/sc2-bleu-test.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/xe-bleu-test.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/xe-bleu-test.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/xe-bleu-test.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/xe-bleu-test.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/xe-bleu.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/xe-bleu.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/xe-bleu.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/xe-bleu.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/xe-loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/xe-loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch14/xe-loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch14/xe-loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/02_basic_1_steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/02_basic_1_steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/03_basic_1_val.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/03_basic_1_val.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/06_basic_10_val.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/06_basic_10_val.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/09_basic_25_val.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/09_basic_25_val.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/12_basic_200_val.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/12_basic_200_val.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/23_lm_dqn_1_val.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/23_lm_dqn_1_val.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/basic_10_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/basic_10_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/basic_10_steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/basic_10_steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/basic_10_val.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/basic_10_val.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/basic_1_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/basic_1_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/basic_1_steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/basic_1_steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/basic_200_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/basic_200_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/basic_200_steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/basic_200_steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/basic_200_val.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/basic_200_val.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/basic_25_med_val.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/basic_25_med_val.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/lm_dqn_1_med_val.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/lm_dqn_1_med_val.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch15/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch15/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/05-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/05-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/06-01-loss-total.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/06-01-loss-total.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/06-02-loss-ent.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/06-02-loss-ent.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/07-episode-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/07-episode-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/15-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/15-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/16-01-loss-total.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/16-01-loss-total.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/16-02-loss-ent.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/16-02-loss-ent.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/17-episode-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/17-episode-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/18-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/18-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/19-01-loss-total.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/19-01-loss-total.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/19-02-loss-ent.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/19-02-loss-ent.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/20-episode-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/20-episode-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/21-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/21-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/22-01-loss-total.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/22-01-loss-total.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/22-02-loss-ent.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/22-02-loss-ent.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/22-episode-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/22-episode-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/23-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/23-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/24-01-loss-total.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/24-01-loss-total.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/24-02-loss-ent.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/24-02-loss-ent.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/25-episode-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/25-episode-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/32-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/32-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/33-01-loss-total.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/33-01-loss-total.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/33-02-loss-ent.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/33-02-loss-ent.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/34-episode-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/34-episode-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/35-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/35-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/36-01-loss-total.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/36-01-loss-total.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/36-02-loss-ent.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/36-02-loss-ent.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/37-episode-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/37-episode-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/cb-mm-loss-ent.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/cb-mm-loss-ent.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/cb-mm-loss-total.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/cb-mm-loss-total.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/cb-mm-reward100.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/cb-mm-reward100.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/cb-nomm-loss-ent.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/cb-nomm-loss-ent.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/cd-episode-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/cd-episode-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/cd-loss-ent.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/cd-loss-ent.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/cd-loss-total.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/cd-loss-total.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/cd-reward100.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/cd-reward100.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/s-demo-loss-ent.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/s-demo-loss-ent.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/s-demo-reward100.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/s-demo-reward100.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/t3-episode-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/t3-episode-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/t3-loss-ent.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/t3-loss-ent.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/t3-loss-total.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/t3-loss-total.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch16/t3-reward100.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch16/t3-reward100.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/02-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/02-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/04-02-test-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/04-02-test-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/06-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/06-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/07-01-loss-actor.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/07-01-loss-actor.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/08-02-test-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/08-02-test-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/09-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/09-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/10-01-loss-actor.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/10-01-loss-actor.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/11-02-test-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/11-02-test-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/a2c-reward100.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/a2c-reward100.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/a2c-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/a2c-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/a2c-test-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/a2c-test-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/d4-episode-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/d4-episode-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/d4-loss-actor.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/d4-loss-actor.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/d4-loss-critic.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/d4-loss-critic.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/d4-reward100.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/d4-reward100.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/d4-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/d4-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/d4-test-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/d4-test-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/dd-episode-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/dd-episode-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/dd-loss-actor.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/dd-loss-actor.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/dd-loss-critic.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/dd-loss-critic.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/dd-reward100.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/dd-reward100.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/dd-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/dd-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/dd-test-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/dd-test-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch17/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch17/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch18/01_h_critic_ref.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch18/01_h_critic_ref.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch18/01_h_critic_ref.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch18/01_h_critic_ref.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch18/01_h_loss_actor.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch18/01_h_loss_actor.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch18/01_h_loss_critic.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch18/01_h_loss_critic.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch18/01_h_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch18/01_h_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch18/01_h_test_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch18/01_h_test_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch18/02_h_loss_actor.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch18/02_h_loss_actor.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch18/03_h_loss_critic.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch18/03_h_loss_critic.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch18/04_h_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch18/04_h_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch18/05_h_test_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch18/05_h_test_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch18/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch18/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/02-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/02-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/02-02-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/02-02-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/03-02-loss-value.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/03-02-loss-value.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/04-loss-entropy.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/04-loss-entropy.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/05-02-test-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/05-02-test-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/06-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/06-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/06-02-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/06-02-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/07-02-loss-value.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/07-02-loss-value.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/08-loss-entropy.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/08-loss-entropy.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/09-02-test-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/09-02-test-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/10-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/10-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/10-02-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/10-02-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/11-02-test-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/11-02-test-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/12-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/12-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/14-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/14-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/16-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/16-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/18-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/18-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/19-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/19-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/20-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/20-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/22-01-reward100.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/22-01-reward100.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/aa-loss-entropy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/aa-loss-entropy.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/aa-loss-policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/aa-loss-policy.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/aa-loss-value.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/aa-loss-value.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/aa-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/aa-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/aa-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/aa-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/aa-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/aa-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/aa-test-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/aa-test-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/ac-loss-entropy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/ac-loss-entropy.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/ac-loss-policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/ac-loss-policy.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/ac-loss-value.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/ac-loss-value.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/ac-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/ac-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/ac-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/ac-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/ac-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/ac-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/ac-test-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/ac-test-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/ka-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/ka-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/ka-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/ka-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/kc-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/kc-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/kc-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/kc-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/pa-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/pa-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/pa-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/pa-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/pc-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/pc-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/pc-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/pc-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/pc-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/pc-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/pc-test-steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/pc-test-steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/sa-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/sa-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/sa-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/sa-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/sc-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/sc-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/sc-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/sc-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/ta-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/ta-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/ta-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/ta-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/tc-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/tc-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch19/tc-test-reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch19/tc-test-reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/01-01-reward-max.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/01-01-reward-max.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/02-01-reward-std.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/02-01-reward-std.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/02-02-update-l2.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/02-02-update-l2.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/03-01-reward-max.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/03-01-reward-max.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/04-01-reward-std.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/04-01-reward-std.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/04-02-update-l2.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/04-02-update-l2.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/05-01-reward-max.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/05-01-reward-max.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/06-reward-std.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/06-reward-std.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/07-01-reward-max.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/07-01-reward-max.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/08-reward-std.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/08-reward-std.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/escp-reward-max.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/escp-reward-max.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/escp-reward-mean.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/escp-reward-mean.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/escp-reward-std.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/escp-reward-std.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/escp-update-l2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/escp-update-l2.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/eshc-reward-max.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/eshc-reward-max.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/eshc-reward-mean.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/eshc-reward-mean.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/eshc-reward-std.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/eshc-reward-std.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/eshc-update-l2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/eshc-update-l2.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/gacp-reward-max.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/gacp-reward-max.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/gacp-reward-mean.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/gacp-reward-mean.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/gacp-reward-std.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/gacp-reward-std.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/gahc-reward-max.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/gahc-reward-max.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/gahc-reward-mean.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/gahc-reward-mean.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/gahc-reward-std.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/gahc-reward-std.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch20/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch20/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/atari/ppo_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/atari/ppo_loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/atari/ppo_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/atari/ppo_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/atari/ppo_steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/atari/ppo_steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/dqn/counts_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/dqn/counts_loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/dqn/counts_steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/dqn/counts_steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/dqn/egreedy_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/dqn/egreedy_loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/dqn/nn_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/dqn/nn_loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/dqn/nn_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/dqn/nn_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/dqn/nn_steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/dqn/nn_steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/dqn_nn_01_reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/dqn_nn_01_reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/dqn_nn_02_steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/dqn_nn_02_steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/dqn_nn_04_loss.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/dqn_nn_04_loss.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/ppo/basic_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/ppo/basic_loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/ppo/basic_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/ppo/basic_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/ppo/dist_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/ppo/dist_loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/ppo/nn_loss.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/ppo/nn_loss.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/ppo/nn_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/ppo/nn_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch21/ppo/nn_steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch21/ppo/nn_steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch22/07-01-adv.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch22/07-01-adv.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch22/a2c-adv.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch22/a2c-adv.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch22/a2c-loss-ent.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch22/a2c-loss-ent.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch22/em-loss-obs.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch22/em-loss-obs.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch22/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch22/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch23/03-win-ratio.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch23/03-win-ratio.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch23/loss-policy.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch23/loss-policy.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch23/loss-value.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch23/loss-value.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch23/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch23/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/ch23/win-ratio.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch23/win-ratio.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch25/both-reward.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch25/both-reward.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch25/both-steps.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch25/both-steps.svg -------------------------------------------------------------------------------- /Code/Lapan/plots/ch25/both_reward.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch25/both_reward.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch25/both_steps.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch25/both_steps.csv -------------------------------------------------------------------------------- /Code/Lapan/plots/ch25/make.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/ch25/make.sh -------------------------------------------------------------------------------- /Code/Lapan/plots/plot.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Lapan/plots/plot.py -------------------------------------------------------------------------------- /Code/Multi-agent/Langchain.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Multi-agent/Langchain.py -------------------------------------------------------------------------------- /Code/Multi-agent/LeelaChess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Multi-agent/LeelaChess.py -------------------------------------------------------------------------------- /Code/Multi-agent/Pistonball.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Multi-agent/Pistonball.py -------------------------------------------------------------------------------- /Code/Multi-agent/Pystonball_play.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Multi-agent/Pystonball_play.py -------------------------------------------------------------------------------- /Code/Multi-agent/tictactoe.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Multi-agent/tictactoe.py -------------------------------------------------------------------------------- /Code/Sutton_Barto/chapter08/maze.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/chapter08/maze.ipynb -------------------------------------------------------------------------------- /Code/Sutton_Barto/chapter08/maze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/chapter08/maze.py -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_2_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_2_1.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_2_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_2_2.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_2_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_2_3.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_2_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_2_4.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_2_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_2_5.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_2_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_2_6.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_3_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_3_2.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_3_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_3_5.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_4_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_4_1.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_4_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_4_2.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_4_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_4_3.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_6_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_6_2.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_6_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_6_4.png -------------------------------------------------------------------------------- /Code/Sutton_Barto/images/figure_6_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/Sutton_Barto/images/figure_6_6.png -------------------------------------------------------------------------------- /Code/textworld/=5.9.0: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Code/textworld/=5.9.0 -------------------------------------------------------------------------------- /Exercises/BlackJack_QL.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Exercises/BlackJack_QL.ipynb -------------------------------------------------------------------------------- /Exercises/Cliff_Walking_render.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Exercises/Cliff_Walking_render.ipynb -------------------------------------------------------------------------------- /Exercises/Frozenlake_VI.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Exercises/Frozenlake_VI.ipynb -------------------------------------------------------------------------------- /Exercises/Frozenlake_q_learning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Exercises/Frozenlake_q_learning.ipynb -------------------------------------------------------------------------------- /Exercises/continuous_mountain_car.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Exercises/continuous_mountain_car.py -------------------------------------------------------------------------------- /Exercises/mountain_car.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Exercises/mountain_car.py -------------------------------------------------------------------------------- /Guia_apresentacao.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Guia_apresentacao.md -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/LICENSE -------------------------------------------------------------------------------- /Lectures/Appendix - Cross-entropy.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/Appendix - Cross-entropy.ipynb -------------------------------------------------------------------------------- /Lectures/Example - AlphaGo Zero.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/Example - AlphaGo Zero.ipynb -------------------------------------------------------------------------------- /Lectures/Example - AlphaGo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/Example - AlphaGo.ipynb -------------------------------------------------------------------------------- /Lectures/Lecture 2 - Finite MDPs.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/Lecture 2 - Finite MDPs.ipynb -------------------------------------------------------------------------------- /Lectures/chapter7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/chapter7.pdf -------------------------------------------------------------------------------- /Lectures/chapter7.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/chapter7.tex -------------------------------------------------------------------------------- /Lectures/dreadlock_neurons2.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/dreadlock_neurons2.jpeg -------------------------------------------------------------------------------- /Lectures/mujoco_reinforce_fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/mujoco_reinforce_fig2.png -------------------------------------------------------------------------------- /Lectures/public/n-step-offp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/public/n-step-offp.png -------------------------------------------------------------------------------- /Lectures/public/n-step-return.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/public/n-step-return.png -------------------------------------------------------------------------------- /Lectures/public/n-step-sarsa-alg.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/public/n-step-sarsa-alg.png -------------------------------------------------------------------------------- /Lectures/public/rl-loop.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/public/rl-loop.jpg -------------------------------------------------------------------------------- /Lectures/public/tictactoe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/public/tictactoe.png -------------------------------------------------------------------------------- /Lectures/vector_envs_tutorial.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/vector_envs_tutorial.ipynb -------------------------------------------------------------------------------- /Lectures/vector_envs_tutorial.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Lectures/vector_envs_tutorial.py -------------------------------------------------------------------------------- /MuZero.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/MuZero.pdf -------------------------------------------------------------------------------- /Projects/2024/0-Descrição do Projeto.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Projects/2024/0-Descrição do Projeto.md -------------------------------------------------------------------------------- /Projects/2024/2-Agentes.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Projects/2024/2-Agentes.md -------------------------------------------------------------------------------- /Projects/2024/3-Ambiente.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Projects/2024/3-Ambiente.md -------------------------------------------------------------------------------- /Projects/2024/4-Fluxo.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Projects/2024/4-Fluxo.md -------------------------------------------------------------------------------- /Projects/2024/Avaliação.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Projects/2024/Avaliação.md -------------------------------------------------------------------------------- /Projects/2024/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Projects/2024/README.md -------------------------------------------------------------------------------- /Projects/2024/Solution template/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Projects/2024/Solution template/main.py -------------------------------------------------------------------------------- /Projects/2025/0-Descrição.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Projects/2025/0-Descrição.md -------------------------------------------------------------------------------- /Projects/2025/1 - Projeto 1.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Projects/2025/1 - Projeto 1.md -------------------------------------------------------------------------------- /Projects/2025/Projeto Final/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Projects/2025/Projeto Final/README.md -------------------------------------------------------------------------------- /Projects/2025/Projeto Final/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Projects/2025/Projeto Final/main.py -------------------------------------------------------------------------------- /Projects/2025/Projeto Final/replay.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Projects/2025/Projeto Final/replay.py -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/README.md -------------------------------------------------------------------------------- /Temas_dos_Trabalhos_2023_old.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/Temas_dos_Trabalhos_2023_old.md -------------------------------------------------------------------------------- /alphago.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/alphago.pdf -------------------------------------------------------------------------------- /grading.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/grading.md -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fccoelho/Reinforcement-Learning-course/HEAD/pyproject.toml --------------------------------------------------------------------------------