├── .gitignore ├── LICENSE ├── README.md ├── archive ├── EXTRA.md ├── NOTES.md ├── bandits │ ├── bandit.py │ ├── linucb_bandit.ipynb │ ├── market.ipynb │ ├── market.tsv │ ├── simple_bandit.ipynb │ └── wheel_bandit.ipynb ├── cruft │ ├── devfestahm_rl.ipynb │ ├── gym_taxi2.ipynb │ ├── rllib_colab.ipynb │ └── rnd_ppo.ipynb ├── custom.ipynb ├── example.py ├── intro.ipynb ├── questions.ipynb ├── recsys │ ├── .gitignore │ ├── LICENSE │ ├── README.md │ ├── cluster.ipynb │ ├── jester-data-1.csv │ ├── recsys.ipynb │ ├── recsys.py │ └── requirements.txt └── tensorboard.ipynb ├── bsl4 ├── README.md ├── cpv1.ipynb ├── cpv1.py ├── libs.txt ├── out.txt └── requirements.txt ├── images ├── 1000-vs-10000-points.png ├── AnyscaleAcademyLogo.png ├── AnyscaleAcademy_Logo_clearbanner_141x100.png ├── Circle-vs-Square.png ├── ConwaysGameOfLife-ColorMap-RdYlBu.png ├── ConwaysGameOfLife-FlameGraph-crop.png ├── ConwaysGameOfLife-FlameGraph.png ├── ConwaysGameOfLife-FlameGraph2.png ├── ConwaysGameOfLife-Grid-Snapshot-White-Background.png ├── ConwaysGameOfLife-Grid-Snapshot.png ├── ConwaysGameOfLife-Snapshot-White-Background.png ├── ConwaysGameOfLife-Snapshot.png ├── Crash-Course-Ray-NoRay.png ├── Execution-Times-Local-v-Remote.png ├── Gospers_glider_gun.gif ├── ML-Landscape-Ray.png ├── ML-Landscape.png ├── Parameter-Server-smaller.png ├── Parameter-Server.png ├── Pi-Ns-vs-times-linear.png ├── Pi-Ns-vs-times.png ├── Pi-Results-with-Ray.png ├── Pi-Results-without-Ray.png ├── Pi-estimates.png ├── Pi-small-Ns-vs-times.png ├── Pi-trials-vs-times.png ├── RLlib.png ├── Ray-Architecture.png ├── Ray-Dashboard.png ├── Ray-Object-Management-1.png ├── Ray-Object-Management-2.png ├── Ray-Object-Management-3.png ├── Ray-Object-Management-4.png ├── Ray-Object-Management-5.png ├── Ray-Object-Management-6.png ├── Ray-Trace-Example.png ├── RayConwaysRules-FlameGraph.png ├── RayGame-FlameGraph.png ├── RaySGD.png ├── RayServe.png ├── RayTune.png ├── TwoTrends.png ├── actor-trials.png ├── raysummit-horizontal-white-banner-full.png ├── rllib │ ├── Bipedal-Walker-Example-100.meta.json │ ├── Bipedal-Walker-Example-100.mp4 │ ├── Bipedal-Walker-Rewards-100.png │ ├── Bipedal-Walker-Rewards-120.png │ ├── Bipedal-Walker-Rewards-50.png │ ├── Bipedal-Walker.png │ ├── Cart-Pole-Episode-Rewards-Exercise.png │ ├── Cart-Pole-Episode-Rewards.png │ ├── Cart-Pole-Episode-Rewards2.png │ ├── Cart-Pole-Episode-Rewards3.png │ ├── Cart-Pole-Example-Video.mp4 │ ├── Cart-Pole.png │ ├── LinTS-Cumulative-Regret-05.png │ ├── LinTS-Cumulative-Regret-07.png │ ├── LinTS-Cumulative-Regret-09.png │ ├── LinTS-Weight-Distribution-of-Arms-05.png │ ├── LinTS-Weight-Distribution-of-Arms-07.png │ ├── LinTS-Weight-Distribution-of-Arms-09.png │ ├── LinUCB-Cumulative-Regret.png │ ├── LinUCB-cumulative-regret2.png │ ├── Market-Bandit-Cumulative-Regret-LinUCB.png │ ├── Market-Bandit-Cumulative-Regret.png │ ├── Market-Bandit-Rewards-vs-Steps-LinUCB.png │ ├── Market-Bandit-Rewards-vs-Steps.png │ ├── MarketReward-Random.png │ ├── RL-concepts.png │ ├── RLlib-API.svg │ ├── RLlib-Stack-smaller.png │ ├── RLlib-Stack.png │ ├── RLlib-envs.svg │ ├── TensorBoard-CartPole-PPO.png │ ├── TensorBoard1.png │ ├── TensorBoard2.png │ ├── Wheel-Bandit.png │ ├── a2c-arch.svg │ ├── alpha-go.jpg │ ├── breakout.png │ ├── cassie-crouched-smaller.png │ ├── cassie-crouched.png │ ├── client.png │ ├── daimler-autonomous-car.jpg │ ├── dqn.png │ ├── learning.png │ ├── log.png │ ├── multi-flat.svg │ ├── ppo.png │ ├── rllib-components.svg │ ├── robot-arm.gif │ ├── stacking-legos-with-sawyer.gif │ ├── walking-man.gif │ └── web.png └── tune │ ├── K-means_convergence.gif │ ├── TensorBoard-CartPole-HPO-hyperparameters.png │ ├── TensorBoard-CartPole-HPO-scalars.png │ └── hpo-neural-network-example.png ├── intro_rl ├── 01-Introduction-to-Reinforcement-Learning.ipynb ├── 02-Introduction-to-RLlib.ipynb ├── 03-Application-Cart-Pole.ipynb ├── 04-Application-Taxi.ipynb ├── 05-Application-Frozen-Lake.ipynb ├── 06-Application-Mountain-Car.ipynb ├── References-Reinforcement-Learning.ipynb ├── cart_test.py ├── mountain-car-checkpoint │ ├── .is_checkpoint │ ├── checkpoint-20 │ └── checkpoint-20.tune_metadata └── rollout.sh ├── requirements.txt ├── rl_apps ├── multi-armed-bandits │ ├── 01-Introduction-to-Multi-Armed-Bandits.ipynb │ ├── 02-Exploration-vs-Exploitation-Strategies.ipynb │ ├── 03-Simple-Multi-Armed-Bandit.ipynb │ ├── 04-Linear-Upper-Confidence-Bound.ipynb │ ├── 05-Linear-Thompson-Sampling.ipynb │ ├── 06-Market-Example.ipynb │ ├── market.tsv │ ├── market_bandit.py │ └── solutions │ │ └── Multi-Armed-Bandits-Solutions.ipynb └── recsys │ ├── 01-Recsys.ipynb │ ├── README.md │ ├── jester-data-1.csv │ ├── jester-data-2.csv │ ├── jester-data-3.csv │ └── recsys.py └── wip ├── app_cart.ipynb ├── app_froz.ipynb ├── app_moun.ipynb ├── app_taxi.ipynb └── app_triv.ipynb /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/README.md -------------------------------------------------------------------------------- /archive/EXTRA.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/EXTRA.md -------------------------------------------------------------------------------- /archive/NOTES.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/NOTES.md -------------------------------------------------------------------------------- /archive/bandits/bandit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/bandits/bandit.py -------------------------------------------------------------------------------- /archive/bandits/linucb_bandit.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/bandits/linucb_bandit.ipynb -------------------------------------------------------------------------------- /archive/bandits/market.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/bandits/market.ipynb -------------------------------------------------------------------------------- /archive/bandits/market.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/bandits/market.tsv -------------------------------------------------------------------------------- /archive/bandits/simple_bandit.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/bandits/simple_bandit.ipynb -------------------------------------------------------------------------------- /archive/bandits/wheel_bandit.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/bandits/wheel_bandit.ipynb -------------------------------------------------------------------------------- /archive/cruft/devfestahm_rl.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/cruft/devfestahm_rl.ipynb -------------------------------------------------------------------------------- /archive/cruft/gym_taxi2.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/cruft/gym_taxi2.ipynb -------------------------------------------------------------------------------- /archive/cruft/rllib_colab.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/cruft/rllib_colab.ipynb -------------------------------------------------------------------------------- /archive/cruft/rnd_ppo.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/cruft/rnd_ppo.ipynb -------------------------------------------------------------------------------- /archive/custom.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/custom.ipynb -------------------------------------------------------------------------------- /archive/example.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/example.py -------------------------------------------------------------------------------- /archive/intro.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/intro.ipynb -------------------------------------------------------------------------------- /archive/questions.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/questions.ipynb -------------------------------------------------------------------------------- /archive/recsys/.gitignore: -------------------------------------------------------------------------------- 1 | tmp 2 | *~ 3 | -------------------------------------------------------------------------------- /archive/recsys/LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/recsys/LICENSE -------------------------------------------------------------------------------- /archive/recsys/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/recsys/README.md -------------------------------------------------------------------------------- /archive/recsys/cluster.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/recsys/cluster.ipynb -------------------------------------------------------------------------------- /archive/recsys/jester-data-1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/recsys/jester-data-1.csv -------------------------------------------------------------------------------- /archive/recsys/recsys.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/recsys/recsys.ipynb -------------------------------------------------------------------------------- /archive/recsys/recsys.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/recsys/recsys.py -------------------------------------------------------------------------------- /archive/recsys/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/recsys/requirements.txt -------------------------------------------------------------------------------- /archive/tensorboard.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/archive/tensorboard.ipynb -------------------------------------------------------------------------------- /bsl4/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/bsl4/README.md -------------------------------------------------------------------------------- /bsl4/cpv1.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/bsl4/cpv1.ipynb -------------------------------------------------------------------------------- /bsl4/cpv1.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/bsl4/cpv1.py -------------------------------------------------------------------------------- /bsl4/libs.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/bsl4/libs.txt -------------------------------------------------------------------------------- /bsl4/out.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/bsl4/out.txt -------------------------------------------------------------------------------- /bsl4/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/bsl4/requirements.txt -------------------------------------------------------------------------------- /images/1000-vs-10000-points.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/1000-vs-10000-points.png -------------------------------------------------------------------------------- /images/AnyscaleAcademyLogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/AnyscaleAcademyLogo.png -------------------------------------------------------------------------------- /images/AnyscaleAcademy_Logo_clearbanner_141x100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/AnyscaleAcademy_Logo_clearbanner_141x100.png -------------------------------------------------------------------------------- /images/Circle-vs-Square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Circle-vs-Square.png -------------------------------------------------------------------------------- /images/ConwaysGameOfLife-ColorMap-RdYlBu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/ConwaysGameOfLife-ColorMap-RdYlBu.png -------------------------------------------------------------------------------- /images/ConwaysGameOfLife-FlameGraph-crop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/ConwaysGameOfLife-FlameGraph-crop.png -------------------------------------------------------------------------------- /images/ConwaysGameOfLife-FlameGraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/ConwaysGameOfLife-FlameGraph.png -------------------------------------------------------------------------------- /images/ConwaysGameOfLife-FlameGraph2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/ConwaysGameOfLife-FlameGraph2.png -------------------------------------------------------------------------------- /images/ConwaysGameOfLife-Grid-Snapshot-White-Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/ConwaysGameOfLife-Grid-Snapshot-White-Background.png -------------------------------------------------------------------------------- /images/ConwaysGameOfLife-Grid-Snapshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/ConwaysGameOfLife-Grid-Snapshot.png -------------------------------------------------------------------------------- /images/ConwaysGameOfLife-Snapshot-White-Background.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/ConwaysGameOfLife-Snapshot-White-Background.png -------------------------------------------------------------------------------- /images/ConwaysGameOfLife-Snapshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/ConwaysGameOfLife-Snapshot.png -------------------------------------------------------------------------------- /images/Crash-Course-Ray-NoRay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Crash-Course-Ray-NoRay.png -------------------------------------------------------------------------------- /images/Execution-Times-Local-v-Remote.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Execution-Times-Local-v-Remote.png -------------------------------------------------------------------------------- /images/Gospers_glider_gun.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Gospers_glider_gun.gif -------------------------------------------------------------------------------- /images/ML-Landscape-Ray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/ML-Landscape-Ray.png -------------------------------------------------------------------------------- /images/ML-Landscape.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/ML-Landscape.png -------------------------------------------------------------------------------- /images/Parameter-Server-smaller.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Parameter-Server-smaller.png -------------------------------------------------------------------------------- /images/Parameter-Server.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Parameter-Server.png -------------------------------------------------------------------------------- /images/Pi-Ns-vs-times-linear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Pi-Ns-vs-times-linear.png -------------------------------------------------------------------------------- /images/Pi-Ns-vs-times.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Pi-Ns-vs-times.png -------------------------------------------------------------------------------- /images/Pi-Results-with-Ray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Pi-Results-with-Ray.png -------------------------------------------------------------------------------- /images/Pi-Results-without-Ray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Pi-Results-without-Ray.png -------------------------------------------------------------------------------- /images/Pi-estimates.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Pi-estimates.png -------------------------------------------------------------------------------- /images/Pi-small-Ns-vs-times.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Pi-small-Ns-vs-times.png -------------------------------------------------------------------------------- /images/Pi-trials-vs-times.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Pi-trials-vs-times.png -------------------------------------------------------------------------------- /images/RLlib.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/RLlib.png -------------------------------------------------------------------------------- /images/Ray-Architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Ray-Architecture.png -------------------------------------------------------------------------------- /images/Ray-Dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Ray-Dashboard.png -------------------------------------------------------------------------------- /images/Ray-Object-Management-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Ray-Object-Management-1.png -------------------------------------------------------------------------------- /images/Ray-Object-Management-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Ray-Object-Management-2.png -------------------------------------------------------------------------------- /images/Ray-Object-Management-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Ray-Object-Management-3.png -------------------------------------------------------------------------------- /images/Ray-Object-Management-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Ray-Object-Management-4.png -------------------------------------------------------------------------------- /images/Ray-Object-Management-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Ray-Object-Management-5.png -------------------------------------------------------------------------------- /images/Ray-Object-Management-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Ray-Object-Management-6.png -------------------------------------------------------------------------------- /images/Ray-Trace-Example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/Ray-Trace-Example.png -------------------------------------------------------------------------------- /images/RayConwaysRules-FlameGraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/RayConwaysRules-FlameGraph.png -------------------------------------------------------------------------------- /images/RayGame-FlameGraph.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/RayGame-FlameGraph.png -------------------------------------------------------------------------------- /images/RaySGD.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/RaySGD.png -------------------------------------------------------------------------------- /images/RayServe.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/RayServe.png -------------------------------------------------------------------------------- /images/RayTune.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/RayTune.png -------------------------------------------------------------------------------- /images/TwoTrends.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/TwoTrends.png -------------------------------------------------------------------------------- /images/actor-trials.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/actor-trials.png -------------------------------------------------------------------------------- /images/raysummit-horizontal-white-banner-full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/raysummit-horizontal-white-banner-full.png -------------------------------------------------------------------------------- /images/rllib/Bipedal-Walker-Example-100.meta.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Bipedal-Walker-Example-100.meta.json -------------------------------------------------------------------------------- /images/rllib/Bipedal-Walker-Example-100.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Bipedal-Walker-Example-100.mp4 -------------------------------------------------------------------------------- /images/rllib/Bipedal-Walker-Rewards-100.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Bipedal-Walker-Rewards-100.png -------------------------------------------------------------------------------- /images/rllib/Bipedal-Walker-Rewards-120.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Bipedal-Walker-Rewards-120.png -------------------------------------------------------------------------------- /images/rllib/Bipedal-Walker-Rewards-50.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Bipedal-Walker-Rewards-50.png -------------------------------------------------------------------------------- /images/rllib/Bipedal-Walker.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Bipedal-Walker.png -------------------------------------------------------------------------------- /images/rllib/Cart-Pole-Episode-Rewards-Exercise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Cart-Pole-Episode-Rewards-Exercise.png -------------------------------------------------------------------------------- /images/rllib/Cart-Pole-Episode-Rewards.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Cart-Pole-Episode-Rewards.png -------------------------------------------------------------------------------- /images/rllib/Cart-Pole-Episode-Rewards2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Cart-Pole-Episode-Rewards2.png -------------------------------------------------------------------------------- /images/rllib/Cart-Pole-Episode-Rewards3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Cart-Pole-Episode-Rewards3.png -------------------------------------------------------------------------------- /images/rllib/Cart-Pole-Example-Video.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Cart-Pole-Example-Video.mp4 -------------------------------------------------------------------------------- /images/rllib/Cart-Pole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Cart-Pole.png -------------------------------------------------------------------------------- /images/rllib/LinTS-Cumulative-Regret-05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/LinTS-Cumulative-Regret-05.png -------------------------------------------------------------------------------- /images/rllib/LinTS-Cumulative-Regret-07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/LinTS-Cumulative-Regret-07.png -------------------------------------------------------------------------------- /images/rllib/LinTS-Cumulative-Regret-09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/LinTS-Cumulative-Regret-09.png -------------------------------------------------------------------------------- /images/rllib/LinTS-Weight-Distribution-of-Arms-05.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/LinTS-Weight-Distribution-of-Arms-05.png -------------------------------------------------------------------------------- /images/rllib/LinTS-Weight-Distribution-of-Arms-07.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/LinTS-Weight-Distribution-of-Arms-07.png -------------------------------------------------------------------------------- /images/rllib/LinTS-Weight-Distribution-of-Arms-09.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/LinTS-Weight-Distribution-of-Arms-09.png -------------------------------------------------------------------------------- /images/rllib/LinUCB-Cumulative-Regret.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/LinUCB-Cumulative-Regret.png -------------------------------------------------------------------------------- /images/rllib/LinUCB-cumulative-regret2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/LinUCB-cumulative-regret2.png -------------------------------------------------------------------------------- /images/rllib/Market-Bandit-Cumulative-Regret-LinUCB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Market-Bandit-Cumulative-Regret-LinUCB.png -------------------------------------------------------------------------------- /images/rllib/Market-Bandit-Cumulative-Regret.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Market-Bandit-Cumulative-Regret.png -------------------------------------------------------------------------------- /images/rllib/Market-Bandit-Rewards-vs-Steps-LinUCB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Market-Bandit-Rewards-vs-Steps-LinUCB.png -------------------------------------------------------------------------------- /images/rllib/Market-Bandit-Rewards-vs-Steps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Market-Bandit-Rewards-vs-Steps.png -------------------------------------------------------------------------------- /images/rllib/MarketReward-Random.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/MarketReward-Random.png -------------------------------------------------------------------------------- /images/rllib/RL-concepts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/RL-concepts.png -------------------------------------------------------------------------------- /images/rllib/RLlib-API.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/RLlib-API.svg -------------------------------------------------------------------------------- /images/rllib/RLlib-Stack-smaller.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/RLlib-Stack-smaller.png -------------------------------------------------------------------------------- /images/rllib/RLlib-Stack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/RLlib-Stack.png -------------------------------------------------------------------------------- /images/rllib/RLlib-envs.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/RLlib-envs.svg -------------------------------------------------------------------------------- /images/rllib/TensorBoard-CartPole-PPO.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/TensorBoard-CartPole-PPO.png -------------------------------------------------------------------------------- /images/rllib/TensorBoard1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/TensorBoard1.png -------------------------------------------------------------------------------- /images/rllib/TensorBoard2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/TensorBoard2.png -------------------------------------------------------------------------------- /images/rllib/Wheel-Bandit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/Wheel-Bandit.png -------------------------------------------------------------------------------- /images/rllib/a2c-arch.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/a2c-arch.svg -------------------------------------------------------------------------------- /images/rllib/alpha-go.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/alpha-go.jpg -------------------------------------------------------------------------------- /images/rllib/breakout.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/breakout.png -------------------------------------------------------------------------------- /images/rllib/cassie-crouched-smaller.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/cassie-crouched-smaller.png -------------------------------------------------------------------------------- /images/rllib/cassie-crouched.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/cassie-crouched.png -------------------------------------------------------------------------------- /images/rllib/client.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/client.png -------------------------------------------------------------------------------- /images/rllib/daimler-autonomous-car.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/daimler-autonomous-car.jpg -------------------------------------------------------------------------------- /images/rllib/dqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/dqn.png -------------------------------------------------------------------------------- /images/rllib/learning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/learning.png -------------------------------------------------------------------------------- /images/rllib/log.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/log.png -------------------------------------------------------------------------------- /images/rllib/multi-flat.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/multi-flat.svg -------------------------------------------------------------------------------- /images/rllib/ppo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/ppo.png -------------------------------------------------------------------------------- /images/rllib/rllib-components.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/rllib-components.svg -------------------------------------------------------------------------------- /images/rllib/robot-arm.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/robot-arm.gif -------------------------------------------------------------------------------- /images/rllib/stacking-legos-with-sawyer.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/stacking-legos-with-sawyer.gif -------------------------------------------------------------------------------- /images/rllib/walking-man.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/walking-man.gif -------------------------------------------------------------------------------- /images/rllib/web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/rllib/web.png -------------------------------------------------------------------------------- /images/tune/K-means_convergence.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/tune/K-means_convergence.gif -------------------------------------------------------------------------------- /images/tune/TensorBoard-CartPole-HPO-hyperparameters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/tune/TensorBoard-CartPole-HPO-hyperparameters.png -------------------------------------------------------------------------------- /images/tune/TensorBoard-CartPole-HPO-scalars.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/tune/TensorBoard-CartPole-HPO-scalars.png -------------------------------------------------------------------------------- /images/tune/hpo-neural-network-example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/images/tune/hpo-neural-network-example.png -------------------------------------------------------------------------------- /intro_rl/01-Introduction-to-Reinforcement-Learning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/intro_rl/01-Introduction-to-Reinforcement-Learning.ipynb -------------------------------------------------------------------------------- /intro_rl/02-Introduction-to-RLlib.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/intro_rl/02-Introduction-to-RLlib.ipynb -------------------------------------------------------------------------------- /intro_rl/03-Application-Cart-Pole.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/intro_rl/03-Application-Cart-Pole.ipynb -------------------------------------------------------------------------------- /intro_rl/04-Application-Taxi.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/intro_rl/04-Application-Taxi.ipynb -------------------------------------------------------------------------------- /intro_rl/05-Application-Frozen-Lake.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/intro_rl/05-Application-Frozen-Lake.ipynb -------------------------------------------------------------------------------- /intro_rl/06-Application-Mountain-Car.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/intro_rl/06-Application-Mountain-Car.ipynb -------------------------------------------------------------------------------- /intro_rl/References-Reinforcement-Learning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/intro_rl/References-Reinforcement-Learning.ipynb -------------------------------------------------------------------------------- /intro_rl/cart_test.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/intro_rl/cart_test.py -------------------------------------------------------------------------------- /intro_rl/mountain-car-checkpoint/.is_checkpoint: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /intro_rl/mountain-car-checkpoint/checkpoint-20: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/intro_rl/mountain-car-checkpoint/checkpoint-20 -------------------------------------------------------------------------------- /intro_rl/mountain-car-checkpoint/checkpoint-20.tune_metadata: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/intro_rl/mountain-car-checkpoint/checkpoint-20.tune_metadata -------------------------------------------------------------------------------- /intro_rl/rollout.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/intro_rl/rollout.sh -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/requirements.txt -------------------------------------------------------------------------------- /rl_apps/multi-armed-bandits/01-Introduction-to-Multi-Armed-Bandits.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/multi-armed-bandits/01-Introduction-to-Multi-Armed-Bandits.ipynb -------------------------------------------------------------------------------- /rl_apps/multi-armed-bandits/02-Exploration-vs-Exploitation-Strategies.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/multi-armed-bandits/02-Exploration-vs-Exploitation-Strategies.ipynb -------------------------------------------------------------------------------- /rl_apps/multi-armed-bandits/03-Simple-Multi-Armed-Bandit.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/multi-armed-bandits/03-Simple-Multi-Armed-Bandit.ipynb -------------------------------------------------------------------------------- /rl_apps/multi-armed-bandits/04-Linear-Upper-Confidence-Bound.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/multi-armed-bandits/04-Linear-Upper-Confidence-Bound.ipynb -------------------------------------------------------------------------------- /rl_apps/multi-armed-bandits/05-Linear-Thompson-Sampling.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/multi-armed-bandits/05-Linear-Thompson-Sampling.ipynb -------------------------------------------------------------------------------- /rl_apps/multi-armed-bandits/06-Market-Example.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/multi-armed-bandits/06-Market-Example.ipynb -------------------------------------------------------------------------------- /rl_apps/multi-armed-bandits/market.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/multi-armed-bandits/market.tsv -------------------------------------------------------------------------------- /rl_apps/multi-armed-bandits/market_bandit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/multi-armed-bandits/market_bandit.py -------------------------------------------------------------------------------- /rl_apps/multi-armed-bandits/solutions/Multi-Armed-Bandits-Solutions.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/multi-armed-bandits/solutions/Multi-Armed-Bandits-Solutions.ipynb -------------------------------------------------------------------------------- /rl_apps/recsys/01-Recsys.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/recsys/01-Recsys.ipynb -------------------------------------------------------------------------------- /rl_apps/recsys/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/recsys/README.md -------------------------------------------------------------------------------- /rl_apps/recsys/jester-data-1.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/recsys/jester-data-1.csv -------------------------------------------------------------------------------- /rl_apps/recsys/jester-data-2.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/recsys/jester-data-2.csv -------------------------------------------------------------------------------- /rl_apps/recsys/jester-data-3.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/recsys/jester-data-3.csv -------------------------------------------------------------------------------- /rl_apps/recsys/recsys.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/rl_apps/recsys/recsys.py -------------------------------------------------------------------------------- /wip/app_cart.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/wip/app_cart.ipynb -------------------------------------------------------------------------------- /wip/app_froz.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/wip/app_froz.ipynb -------------------------------------------------------------------------------- /wip/app_moun.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/wip/app_moun.ipynb -------------------------------------------------------------------------------- /wip/app_taxi.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/wip/app_taxi.ipynb -------------------------------------------------------------------------------- /wip/app_triv.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/DerwenAI/rllib_tutorials/HEAD/wip/app_triv.ipynb --------------------------------------------------------------------------------