├── .gitignore ├── README.md ├── RL_Theory_Book.pdf └── src ├── 1.Setup ├── 1. Chapter.tex ├── 1.0.Chapter.tex ├── 1.1.MDP.tex └── 1.2.Algorithms.tex ├── 2.MetaHeuristics ├── 2. Chapter.tex ├── 2.0.Chapter.tex ├── 2.1.Baselines.tex └── 2.2.EvolutionStrategies.tex ├── 3.ClassicTheory ├── 3. Chapter.tex ├── 3.0.Chapter.tex ├── 3.1.ValueFunctions.tex ├── 3.2.PolicyImprovement.tex ├── 3.3.DynamicProgramming.tex ├── 3.4.TabularAlgorithms.tex └── 3.5.BiasVarianceTradeoff.tex ├── 4.ValueBased ├── 4. Chapter.tex ├── 4.0.Chapter.tex ├── 4.1.DQN.tex ├── 4.2.Modifications.tex └── 4.3.Distributional.tex ├── 5.PolicyGradient ├── 5. Chapter.tex ├── 5.0.Chapter.tex ├── 5.1.PolicyGradientTheorem.tex ├── 5.2.ActorCritic.tex └── 5.3.TRPO.tex ├── 6.ContinuousControl ├── 6. Chapter.tex ├── 6.0.Chapter.tex ├── 6.1.DDPG.tex └── 6.2.SAC.tex ├── 7.ModelBased ├── 7. Chapter.tex ├── 7.0.Chapter.tex ├── 7.1.Bandits.tex ├── 7.2.WorldModels.tex ├── 7.3.MCTS.tex └── 7.4.LQR.tex ├── 8.NextStage ├── 8. Chapter.tex ├── 8.0.Chapter.tex ├── 8.1.ImitationLearning.tex ├── 8.2.IntrinsicMotivation.tex ├── 8.3.Multitask.tex ├── 8.4.Hierarchical.tex ├── 8.5.POMDP.tex └── 8.6.MultiAgent.tex ├── Appendix ├── NaturalGradient.tex └── QLearningConvergence.tex ├── DRL.bib ├── Images ├── ARP.png ├── AgentPOV.png ├── Atari.png ├── Baseline.png ├── BaselineIntuition.png ├── BeliefMDP │ ├── BeliefMDP-1.png │ ├── BeliefMDP-2.png │ ├── BeliefMDP-3.png │ ├── BeliefMDP-4.png │ └── BeliefMDP-5.png ├── Bellman.png ├── BellmanPrinciple.png ├── CEM │ ├── cem-0.png │ ├── cem-1.png │ ├── cem-2.png │ ├── cem-3.png │ ├── cem-4.png │ ├── cem-5.png │ ├── cem-6.png │ ├── cem-7.png │ ├── cem-8.png │ └── cem-9.png ├── CMA_ES │ ├── cma_es-0.png │ ├── cma_es-1.png │ ├── cma_es-2.png │ ├── cma_es-3.png │ ├── cma_es-4.png │ ├── cma_es-5.png │ ├── cma_es-6.png │ ├── cma_es-7.png │ ├── cma_es-8.png │ └── cma_es-9.png ├── CatAndFish.png ├── Categorical.png ├── Communication.png ├── ContractionMapping.png ├── DDP.png ├── DP_backup.png ├── Dancer.png ├── DistributionalBellmanExample.png ├── DistributionalExample.png ├── DistributionalRL1.png ├── DistributionalRL2.png ├── DistributionalTable.png ├── DuelingDQN.png ├── EL_ES │ ├── el_es-0.png │ ├── el_es-1.png │ ├── el_es-2.png │ ├── el_es-3.png │ ├── el_es-4.png │ ├── el_es-5.png │ ├── el_es-6.png │ ├── el_es-7.png │ ├── el_es-8.png │ └── el_es-9.png ├── ENV │ ├── Environment-1.png │ ├── Environment-10.png │ ├── Environment-11.png │ ├── Environment-12.png │ ├── Environment-13.png │ ├── Environment-14.png │ ├── Environment-15.png │ ├── Environment-2.png │ ├── Environment-3.png │ ├── Environment-4.png │ ├── Environment-5.png │ ├── Environment-6.png │ ├── Environment-7.png │ ├── Environment-8.png │ └── Environment-9.png ├── ES │ ├── es-0.png │ ├── es-1.png │ ├── es-2.png │ ├── es-3.png │ ├── es-4.png │ └── es-5.png ├── EligibilityTrace.png ├── EquivMDP.png ├── GPI.png ├── GreedyIsStupid.png ├── GreedyPIisBad.png ├── HC │ ├── hill_climbing-0.png │ ├── hill_climbing-1.png │ ├── hill_climbing-2.png │ ├── hill_climbing-3.png │ ├── hill_climbing-4.png │ ├── hill_climbing-5.png │ ├── hill_climbing-6.png │ ├── hill_climbing-7.png │ ├── hill_climbing-8.png │ └── hill_climbing-9.png ├── HER │ ├── HER-1.png │ ├── HER-2.png │ ├── HER-3.png │ ├── HER-4.png │ └── HER-5.png ├── HERres │ ├── HERres1.jpg │ ├── HERres2.jpg │ ├── HERres3.jpg │ ├── HERres4.jpg │ ├── HERres5.jpg │ ├── HERres6.jpg │ ├── HERres7.jpg │ ├── HERres8.jpg │ └── HERres9.jpg ├── HRL4.png ├── HardTrustRegion.png ├── ICM.png ├── IRL.png ├── KLdivProblem.png ├── KLgeometry.png ├── MC │ ├── MC-1.png │ ├── MC-10.png │ ├── MC-11.png │ ├── MC-12.png │ ├── MC-13.png │ ├── MC-14.png │ ├── MC-15.png │ ├── MC-2.png │ ├── MC-3.png │ ├── MC-4.png │ ├── MC-5.png │ ├── MC-6.png │ ├── MC-7.png │ ├── MC-8.png │ └── MC-9.png ├── MCTS.png ├── MCTS │ ├── MCTS-0.png │ ├── MCTS-1.png │ ├── MCTS-10.png │ ├── MCTS-11.png │ ├── MCTS-12.png │ ├── MCTS-13.png │ ├── MCTS-14.png │ ├── MCTS-15.png │ ├── MCTS-16.png │ ├── MCTS-17.png │ ├── MCTS-18.png │ ├── MCTS-19.png │ ├── MCTS-2.png │ ├── MCTS-20.png │ ├── MCTS-21.png │ ├── MCTS-22.png │ ├── MCTS-23.png │ ├── MCTS-3.png │ ├── MCTS-4.png │ ├── MCTS-5.png │ ├── MCTS-6.png │ ├── MCTS-7.png │ ├── MCTS-8.png │ └── MCTS-9.png ├── MC_backup.png ├── MDP │ ├── MDP-1.png │ ├── MDP-2.png │ ├── MDP-3.png │ ├── MDP-4.png │ ├── MDP-5.png │ └── MDP-6.png ├── MM.png ├── MS_ES │ ├── ms_es-0.png │ ├── ms_es-1.png │ ├── ms_es-2.png │ ├── ms_es-3.png │ ├── ms_es-4.png │ ├── ms_es-5.png │ ├── ms_es-6.png │ ├── ms_es-7.png │ └── ms_es-8.png ├── MarioStupid.png ├── MarioVAE.png ├── MuZero.png ├── MultiStepBad.png ├── MultiStepErrors3.png ├── MultiStepErrors4.png ├── MultiTask1.png ├── MultiTask3.png ├── Multimodal1.png ├── Multimodal2.png ├── NoisyNets1.png ├── NoisyNets2.png ├── OptDefEq.png ├── OptimalQ.png ├── OverestimationIssue.png ├── PE │ ├── policyeval0.png │ ├── policyeval1.png │ ├── policyeval2.png │ ├── policyeval3.png │ ├── policyeval4.png │ ├── policyeval5.png │ ├── policyeval6.png │ ├── policyeval7.png │ ├── policyeval8.png │ └── policyeval9.png ├── PER.png ├── PGisPI.png ├── PI │ ├── policyiter0.png │ ├── policyiter1.png │ ├── policyiter2.png │ └── policyiter3.png ├── PI_basic.png ├── PPOpipeline2.png ├── Planning.png ├── PlanningNonoptimal.png ├── PrioritizedIssue.png ├── Projecting.png ├── QisBad.png ├── Qnetwork1.png ├── Qnetwork2.png ├── Quantile.png ├── R2D2.png ├── RL.png ├── RND.png ├── SA │ ├── simulated_annealing-0.png │ ├── simulated_annealing-1.png │ ├── simulated_annealing-2.png │ ├── simulated_annealing-3.png │ ├── simulated_annealing-4.png │ ├── simulated_annealing-5.png │ ├── simulated_annealing-6.png │ ├── simulated_annealing-7.png │ ├── simulated_annealing-8.png │ └── simulated_annealing-9.png ├── SafeRL1.png ├── SafeRL2.png ├── SampleEfficiency.png ├── Score.png ├── Scrat_iceage.png ├── TD3_overestimation.png ├── TD_backup.png ├── TRPOpipeline2.png ├── TS.png ├── Traces.png ├── TrajIntersect.png ├── Tree.png ├── TrustRegion1.png ├── TrustRegion2.png ├── TrustRegion3.png ├── UCB.png ├── VI │ ├── valueiter0.png │ ├── valueiter1.png │ ├── valueiter2.png │ ├── valueiter3.png │ ├── valueiter4.png │ ├── valueiter5.png │ ├── valueiter6.png │ ├── valueiter7.png │ ├── valueiter8.png │ └── valueiter9.png ├── VI_basic.png ├── Value.png ├── ValueTrustRegion1.png ├── ValueTrustRegion2.png ├── ValueTrustRegion3.png ├── Wasserstein.png ├── WassersteinSamples.png ├── Zfunction.png ├── agentenv.jpeg ├── agentenv.png ├── backward_view.png ├── bandits.png ├── behavioralclonning.png ├── c51.png ├── cartpole.png ├── correlated.jpg ├── curiositydoors.png ├── decorrelated.jpg ├── distributionalVIissue.png ├── doors.jpg ├── eSoftBellman.png ├── evolution_strategy.png ├── exploration.png ├── forward_view.png ├── hierarchy.png ├── inversemodel.png ├── iqn.png ├── maze.png ├── neat.png ├── neatmutation.png ├── noisytv.png ├── novelty4.png ├── noveltydoors.png ├── nut.png ├── nut_cartoon.png ├── optimalZissue.png ├── ornstein_uhlenbeck_noise.png ├── puzzle.png ├── qrdqn.png ├── quantileloss.png ├── quantileprojection.png ├── randomsearch.png ├── reach.jpg ├── reach.pdf ├── reach.png ├── realsim.png ├── robot.png ├── rubik.png ├── scrat.png ├── sepheads.png ├── spaceinvaders.png ├── targetNetwork.png ├── td_lambda.png ├── topologymutation.png ├── traj.png ├── traj_rew.png ├── twopaths.png ├── virtualgoal.png └── world_models.jpeg ├── main.tex ├── sources.tex └── style.sty /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | src/Appendix/Tickets2019msu.tex 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/README.md -------------------------------------------------------------------------------- /RL_Theory_Book.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/RL_Theory_Book.pdf -------------------------------------------------------------------------------- /src/1.Setup/1. Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/1.Setup/1. Chapter.tex -------------------------------------------------------------------------------- /src/1.Setup/1.0.Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/1.Setup/1.0.Chapter.tex -------------------------------------------------------------------------------- /src/1.Setup/1.1.MDP.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/1.Setup/1.1.MDP.tex -------------------------------------------------------------------------------- /src/1.Setup/1.2.Algorithms.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/1.Setup/1.2.Algorithms.tex -------------------------------------------------------------------------------- /src/2.MetaHeuristics/2. Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/2.MetaHeuristics/2. Chapter.tex -------------------------------------------------------------------------------- /src/2.MetaHeuristics/2.0.Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/2.MetaHeuristics/2.0.Chapter.tex -------------------------------------------------------------------------------- /src/2.MetaHeuristics/2.1.Baselines.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/2.MetaHeuristics/2.1.Baselines.tex -------------------------------------------------------------------------------- /src/2.MetaHeuristics/2.2.EvolutionStrategies.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/2.MetaHeuristics/2.2.EvolutionStrategies.tex -------------------------------------------------------------------------------- /src/3.ClassicTheory/3. Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/3.ClassicTheory/3. Chapter.tex -------------------------------------------------------------------------------- /src/3.ClassicTheory/3.0.Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/3.ClassicTheory/3.0.Chapter.tex -------------------------------------------------------------------------------- /src/3.ClassicTheory/3.1.ValueFunctions.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/3.ClassicTheory/3.1.ValueFunctions.tex -------------------------------------------------------------------------------- /src/3.ClassicTheory/3.2.PolicyImprovement.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/3.ClassicTheory/3.2.PolicyImprovement.tex -------------------------------------------------------------------------------- /src/3.ClassicTheory/3.3.DynamicProgramming.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/3.ClassicTheory/3.3.DynamicProgramming.tex -------------------------------------------------------------------------------- /src/3.ClassicTheory/3.4.TabularAlgorithms.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/3.ClassicTheory/3.4.TabularAlgorithms.tex -------------------------------------------------------------------------------- /src/3.ClassicTheory/3.5.BiasVarianceTradeoff.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/3.ClassicTheory/3.5.BiasVarianceTradeoff.tex -------------------------------------------------------------------------------- /src/4.ValueBased/4. Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/4.ValueBased/4. Chapter.tex -------------------------------------------------------------------------------- /src/4.ValueBased/4.0.Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/4.ValueBased/4.0.Chapter.tex -------------------------------------------------------------------------------- /src/4.ValueBased/4.1.DQN.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/4.ValueBased/4.1.DQN.tex -------------------------------------------------------------------------------- /src/4.ValueBased/4.2.Modifications.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/4.ValueBased/4.2.Modifications.tex -------------------------------------------------------------------------------- /src/4.ValueBased/4.3.Distributional.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/4.ValueBased/4.3.Distributional.tex -------------------------------------------------------------------------------- /src/5.PolicyGradient/5. Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/5.PolicyGradient/5. Chapter.tex -------------------------------------------------------------------------------- /src/5.PolicyGradient/5.0.Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/5.PolicyGradient/5.0.Chapter.tex -------------------------------------------------------------------------------- /src/5.PolicyGradient/5.1.PolicyGradientTheorem.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/5.PolicyGradient/5.1.PolicyGradientTheorem.tex -------------------------------------------------------------------------------- /src/5.PolicyGradient/5.2.ActorCritic.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/5.PolicyGradient/5.2.ActorCritic.tex -------------------------------------------------------------------------------- /src/5.PolicyGradient/5.3.TRPO.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/5.PolicyGradient/5.3.TRPO.tex -------------------------------------------------------------------------------- /src/6.ContinuousControl/6. Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/6.ContinuousControl/6. Chapter.tex -------------------------------------------------------------------------------- /src/6.ContinuousControl/6.0.Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/6.ContinuousControl/6.0.Chapter.tex -------------------------------------------------------------------------------- /src/6.ContinuousControl/6.1.DDPG.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/6.ContinuousControl/6.1.DDPG.tex -------------------------------------------------------------------------------- /src/6.ContinuousControl/6.2.SAC.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/6.ContinuousControl/6.2.SAC.tex -------------------------------------------------------------------------------- /src/7.ModelBased/7. Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/7.ModelBased/7. Chapter.tex -------------------------------------------------------------------------------- /src/7.ModelBased/7.0.Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/7.ModelBased/7.0.Chapter.tex -------------------------------------------------------------------------------- /src/7.ModelBased/7.1.Bandits.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/7.ModelBased/7.1.Bandits.tex -------------------------------------------------------------------------------- /src/7.ModelBased/7.2.WorldModels.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/7.ModelBased/7.2.WorldModels.tex -------------------------------------------------------------------------------- /src/7.ModelBased/7.3.MCTS.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/7.ModelBased/7.3.MCTS.tex -------------------------------------------------------------------------------- /src/7.ModelBased/7.4.LQR.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/7.ModelBased/7.4.LQR.tex -------------------------------------------------------------------------------- /src/8.NextStage/8. Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/8.NextStage/8. Chapter.tex -------------------------------------------------------------------------------- /src/8.NextStage/8.0.Chapter.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/8.NextStage/8.0.Chapter.tex -------------------------------------------------------------------------------- /src/8.NextStage/8.1.ImitationLearning.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/8.NextStage/8.1.ImitationLearning.tex -------------------------------------------------------------------------------- /src/8.NextStage/8.2.IntrinsicMotivation.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/8.NextStage/8.2.IntrinsicMotivation.tex -------------------------------------------------------------------------------- /src/8.NextStage/8.3.Multitask.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/8.NextStage/8.3.Multitask.tex -------------------------------------------------------------------------------- /src/8.NextStage/8.4.Hierarchical.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/8.NextStage/8.4.Hierarchical.tex -------------------------------------------------------------------------------- /src/8.NextStage/8.5.POMDP.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/8.NextStage/8.5.POMDP.tex -------------------------------------------------------------------------------- /src/8.NextStage/8.6.MultiAgent.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/8.NextStage/8.6.MultiAgent.tex -------------------------------------------------------------------------------- /src/Appendix/NaturalGradient.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Appendix/NaturalGradient.tex -------------------------------------------------------------------------------- /src/Appendix/QLearningConvergence.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Appendix/QLearningConvergence.tex -------------------------------------------------------------------------------- /src/DRL.bib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/DRL.bib -------------------------------------------------------------------------------- /src/Images/ARP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ARP.png -------------------------------------------------------------------------------- /src/Images/AgentPOV.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/AgentPOV.png -------------------------------------------------------------------------------- /src/Images/Atari.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Atari.png -------------------------------------------------------------------------------- /src/Images/Baseline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Baseline.png -------------------------------------------------------------------------------- /src/Images/BaselineIntuition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/BaselineIntuition.png -------------------------------------------------------------------------------- /src/Images/BeliefMDP/BeliefMDP-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/BeliefMDP/BeliefMDP-1.png -------------------------------------------------------------------------------- /src/Images/BeliefMDP/BeliefMDP-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/BeliefMDP/BeliefMDP-2.png -------------------------------------------------------------------------------- /src/Images/BeliefMDP/BeliefMDP-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/BeliefMDP/BeliefMDP-3.png -------------------------------------------------------------------------------- /src/Images/BeliefMDP/BeliefMDP-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/BeliefMDP/BeliefMDP-4.png -------------------------------------------------------------------------------- /src/Images/BeliefMDP/BeliefMDP-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/BeliefMDP/BeliefMDP-5.png -------------------------------------------------------------------------------- /src/Images/Bellman.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Bellman.png -------------------------------------------------------------------------------- /src/Images/BellmanPrinciple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/BellmanPrinciple.png -------------------------------------------------------------------------------- /src/Images/CEM/cem-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CEM/cem-0.png -------------------------------------------------------------------------------- /src/Images/CEM/cem-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CEM/cem-1.png -------------------------------------------------------------------------------- /src/Images/CEM/cem-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CEM/cem-2.png -------------------------------------------------------------------------------- /src/Images/CEM/cem-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CEM/cem-3.png -------------------------------------------------------------------------------- /src/Images/CEM/cem-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CEM/cem-4.png -------------------------------------------------------------------------------- /src/Images/CEM/cem-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CEM/cem-5.png -------------------------------------------------------------------------------- /src/Images/CEM/cem-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CEM/cem-6.png -------------------------------------------------------------------------------- /src/Images/CEM/cem-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CEM/cem-7.png -------------------------------------------------------------------------------- /src/Images/CEM/cem-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CEM/cem-8.png -------------------------------------------------------------------------------- /src/Images/CEM/cem-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CEM/cem-9.png -------------------------------------------------------------------------------- /src/Images/CMA_ES/cma_es-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CMA_ES/cma_es-0.png -------------------------------------------------------------------------------- /src/Images/CMA_ES/cma_es-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CMA_ES/cma_es-1.png -------------------------------------------------------------------------------- /src/Images/CMA_ES/cma_es-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CMA_ES/cma_es-2.png -------------------------------------------------------------------------------- /src/Images/CMA_ES/cma_es-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CMA_ES/cma_es-3.png -------------------------------------------------------------------------------- /src/Images/CMA_ES/cma_es-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CMA_ES/cma_es-4.png -------------------------------------------------------------------------------- /src/Images/CMA_ES/cma_es-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CMA_ES/cma_es-5.png -------------------------------------------------------------------------------- /src/Images/CMA_ES/cma_es-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CMA_ES/cma_es-6.png -------------------------------------------------------------------------------- /src/Images/CMA_ES/cma_es-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CMA_ES/cma_es-7.png -------------------------------------------------------------------------------- /src/Images/CMA_ES/cma_es-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CMA_ES/cma_es-8.png -------------------------------------------------------------------------------- /src/Images/CMA_ES/cma_es-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CMA_ES/cma_es-9.png -------------------------------------------------------------------------------- /src/Images/CatAndFish.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/CatAndFish.png -------------------------------------------------------------------------------- /src/Images/Categorical.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Categorical.png -------------------------------------------------------------------------------- /src/Images/Communication.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Communication.png -------------------------------------------------------------------------------- /src/Images/ContractionMapping.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ContractionMapping.png -------------------------------------------------------------------------------- /src/Images/DDP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/DDP.png -------------------------------------------------------------------------------- /src/Images/DP_backup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/DP_backup.png -------------------------------------------------------------------------------- /src/Images/Dancer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Dancer.png -------------------------------------------------------------------------------- /src/Images/DistributionalBellmanExample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/DistributionalBellmanExample.png -------------------------------------------------------------------------------- /src/Images/DistributionalExample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/DistributionalExample.png -------------------------------------------------------------------------------- /src/Images/DistributionalRL1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/DistributionalRL1.png -------------------------------------------------------------------------------- /src/Images/DistributionalRL2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/DistributionalRL2.png -------------------------------------------------------------------------------- /src/Images/DistributionalTable.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/DistributionalTable.png -------------------------------------------------------------------------------- /src/Images/DuelingDQN.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/DuelingDQN.png -------------------------------------------------------------------------------- /src/Images/EL_ES/el_es-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/EL_ES/el_es-0.png -------------------------------------------------------------------------------- /src/Images/EL_ES/el_es-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/EL_ES/el_es-1.png -------------------------------------------------------------------------------- /src/Images/EL_ES/el_es-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/EL_ES/el_es-2.png -------------------------------------------------------------------------------- /src/Images/EL_ES/el_es-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/EL_ES/el_es-3.png -------------------------------------------------------------------------------- /src/Images/EL_ES/el_es-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/EL_ES/el_es-4.png -------------------------------------------------------------------------------- /src/Images/EL_ES/el_es-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/EL_ES/el_es-5.png -------------------------------------------------------------------------------- /src/Images/EL_ES/el_es-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/EL_ES/el_es-6.png -------------------------------------------------------------------------------- /src/Images/EL_ES/el_es-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/EL_ES/el_es-7.png -------------------------------------------------------------------------------- /src/Images/EL_ES/el_es-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/EL_ES/el_es-8.png -------------------------------------------------------------------------------- /src/Images/EL_ES/el_es-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/EL_ES/el_es-9.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-1.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-10.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-11.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-12.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-13.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-14.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-15.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-2.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-3.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-4.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-5.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-6.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-7.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-8.png -------------------------------------------------------------------------------- /src/Images/ENV/Environment-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ENV/Environment-9.png -------------------------------------------------------------------------------- /src/Images/ES/es-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ES/es-0.png -------------------------------------------------------------------------------- /src/Images/ES/es-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ES/es-1.png -------------------------------------------------------------------------------- /src/Images/ES/es-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ES/es-2.png -------------------------------------------------------------------------------- /src/Images/ES/es-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ES/es-3.png -------------------------------------------------------------------------------- /src/Images/ES/es-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ES/es-4.png -------------------------------------------------------------------------------- /src/Images/ES/es-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ES/es-5.png -------------------------------------------------------------------------------- /src/Images/EligibilityTrace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/EligibilityTrace.png -------------------------------------------------------------------------------- /src/Images/EquivMDP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/EquivMDP.png -------------------------------------------------------------------------------- /src/Images/GPI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/GPI.png -------------------------------------------------------------------------------- /src/Images/GreedyIsStupid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/GreedyIsStupid.png -------------------------------------------------------------------------------- /src/Images/GreedyPIisBad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/GreedyPIisBad.png -------------------------------------------------------------------------------- /src/Images/HC/hill_climbing-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HC/hill_climbing-0.png -------------------------------------------------------------------------------- /src/Images/HC/hill_climbing-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HC/hill_climbing-1.png -------------------------------------------------------------------------------- /src/Images/HC/hill_climbing-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HC/hill_climbing-2.png -------------------------------------------------------------------------------- /src/Images/HC/hill_climbing-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HC/hill_climbing-3.png -------------------------------------------------------------------------------- /src/Images/HC/hill_climbing-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HC/hill_climbing-4.png -------------------------------------------------------------------------------- /src/Images/HC/hill_climbing-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HC/hill_climbing-5.png -------------------------------------------------------------------------------- /src/Images/HC/hill_climbing-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HC/hill_climbing-6.png -------------------------------------------------------------------------------- /src/Images/HC/hill_climbing-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HC/hill_climbing-7.png -------------------------------------------------------------------------------- /src/Images/HC/hill_climbing-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HC/hill_climbing-8.png -------------------------------------------------------------------------------- /src/Images/HC/hill_climbing-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HC/hill_climbing-9.png -------------------------------------------------------------------------------- /src/Images/HER/HER-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HER/HER-1.png -------------------------------------------------------------------------------- /src/Images/HER/HER-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HER/HER-2.png -------------------------------------------------------------------------------- /src/Images/HER/HER-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HER/HER-3.png -------------------------------------------------------------------------------- /src/Images/HER/HER-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HER/HER-4.png -------------------------------------------------------------------------------- /src/Images/HER/HER-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HER/HER-5.png -------------------------------------------------------------------------------- /src/Images/HERres/HERres1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HERres/HERres1.jpg -------------------------------------------------------------------------------- /src/Images/HERres/HERres2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HERres/HERres2.jpg -------------------------------------------------------------------------------- /src/Images/HERres/HERres3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HERres/HERres3.jpg -------------------------------------------------------------------------------- /src/Images/HERres/HERres4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HERres/HERres4.jpg -------------------------------------------------------------------------------- /src/Images/HERres/HERres5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HERres/HERres5.jpg -------------------------------------------------------------------------------- /src/Images/HERres/HERres6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HERres/HERres6.jpg -------------------------------------------------------------------------------- /src/Images/HERres/HERres7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HERres/HERres7.jpg -------------------------------------------------------------------------------- /src/Images/HERres/HERres8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HERres/HERres8.jpg -------------------------------------------------------------------------------- /src/Images/HERres/HERres9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HERres/HERres9.jpg -------------------------------------------------------------------------------- /src/Images/HRL4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HRL4.png -------------------------------------------------------------------------------- /src/Images/HardTrustRegion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/HardTrustRegion.png -------------------------------------------------------------------------------- /src/Images/ICM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ICM.png -------------------------------------------------------------------------------- /src/Images/IRL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/IRL.png -------------------------------------------------------------------------------- /src/Images/KLdivProblem.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/KLdivProblem.png -------------------------------------------------------------------------------- /src/Images/KLgeometry.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/KLgeometry.png -------------------------------------------------------------------------------- /src/Images/MC/MC-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-1.png -------------------------------------------------------------------------------- /src/Images/MC/MC-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-10.png -------------------------------------------------------------------------------- /src/Images/MC/MC-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-11.png -------------------------------------------------------------------------------- /src/Images/MC/MC-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-12.png -------------------------------------------------------------------------------- /src/Images/MC/MC-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-13.png -------------------------------------------------------------------------------- /src/Images/MC/MC-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-14.png -------------------------------------------------------------------------------- /src/Images/MC/MC-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-15.png -------------------------------------------------------------------------------- /src/Images/MC/MC-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-2.png -------------------------------------------------------------------------------- /src/Images/MC/MC-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-3.png -------------------------------------------------------------------------------- /src/Images/MC/MC-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-4.png -------------------------------------------------------------------------------- /src/Images/MC/MC-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-5.png -------------------------------------------------------------------------------- /src/Images/MC/MC-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-6.png -------------------------------------------------------------------------------- /src/Images/MC/MC-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-7.png -------------------------------------------------------------------------------- /src/Images/MC/MC-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-8.png -------------------------------------------------------------------------------- /src/Images/MC/MC-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC/MC-9.png -------------------------------------------------------------------------------- /src/Images/MCTS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-0.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-1.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-10.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-10.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-11.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-11.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-12.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-12.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-13.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-13.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-14.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-14.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-15.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-15.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-16.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-17.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-17.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-18.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-18.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-19.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-2.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-20.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-20.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-21.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-22.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-23.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-23.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-3.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-4.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-5.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-6.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-7.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-8.png -------------------------------------------------------------------------------- /src/Images/MCTS/MCTS-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MCTS/MCTS-9.png -------------------------------------------------------------------------------- /src/Images/MC_backup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MC_backup.png -------------------------------------------------------------------------------- /src/Images/MDP/MDP-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MDP/MDP-1.png -------------------------------------------------------------------------------- /src/Images/MDP/MDP-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MDP/MDP-2.png -------------------------------------------------------------------------------- /src/Images/MDP/MDP-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MDP/MDP-3.png -------------------------------------------------------------------------------- /src/Images/MDP/MDP-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MDP/MDP-4.png -------------------------------------------------------------------------------- /src/Images/MDP/MDP-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MDP/MDP-5.png -------------------------------------------------------------------------------- /src/Images/MDP/MDP-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MDP/MDP-6.png -------------------------------------------------------------------------------- /src/Images/MM.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MM.png -------------------------------------------------------------------------------- /src/Images/MS_ES/ms_es-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MS_ES/ms_es-0.png -------------------------------------------------------------------------------- /src/Images/MS_ES/ms_es-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MS_ES/ms_es-1.png -------------------------------------------------------------------------------- /src/Images/MS_ES/ms_es-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MS_ES/ms_es-2.png -------------------------------------------------------------------------------- /src/Images/MS_ES/ms_es-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MS_ES/ms_es-3.png -------------------------------------------------------------------------------- /src/Images/MS_ES/ms_es-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MS_ES/ms_es-4.png -------------------------------------------------------------------------------- /src/Images/MS_ES/ms_es-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MS_ES/ms_es-5.png -------------------------------------------------------------------------------- /src/Images/MS_ES/ms_es-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MS_ES/ms_es-6.png -------------------------------------------------------------------------------- /src/Images/MS_ES/ms_es-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MS_ES/ms_es-7.png -------------------------------------------------------------------------------- /src/Images/MS_ES/ms_es-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MS_ES/ms_es-8.png -------------------------------------------------------------------------------- /src/Images/MarioStupid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MarioStupid.png -------------------------------------------------------------------------------- /src/Images/MarioVAE.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MarioVAE.png -------------------------------------------------------------------------------- /src/Images/MuZero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MuZero.png -------------------------------------------------------------------------------- /src/Images/MultiStepBad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MultiStepBad.png -------------------------------------------------------------------------------- /src/Images/MultiStepErrors3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MultiStepErrors3.png -------------------------------------------------------------------------------- /src/Images/MultiStepErrors4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MultiStepErrors4.png -------------------------------------------------------------------------------- /src/Images/MultiTask1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MultiTask1.png -------------------------------------------------------------------------------- /src/Images/MultiTask3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/MultiTask3.png -------------------------------------------------------------------------------- /src/Images/Multimodal1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Multimodal1.png -------------------------------------------------------------------------------- /src/Images/Multimodal2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Multimodal2.png -------------------------------------------------------------------------------- /src/Images/NoisyNets1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/NoisyNets1.png -------------------------------------------------------------------------------- /src/Images/NoisyNets2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/NoisyNets2.png -------------------------------------------------------------------------------- /src/Images/OptDefEq.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/OptDefEq.png -------------------------------------------------------------------------------- /src/Images/OptimalQ.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/OptimalQ.png -------------------------------------------------------------------------------- /src/Images/OverestimationIssue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/OverestimationIssue.png -------------------------------------------------------------------------------- /src/Images/PE/policyeval0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PE/policyeval0.png -------------------------------------------------------------------------------- /src/Images/PE/policyeval1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PE/policyeval1.png -------------------------------------------------------------------------------- /src/Images/PE/policyeval2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PE/policyeval2.png -------------------------------------------------------------------------------- /src/Images/PE/policyeval3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PE/policyeval3.png -------------------------------------------------------------------------------- /src/Images/PE/policyeval4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PE/policyeval4.png -------------------------------------------------------------------------------- /src/Images/PE/policyeval5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PE/policyeval5.png -------------------------------------------------------------------------------- /src/Images/PE/policyeval6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PE/policyeval6.png -------------------------------------------------------------------------------- /src/Images/PE/policyeval7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PE/policyeval7.png -------------------------------------------------------------------------------- /src/Images/PE/policyeval8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PE/policyeval8.png -------------------------------------------------------------------------------- /src/Images/PE/policyeval9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PE/policyeval9.png -------------------------------------------------------------------------------- /src/Images/PER.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PER.png -------------------------------------------------------------------------------- /src/Images/PGisPI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PGisPI.png -------------------------------------------------------------------------------- /src/Images/PI/policyiter0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PI/policyiter0.png -------------------------------------------------------------------------------- /src/Images/PI/policyiter1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PI/policyiter1.png -------------------------------------------------------------------------------- /src/Images/PI/policyiter2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PI/policyiter2.png -------------------------------------------------------------------------------- /src/Images/PI/policyiter3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PI/policyiter3.png -------------------------------------------------------------------------------- /src/Images/PI_basic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PI_basic.png -------------------------------------------------------------------------------- /src/Images/PPOpipeline2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PPOpipeline2.png -------------------------------------------------------------------------------- /src/Images/Planning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Planning.png -------------------------------------------------------------------------------- /src/Images/PlanningNonoptimal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PlanningNonoptimal.png -------------------------------------------------------------------------------- /src/Images/PrioritizedIssue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/PrioritizedIssue.png -------------------------------------------------------------------------------- /src/Images/Projecting.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Projecting.png -------------------------------------------------------------------------------- /src/Images/QisBad.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/QisBad.png -------------------------------------------------------------------------------- /src/Images/Qnetwork1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Qnetwork1.png -------------------------------------------------------------------------------- /src/Images/Qnetwork2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Qnetwork2.png -------------------------------------------------------------------------------- /src/Images/Quantile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Quantile.png -------------------------------------------------------------------------------- /src/Images/R2D2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/R2D2.png -------------------------------------------------------------------------------- /src/Images/RL.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/RL.png -------------------------------------------------------------------------------- /src/Images/RND.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/RND.png -------------------------------------------------------------------------------- /src/Images/SA/simulated_annealing-0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SA/simulated_annealing-0.png -------------------------------------------------------------------------------- /src/Images/SA/simulated_annealing-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SA/simulated_annealing-1.png -------------------------------------------------------------------------------- /src/Images/SA/simulated_annealing-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SA/simulated_annealing-2.png -------------------------------------------------------------------------------- /src/Images/SA/simulated_annealing-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SA/simulated_annealing-3.png -------------------------------------------------------------------------------- /src/Images/SA/simulated_annealing-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SA/simulated_annealing-4.png -------------------------------------------------------------------------------- /src/Images/SA/simulated_annealing-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SA/simulated_annealing-5.png -------------------------------------------------------------------------------- /src/Images/SA/simulated_annealing-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SA/simulated_annealing-6.png -------------------------------------------------------------------------------- /src/Images/SA/simulated_annealing-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SA/simulated_annealing-7.png -------------------------------------------------------------------------------- /src/Images/SA/simulated_annealing-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SA/simulated_annealing-8.png -------------------------------------------------------------------------------- /src/Images/SA/simulated_annealing-9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SA/simulated_annealing-9.png -------------------------------------------------------------------------------- /src/Images/SafeRL1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SafeRL1.png -------------------------------------------------------------------------------- /src/Images/SafeRL2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SafeRL2.png -------------------------------------------------------------------------------- /src/Images/SampleEfficiency.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/SampleEfficiency.png -------------------------------------------------------------------------------- /src/Images/Score.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Score.png -------------------------------------------------------------------------------- /src/Images/Scrat_iceage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Scrat_iceage.png -------------------------------------------------------------------------------- /src/Images/TD3_overestimation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/TD3_overestimation.png -------------------------------------------------------------------------------- /src/Images/TD_backup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/TD_backup.png -------------------------------------------------------------------------------- /src/Images/TRPOpipeline2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/TRPOpipeline2.png -------------------------------------------------------------------------------- /src/Images/TS.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/TS.png -------------------------------------------------------------------------------- /src/Images/Traces.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Traces.png -------------------------------------------------------------------------------- /src/Images/TrajIntersect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/TrajIntersect.png -------------------------------------------------------------------------------- /src/Images/Tree.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Tree.png -------------------------------------------------------------------------------- /src/Images/TrustRegion1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/TrustRegion1.png -------------------------------------------------------------------------------- /src/Images/TrustRegion2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/TrustRegion2.png -------------------------------------------------------------------------------- /src/Images/TrustRegion3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/TrustRegion3.png -------------------------------------------------------------------------------- /src/Images/UCB.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/UCB.png -------------------------------------------------------------------------------- /src/Images/VI/valueiter0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/VI/valueiter0.png -------------------------------------------------------------------------------- /src/Images/VI/valueiter1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/VI/valueiter1.png -------------------------------------------------------------------------------- /src/Images/VI/valueiter2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/VI/valueiter2.png -------------------------------------------------------------------------------- /src/Images/VI/valueiter3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/VI/valueiter3.png -------------------------------------------------------------------------------- /src/Images/VI/valueiter4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/VI/valueiter4.png -------------------------------------------------------------------------------- /src/Images/VI/valueiter5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/VI/valueiter5.png -------------------------------------------------------------------------------- /src/Images/VI/valueiter6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/VI/valueiter6.png -------------------------------------------------------------------------------- /src/Images/VI/valueiter7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/VI/valueiter7.png -------------------------------------------------------------------------------- /src/Images/VI/valueiter8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/VI/valueiter8.png -------------------------------------------------------------------------------- /src/Images/VI/valueiter9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/VI/valueiter9.png -------------------------------------------------------------------------------- /src/Images/VI_basic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/VI_basic.png -------------------------------------------------------------------------------- /src/Images/Value.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Value.png -------------------------------------------------------------------------------- /src/Images/ValueTrustRegion1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ValueTrustRegion1.png -------------------------------------------------------------------------------- /src/Images/ValueTrustRegion2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ValueTrustRegion2.png -------------------------------------------------------------------------------- /src/Images/ValueTrustRegion3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ValueTrustRegion3.png -------------------------------------------------------------------------------- /src/Images/Wasserstein.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Wasserstein.png -------------------------------------------------------------------------------- /src/Images/WassersteinSamples.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/WassersteinSamples.png -------------------------------------------------------------------------------- /src/Images/Zfunction.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/Zfunction.png -------------------------------------------------------------------------------- /src/Images/agentenv.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/agentenv.jpeg -------------------------------------------------------------------------------- /src/Images/agentenv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/agentenv.png -------------------------------------------------------------------------------- /src/Images/backward_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/backward_view.png -------------------------------------------------------------------------------- /src/Images/bandits.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/bandits.png -------------------------------------------------------------------------------- /src/Images/behavioralclonning.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/behavioralclonning.png -------------------------------------------------------------------------------- /src/Images/c51.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/c51.png -------------------------------------------------------------------------------- /src/Images/cartpole.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/cartpole.png -------------------------------------------------------------------------------- /src/Images/correlated.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/correlated.jpg -------------------------------------------------------------------------------- /src/Images/curiositydoors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/curiositydoors.png -------------------------------------------------------------------------------- /src/Images/decorrelated.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/decorrelated.jpg -------------------------------------------------------------------------------- /src/Images/distributionalVIissue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/distributionalVIissue.png -------------------------------------------------------------------------------- /src/Images/doors.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/doors.jpg -------------------------------------------------------------------------------- /src/Images/eSoftBellman.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/eSoftBellman.png -------------------------------------------------------------------------------- /src/Images/evolution_strategy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/evolution_strategy.png -------------------------------------------------------------------------------- /src/Images/exploration.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/exploration.png -------------------------------------------------------------------------------- /src/Images/forward_view.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/forward_view.png -------------------------------------------------------------------------------- /src/Images/hierarchy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/hierarchy.png -------------------------------------------------------------------------------- /src/Images/inversemodel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/inversemodel.png -------------------------------------------------------------------------------- /src/Images/iqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/iqn.png -------------------------------------------------------------------------------- /src/Images/maze.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/maze.png -------------------------------------------------------------------------------- /src/Images/neat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/neat.png -------------------------------------------------------------------------------- /src/Images/neatmutation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/neatmutation.png -------------------------------------------------------------------------------- /src/Images/noisytv.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/noisytv.png -------------------------------------------------------------------------------- /src/Images/novelty4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/novelty4.png -------------------------------------------------------------------------------- /src/Images/noveltydoors.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/noveltydoors.png -------------------------------------------------------------------------------- /src/Images/nut.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/nut.png -------------------------------------------------------------------------------- /src/Images/nut_cartoon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/nut_cartoon.png -------------------------------------------------------------------------------- /src/Images/optimalZissue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/optimalZissue.png -------------------------------------------------------------------------------- /src/Images/ornstein_uhlenbeck_noise.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/ornstein_uhlenbeck_noise.png -------------------------------------------------------------------------------- /src/Images/puzzle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/puzzle.png -------------------------------------------------------------------------------- /src/Images/qrdqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/qrdqn.png -------------------------------------------------------------------------------- /src/Images/quantileloss.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/quantileloss.png -------------------------------------------------------------------------------- /src/Images/quantileprojection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/quantileprojection.png -------------------------------------------------------------------------------- /src/Images/randomsearch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/randomsearch.png -------------------------------------------------------------------------------- /src/Images/reach.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/reach.jpg -------------------------------------------------------------------------------- /src/Images/reach.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/reach.pdf -------------------------------------------------------------------------------- /src/Images/reach.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/reach.png -------------------------------------------------------------------------------- /src/Images/realsim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/realsim.png -------------------------------------------------------------------------------- /src/Images/robot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/robot.png -------------------------------------------------------------------------------- /src/Images/rubik.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/rubik.png -------------------------------------------------------------------------------- /src/Images/scrat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/scrat.png -------------------------------------------------------------------------------- /src/Images/sepheads.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/sepheads.png -------------------------------------------------------------------------------- /src/Images/spaceinvaders.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/spaceinvaders.png -------------------------------------------------------------------------------- /src/Images/targetNetwork.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/targetNetwork.png -------------------------------------------------------------------------------- /src/Images/td_lambda.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/td_lambda.png -------------------------------------------------------------------------------- /src/Images/topologymutation.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/topologymutation.png -------------------------------------------------------------------------------- /src/Images/traj.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/traj.png -------------------------------------------------------------------------------- /src/Images/traj_rew.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/traj_rew.png -------------------------------------------------------------------------------- /src/Images/twopaths.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/twopaths.png -------------------------------------------------------------------------------- /src/Images/virtualgoal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/virtualgoal.png -------------------------------------------------------------------------------- /src/Images/world_models.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/Images/world_models.jpeg -------------------------------------------------------------------------------- /src/main.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/main.tex -------------------------------------------------------------------------------- /src/sources.tex: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/sources.tex -------------------------------------------------------------------------------- /src/style.sty: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FortsAndMills/RL-Theory-book/HEAD/src/style.sty --------------------------------------------------------------------------------