24 |
25 | Q-Learning with Eligibility Traces
26 | ----------------------------------
27 |
28 | ``` r
29 | env$resetEverything()  # reset the environment before training a fresh agent (printed result below)
30 | #> [1] 30
31 | policy = makePolicy("epsilon.greedy", epsilon = 0.1)  # epsilon-greedy policy with epsilon = 0.1
32 | alg = makeAlgorithm("qlearning", lambda = 0.8, traces = "accumulate")  # Q-learning with accumulating traces, lambda = 0.8
33 | agent = makeAgent(policy, "table", alg)  # agent built from the policy, a "table" value function and the algorithm
34 | # Let the agent interact with the environment for 500 episodes.
35 | res = interact(env, agent, n.episodes = 500L)
36 | ```
37 |
38 |
39 |
40 | Q-Learning with Experience Replay
41 | ---------------------------------
42 |
43 | ``` r
44 | env$resetEverything()  # reset the environment before training a fresh agent (printed result below)
45 | #> [1] 30
46 | policy = makePolicy("epsilon.greedy", epsilon = 0.1)  # epsilon-greedy policy with epsilon = 0.1
47 | mem = makeReplayMemory(size = 10L, batch.size = 10L)  # replay memory of size 10, batch size 10
48 | agent = makeAgent(policy, "table", "qlearning", experience.replay = mem)  # tabular Q-learning agent using the replay memory
49 | # Let the agent interact with the environment for 500 episodes.
50 | res = interact(env, agent, n.episodes = 500L)
51 | ```
52 |
53 |
54 |
55 | Q-Learning with Neural Network and Experience Replay
56 | ----------------------------------------------------
57 |
58 | ``` r
59 | env$resetEverything()  # reset the environment before training a fresh agent (printed result below)
60 | #> [1] 30
61 | library(keras)
62 | model = keras_model_sequential() %>%  # sequential keras model used as the value function
63 | layer_dense(units = env$n.actions, activation = "linear",  # one linear output unit per action
64 | input_shape = c(env$n.states), kernel_initializer = initializer_zeros(),  # one input per state; weights start at zero
65 | use_bias = FALSE) %>%  # no bias term, so the layer has n.states x n.actions weights only
66 | compile(loss = "mae", optimizer = optimizer_sgd(lr = 1))  # mean absolute error loss, SGD with learning rate 1
67 | mem = makeReplayMemory(size = 2L, batch.size = 2L)  # replay memory of size 2, batch size 2
68 | val = makeValueFunction("neural.network", model = model)  # wrap the keras model as a value function
69 | policy = makePolicy("epsilon.greedy", epsilon = 0.1)  # epsilon-greedy policy with epsilon = 0.1
70 | preprocess = function(x) to_categorical(x, num_classes = env$n.states)  # one-hot encode the state for the network input
71 | agent = makeAgent(policy, val, "qlearning",  # Q-learning agent using the network value function
72 | preprocess = preprocess, experience.replay = mem)  # with state preprocessing and the replay memory
73 | # Let the agent interact with the environment for 500 episodes.
74 | res = interact(env, agent, n.episodes = 500L)
75 | ```
76 |
77 |
78 |
--------------------------------------------------------------------------------
/man/tilecoding.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/tiles.R
3 | \name{tiles}
4 | \alias{tiles}
5 | \alias{iht}
6 | \title{Tile Coding}
7 | \usage{
8 | tiles(iht, n.tilings, state, action = integer(0))
9 |
10 | iht(max.size)
11 | }
12 | \arguments{
13 | \item{iht}{[\code{IHT}] \cr A hash table created with \code{iht}.}
14 |
15 | \item{n.tilings}{[\code{integer(1)}] \cr Number of tilings.}
16 |
17 | \item{state}{[\code{vector(2)}] \cr A two-dimensional state observation.
18 | Make sure to scale the observation to unit variance before passing it to \code{tiles}.}
19 |
20 | \item{action}{[\code{integer(1)}] \cr Optional: If supplied the action space
21 | will also be tiled. All distinct actions will result in different tile numbers.}
22 |
23 | \item{max.size}{[\code{integer(1)}] \cr Maximal size of hash table.}
24 | }
25 | \value{
26 | \code{iht} creates a hash table, which can then be passed on to \code{tiles}.
27 | \code{tiles} returns an integer vector of size \code{n.tilings} with the active tile numbers.
28 | }
29 | \description{
30 | Implementation of Sutton's tile coding software version 3.
31 | }
32 | \details{
33 | Tile coding is a way of representing the values of a vector of continuous variables as a large
34 | binary vector with few 1s and many 0s. The binary vector is not represented explicitly,
35 | but as a list of the components that are 1s. The main step is to partition, or tile,
36 | the continuous space multiple times and select one tile from each tiling, the one corresponding
37 | to the vector's value. Each tile is converted to an element in the big binary vector,
38 | and the list of the tile (element) numbers is returned as the representation of the vector's value.
39 | Tile coding is recommended as a way of applying online learning methods to domains with continuous
40 | state or action variables. [copied from manual]
41 |
42 | See the detailed manual on the web.
43 | In contrast to the Python implementation, indices start at 1 instead of 0. The hash table is
44 | implemented as an environment, which is an attribute of an R6 class.
45 |
46 | Make sure that the hash table is large enough; otherwise an error is triggered
47 | when trying to assign a value to a full hash table.
48 | }
49 | \examples{
50 | # Create a hash table with a maximal size of 1024
51 | hash = iht(1024)
52 |
53 | # Partition state space using 8 tilings (each call returns the 8 active tile numbers)
54 | tiles(hash, n.tilings = 8, state = c(3.6, 7.21))
55 | tiles(hash, n.tilings = 8, state = c(3.7, 7.21))
56 | tiles(hash, n.tilings = 8, state = c(4, 7))
57 | tiles(hash, n.tilings = 8, state = c(- 37.2, 7))
58 |
59 | }
60 | \references{
61 | Sutton and Barto (Book draft 2017): Reinforcement Learning: An Introduction
62 | }
63 |
--------------------------------------------------------------------------------
/session_info.txt:
--------------------------------------------------------------------------------
1 | - Session info ----------------------------------------------------------
2 | setting value
3 | version R version 3.4.3 (2017-11-30)
4 | os Windows 10 x64
5 | system x86_64, mingw32
6 | ui RTerm
7 | language (EN)
8 | collate English_Germany.1252
9 | tz Europe/Berlin
10 | date 2017-12-23
11 |
12 | - Packages --------------------------------------------------------------
13 | package * version date source
14 | assertthat 0.2.0 2017-04-11 CRAN (R 3.4.2)
15 | backports 1.1.1 2017-09-25 CRAN (R 3.4.1)
16 | cli 1.0.0 2017-12-22 Github (r-lib/cli@ab1c3aa)
17 | clisymbols 1.2.0 2017-05-21 CRAN (R 3.4.3)
18 | crayon 1.3.4 2017-09-16 CRAN (R 3.4.2)
19 | desc 1.1.1 2017-08-03 CRAN (R 3.4.2)
20 | devtools 1.13.3.9000 2017-12-22 Github (hadley/devtools@0bcfd6e)
21 | digest 0.6.13 2017-12-14 CRAN (R 3.4.3)
22 | evaluate 0.10.1 2017-06-24 CRAN (R 3.4.2)
23 | htmltools 0.3.6 2017-04-28 CRAN (R 3.4.2)
24 | knitr 1.17 2017-08-10 CRAN (R 3.4.2)
25 | magrittr 1.5 2014-11-22 CRAN (R 3.4.2)
26 | memoise 1.1.0 2017-04-21 CRAN (R 3.4.2)
27 | pkgbuild 0.0.0.9000 2017-12-22 Github (r-lib/pkgbuild@ce7f6d1)
28 | pkgload 0.0.0.9000 2017-12-22 Github (r-lib/pkgload@70eaef8)
29 | R6 2.2.2 2017-06-17 CRAN (R 3.4.2)
30 | Rcpp 0.12.13 2017-09-28 CRAN (R 3.4.2)
31 | rlang 0.1.4.9000 2017-12-22 Github (tidyverse/rlang@cc7587c)
32 | rmarkdown 1.8 2017-11-17 CRAN (R 3.4.2)
33 | rprojroot 1.3-1 2017-12-18 CRAN (R 3.4.3)
34 | sessioninfo 1.0.1.9000 2017-12-22 Github (r-lib/sessioninfo@c871d01)
35 | stringi 1.1.6 2017-11-17 CRAN (R 3.4.2)
36 | stringr 1.2.0 2017-02-18 CRAN (R 3.4.2)
37 | testthat 2.0.0 2017-12-13 CRAN (R 3.4.3)
38 | usethis 1.1.0.9000 2017-12-22 Github (r-lib/usethis@973bcab)
39 | withr 2.1.1 2017-12-19 CRAN (R 3.4.3)
40 | yaml 2.1.16 2017-12-12 CRAN (R 3.4.3)
41 |
--------------------------------------------------------------------------------
/docs/jquery.sticky-kit.min.js:
--------------------------------------------------------------------------------
1 | /*
2 | Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 | http://leafo.net
3 | */
4 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k));
5 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b(""))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q,
6 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),eYEAR: 2017 100 | COPYRIGHT HOLDER: Markus Dumke 101 | 102 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 103 | 104 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 105 | 106 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 107 |108 | 109 |