├── .Rbuildignore ├── .ackrc ├── .gitignore ├── .travis.yml ├── .travis.yml.new ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── agent_base.R ├── agent_ddqn2.R ├── agent_double_qn.R ├── agent_dqn.R ├── agent_fdqn.R ├── agent_pg.R ├── agent_pg_actor_critic.R ├── agent_pg_baseline.R ├── agent_pg_compact.R ├── agent_pg_ddpg.R ├── agent_table.R ├── conf.R ├── confDefault.R ├── environment_base.R ├── environment_gym.R ├── experiment.R ├── interaction_base.R ├── interaction_observer.R ├── logging.R ├── nnArsenal.R ├── nnArsenal_ddpg.R ├── obsolette.R ├── performance.R ├── policy.R ├── replaymem.R ├── replaymem_helpers.R ├── replaymem_png.R ├── surrogate_base.R ├── surrogate_nn.R ├── visualize.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── appveyor.yml ├── attr ├── arsenal_attr.R ├── customized_brain_mountainCar.Rmd ├── play_atari_games.Rmd └── repeated_experiment.Rmd ├── benchmark ├── bt_algorithms.R ├── bt_conf.R ├── bt_experiment.R ├── bt_problem.R ├── plotHelper.R ├── rl_h.R └── test_topic_demo.R ├── codecov.yml ├── cran-comments.md ├── cran_check.sh ├── docs ├── LICENSE-text.html ├── articles │ ├── custom_configuration.html │ ├── define_custom_environments.html │ ├── index.html │ ├── python_dependencies.html │ ├── repeated_experiment.html │ └── table_learning.html ├── authors.html ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml └── reference │ ├── Agent.html │ ├── AgentArmed.html │ ├── Environment.html │ ├── checkPyDep.html │ ├── getDefaultConf.html │ ├── index.html │ ├── initAgent.html │ ├── installDep2SysVirtualEnv.html │ ├── installDepConda.html │ ├── listAvailAgent.html │ ├── listAvailConf.html │ ├── listGymEnvs.html │ ├── makeGymEnv.html │ ├── repExperiment.html │ ├── rlr_test_if_gym_works.html │ ├── rlr_test_if_keras_works.html │ ├── rlr_test_if_tensorflow_works.html │ └── showDefaultConf.html ├── inst ├── figures │ ├── ac.png │ ├── ac300.png │ ├── acrobat.pdf │ ├── dqn.png │ └── mplot-1.png └── repAtari200.R ├── paper ├── Makefile ├── figures │ ├── ac.png │ ├── ac300.png │ ├── acrobat.pdf │ ├── dqn.png │ └── mplot-1.png ├── latex.template ├── paper.bib └── paper.md ├── requirement.txt ├── rlR.Rproj ├── tests ├── testthat.R └── testthat │ ├── test_environment.R │ ├── test_file_conf.R │ ├── test_file_nnArsenal.R │ ├── test_file_replay_mem.R │ ├── test_file_zzz.R │ ├── test_gym_basic.R │ ├── test_gym_ddpg.R │ ├── test_rep_experiment.R │ ├── test_topic_atari.R │ └── test_topic_cnn.R └── vignettes ├── custom_configuration.Rmd ├── define_custom_environments.Rmd ├── python_dependencies.Rmd └── table_learning.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^docs$ 2 | ^_pkgdown\.yml$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ^README.Rmd 6 | ^.travis.yml 7 | ^_pkgdown.yml 8 | ^docs 9 | ^cran-comments\.md$ 10 | ^benchmark 11 | ^appveyor.yml 12 | ^codecov\.yml$ 13 | ^.ackrc 14 | ^attr 15 | ^requirement.txt 16 | ^paper$ 17 | ^cran_check.sh 18 | -------------------------------------------------------------------------------- /.ackrc: -------------------------------------------------------------------------------- 1 | --ignore-dir=.Rproj.user 2 | --ignore-dir=docs 3 | --ignore-dir=man 4 | --ignore-dir=.Rhistory 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | man 2 | log 3 | logout 4 | .Rproj.user 5 | .Rhistory 6 | .RData 7 
| .Ruserdata 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | sudo: required 3 | cache: packages 4 | matrix: 5 | include: 6 | - python:2.7 7 | - python:3.6 8 | addons: 9 | apt: 10 | update: true 11 | packages: 12 | - python2.7 13 | - python-pip 14 | - python-dev 15 | - build-essential 16 | - zlib1g-dev 17 | before_install: 18 | - sudo pip install gym 19 | - sudo pip install gym[atari] 20 | - sudo pip install tensorflow==1.1.0 21 | - sudo pip install numpy==1.12.1 22 | - sudo pip install h5py==2.7.0 23 | env: 24 | matrix: 25 | - _R_CHECK_LENGTH_1_CONDITION_=true 26 | r: 27 | - devel 28 | - release 29 | r_github_packages: 30 | - jimhester/covr 31 | - hadley/pkgdown 32 | - rstudio/keras 33 | before_script: 34 | - R --no-save <<< 'library("devtools"); document()' 35 | - R --no-save <<< 'library("tensorflow"); install_tensorflow()' 36 | - R --no-save <<< 'library("keras"); install_keras()' 37 | after_success: 38 | - 'if [[ "$TRAVIS_PULL_REQUEST" == "false" && "$TRAVIS_BRANCH" == "master" && "$TRAVIS_R_VERSION_STRING" == "release" && "$TRAVIS_EVENT_TYPE" != "cron" ]] ; then 39 | R --no-save <<< "devtools::install(); pkgdown::build_site()"; 40 | git checkout master; 41 | export TRAVIS_COMMIT_MSG="$(git log --format=%B --no-merges -n 1)"; 42 | git config --global user.name "Travis CI"; 43 | git config --global user.email "$COMMIT_AUTHOR_EMAIL"; 44 | git config credential.helper "store --file=.git/credentials"; 45 | echo "https://${GH_TOKEN}:@github.com" >> .git/credentials; 46 | git config push.default matching; 47 | git add --force man/*; 48 | git add --force README.md; 49 | git add --force docs/*; 50 | git rm -r --cached $(find . 
-type d -name "*_cache"); 51 | git commit man DESCRIPTION NAMESPACE README.md docs -m "update auto-generated documentation [ci skip]" -m "$TRAVIS_COMMIT_MSG" || true; 52 | git push; 53 | fi;' 54 | - 'if [[ "$TRAVIS_R_VERSION_STRING" == "devel" && "$TRAVIS_EVENT_TYPE" != "cron" ]] ; then 55 | Rscript -e "covr::coveralls()"; 56 | fi;' 57 | -------------------------------------------------------------------------------- /.travis.yml.new: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: r 3 | sudo: required 4 | cache: packages 5 | addons: 6 | apt: 7 | sources: 8 | - ppa:jonathonf/python-3.6 9 | update: true 10 | packages: 11 | - python3 12 | - python3-pip 13 | - python3-dev 14 | - build-essential 15 | - zlib1g-dev 16 | before_install: 17 | - sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.5 1 18 | - sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 19 | - sudo update-alternatives --set python /usr/bin/python3 20 | - sudo pip3 install --upgrade setuptools pip wheel 21 | - sudo apt-get install -y software-properties-common 22 | - sudo apt-get install -y zlib1g-dev 23 | - sudo pip3 install cmake 24 | - sudo pip3 install scipy==1.1.0 25 | - sudo pip3 install numpy==1.14.5 26 | - sudo pip3 install tensorflow==1.8.0 27 | - sudo pip3 install keras==2.2.0 28 | - sudo pip3 install gym==0.10.5 29 | - sudo pip3 install gym[atari] 30 | - sudo pip3 install virtualenv 31 | env: 32 | matrix: 33 | - _R_CHECK_LENGTH_1_CONDITION_=true 34 | r: 35 | - devel 36 | - release 37 | r_packages: 38 | - devtools 39 | - covr 40 | r_github_packages: 41 | - codecov/example-r 42 | - hadley/pkgdown 43 | - rstudio/keras 44 | before_script: 45 | - R --no-save <<< 'library("devtools"); document()' 46 | - R --no-save <<< 'library("tensorflow"); # install_tensorflow()' 47 | - R --no-save <<< 'library("keras"); # install_keras()' 48 | - R --no-save <<< 'reticulate::py_discover_config()' 49 | after_success: 50 | - Rscript -e 'covr::codecov()' 51 | - Rscript -e "covr::coveralls()" 52 | - 'if [[ "$TRAVIS_PULL_REQUEST" == "false" && "$TRAVIS_BRANCH" == "master" && "$TRAVIS_R_VERSION_STRING" == "release" && "$TRAVIS_EVENT_TYPE" != "cron" ]] ; then 53 | R --no-save <<< "devtools::install(); pkgdown::build_site()"; 54 | git checkout master; 55 | export TRAVIS_COMMIT_MSG="$(git log --format=%B --no-merges -n 1)"; 56 | git config --global user.name "Travis CI"; 57 | git config --global user.email "$COMMIT_AUTHOR_EMAIL"; 58 | git config credential.helper "store --file=.git/credentials"; 59 | echo "https://${GH_TOKEN}:@github.com" >> .git/credentials; 60 | git config push.default matching; 61 | git add --force man/*; 62 | git add --force README.md; 63 | git add --force docs/*; 64 | git rm -r --cached $(find . 
-type d -name "*_cache"); 65 | git commit man DESCRIPTION NAMESPACE README.md docs -m "update auto-generated documentation [ci skip]" -m "$TRAVIS_COMMIT_MSG" || true; 66 | git push; 67 | fi;' 68 | - 'if [[ "$TRAVIS_R_VERSION_STRING" == "devel" && "$TRAVIS_EVENT_TYPE" != "cron" ]] ; then 69 | Rscript -e "covr::coveralls()"; 70 | fi;' 71 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: rlR 2 | Type: Package 3 | Title: Reinforcement Learning in R 4 | Version: 0.1.0 5 | Authors@R: c( 6 | person("Xudong", "Sun", email = {"smilesun.east@gmail.com"}, role = c("aut", "cre")), 7 | person("Sebastian", "Gruber", email = {"gruber_sebastian@t-online.de"}, role = c("ctb")) 8 | ) 9 | Maintainer: Xudong Sun 10 | Description: Reinforcement Learning with deep Q learning, double deep Q 11 | learning, frozen target deep Q learning, policy gradient deep learning, policy 12 | gradient with baseline deep learning, actor-critic deep reinforcement learning. 13 | License: BSD_2_clause + file LICENSE 14 | Encoding: UTF-8 15 | Depends: 16 | R (>= 3.4.0), 17 | Imports: 18 | R6, 19 | checkmate, 20 | data.table, 21 | reticulate, 22 | keras, 23 | tensorflow, 24 | logging, 25 | ggplot2, 26 | openssl, 27 | magrittr, 28 | abind, 29 | foreach 30 | LazyData: true 31 | RoxygenNote: 6.1.1 32 | BugReports: https://github.com/smilesun/rlR/issues 33 | URL: https://github.com/smilesun/rlR 34 | SystemRequirements: The following python package are needed to use the gym openAI environment. gym >= 0.10.5; At least one deep learning backend which keras requires(tensorflow, cntk, theano) should be installed on your computer, for example tensorflow >= 1.1.0 (tested on Ubuntu 14.04); The backend keras requires could be installed by keras::install_keras(); Both dependencies can also be installed by rlR::installDep() function. It is important to note that the user should run 'reticulate::use_python("/usr/local/bin/python")' to specify the python path and 'reticulate::use_virtualenv("myenv")' to specify which virtual environment to use. By default, the package is using "~/anaconda3/bin/python" as its python version. 
For detail, please refer to https://rstudio.github.io/reticulate/articles/versions.html 35 | Suggests: 36 | imager, 37 | png, 38 | devtools, 39 | testthat, 40 | knitr, 41 | covr, 42 | rmarkdown 43 | VignetteBuilder: knitr 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2018 2 | COPYRIGHT HOLDER: Xudong Sun 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(Agent) 4 | export(AgentArmed) 5 | export(Environment) 6 | export(checkPyDep) 7 | export(getDefaultConf) 8 | export(initAgent) 9 | export(installDep2SysVirtualEnv) 10 | export(installDepConda) 11 | export(listAvailAgent) 12 | export(listAvailConf) 13 | export(listGymEnvs) 14 | export(makeGymEnv) 15 | export(repExperiment) 16 | export(rlr_test_if_gym_works) 17 | export(rlr_test_if_keras_works) 18 | export(rlr_test_if_tensorflow_works) 19 | export(showDefaultConf) 20 | import(R6) 21 | import(abind) 22 | import(checkmate) 23 | import(data.table) 24 | import(foreach) 25 | import(ggplot2) 26 | import(keras) 27 | import(logging) 28 | import(openssl) 29 | import(reticulate) 30 | import(tensorflow) 31 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # rlR 0.1.0 2 | * Initial release. 3 | * Added a `NEWS.md` file to track changes to the package. 4 | -------------------------------------------------------------------------------- /R/agent_ddqn2.R: -------------------------------------------------------------------------------- 1 | # @title Double Q learning 2 | # 3 | # @format \code{\link{R6Class}} object 4 | # @description 5 | # A \code{\link{R6Class}} to represent Double Deep Q learning Armed Agent 6 | # %$Q_u(S, a; \theta_1) = r + Q_u(S', argmax_a' Q_h(S',a'), \theta_1) + delta$ 7 | # target action = argmax Q_h 8 | # @section Methods: 9 | # Inherited from \code{AgentArmed}: 10 | # @inheritSection AgentArmed Methods 11 | # 12 | # @return [\code{\link{AgentDDQN}}]. 
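# Hypothetical sketch, not part of the package: the double-DQN target rule that the
# extractTarget() method of the class below implements per replayed transition. The update
# network selects the greedy next action and the frozen target network evaluates it; all
# argument names here are illustrative.
double_dqn_target_sketch = function(r, done, gamma, q_next_update, q_next_target) {
  if (done) return(r)
  a_star = which.max(q_next_update)    # action selection by the update network
  r + gamma * q_next_target[a_star]    # action evaluation by the target network
}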
13 | AgentDDQN = R6::R6Class("AgentDDQN", 14 | inherit = AgentFDQN, 15 | public = list( 16 | p.next.h = NULL, 17 | setBrain = function() { 18 | super$setBrain() # current setBrain will overwrite super$setBrain() 19 | self$brain_update = self$brain 20 | self$brain_target = SurroNN$new(self) 21 | }, 22 | 23 | getXY = function(batchsize) { 24 | self$list.replay = self$mem$sample.fun(batchsize) 25 | list.states.old = lapply(self$list.replay, ReplayMem$extractOldState) 26 | list.states.next = lapply(self$list.replay, ReplayMem$extractNextState) 27 | self$model = self$brain_update 28 | self$p.old = self$getYhat(list.states.old) 29 | self$p.next = self$getYhat(list.states.next) 30 | self$model = self$brain_target 31 | self$p.next.h = self$getYhat(list.states.next) 32 | list.targets = lapply(1:length(self$list.replay), self$extractTarget) 33 | self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 34 | temp = simplify2array(list.states.old) # R array put elements columnwise 35 | mdim = dim(temp) 36 | norder = length(mdim) 37 | self$replay.x = aperm(temp, c(norder, 1:(norder - 1))) 38 | self$replay.y = t(simplify2array(list.targets)) # array p 39 | }, 40 | 41 | extractTarget = function(i) { 42 | ins = self$list.replay[[i]] 43 | act2update = ReplayMem$extractAction(ins) 44 | yhat = self$p.old[i, ] 45 | vec.next.Q.u = self$p.next[i, ] # action selection 46 | vec.next.Q.h = self$p.next.h[i, ] # action evaluation 47 | a_1 = which.max(vec.next.Q.u) # action selection 48 | r = ReplayMem$extractReward(ins) 49 | done = ReplayMem$extractDone(ins) 50 | if (done) { 51 | target = r 52 | } else { 53 | target = r + self$gamma * vec.next.Q.h[a_1] # action evaluation 54 | } 55 | mt = yhat 56 | mt[act2update] = target 57 | return(mt) 58 | } 59 | ) # public 60 | ) 61 | 62 | AgentDDQN$info = function() { 63 | "Double Deep Q Learning" 64 | } 65 | 66 | rlR.conf.AgentDDQN = function() { 67 | RLConf$new( 68 | render = FALSE, 69 | console = TRUE, 70 | log = FALSE, 71 | policy.maxEpsilon = 1, 72 | policy.minEpsilon = 0.01, 73 | policy.decay.rate = exp(-0.001), 74 | policy.name = "EpsilonGreedy", 75 | replay.batchsize = 64L, 76 | agent.update.target.freq = 200 77 | ) 78 | } 79 | 80 | AgentDDQN$test = function() { 81 | library(rlR) 82 | env = makeGymEnv("CartPole-v0") 83 | agent = initAgent("AgentDDQN", env) 84 | agent$learn(200L) 85 | } 86 | -------------------------------------------------------------------------------- /R/agent_double_qn.R: -------------------------------------------------------------------------------- 1 | # @title Double Q learning 2 | # 3 | # @format \code{\link{R6Class}} object 4 | # @description 5 | # A \code{\link{R6Class}} to represent Double Deep Q learning Armed Agent 6 | # %$Q_u(S, a; \theta_1) = r + Q_u(S', argmax_a' Q_h(S',a'), \theta_1) + delta$ 7 | # target action = argmax Q_h 8 | # @section Methods: 9 | # Inherited from \code{AgentArmed}: 10 | # @inheritSection AgentArmed Methods 11 | # 12 | # @return [\code{\link{AgentDDQN}}]. 
13 | # AgentDDQN = R6::R6Class("AgentDDQN", 14 | # inherit = AgentDQN, 15 | # public = list( 16 | # brain2 = NULL, 17 | # brain_u = NULL, # u: to be updated 18 | # brain_h = NULL, # h: to help 19 | # p.next.h = NULL, 20 | # setBrain = function() { 21 | # super$setBrain() # current setBrain will overwrite super$setBrain() 22 | # self$brain2 = SurroNN$new(self) 23 | # }, 24 | # 25 | # toss = function() { 26 | # if (runif(1L) < 0.5) { 27 | # self$brain_u = self$brain 28 | # self$brain_h = self$brain2 29 | # } else { 30 | # self$brain_u = self$brain2 31 | # self$brain_h = self$brain 32 | # } 33 | # 34 | # }, 35 | # 36 | # getXY = function(batchsize) { 37 | # self$list.replay = self$mem$sample.fun(batchsize) 38 | # self$glogger$log.nn$info("replaying %s", self$mem$replayed.idx) 39 | # list.states.old = lapply(self$list.replay, ReplayMem$extractOldState) 40 | # list.states.next = lapply(self$list.replay, ReplayMem$extractNextState) 41 | # self$model = self$brain_u 42 | # self$p.old = self$getYhat(list.states.old) 43 | # self$p.next = self$getYhat(list.states.next) 44 | # self$model = self$brain_h 45 | # self$p.next.h = self$getYhat(list.states.next) 46 | # list.targets = lapply(1:length(self$list.replay), self$extractTarget) 47 | # self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 48 | # temp = simplify2array(list.states.old) # R array put elements columnwise 49 | # mdim = dim(temp) 50 | # norder = length(mdim) 51 | # self$replay.x = aperm(temp, c(norder, 1:(norder - 1))) 52 | ##assert(self$replay.x[1,]== list.states.old[[1L]]) 53 | # self$replay.y = t(simplify2array(list.targets)) # array p 54 | # }, 55 | # 56 | # 57 | # replay = function(batchsize) { 58 | # self$getXY(batchsize) 59 | # self$brain_u$train(self$replay.x, self$replay.y) 60 | # }, 61 | # 62 | # extractTarget = function(i) { 63 | # ins = self$list.replay[[i]] 64 | # act2update = ReplayMem$extractAction(ins) 65 | # yhat = self$p.old[i, ] 66 | # vec.next.Q.u = self$p.next[i, ] 67 | # vec.next.Q.h = self$p.next.h[i, ] 68 | # a_1 = which.max(vec.next.Q.u) # not h! 69 | # r = ReplayMem$extractReward(ins) 70 | # done = ReplayMem$extractDone(ins) 71 | # if (done) { 72 | # target = r 73 | # } else { 74 | # target = r + self$gamma * vec.next.Q.h[a_1] # not u! 75 | # } 76 | # mt = yhat 77 | # mt[act2update] = target 78 | # return(mt) 79 | # }, 80 | # 81 | # evaluateArm = function(state) { 82 | # state = array_reshape(state, c(1L, dim(state))) 83 | # self$glogger$log.nn$info("state: %s", paste(state, collapse = " ")) 84 | # vec.arm.q.u = self$brain_u$pred(state) 85 | # vec.arm.q.h = self$brain_h$pred(state) 86 | # self$vec.arm.q = (vec.arm.q.u + vec.arm.q.h) / 2.0 87 | # self$glogger$log.nn$info("prediction: %s", paste(self$vec.arm.q, collapse = " ")) 88 | # }, 89 | # 90 | # act = function(state) { 91 | # self$toss() 92 | # assert(class(state) == "array") 93 | # self$evaluateArm(state) 94 | # self$policy$act(state) 95 | # } 96 | # ) # public 97 | # ) 98 | # 99 | # AgentDDQN$info = function() { 100 | # "Double Deep Q Learning" 101 | # } 102 | -------------------------------------------------------------------------------- /R/agent_dqn.R: -------------------------------------------------------------------------------- 1 | # @title DQN 2 | # 3 | # @format \code{\link{R6Class}} object 4 | # @description Deep Q Network 5 | # 6 | # @section Methods: 7 | # Inherited from \code{AgentArmed}: 8 | # @inheritSection AgentArmed Methods 9 | # 10 | # @return [\code{\link{AgentDQN}}]. 
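# Hypothetical sketch, not part of the package: the one-step Q-learning target that the
# extractTarget() method of the class below builds for each replayed transition
# (s, a, r, s', done); `q_next` stands for the network's Q-value vector at s'.
dqn_target_sketch = function(r, done, gamma, q_next) {
  if (done) return(r)
  r + gamma * max(q_next)   # bootstrap with the greedy next-state value
}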
11 | AgentDQN = R6::R6Class("AgentDQN", 12 | inherit = AgentArmed, 13 | public = list( 14 | setBrain = function() { 15 | self$task = "value_fun" 16 | self$brain = SurroNN$new(self) 17 | self$model = self$brain 18 | }, 19 | 20 | getXY = function(batchsize) { 21 | self$list.replay = self$mem$sample.fun(batchsize) 22 | self$glogger$log.nn$info("replaying %s", self$mem$replayed.idx) 23 | list.states.old = lapply(self$list.replay, ReplayMem$extractOldState) 24 | list.states.next = lapply(self$list.replay, ReplayMem$extractNextState) 25 | self$p.old = self$getYhat(list.states.old) 26 | self$p.next = self$getYhat(list.states.next) 27 | list.targets = lapply(1:length(self$list.replay), self$extractTarget) 28 | self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 29 | temp = Reduce(rbind, list.states.old) 30 | nr = length(list.states.old) 31 | temp = simplify2array(list.states.old) # R array put elements columnwise 32 | mdim = dim(temp) 33 | norder = length(mdim) 34 | self$replay.x = aperm(temp, c(norder, 1:(norder - 1))) 35 | self$replay.y = t(simplify2array(list.targets)) # array put elements columnwise 36 | diff_table = abs(self$replay.y - self$p.old) 37 | self$replay_delta = apply(diff_table, 1, mean) 38 | }, 39 | 40 | 41 | extractTarget = function(i) { 42 | ins = self$list.replay[[i]] 43 | act2update = ReplayMem$extractAction(ins) 44 | p.old = self$p.old[i, ] 45 | self$yhat = p.old # for calculating the TD error 46 | r = ReplayMem$extractReward(ins) 47 | done = ReplayMem$extractDone(ins) 48 | if (done) { 49 | target = r 50 | } else { 51 | vec.next.Q = self$p.next[i, ] 52 | a_1 = which.max(vec.next.Q) # action index start from 1L 53 | target = r + self$gamma * max(vec.next.Q) 54 | # equivalent to huber loss 55 | if (self$clip_td_err) { 56 | target = max(target, p.old[act2update] - 1L) 57 | target = min(target, p.old[act2update] + 1L) 58 | } 59 | } 60 | mt = p.old 61 | mt[act2update] = target # the not active action arm's Q will not be updated 62 | #FIXME: shall here be 0? 63 | # mt[-act2update] = 0.0 # the not active action arm will be set to be zero 64 | return(mt) 65 | }, 66 | 67 | afterStep = function() { 68 | if (self$interact$step_in_episode %% self$replay.freq == 0L) { 69 | self$replay(self$replay.size) 70 | } 71 | self$policy$afterStep() 72 | }, 73 | 74 | afterEpisode = function() { 75 | self$policy$afterEpisode() 76 | self$mem$afterEpisode() 77 | self$brain$afterEpisode() 78 | } 79 | ) # public 80 | ) 81 | 82 | AgentDQN$info = function() { 83 | "Vanilla Deep Q learning" 84 | } 85 | 86 | rlR.conf.AgentDQN = function() { 87 | RLConf$new( 88 | render = FALSE, 89 | console = TRUE, 90 | log = FALSE, 91 | policy.maxEpsilon = 1, 92 | policy.minEpsilon = 0.01, 93 | policy.decay.rate = exp(-0.001), 94 | policy.name = "EpsilonGreedy", 95 | replay.batchsize = 64L) 96 | } 97 | 98 | AgentDQN$test = function() { 99 | library(rlR) 100 | env = makeGymEnv("CartPole-v0") 101 | agent = initAgent("AgentDQN", env) 102 | agent$learn(200L) 103 | } 104 | -------------------------------------------------------------------------------- /R/agent_fdqn.R: -------------------------------------------------------------------------------- 1 | # @title Frozen target Q learning 2 | # 3 | # @format \code{\link{R6Class}} object 4 | # @description Frozen target Q learning 5 | # 6 | # @section Methods: 7 | # Inherited from \code{AgentArmed}: 8 | # @inheritSection AgentArmed Methods 9 | # 10 | # @return [\code{\link{AgentFDQN}}]. 
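# Hypothetical sketch, not part of the package: the frozen-target schedule the class below
# uses. The update network is trained at every replay, while the target network is only
# re-synchronised once `update_freq` global steps have passed since the last sync (compare
# shouldUpdateModel()/updateModel() below); argument names are illustrative.
should_sync_target_sketch = function(global_step, last_sync, update_freq) {
  global_step - last_sync > update_freq
}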
11 | AgentFDQN = R6::R6Class("AgentFDQN", inherit = AgentDQN, 12 | public = list( 13 | brain_target = NULL, 14 | brain_update = NULL, 15 | last_update = NULL, 16 | initialize = function(env, conf) { 17 | self$last_update = 0 18 | super$initialize(env, conf) 19 | self$updateFreq = self$conf$get("agent.update.target.freq") 20 | }, 21 | 22 | setBrain = function() { 23 | super$setBrain() 24 | self$brain_update = SurroNN$new(self) 25 | self$brain_target = self$brain 26 | }, 27 | 28 | showBrain = function() { 29 | print("control network:") 30 | print(self$brain_update$model) 31 | print("target network:") 32 | print(self$brain_target$model) 33 | }, 34 | 35 | ## @override 36 | getXY = function(batchsize) { 37 | self$list.replay = self$mem$sample.fun(batchsize) 38 | list.states.old = lapply(self$list.replay, ReplayMem$extractOldState) 39 | list.states.next = lapply(self$list.replay, ReplayMem$extractNextState) 40 | self$model = self$brain_update # use update network to generate target 41 | self$p.old = self$getYhat(list.states.old) 42 | self$model = self$brain_target # use target network to generate target 43 | self$p.next = self$getYhat(list.states.next) 44 | list.targets = lapply(1:length(self$list.replay), self$extractTarget) 45 | #temp = Reduce(rbind, list.states.old) # does not work for tensor 46 | batch_states = simplify2array(list.states.old) # R array put elements columnwise 47 | mdim = dim(batch_states) 48 | norder = length(mdim) 49 | self$replay.x = aperm(batch_states, c(norder, 1:(norder - 1))) 50 | self$replay.y = t(simplify2array(list.targets)) # array put elements columnwise 51 | #diff_table = abs(self$replay.y - self$p.old) 52 | #self$replay_delta = apply(diff_table, 1, mean) 53 | }, 54 | 55 | 56 | replay = function(batchsize) { 57 | self$getXY(batchsize) # from base class 58 | self$brain_update$train(self$replay.x, self$replay.y) # update the policy model 59 | }, 60 | 61 | act = function(state) { 62 | assert(class(state) == "array") 63 | self$model = self$brain_update 64 | self$evaluateArm(state) 65 | self$policy$act(state) 66 | }, 67 | 68 | updateModel = function() { 69 | cat(sprintf("\n\n updating model \n\n")) 70 | tw = self$brain_target$getWeights() 71 | uw = self$brain_update$getWeights() 72 | uuw = lapply(uw, function(x) x * 0.1) 73 | ttw = lapply(tw, function(x) x * 0.9) 74 | ww = mapply("+", uw, tw) 75 | self$brain_target$setWeights(uw) 76 | self$last_update = self$interact$global_step_len 77 | }, 78 | 79 | shouldUpdateModel = function() { 80 | self$interact$global_step_len - self$last_update > self$updateFreq 81 | }, 82 | 83 | afterEpisode = function() { 84 | if (self$shouldUpdateModel()) { 85 | self$updateModel() 86 | } 87 | super$afterEpisode() 88 | } 89 | ) 90 | ) 91 | 92 | rlR.conf.AgentFDQN = function() { 93 | RLConf$new( 94 | render = FALSE, 95 | console = TRUE, 96 | log = FALSE, 97 | policy.maxEpsilon = 1, 98 | policy.minEpsilon = 0.01, 99 | policy.decay.rate = exp(-0.001), 100 | policy.name = "EpsilonGreedy", 101 | replay.batchsize = 64L, 102 | agent.update.target.freq = 400 103 | ) 104 | } 105 | 106 | 107 | 108 | AgentFDQN$info = function() { 109 | "Frozen Target Deep Q Learning" 110 | } 111 | 112 | AgentFDQN$test = function() { 113 | library(rlR) 114 | env = makeGymEnv("CartPole-v0") 115 | agent = initAgent("AgentFDQN", env) 116 | agent$learn(400L) 117 | } 118 | -------------------------------------------------------------------------------- /R/agent_pg.R: -------------------------------------------------------------------------------- 1 | # @title Policy Gradient 
2 | # @format \code{\link{R6Class}} object 3 | # @description Policy Gradient 4 | # 5 | # @section Methods: 6 | # Inherited from \code{AgentArmed}: 7 | # @inheritSection AgentArmed Methods 8 | # 9 | # @return [\code{\link{AgentPG}}]. 10 | AgentPG = R6::R6Class("AgentPG", 11 | inherit = AgentArmed, 12 | public = list( 13 | flag_rescue = NULL, 14 | amf = NULL, 15 | initialize = function(env, conf) { 16 | self$flag_rescue = conf$get("agent.flag.reset.net") 17 | super$initialize(env, conf = conf) 18 | }, 19 | 20 | setBrain = function() { 21 | self$task = "policy_fun" 22 | self$brain = SurroNN$new(self) 23 | self$model = self$brain 24 | }, 25 | 26 | extractTarget = function(ins) { 27 | act = ReplayMem$extractAction(ins) 28 | vec_act = rep(0.0, self$act_cnt) 29 | vec_act[act] = +1.0 30 | return(vec_act) 31 | }, 32 | 33 | # loss = -\sum_k{(y_k\log(yhat_k)}, \frac{\partial loss}{\partial \yhat} = -\sum_k{y_k\frac{yhat_k}{yhat_k}} = -policy gradient 34 | #@override 35 | getXY = function(batchsize) { 36 | self$list.replay = self$mem$sample.fun(batchsize) 37 | self$glogger$log.nn$info("replaying %s", self$mem$replayed.idx) 38 | list_states_old = lapply(self$list.replay, ReplayMem$extractOldState) 39 | list_targets = lapply(self$list.replay, self$extractTarget) 40 | self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 41 | arr_states_old = simplify2array(list_states_old) 42 | norder = length(dim(arr_states_old)) 43 | self$replay.x = aperm(arr_states_old, c(norder, 1:(norder - 1))) 44 | self$replay.y = t(simplify2array(list_targets)) 45 | # self$replay.y = array(, dim = c(batchsize, self$act_cnt)) 46 | }, 47 | 48 | setAmf = function(batchsize) { 49 | self$setReturn() 50 | vec_discount = cumprod(rep(self$gamma, batchsize)) 51 | amf = self$vec_dis_return * vec_discount 52 | amf = self$vec_dis_return 53 | amf = amf - mean(amf) 54 | self$amf = amf / sd(amf) 55 | }, 56 | 57 | # replay is executed at the end of episode for each step of the episode, batch size is always set to be the episode length 58 | replay = function(batchsize) { 59 | self$setAmf(batchsize) 60 | self$getXY(batchsize) 61 | self$replay.y = diag(self$amf) %*% self$replay.y 62 | self$brain$batch_update(self$replay.x, self$replay.y) # update the policy model 63 | }, 64 | 65 | setReturn = function() { 66 | episode_idx = self$interact$perf$epi_idx 67 | self$vec_dis_return = self$interact$perf$list_discount_reward_epi[[episode_idx]] 68 | }, 69 | 70 | #@override 71 | afterEpisode = function() { 72 | self$replay(self$interact$perf$total_steps) # key difference here 73 | super$afterEpisode() 74 | } 75 | ) # public 76 | ) 77 | 78 | rlR.conf.AgentPG = rlR.conf.AgentPGBaseline = function() { 79 | RLConf$new( 80 | agent.lr = 1e-2, 81 | render = FALSE, 82 | console = TRUE, 83 | flag_rescue = FALSE, 84 | agent.gamma = 0.99, 85 | policy.maxEpsilon = 0, 86 | policy.minEpsilon = 0, 87 | agent.flag.reset.net = FALSE, 88 | policy.name = "Prob", 89 | replay.memname = "Latest", 90 | replay.epochs = 1L) 91 | } 92 | 93 | AgentPG$info = function() { 94 | "Policy Gradient Monte Carlo" 95 | } 96 | 97 | AgentPG$test = function() { 98 | env = makeGymEnv("CartPole-v0") 99 | conf = getDefaultConf("AgentPG") 100 | agent = initAgent("AgentPG", env, conf, custom_brain = F) 101 | agent$learn(200L) 102 | } 103 | -------------------------------------------------------------------------------- /R/agent_pg_actor_critic.R: -------------------------------------------------------------------------------- 1 | # @title AgentActorCritic 2 | # 3 | # @format 
\code{\link{R6Class}} object 4 | # @description ActorCritic Agent 5 | # 6 | # @section Methods: 7 | # Inherited from \code{AgentArmed}: 8 | # @inheritSection AgentArmed Methods 9 | # 10 | # @return [\code{\link{AgentActorCritic}}]. 11 | AgentActorCritic = R6::R6Class("AgentActorCritic", 12 | inherit = AgentPGBaseline, 13 | public = list( 14 | setBrain = function() { 15 | self$task = "policy_fun" 16 | self$brain_actor = SurroNN$new(self) 17 | self$brain_actor$lr = 0.001 18 | self$task = "value_fun" 19 | self$brain_critic = SurroNN$new(self) 20 | self$brain_critic$lr = 0.01 21 | self$model = self$brain_critic 22 | }, 23 | 24 | setAmf = function() { 25 | vec.step = unlist(lapply(self$list.replay, ReplayMem$extractStep)) 26 | vec_discount = sapply(vec.step, function(x) self$gamma^x) 27 | self$amf = vec_discount 28 | }, 29 | 30 | replay = function(batchsize) { 31 | self$getReplayYhat(batchsize) # self$list.rewards are extracted here 32 | self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 33 | self$setAmf() 34 | len = length(self$list.replay) 35 | list.targets.critic = lapply(1:len, function(i) as.vector(self$extractCriticTarget(i))) 36 | list.targets.actor = lapply(1:len, function(i) as.vector(self$extractActorTarget(i))) 37 | y_actor = t(simplify2array(list.targets.actor)) 38 | y_actor = self$amf %*% y_actor 39 | y_actor = self$delta %*% y_actor 40 | y_critic = array(unlist(list.targets.critic), dim = c(len, 1L)) 41 | self$brain_critic$batch_update(self$replay.x, y_critic) # first update critic 42 | self$brain_actor$batch_update(self$replay.x, y_actor) 43 | }, 44 | 45 | extractCriticTarget = function(i) { 46 | nv = self$gamma * self$p_next_c 47 | vec.done = unlist(lapply(self$list.replay, ReplayMem$extractDone)) 48 | idx = which(vec.done) 49 | target = (unlist(self$list.rewards) + nv) 50 | if (length(idx) > 0) target = unlist(self$list.rewards) 51 | self$delta = target - self$p_old_c # Bellman Error as advantage 52 | return(target) 53 | }, 54 | 55 | afterStep = function() { 56 | self$policy$afterStep() 57 | self$replay(1) 58 | }, 59 | 60 | afterEpisode = function() { 61 | self$policy$afterEpisode() 62 | self$mem$afterEpisode() 63 | #if (self$flag_rescue) self$interact$perf$rescue() 64 | self$brain_actor$afterEpisode() 65 | self$brain_critic$afterEpisode() 66 | #self$adaptLearnRate() 67 | } 68 | 69 | ) 70 | ) 71 | 72 | AgentActorCritic$info = function() { 73 | "Actor Critic Method" 74 | } 75 | 76 | AgentActorCritic$test = function() { 77 | env = makeGymEnv("CartPole-v0") 78 | agent = initAgent("AgentActorCritic", env) 79 | agent$learn(2000L) 80 | } 81 | -------------------------------------------------------------------------------- /R/agent_pg_baseline.R: -------------------------------------------------------------------------------- 1 | # @title ReinforceWithBaseline 2 | # @format \code{\link{R6Class}} object 3 | # @description ReinforceWithBaseline 4 | # $\delta = G_t - v_w(s_t)$ 5 | # $w = w + \beta * \delta * \nabla_w v_w(s_t)$ 6 | # $\theta = \theta + \alpha * \gamma^t * \delta * \nabla_{\theta}log(\pi_{\theta}(A_t|S_t)) 7 | # @return [\code{\link{AgentPGBaseline}}]. 
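# Hypothetical sketch, not part of the package: how advantage-weighted actor targets can be
# formed, mirroring the REINFORCE-with-baseline update written above and the
# diag(...) %*% one-hot scaling used in replay() below. The advantage delta = G_t - v_w(s_t)
# and a per-step discount weight (gamma^t in the rule above) multiply the cross-entropy
# targets; all argument names are illustrative.
actor_targets_sketch = function(actions, act_cnt, returns, v_hat, discount_pow) {
  delta = returns - v_hat                                      # advantage per transition
  onehot = t(sapply(actions, function(a) replace(numeric(act_cnt), a, 1)))
  diag(discount_pow * delta, nrow = length(delta)) %*% onehot  # gamma^t * delta weighting
}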
8 | AgentPGBaseline = R6::R6Class("AgentPGBaseline", 9 | inherit = AgentPG, 10 | public = list( 11 | brain_actor = NULL, # cross entropy loss 12 | brain_critic = NULL, # mse loss 13 | critic_yhat = NULL, 14 | p_old_c = NULL, 15 | p_next_c = NULL, 16 | delta = NULL, 17 | list.rewards = NULL, 18 | setBrain = function() { 19 | self$task = "policy_fun" 20 | self$brain_actor = SurroNN$new(self) 21 | self$task = "value_fun" 22 | self$brain_critic = SurroNN$new(self) 23 | self$model = self$brain_critic 24 | }, 25 | 26 | getReplayYhat = function(batchsize) { 27 | self$list.replay = self$mem$sample.fun(batchsize) 28 | self$glogger$log.nn$info("replaying %s", self$mem$replayed.idx) 29 | list.states.old = lapply(self$list.replay, ReplayMem$extractOldState) 30 | list.states.next = lapply(self$list.replay, ReplayMem$extractNextState) 31 | self$list.rewards = lapply(self$list.replay, ReplayMem$extractReward) 32 | self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 33 | self$model = self$brain_critic 34 | self$p_old_c = self$getYhat(list.states.old) 35 | self$p_next_c = self$getYhat(list.states.next) 36 | temp = simplify2array(list.states.old) # R array put elements columnwise 37 | mdim = dim(temp) 38 | norder = length(mdim) 39 | self$replay.x = aperm(temp, c(norder, 1:(norder - 1))) 40 | }, 41 | 42 | replay = function(batchsize) { 43 | self$getReplayYhat(batchsize) 44 | len = length(self$list.replay) # replay.list might be smaller than batchsize 45 | self$setAmf(batchsize) 46 | self$delta = array(self$vec_dis_return, dim = dim(self$p_old_c)) - self$p_old_c 47 | list.targets.actor = lapply(1:len, function(i) as.vector(self$extractActorTarget(i))) 48 | list.targets.critic = lapply(1:len, function(i) as.vector(self$extractCriticTarget(i))) 49 | y_actor = t(simplify2array(list.targets.actor)) 50 | y_actor = diag(self$amf) %*% y_actor 51 | y_actor = diag(as.vector(self$delta)) %*% y_actor 52 | y_critic = array(unlist(list.targets.critic), dim = c(len, 1L)) 53 | self$brain_actor$batch_update(self$replay.x, y_actor) # update the policy model 54 | self$brain_critic$batch_update(self$replay.x, y_critic) # update the policy model 55 | }, 56 | 57 | extractCriticTarget = function(i) { 58 | y = self$p_old_c[i, ] + self$delta[i] 59 | return(y) 60 | }, 61 | 62 | extractActorTarget = function(i) { 63 | act = self$list.acts[[i]] 64 | vec.act = rep(0L, self$act_cnt) 65 | vec.act[act] = 1.0 66 | target = vec.act 67 | return(target) 68 | }, 69 | 70 | adaptLearnRate = function() { 71 | self$brain_actor$lr = self$brain_actor$lr * self$lr_decay 72 | self$brain_critic$lr = self$brain_critic$lr * self$lr_decay 73 | }, 74 | 75 | afterStep = function() { 76 | self$policy$afterStep() 77 | }, 78 | 79 | #@override 80 | evaluateArm = function(state) { 81 | state = array_reshape(state, c(1L, dim(state))) 82 | self$vec.arm.q = self$brain_actor$pred(state) 83 | self$glogger$log.nn$info("state: %s", paste(state, collapse = " ")) 84 | self$glogger$log.nn$info("prediction: %s", paste(self$vec.arm.q, collapse = " ")) 85 | }, 86 | 87 | afterEpisode = function() { 88 | self$replay(self$interact$perf$total_steps) # key difference here 89 | } 90 | ) # public 91 | ) 92 | 93 | AgentPGBaseline$info = function() { 94 | "Policy Gradient with Baseline" 95 | } 96 | 97 | quicktest = function() { 98 | #pg.bl.agent.nn.arch.actor = list(nhidden = 64, act1 = "tanh", act2 = "softmax", loss = "categorical_crossentropy", lr = 25e-3, kernel_regularizer = "regularizer_l2(l=0.0001)", bias_regularizer = "regularizer_l2(l=0.0001)", decay = 0.9, 
clipnorm = 5) 99 | #pg.bl.agent.nn.arch.critic = list(nhidden = 64, act1 = "tanh", act2 = "linear", loss = "mse", lr = 25e-3, kernel_regularizer = "regularizer_l2(l=0.0001)", bias_regularizer = "regularizer_l2(l=0)", decay = 0.9, clipnorm = 5) 100 | #value_fun = makeNetFun(pg.bl.agent.nn.arch.critic, flag_critic = T) 101 | #policy_fun = makeNetFun(pg.bl.agent.nn.arch.actor) 102 | env = makeGymEnv("CartPole-v0") 103 | conf = getDefaultConf("AgentPGBaseline") 104 | agent = initAgent("AgentPGBaseline", env, conf, custom_brain = F) 105 | #agent$customizeBrain(list(value_fun = value_fun, policy_fun = policy_fun)) 106 | agent$learn(200L) 107 | } 108 | -------------------------------------------------------------------------------- /R/agent_pg_compact.R: -------------------------------------------------------------------------------- 1 | # AgentPGCompactBL = R6::R6Class("AgentPGCompactBL", 2 | # inherit = AgentPGBaseline, 3 | # public = list( 4 | # p_old_c = NULL, 5 | # p_next_c = NULL, 6 | # delta = NULL, 7 | # list.rewards = NULL, 8 | # 9 | # setBrain = function() { 10 | # self$task = "policy_fun" 11 | # self$brain_actor = SurroNN$new(self) 12 | # }, 13 | # 14 | # getReplayYhat = function(batchsize) { 15 | # self$list.replay = self$mem$sample.fun(batchsize) 16 | # self$glogger$log.nn$info("replaying %s", self$mem$replayed.idx) 17 | # list.states.old = lapply(self$list.replay, ReplayMem$extractOldState) 18 | # list.states.next = lapply(self$list.replay, ReplayMem$extractNextState) 19 | # self$list.rewards = lapply(self$list.replay, ReplayMem$extractReward) 20 | # self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 21 | # self$model = self$brain_critic 22 | # self$p_old_c = self$getYhat(list.states.old) 23 | # self$p_next_c = self$getYhat(list.states.next) 24 | # temp = simplify2array(list.states.old) # R array put elements columnwise 25 | # mdim = dim(temp) 26 | # norder = length(mdim) 27 | # self$replay.x = aperm(temp, c(norder, 1:(norder - 1))) 28 | # }, 29 | # 30 | # replay = function(batchsize) { 31 | # self$getReplayYhat(batchsize) 32 | # len = length(self$list.replay) # replay.list might be smaller than batchsize 33 | # self$setAmf(batchsize) 34 | # self$delta = array(self$vec_dis_return, dim = dim(self$p_old_c)) - self$p_old_c 35 | # list.targets.actor = lapply(1:len, function(i) as.vector(self$extractActorTarget(i))) 36 | # list.targets.critic = lapply(1:len, function(i) as.vector(self$extractCriticTarget(i))) 37 | # y_actor = t(simplify2array(list.targets.actor)) 38 | # y_actor = diag(self$amf) %*% y_actor 39 | # y_actor = diag(self$delta) %*% y_actor 40 | # y_critic = array(unlist(list.targets.critic), dim = c(len, 1L)) 41 | # self$brain_actor$train(self$replay.x, y_actor) # update the policy model 42 | # self$brain_critic$train(self$replay.x, y_critic) # update the policy model 43 | # }, 44 | # 45 | # extractCriticTarget = function(i) { 46 | # y = self$p_old_c[i, ] + self$delta[i] 47 | # return(y) 48 | # }, 49 | # 50 | # extractActorTarget = function(i) { 51 | # act = self$list.acts[[i]] 52 | # delta = (+1.0) * as.vector(self$delta[i]) 53 | ##FIXME: interestingly, multiply advantage by -1 also works 54 | # vec.act = rep(0L, self$act_cnt) 55 | # vec.act[act] = 1.0 56 | # target = delta * array(vec.act, dim = c(1L, self$act_cnt)) 57 | # return(target) 58 | # }, 59 | # 60 | # adaptLearnRate = function() { 61 | # self$brain_actor$lr = self$brain_actor$lr * self$lr_decay 62 | # self$brain_critic$lr = self$brain_critic$lr * self$lr_decay 63 | # }, 64 | # 65 | # afterStep = 
function() { 66 | # self$policy$afterStep() 67 | # }, 68 | # 69 | ##@override 70 | # evaluateArm = function(state) { 71 | # state = array_reshape(state, c(1L, dim(state))) 72 | # self$vec.arm.q = self$brain_actor$pred(state) 73 | # self$glogger$log.nn$info("state: %s", paste(state, collapse = " ")) 74 | # self$glogger$log.nn$info("prediction: %s", paste(self$vec.arm.q, collapse = " ")) 75 | # }, 76 | # 77 | # afterEpisode = function(interact) { 78 | # self$replay(self$interact$perf$total_steps) # key difference here 79 | # } 80 | # ) # public 81 | # ) 82 | -------------------------------------------------------------------------------- /R/agent_table.R: -------------------------------------------------------------------------------- 1 | AgentTable = R6Class("AgentTable", 2 | inherit = AgentArmed, 3 | public = list( 4 | q_tab = NULL, 5 | alpha = NULL, 6 | lr_min = NULL, 7 | act_names_per_state = NULL, 8 | vis_after_episode = NULL, 9 | initialize = function(env, conf, q_init = 0.0, state_names = NULL, act_names_per_state = NULL, vis_after_episode = F) { 10 | super$initialize(env, conf) 11 | self$vis_after_episode = vis_after_episode 12 | self$act_names_per_state = act_names_per_state 13 | self$q_tab = matrix(q_init, nrow = self$state_dim, ncol = self$act_cnt) 14 | if (!is.null(state_names)) rownames(self$q_tab) = state_names 15 | }, 16 | 17 | buildConf = function() { 18 | self$lr_decay = self$conf$get("agent.lr_decay") 19 | self$lr_min = self$conf$get("agent.lr.min") 20 | memname = self$conf$get("replay.memname") 21 | self$mem = makeReplayMem(memname, agent = self, conf = self$conf) 22 | self$alpha = self$conf$get("agent.lr") 23 | self$gamma = self$conf$get("agent.gamma") 24 | policy_name = self$conf$get("policy.name") 25 | self$policy = makePolicy(policy_name, self) 26 | self$glogger = RLLog$new(self$conf) 27 | self$createInteract(self$env) # initialize after all other members are initialized!! 28 | }, 29 | act = function(state) { 30 | self$vec.arm.q = self$q_tab[state, ] 31 | self$vec.arm.q = self$env$evaluateArm(self$vec.arm.q) 32 | self$policy$act(state) 33 | }, 34 | 35 | afterStep = function() { 36 | # Q^{\pi^{*}}(s, a) = R + max \gamma Q^{\pi^{*}}(s', a) 37 | transact = self$mem$samples[[self$mem$size]] # take the latest transaction? 
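# Worked form of the update performed below (sketch only, no extra code): with step size alpha,
#   Q[s, a] <- Q[s, a] + alpha * (r + gamma * max_a' Q[s', a'] - Q[s, a]),
# where the bracketed term is the temporal-difference error `delta` computed below and the
# bootstrap term gamma * max_a' Q[s', a'] is dropped when the transition is terminal.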
38 | # self$q_tab has dim: $#states * #actions$ 39 | if (ReplayMem$extractDone(transact)) future = transact$reward 40 | else future = transact$reward + self$gamma * max(self$q_tab[(transact$state.new), ]) # state start from 0 in cliaff walker 41 | delta = future - self$q_tab[(transact$state.old), transact$action] 42 | self$q_tab[(transact$state.old), transact$action] = self$q_tab[(transact$state.old), transact$action] + self$alpha * delta 43 | }, 44 | 45 | customizeBrain = function() { 46 | }, 47 | 48 | afterEpisode = function(interact) { 49 | self$policy$afterEpisode() 50 | cat(sprintf("\n learning rate: %f \n", self$alpha)) 51 | self$alpha = max(self$alpha * self$lr_decay, self$lr_min) 52 | if (self$vis_after_episode) self$print2() 53 | }, 54 | 55 | print = function() { 56 | self$q_tab 57 | }, 58 | 59 | print2 = function() { 60 | x = self$q_tab 61 | rowise_val = split(x, rep(1:nrow(x), each = ncol(x))) 62 | if (!checkmate::testNull(self$act_names_per_state)) { 63 | checkmate::assert_list(self$act_names_per_state) 64 | checkmate::assert_true(length(self$act_names_per_state) == nrow(self$q_tab)) 65 | colnames_per_row = self$act_names_per_state 66 | list_act_names = mapply(setNames, rowise_val, colnames_per_row, SIMPLIFY = FALSE) 67 | list_act_names = setNames(list_act_names, names(colnames_per_row)) 68 | print(list_act_names) 69 | } else print(rowise_val) 70 | } 71 | ) 72 | ) 73 | 74 | AgentTable$info = function() { 75 | "Tabular Learning" 76 | } 77 | 78 | AgentTable$test = function() { 79 | conf = getDefaultConf("AgentTable") 80 | #conf$set(agent.lr.mean = 0.1, agent.lr = 0.5, agent.lr_decay = 1, policy.name = "EpsilonGreedy") 81 | conf$set(agent.lr.mean = 0.1, agent.lr = 0.5, agent.lr_decay = 0.9999, policy.name = "EpsGreedTie") 82 | agent = initAgent(name = "AgentTable", env = "CliffWalking-v0", conf = conf) 83 | agent$learn(500) 84 | rlR:::visualize(agent$q_tab) 85 | agent$plotPerf() 86 | expect_true(agent$interact$perf$getAccPerf() > -40.0) 87 | } 88 | 89 | 90 | agent.brain.dict.AgentTable = function() NULL 91 | 92 | rlR.conf.AgentTable = function() { 93 | RLConf$new( 94 | render = F, 95 | console = T, 96 | log = FALSE, 97 | agent.lr = 0.5, 98 | agent.gamma = 0.95, 99 | agent.lr_decay = 1.0, 100 | agent.lr.min = 0.01, 101 | policy.maxEpsilon = 0.1, 102 | policy.minEpsilon = 0, 103 | policy.decay.type = "decay_linear", 104 | policy.aneal.steps = 400, 105 | #policy.decay.rate = exp(-0.001), 106 | policy.name = "EpsGreedTie", 107 | agent.start.learn = 0L) 108 | } 109 | -------------------------------------------------------------------------------- /R/conf.R: -------------------------------------------------------------------------------- 1 | # The reason that there exist a Configuration object which is throughout the experiment is that we want to look at the effect of those configuration parameters. 
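# Typical usage sketch (getDefaultConf(), $set() and $get() are defined in this package;
# the parameter values below are just placeholders):
#   conf = getDefaultConf("AgentDQN")
#   conf$set(replay.batchsize = 32L, console = FALSE)
#   conf$get("replay.batchsize")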
2 | RLConf = R6::R6Class("RLConf", 3 | public = list( 4 | static = NULL, 5 | conf.log.perf = NULL, # seperate configuration for output like logging, RData, etc 6 | # get persistence file prefix 7 | getPersist = function(foldername) { 8 | list.str = lapply(names(self$static), function(x) sprintf("-%s: %s-\n", x, self$static[[x]])) 9 | self$conf.log.perf$str.conf = paste0("\n", toString(list.str)) 10 | hash.conf = openssl::md5(self$conf.log.perf$str.conf) 11 | str.time = toString(Sys.time()) 12 | str.time = gsub(" ", "_", str.time) 13 | str.date = toString(Sys.Date()) 14 | filePrefix = file.path(getwd(), foldername, str.date, str.time, hash.conf) 15 | cat(sprintf("Creating the following output folder %s:\n", filePrefix)) 16 | dir.create(filePrefix, recursive = TRUE) 17 | self$conf.log.perf$filePrefix = filePrefix 18 | self$conf.log.perf$resultTbPath = file.path(filePrefix, rlR.conf4log$resultTbPath) # RData file persistence place 19 | }, 20 | 21 | initialize = function(...) { 22 | self$conf.log.perf = data.table::copy(rlR.conf4log) # valid only when log = TRUE 23 | self$static = data.table::copy(rlR.conf.default) # deep copy 24 | #par.list = list(...) 25 | #dns = setdiff(names(par.list), rlR.conf.default) 26 | #list.default = setNames(lapply(dns, function(x) self$static[[x]]), dns) 27 | self$set(...) 28 | }, 29 | 30 | get = function(name) { 31 | self$static[[name]] 32 | }, 33 | 34 | set = function(...) { 35 | par.list = list(...) 36 | lapply(names(par.list), function(x) self$updatePara(x, par.list[[x]])) 37 | flag = self$get("log") 38 | if (is.null(flag)) flag = FALSE 39 | if (flag) { 40 | folder_name = readline(prompt = "Please enter folder name relative to current working directory to store output files\n") 41 | self$getPersist(folder_name) 42 | } 43 | }, 44 | 45 | updatePara = function(str.para, val.value) { 46 | self$static[[str.para]] = val.value 47 | }, 48 | 49 | show = function() { 50 | list_param = self$static 51 | dns = names(list_param) 52 | ## remove agent.nn 53 | #flag = sapply(dns, function(x) grepl("agent.nn", x)) 54 | #dns = dns[-which(flag)] 55 | list_conf = lapply(dns, function(x) self$static[[x]]) 56 | names(list_conf) = dns 57 | df = as.data.frame(unlist(list_conf)) 58 | colnames(df) = "value" 59 | df 60 | }, 61 | print = function() { 62 | print(self$show()) 63 | } 64 | ) 65 | ) 66 | -------------------------------------------------------------------------------- /R/confDefault.R: -------------------------------------------------------------------------------- 1 | # hyper-parameters range 2 | rlR.conf.lod = list( 3 | render = list(name = "render", note = "Whether to show rendering video or not", value = FALSE), 4 | log = list(name = "log", note = "Whether to log important information on drive", value = FALSE), 5 | console = list(name = "console", note = "Whether to enable debug info output to console", value = FALSE), 6 | agent.gamma = list(name = "agent.gamma", note = "The discount factor in reinforcement learning", value = 0.99), 7 | agent.flag.reset.net = list(name = "agent.flag.reset.net", note = "Whether to reset the neural network ", value = TRUE), #FIXME: should be set this? 
8 | agent.lr.decay = list(name = "agent.lr.decay", note = "The decay factor of the learning rate at each step", value = exp(-0.001)), # decaying with regard to step is better since some episode can be too long 9 | agent.lr = list(name = "agent.lr", note = "learning rate for the agent", value = 1e-3), 10 | agent.lr.min = list(name = "agent.lr.min", note = "minimum learning rate", value = 0), 11 | agent.store.model = list(name = "agent.store.model", note = "whether to store the model of the agent or not", value = FALSE), #FIXME: exclude this 12 | agent.update.target.freq = list(name = "agent.update.target.freq", note = "How often should the target network be set", value = 2000L), 13 | agent.start.learn = list(name = "agent.start.learn", note = "after how many transitions should replay begin", value = 64L), 14 | agent.clip.td = list(name = "agent.clip.td", note = "whether to clip TD error", value = FALSE), 15 | policy.maxEpsilon = list(name = "policy.maxEpsilon", note = "The maximum epsilon exploration rate", value = 1.0), 16 | policy.minEpsilon = list(name = "policy.minEpsilon", note = "The minimum epsilon exploration rate", value = 0.01), 17 | policy.decay.rate = list(name = "policy.decay.rate", note = "the decay rate", value = 1.0), 18 | policy.decay.type = list(name = "policy.decay.type", note = "the way to decay epsion, can be decay_geo, decay_exp, decay_linear", value = "decay_geo"), 19 | policy.aneal.steps = list(name = "policy.aneal.steps", note = "only valid when policy.decay.type = 'decay_linear'", value = 1e6), 20 | policy.softmax.magnify = list(name = "policy.softmax.magnify", value = 1), 21 | replay.batchsize = list(name = "replay.batchsize", note = "how many samples to take from replay memory each time", value = 64), 22 | replay.memname = list(name = "replay.memname", range = c("Uniform"), note = "The type of replay memory", value = "Uniform"), 23 | replay.mem.size = list(name = "replay.mem.size", note = "The size of the replay memory", value = 2e4), 24 | replay.epochs = list(name = "replay.epochs", note = "How many gradient decent epochs to carry out for one replay", value = 1L), 25 | replay.freq = list(name = "replay.freq", note = "how many steps to wait until one replay", value = 1L) 26 | ) 27 | 28 | rlR.conf.dt = data.table::rbindlist(rlR.conf.lod, fill = TRUE) 29 | rlR.conf.df = as.data.frame(rlR.conf.dt) 30 | 31 | 32 | # define default hyper-parameters 33 | rlR.conf.default = lapply(rlR.conf.lod, function(x) x$value) 34 | 35 | #' @title listAvailConf 36 | #' @description List defaults hyper-parameters names 37 | #' @export 38 | listAvailConf = function() { 39 | rlR.conf.dt 40 | } 41 | 42 | rlR.conf.AgentActorCritic = function() { 43 | conf = RLConf$new( 44 | render = FALSE, 45 | log = FALSE, 46 | agent.lr = 1e-2, 47 | agent.gamma = 0.9, 48 | agent.lr.decay = 1, 49 | console = TRUE, 50 | policy.name = "Prob", 51 | policy.maxEpsilon = 0, 52 | policy.minEpsilon = 0, 53 | replay.epochs = 1L, 54 | replay.memname = "Latest" 55 | #agent.nn.arch.actor = list(nhidden = 64, act1 = "tanh", act2 = "softmax", loss = "categorical_crossentropy", lr = 1e-4, kernel_regularizer = "regularizer_l2(l=0.0001)", bias_regularizer = "regularizer_l2(l=1e-4)", decay = 0.9, clipnorm = 5), 56 | #agent.nn.arch.critic = list(nhidden = 64, act1 = "tanh", act2 = "linear", loss = "mse", lr =1e-4, kernel_regularizer = "regularizer_l2(l=0.0001)", bias_regularizer = "regularizer_l2(l=1e-4)", decay = 0.9, clipnorm = 5) 57 | ) 58 | } 59 | 60 | rlR.conf.AgentDDPG = function() { 61 | conf = RLConf$new( 62 | 
render = FALSE, 63 | log = FALSE, 64 | agent.lr = 1e-2, 65 | agent.gamma = 0.9, 66 | agent.lr.decay = 1, 67 | console = TRUE, 68 | policy.name = "Prob", 69 | policy.maxEpsilon = 0, 70 | policy.minEpsilon = 0, 71 | replay.batchsize = 32, # saves a lot of time compared to when batchsize = 64 72 | replay.epochs = 1L, 73 | replay.memname = "Uniform" 74 | ) 75 | } 76 | 77 | 78 | #' @title get Default Configuration according to agent name 79 | #' @description List defaults hyper-parameters 80 | #' @param agent_name The name for Agent 81 | #' @export 82 | #' @examples 83 | #' conf = rlR::getDefaultConf("AgentDQN") 84 | getDefaultConf = function(agent_name) { 85 | get(paste0("rlR.conf.", agent_name))() 86 | } 87 | 88 | #' @title show Default Configuration 89 | #' @description List defaults hyper-parameters in dataframe 90 | #' @export 91 | #' @examples 92 | #' df = rlR::showDefaultConf() 93 | showDefaultConf = function() { 94 | rlR.conf.df = data.frame(unlist(rlR.conf.default)) 95 | colnames(rlR.conf.df) = NULL 96 | rlR.conf.df 97 | } 98 | 99 | 100 | rlR.conf4log = list( 101 | policy.epi_wait_ini = 5L, # initially the performance should increase 102 | policy.epi_wait_middle = 25L, 103 | policy.epi_wait_expl = 40L, 104 | replay.mem.dt = FALSE, 105 | replay.mem.laplace.smoother = 0.001, 106 | resultTbPath = "Perf.RData", 107 | LOGGERNAMENN = "nn.logger", 108 | LOGGERNAMERL = "rl.logger", 109 | NNSufix = "nn.log", 110 | RLSufix = "rl.log.R" 111 | ) 112 | 113 | 114 | agent.brain.dict.AgentDQN = agent.brain.dict.AgentFDQN = agent.brain.dict.AgentDDQN = function() list(value_fun = makeValueNet.DQN) 115 | agent.brain.dict.AgentPG = function() list(policy_fun = makePolicyNet) 116 | agent.brain.dict.AgentPGBaseline = function() list(policy_fun = makePolicyNet, value_fun = makeValueNet) 117 | agent.brain.dict.AgentActorCritic = function() list(policy_fun = makePolicyNet2, value_fun = makeValueNet2) 118 | -------------------------------------------------------------------------------- /R/environment_base.R: -------------------------------------------------------------------------------- 1 | #' @title Reinforcement Learning Environment 2 | #' 3 | #' @format \code{\link{R6Class}} object 4 | #' 5 | #' @description 6 | #' A \code{\link{R6Class}} to represent reinforcement learning environments. To define custom environment, one should define a \code{\link{R6Class}} which inherit rlR::Environment. 7 | #' 8 | #' @section Member Variables: 9 | #' 10 | #' \describe{ 11 | #' \item{act_cnt}{[\code{int}] \cr 12 | #' Number of actions of the agent to environment 13 | #' } 14 | #' \item{state_dim}{[\code{vector(int)}] \cr 15 | #' The dimension of the observation(or state) space on the environment. Must be vector of integers. For example, c(28, 28, 3), which can be the dimension for a tensor of order 3. 16 | #' } 17 | #' \item{name}{[\code{character}] \cr 18 | #' A string to represent the name of the environment} 19 | #' \item{flag_continous}{[\code{logic}] \cr 20 | #' A boolean variable to represent whether the action space is continous or not} 21 | #' } 22 | #' 23 | #' @section Methods: 24 | #' \describe{ 25 | #' \item{initialize(...)}{[\code{function}] \cr 26 | #' Constructor function to initialize environment} 27 | #' \item{step(action)}{[\code{function}] \cr 28 | #' Function to make a step in the environment. Must return a named list of [\code{state(array of size state_dim), reward(reward the agent get after making the step), done(boolean variable whether the episode is finished or not), info(list of anything)}]. 
There must be stoping criteria in step function which should return [\code{list(state = state, reward = reward, done = TRUE, info = list())}] to stop the interaction between the environment and the agent.} 29 | #' \item{reset()}{[\code{function}] \cr 30 | #' Reset the environment} 31 | #' \item{render()}{[\code{function}] \cr 32 | #' Print out information to user about the environment, can be left empty} 33 | #' \item{afterAll()}{[\code{function}] \cr 34 | #' What needs to be done after learning is finished, could be left empty} 35 | #' \item{evaluateArm(vec_arm)}{[\code{function}] \cr 36 | #' process value of vec_arm which is the same length vector as action count act_cnt to only generate legal action, by default doing nothing} 37 | #' } 38 | #' @return [\code{\link{Environment}}]. 39 | #' @export 40 | Environment = R6::R6Class("Environment", 41 | public = list( 42 | act_cnt = NULL, 43 | state_dim = NULL, 44 | name = NULL, 45 | flag_continous = FALSE, 46 | flag_tensor = FALSE, 47 | observ_stack_len = 1L, 48 | maxStepPerEpisode = 1e4L, 49 | agent = NULL, # used to get access to replaymem 50 | initialize = function() { 51 | }, 52 | 53 | evaluateArm = function(vec_arm) { 54 | return(vec_arm) 55 | }, 56 | 57 | afterEpisode = function() { 58 | }, 59 | 60 | # environment get a hook to agent so it can access the replay memory 61 | setAgent = function(agent) { 62 | self$agent = agent 63 | self$agent$mem$observ_stack_len = self$observ_stack_len 64 | }, 65 | 66 | render = function() { 67 | 68 | }, 69 | 70 | overview = function() { 71 | cat(sprintf("\naction cnt: %s \n", toString(self$act_cnt))) 72 | cat(sprintf("state dim: %s \n", toString(self$state_dim))) 73 | cat(sprintf("%s\n", ifelse(self$flag_continous, "continous action", "discrete action"))) 74 | }, 75 | 76 | reset = function() { 77 | }, 78 | 79 | step = function(action) { 80 | }, 81 | 82 | afterAll = function() { 83 | }, 84 | 85 | print = function() { 86 | self$overview() 87 | } 88 | ) 89 | ) 90 | 91 | EnvToy = R6::R6Class("EnvToy", 92 | inherit = Environment, 93 | public = list( 94 | initialize = function(...) { 95 | self$act_cnt = c(2) 96 | self$state_dim = c(4) 97 | }, 98 | 99 | reset = function() { 100 | return(list( 101 | state = array(rnorm(self$state_dim), dim = self$state_dim), 102 | reward = NULL, 103 | done = FALSE, 104 | info = list() 105 | )) 106 | }, 107 | 108 | step = function(action) { 109 | return(list( 110 | state = array(rnorm(self$state_dim), dim = self$state_dim), 111 | reward = 1.0, 112 | done = TRUE, 113 | info = list() 114 | )) 115 | } 116 | ) 117 | ) 118 | -------------------------------------------------------------------------------- /R/experiment.R: -------------------------------------------------------------------------------- 1 | #' @title Repeat experiment 2 | #' 3 | #' @description Repeat the experiment for serveral times 4 | #' 5 | #' @param sname The scenario name of Gym environment 6 | #' @param aname The name of the Agent 7 | #' @param conf Configuration object 8 | #' @param nrep Number of repetitions 9 | #' @param nepi Number of episode to learn 10 | #' @param value_fun customized neural network as value function approximator, default NULL 11 | #' @param ... 
Other Parameters to pass to GymEnv 12 | #' @return list of ggplot2 object for performance and list of reward per experiment per episode 13 | #' @export 14 | # library(doMC) # registerDoMC(4) # res = repExperiment(sname = "CartPole-v0", aname = "AgentDQN", conf = getDefaultConf("AgentDQN"), nrep = 5, nepi = 200) 15 | repExperiment = function(sname, aname, conf, nrep = 5L, nepi, value_fun = NULL, ...) { 16 | list.agent = foreach::foreach(i = 1:nrep) %dopar% { 17 | env = makeGymEnv(sname, ...) 18 | agent = initAgent(aname, env, conf) 19 | agent$learn(nepi) 20 | agent 21 | } 22 | list.r = lapply(list.agent, function(agent) { 23 | agent$interact$perf$list.reward.epi}) 24 | list.len = lapply(1:nrep, function(i) lapply(list.r[[i]], function(x) length(x))) 25 | len = max(unlist(list.len)) 26 | init.list = lapply(1:nepi, function(j) vector(mode = "numeric", length = len)) 27 | convert2SameLen = function(init1) { 28 | init2 = vector(mode = "numeric", length = len) 29 | init2[1:length(init1)] = init1 30 | init2 31 | } 32 | list.episode = lapply(1:nepi, function(episode_ind) { 33 | init = vector(mode = "numeric", length = len) 34 | for (i in 1:nrep) { 35 | init = init + convert2SameLen(list.r[[i]][[episode_ind]]) 36 | } 37 | init 38 | }) 39 | #for (i in 1L:nrep) { 40 | # init.list = lapply(2:nepi, function(episode_ind) init.list[[episode_ind]] + convert2SameLen(list.r[[i]][[episode_ind]])) 41 | #} 42 | #init = lapply(init, function(vec) vec / nrep) 43 | #list.episode = lapply(init, function(vec) vec / nrep) 44 | list.episode = lapply(list.episode, function(vec) vec / nrep) 45 | env = makeGymEnv(sname, ...) 46 | agent = initAgent(aname, env, conf = conf) 47 | #agent$interact$perf$list.reward.epi = init 48 | agent$interact$perf$list.reward.epi = list.episode 49 | plot = agent$plotPerf() 50 | return(list(plot = plot, list.r = list.r, list.agent = list.agent)) 51 | } 52 | -------------------------------------------------------------------------------- /R/interaction_base.R: -------------------------------------------------------------------------------- 1 | InteractionBase = R6::R6Class("InteractionBase", 2 | public = list( 3 | rl_agent = NULL, 4 | rl_env = NULL, 5 | perf = NULL, 6 | maxiter = NULL, 7 | glogger = NULL, 8 | run = function() { 9 | stop("not implemented") 10 | } 11 | ), # public 12 | private = list(), 13 | active = list() 14 | ) 15 | -------------------------------------------------------------------------------- /R/logging.R: -------------------------------------------------------------------------------- 1 | RLLog = R6::R6Class("RLLog", #nocov start 2 | public = list( 3 | log.root = NULL, 4 | log.nn = NULL, 5 | conf = NULL, 6 | flag = NULL, 7 | # the configuration of logging does not impact the performance, so use global configuration 8 | initialize = function(conf) { 9 | logging::logReset() 10 | conf.logging = conf$conf.log.perf 11 | self$conf = conf 12 | # make log obj 13 | self$log.root = logging::getLogger(conf$conf.log.perf$LOGGERNAMERL) 14 | self$log.nn = logging::getLogger(conf$conf.log.perf$LOGGERNAMENN) 15 | logging::removeHandler("writeToConsole", logger = conf$conf.log.perf$LOGGERNAMENN) 16 | logging::removeHandler("basic.stdout", logger = conf$conf.log.perf$LOGGERNAMENN) 17 | # whether log to file 18 | self$flag = conf$get("log") 19 | if (is.null(self$flag)) self$flag = FALSE 20 | if (self$flag) { 21 | # root logger 22 | logging::addHandler(writeToFile, file = file.path(conf$conf.log.perf$filePrefix, conf.logging$RLSufix), logger = conf.logging$LOGGERNAMERL) 23 | # every step 
logger 24 | logging::addHandler(writeToFile, file = file.path(conf.logging$filePrefix, conf$conf.log.perf$NNSufix), logger = conf$conf.log.perf$LOGGERNAMENN) 25 | # first logging 26 | self$log.root$info(conf.logging$str.conf) 27 | self$log.root$info(conf$conf.log.perf$filePrefix) # take down the directory name 28 | info = paste0("\n", conf.logging$info.before, conf.logging$filePrefix, conf.logging$info.after) 29 | self$log.root$info(info) 30 | } 31 | }, 32 | 33 | afterAll = function() { 34 | if (self$flag) { 35 | filename.replay = file.path(rlR.conf4log$filePrefix, "replay.dt.csv") 36 | filename.experience = file.path(self$conf$conf.log.perf$filePrefix, "experience.dt.csv") 37 | self$log.root$info("\n a = BBmisc::load2('%s')\n", self$conf$conf.log.perf$resultTbPath) 38 | cat(sprintf("\n a = BBmisc::load2('%s') \n", self$conf$conf.log.perf$resultTbPath)) 39 | write.csv(self$rl.agent$mem$dt, file = filename.experience) 40 | self$log.root$info("\n b = read.csv('%s') \n", filename.experience) 41 | } 42 | } 43 | ) 44 | ) # nocov end 45 | -------------------------------------------------------------------------------- /R/nnArsenal_ddpg.R: -------------------------------------------------------------------------------- 1 | # normal 1 arm output network with only state as input 2 | createActorNetwork.AgentDDPG.torc = function(state_dim = 3, action_dim = 1L) { 3 | input_state = keras::layer_input(shape = state_dim) 4 | states_hidden = input_state %>% 5 | layer_dense(units = 27, activation = "relu") 6 | states_hidden2 = states_hidden %>% 7 | layer_dense(units = 27, activation = "linear") %>% 8 | layer_dense(units = action_dim, activation = "linear") # only 1L output! 9 | model = keras::keras_model(inputs = input_state, outputs = states_hidden2) 10 | opt = keras::optimizer_adam(lr = 0.0001) 11 | model %>% compile( 12 | optimizer = opt, 13 | loss = "mse" 14 | ) 15 | return(list(model = model, input_state = input_state, weights = model$trainable_weights)) 16 | } 17 | 18 | # both state and action are inputs! 
19 | createCriticNetwork.AgentDDPG.torc = function(state_dim, action_dim) { 20 | input_state = keras::layer_input(shape = state_dim) 21 | input_action = keras::layer_input(shape = action_dim, name = "input_action") 22 | action_hidden = input_action %>% 23 | layer_dense(units = 30, activation = "linear") 24 | states_hidden = input_state %>% 25 | layer_dense(units = 30, activation = "relu") 26 | states_hidden2 = states_hidden %>% 27 | layer_dense(units = 30, activation = "linear") 28 | hiddens = keras::layer_add(c(states_hidden2, action_hidden)) 29 | # outputs compose input + dense layers 30 | predictions = hiddens %>% 31 | layer_dense(units = 30, activation = "relu") %>% 32 | layer_dense(units = action_dim, activation = "linear") 33 | # create and compile model 34 | model = keras::keras_model(inputs = c(input_action, input_state), outputs = predictions) 35 | opt = keras::optimizer_adam(lr = 0.0001) 36 | model %>% compile( 37 | optimizer = opt, 38 | loss = "mse" 39 | ) 40 | return(list(model = model, input_action = input_action, input_state = input_state)) 41 | } 42 | 43 | 44 | createCriticNetwork.AgentDDPG = function(state_dim, action_dim) { 45 | input_state = keras::layer_input(shape = state_dim) 46 | input_action = keras::layer_input(shape = action_dim, name = "input_action") 47 | action_hidden = input_action %>% 48 | layer_dense(units = 30, activation = "linear") 49 | states_hidden = input_state %>% layer_dense(units = 30, activation = "linear") 50 | hiddens = keras::layer_add(c(states_hidden, action_hidden)) 51 | #concat = keras::layer_concatenate(c(action_hidden, states_hidden)) 52 | hiddens2 = keras::layer_activation_relu(hiddens) 53 | 54 | # outputs compose input + dense layers 55 | predictions = hiddens2 %>% layer_dense(units = action_dim, activation = "linear") 56 | # create and compile model 57 | model = keras::keras_model(inputs = c(input_action, input_state), outputs = predictions) 58 | opt = keras::optimizer_adam(lr = 0.002) 59 | model %>% compile( 60 | optimizer = opt, 61 | loss = "mse" 62 | ) 63 | return(list(model = model, input_action = input_action, input_state = input_state)) 64 | } 65 | 66 | LayerKMultiply <- R6::R6Class( 67 | "KerasLayer", 68 | inherit = KerasLayer, 69 | 70 | public = list( 71 | m = NULL, 72 | 73 | initialize = function(m) { 74 | self$m <- m 75 | }, 76 | 77 | call = function(x, mask = NULL) { 78 | x * self$m 79 | } 80 | ) 81 | ) 82 | 83 | layer_LayerKMultiply <- function(object, m) { 84 | create_layer(LayerKMultiply, object, list(m = m)) 85 | } 86 | 87 | 88 | createActorNetwork.AgentDDPG = function(state_dim = 3, action_dim = 1L, a_bound) { 89 | input_state = keras::layer_input(shape = state_dim) 90 | states_hidden = input_state %>% 91 | layer_dense(units = 30, activation = "relu") 92 | states_hidden2 = states_hidden %>% 93 | layer_dense(units = action_dim, activation = "tanh") # only 1L output! 
94 | output = states_hidden2 %>% layer_LayerKMultiply(m = a_bound) 95 | model = keras::keras_model(inputs = input_state, outputs = states_hidden2) 96 | opt = keras::optimizer_adam(0.001) 97 | fun_loss = function(y_true, y_pred) { 98 | # currently not used at all 99 | k_b = keras::backend() 100 | hh = k_b$print_tensor(y_true) 101 | temp = y_true * k_b$log(y_pred) 102 | sloss = -k_b$sum(temp) 103 | cross_entropy = k_b$mean(sloss) 104 | } 105 | model %>% compile( 106 | optimizer = opt, 107 | loss = fun_loss 108 | ) 109 | return(list(model = model, input_state = input_state, weights = model$trainable_weights)) 110 | } 111 | -------------------------------------------------------------------------------- /R/obsolette.R: -------------------------------------------------------------------------------- 1 | function() { 2 | library(profvis) 3 | profvis( 4 | { 5 | agent = initAgent("AgentTable", "CliffWalking-v0") 6 | agent = initAgent("AgentTable", "FrozenLake-v0") 7 | agent = initAgent("AgentTable", "Taxi-v2") 8 | agent$learn(500) 9 | visualize(agent$q_tab) 10 | agent$plotPerf(F) 11 | } 12 | ) 13 | } 14 | -------------------------------------------------------------------------------- /R/policy.R: -------------------------------------------------------------------------------- 1 | Policy = R6::R6Class("Policy", 2 | public = list( 3 | decay_rate = NULL, 4 | host = NULL, 5 | gstep_idx = NULL, 6 | action = NULL, 7 | random_cnt = NULL, 8 | random_action = NULL, 9 | fun_aneal = NULL, 10 | total_aneal_step = NULL, 11 | epsilon = NULL, 12 | min_epsilon = NULL, 13 | max_epsilon = NULL, 14 | initialize = function(host) { 15 | self$random_cnt = 0L 16 | self$host = host 17 | self$decay_rate = self$host$conf$get("policy.decay.rate") 18 | self$total_aneal_step = self$host$conf$get("policy.aneal.steps") 19 | self$fun_aneal = get(self$host$conf$get("policy.decay.type"), envir = self) 20 | self$min_epsilon = self$host$conf$get("policy.minEpsilon") 21 | self$max_epsilon = self$host$conf$get("policy.maxEpsilon") 22 | self$epsilon = self$max_epsilon 23 | self$gstep_idx = 1 24 | }, 25 | 26 | sampleRandomAct = function(state) { 27 | self$random_action = sample.int(self$host$act_cnt)[1L] 28 | }, 29 | 30 | predProbRank = function(state) { 31 | prob = order(self$host$vec.arm.q) 32 | action = sample.int(self$host$act_cnt, prob = prob)[1L] 33 | return(action) 34 | }, 35 | 36 | decay_geo = function() { 37 | temp = self$epsilon * self$decay_rate 38 | self$epsilon = max(temp, self$min_epsilon) 39 | }, 40 | 41 | decay_exp = function() { 42 | self$epsilon = self$min_epsilon + (self$max_epsilon - self$min_epsilon) * exp(self$decay_rate * self$gstep_idx) 43 | self$gstep_idx = self$gstep_idx + 1L 44 | }, 45 | 46 | decay_linear = function() { 47 | self$epsilon = self$max_epsilon - (self$gstep_idx / self$total_aneal_step) * (self$max_epsilon - self$min_epsilon) 48 | # if self$gstep_idx > self$total_aneal_step 49 | self$epsilon = max(self$epsilon, self$min_epsilon) 50 | self$gstep_idx = self$gstep_idx + 1L 51 | }, 52 | 53 | afterStep = function() { 54 | }, 55 | 56 | afterEpisode = function() { 57 | self$host$interact$toConsole("Epsilon%f \n", self$epsilon) 58 | self$host$glogger$log.nn$info("rand steps:%d \n", self$random_cnt) 59 | self$host$interact$toConsole("rand steps:%i \n", self$random_cnt) # same message to console 60 | self$random_cnt = 0L 61 | } 62 | ) 63 | ) 64 | 65 | 66 | PolicyProb = R6::R6Class("PolicyProb", 67 | inherit = Policy, 68 | public = list( 69 | act = function(state) { 70 | sample.int(self$host$act_cnt, prob = 
self$host$vec.arm.q, size = 1L) 71 | } 72 | ) 73 | ) 74 | 75 | 76 | 77 | PolicyEpsilonGreedy = R6::R6Class("PolicyEpsilonGreedy", 78 | inherit = Policy, 79 | public = list( 80 | initialize = function(host) { 81 | super$initialize(host) 82 | }, 83 | 84 | toss = function() { 85 | flag = runif(1L) < self$epsilon 86 | if (flag) { 87 | self$sampleRandomAct() 88 | self$action = self$random_action 89 | self$random_cnt = self$random_cnt + 1L 90 | self$host$glogger$log.nn$info("epsilon random action: %d", self$action) 91 | } 92 | }, 93 | 94 | act = function(state) { 95 | self$action = which.max(self$host$vec.arm.q) 96 | self$toss() 97 | return(self$action) 98 | }, 99 | 100 | afterStep = function() { 101 | self$fun_aneal() 102 | }, 103 | 104 | afterEpisode = function() { 105 | self$fun_aneal() # FIXME: not necessary here since we always decrease by step? 106 | super$afterEpisode() 107 | } 108 | ) 109 | ) 110 | 111 | PolicyEpsGreedTie = R6::R6Class("PolicyEpsGreedTie", 112 | inherit = PolicyEpsilonGreedy, 113 | public = list( 114 | sampleRandomAct = function() { 115 | self$random_action = sample(which(!is.na(self$host$vec.arm.q)), size = 1) 116 | }, 117 | 118 | act = function(state) { 119 | best_val = max(self$host$vec.arm.q, na.rm = T) 120 | best_arm = which(self$host$vec.arm.q == best_val) 121 | self$action = sample(best_arm, size = 1) 122 | self$toss() 123 | return(self$action) 124 | } 125 | ) 126 | ) 127 | 128 | 129 | 130 | 131 | PolicyProbEpsilon = R6::R6Class("PolicyProbEpsilon", 132 | inherit = PolicyEpsilonGreedy, 133 | public = list( 134 | initialize = function(host) { 135 | super$initialize(host) 136 | }, 137 | 138 | # all suboptimal arm probability sum up to epsilon with probability epsilon/act_cnt 139 | act = function(state) { 140 | prob = rep(self$epsilon, self$host$act_cnt) / (self$host$act_cnt) 141 | optarm = which.max(self$host$vec.arm.q) 142 | prob[optarm] = prob[optarm] + 1.0 - self$epsilon 143 | action = sample.int(self$host$act_cnt, prob = prob)[1L] 144 | if (optarm != action) self$random_cnt = self$random_cnt + 1L 145 | return(action) 146 | }, 147 | 148 | afterEpisode = function() { 149 | super$afterEpisode() 150 | } 151 | ) 152 | ) 153 | 154 | PolicySoftMax = R6::R6Class("PolicySoftMax", 155 | inherit = Policy, 156 | public = list( 157 | softmax_magnify = NULL, 158 | softmax_base = NULL, 159 | initialize = function(host) { 160 | super$initialize(host) 161 | self$softmax_base = self$host$conf$get("policy.softmax.base") 162 | self$softmax_magnify = self$host$conf$get("policy.softmax.magnify") 163 | }, 164 | 165 | # softmax will magnify the difference 166 | softmax = function(state) { 167 | z = self$host$vec.arm.q - max(self$host$vec.arm.q) # numerical stability 168 | prob = exp(self$softmax_magnify * z) 169 | prob = prob / sum(prob) 170 | action = sample.int(self$host$act_cnt, prob = prob)[1L] 171 | #action = rmultinom(n = 1L, size = self$host$act_cnt, prob = prob) # FIXME: any difference between multinomial and sample.int? 
172 | #action = which.max(action) 173 | if (action != which.max(self$host$vec.arm.q)) self$random_cnt = self$random_cnt + 1L 174 | return(action) 175 | }, 176 | 177 | act = function(state) { 178 | self$action = self$softmax(state) 179 | #self$toss() # epsilon chance 180 | return(self$action) 181 | }, 182 | 183 | afterEpisode = function() { 184 | self$host$interact$toConsole("softmax_base %f \n", self$softmax_base) 185 | self$softmax_base = self$softmax_magnify * self$softmax_base 186 | super$afterEpisode() 187 | } 188 | 189 | ) 190 | ) 191 | 192 | makePolicy = function(name, host) { 193 | fn = paste0("Policy", name) 194 | get(fn)$new(host = host) 195 | } 196 | -------------------------------------------------------------------------------- /R/replaymem_helpers.R: -------------------------------------------------------------------------------- 1 | ReplayMem$extractOldState = function(x) { 2 | return(x[[1L]]) 3 | } 4 | 5 | ReplayMem$extractAction = function(x) { 6 | return(x[[2L]]) 7 | } 8 | 9 | ReplayMem$extractReward = function(x) { 10 | return(x[[3L]]) 11 | } 12 | 13 | ReplayMem$extractNextState = function(x) { 14 | return(x[[4L]]) 15 | } 16 | ReplayMem$extractDone = function(x) { 17 | return(x[[5L]]) 18 | } 19 | ReplayMem$extractStep = function(x) { 20 | return(x[[6L]][["stepidx"]]) 21 | } 22 | -------------------------------------------------------------------------------- /R/replaymem_png.R: -------------------------------------------------------------------------------- 1 | ReplayMemPng = R6::R6Class( 2 | "ReplayMemPng", 3 | inherit = ReplayMemUniform, 4 | public = list( 5 | initialize = function(agent, conf) { 6 | super$initialize(agent, conf) 7 | }, 8 | 9 | mkInst = function(state.old, action, reward, state.new, done, info) { 10 | # transform/compress states into single string for DB entry 11 | if (length(self$agent$state_dim) == 1) { 12 | state.old %<>% paste(collapse = "_") 13 | state.new %<>% paste(collapse = "_") 14 | } else { 15 | state.old = (state.old / 255L) %>% (png::writePNG) %>% paste(collapse = "") 16 | state.new = (state.new / 255L) %>% (png::writePNG) %>% paste(collapse = "") 17 | } 18 | super$mkInst(state.old, action, reward, state.new, done, info) 19 | }, 20 | 21 | sample.fun = function(k) { 22 | k = min(k, self$size) 23 | self$replayed.idx = sample(self$size)[1L:k] 24 | # replay.samples = lapply(self$replayed.idx, function(x) self$samples[[x]]) 25 | replay.samples = self$samples[self$replayed.idx] 26 | #FIXME: IS THE Orientation of the array right! 
Critically Important 27 | list.replay = lapply(replay.samples, function(x) list( 28 | state.old = x$state.old %>% str_to_array_h %>% array(dim = self$agent$state_dim), 29 | action = x$action, 30 | reward = x$reward, 31 | state.new = x$state.new %>% str_to_array_h %>% array(dim = self$agent$state_dim), 32 | done = x$done, 33 | info = list( 34 | episode = x$episode, 35 | stepidx = x$stepidx, 36 | info = x$info 37 | ) 38 | )) 39 | list.replay # DEBUG: self$agent$env$showImage(list.replay[[64]][["state.new"]]) make sense 40 | #DEBUG from ctrl+c: only agent is available 41 | # indx = agent$mem$replayed.idx 42 | # replay.samples = agent$mem$samples[indx] 43 | # x = replay.samples[[2]] 44 | # image = x$state.old %>% str_to_array_h %>% array(dim = agent$state_dim) 45 | # image = x$state.new %>% str_to_array_h %>% array(dim = agent$state_dim) 46 | # agent$env$showImage(image[,,1]) 47 | # agent$env$showImage(image[,,2]) 48 | } 49 | ) 50 | ) 51 | 52 | 53 | 54 | change_storage = function(y) { 55 | storage.mode(y) = "integer" # change storage type to integer to save space 56 | y 57 | } 58 | 59 | str_to_array_h = function(string) { 60 | ( 61 | # magittr require () 62 | string %>% 63 | strsplit("") %>% # ABEF39 SPLIT into c("A", "B", "E", ...) 64 | (function(x) x[[1]]) %>% # return of split is a list 65 | (function(x) paste0(x[c(TRUE, FALSE)], x[c(FALSE, TRUE)])) %>% #combine to pairs, equivalent to zip: x[c(TRUE, FALSE)] takes the 1st,3st,5st and x[c(FALSE, TRUE)] take the 2st, 4st 66 | as.hexmode %>% # necessary for correct as.raw. For R to understand this is hexcode other than String. 67 | as.raw %>% # make it readable as PNG 68 | (png::readPNG) * 255 # png package assums image to have range 0-1 69 | ) %>% 70 | change_storage # float storage to int storage 71 | } 72 | -------------------------------------------------------------------------------- /R/surrogate_base.R: -------------------------------------------------------------------------------- 1 | Surrogate = R6::R6Class("Surrogate", 2 | public = list( 3 | act_cnt = NULL, 4 | state_dim = NULL, 5 | createModel.fun = NULL, 6 | model = NULL, 7 | initialize = function(actionCnt, state_dim, createModel.fun) { 8 | self$act_cnt = actionCnt 9 | self$state_dim = state_dim 10 | self$createModel.fun = createModel.fun 11 | }, 12 | 13 | train = function(X_train, Y_train, epochs) { 14 | stop("not implmented!") 15 | }, 16 | 17 | persist = function(path) { 18 | temp = self$clone() 19 | save(temp, file = path) 20 | }, 21 | 22 | pred = function(X) { 23 | stop("not implemented") 24 | } 25 | ) 26 | ) 27 | -------------------------------------------------------------------------------- /R/visualize.R: -------------------------------------------------------------------------------- 1 | visualize = function(tabular, env = c("cliff"), latex = FALSE) { 2 | 3 | left = if (latex) "$\\leftarrow$" else "<" 4 | right = if (latex) "$\\rightarrow$" else ">" 5 | up = if (latex) "$\\,\\,\\uparrow\\,\\,$" else "^" 6 | down = if (latex) "$\\,\\,\\downarrow\\,\\,$" else "v" 7 | 8 | parser_lake = function(x) { 9 | if (x == 0) left 10 | else if (x == 1) down 11 | else if (x == 2) right 12 | else if (x == 3) up 13 | } 14 | 15 | parser_cliff = function(x) { 16 | if (x == 0) up 17 | else if (x == 1) right 18 | else if (x == 2) down 19 | else if (x == 3) left 20 | } 21 | 22 | policy = data.frame(position = 1:nrow(tabular)) 23 | policy$action = sapply(policy$position, function(x) which.max(tabular[x, ]) - 1, USE.NAMES = FALSE) 24 | policy$action = sapply(policy$action, if (env == "lake") 
parser_lake else parser_cliff, USE.NAMES = FALSE) 25 | 26 | if (env == "lake" && latex) 27 | cat( "\\hline \n", 28 | paste(policy$action[1:4], collapse = " & "), "\\\\ \\hline \n", 29 | paste(policy$action[5:8], collapse = " & "), "\\\\ \\hline \n", 30 | paste(policy$action[9:12], collapse = " & "), "\\\\ \\hline \n", 31 | paste(policy$action[13:16], collapse = " & "), "\\\\ \\hline \n" 32 | ) 33 | else if (env == "cliff" && latex) 34 | cat( "\\hline \n", 35 | paste(policy$action[1:12], collapse = " & "), "\\\\ \\hline \n", 36 | paste(policy$action[13:24], collapse = " & "), "\\\\ \\hline \n", 37 | paste(policy$action[25:36], collapse = " & "), "\\\\ \\hline \n", 38 | paste(policy$action[37:48], collapse = " & "), "\\\\ \\hline \n" 39 | ) 40 | else if (env == "lake") 41 | cat( "\n", 42 | policy$action[1], policy$action[2], policy$action[3], policy$action[4], "\n", 43 | policy$action[5], policy$action[6], policy$action[7], policy$action[8], "\n", 44 | policy$action[9], policy$action[10], policy$action[11], policy$action[12], "\n", 45 | policy$action[13], policy$action[14], policy$action[15], policy$action[16], "\n" 46 | ) 47 | else 48 | cat( "\n", 49 | paste(policy$action[1:12], collapse = " "), "\n", 50 | paste(policy$action[13:24], collapse = " "), "\n", 51 | paste(policy$action[25:36], collapse = " "), "\n", 52 | paste(policy$action[37:48], collapse = " "), "\n" 53 | ) 54 | } 55 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | #' @import R6 2 | #' @import data.table 3 | #' @import checkmate 4 | #' @import data.table 5 | #' @import reticulate 6 | #' @import keras 7 | #' @import logging 8 | #' @import openssl 9 | #' @import ggplot2 10 | #' @import tensorflow 11 | #' @import abind 12 | #' @import foreach 13 | 14 | NULL # nocov 15 | 16 | .onAttach <- function(libname, pkgname) { 17 | try(expr = { 18 | packageStartupMessage("- type 'reticulate::py_discover_config()' to check default python") 19 | packageStartupMessage("- to use a different python path, execute the following immediately after package is loaded:") 20 | packageStartupMessage("reticulate::use_python('/path/to/your/python')") 21 | packageStartupMessage("\nor\n reticulate::use_conda_env('name-of-conda-env')") 22 | }, silent = TRUE) 23 | } 24 | 25 | #' @title List implemented Agents 26 | #' @description List all implemented Agents 27 | #' @export 28 | listAvailAgent = function() { 29 | all = getNamespaceExports("rlR") 30 | all = all[which(sapply(all, function(x) grepl("^Agent", x)))] 31 | kickout = c("Agent", "AgentArmed") 32 | all = setdiff(all, kickout) 33 | list_res = lapply(all, function(x) get(x)$info()) 34 | names(list_res) = all 35 | list_res 36 | } 37 | 38 | #' @title list environments from OPENAI gym 39 | #' @description List all Gym Environments without testing them 40 | #' @export 41 | listGymEnvs = function() { 42 | envs = reticulate::import("gym.envs") 43 | all_spec = envs$registry$env_specs 44 | res = sapply(all_spec, function(x) x$id) 45 | names(res) = NULL 46 | } 47 | 48 | 49 | #' @title Test if tensorflow works from R session 50 | #' 51 | #' @description Test if tensorflow works from R session 52 | #' 53 | #' @return TRUE if tensorflow works 54 | #' @export 55 | rlr_test_if_tensorflow_works = function() { 56 | res <- try({ 57 | tf = reticulate::import("tensorflow") 58 | sess = tf$Session() 59 | hello = tf$constant("Hello, TensorFlow!") 60 | sess$run(hello) 61 | }, silent = FALSE) 62 | if 
(class(res)[1L] == "try-error") return(FALSE) 63 | return(TRUE) 64 | } 65 | 66 | #' @title Test if gym is installed 67 | #' @description Test if gym is installed 68 | #' @return TRUE if success 69 | #' @export 70 | rlr_test_if_gym_works = function() { 71 | res <- try({ 72 | gym = reticulate::import("gym") 73 | gym.sp = reticulate::import("gym.spaces") 74 | gym$logger$set_level(40) # supress warning 75 | gym$logger$setLevel(40) 76 | genv = gym$make("CartPole-v0") 77 | genv$reset() 78 | }, silent = FALSE) 79 | if (class(res)[1L] == "try-error") return(FALSE) 80 | return(TRUE) 81 | } 82 | 83 | #' @title Check if python dependencies work 84 | #' @description Check if python dependencies work 85 | #' @return TRUE if all python dependencies work 86 | #' @export 87 | checkPyDep = function() { 88 | flag_tensorflow = rlr_test_if_tensorflow_works() 89 | flag_keras = rlr_test_if_keras_works() 90 | flag_gym = rlr_test_if_gym_works() 91 | cat(sprintf("\n tensorlfow: %s, keras: %s, gym:%s\n", flag_tensorflow, flag_keras, flag_gym)) 92 | return(flag_tensorflow && flag_keras && flag_gym) 93 | } 94 | 95 | #' @title Install dependencies into system virtual environment called r-tensorflow 96 | #' @param gpu If TRUE, will install gpu version of tensorflow. By default, FALSE 97 | #' @description Install Keras dependencies into system virtual environment called r-tensorflow 98 | #' @return NULL 99 | #' @export 100 | installDep2SysVirtualEnv = function(gpu = FALSE) { # nocov start 101 | cat(sprintf("\ninstalling dependencies using %s \n", Sys.which("virtualenv"))) 102 | # install_keras will install tensorflow along into the virtual environment called "r-tensorflow" 103 | if (gpu) { 104 | version = paste0("1.8.0", "-gpu") 105 | } else { 106 | version = "1.8.0" 107 | } 108 | keras::install_keras(method = "virtualenv", tensorflow = version, extra_packages = c("gym==0.10.5", "cmake==3.12.0", "atari-py==0.1.6")) 109 | #reticulate::py_install() 110 | # sudo pip instlal uwsgi 111 | # sudo apt-get install python3-pip 112 | } # nocov end 113 | 114 | #' @title Install dependencies into a conda virtual environment called r-tensorflow 115 | #' @param gpu If TRUE, will install gpu version of tensorflow. 
By default, FALSE 116 | #' @param conda_path The conda path in your system, default "auto" will search in system path 117 | #' @description Install Keras dependencies into a conda virtual environment called r-tensorflow 118 | #' @return NULL 119 | #' @export 120 | installDepConda = function(conda_path = "auto", gpu = FALSE) { # nocov start 121 | str4gpu = ifelse(gpu, "-gpu", "") 122 | if (conda_path == "auto") cat(sprintf("\ninstalling dependencies using %s \n", Sys.which("conda"))) 123 | tf_version = paste0("1.9.0", str4gpu) 124 | keras_version = "default" 125 | keras::install_keras(method = "conda", conda = conda_path, version = keras_version, tensorflow = tf_version, extra_packages = c("gym==0.10.5", "cmake==3.12.0", "atari-py==0.1.6")) 126 | } # nocov end 127 | 128 | 129 | #' @title Test if keras works 130 | #' @description Test if keras is installed 131 | #' @return TRUE if success 132 | #' @export 133 | rlr_test_if_keras_works = function() { 134 | requireNamespace("keras") 135 | res <- try({ 136 | model <- keras_model_sequential() 137 | model %>% 138 | layer_dense(units = 256, activation = 'relu', input_shape = c(784)) %>% 139 | layer_dropout(rate = 0.4) %>% 140 | layer_dense(units = 128, activation = 'relu') %>% 141 | layer_dropout(rate = 0.3) %>% 142 | layer_dense(units = 10, activation = 'softmax') 143 | }, silent = FALSE) 144 | if (class(res)[1L] == "try-error") return(FALSE) 145 | return(TRUE) 146 | } 147 | 148 | rlR.debug = FALSE # nocov 149 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.com/smilesun/rlR.svg?branch=master)](https://travis-ci.com/smilesun/rlR) 2 | [![Coverage Status](https://coveralls.io/repos/github/smilesun/rlR/badge.svg?branch=master)](https://coveralls.io/github/smilesun/rlR?branch=master) 3 | [![Build status](https://ci.appveyor.com/api/projects/status/d0oyb358bh3e8r7r?svg=true)](https://ci.appveyor.com/project/smilesun/rlr) 4 | 5 | [Documentation](https://smilesun.github.io/rlR/) 6 | 7 | # rlR: (Deep) Reinforcement learning in R 8 | 9 | ## Installation 10 | 11 | ### R package installation 12 | ```{r eval = FALSE} 13 | devtools::install_github("smilesun/rlR") 14 | ``` 15 | or 16 | 17 | ```{r eval = FALSE} 18 | devtools::install_github("smilesun/rlR", dependencies = TRUE) 19 | ``` 20 | 21 | ## Python dependency 22 | 23 | rlR use keras with tensorflow as its backend for neural network as functional approximator and OpenAI gym. 
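For example, assuming the Python dependencies live in a conda environment named `r-tensorflow` (as created by `rlR::installDepConda()` or `rlR::installDep2SysVirtualEnv()`), a minimal sketch to select that environment and verify the setup could look like the following; adapt the environment name or python path to your system:

```{r eval = FALSE}
library(rlR)
# as suggested by the package startup message, select the python to use
reticulate::use_condaenv("r-tensorflow")   # or: reticulate::use_python("/path/to/your/python")
checkPyDep()  # TRUE only if tensorflow, keras and gym are all usable
```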
24 | 25 | see [Python Dependencies Installation and Configuration](https://smilesun.github.io/rlR/articles/python_dependencies.html) 26 | 27 | ## Example of Neural Network as Functional Approximator 28 | 29 | ### Choose an environment to learn 30 | ```{r} 31 | library(rlR) 32 | env = makeGymEnv("CartPole-v0") 33 | env 34 | ``` 35 | 36 | If you have R package "imager" installed, you could get a snapshot of the environment by 37 | ```{r, eval=FALSE} 38 | env$snapshot(preprocess = F) 39 | ``` 40 | 41 | 42 | ### Initialize agent with the environment 43 | ```{r learn, eval=FALSE} 44 | agent = initAgent("AgentDQN", env) 45 | agent$learn(200L) 46 | ``` 47 | 48 | ### Look at the performance 49 | ```{r mplot, eval=FALSE,fig.path="inst/figures/", warning=FALSE, message=FALSE, eval=FALSE} 50 | agent$plotPerf(F) 51 | ``` 52 | 53 | ## Specify a task to be sovled by creating your own Environment 54 | 55 | see [Custom Environment](https://smilesun.github.io/rlR/articles/define_custom_environments.html) 56 | 57 | ## More Examples 58 | - [Configuration](https://smilesun.github.io/rlR/articles/custom_configuration.html) 59 | - [Tabular Learning](https://smilesun.github.io/rlR/articles/table_learning.html) 60 | - [Repeated Experiment](https://smilesun.github.io/rlR/articles/repeated_experiment.html) 61 | - Discover in [Documentation](https://smilesun.github.io/rlR/) 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.com/smilesun/rlR.svg?branch=master)](https://travis-ci.com/smilesun/rlR) 2 | [![Coverage Status](https://coveralls.io/repos/github/smilesun/rlR/badge.svg?branch=master)](https://coveralls.io/github/smilesun/rlR?branch=master) 3 | [![Build status](https://ci.appveyor.com/api/projects/status/d0oyb358bh3e8r7r?svg=true)](https://ci.appveyor.com/project/smilesun/rlr) 4 | 5 | [Documentation](https://smilesun.github.io/rlR/) 6 | 7 | # rlR: (Deep) Reinforcement learning in R 8 | 9 | ## Installation 10 | 11 | ### R package installation 12 | 13 | ```r 14 | devtools::install_github("smilesun/rlR") 15 | ``` 16 | or 17 | 18 | 19 | ```r 20 | devtools::install_github("smilesun/rlR", dependencies = TRUE) 21 | ``` 22 | 23 | ## Python dependency 24 | 25 | rlR use keras with tensorflow as its backend for neural network as functional approximator and OpenAI gym. 
26 | 27 | see [Python Dependencies Installation and Configuration](https://smilesun.github.io/rlR/articles/python_dependencies.html) 28 | 29 | ## Example of Neural Network as Functional Approximator 30 | 31 | ### Choose an environment to learn 32 | 33 | ```r 34 | library(rlR) 35 | env = makeGymEnv("CartPole-v0") 36 | env 37 | ``` 38 | 39 | ``` 40 | ## 41 | ## action cnt: 2 42 | ## state original dim: 4 43 | ## discrete action 44 | ``` 45 | 46 | If you have R package "imager" installed, you could get a snapshot of the environment by 47 | 48 | ```r 49 | env$snapshot(preprocess = F) 50 | ``` 51 | 52 | 53 | ### Initialize agent with the environment 54 | 55 | ```r 56 | agent = initAgent("AgentDQN", env) 57 | agent$learn(200L) 58 | ``` 59 | 60 | ### Look at the performance 61 | 62 | ```r 63 | agent$plotPerf(F) 64 | ``` 65 | 66 | ## Specify a task to be sovled by creating your own Environment 67 | 68 | see [Custom Environment](https://smilesun.github.io/rlR/articles/define_custom_environments.html) 69 | 70 | ## More Examples 71 | - [Configuration](https://smilesun.github.io/rlR/articles/custom_configuration.html) 72 | - [Tabular Learning](https://smilesun.github.io/rlR/articles/table_learning.html) 73 | - [Repeated Experiment](https://smilesun.github.io/rlR/articles/repeated_experiment.html) 74 | - Discover in [Documentation](https://smilesun.github.io/rlR/) 75 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | template: 2 | params: 3 | bootswatch: cosmo 4 | 5 | navbar: 6 | left: 7 | - text: Topics 8 | icon: fa-file-text-o 9 | menu: 10 | - text: Specify Custom Environment 11 | href: articles/define_custom_environments.html 12 | - text: Repeated Experiment 13 | href: articles/repeated_experiment.html 14 | - text: Customize Neural Network Functional Approximator 15 | href: articles/customized_brain_mountainCar.html 16 | - text: Play Atari Games 17 | href: articles/play_atari_games.html 18 | - text: Tabular Learning 19 | href: articles/table_learning.html 20 | - text: Reference 21 | icon: fa-book 22 | href: reference/index.html 23 | 24 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | init: 2 | ps: | 3 | $ErrorActionPreference = "Stop" 4 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" 5 | Import-Module '..\appveyor-tool.ps1' 6 | 7 | install: 8 | ps: Bootstrap 9 | 10 | cache: 11 | - C:\RLibrary 12 | 13 | environment: 14 | global: 15 | USE_RTOOLS: true 16 | matrix: 17 | - R_VERSION: devel 18 | PKGTYPE: source 19 | 20 | - R_VERSION: release 21 | # - R_VERSION: oldrel 22 | # RTOOLS_VERSION: 32 23 | CRAN: http://cran.rstudio.com 24 | 25 | 26 | build_script: 27 | - travis-tool.sh install_deps 28 | 29 | test_script: 30 | - travis-tool.sh run_tests 31 | 32 | on_failure: 33 | - travis-tool.sh dump_logs 34 | 35 | artifacts: 36 | - path: '*.Rcheck\**\*.log' 37 | name: Logs 38 | 39 | - path: '*.Rcheck\**\*.out' 40 | name: Logs 41 | 42 | - path: '*.Rcheck\**\*.fail' 43 | name: Logs 44 | 45 | - path: '*.Rcheck\**\*.Rout' 46 | name: Logs 47 | 48 | - path: '\*_*.tar.gz' 49 | name: Bits 50 | 51 | - path: '\*_*.zip' 52 | name: Bits 53 | -------------------------------------------------------------------------------- /attr/arsenal_attr.R: 
-------------------------------------------------------------------------------- 1 | makeCompactableNetTF = function(state_dim, act_cnt) { 2 | hun = 10L 3 | requireNamespace(tensorflow) 4 | input = tf$placeholder(tf$float32, shape(NULL, state_dim)) 5 | W = tf$Variable(tf$zeros(shape(state_dim, hun))) 6 | b = tf$Variable(tf$zeros(shape(hun))) 7 | hidden = tf$nn$relu(tf$matmul(input, W) + b) 8 | w_critic = tf$Variable(tf$zeros(shape(hun, 1L))) 9 | b_critic = tf$Variable(tf$zeros(shape(1L))) 10 | w_actor = tf$Variable(tf$zeros(shape(hun, act_cnt))) 11 | b_actor = tf$Variable(tf$zeros(shape(act_cnt))) 12 | critic = tf$matmul(hidden, w_critic) + b_critic 13 | actor = tf$matmul(hidden, w_actor) + b_actor 14 | w_critic = tf$Variable(tf$zeros(shape(hun, 1L))) 15 | b_critic = tf$Variable(tf$zeros(shape(1L))) 16 | #loss_critic <- tf$reduce_mean(0.5 * (critic - critic_target) ^ 2) 17 | } 18 | 19 | 20 | -------------------------------------------------------------------------------- /attr/customized_brain_mountainCar.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Cutomized Neural Network for Mountain Car Problem" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Customized Neural Network for Mountain Car Problem} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | library(reticulate) 24 | #os = import("os") 25 | #os$environ[["TF_CPP_MIN_LOG_LEVEL"]]="3" 26 | ``` 27 | 28 | # Customized Brain for Mountain Car Problem 29 | 30 | ## Action cheat to Environment 31 | For the Mountain Car Senario, there are three valid actions: move left, do nothing and move right. Since do nothing does not help us in this environment, we could ignore this action. 32 | In rlR this is done by the following code. 33 | 34 | ```{r} 35 | library(rlR) 36 | env = makeGymEnv("MountainCar-v0", act_cheat = c(0, 2)) 37 | ``` 38 | act_cheat is a vector where the first element means the first action maps to the 0th action in the gym environment and the second element means the second action maps to the 2th action of the gym environment. But this definition, the 1th gym action is eliminated. Note that in gym the index is python convention where 0th means the 1th in R. 
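As a purely illustrative sketch of this mapping (hypothetical code, not rlR internals): the agent's i-th action is looked up in `act_cheat` and the resulting index is what the gym environment receives.

```{r eval = FALSE}
# hypothetical illustration of the act_cheat lookup, not code from rlR
act_cheat = c(0, 2)                   # agent action 1 -> gym action 0 (left), agent action 2 -> gym action 2 (right)
agent_action = 2                      # chosen by the agent (R side, 1-indexed)
gym_action = act_cheat[agent_action]  # 2, i.e. push right; gym action 1 ("do nothing") is never issued
```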
39 | 40 | ## Define custom neural network 41 | ```{r} 42 | net_fun = function(state_dim, act_cnt) { 43 | model = keras::keras_model_sequential() 44 | model %>% 45 | layer_dense(units = 8, activation = "relu", input_shape = c(state_dim)) %>% 46 | layer_dropout(rate = 0.25) %>% 47 | layer_dense(units = act_cnt, activation = "linear") 48 | model$compile(loss = "mse", optimizer = optimizer_rmsprop(lr = 0.001, clipnorm = 1.0)) 49 | model 50 | } 51 | ``` 52 | 53 | ## Learning 54 | ```{r} 55 | conf = getDefaultConf("AgentDQN") 56 | conf$set(console = TRUE, render = TRUE, policy.maxEpsilon = 1, policy.minEpsilon = 0, policy.decay = 1.0 / 1.01, replay.batchsize = 64, replay.epochs = 4, agent.lr.decay = 1, agent.gamma = 0.95) 57 | agent = initAgent("AgentDQN", env, conf, custom_brain = T) 58 | library(magrittr) 59 | library(keras) 60 | agent$customizeBrain(list(value_fun = net_fun)) 61 | agent$learn(1) 62 | ``` 63 | -------------------------------------------------------------------------------- /attr/play_atari_games.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Play Atari Games" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Play Atari Games} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | library(reticulate) 24 | os = import("os") 25 | os$environ[["TF_CPP_MIN_LOG_LEVEL"]]="3" 26 | ``` 27 | 28 | # rlR: play Atari games 29 | 30 | ## Convolutional Neural Network Structure 31 | 32 | ## Atari Environment 33 | For Atari Games, it makes more since to stack several recent frames since the agent need to know what is happening and with only one frame it is hard to judge the current situation. So we have the `observ_stack_len` parameter. 34 | 35 | ```{r} 36 | library(rlR) 37 | env = makeGymEnv("Seaquest-v0", observ_stack_len = 4L, state_preprocess = list(fun = rlR:::subsample)) 38 | ``` 39 | Since the input state space is RGB image, we would like to down sample the state space by the following function 40 | ```{r} 41 | rlR:::subsample 42 | ``` 43 | 44 | ```{r} 45 | env$overview() 46 | ``` 47 | 48 | ```{r eval=FALSE} 49 | env$snapshot(preprocess = T) 50 | env$snapshot(steps = 500, preprocess = F) 51 | ``` 52 | 53 | ```{r} 54 | conf = getDefaultConf("AgentFDQN") 55 | ``` 56 | 57 | The rlR package has been optimized to handle replay memory in a very efficient way, to ensure performance, 58 | you could also use the following parameters which has a bigger replay memory. 
59 | 60 | ```{r} 61 | conf$set(replay.batchsize = 32, 62 | replay.freq = 1L, 63 | console = TRUE, 64 | agent.lr.decay = 1, 65 | agent.lr = 0.00025, 66 | agent.update.target.freq = 1e4, 67 | replay.memname = "Png", 68 | render = F, 69 | policy.minEpsilon = 0.1, 70 | agent.start.learn = 5e4L, 71 | policy.aneal.steps = 1e6, 72 | replay.mem.size = 1e6, 73 | log = FALSE, 74 | agent.clip.td = TRUE, 75 | policy.decay.type = "decay_linear") 76 | ``` 77 | 78 | 79 | ```{r} 80 | makeCnnCritic = function(state_dim, act_cnt) { 81 | require("keras") 82 | text = paste("model <- keras_model_sequential();", 83 | 'model %>%', 84 | ' layer_conv_2d(filter = 16, kernel_size = c(8,8), strides = c(4, 4), 85 | padding = "same", input_shape = state_dim) %>%', 86 | 'layer_activation("relu") %>%', 87 | 'layer_conv_2d(filter = 32, kernel_size = c(4,4), strides = c(2, 2)) %>%', 88 | 'layer_activation("relu") %>%', 89 | 'layer_flatten() %>%', 90 | 'layer_dense(256) %>%', 91 | 'layer_activation("relu") %>%', 92 | 'layer_dense(act_cnt) %>%', 93 | 'layer_activation("linear");', 94 | 'opt <- optimizer_rmsprop(lr = 0.00025);', 95 | 'model %>% compile(loss = "mse", optimizer = opt, metrics = "accuracy")') 96 | model = eval(parse(text = text)) 97 | return(model) 98 | } 99 | ``` 100 | 101 | ```{r} 102 | agent = initAgent("AgentFDQN", env, conf, custom_brain = TRUE) 103 | agent$customizeBrain(list(value_fun = makeCnnCritic)) 104 | ``` 105 | 106 | ```{r} 107 | agent$learn(1L) 108 | ``` 109 | -------------------------------------------------------------------------------- /attr/repeated_experiment.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Repeated Experiment" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Repeated Experiment} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | library(reticulate) 24 | os = import("os") 25 | os$environ[["TF_CPP_MIN_LOG_LEVEL"]]="3" 26 | ``` 27 | 28 | # rlR: repeated experiment 29 | 30 | It make sense to repeatedly evaluate how an algorithm works for a particular scenario or environment. rlR provides the function `rlR::repExperiment` to serve this need. One could also use different cores to execute seperate experiment. 
31 | 32 | 33 | ```{r} 34 | library(doMC) 35 | registerDoMC(5) 36 | # or 37 | library(doParallel) 38 | cl <- makeCluster(5) 39 | registerDoParallel(cl) 40 | res = repExperiment(sname = "CartPole-v0", aname = "AgentDQN", conf = getDefaultConf("AgentDQN"), nrep = 5, nepi = 5) 41 | ``` 42 | -------------------------------------------------------------------------------- /benchmark/bt_algorithms.R: -------------------------------------------------------------------------------- 1 | nn4mountainCar = function(name, env) { 2 | model = keras_model_sequential() 3 | model %>% layer_dense(units = 10, activation = 'relu', input_shape = c(2)) %>% 4 | layer_dropout(rate = 0.25) %>% 5 | layer_dense(units = 3, activation = 'linear');model$compile(loss = 'mse', optimizer = optimizer_rmsprop(lr = 9e-4)) 6 | model 7 | } 8 | 9 | # instance is the return for problem 10 | rl_algo_dqn = function(data, job, instance) { 11 | env = makeGymEnv(name = instance) 12 | agent = initAgent("AgentDQN", env = env) 13 | if (instance == "MountainCar-v0") { 14 | model = nn4mountainCar() 15 | agent$customizeBrain(model) 16 | agent$updatePara(console = TRUE, render = TRUE, log = TRUE, policy.maxEpsilon = 0.15, policy.minEpsilon = 0.05, policy.decay = exp(-0.001), replay.batchsize = 10, replay.epochs = 4, agent.lr_decay = exp(-0.001), agent.gamma = 0.95) 17 | } 18 | perf = agent$learn(data$iteration) 19 | return(perf = perf) # key for table join 20 | } 21 | 22 | rl_algo_ddqn = function(data, job, instance) { 23 | env = makeGymEnv(name = instance) 24 | agent = initAgent("AgentDDQN", env = env) 25 | perf = agent$learn(data$iteration) 26 | return(perf = perf) # key for table join 27 | } 28 | 29 | rl_algo_fdqn = function(data, job, instance) { 30 | env = makeGymEnv(name = instance) 31 | agent = initAgent("AgentFDQN", env = env) 32 | perf = agent$learn(data$iteration) 33 | return(perf = perf) # key for table join 34 | } 35 | 36 | rl_algo_pg = function(data, job, instance) { 37 | env = makeGymEnv(name = instance) 38 | agent = initAgent("AgentPG", env = env) 39 | perf = agent$learn(data$iteration) 40 | return(perf = perf) # key for table join 41 | } 42 | 43 | rl_algo_pgb = function(data, job, instance) { 44 | env = makeGymEnv(name = instance) 45 | agent = initAgent("AgentPGBaseline", env = env) 46 | perf = agent$learn(data$iteration) 47 | return(perf = perf) # key for table join 48 | } 49 | 50 | rl_algo_pgac = function(data, job, instance) { 51 | env = makeGymEnv(name = instance) 52 | agent = initAgent("AgentActorCritic", env = env) 53 | perf = agent$learn(data$iteration) 54 | return(perf = perf) # key for table join 55 | } 56 | -------------------------------------------------------------------------------- /benchmark/bt_conf.R: -------------------------------------------------------------------------------- 1 | # Configuration for benchmarking with batchtools: only one global conf variable 2 | gbtconf = list() 3 | 4 | ## Dependencies 5 | gbtconf$preSource = c("bt_algorithms.R", "bt_conf.R", "bt_problem.R") 6 | gbtconf$prePackage = c("batchtools", "checkmate", "data.table", "R6", "reticulate", "keras", "logging", "BBmisc", "openssl", "ggplot2", "reshape2", "rlR") 7 | #gbtconf$prePackage = c("aslib") 8 | ## EVALUATION 9 | gbtconf$SEED_REGISTRY = 1273L # global seed for reg 10 | gbtconf$SEED_ADDPROBLEM = 1L # seed for each problem 11 | gbtconf$REPLS = 1L 12 | 13 | gbtconf$agent.name = c("AgentDQN", "AgentFDQN", "AgentDDQN", "AgentPG", "AgentPGBaseline", "AgentActorCritic") 14 | gbtconf$replay = c("ReplayMemUniform", "ReplayMemLatest", 
"ReplayMemPrioritizedRank") 15 | gbtconf$policy = c("PolicyEpsilonGreedy", "PolicyProbEpsilon") 16 | 17 | ## Experiment 18 | gbtconf$REG_FILE_DIR = "bt_reg_new" 19 | gbtconf$ALGO_RUN = c("rl_algo_dqn", "rl_algo_ddqn", "rl_algo_fdqn", "rl_algo_pg", "rl_algo_pgb", "rl_algo_pgac") 20 | gbtconf$PROB_RUN = c("rl_prob") 21 | gbtconf$PROB_LIST = list() 22 | #gbtconf$PROB_LIST[["rl_prob"]] = list(fun = "rl_prob", prob.data = c("MountainCar-v0", "CartPole-v0", "Amidar-ram-v0", "WizardOfWor-ram-v0", "Asteroids-ram-v0", "KungFuMaster-ram-v0", "JourneyEscape-ram-v0", "Acrobot-v1") 23 | #gbtconf$PROB_LIST[["rl_prob"]] = list(fun = "rl_prob", prob.data = c("Pong-ram-v0", "CartPole-v0", "Acrobot-v1") 24 | gbtconf$PROB_LIST[["rl_prob"]] = list(fun = "rl_prob", prob.data = c("Pong-ram-v0") 25 | ) 26 | gbtconf$iteration = 1000L 27 | -------------------------------------------------------------------------------- /benchmark/bt_experiment.R: -------------------------------------------------------------------------------- 1 | # addProblem, addAlgorithm, addExperiments(algo.design = ades, repls = REPLS) 2 | source("bt_conf.R") 3 | pp = readline("Are you really sure to delete the registry and restart? Y OR N") 4 | if (pp == "Y") unlink(gbtconf$REG_FILE_DIR, recursive = TRUE, force = TRUE) 5 | reg = batchtools::makeExperimentRegistry(file.dir = gbtconf$REG_FILE_DIR, 6 | source = c(gbtconf$preSource), 7 | packages = gbtconf$prePackage, 8 | seed = gbtconf$SEED_REGISTRY) 9 | 10 | 11 | 12 | lapply(gbtconf$prePackage, require, character.only = TRUE) 13 | lapply(gbtconf$preSource, source) 14 | 15 | # Cartesian product 16 | #des = expand.grid(lrn.cl = c("1", "2"), ft.extract.method = c("A", "B"), stringsAsFactors = FALSE) 17 | 18 | pdes = list() 19 | for (prob in gbtconf$PROB_RUN) { 20 | addProblem(name = prob, data = list(iteration = gbtconf$iteration), fun = get(gbtconf$PROB_LIST[[prob]]$fun), seed = gbtconf$SEED_ADDPROBLEM) 21 | pdes[[prob]] = data.frame(s.name = gbtconf$PROB_LIST[[prob]]$prob.data, stringsAsFactors = FALSE) 22 | } 23 | 24 | gbtconf$ALGO_LIST = list() 25 | 26 | 27 | #gbtconf$ALGO_LIST$rl_algo_dqn = list(fun = rl_algo, design = data.frame(agent.name = gbtconf$agent.name, stringsAsFactors = FALSE)) 28 | gbtconf$ALGO_LIST$rl_algo_dqn = list(fun = rl_algo_dqn) 29 | gbtconf$ALGO_LIST$rl_algo_fdqn = list(fun = rl_algo_fdqn) 30 | gbtconf$ALGO_LIST$rl_algo_ddqn = list(fun = rl_algo_ddqn) 31 | gbtconf$ALGO_LIST$rl_algo_pg = list(fun = rl_algo_pg) 32 | gbtconf$ALGO_LIST$rl_algo_pgb = list(fun = rl_algo_pgb) 33 | gbtconf$ALGO_LIST$rl_algo_pgac = list(fun = rl_algo_pgac) 34 | 35 | 36 | ades = list() 37 | 38 | for (algo in gbtconf$ALGO_RUN) { 39 | addAlgorithm(name = algo, fun = gbtconf$ALGO_LIST[[algo]]$fun) 40 | # ades[[algo]] = gbtconf$ALGO_LIST[[algo]]$design 41 | } 42 | addExperiments(prob.design = pdes, algo.design = NULL, repls = gbtconf$REPLS) 43 | unwrap(getJobPars()) 44 | -------------------------------------------------------------------------------- /benchmark/bt_problem.R: -------------------------------------------------------------------------------- 1 | # create configuration object 2 | rl_prob = function(data, job, s.name) { 3 | return(s.name) 4 | } 5 | -------------------------------------------------------------------------------- /benchmark/plotHelper.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | 3 | 4 | 5 | # Multiple plot function 6 | # 7 | # ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) 8 | # 
- cols: Number of columns in layout 9 | # - layout: A matrix specifying the layout. If present, 'cols' is ignored. 10 | # 11 | # If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), 12 | # then plot 1 will go in the upper left, 2 will go in the upper right, and 13 | # 3 will go all the way across the bottom. 14 | # 15 | multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { 16 | library(grid) 17 | 18 | # Make a list from the ... arguments and plotlist 19 | plots <- c(list(...), plotlist) 20 | 21 | numPlots = length(plots) 22 | 23 | # If layout is NULL, then use 'cols' to determine layout 24 | if (is.null(layout)) { 25 | # Make the panel 26 | # ncol: Number of columns of plots 27 | # nrow: Number of rows needed, calculated from # of cols 28 | layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), 29 | ncol = cols, nrow = ceiling(numPlots/cols)) 30 | } 31 | 32 | if (numPlots==1) { 33 | print(plots[[1]]) 34 | 35 | } else { 36 | # Set up the page 37 | grid.newpage() 38 | pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) 39 | 40 | # Make each plot, in the correct location 41 | for (i in 1:numPlots) { 42 | # Get the i,j matrix positions of the regions that contain this subplot 43 | matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) 44 | 45 | print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, 46 | layout.pos.col = matchidx$col)) 47 | } 48 | } 49 | } 50 | 51 | 52 | 53 | 54 | # This example uses the ChickWeight dataset, which comes with ggplot2 55 | # First plot 56 | # p1 <- ggplot(ChickWeight, aes(x=Time, y=weight, colour=Diet, group=Chick)) + 57 | # geom_line() + 58 | # ggtitle("Growth curve for individual chicks") 59 | # 60 | # Second plot 61 | # p2 <- ggplot(ChickWeight, aes(x=Time, y=weight, colour=Diet)) + 62 | # geom_point(alpha=.3) + 63 | # geom_smooth(alpha=.2, size=1) + 64 | # ggtitle("Fitted growth curve per diet") 65 | # 66 | # Third plot 67 | # p3 <- ggplot(subset(ChickWeight, Time==21), aes(x=weight, colour=Diet)) + 68 | # geom_density() + 69 | # ggtitle("Final weight, by diet") 70 | # 71 | # Fourth plot 72 | # p4 <- ggplot(subset(ChickWeight, Time==21), aes(x=weight, fill=Diet)) + 73 | # geom_histogram(colour="black", binwidth=50) + 74 | # facet_grid(Diet ~ .) 
+ 75 | # ggtitle("Final weight, by diet") + 76 | # theme(legend.position="none") # No legend (redundant in this graph) 77 | # 78 | # multiplot(p1, p2, p3, p4, cols = 2) 79 | #> `geom_smooth()` using method = 'loess' 80 | -------------------------------------------------------------------------------- /benchmark/rl_h.R: -------------------------------------------------------------------------------- 1 | # this file is temporary before a package is made 2 | library(checkmate) 3 | library(data.table) 4 | library(R6) 5 | library(reticulate) 6 | library(keras) 7 | library(logging) 8 | library(BBmisc) 9 | library(openssl) 10 | library(ggplot2) 11 | library(reshape2) 12 | library(formattable) 13 | list.libs.imports = c("checkmate", "data.table", "R6", "reticulate", "keras", "logging", "BBmisc", "openssl", "ggplot2", "reshape2", "formattable") 14 | list.libs.suggest = c("checkmate", "data.table", "BBmisc", "openssl", "ggplot2", "reshape2", "formattable") 15 | lapply(list.libs.imports, function(x) devtools::use_package(x)) 16 | lapply(list.libs.imports, function(x) devtools::use_package(x, "Suggest")) 17 | set.seed(1L) 18 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | codecov: 3 | token: 06465217-2436-4008-85f9-9a56a3c6c785 4 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | * local OS X install, R 3.4.4 3 | * ubuntu 12.04 (on travis-ci), R 3.4.4 4 | * win-builder (devel and release) 5 | 6 | ## R CMD check results 7 | 8 | 0 errors | 0 warnings | 1 note 9 | 10 | * This is a new release. 11 | 12 | ## Reverse dependencies 13 | 14 | This is a new release, so there are no reverse dependencies. 15 | 16 | --- 17 | 18 | * I have run R CMD check on the NUMBER downstream dependencies. 19 | (Summary at ...). 20 | 21 | * FAILURE SUMMARY 22 | 23 | * All revdep maintainers were notified of the release on RELEASE DATE. 24 | -------------------------------------------------------------------------------- /cran_check.sh: -------------------------------------------------------------------------------- 1 | R CMD build . 2 | R CMD check --as-cran rlR_0.1.0.tar.gz 3 | -------------------------------------------------------------------------------- /docs/LICENSE-text.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | License • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
50 |
51 | 115 | 116 | 117 |
118 | 119 |
120 |
121 | 124 | 125 |
YEAR: 2018
126 | COPYRIGHT HOLDER: Xudong Sun
127 | 
128 | 129 |
130 | 131 |
132 | 133 | 134 |
135 | 138 | 139 |
140 |

Site built with pkgdown.

141 |
142 | 143 |
144 |
145 | 146 | 147 | 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Articles • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
50 |
51 | 115 | 116 | 117 |
118 | 119 |
120 |
121 | 124 | 125 |
126 |

All vignettes

127 |

128 | 129 | 136 |
137 |
138 |
139 | 140 |
141 | 144 | 145 |
146 |

Site built with pkgdown.

147 |
148 | 149 |
150 |
151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Authors • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
Xudong Sun. Author, maintainer.

Sebastian Gruber. Contributor.
152 | 153 | 154 | 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | /* Sticky footer */ 2 | 3 | /** 4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ 5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css 6 | * 7 | * .Site -> body > .container 8 | * .Site-content -> body > .container .row 9 | * .footer -> footer 10 | * 11 | * Key idea seems to be to ensure that .container and __all its parents__ 12 | * have height set to 100% 13 | * 14 | */ 15 | 16 | html, body { 17 | height: 100%; 18 | } 19 | 20 | body > .container { 21 | display: flex; 22 | height: 100%; 23 | flex-direction: column; 24 | 25 | padding-top: 60px; 26 | } 27 | 28 | body > .container .row { 29 | flex: 1 0 auto; 30 | } 31 | 32 | footer { 33 | margin-top: 45px; 34 | padding: 35px 0 36px; 35 | border-top: 1px solid #e5e5e5; 36 | color: #666; 
37 | display: flex; 38 | flex-shrink: 0; 39 | } 40 | footer p { 41 | margin-bottom: 0; 42 | } 43 | footer div { 44 | flex: 1; 45 | } 46 | footer .pkgdown { 47 | text-align: right; 48 | } 49 | footer p { 50 | margin-bottom: 0; 51 | } 52 | 53 | img.icon { 54 | float: right; 55 | } 56 | 57 | img { 58 | max-width: 100%; 59 | } 60 | 61 | /* Typographic tweaking ---------------------------------*/ 62 | 63 | .contents h1.page-header { 64 | margin-top: calc(-60px + 1em); 65 | } 66 | 67 | /* Section anchors ---------------------------------*/ 68 | 69 | a.anchor { 70 | margin-left: -30px; 71 | display:inline-block; 72 | width: 30px; 73 | height: 30px; 74 | visibility: hidden; 75 | 76 | background-image: url(./link.svg); 77 | background-repeat: no-repeat; 78 | background-size: 20px 20px; 79 | background-position: center center; 80 | } 81 | 82 | .hasAnchor:hover a.anchor { 83 | visibility: visible; 84 | } 85 | 86 | @media (max-width: 767px) { 87 | .hasAnchor:hover a.anchor { 88 | visibility: hidden; 89 | } 90 | } 91 | 92 | 93 | /* Fixes for fixed navbar --------------------------*/ 94 | 95 | .contents h1, .contents h2, .contents h3, .contents h4 { 96 | padding-top: 60px; 97 | margin-top: -40px; 98 | } 99 | 100 | /* Static header placement on mobile devices */ 101 | @media (max-width: 767px) { 102 | .navbar-fixed-top { 103 | position: absolute; 104 | } 105 | .navbar { 106 | padding: 0; 107 | } 108 | } 109 | 110 | 111 | /* Sidebar --------------------------*/ 112 | 113 | #sidebar { 114 | margin-top: 30px; 115 | } 116 | #sidebar h2 { 117 | font-size: 1.5em; 118 | margin-top: 1em; 119 | } 120 | 121 | #sidebar h2:first-child { 122 | margin-top: 0; 123 | } 124 | 125 | #sidebar .list-unstyled li { 126 | margin-bottom: 0.5em; 127 | } 128 | 129 | .orcid { 130 | height: 16px; 131 | vertical-align: middle; 132 | } 133 | 134 | /* Reference index & topics ----------------------------------------------- */ 135 | 136 | .ref-index th {font-weight: normal;} 137 | 138 | .ref-index td {vertical-align: top;} 139 | .ref-index .alias {width: 40%;} 140 | .ref-index .title {width: 60%;} 141 | 142 | .ref-index .alias {width: 40%;} 143 | .ref-index .title {width: 60%;} 144 | 145 | .ref-arguments th {text-align: right; padding-right: 10px;} 146 | .ref-arguments th, .ref-arguments td {vertical-align: top;} 147 | .ref-arguments .name {width: 20%;} 148 | .ref-arguments .desc {width: 80%;} 149 | 150 | /* Nice scrolling for wide elements --------------------------------------- */ 151 | 152 | table { 153 | display: block; 154 | overflow: auto; 155 | } 156 | 157 | /* Syntax highlighting ---------------------------------------------------- */ 158 | 159 | pre { 160 | word-wrap: normal; 161 | word-break: normal; 162 | border: 1px solid #eee; 163 | } 164 | 165 | pre, code { 166 | background-color: #f8f8f8; 167 | color: #333; 168 | } 169 | 170 | pre code { 171 | overflow: auto; 172 | word-wrap: normal; 173 | white-space: pre; 174 | } 175 | 176 | pre .img { 177 | margin: 5px 0; 178 | } 179 | 180 | pre .img img { 181 | background-color: #fff; 182 | display: block; 183 | height: auto; 184 | } 185 | 186 | code a, pre a { 187 | color: #375f84; 188 | } 189 | 190 | a.sourceLine:hover { 191 | text-decoration: none; 192 | } 193 | 194 | .fl {color: #1514b5;} 195 | .fu {color: #000000;} /* function */ 196 | .ch,.st {color: #036a07;} /* string */ 197 | .kw {color: #264D66;} /* keyword */ 198 | .co {color: #888888;} /* comment */ 199 | 200 | .message { color: black; font-weight: bolder;} 201 | .error { color: orange; font-weight: bolder;} 202 | 
.warning { color: #6A0366; font-weight: bolder;} 203 | 204 | /* Clipboard --------------------------*/ 205 | 206 | .hasCopyButton { 207 | position: relative; 208 | } 209 | 210 | .btn-copy-ex { 211 | position: absolute; 212 | right: 0; 213 | top: 0; 214 | visibility: hidden; 215 | } 216 | 217 | .hasCopyButton:hover button.btn-copy-ex { 218 | visibility: visible; 219 | } 220 | 221 | /* mark.js ----------------------------*/ 222 | 223 | mark { 224 | background-color: rgba(255, 255, 51, 0.5); 225 | border-bottom: 2px solid rgba(255, 153, 51, 0.3); 226 | padding: 1px; 227 | } 228 | 229 | /* vertical spacing after htmlwidgets */ 230 | .html-widget { 231 | margin-bottom: 10px; 232 | } 233 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $("#sidebar") 6 | .stick_in_parent({offset_top: 40}) 7 | .on('sticky_kit:bottom', function(e) { 8 | $(this).parent().css('position', 'static'); 9 | }) 10 | .on('sticky_kit:unbottom', function(e) { 11 | $(this).parent().css('position', 'relative'); 12 | }); 13 | 14 | $('body').scrollspy({ 15 | target: '#sidebar', 16 | offset: 60 17 | }); 18 | 19 | $('[data-toggle="tooltip"]').tooltip(); 20 | 21 | var cur_path = paths(location.pathname); 22 | var links = $("#navbar ul li a"); 23 | var max_length = -1; 24 | var pos = -1; 25 | for (var i = 0; i < links.length; i++) { 26 | if (links[i].getAttribute("href") === "#") 27 | continue; 28 | var path = paths(links[i].pathname); 29 | 30 | var length = prefix_length(cur_path, path); 31 | if (length > max_length) { 32 | max_length = length; 33 | pos = i; 34 | } 35 | } 36 | 37 | // Add class to parent
  • , and enclosing
  • if in dropdown 38 | if (pos >= 0) { 39 | var menu_anchor = $(links[pos]); 40 | menu_anchor.parent().addClass("active"); 41 | menu_anchor.closest("li.dropdown").addClass("active"); 42 | } 43 | }); 44 | 45 | function paths(pathname) { 46 | var pieces = pathname.split("/"); 47 | pieces.shift(); // always starts with / 48 | 49 | var end = pieces[pieces.length - 1]; 50 | if (end === "index.html" || end === "") 51 | pieces.pop(); 52 | return(pieces); 53 | } 54 | 55 | function prefix_length(needle, haystack) { 56 | if (needle.length > haystack.length) 57 | return(0); 58 | 59 | // Special case for length-0 haystack, since for loop won't run 60 | if (haystack.length === 0) { 61 | return(needle.length === 0 ? 1 : 0); 62 | } 63 | 64 | for (var i = 0; i < haystack.length; i++) { 65 | if (needle[i] != haystack[i]) 66 | return(i); 67 | } 68 | 69 | return(haystack.length); 70 | } 71 | 72 | /* Clipboard --------------------------*/ 73 | 74 | function changeTooltipMessage(element, msg) { 75 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 76 | element.setAttribute('data-original-title', msg); 77 | $(element).tooltip('show'); 78 | element.setAttribute('data-original-title', tooltipOriginalTitle); 79 | } 80 | 81 | if(Clipboard.isSupported()) { 82 | $(document).ready(function() { 83 | var copyButton = ""; 84 | 85 | $(".examples, div.sourceCode").addClass("hasCopyButton"); 86 | 87 | // Insert copy buttons: 88 | $(copyButton).prependTo(".hasCopyButton"); 89 | 90 | // Initialize tooltips: 91 | $('.btn-copy-ex').tooltip({container: 'body'}); 92 | 93 | // Initialize clipboard: 94 | var clipboardBtnCopies = new Clipboard('[data-clipboard-copy]', { 95 | text: function(trigger) { 96 | return trigger.parentNode.textContent; 97 | } 98 | }); 99 | 100 | clipboardBtnCopies.on('success', function(e) { 101 | changeTooltipMessage(e.trigger, 'Copied!'); 102 | e.clearSelection(); 103 | }); 104 | 105 | clipboardBtnCopies.on('error', function() { 106 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 107 | }); 108 | }); 109 | } 110 | })(window.jQuery || window.$) 111 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 1.19.2.1 2 | pkgdown: 1.1.0.9000 3 | pkgdown_sha: ~ 4 | articles: 5 | custom_configuration: custom_configuration.html 6 | define_custom_environments: define_custom_environments.html 7 | python_dependencies: python_dependencies.html 8 | repeated_experiment: repeated_experiment.html 9 | table_learning: table_learning.html 10 | 11 | -------------------------------------------------------------------------------- /docs/reference/Agent.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Agent — Agent • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
An abstract R6Class to represent Agent

Agent

Format

R6Class object

Value

[Agent].

Methods

learn(iter) [function] Run iter episodes.
plotPerf() [function] Plot performance.
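A minimal usage sketch of the Agent interface, assembled from the example in paper/paper.md and the test suite (it assumes the Python dependencies for Gym and Keras are installed):

```r
library(rlR)
env = makeGymEnv("CartPole-v0")            # wrap an OpenAI Gym environment
conf = getDefaultConf("AgentDQN")          # default hyper-parameters for the DQN agent
conf$set(render = FALSE, console = FALSE)
agent = initAgent("AgentDQN", env, conf)   # returns an Agent subclass instance
perf = agent$learn(200L)                   # learn(iter): run 200 episodes
agent$plotPerf()                           # plotPerf(): plot the learning curve
```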
    183 | 184 | 185 | 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /docs/reference/checkPyDep.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Check if python dependencies work — checkPyDep • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
Check if python dependencies work

checkPyDep()

Value

TRUE if all python dependencies work
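A short sketch of how this check can guard an experiment, following the pattern in tests/testthat/test_file_zzz.R:

```r
library(rlR)
if (checkPyDep()) {                 # TRUE only if gym, keras and tensorflow are usable from R
  env = makeGymEnv("CartPole-v0")
  agent = initAgent("AgentDQN", env)
  agent$learn(2L)
}
```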
    165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /docs/reference/listAvailAgent.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | List implemented Agents — listAvailAgent • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
List all implemented Agents

listAvailAgent()
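For example, one can inspect the available agents and pick one by name (a sketch; the agent names are those used throughout the tests, e.g. AgentDQN, AgentFDQN, AgentDDQN, AgentPG, AgentPGBaseline, AgentActorCritic):

```r
library(rlR)
listAvailAgent()                                                 # show the implemented agents
agent = initAgent(name = "AgentTable", env = "CliffWalking-v0")  # as in the tabular-learning vignette
```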
    159 | 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /docs/reference/listAvailConf.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | listAvailConf — listAvailConf • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
List the names of the default hyper-parameters

listAvailConf()
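The custom-configuration vignette uses this to look up which hyper-parameters exist and what they mean, e.g.:

```r
library(rlR)
listAvailConf()[, .(name, note)]   # data.table of parameter names and notes, as in the vignette
```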
    159 | 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /docs/reference/listGymEnvs.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | list environments from OPENAI gym — listGymEnvs • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
List all Gym Environments without testing them

listGymEnvs()
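A quick sketch (assuming the gym Python package is installed):

```r
library(rlR)
envs = listGymEnvs()              # environment names only; nothing is instantiated
env = makeGymEnv("CartPole-v0")   # instantiate one of them
env$overview()
```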
    159 | 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /docs/reference/rlr_test_if_gym_works.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Test if gym is installed — rlr_test_if_gym_works • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
Test if gym is installed

rlr_test_if_gym_works()

Value

TRUE if success
    165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /docs/reference/rlr_test_if_keras_works.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Test if keras works — rlr_test_if_keras_works • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
Test if keras is installed

rlr_test_if_keras_works()

Value

TRUE if success
    165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /docs/reference/rlr_test_if_tensorflow_works.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Test if tensorflow works from R session — rlr_test_if_tensorflow_works • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
Test if tensorflow works from R session

rlr_test_if_tensorflow_works()

Value

TRUE if tensorflow works
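Together with rlr_test_if_gym_works() and rlr_test_if_keras_works() above, this helper mirrors the smoke test in tests/testthat/test_file_zzz.R:

```r
library(rlR)
rlr_test_if_gym_works()          # TRUE if gym is installed
rlr_test_if_keras_works()        # TRUE if keras is installed
rlr_test_if_tensorflow_works()   # TRUE if tensorflow is reachable from the R session
```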
    165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /docs/reference/showDefaultConf.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | show Default Configuration — showDefaultConf • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
List the default hyper-parameters in a data frame

showDefaultConf()

Examples

df = rlR::showDefaultConf()
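A minimal sketch of working with the defaults, following the configuration vignette and tests/testthat/test_file_conf.R:

```r
library(rlR)
df = showDefaultConf()              # all default hyper-parameters as a data frame
conf = getDefaultConf("AgentDQN")   # defaults for a single agent
conf$set(agent.lr = 0.1, render = FALSE, console = FALSE)
conf$show()
```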
    163 | 164 | 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /inst/figures/ac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/inst/figures/ac.png -------------------------------------------------------------------------------- /inst/figures/ac300.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/inst/figures/ac300.png -------------------------------------------------------------------------------- /inst/figures/acrobat.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/inst/figures/acrobat.pdf -------------------------------------------------------------------------------- /inst/figures/dqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/inst/figures/dqn.png -------------------------------------------------------------------------------- /inst/figures/mplot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/inst/figures/mplot-1.png -------------------------------------------------------------------------------- /inst/repAtari200.R: -------------------------------------------------------------------------------- 1 | library(rlR) 2 | conf = getDefaultConf("AgentFDQN") 3 | conf$set(replay.batchsize = 32, 4 | replay.freq = 1L, 5 | console = TRUE, 6 | agent.lr.decay = 1, 7 | agent.lr = 0.00025, 8 | agent.update.target.freq = 1e4, 9 | replay.memname = "Png", 10 | render = F, 11 | policy.minEpsilon = 0.1, 12 | agent.start.learn = 5e4L, 13 | policy.aneal.steps = 4e5, 14 | replay.mem.size = 4e5, 15 | log = FALSE, 16 | agent.clip.td = TRUE, 17 | policy.decay.type = "decay_linear") 18 | 19 | makeCnnCritic = function(state_dim, act_cnt) { 20 | require("keras") 21 | text = paste("model <- keras_model_sequential();", 22 | 'model %>%', 23 | ' layer_conv_2d(filter = 16, kernel_size = c(8,8), strides = c(4, 4), 24 | padding = "same", input_shape = state_dim) %>%', 25 | 'layer_activation("relu") %>%', 26 | 'layer_conv_2d(filter = 32, kernel_size = c(4,4), strides = c(2, 2)) %>%', 27 | 'layer_activation("relu") %>%', 28 | 'layer_flatten() %>%', 29 | 'layer_dense(256) %>%', 30 | 'layer_activation("relu") %>%', 31 | 'layer_dense(act_cnt) %>%', 32 | 'layer_activation("linear");', 33 | 'opt <- optimizer_rmsprop(lr = 0.00025);', 34 | 'model %>% compile(loss = "mse", optimizer = opt, metrics = "accuracy")') 35 | model = eval(parse(text = text)) 36 | return(model) 37 | } 38 | 39 | library(doParallel) 40 | cl = makeCluster(5) 41 | registerDoParallel(cl) 42 | res = repExperiment(sname = "Seaquest-v0", aname = "AgentFDQN", conf = conf, nrep = 10, nepi = 200, value_fun = makeCnnCritic, observ_stack_len = 3L, state_preprocess = list(fun = rlR:::subsample)) 43 | -------------------------------------------------------------------------------- /paper/Makefile: -------------------------------------------------------------------------------- 1 | all: paper.pdf 2 | 3 | paper.pdf: paper.md paper.bib latex.template 4 | pandoc --filter pandoc-citeproc 
--bibliography paper.bib paper.md \ 5 | --template latex.template -o paper.pdf 6 | 7 | clean: 8 | rm paper.pdf 9 | 10 | .PHONY: clean 11 | -------------------------------------------------------------------------------- /paper/figures/ac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/paper/figures/ac.png -------------------------------------------------------------------------------- /paper/figures/ac300.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/paper/figures/ac300.png -------------------------------------------------------------------------------- /paper/figures/acrobat.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/paper/figures/acrobat.pdf -------------------------------------------------------------------------------- /paper/figures/dqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/paper/figures/dqn.png -------------------------------------------------------------------------------- /paper/figures/mplot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/paper/figures/mplot-1.png -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'rlR: A R package for deep reinforcement learning' 3 | tags: 4 | - R 5 | - reinforcement learning 6 | - deep learning 7 | authors: 8 | - name: Xudong Sun 9 | orcid: 0000-0001-9234-4932 10 | affiliation: 1 11 | - name: Sebastian Gruber 12 | orcid: 0000-0002-8544-3470 13 | affiliation: 1 14 | - name: Markus Dumke 15 | orcid: 0000-0000-0000-0000 16 | affiliation: 1 17 | - name: Bernd Bischl 18 | orcid: 0000-0000-0000-0000 19 | affiliation: 1 20 | affiliations: 21 | - name: Ludwig-Maximillians-University of Munich 22 | index: 1 23 | date: 15 October 2018 24 | bibliography: paper.bib 25 | output: pdf_document 26 | --- 27 | 28 | # Summary 29 | 30 | Deep reinforcement learning has gained increasing attention in recent years due to its success in solving 31 | many complex scenarios including Atari Games [@Mnih2015], Continuous Robotic Control [@Lillicrap2016a], 32 | The game of Go [@Silver2016a] and so on. Although during our package development, we noticed some light-weight R packages occurs in between for doing reinforcement learning, most of them either only have tabular learning algorithm [@Nicolas2018], or lacks the ability to handle complicated state input like image series state input(Atari games for example) or contain only a single deep reinforcement learning algorithm [@Dumke2018]. More over, as a software package, it is not only important to show examples, but should also handle user defined environments at full fledge as input and the architecture design of the package should be loose coupling as possible to incorporate new algorithms. 
33 | 34 | The package rlR aims to address the drawbacks above by serving as a generic deep reinforcement learning solver: the user supplies a customized environment or scenario as input. Several deep reinforcement learning algorithms are included, and examples of how to use the library are well documented. We also wrap the OpenAI Gym environments, including the Atari games, so users can experiment with them. Tensorflow is used as the deep learning backend, serving as a universal function approximator. 35 | 36 | # Highlights 37 | 38 | The package rlR is written in an Aspect Oriented Programming fashion, which allows customized 39 | operations during the interaction between the agent and the environment. The package is also designed in an Object Oriented fashion, using various software-engineering design patterns, which makes it easy to extend with new algorithms. 40 | 41 | Most operations are configurable through a single configuration object, where the user can easily 42 | query the meaning of each configuration parameter instead of passing different arguments to 43 | different functions. This greatly facilitates reproducibility. 44 | 45 | Users can define an environment as an R6 class, which greatly increases the expressiveness of the 46 | customized environment. For example, the user can define the initialization of the 47 | environment and what to do after each step and each episode. 48 | 49 | # Example 50 | ``` 51 | env = makeGymEnv("CartPole-v1") 52 | env$overview() 53 | conf = getDefaultConf("AgentDQN") 54 | conf$show() 55 | conf$set(render = FALSE, console = FALSE) 56 | agent = initAgent("AgentDQN", env, conf) 57 | agent$learn(200L) 58 | agent$plotPerf() 59 | ``` 60 | ![CartPole Scenario Performance](figures/mplot-1.png) 61 | 62 | # Acknowledgements 63 | 64 | We acknowledge helpful suggestions from Janek Thomas and the support of the DFG. 65 | 66 | # References 67 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | ###### Requirements without Version Specifiers ###### 2 | #numpy 3 | # 4 | ###### Requirements with Version Specifiers ###### 5 | # See https://www.python.org/dev/peps/pep-0440/#version-specifiers 6 | tensorflow >= 1.8.0 # Minimum version 7 | keras == 2.1.6 8 | gym == 0.10.5 9 | cmake 10 | gym[atari]==0.10.5 11 | #coverage != 3.5 # Version Exclusion. Anything except version 3.5 12 | #Mopidy-Dirble ~= 1.1 # Compatible release.
Same as >= 1.1, == 1.efer to other requirements files ###### 13 | ###### Refer to other requirements files ###### 14 | #-r other-requirements.txt 15 | -------------------------------------------------------------------------------- /rlR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | StripTrailingWhitespace: Yes 16 | 17 | BuildType: Package 18 | PackageUseDevtools: Yes 19 | PackageInstallArgs: --no-multiarch --with-keep.source 20 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(rlR) 3 | 4 | test_check("rlR") 5 | -------------------------------------------------------------------------------- /tests/testthat/test_environment.R: -------------------------------------------------------------------------------- 1 | context("custom_environment") 2 | test_that("test custom environment EnvToy works", { 3 | env = EnvToy$new() 4 | env$overview() 5 | env$reset() 6 | env$step(1) 7 | env$afterAll() 8 | conf = getDefaultConf("AgentDQN") 9 | agent = initAgent("AgentDQN", env, conf) 10 | perf = agent$learn(3L) 11 | expect_class(perf, "Performance") 12 | }) 13 | 14 | 15 | test_that("Gym constructor Works", { 16 | env = makeGymEnv(name = "CartPole-v0") 17 | sr = Surrogate$new(3, c(2, 5, 7), createModel.fun = NULL) 18 | expect_error(sr$train()) 19 | expect_error(sr$pred()) 20 | }) 21 | -------------------------------------------------------------------------------- /tests/testthat/test_file_conf.R: -------------------------------------------------------------------------------- 1 | context("configuration") 2 | test_that("Conf object", { 3 | conf = getDefaultConf("AgentDQN") 4 | conf$get("agent.lr") 5 | conf$updatePara("agent.lr", 0.1) 6 | conf$set(agent.lr = 0.1) 7 | conf$show() 8 | expect_true(TRUE) 9 | }) 10 | 11 | context("naked conf") 12 | test_that("test conf", { 13 | conf = RLConf$new() 14 | expect_class(conf, "RLConf") 15 | RLLog$new(conf) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test_file_nnArsenal.R: -------------------------------------------------------------------------------- 1 | context("nnArsenal") 2 | test_that("check custom network", { 3 | fun = function(state_dim, act_cnt) { 4 | requireNamespace("keras") 5 | require("keras") 6 | model = keras_model_sequential() 7 | model %>% 8 | layer_dense(units = 256, activation = 'relu', input_shape = c(state_dim)) %>% 9 | layer_dropout(rate = 0.4) %>% 10 | layer_dense(units = 128, activation = 'relu') %>% 11 | layer_dropout(rate = 0.3) %>% 12 | layer_dense(units = act_cnt, activation = 'softmax') 13 | model 14 | } 15 | checkCustomNetwork(fun, 3, 3) 16 | expect_true(TRUE) 17 | }) 18 | 19 | test_that("default network works", { 20 | agent.nn.arch = list(nhidden = 64, act1 = "relu", act2 = "linear", loss = "mse", lr = 0.00025, kernel_regularizer = "regularizer_l2(l=0.0)", bias_regularizer = "regularizer_l2(l=0.0)") 21 | makeKerasModel(input_shape = 2, output_shape = 2, arch.list = agent.nn.arch) 22 | makeCnnActor(c(32, 32, 3), 10L) 23 | makeCnnCritic(c(32, 32, 3), 10L) 24 | #createActorNetwork(3, 2) 25 | 
#createCriticNetwork(3, 2) 26 | expect_true(TRUE) 27 | }) 28 | 29 | 30 | test_that("custom policy network works", { 31 | conf = getDefaultConf("AgentActorCritic") 32 | conf$set(console = TRUE) 33 | env = makeGymEnv("KungFuMaster-ram-v0", repeat_n_act = 4) 34 | agent = initAgent("AgentActorCritic", env, conf) 35 | mfun_val = function(state_dim, act_cnt) { 36 | requireNamespace("keras") 37 | model = keras::keras_model_sequential() 38 | model %>% 39 | layer_dense(units = 512, activation = "relu", 40 | input_shape = c(state_dim)) %>% 41 | layer_dropout(rate = 0.25) %>% 42 | layer_dense(units = 1, 43 | activation = "linear") 44 | model$compile(loss = "mse", 45 | optimizer = optimizer_rmsprop(lr = 0.001)) 46 | model 47 | } 48 | 49 | mfun_policy = function(state_dim, act_cnt) { 50 | requireNamespace("keras") 51 | model = keras::keras_model_sequential() 52 | model %>% 53 | layer_dense(units = 512, activation = "relu", 54 | input_shape = c(state_dim)) %>% 55 | layer_dropout(rate = 0.25) %>% 56 | layer_dense(units = act_cnt, 57 | activation = "softmax") 58 | model$compile(loss = "categorical_crossentropy", 59 | optimizer = optimizer_rmsprop(lr = 0.001)) 60 | model 61 | } 62 | agent$customizeBrain(list(value_fun = mfun_val, policy_fun = mfun_policy)) 63 | agent$learn(1L) 64 | expect_true(TRUE) 65 | }) 66 | -------------------------------------------------------------------------------- /tests/testthat/test_file_replay_mem.R: -------------------------------------------------------------------------------- 1 | # context("replay_mem") 2 | # test_that("test basic replay_mem works", { 3 | # conf = getDefaultConf("AgentFDQN") 4 | # env = rlR::Environment$new() 5 | # env$overview() 6 | # agent = initAgent("AgentFDQN", env) 7 | # mem = ReplayMem$new(agent, conf) 8 | # mem$reset() 9 | # ins = mem$mkInst(state.old = array(rep(1, 4)), action = c(1, 2), reward = 1, state.new = array(rep(2, 4)), done = TRUE, info = list()) 10 | # mem$add(ins) 11 | # expect_class(mem, "ReplayMem") 12 | # }) 13 | # 14 | # test_that("test stack replay_mem works", { 15 | # conf = getDefaultConf("AgentFDQN") 16 | # env = rlR::Environment$new() 17 | # env$overview() 18 | # env = makeGymEnv("Pong-v0", observ_stack_len = 4L, state_preprocess = list(fun = subsample)) 19 | # agent = initAgent("AgentFDQN", env, conf) 20 | # makeArray = function(i) array(rep(i, 61*80*4), dim = c(61,80,4)) 21 | # mem = agent$mem 22 | # mem$reset() 23 | # for (i in 1:70) { 24 | # ins = mem$mkInst(state.old = makeArray(i-1), action = 1, reward = i, state.new = makeArray(i), done = TRUE, info = list(episode = 1, stepidx = i)) 25 | # mem$add(ins) 26 | # } 27 | # for (i in 71:140) { 28 | # ins = mem$mkInst(state.old = makeArray(i-1), action = 1, reward = i, state.new = makeArray(i), done = TRUE, info = list(episode = 2, stepidx = i)) 29 | # mem$add(ins) 30 | # res = mem$sample.fun(64) 31 | # a = sapply(res, function(x) x$info$stepidx) 32 | # e = sapply(res, function(x) x$info$episode) 33 | # b = sapply(res, function(x) x$state.new[1]) 34 | # expect_true(all(a - b == 3L)) 35 | # } 36 | # expect_class(mem, "ReplayMem") 37 | # }) 38 | # 39 | # 40 | # test_that("test uniformStack_mem works", { 41 | # skip_on_cran() 42 | # conf = rlR.conf.DQN() 43 | # conf$set(replay.memname = "UniformStack", replay.mem.size = 70L) # bigger than batchsize 44 | # env = makeGymEnv("Pong-v0", repeat_n_act = 400, observ_stack_len = 2, state_preprocess = list(fun = subsample)) 45 | # env$overview() 46 | # agent = initAgent("AgentFDQN", env, conf) 47 | # agent$learn(3) 48 | # 
expect_class(agent, "AgentFDQN") 49 | # }) 50 | # 51 | context("interact") 52 | test_that("test interact base works", { 53 | inter = InteractionBase$new() 54 | expect_error(inter$run()) 55 | }) 56 | -------------------------------------------------------------------------------- /tests/testthat/test_file_zzz.R: -------------------------------------------------------------------------------- 1 | context("zzz") 2 | test_that("zzz", { 3 | rlr_test_if_tensorflow_works() 4 | checkPyDep() 5 | listGymEnvs() 6 | env = makeGymEnv("CartPole-v0") 7 | listAvailAgent() 8 | rlr_test_if_tensorflow_works() 9 | rlr_test_if_gym_works() 10 | rlr_test_if_keras_works() 11 | expect_true(TRUE) 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/test_gym_basic.R: -------------------------------------------------------------------------------- 1 | context("gym_basic") 2 | 3 | test_that("cran table", { 4 | agent = initAgent(name = "AgentTable", env = "CliffWalking-v0") 5 | agent$learn(1) 6 | expect_true(T) 7 | }) 8 | 9 | test_that("table", { 10 | skip_on_cran() 11 | agent = initAgent(name = "AgentTable", env = "CliffWalking-v0") 12 | agent$learn(500) 13 | expect_true(agent$interact$perf$getAccPerf() > -40.0) 14 | }) 15 | 16 | test_that("cran test initAgent works", { 17 | agent.names = c("AgentDQN", "AgentFDQN", "AgentDDQN", "AgentPG", "AgentPGBaseline", "AgentActorCritic") 18 | env = makeGymEnv("CartPole-v0") 19 | lapply(agent.names, function(name) initAgent(name, env, conf = getDefaultConf(name))) 20 | expect_true(TRUE) 21 | }) 22 | 23 | test_that("Basic test Cart-Pole could run with agents", { 24 | skip_on_cran() 25 | agent.names = c("AgentDQN", "AgentFDQN", "AgentDDQN", "AgentPG", "AgentPGBaseline", "AgentActorCritic") 26 | lapply(agent.names, function(agent.name) { 27 | env = makeGymEnv("CartPole-v0") 28 | agent = initAgent(agent.name, env) 29 | agent$learn(1L) 30 | expect_true(T, info = agent.name) 31 | }) 32 | }) 33 | 34 | test_that("test Cart-Pole works for each Policy Agent", { 35 | skip_on_cran() 36 | agent.names = c("AgentPG", "AgentPGBaseline", "AgentActorCritic") 37 | lapply(agent.names, function(agent.name) { 38 | print(agent.name) 39 | conf = getDefaultConf(agent.name) 40 | env = makeGymEnv("CartPole-v0") 41 | agent = initAgent(agent.name, env, conf) 42 | agent$learn(80) 43 | expect_true(agent$interact$perf$getAccPerf() > 20, info = agent.name) 44 | }) 45 | }) 46 | 47 | test_that("test Cart-Pole works for DQN Agent", { 48 | skip_on_cran() 49 | env = makeGymEnv("CartPole-v0") 50 | agent = initAgent("AgentDQN", env) 51 | agent$learn(100) 52 | expect_true(agent$interact$perf$getAccPerf() > 20, info = agent.name) 53 | }) 54 | 55 | test_that("test AgentFDQN works", { 56 | skip_on_cran() 57 | skip_on_travis() 58 | env = makeGymEnv("CartPole-v0") 59 | agent = initAgent("AgentFDQN", env) 60 | agent$learn(300) 61 | ave_reward = agent$interact$perf$getAccPerf() 62 | expect_true(ave_reward > 20, info = "AgentFDQN") 63 | }) 64 | 65 | 66 | test_that("test AgentDDQN works", { 67 | skip_on_cran() 68 | env = makeGymEnv("CartPole-v0") 69 | agent = initAgent("AgentDDQN", env) 70 | agent$learn(200) 71 | expect_true(agent$interact$perf$getAccPerf() > 20, info = agent.name) 72 | }) 73 | 74 | test_that("test rescue works each Policy based Agent", { 75 | skip_on_cran() 76 | agent.names = c("AgentPG", "AgentPGBaseline", "AgentActorCritic") 77 | lapply(agent.names, function(agent.name) { 78 | conf = getDefaultConf(agent.name) 79 | conf$set(agent.flag.reset.net = TRUE) 
80 | env = makeGymEnv("CartPole-v0") 81 | agent = initAgent(agent.name, env, conf) 82 | agent$learn(2) 83 | }) 84 | expect_true(TRUE) 85 | }) 86 | -------------------------------------------------------------------------------- /tests/testthat/test_gym_ddpg.R: -------------------------------------------------------------------------------- 1 | context("gym_continuous") 2 | test_that("test ddpg works", { 3 | skip_on_cran() 4 | skip("skipping ddpg") 5 | env = makeGymEnv("Pendulum-v0") 6 | conf = getDefaultConf("AgentDQN") 7 | agent = initAgent("AgentDDPG", env, conf) 8 | agent$learn(1) 9 | expect_true(T) 10 | }) 11 | -------------------------------------------------------------------------------- /tests/testthat/test_rep_experiment.R: -------------------------------------------------------------------------------- 1 | context("repeat experiment") 2 | test_that("travis repeat experiment", { 3 | skip_on_cran() 4 | skip_on_travis() 5 | skip("repeat experiment should be tested individually") 6 | doMC::registerDoMC(4) 7 | agent.names = c("AgentDQN") # too many agents takes too long #agent.names = c("AgentDQN", "AgentFDQN", "AgentDDQN", "AgentPG", "AgentPGBaseline", "AgentActorCritic") 8 | env = makeGymEnv("CartPole-v0") 9 | lapply(agent.names, function(name) repExperiment(sname = "CartPole-v0", aname = name, conf = getDefaultConf(name), nrep = 2, nepi = 2)) 10 | expect_true(TRUE) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/testthat/test_topic_atari.R: -------------------------------------------------------------------------------- 1 | context("atari intensive") 2 | test_that("test Seaquest improves with time", { 3 | skip_on_cran() 4 | skip_on_travis() 5 | skip("heavy computation") 6 | env = makeGymEnv("Seaquest-v0", observ_stack_len = 4L, state_preprocess = list(fun = rlR:::subsample)) 7 | conf = getDefaultConf("AgentDDQN") 8 | conf$set(replay.batchsize = 32, 9 | replay.freq = 1L, 10 | console = TRUE, 11 | agent.lr.decay = 1, 12 | agent.lr = 0.00025, 13 | agent.update.target.freq = 1e4, replay.memname = "Png", 14 | render = F, 15 | policy.minEpsilon = 0.1, 16 | agent.start.learn = 5e4L, 17 | policy.aneal.steps = 1e6, 18 | replay.mem.size = 1e6, 19 | log = FALSE, 20 | agent.clip.td = TRUE, 21 | policy.decay.type = "decay_linear") 22 | 23 | makeCnnCritic = function(state_dim, act_cnt) { 24 | require("keras") 25 | text = paste("model <- keras_model_sequential();", 26 | 'model %>%', 27 | ' layer_conv_2d(filter = 16, kernel_size = c(8,8), strides = c(4, 4), 28 | padding = "same", input_shape = state_dim) %>%', 29 | 'layer_activation("relu") %>%', 30 | 'layer_conv_2d(filter = 32, kernel_size = c(4,4), strides = c(2, 2)) %>%', 31 | 'layer_activation("relu") %>%', 32 | 'layer_flatten() %>%', 33 | 'layer_dense(256) %>%', 34 | 'layer_activation("relu") %>%', 35 | 'layer_dense(act_cnt) %>%', 36 | 'layer_activation("linear");', 37 | 'opt <- optimizer_rmsprop(lr = 0.00025);', 38 | 'model %>% compile(loss = "mse", optimizer = opt, metrics = "accuracy")') 39 | model = eval(parse(text = text)) 40 | return(model) 41 | } 42 | agent = initAgent("AgentFDQN", env, conf, custom_brain = TRUE) 43 | agent$customizeBrain(list(value_fun = makeCnnCritic)) 44 | agent$learn(2000L) 45 | }) 46 | -------------------------------------------------------------------------------- /tests/testthat/test_topic_cnn.R: -------------------------------------------------------------------------------- 1 | context("atari") 2 | test_that("test cnn stack input works for each value based 
agent", { 3 | agent.names = c("AgentDQN", "AgentFDQN", "AgentDDQN") 4 | lapply(agent.names, function(agent.name) { 5 | conf = getDefaultConf(agent.name) 6 | conf$set(replay.batchsize = 32, replay.freq = 40L, console = TRUE, agent.lr.decay = 1, agent.lr = 0.00025, replay.memname = "UniformStack") 7 | env = makeGymEnv("KungFuMaster-v0", repeat_n_act = 80L, observ_stack_len = 4L) 8 | agent = initAgent(agent.name, env, conf) 9 | perf = agent$learn(1) 10 | expect_class(perf, "Performance") 11 | }) 12 | }) 13 | 14 | #FIXME:Valueexpected conv2d_46_input to have shape (210, 160, 4) but got array with shape (210, 160, 12) 15 | 16 | # test_that("test cnn works for each policy based agent", { 17 | # agent.names = c("AgentPG", "AgentPGBaseline", "AgentActorCritic") 18 | # lapply(agent.names, function(agent.name) { 19 | # env = makeGymEnv("KungFuMaster-v0", repeat_n_act = 80L) 20 | # agent = initAgent(agent.name, env, conf) 21 | # agent$learn(1) 22 | # expect_true(T) 23 | # }) 24 | # }) 25 | -------------------------------------------------------------------------------- /vignettes/custom_configuration.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Custom Configuration" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Custom Configuration} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | ``` 24 | 25 | 26 | # Configure 27 | 28 | ```{r} 29 | options(width=1000) 30 | listAvailConf()[, .(name, note)] 31 | ``` 32 | 33 | ```{r} 34 | conf = getDefaultConf("AgentDQN") 35 | conf 36 | conf$set(render = FALSE, console = FALSE) 37 | ``` 38 | 39 | ```{r learn} 40 | env = makeGymEnv("CartPole-v0") 41 | agent = initAgent("AgentDQN", env, conf) 42 | agent$learn(2) 43 | ``` 44 | 45 | ```{r mplot, eval=FALSE,fig.path="inst/figures/", warning=FALSE, message=FALSE, eval=FALSE} 46 | agent$plotPerf(F) 47 | ``` 48 | -------------------------------------------------------------------------------- /vignettes/define_custom_environments.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Custom Learning Environment" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Define custom environment for deep reinforcement learn} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | library(reticulate) 24 | os = import("os") 25 | os$environ[["TF_CPP_MIN_LOG_LEVEL"]]="3" 26 | ``` 27 | # rlR: Define Custom Task to solve 28 | 29 | ## Environment class 30 | 31 | If you want to use this package for your self defined task, you need to implement your own R6 class to represent the environment which must inherit the `rlR::Environment` Class. 
You could define other public and private members as you like which do not collide with the names in `rlR::Environment`. Type the following to have a look at the documentation of `rlR::Environment` 32 | 33 | ```{r} 34 | help(topic="Environment", package = "rlR") 35 | ``` 36 | 37 | ## A toy Example 38 | 39 | ```{r} 40 | env = rlR:::EnvToy$new() 41 | ``` 42 | 43 | `rlR:::EnvToy` is an R6 class which inherit `rlR::Environment`. 44 | 45 | ```{r} 46 | class(env) 47 | ``` 48 | 49 | There are 3 methods you must override when defining your own Environment class. 50 | 51 | ```{r} 52 | env$initialize # the fields 'act_cnt' and 'state_dim' must be defined here 53 | ``` 54 | 55 | ```{r} 56 | env$reset # The return must be a list with fields state(must be an array), reward = NULL, done = FALSE, and info = list() 57 | ``` 58 | 59 | 60 | ```{r} 61 | env$step # The return must be a list with fields state(must be an array), reward(numeric), done(Boolean), and info (list of anything or empty list) 62 | ``` 63 | 64 | ## Testing 65 | 66 | Afterwards you could choose one of the available Agents to check if the newly defined environments works. 67 | 68 | ```{r} 69 | agent = initAgent("AgentDQN", env) 70 | agent$learn(3) 71 | ``` 72 | -------------------------------------------------------------------------------- /vignettes/python_dependencies.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Python Dependency" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Python Dependency} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | library(reticulate) 24 | ``` 25 | 26 | 27 | # Configure to connect to python 28 | To run the examples, you need to have the python packages `numpy-1.14.5`, `tensorflow-1.8.0`, `keras-2.1.6`, `gym-0.10.5` installed in the **same** python path. 29 | 30 | This python path can be your system default python path or a virtual environment(either system python virtual environment or anaconda virtual environment). 31 | 32 | Other package versions might also work but not tested. 33 | 34 | To look at all python paths you have, in a R session, run 35 | ```{r eval=FALSE} 36 | reticulate::py_discover_config() 37 | ``` 38 | 39 | Check which is your system default python: 40 | ```{r eval=FALSE} 41 | Sys.which("python") 42 | ``` 43 | 44 | If you want to use a python path other than this system default, run the following(replace the '/usr/bin/python' with the python path you want) before doing anything else with reticulate. 45 | ```{r eval=FALSE} 46 | reticulate::use_python("/usr/bin/python", required=TRUE) 47 | ``` 48 | **"Note that you can only load one Python interpreter per R session so the use_python call only applies before you actually initialize the interpreter."** Which means if you changed your mind, you have to close the current R session and open a new R session. 
49 | 50 | Confirm from the following if the first path is the one you wanted 51 | ```{r eval=FALSE} 52 | reticulate::py_config() 53 | ``` 54 | 55 | ### Python dependencies installation by rlR function 56 | It is not recommended to mix things up with the system python, so by default, the rlR facility will install the dependencies to virtual environment named 'r-tensorflow' either to your system virtualenv or Anaconda virtualenv. 57 | 58 | For Unix user 59 | - Ensure that you have **either** of the following available 60 | - Python Virtual Environment: 61 | ```{bash eval=F} 62 | pip install virtualenv 63 | ``` 64 | - Anaconda 65 | - Native system python that ships with your OS. (you have to install python libraries mannually in this case, see instructions below) 66 | - Install dependencies through 67 | - if you have python virtualenv available: 68 | ```{r eval=F} 69 | rlR::installDep2SysVirtualEnv(gpu = FALSE) 70 | ``` 71 | - if you have anaconda available: 72 | ```{r eval=FALSE} 73 | rlR::installDepConda(conda_path = "auto", gpu = FALSE) 74 | ``` 75 | 76 | For Windows user 77 | - Ensure that you have Anaconda available **or** a native local system python installed(in this case you also have to install python libraries mannually, see instructions below) 78 | - Install dependencies through `{r eval=FALSE} rlR::installDepConda(gpu = FALSE)` 79 | 80 | If you want to have gpu support, simply set the gpu argument to be true in the function call. 81 | 82 | ### Mannual python dependency installation 83 | You can also install python dependencies without using rlR facility function, for example, you can open an anaconda virtual environment "r-tensorflow" by `source activate r-tensorflow` 84 | 85 | All python libraries that are required could be installed either in a virtual environment or in system native python using pip: 86 | 87 | ```{bash, eval=F} 88 | pip install --upgrade pip # set your prefered path to the search path first 89 | pip install -r requirement.txt 90 | # or 91 | pip install tensorflow 92 | pip install keras 93 | pip install gym 94 | pip install cmake 95 | pip install gym[atari] # this need to be runned even you use require.txt for installation 96 | ``` 97 | where 'cmake' is required to build atari environments. 98 | 99 | 100 | 101 | # Independencies for visualization of environments 102 | The R package imager is required if you want to visualize different environments but the other functionality of rlR is not affected by this R package. 
For ubuntu, the R package imager depends on libraries which could be installed 103 | 104 | ```{bash, eval=F} 105 | sudo apt-get install -y libfftw3-dev libx11-dev libtiff-dev 106 | sudo apt-get install -y libcairo2-dev 107 | sudo apt-get install -y libxt-dev 108 | ``` 109 | -------------------------------------------------------------------------------- /vignettes/table_learning.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Table Learning on Toy-text" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Tablular Learning} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | library(reticulate) 24 | os = import("os") 25 | os$environ[["TF_CPP_MIN_LOG_LEVEL"]]="3" 26 | ``` 27 | 28 | # Toy text and tabular learning 29 | 30 | ```{r} 31 | library(rlR) 32 | agent = initAgent(name = "AgentTable", env = "CliffWalking-v0") 33 | ``` 34 | 35 | ```{r} 36 | agent$learn(500) 37 | ``` 38 | 39 | ```{r eval=F} 40 | agent$plotPerf() 41 | ``` 42 | --------------------------------------------------------------------------------