├── .Rbuildignore ├── .ackrc ├── .gitignore ├── .travis.yml ├── .travis.yml.new ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS.md ├── R ├── agent_base.R ├── agent_ddqn2.R ├── agent_double_qn.R ├── agent_dqn.R ├── agent_fdqn.R ├── agent_pg.R ├── agent_pg_actor_critic.R ├── agent_pg_baseline.R ├── agent_pg_compact.R ├── agent_pg_ddpg.R ├── agent_table.R ├── conf.R ├── confDefault.R ├── environment_base.R ├── environment_gym.R ├── experiment.R ├── interaction_base.R ├── interaction_observer.R ├── logging.R ├── nnArsenal.R ├── nnArsenal_ddpg.R ├── obsolette.R ├── performance.R ├── policy.R ├── replaymem.R ├── replaymem_helpers.R ├── replaymem_png.R ├── surrogate_base.R ├── surrogate_nn.R ├── visualize.R └── zzz.R ├── README.Rmd ├── README.md ├── _pkgdown.yml ├── appveyor.yml ├── attr ├── arsenal_attr.R ├── customized_brain_mountainCar.Rmd ├── play_atari_games.Rmd └── repeated_experiment.Rmd ├── benchmark ├── bt_algorithms.R ├── bt_conf.R ├── bt_experiment.R ├── bt_problem.R ├── plotHelper.R ├── rl_h.R └── test_topic_demo.R ├── codecov.yml ├── cran-comments.md ├── cran_check.sh ├── docs ├── LICENSE-text.html ├── articles │ ├── custom_configuration.html │ ├── define_custom_environments.html │ ├── index.html │ ├── python_dependencies.html │ ├── repeated_experiment.html │ └── table_learning.html ├── authors.html ├── docsearch.css ├── docsearch.js ├── index.html ├── link.svg ├── pkgdown.css ├── pkgdown.js ├── pkgdown.yml └── reference │ ├── Agent.html │ ├── AgentArmed.html │ ├── Environment.html │ ├── checkPyDep.html │ ├── getDefaultConf.html │ ├── index.html │ ├── initAgent.html │ ├── installDep2SysVirtualEnv.html │ ├── installDepConda.html │ ├── listAvailAgent.html │ ├── listAvailConf.html │ ├── listGymEnvs.html │ ├── makeGymEnv.html │ ├── repExperiment.html │ ├── rlr_test_if_gym_works.html │ ├── rlr_test_if_keras_works.html │ ├── rlr_test_if_tensorflow_works.html │ └── showDefaultConf.html ├── inst ├── figures │ ├── ac.png │ ├── ac300.png │ ├── acrobat.pdf │ ├── dqn.png │ └── mplot-1.png └── repAtari200.R ├── paper ├── Makefile ├── figures │ ├── ac.png │ ├── ac300.png │ ├── acrobat.pdf │ ├── dqn.png │ └── mplot-1.png ├── latex.template ├── paper.bib └── paper.md ├── requirement.txt ├── rlR.Rproj ├── tests ├── testthat.R └── testthat │ ├── test_environment.R │ ├── test_file_conf.R │ ├── test_file_nnArsenal.R │ ├── test_file_replay_mem.R │ ├── test_file_zzz.R │ ├── test_gym_basic.R │ ├── test_gym_ddpg.R │ ├── test_rep_experiment.R │ ├── test_topic_atari.R │ └── test_topic_cnn.R └── vignettes ├── custom_configuration.Rmd ├── define_custom_environments.Rmd ├── python_dependencies.Rmd └── table_learning.Rmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^docs$ 2 | ^_pkgdown\.yml$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ^README.Rmd 6 | ^.travis.yml 7 | ^_pkgdown.yml 8 | ^docs 9 | ^cran-comments\.md$ 10 | ^benchmark 11 | ^appveyor.yml 12 | ^codecov\.yml$ 13 | ^.ackrc 14 | ^attr 15 | ^requirement.txt 16 | ^paper$ 17 | ^cran_check.sh 18 | -------------------------------------------------------------------------------- /.ackrc: -------------------------------------------------------------------------------- 1 | --ignore-dir=.Rproj.user 2 | --ignore-dir=docs 3 | --ignore-dir=man 4 | --ignore-dir=.Rhistory 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | man 2 | log 3 | logout 4 | .Rproj.user 5 | .Rhistory 6 | .RData 7 
| .Ruserdata 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | sudo: required 3 | cache: packages 4 | matrix: 5 | include: 6 | - python:2.7 7 | - python:3.6 8 | addons: 9 | apt: 10 | update: true 11 | packages: 12 | - python2.7 13 | - python-pip 14 | - python-dev 15 | - build-essential 16 | - zlib1g-dev 17 | before_install: 18 | - sudo pip install gym 19 | - sudo pip install gym[atari] 20 | - sudo pip install tensorflow==1.1.0 21 | - sudo pip install numpy==1.12.1 22 | - sudo pip install h5py==2.7.0 23 | env: 24 | matrix: 25 | - _R_CHECK_LENGTH_1_CONDITION_=true 26 | r: 27 | - devel 28 | - release 29 | r_github_packages: 30 | - jimhester/covr 31 | - hadley/pkgdown 32 | - rstudio/keras 33 | before_script: 34 | - R --no-save <<< 'library("devtools"); document()' 35 | - R --no-save <<< 'library("tensorflow"); install_tensorflow()' 36 | - R --no-save <<< 'library("keras"); install_keras()' 37 | after_success: 38 | - 'if [[ "$TRAVIS_PULL_REQUEST" == "false" && "$TRAVIS_BRANCH" == "master" && "$TRAVIS_R_VERSION_STRING" == "release" && "$TRAVIS_EVENT_TYPE" != "cron" ]] ; then 39 | R --no-save <<< "devtools::install(); pkgdown::build_site()"; 40 | git checkout master; 41 | export TRAVIS_COMMIT_MSG="$(git log --format=%B --no-merges -n 1)"; 42 | git config --global user.name "Travis CI"; 43 | git config --global user.email "$COMMIT_AUTHOR_EMAIL"; 44 | git config credential.helper "store --file=.git/credentials"; 45 | echo "https://${GH_TOKEN}:@github.com" >> .git/credentials; 46 | git config push.default matching; 47 | git add --force man/*; 48 | git add --force README.md; 49 | git add --force docs/*; 50 | git rm -r --cached $(find . 
-type d -name "*_cache"); 51 | git commit man DESCRIPTION NAMESPACE README.md docs -m "update auto-generated documentation [ci skip]" -m "$TRAVIS_COMMIT_MSG" || true; 52 | git push; 53 | fi;' 54 | - 'if [[ "$TRAVIS_R_VERSION_STRING" == "devel" && "$TRAVIS_EVENT_TYPE" != "cron" ]] ; then 55 | Rscript -e "covr::coveralls()"; 56 | fi;' 57 | -------------------------------------------------------------------------------- /.travis.yml.new: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: r 3 | sudo: required 4 | cache: packages 5 | addons: 6 | apt: 7 | sources: 8 | - ppa:jonathonf/python-3.6 9 | update: true 10 | packages: 11 | - python3 12 | - python3-pip 13 | - python3-dev 14 | - build-essential 15 | - zlib1g-dev 16 | before_install: 17 | - sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.5 1 18 | - sudo update-alternatives --install /usr/bin/python python /usr/bin/python3.6 2 19 | - sudo update-alternatives --set python /usr/bin/python3 20 | - sudo pip3 install --upgrade setuptools pip wheel 21 | - sudo apt-get install -y software-properties-common 22 | - sudo apt-get install -y zlib1g-dev 23 | - sudo pip3 install cmake 24 | - sudo pip3 install scipy==1.1.0 25 | - sudo pip3 install numpy==1.14.5 26 | - sudo pip3 install tensorflow==1.8.0 27 | - sudo pip3 install keras==2.2.0 28 | - sudo pip3 install gym==0.10.5 29 | - sudo pip3 install gym[atari] 30 | - sudo pip3 install virtualenv 31 | env: 32 | matrix: 33 | - _R_CHECK_LENGTH_1_CONDITION_=true 34 | r: 35 | - devel 36 | - release 37 | r_packages: 38 | - devtools 39 | - covr 40 | r_github_packages: 41 | - codecov/example-r 42 | - hadley/pkgdown 43 | - rstudio/keras 44 | before_script: 45 | - R --no-save <<< 'library("devtools"); document()' 46 | - R --no-save <<< 'library("tensorflow"); # install_tensorflow()' 47 | - R --no-save <<< 'library("keras"); # install_keras()' 48 | - R --no-save <<< 'reticulate::py_discover_config()' 49 | after_success: 50 | - Rscript -e 'covr::codecov()' 51 | - Rscript -e "covr::coveralls()" 52 | - 'if [[ "$TRAVIS_PULL_REQUEST" == "false" && "$TRAVIS_BRANCH" == "master" && "$TRAVIS_R_VERSION_STRING" == "release" && "$TRAVIS_EVENT_TYPE" != "cron" ]] ; then 53 | R --no-save <<< "devtools::install(); pkgdown::build_site()"; 54 | git checkout master; 55 | export TRAVIS_COMMIT_MSG="$(git log --format=%B --no-merges -n 1)"; 56 | git config --global user.name "Travis CI"; 57 | git config --global user.email "$COMMIT_AUTHOR_EMAIL"; 58 | git config credential.helper "store --file=.git/credentials"; 59 | echo "https://${GH_TOKEN}:@github.com" >> .git/credentials; 60 | git config push.default matching; 61 | git add --force man/*; 62 | git add --force README.md; 63 | git add --force docs/*; 64 | git rm -r --cached $(find . 
-type d -name "*_cache"); 65 | git commit man DESCRIPTION NAMESPACE README.md docs -m "update auto-generated documentation [ci skip]" -m "$TRAVIS_COMMIT_MSG" || true; 66 | git push; 67 | fi;' 68 | - 'if [[ "$TRAVIS_R_VERSION_STRING" == "devel" && "$TRAVIS_EVENT_TYPE" != "cron" ]] ; then 69 | Rscript -e "covr::coveralls()"; 70 | fi;' 71 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: rlR 2 | Type: Package 3 | Title: Reinforcement Learning in R 4 | Version: 0.1.0 5 | Authors@R: c( 6 | person("Xudong", "Sun", email = {"smilesun.east@gmail.com"}, role = c("aut", "cre")), 7 | person("Sebastian", "Gruber", email = {"gruber_sebastian@t-online.de"}, role = c("ctb")) 8 | ) 9 | Maintainer: Xudong Sun 10 | Description: Reinforcement Learning with deep Q learning, double deep Q 11 | learning, frozen target deep Q learning, policy gradient deep learning, policy 12 | gradient with baseline deep learning, actor-critic deep reinforcement learning. 13 | License: BSD_2_clause + file LICENSE 14 | Encoding: UTF-8 15 | Depends: 16 | R (>= 3.4.0), 17 | Imports: 18 | R6, 19 | checkmate, 20 | data.table, 21 | reticulate, 22 | keras, 23 | tensorflow, 24 | logging, 25 | ggplot2, 26 | openssl, 27 | magrittr, 28 | abind, 29 | foreach 30 | LazyData: true 31 | RoxygenNote: 6.1.1 32 | BugReports: https://github.com/smilesun/rlR/issues 33 | URL: https://github.com/smilesun/rlR 34 | SystemRequirements: The following python package are needed to use the gym openAI environment. gym >= 0.10.5; At least one deep learning backend which keras requires(tensorflow, cntk, theano) should be installed on your computer, for example tensorflow >= 1.1.0 (tested on Ubuntu 14.04); The backend keras requires could be installed by keras::install_keras(); Both dependencies can also be installed by rlR::installDep() function. It is important to note that the user should run 'reticulate::use_python("/usr/local/bin/python")' to specify the python path and 'reticulate::use_virtualenv("myenv")' to specify which virtual environment to use. By default, the package is using "~/anaconda3/bin/python" as its python version. 
For detail, please refer to https://rstudio.github.io/reticulate/articles/versions.html 35 | Suggests: 36 | imager, 37 | png, 38 | devtools, 39 | testthat, 40 | knitr, 41 | covr, 42 | rmarkdown 43 | VignetteBuilder: knitr 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2018 2 | COPYRIGHT HOLDER: Xudong Sun 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(Agent) 4 | export(AgentArmed) 5 | export(Environment) 6 | export(checkPyDep) 7 | export(getDefaultConf) 8 | export(initAgent) 9 | export(installDep2SysVirtualEnv) 10 | export(installDepConda) 11 | export(listAvailAgent) 12 | export(listAvailConf) 13 | export(listGymEnvs) 14 | export(makeGymEnv) 15 | export(repExperiment) 16 | export(rlr_test_if_gym_works) 17 | export(rlr_test_if_keras_works) 18 | export(rlr_test_if_tensorflow_works) 19 | export(showDefaultConf) 20 | import(R6) 21 | import(abind) 22 | import(checkmate) 23 | import(data.table) 24 | import(foreach) 25 | import(ggplot2) 26 | import(keras) 27 | import(logging) 28 | import(openssl) 29 | import(reticulate) 30 | import(tensorflow) 31 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # rlR 0.1.0 2 | * Initial release. 3 | * Added a `NEWS.md` file to track changes to the package. 4 | -------------------------------------------------------------------------------- /R/agent_ddqn2.R: -------------------------------------------------------------------------------- 1 | # @title Double Q learning 2 | # 3 | # @format \code{\link{R6Class}} object 4 | # @description 5 | # A \code{\link{R6Class}} to represent Double Deep Q learning Armed Agent 6 | # %$Q_u(S, a; \theta_1) = r + Q_u(S', argmax_a' Q_h(S',a'), \theta_1) + delta$ 7 | # target action = argmax Q_h 8 | # @section Methods: 9 | # Inherited from \code{AgentArmed}: 10 | # @inheritSection AgentArmed Methods 11 | # 12 | # @return [\code{\link{AgentDDQN}}]. 
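# Hypothetical sketch, not part of the package: the double-DQN target rule that the
# extractTarget() method of the class below implements per replayed transition. The update
# network selects the greedy next action and the frozen target network evaluates it; all
# argument names here are illustrative.
double_dqn_target_sketch = function(r, done, gamma, q_next_update, q_next_target) {
  if (done) return(r)
  a_star = which.max(q_next_update)    # action selection by the update network
  r + gamma * q_next_target[a_star]    # action evaluation by the target network
}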
13 | AgentDDQN = R6::R6Class("AgentDDQN", 14 | inherit = AgentFDQN, 15 | public = list( 16 | p.next.h = NULL, 17 | setBrain = function() { 18 | super$setBrain() # current setBrain will overwrite super$setBrain() 19 | self$brain_update = self$brain 20 | self$brain_target = SurroNN$new(self) 21 | }, 22 | 23 | getXY = function(batchsize) { 24 | self$list.replay = self$mem$sample.fun(batchsize) 25 | list.states.old = lapply(self$list.replay, ReplayMem$extractOldState) 26 | list.states.next = lapply(self$list.replay, ReplayMem$extractNextState) 27 | self$model = self$brain_update 28 | self$p.old = self$getYhat(list.states.old) 29 | self$p.next = self$getYhat(list.states.next) 30 | self$model = self$brain_target 31 | self$p.next.h = self$getYhat(list.states.next) 32 | list.targets = lapply(1:length(self$list.replay), self$extractTarget) 33 | self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 34 | temp = simplify2array(list.states.old) # R array put elements columnwise 35 | mdim = dim(temp) 36 | norder = length(mdim) 37 | self$replay.x = aperm(temp, c(norder, 1:(norder - 1))) 38 | self$replay.y = t(simplify2array(list.targets)) # array p 39 | }, 40 | 41 | extractTarget = function(i) { 42 | ins = self$list.replay[[i]] 43 | act2update = ReplayMem$extractAction(ins) 44 | yhat = self$p.old[i, ] 45 | vec.next.Q.u = self$p.next[i, ] # action selection 46 | vec.next.Q.h = self$p.next.h[i, ] # action evaluation 47 | a_1 = which.max(vec.next.Q.u) # action selection 48 | r = ReplayMem$extractReward(ins) 49 | done = ReplayMem$extractDone(ins) 50 | if (done) { 51 | target = r 52 | } else { 53 | target = r + self$gamma * vec.next.Q.h[a_1] # action evaluation 54 | } 55 | mt = yhat 56 | mt[act2update] = target 57 | return(mt) 58 | } 59 | ) # public 60 | ) 61 | 62 | AgentDDQN$info = function() { 63 | "Double Deep Q Learning" 64 | } 65 | 66 | rlR.conf.AgentDDQN = function() { 67 | RLConf$new( 68 | render = FALSE, 69 | console = TRUE, 70 | log = FALSE, 71 | policy.maxEpsilon = 1, 72 | policy.minEpsilon = 0.01, 73 | policy.decay.rate = exp(-0.001), 74 | policy.name = "EpsilonGreedy", 75 | replay.batchsize = 64L, 76 | agent.update.target.freq = 200 77 | ) 78 | } 79 | 80 | AgentDDQN$test = function() { 81 | library(rlR) 82 | env = makeGymEnv("CartPole-v0") 83 | agent = initAgent("AgentDDQN", env) 84 | agent$learn(200L) 85 | } 86 | -------------------------------------------------------------------------------- /R/agent_double_qn.R: -------------------------------------------------------------------------------- 1 | # @title Double Q learning 2 | # 3 | # @format \code{\link{R6Class}} object 4 | # @description 5 | # A \code{\link{R6Class}} to represent Double Deep Q learning Armed Agent 6 | # %$Q_u(S, a; \theta_1) = r + Q_u(S', argmax_a' Q_h(S',a'), \theta_1) + delta$ 7 | # target action = argmax Q_h 8 | # @section Methods: 9 | # Inherited from \code{AgentArmed}: 10 | # @inheritSection AgentArmed Methods 11 | # 12 | # @return [\code{\link{AgentDDQN}}]. 
13 | # AgentDDQN = R6::R6Class("AgentDDQN", 14 | # inherit = AgentDQN, 15 | # public = list( 16 | # brain2 = NULL, 17 | # brain_u = NULL, # u: to be updated 18 | # brain_h = NULL, # h: to help 19 | # p.next.h = NULL, 20 | # setBrain = function() { 21 | # super$setBrain() # current setBrain will overwrite super$setBrain() 22 | # self$brain2 = SurroNN$new(self) 23 | # }, 24 | # 25 | # toss = function() { 26 | # if (runif(1L) < 0.5) { 27 | # self$brain_u = self$brain 28 | # self$brain_h = self$brain2 29 | # } else { 30 | # self$brain_u = self$brain2 31 | # self$brain_h = self$brain 32 | # } 33 | # 34 | # }, 35 | # 36 | # getXY = function(batchsize) { 37 | # self$list.replay = self$mem$sample.fun(batchsize) 38 | # self$glogger$log.nn$info("replaying %s", self$mem$replayed.idx) 39 | # list.states.old = lapply(self$list.replay, ReplayMem$extractOldState) 40 | # list.states.next = lapply(self$list.replay, ReplayMem$extractNextState) 41 | # self$model = self$brain_u 42 | # self$p.old = self$getYhat(list.states.old) 43 | # self$p.next = self$getYhat(list.states.next) 44 | # self$model = self$brain_h 45 | # self$p.next.h = self$getYhat(list.states.next) 46 | # list.targets = lapply(1:length(self$list.replay), self$extractTarget) 47 | # self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 48 | # temp = simplify2array(list.states.old) # R array put elements columnwise 49 | # mdim = dim(temp) 50 | # norder = length(mdim) 51 | # self$replay.x = aperm(temp, c(norder, 1:(norder - 1))) 52 | ##assert(self$replay.x[1,]== list.states.old[[1L]]) 53 | # self$replay.y = t(simplify2array(list.targets)) # array p 54 | # }, 55 | # 56 | # 57 | # replay = function(batchsize) { 58 | # self$getXY(batchsize) 59 | # self$brain_u$train(self$replay.x, self$replay.y) 60 | # }, 61 | # 62 | # extractTarget = function(i) { 63 | # ins = self$list.replay[[i]] 64 | # act2update = ReplayMem$extractAction(ins) 65 | # yhat = self$p.old[i, ] 66 | # vec.next.Q.u = self$p.next[i, ] 67 | # vec.next.Q.h = self$p.next.h[i, ] 68 | # a_1 = which.max(vec.next.Q.u) # not h! 69 | # r = ReplayMem$extractReward(ins) 70 | # done = ReplayMem$extractDone(ins) 71 | # if (done) { 72 | # target = r 73 | # } else { 74 | # target = r + self$gamma * vec.next.Q.h[a_1] # not u! 75 | # } 76 | # mt = yhat 77 | # mt[act2update] = target 78 | # return(mt) 79 | # }, 80 | # 81 | # evaluateArm = function(state) { 82 | # state = array_reshape(state, c(1L, dim(state))) 83 | # self$glogger$log.nn$info("state: %s", paste(state, collapse = " ")) 84 | # vec.arm.q.u = self$brain_u$pred(state) 85 | # vec.arm.q.h = self$brain_h$pred(state) 86 | # self$vec.arm.q = (vec.arm.q.u + vec.arm.q.h) / 2.0 87 | # self$glogger$log.nn$info("prediction: %s", paste(self$vec.arm.q, collapse = " ")) 88 | # }, 89 | # 90 | # act = function(state) { 91 | # self$toss() 92 | # assert(class(state) == "array") 93 | # self$evaluateArm(state) 94 | # self$policy$act(state) 95 | # } 96 | # ) # public 97 | # ) 98 | # 99 | # AgentDDQN$info = function() { 100 | # "Double Deep Q Learning" 101 | # } 102 | -------------------------------------------------------------------------------- /R/agent_dqn.R: -------------------------------------------------------------------------------- 1 | # @title DQN 2 | # 3 | # @format \code{\link{R6Class}} object 4 | # @description Deep Q Network 5 | # 6 | # @section Methods: 7 | # Inherited from \code{AgentArmed}: 8 | # @inheritSection AgentArmed Methods 9 | # 10 | # @return [\code{\link{AgentDQN}}]. 
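# Hypothetical sketch, not part of the package: the one-step Q-learning target that the
# extractTarget() method of the class below builds for each replayed transition
# (s, a, r, s', done); `q_next` stands for the network's Q-value vector at s'.
dqn_target_sketch = function(r, done, gamma, q_next) {
  if (done) return(r)
  r + gamma * max(q_next)   # bootstrap with the greedy next-state value
}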
11 | AgentDQN = R6::R6Class("AgentDQN", 12 | inherit = AgentArmed, 13 | public = list( 14 | setBrain = function() { 15 | self$task = "value_fun" 16 | self$brain = SurroNN$new(self) 17 | self$model = self$brain 18 | }, 19 | 20 | getXY = function(batchsize) { 21 | self$list.replay = self$mem$sample.fun(batchsize) 22 | self$glogger$log.nn$info("replaying %s", self$mem$replayed.idx) 23 | list.states.old = lapply(self$list.replay, ReplayMem$extractOldState) 24 | list.states.next = lapply(self$list.replay, ReplayMem$extractNextState) 25 | self$p.old = self$getYhat(list.states.old) 26 | self$p.next = self$getYhat(list.states.next) 27 | list.targets = lapply(1:length(self$list.replay), self$extractTarget) 28 | self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 29 | temp = Reduce(rbind, list.states.old) 30 | nr = length(list.states.old) 31 | temp = simplify2array(list.states.old) # R array put elements columnwise 32 | mdim = dim(temp) 33 | norder = length(mdim) 34 | self$replay.x = aperm(temp, c(norder, 1:(norder - 1))) 35 | self$replay.y = t(simplify2array(list.targets)) # array put elements columnwise 36 | diff_table = abs(self$replay.y - self$p.old) 37 | self$replay_delta = apply(diff_table, 1, mean) 38 | }, 39 | 40 | 41 | extractTarget = function(i) { 42 | ins = self$list.replay[[i]] 43 | act2update = ReplayMem$extractAction(ins) 44 | p.old = self$p.old[i, ] 45 | self$yhat = p.old # for calculating the TD error 46 | r = ReplayMem$extractReward(ins) 47 | done = ReplayMem$extractDone(ins) 48 | if (done) { 49 | target = r 50 | } else { 51 | vec.next.Q = self$p.next[i, ] 52 | a_1 = which.max(vec.next.Q) # action index start from 1L 53 | target = r + self$gamma * max(vec.next.Q) 54 | # equivalent to huber loss 55 | if (self$clip_td_err) { 56 | target = max(target, p.old[act2update] - 1L) 57 | target = min(target, p.old[act2update] + 1L) 58 | } 59 | } 60 | mt = p.old 61 | mt[act2update] = target # the not active action arm's Q will not be updated 62 | #FIXME: shall here be 0? 63 | # mt[-act2update] = 0.0 # the not active action arm will be set to be zero 64 | return(mt) 65 | }, 66 | 67 | afterStep = function() { 68 | if (self$interact$step_in_episode %% self$replay.freq == 0L) { 69 | self$replay(self$replay.size) 70 | } 71 | self$policy$afterStep() 72 | }, 73 | 74 | afterEpisode = function() { 75 | self$policy$afterEpisode() 76 | self$mem$afterEpisode() 77 | self$brain$afterEpisode() 78 | } 79 | ) # public 80 | ) 81 | 82 | AgentDQN$info = function() { 83 | "Vanilla Deep Q learning" 84 | } 85 | 86 | rlR.conf.AgentDQN = function() { 87 | RLConf$new( 88 | render = FALSE, 89 | console = TRUE, 90 | log = FALSE, 91 | policy.maxEpsilon = 1, 92 | policy.minEpsilon = 0.01, 93 | policy.decay.rate = exp(-0.001), 94 | policy.name = "EpsilonGreedy", 95 | replay.batchsize = 64L) 96 | } 97 | 98 | AgentDQN$test = function() { 99 | library(rlR) 100 | env = makeGymEnv("CartPole-v0") 101 | agent = initAgent("AgentDQN", env) 102 | agent$learn(200L) 103 | } 104 | -------------------------------------------------------------------------------- /R/agent_fdqn.R: -------------------------------------------------------------------------------- 1 | # @title Frozen target Q learning 2 | # 3 | # @format \code{\link{R6Class}} object 4 | # @description Frozen target Q learning 5 | # 6 | # @section Methods: 7 | # Inherited from \code{AgentArmed}: 8 | # @inheritSection AgentArmed Methods 9 | # 10 | # @return [\code{\link{AgentFDQN}}]. 
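# Hypothetical sketch, not part of the package: the frozen-target schedule the class below
# uses. The update network is trained at every replay, while the target network is only
# re-synchronised once `update_freq` global steps have passed since the last sync (compare
# shouldUpdateModel()/updateModel() below); argument names are illustrative.
should_sync_target_sketch = function(global_step, last_sync, update_freq) {
  global_step - last_sync > update_freq
}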
11 | AgentFDQN = R6::R6Class("AgentFDQN", inherit = AgentDQN, 12 | public = list( 13 | brain_target = NULL, 14 | brain_update = NULL, 15 | last_update = NULL, 16 | initialize = function(env, conf) { 17 | self$last_update = 0 18 | super$initialize(env, conf) 19 | self$updateFreq = self$conf$get("agent.update.target.freq") 20 | }, 21 | 22 | setBrain = function() { 23 | super$setBrain() 24 | self$brain_update = SurroNN$new(self) 25 | self$brain_target = self$brain 26 | }, 27 | 28 | showBrain = function() { 29 | print("control network:") 30 | print(self$brain_update$model) 31 | print("target network:") 32 | print(self$brain_target$model) 33 | }, 34 | 35 | ## @override 36 | getXY = function(batchsize) { 37 | self$list.replay = self$mem$sample.fun(batchsize) 38 | list.states.old = lapply(self$list.replay, ReplayMem$extractOldState) 39 | list.states.next = lapply(self$list.replay, ReplayMem$extractNextState) 40 | self$model = self$brain_update # use update network to generate target 41 | self$p.old = self$getYhat(list.states.old) 42 | self$model = self$brain_target # use target network to generate target 43 | self$p.next = self$getYhat(list.states.next) 44 | list.targets = lapply(1:length(self$list.replay), self$extractTarget) 45 | #temp = Reduce(rbind, list.states.old) # does not work for tensor 46 | batch_states = simplify2array(list.states.old) # R array put elements columnwise 47 | mdim = dim(batch_states) 48 | norder = length(mdim) 49 | self$replay.x = aperm(batch_states, c(norder, 1:(norder - 1))) 50 | self$replay.y = t(simplify2array(list.targets)) # array put elements columnwise 51 | #diff_table = abs(self$replay.y - self$p.old) 52 | #self$replay_delta = apply(diff_table, 1, mean) 53 | }, 54 | 55 | 56 | replay = function(batchsize) { 57 | self$getXY(batchsize) # from base class 58 | self$brain_update$train(self$replay.x, self$replay.y) # update the policy model 59 | }, 60 | 61 | act = function(state) { 62 | assert(class(state) == "array") 63 | self$model = self$brain_update 64 | self$evaluateArm(state) 65 | self$policy$act(state) 66 | }, 67 | 68 | updateModel = function() { 69 | cat(sprintf("\n\n updating model \n\n")) 70 | tw = self$brain_target$getWeights() 71 | uw = self$brain_update$getWeights() 72 | uuw = lapply(uw, function(x) x * 0.1) 73 | ttw = lapply(tw, function(x) x * 0.9) 74 | ww = mapply("+", uw, tw) 75 | self$brain_target$setWeights(uw) 76 | self$last_update = self$interact$global_step_len 77 | }, 78 | 79 | shouldUpdateModel = function() { 80 | self$interact$global_step_len - self$last_update > self$updateFreq 81 | }, 82 | 83 | afterEpisode = function() { 84 | if (self$shouldUpdateModel()) { 85 | self$updateModel() 86 | } 87 | super$afterEpisode() 88 | } 89 | ) 90 | ) 91 | 92 | rlR.conf.AgentFDQN = function() { 93 | RLConf$new( 94 | render = FALSE, 95 | console = TRUE, 96 | log = FALSE, 97 | policy.maxEpsilon = 1, 98 | policy.minEpsilon = 0.01, 99 | policy.decay.rate = exp(-0.001), 100 | policy.name = "EpsilonGreedy", 101 | replay.batchsize = 64L, 102 | agent.update.target.freq = 400 103 | ) 104 | } 105 | 106 | 107 | 108 | AgentFDQN$info = function() { 109 | "Frozen Target Deep Q Learning" 110 | } 111 | 112 | AgentFDQN$test = function() { 113 | library(rlR) 114 | env = makeGymEnv("CartPole-v0") 115 | agent = initAgent("AgentFDQN", env) 116 | agent$learn(400L) 117 | } 118 | -------------------------------------------------------------------------------- /R/agent_pg.R: -------------------------------------------------------------------------------- 1 | # @title Policy Gradient 
2 | # @format \code{\link{R6Class}} object 3 | # @description Policy Gradient 4 | # 5 | # @section Methods: 6 | # Inherited from \code{AgentArmed}: 7 | # @inheritSection AgentArmed Methods 8 | # 9 | # @return [\code{\link{AgentPG}}]. 10 | AgentPG = R6::R6Class("AgentPG", 11 | inherit = AgentArmed, 12 | public = list( 13 | flag_rescue = NULL, 14 | amf = NULL, 15 | initialize = function(env, conf) { 16 | self$flag_rescue = conf$get("agent.flag.reset.net") 17 | super$initialize(env, conf = conf) 18 | }, 19 | 20 | setBrain = function() { 21 | self$task = "policy_fun" 22 | self$brain = SurroNN$new(self) 23 | self$model = self$brain 24 | }, 25 | 26 | extractTarget = function(ins) { 27 | act = ReplayMem$extractAction(ins) 28 | vec_act = rep(0.0, self$act_cnt) 29 | vec_act[act] = +1.0 30 | return(vec_act) 31 | }, 32 | 33 | # loss = -\sum_k{(y_k\log(yhat_k)}, \frac{\partial loss}{\partial \yhat} = -\sum_k{y_k\frac{yhat_k}{yhat_k}} = -policy gradient 34 | #@override 35 | getXY = function(batchsize) { 36 | self$list.replay = self$mem$sample.fun(batchsize) 37 | self$glogger$log.nn$info("replaying %s", self$mem$replayed.idx) 38 | list_states_old = lapply(self$list.replay, ReplayMem$extractOldState) 39 | list_targets = lapply(self$list.replay, self$extractTarget) 40 | self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 41 | arr_states_old = simplify2array(list_states_old) 42 | norder = length(dim(arr_states_old)) 43 | self$replay.x = aperm(arr_states_old, c(norder, 1:(norder - 1))) 44 | self$replay.y = t(simplify2array(list_targets)) 45 | # self$replay.y = array(, dim = c(batchsize, self$act_cnt)) 46 | }, 47 | 48 | setAmf = function(batchsize) { 49 | self$setReturn() 50 | vec_discount = cumprod(rep(self$gamma, batchsize)) 51 | amf = self$vec_dis_return * vec_discount 52 | amf = self$vec_dis_return 53 | amf = amf - mean(amf) 54 | self$amf = amf / sd(amf) 55 | }, 56 | 57 | # replay is executed at the end of episode for each step of the episode, batch size is always set to be the episode length 58 | replay = function(batchsize) { 59 | self$setAmf(batchsize) 60 | self$getXY(batchsize) 61 | self$replay.y = diag(self$amf) %*% self$replay.y 62 | self$brain$batch_update(self$replay.x, self$replay.y) # update the policy model 63 | }, 64 | 65 | setReturn = function() { 66 | episode_idx = self$interact$perf$epi_idx 67 | self$vec_dis_return = self$interact$perf$list_discount_reward_epi[[episode_idx]] 68 | }, 69 | 70 | #@override 71 | afterEpisode = function() { 72 | self$replay(self$interact$perf$total_steps) # key difference here 73 | super$afterEpisode() 74 | } 75 | ) # public 76 | ) 77 | 78 | rlR.conf.AgentPG = rlR.conf.AgentPGBaseline = function() { 79 | RLConf$new( 80 | agent.lr = 1e-2, 81 | render = FALSE, 82 | console = TRUE, 83 | flag_rescue = FALSE, 84 | agent.gamma = 0.99, 85 | policy.maxEpsilon = 0, 86 | policy.minEpsilon = 0, 87 | agent.flag.reset.net = FALSE, 88 | policy.name = "Prob", 89 | replay.memname = "Latest", 90 | replay.epochs = 1L) 91 | } 92 | 93 | AgentPG$info = function() { 94 | "Policy Gradient Monte Carlo" 95 | } 96 | 97 | AgentPG$test = function() { 98 | env = makeGymEnv("CartPole-v0") 99 | conf = getDefaultConf("AgentPG") 100 | agent = initAgent("AgentPG", env, conf, custom_brain = F) 101 | agent$learn(200L) 102 | } 103 | -------------------------------------------------------------------------------- /R/agent_pg_actor_critic.R: -------------------------------------------------------------------------------- 1 | # @title AgentActorCritic 2 | # 3 | # @format 
\code{\link{R6Class}} object 4 | # @description ActorCritic Agent 5 | # 6 | # @section Methods: 7 | # Inherited from \code{AgentArmed}: 8 | # @inheritSection AgentArmed Methods 9 | # 10 | # @return [\code{\link{AgentActorCritic}}]. 11 | AgentActorCritic = R6::R6Class("AgentActorCritic", 12 | inherit = AgentPGBaseline, 13 | public = list( 14 | setBrain = function() { 15 | self$task = "policy_fun" 16 | self$brain_actor = SurroNN$new(self) 17 | self$brain_actor$lr = 0.001 18 | self$task = "value_fun" 19 | self$brain_critic = SurroNN$new(self) 20 | self$brain_critic$lr = 0.01 21 | self$model = self$brain_critic 22 | }, 23 | 24 | setAmf = function() { 25 | vec.step = unlist(lapply(self$list.replay, ReplayMem$extractStep)) 26 | vec_discount = sapply(vec.step, function(x) self$gamma^x) 27 | self$amf = vec_discount 28 | }, 29 | 30 | replay = function(batchsize) { 31 | self$getReplayYhat(batchsize) # self$list.rewards are extracted here 32 | self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 33 | self$setAmf() 34 | len = length(self$list.replay) 35 | list.targets.critic = lapply(1:len, function(i) as.vector(self$extractCriticTarget(i))) 36 | list.targets.actor = lapply(1:len, function(i) as.vector(self$extractActorTarget(i))) 37 | y_actor = t(simplify2array(list.targets.actor)) 38 | y_actor = self$amf %*% y_actor 39 | y_actor = self$delta %*% y_actor 40 | y_critic = array(unlist(list.targets.critic), dim = c(len, 1L)) 41 | self$brain_critic$batch_update(self$replay.x, y_critic) # first update critic 42 | self$brain_actor$batch_update(self$replay.x, y_actor) 43 | }, 44 | 45 | extractCriticTarget = function(i) { 46 | nv = self$gamma * self$p_next_c 47 | vec.done = unlist(lapply(self$list.replay, ReplayMem$extractDone)) 48 | idx = which(vec.done) 49 | target = (unlist(self$list.rewards) + nv) 50 | if (length(idx) > 0) target = unlist(self$list.rewards) 51 | self$delta = target - self$p_old_c # Bellman Error as advantage 52 | return(target) 53 | }, 54 | 55 | afterStep = function() { 56 | self$policy$afterStep() 57 | self$replay(1) 58 | }, 59 | 60 | afterEpisode = function() { 61 | self$policy$afterEpisode() 62 | self$mem$afterEpisode() 63 | #if (self$flag_rescue) self$interact$perf$rescue() 64 | self$brain_actor$afterEpisode() 65 | self$brain_critic$afterEpisode() 66 | #self$adaptLearnRate() 67 | } 68 | 69 | ) 70 | ) 71 | 72 | AgentActorCritic$info = function() { 73 | "Actor Critic Method" 74 | } 75 | 76 | AgentActorCritic$test = function() { 77 | env = makeGymEnv("CartPole-v0") 78 | agent = initAgent("AgentActorCritic", env) 79 | agent$learn(2000L) 80 | } 81 | -------------------------------------------------------------------------------- /R/agent_pg_baseline.R: -------------------------------------------------------------------------------- 1 | # @title ReinforceWithBaseline 2 | # @format \code{\link{R6Class}} object 3 | # @description ReinforceWithBaseline 4 | # $\delta = G_t - v_w(s_t)$ 5 | # $w = w + \beta * \delta * \nabla_w v_w(s_t)$ 6 | # $\theta = \theta + \alpha * \gamma^t * \delta * \nabla_{\theta}log(\pi_{\theta}(A_t|S_t)) 7 | # @return [\code{\link{AgentPGBaseline}}]. 
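# Hypothetical sketch, not part of the package: how advantage-weighted actor targets can be
# formed, mirroring the REINFORCE-with-baseline update written above and the
# diag(...) %*% one-hot scaling used in replay() below. The advantage delta = G_t - v_w(s_t)
# and a per-step discount weight (gamma^t in the rule above) multiply the cross-entropy
# targets; all argument names are illustrative.
actor_targets_sketch = function(actions, act_cnt, returns, v_hat, discount_pow) {
  delta = returns - v_hat                                      # advantage per transition
  onehot = t(sapply(actions, function(a) replace(numeric(act_cnt), a, 1)))
  diag(discount_pow * delta, nrow = length(delta)) %*% onehot  # gamma^t * delta weighting
}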
8 | AgentPGBaseline = R6::R6Class("AgentPGBaseline", 9 | inherit = AgentPG, 10 | public = list( 11 | brain_actor = NULL, # cross entropy loss 12 | brain_critic = NULL, # mse loss 13 | critic_yhat = NULL, 14 | p_old_c = NULL, 15 | p_next_c = NULL, 16 | delta = NULL, 17 | list.rewards = NULL, 18 | setBrain = function() { 19 | self$task = "policy_fun" 20 | self$brain_actor = SurroNN$new(self) 21 | self$task = "value_fun" 22 | self$brain_critic = SurroNN$new(self) 23 | self$model = self$brain_critic 24 | }, 25 | 26 | getReplayYhat = function(batchsize) { 27 | self$list.replay = self$mem$sample.fun(batchsize) 28 | self$glogger$log.nn$info("replaying %s", self$mem$replayed.idx) 29 | list.states.old = lapply(self$list.replay, ReplayMem$extractOldState) 30 | list.states.next = lapply(self$list.replay, ReplayMem$extractNextState) 31 | self$list.rewards = lapply(self$list.replay, ReplayMem$extractReward) 32 | self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 33 | self$model = self$brain_critic 34 | self$p_old_c = self$getYhat(list.states.old) 35 | self$p_next_c = self$getYhat(list.states.next) 36 | temp = simplify2array(list.states.old) # R array put elements columnwise 37 | mdim = dim(temp) 38 | norder = length(mdim) 39 | self$replay.x = aperm(temp, c(norder, 1:(norder - 1))) 40 | }, 41 | 42 | replay = function(batchsize) { 43 | self$getReplayYhat(batchsize) 44 | len = length(self$list.replay) # replay.list might be smaller than batchsize 45 | self$setAmf(batchsize) 46 | self$delta = array(self$vec_dis_return, dim = dim(self$p_old_c)) - self$p_old_c 47 | list.targets.actor = lapply(1:len, function(i) as.vector(self$extractActorTarget(i))) 48 | list.targets.critic = lapply(1:len, function(i) as.vector(self$extractCriticTarget(i))) 49 | y_actor = t(simplify2array(list.targets.actor)) 50 | y_actor = diag(self$amf) %*% y_actor 51 | y_actor = diag(as.vector(self$delta)) %*% y_actor 52 | y_critic = array(unlist(list.targets.critic), dim = c(len, 1L)) 53 | self$brain_actor$batch_update(self$replay.x, y_actor) # update the policy model 54 | self$brain_critic$batch_update(self$replay.x, y_critic) # update the policy model 55 | }, 56 | 57 | extractCriticTarget = function(i) { 58 | y = self$p_old_c[i, ] + self$delta[i] 59 | return(y) 60 | }, 61 | 62 | extractActorTarget = function(i) { 63 | act = self$list.acts[[i]] 64 | vec.act = rep(0L, self$act_cnt) 65 | vec.act[act] = 1.0 66 | target = vec.act 67 | return(target) 68 | }, 69 | 70 | adaptLearnRate = function() { 71 | self$brain_actor$lr = self$brain_actor$lr * self$lr_decay 72 | self$brain_critic$lr = self$brain_critic$lr * self$lr_decay 73 | }, 74 | 75 | afterStep = function() { 76 | self$policy$afterStep() 77 | }, 78 | 79 | #@override 80 | evaluateArm = function(state) { 81 | state = array_reshape(state, c(1L, dim(state))) 82 | self$vec.arm.q = self$brain_actor$pred(state) 83 | self$glogger$log.nn$info("state: %s", paste(state, collapse = " ")) 84 | self$glogger$log.nn$info("prediction: %s", paste(self$vec.arm.q, collapse = " ")) 85 | }, 86 | 87 | afterEpisode = function() { 88 | self$replay(self$interact$perf$total_steps) # key difference here 89 | } 90 | ) # public 91 | ) 92 | 93 | AgentPGBaseline$info = function() { 94 | "Policy Gradient with Baseline" 95 | } 96 | 97 | quicktest = function() { 98 | #pg.bl.agent.nn.arch.actor = list(nhidden = 64, act1 = "tanh", act2 = "softmax", loss = "categorical_crossentropy", lr = 25e-3, kernel_regularizer = "regularizer_l2(l=0.0001)", bias_regularizer = "regularizer_l2(l=0.0001)", decay = 0.9, 
clipnorm = 5) 99 | #pg.bl.agent.nn.arch.critic = list(nhidden = 64, act1 = "tanh", act2 = "linear", loss = "mse", lr = 25e-3, kernel_regularizer = "regularizer_l2(l=0.0001)", bias_regularizer = "regularizer_l2(l=0)", decay = 0.9, clipnorm = 5) 100 | #value_fun = makeNetFun(pg.bl.agent.nn.arch.critic, flag_critic = T) 101 | #policy_fun = makeNetFun(pg.bl.agent.nn.arch.actor) 102 | env = makeGymEnv("CartPole-v0") 103 | conf = getDefaultConf("AgentPGBaseline") 104 | agent = initAgent("AgentPGBaseline", env, conf, custom_brain = F) 105 | #agent$customizeBrain(list(value_fun = value_fun, policy_fun = policy_fun)) 106 | agent$learn(200L) 107 | } 108 | -------------------------------------------------------------------------------- /R/agent_pg_compact.R: -------------------------------------------------------------------------------- 1 | # AgentPGCompactBL = R6::R6Class("AgentPGCompactBL", 2 | # inherit = AgentPGBaseline, 3 | # public = list( 4 | # p_old_c = NULL, 5 | # p_next_c = NULL, 6 | # delta = NULL, 7 | # list.rewards = NULL, 8 | # 9 | # setBrain = function() { 10 | # self$task = "policy_fun" 11 | # self$brain_actor = SurroNN$new(self) 12 | # }, 13 | # 14 | # getReplayYhat = function(batchsize) { 15 | # self$list.replay = self$mem$sample.fun(batchsize) 16 | # self$glogger$log.nn$info("replaying %s", self$mem$replayed.idx) 17 | # list.states.old = lapply(self$list.replay, ReplayMem$extractOldState) 18 | # list.states.next = lapply(self$list.replay, ReplayMem$extractNextState) 19 | # self$list.rewards = lapply(self$list.replay, ReplayMem$extractReward) 20 | # self$list.acts = lapply(self$list.replay, ReplayMem$extractAction) 21 | # self$model = self$brain_critic 22 | # self$p_old_c = self$getYhat(list.states.old) 23 | # self$p_next_c = self$getYhat(list.states.next) 24 | # temp = simplify2array(list.states.old) # R array put elements columnwise 25 | # mdim = dim(temp) 26 | # norder = length(mdim) 27 | # self$replay.x = aperm(temp, c(norder, 1:(norder - 1))) 28 | # }, 29 | # 30 | # replay = function(batchsize) { 31 | # self$getReplayYhat(batchsize) 32 | # len = length(self$list.replay) # replay.list might be smaller than batchsize 33 | # self$setAmf(batchsize) 34 | # self$delta = array(self$vec_dis_return, dim = dim(self$p_old_c)) - self$p_old_c 35 | # list.targets.actor = lapply(1:len, function(i) as.vector(self$extractActorTarget(i))) 36 | # list.targets.critic = lapply(1:len, function(i) as.vector(self$extractCriticTarget(i))) 37 | # y_actor = t(simplify2array(list.targets.actor)) 38 | # y_actor = diag(self$amf) %*% y_actor 39 | # y_actor = diag(self$delta) %*% y_actor 40 | # y_critic = array(unlist(list.targets.critic), dim = c(len, 1L)) 41 | # self$brain_actor$train(self$replay.x, y_actor) # update the policy model 42 | # self$brain_critic$train(self$replay.x, y_critic) # update the policy model 43 | # }, 44 | # 45 | # extractCriticTarget = function(i) { 46 | # y = self$p_old_c[i, ] + self$delta[i] 47 | # return(y) 48 | # }, 49 | # 50 | # extractActorTarget = function(i) { 51 | # act = self$list.acts[[i]] 52 | # delta = (+1.0) * as.vector(self$delta[i]) 53 | ##FIXME: interestingly, multiply advantage by -1 also works 54 | # vec.act = rep(0L, self$act_cnt) 55 | # vec.act[act] = 1.0 56 | # target = delta * array(vec.act, dim = c(1L, self$act_cnt)) 57 | # return(target) 58 | # }, 59 | # 60 | # adaptLearnRate = function() { 61 | # self$brain_actor$lr = self$brain_actor$lr * self$lr_decay 62 | # self$brain_critic$lr = self$brain_critic$lr * self$lr_decay 63 | # }, 64 | # 65 | # afterStep = 
function() { 66 | # self$policy$afterStep() 67 | # }, 68 | # 69 | ##@override 70 | # evaluateArm = function(state) { 71 | # state = array_reshape(state, c(1L, dim(state))) 72 | # self$vec.arm.q = self$brain_actor$pred(state) 73 | # self$glogger$log.nn$info("state: %s", paste(state, collapse = " ")) 74 | # self$glogger$log.nn$info("prediction: %s", paste(self$vec.arm.q, collapse = " ")) 75 | # }, 76 | # 77 | # afterEpisode = function(interact) { 78 | # self$replay(self$interact$perf$total_steps) # key difference here 79 | # } 80 | # ) # public 81 | # ) 82 | -------------------------------------------------------------------------------- /R/agent_table.R: -------------------------------------------------------------------------------- 1 | AgentTable = R6Class("AgentTable", 2 | inherit = AgentArmed, 3 | public = list( 4 | q_tab = NULL, 5 | alpha = NULL, 6 | lr_min = NULL, 7 | act_names_per_state = NULL, 8 | vis_after_episode = NULL, 9 | initialize = function(env, conf, q_init = 0.0, state_names = NULL, act_names_per_state = NULL, vis_after_episode = F) { 10 | super$initialize(env, conf) 11 | self$vis_after_episode = vis_after_episode 12 | self$act_names_per_state = act_names_per_state 13 | self$q_tab = matrix(q_init, nrow = self$state_dim, ncol = self$act_cnt) 14 | if (!is.null(state_names)) rownames(self$q_tab) = state_names 15 | }, 16 | 17 | buildConf = function() { 18 | self$lr_decay = self$conf$get("agent.lr_decay") 19 | self$lr_min = self$conf$get("agent.lr.min") 20 | memname = self$conf$get("replay.memname") 21 | self$mem = makeReplayMem(memname, agent = self, conf = self$conf) 22 | self$alpha = self$conf$get("agent.lr") 23 | self$gamma = self$conf$get("agent.gamma") 24 | policy_name = self$conf$get("policy.name") 25 | self$policy = makePolicy(policy_name, self) 26 | self$glogger = RLLog$new(self$conf) 27 | self$createInteract(self$env) # initialize after all other members are initialized!! 28 | }, 29 | act = function(state) { 30 | self$vec.arm.q = self$q_tab[state, ] 31 | self$vec.arm.q = self$env$evaluateArm(self$vec.arm.q) 32 | self$policy$act(state) 33 | }, 34 | 35 | afterStep = function() { 36 | # Q^{\pi^{*}}(s, a) = R + max \gamma Q^{\pi^{*}}(s', a) 37 | transact = self$mem$samples[[self$mem$size]] # take the latest transaction? 
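# Worked form of the update performed below (sketch only, no extra code): with step size alpha,
#   Q[s, a] <- Q[s, a] + alpha * (r + gamma * max_a' Q[s', a'] - Q[s, a]),
# where the bracketed term is the temporal-difference error `delta` computed below and the
# bootstrap term gamma * max_a' Q[s', a'] is dropped when the transition is terminal.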
38 | # self$q_tab has dim: $#states * #actions$ 39 | if (ReplayMem$extractDone(transact)) future = transact$reward 40 | else future = transact$reward + self$gamma * max(self$q_tab[(transact$state.new), ]) # state start from 0 in cliaff walker 41 | delta = future - self$q_tab[(transact$state.old), transact$action] 42 | self$q_tab[(transact$state.old), transact$action] = self$q_tab[(transact$state.old), transact$action] + self$alpha * delta 43 | }, 44 | 45 | customizeBrain = function() { 46 | }, 47 | 48 | afterEpisode = function(interact) { 49 | self$policy$afterEpisode() 50 | cat(sprintf("\n learning rate: %f \n", self$alpha)) 51 | self$alpha = max(self$alpha * self$lr_decay, self$lr_min) 52 | if (self$vis_after_episode) self$print2() 53 | }, 54 | 55 | print = function() { 56 | self$q_tab 57 | }, 58 | 59 | print2 = function() { 60 | x = self$q_tab 61 | rowise_val = split(x, rep(1:nrow(x), each = ncol(x))) 62 | if (!checkmate::testNull(self$act_names_per_state)) { 63 | checkmate::assert_list(self$act_names_per_state) 64 | checkmate::assert_true(length(self$act_names_per_state) == nrow(self$q_tab)) 65 | colnames_per_row = self$act_names_per_state 66 | list_act_names = mapply(setNames, rowise_val, colnames_per_row, SIMPLIFY = FALSE) 67 | list_act_names = setNames(list_act_names, names(colnames_per_row)) 68 | print(list_act_names) 69 | } else print(rowise_val) 70 | } 71 | ) 72 | ) 73 | 74 | AgentTable$info = function() { 75 | "Tabular Learning" 76 | } 77 | 78 | AgentTable$test = function() { 79 | conf = getDefaultConf("AgentTable") 80 | #conf$set(agent.lr.mean = 0.1, agent.lr = 0.5, agent.lr_decay = 1, policy.name = "EpsilonGreedy") 81 | conf$set(agent.lr.mean = 0.1, agent.lr = 0.5, agent.lr_decay = 0.9999, policy.name = "EpsGreedTie") 82 | agent = initAgent(name = "AgentTable", env = "CliffWalking-v0", conf = conf) 83 | agent$learn(500) 84 | rlR:::visualize(agent$q_tab) 85 | agent$plotPerf() 86 | expect_true(agent$interact$perf$getAccPerf() > -40.0) 87 | } 88 | 89 | 90 | agent.brain.dict.AgentTable = function() NULL 91 | 92 | rlR.conf.AgentTable = function() { 93 | RLConf$new( 94 | render = F, 95 | console = T, 96 | log = FALSE, 97 | agent.lr = 0.5, 98 | agent.gamma = 0.95, 99 | agent.lr_decay = 1.0, 100 | agent.lr.min = 0.01, 101 | policy.maxEpsilon = 0.1, 102 | policy.minEpsilon = 0, 103 | policy.decay.type = "decay_linear", 104 | policy.aneal.steps = 400, 105 | #policy.decay.rate = exp(-0.001), 106 | policy.name = "EpsGreedTie", 107 | agent.start.learn = 0L) 108 | } 109 | -------------------------------------------------------------------------------- /R/conf.R: -------------------------------------------------------------------------------- 1 | # The reason that there exist a Configuration object which is throughout the experiment is that we want to look at the effect of those configuration parameters. 
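# Typical usage sketch (getDefaultConf(), $set() and $get() are defined in this package;
# the parameter values below are just placeholders):
#   conf = getDefaultConf("AgentDQN")
#   conf$set(replay.batchsize = 32L, console = FALSE)
#   conf$get("replay.batchsize")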
2 | RLConf = R6::R6Class("RLConf", 3 | public = list( 4 | static = NULL, 5 | conf.log.perf = NULL, # seperate configuration for output like logging, RData, etc 6 | # get persistence file prefix 7 | getPersist = function(foldername) { 8 | list.str = lapply(names(self$static), function(x) sprintf("-%s: %s-\n", x, self$static[[x]])) 9 | self$conf.log.perf$str.conf = paste0("\n", toString(list.str)) 10 | hash.conf = openssl::md5(self$conf.log.perf$str.conf) 11 | str.time = toString(Sys.time()) 12 | str.time = gsub(" ", "_", str.time) 13 | str.date = toString(Sys.Date()) 14 | filePrefix = file.path(getwd(), foldername, str.date, str.time, hash.conf) 15 | cat(sprintf("Creating the following output folder %s:\n", filePrefix)) 16 | dir.create(filePrefix, recursive = TRUE) 17 | self$conf.log.perf$filePrefix = filePrefix 18 | self$conf.log.perf$resultTbPath = file.path(filePrefix, rlR.conf4log$resultTbPath) # RData file persistence place 19 | }, 20 | 21 | initialize = function(...) { 22 | self$conf.log.perf = data.table::copy(rlR.conf4log) # valid only when log = TRUE 23 | self$static = data.table::copy(rlR.conf.default) # deep copy 24 | #par.list = list(...) 25 | #dns = setdiff(names(par.list), rlR.conf.default) 26 | #list.default = setNames(lapply(dns, function(x) self$static[[x]]), dns) 27 | self$set(...) 28 | }, 29 | 30 | get = function(name) { 31 | self$static[[name]] 32 | }, 33 | 34 | set = function(...) { 35 | par.list = list(...) 36 | lapply(names(par.list), function(x) self$updatePara(x, par.list[[x]])) 37 | flag = self$get("log") 38 | if (is.null(flag)) flag = FALSE 39 | if (flag) { 40 | folder_name = readline(prompt = "Please enter folder name relative to current working directory to store output files\n") 41 | self$getPersist(folder_name) 42 | } 43 | }, 44 | 45 | updatePara = function(str.para, val.value) { 46 | self$static[[str.para]] = val.value 47 | }, 48 | 49 | show = function() { 50 | list_param = self$static 51 | dns = names(list_param) 52 | ## remove agent.nn 53 | #flag = sapply(dns, function(x) grepl("agent.nn", x)) 54 | #dns = dns[-which(flag)] 55 | list_conf = lapply(dns, function(x) self$static[[x]]) 56 | names(list_conf) = dns 57 | df = as.data.frame(unlist(list_conf)) 58 | colnames(df) = "value" 59 | df 60 | }, 61 | print = function() { 62 | print(self$show()) 63 | } 64 | ) 65 | ) 66 | -------------------------------------------------------------------------------- /R/confDefault.R: -------------------------------------------------------------------------------- 1 | # hyper-parameters range 2 | rlR.conf.lod = list( 3 | render = list(name = "render", note = "Whether to show rendering video or not", value = FALSE), 4 | log = list(name = "log", note = "Whether to log important information on drive", value = FALSE), 5 | console = list(name = "console", note = "Whether to enable debug info output to console", value = FALSE), 6 | agent.gamma = list(name = "agent.gamma", note = "The discount factor in reinforcement learning", value = 0.99), 7 | agent.flag.reset.net = list(name = "agent.flag.reset.net", note = "Whether to reset the neural network ", value = TRUE), #FIXME: should be set this? 
8 | agent.lr.decay = list(name = "agent.lr.decay", note = "The decay factor of the learning rate at each step", value = exp(-0.001)), # decaying with regard to step is better since some episode can be too long 9 | agent.lr = list(name = "agent.lr", note = "learning rate for the agent", value = 1e-3), 10 | agent.lr.min = list(name = "agent.lr.min", note = "minimum learning rate", value = 0), 11 | agent.store.model = list(name = "agent.store.model", note = "whether to store the model of the agent or not", value = FALSE), #FIXME: exclude this 12 | agent.update.target.freq = list(name = "agent.update.target.freq", note = "How often should the target network be set", value = 2000L), 13 | agent.start.learn = list(name = "agent.start.learn", note = "after how many transitions should replay begin", value = 64L), 14 | agent.clip.td = list(name = "agent.clip.td", note = "whether to clip TD error", value = FALSE), 15 | policy.maxEpsilon = list(name = "policy.maxEpsilon", note = "The maximum epsilon exploration rate", value = 1.0), 16 | policy.minEpsilon = list(name = "policy.minEpsilon", note = "The minimum epsilon exploration rate", value = 0.01), 17 | policy.decay.rate = list(name = "policy.decay.rate", note = "the decay rate", value = 1.0), 18 | policy.decay.type = list(name = "policy.decay.type", note = "the way to decay epsion, can be decay_geo, decay_exp, decay_linear", value = "decay_geo"), 19 | policy.aneal.steps = list(name = "policy.aneal.steps", note = "only valid when policy.decay.type = 'decay_linear'", value = 1e6), 20 | policy.softmax.magnify = list(name = "policy.softmax.magnify", value = 1), 21 | replay.batchsize = list(name = "replay.batchsize", note = "how many samples to take from replay memory each time", value = 64), 22 | replay.memname = list(name = "replay.memname", range = c("Uniform"), note = "The type of replay memory", value = "Uniform"), 23 | replay.mem.size = list(name = "replay.mem.size", note = "The size of the replay memory", value = 2e4), 24 | replay.epochs = list(name = "replay.epochs", note = "How many gradient decent epochs to carry out for one replay", value = 1L), 25 | replay.freq = list(name = "replay.freq", note = "how many steps to wait until one replay", value = 1L) 26 | ) 27 | 28 | rlR.conf.dt = data.table::rbindlist(rlR.conf.lod, fill = TRUE) 29 | rlR.conf.df = as.data.frame(rlR.conf.dt) 30 | 31 | 32 | # define default hyper-parameters 33 | rlR.conf.default = lapply(rlR.conf.lod, function(x) x$value) 34 | 35 | #' @title listAvailConf 36 | #' @description List defaults hyper-parameters names 37 | #' @export 38 | listAvailConf = function() { 39 | rlR.conf.dt 40 | } 41 | 42 | rlR.conf.AgentActorCritic = function() { 43 | conf = RLConf$new( 44 | render = FALSE, 45 | log = FALSE, 46 | agent.lr = 1e-2, 47 | agent.gamma = 0.9, 48 | agent.lr.decay = 1, 49 | console = TRUE, 50 | policy.name = "Prob", 51 | policy.maxEpsilon = 0, 52 | policy.minEpsilon = 0, 53 | replay.epochs = 1L, 54 | replay.memname = "Latest" 55 | #agent.nn.arch.actor = list(nhidden = 64, act1 = "tanh", act2 = "softmax", loss = "categorical_crossentropy", lr = 1e-4, kernel_regularizer = "regularizer_l2(l=0.0001)", bias_regularizer = "regularizer_l2(l=1e-4)", decay = 0.9, clipnorm = 5), 56 | #agent.nn.arch.critic = list(nhidden = 64, act1 = "tanh", act2 = "linear", loss = "mse", lr =1e-4, kernel_regularizer = "regularizer_l2(l=0.0001)", bias_regularizer = "regularizer_l2(l=1e-4)", decay = 0.9, clipnorm = 5) 57 | ) 58 | } 59 | 60 | rlR.conf.AgentDDPG = function() { 61 | conf = RLConf$new( 62 | 
render = FALSE, 63 | log = FALSE, 64 | agent.lr = 1e-2, 65 | agent.gamma = 0.9, 66 | agent.lr.decay = 1, 67 | console = TRUE, 68 | policy.name = "Prob", 69 | policy.maxEpsilon = 0, 70 | policy.minEpsilon = 0, 71 | replay.batchsize = 32, # saves a lot of time compared to when batchsize = 64 72 | replay.epochs = 1L, 73 | replay.memname = "Uniform" 74 | ) 75 | } 76 | 77 | 78 | #' @title get Default Configuration according to agent name 79 | #' @description List defaults hyper-parameters 80 | #' @param agent_name The name for Agent 81 | #' @export 82 | #' @examples 83 | #' conf = rlR::getDefaultConf("AgentDQN") 84 | getDefaultConf = function(agent_name) { 85 | get(paste0("rlR.conf.", agent_name))() 86 | } 87 | 88 | #' @title show Default Configuration 89 | #' @description List defaults hyper-parameters in dataframe 90 | #' @export 91 | #' @examples 92 | #' df = rlR::showDefaultConf() 93 | showDefaultConf = function() { 94 | rlR.conf.df = data.frame(unlist(rlR.conf.default)) 95 | colnames(rlR.conf.df) = NULL 96 | rlR.conf.df 97 | } 98 | 99 | 100 | rlR.conf4log = list( 101 | policy.epi_wait_ini = 5L, # initially the performance should increase 102 | policy.epi_wait_middle = 25L, 103 | policy.epi_wait_expl = 40L, 104 | replay.mem.dt = FALSE, 105 | replay.mem.laplace.smoother = 0.001, 106 | resultTbPath = "Perf.RData", 107 | LOGGERNAMENN = "nn.logger", 108 | LOGGERNAMERL = "rl.logger", 109 | NNSufix = "nn.log", 110 | RLSufix = "rl.log.R" 111 | ) 112 | 113 | 114 | agent.brain.dict.AgentDQN = agent.brain.dict.AgentFDQN = agent.brain.dict.AgentDDQN = function() list(value_fun = makeValueNet.DQN) 115 | agent.brain.dict.AgentPG = function() list(policy_fun = makePolicyNet) 116 | agent.brain.dict.AgentPGBaseline = function() list(policy_fun = makePolicyNet, value_fun = makeValueNet) 117 | agent.brain.dict.AgentActorCritic = function() list(policy_fun = makePolicyNet2, value_fun = makeValueNet2) 118 | -------------------------------------------------------------------------------- /R/environment_base.R: -------------------------------------------------------------------------------- 1 | #' @title Reinforcement Learning Environment 2 | #' 3 | #' @format \code{\link{R6Class}} object 4 | #' 5 | #' @description 6 | #' A \code{\link{R6Class}} to represent reinforcement learning environments. To define custom environment, one should define a \code{\link{R6Class}} which inherit rlR::Environment. 7 | #' 8 | #' @section Member Variables: 9 | #' 10 | #' \describe{ 11 | #' \item{act_cnt}{[\code{int}] \cr 12 | #' Number of actions of the agent to environment 13 | #' } 14 | #' \item{state_dim}{[\code{vector(int)}] \cr 15 | #' The dimension of the observation(or state) space on the environment. Must be vector of integers. For example, c(28, 28, 3), which can be the dimension for a tensor of order 3. 16 | #' } 17 | #' \item{name}{[\code{character}] \cr 18 | #' A string to represent the name of the environment} 19 | #' \item{flag_continous}{[\code{logic}] \cr 20 | #' A boolean variable to represent whether the action space is continous or not} 21 | #' } 22 | #' 23 | #' @section Methods: 24 | #' \describe{ 25 | #' \item{initialize(...)}{[\code{function}] \cr 26 | #' Constructor function to initialize environment} 27 | #' \item{step(action)}{[\code{function}] \cr 28 | #' Function to make a step in the environment. Must return a named list of [\code{state(array of size state_dim), reward(reward the agent get after making the step), done(boolean variable whether the episode is finished or not), info(list of anything)}]. 
There must be stoping criteria in step function which should return [\code{list(state = state, reward = reward, done = TRUE, info = list())}] to stop the interaction between the environment and the agent.} 29 | #' \item{reset()}{[\code{function}] \cr 30 | #' Reset the environment} 31 | #' \item{render()}{[\code{function}] \cr 32 | #' Print out information to user about the environment, can be left empty} 33 | #' \item{afterAll()}{[\code{function}] \cr 34 | #' What needs to be done after learning is finished, could be left empty} 35 | #' \item{evaluateArm(vec_arm)}{[\code{function}] \cr 36 | #' process value of vec_arm which is the same length vector as action count act_cnt to only generate legal action, by default doing nothing} 37 | #' } 38 | #' @return [\code{\link{Environment}}]. 39 | #' @export 40 | Environment = R6::R6Class("Environment", 41 | public = list( 42 | act_cnt = NULL, 43 | state_dim = NULL, 44 | name = NULL, 45 | flag_continous = FALSE, 46 | flag_tensor = FALSE, 47 | observ_stack_len = 1L, 48 | maxStepPerEpisode = 1e4L, 49 | agent = NULL, # used to get access to replaymem 50 | initialize = function() { 51 | }, 52 | 53 | evaluateArm = function(vec_arm) { 54 | return(vec_arm) 55 | }, 56 | 57 | afterEpisode = function() { 58 | }, 59 | 60 | # environment get a hook to agent so it can access the replay memory 61 | setAgent = function(agent) { 62 | self$agent = agent 63 | self$agent$mem$observ_stack_len = self$observ_stack_len 64 | }, 65 | 66 | render = function() { 67 | 68 | }, 69 | 70 | overview = function() { 71 | cat(sprintf("\naction cnt: %s \n", toString(self$act_cnt))) 72 | cat(sprintf("state dim: %s \n", toString(self$state_dim))) 73 | cat(sprintf("%s\n", ifelse(self$flag_continous, "continous action", "discrete action"))) 74 | }, 75 | 76 | reset = function() { 77 | }, 78 | 79 | step = function(action) { 80 | }, 81 | 82 | afterAll = function() { 83 | }, 84 | 85 | print = function() { 86 | self$overview() 87 | } 88 | ) 89 | ) 90 | 91 | EnvToy = R6::R6Class("EnvToy", 92 | inherit = Environment, 93 | public = list( 94 | initialize = function(...) { 95 | self$act_cnt = c(2) 96 | self$state_dim = c(4) 97 | }, 98 | 99 | reset = function() { 100 | return(list( 101 | state = array(rnorm(self$state_dim), dim = self$state_dim), 102 | reward = NULL, 103 | done = FALSE, 104 | info = list() 105 | )) 106 | }, 107 | 108 | step = function(action) { 109 | return(list( 110 | state = array(rnorm(self$state_dim), dim = self$state_dim), 111 | reward = 1.0, 112 | done = TRUE, 113 | info = list() 114 | )) 115 | } 116 | ) 117 | ) 118 | -------------------------------------------------------------------------------- /R/experiment.R: -------------------------------------------------------------------------------- 1 | #' @title Repeat experiment 2 | #' 3 | #' @description Repeat the experiment for serveral times 4 | #' 5 | #' @param sname The scenario name of Gym environment 6 | #' @param aname The name of the Agent 7 | #' @param conf Configuration object 8 | #' @param nrep Number of repetitions 9 | #' @param nepi Number of episode to learn 10 | #' @param value_fun customized neural network as value function approximator, default NULL 11 | #' @param ... 
Other Parameters to pass to GymEnv 12 | #' @return list of ggplot2 object for performance and list of reward per experiment per episode 13 | #' @export 14 | # library(doMC) # registerDoMC(4) # res = repExperiment(sname = "CartPole-v0", aname = "AgentDQN", conf = getDefaultConf("AgentDQN"), nrep = 5, nepi = 200) 15 | repExperiment = function(sname, aname, conf, nrep = 5L, nepi, value_fun = NULL, ...) { 16 | list.agent = foreach::foreach(i = 1:nrep) %dopar% { 17 | env = makeGymEnv(sname, ...) 18 | agent = initAgent(aname, env, conf) 19 | agent$learn(nepi) 20 | agent 21 | } 22 | list.r = lapply(list.agent, function(agent) { 23 | agent$interact$perf$list.reward.epi}) 24 | list.len = lapply(1:nrep, function(i) lapply(list.r[[i]], function(x) length(x))) 25 | len = max(unlist(list.len)) 26 | init.list = lapply(1:nepi, function(j) vector(mode = "numeric", length = len)) 27 | convert2SameLen = function(init1) { 28 | init2 = vector(mode = "numeric", length = len) 29 | init2[1:length(init1)] = init1 30 | init2 31 | } 32 | list.episode = lapply(1:nepi, function(episode_ind) { 33 | init = vector(mode = "numeric", length = len) 34 | for (i in 1:nrep) { 35 | init = init + convert2SameLen(list.r[[i]][[episode_ind]]) 36 | } 37 | init 38 | }) 39 | #for (i in 1L:nrep) { 40 | # init.list = lapply(2:nepi, function(episode_ind) init.list[[episode_ind]] + convert2SameLen(list.r[[i]][[episode_ind]])) 41 | #} 42 | #init = lapply(init, function(vec) vec / nrep) 43 | #list.episode = lapply(init, function(vec) vec / nrep) 44 | list.episode = lapply(list.episode, function(vec) vec / nrep) 45 | env = makeGymEnv(sname, ...) 46 | agent = initAgent(aname, env, conf = conf) 47 | #agent$interact$perf$list.reward.epi = init 48 | agent$interact$perf$list.reward.epi = list.episode 49 | plot = agent$plotPerf() 50 | return(list(plot = plot, list.r = list.r, list.agent = list.agent)) 51 | } 52 | -------------------------------------------------------------------------------- /R/interaction_base.R: -------------------------------------------------------------------------------- 1 | InteractionBase = R6::R6Class("InteractionBase", 2 | public = list( 3 | rl_agent = NULL, 4 | rl_env = NULL, 5 | perf = NULL, 6 | maxiter = NULL, 7 | glogger = NULL, 8 | run = function() { 9 | stop("not implemented") 10 | } 11 | ), # public 12 | private = list(), 13 | active = list() 14 | ) 15 | -------------------------------------------------------------------------------- /R/logging.R: -------------------------------------------------------------------------------- 1 | RLLog = R6::R6Class("RLLog", #nocov start 2 | public = list( 3 | log.root = NULL, 4 | log.nn = NULL, 5 | conf = NULL, 6 | flag = NULL, 7 | # the configuration of logging does not impact the performance, so use global configuration 8 | initialize = function(conf) { 9 | logging::logReset() 10 | conf.logging = conf$conf.log.perf 11 | self$conf = conf 12 | # make log obj 13 | self$log.root = logging::getLogger(conf$conf.log.perf$LOGGERNAMERL) 14 | self$log.nn = logging::getLogger(conf$conf.log.perf$LOGGERNAMENN) 15 | logging::removeHandler("writeToConsole", logger = conf$conf.log.perf$LOGGERNAMENN) 16 | logging::removeHandler("basic.stdout", logger = conf$conf.log.perf$LOGGERNAMENN) 17 | # whether log to file 18 | self$flag = conf$get("log") 19 | if (is.null(self$flag)) self$flag = FALSE 20 | if (self$flag) { 21 | # root logger 22 | logging::addHandler(writeToFile, file = file.path(conf$conf.log.perf$filePrefix, conf.logging$RLSufix), logger = conf.logging$LOGGERNAMERL) 23 | # every step 
logger 24 | logging::addHandler(writeToFile, file = file.path(conf.logging$filePrefix, conf$conf.log.perf$NNSufix), logger = conf$conf.log.perf$LOGGERNAMENN) 25 | # first logging 26 | self$log.root$info(conf.logging$str.conf) 27 | self$log.root$info(conf$conf.log.perf$filePrefix) # take down the directory name 28 | info = paste0("\n", conf.logging$info.before, conf.logging$filePrefix, conf.logging$info.after) 29 | self$log.root$info(info) 30 | } 31 | }, 32 | 33 | afterAll = function() { 34 | if (self$flag) { 35 | filename.replay = file.path(rlR.conf4log$filePrefix, "replay.dt.csv") 36 | filename.experience = file.path(self$conf$conf.log.perf$filePrefix, "experience.dt.csv") 37 | self$log.root$info("\n a = BBmisc::load2('%s')\n", self$conf$conf.log.perf$resultTbPath) 38 | cat(sprintf("\n a = BBmisc::load2('%s') \n", self$conf$conf.log.perf$resultTbPath)) 39 | write.csv(self$rl.agent$mem$dt, file = filename.experience) 40 | self$log.root$info("\n b = read.csv('%s') \n", filename.experience) 41 | } 42 | } 43 | ) 44 | ) # nocov end 45 | -------------------------------------------------------------------------------- /R/nnArsenal_ddpg.R: -------------------------------------------------------------------------------- 1 | # normal 1 arm output network with only state as input 2 | createActorNetwork.AgentDDPG.torc = function(state_dim = 3, action_dim = 1L) { 3 | input_state = keras::layer_input(shape = state_dim) 4 | states_hidden = input_state %>% 5 | layer_dense(units = 27, activation = "relu") 6 | states_hidden2 = states_hidden %>% 7 | layer_dense(units = 27, activation = "linear") %>% 8 | layer_dense(units = action_dim, activation = "linear") # only 1L output! 9 | model = keras::keras_model(inputs = input_state, outputs = states_hidden2) 10 | opt = keras::optimizer_adam(lr = 0.0001) 11 | model %>% compile( 12 | optimizer = opt, 13 | loss = "mse" 14 | ) 15 | return(list(model = model, input_state = input_state, weights = model$trainable_weights)) 16 | } 17 | 18 | # both state and action are inputs! 
19 | createCriticNetwork.AgentDDPG.torc = function(state_dim, action_dim) { 20 | input_state = keras::layer_input(shape = state_dim) 21 | input_action = keras::layer_input(shape = action_dim, name = "input_action") 22 | action_hidden = input_action %>% 23 | layer_dense(units = 30, activation = "linear") 24 | states_hidden = input_state %>% 25 | layer_dense(units = 30, activation = "relu") 26 | states_hidden2 = states_hidden %>% 27 | layer_dense(units = 30, activation = "linear") 28 | hiddens = keras::layer_add(c(states_hidden2, action_hidden)) 29 | # outputs compose input + dense layers 30 | predictions = hiddens %>% 31 | layer_dense(units = 30, activation = "relu") %>% 32 | layer_dense(units = action_dim, activation = "linear") 33 | # create and compile model 34 | model = keras::keras_model(inputs = c(input_action, input_state), outputs = predictions) 35 | opt = keras::optimizer_adam(lr = 0.0001) 36 | model %>% compile( 37 | optimizer = opt, 38 | loss = "mse" 39 | ) 40 | return(list(model = model, input_action = input_action, input_state = input_state)) 41 | } 42 | 43 | 44 | createCriticNetwork.AgentDDPG = function(state_dim, action_dim) { 45 | input_state = keras::layer_input(shape = state_dim) 46 | input_action = keras::layer_input(shape = action_dim, name = "input_action") 47 | action_hidden = input_action %>% 48 | layer_dense(units = 30, activation = "linear") 49 | states_hidden = input_state %>% layer_dense(units = 30, activation = "linear") 50 | hiddens = keras::layer_add(c(states_hidden, action_hidden)) 51 | #concat = keras::layer_concatenate(c(action_hidden, states_hidden)) 52 | hiddens2 = keras::layer_activation_relu(hiddens) 53 | 54 | # outputs compose input + dense layers 55 | predictions = hiddens2 %>% layer_dense(units = action_dim, activation = "linear") 56 | # create and compile model 57 | model = keras::keras_model(inputs = c(input_action, input_state), outputs = predictions) 58 | opt = keras::optimizer_adam(lr = 0.002) 59 | model %>% compile( 60 | optimizer = opt, 61 | loss = "mse" 62 | ) 63 | return(list(model = model, input_action = input_action, input_state = input_state)) 64 | } 65 | 66 | LayerKMultiply <- R6::R6Class( 67 | "KerasLayer", 68 | inherit = KerasLayer, 69 | 70 | public = list( 71 | m = NULL, 72 | 73 | initialize = function(m) { 74 | self$m <- m 75 | }, 76 | 77 | call = function(x, mask = NULL) { 78 | x * self$m 79 | } 80 | ) 81 | ) 82 | 83 | layer_LayerKMultiply <- function(object, m) { 84 | create_layer(LayerKMultiply, object, list(m = m)) 85 | } 86 | 87 | 88 | createActorNetwork.AgentDDPG = function(state_dim = 3, action_dim = 1L, a_bound) { 89 | input_state = keras::layer_input(shape = state_dim) 90 | states_hidden = input_state %>% 91 | layer_dense(units = 30, activation = "relu") 92 | states_hidden2 = states_hidden %>% 93 | layer_dense(units = action_dim, activation = "tanh") # only 1L output! 
94 | output = states_hidden2 %>% layer_LayerKMultiply(m = a_bound) 95 | model = keras::keras_model(inputs = input_state, outputs = states_hidden2) 96 | opt = keras::optimizer_adam(0.001) 97 | fun_loss = function(y_true, y_pred) { 98 | # currently not used at all 99 | k_b = keras::backend() 100 | hh = k_b$print_tensor(y_true) 101 | temp = y_true * k_b$log(y_pred) 102 | sloss = -k_b$sum(temp) 103 | cross_entropy = k_b$mean(sloss) 104 | } 105 | model %>% compile( 106 | optimizer = opt, 107 | loss = fun_loss 108 | ) 109 | return(list(model = model, input_state = input_state, weights = model$trainable_weights)) 110 | } 111 | -------------------------------------------------------------------------------- /R/obsolette.R: -------------------------------------------------------------------------------- 1 | function() { 2 | library(profvis) 3 | profvis( 4 | { 5 | agent = initAgent("AgentTable", "CliffWalking-v0") 6 | agent = initAgent("AgentTable", "FrozenLake-v0") 7 | agent = initAgent("AgentTable", "Taxi-v2") 8 | agent$learn(500) 9 | visualize(agent$q_tab) 10 | agent$plotPerf(F) 11 | } 12 | ) 13 | } 14 | -------------------------------------------------------------------------------- /R/policy.R: -------------------------------------------------------------------------------- 1 | Policy = R6::R6Class("Policy", 2 | public = list( 3 | decay_rate = NULL, 4 | host = NULL, 5 | gstep_idx = NULL, 6 | action = NULL, 7 | random_cnt = NULL, 8 | random_action = NULL, 9 | fun_aneal = NULL, 10 | total_aneal_step = NULL, 11 | epsilon = NULL, 12 | min_epsilon = NULL, 13 | max_epsilon = NULL, 14 | initialize = function(host) { 15 | self$random_cnt = 0L 16 | self$host = host 17 | self$decay_rate = self$host$conf$get("policy.decay.rate") 18 | self$total_aneal_step = self$host$conf$get("policy.aneal.steps") 19 | self$fun_aneal = get(self$host$conf$get("policy.decay.type"), envir = self) 20 | self$min_epsilon = self$host$conf$get("policy.minEpsilon") 21 | self$max_epsilon = self$host$conf$get("policy.maxEpsilon") 22 | self$epsilon = self$max_epsilon 23 | self$gstep_idx = 1 24 | }, 25 | 26 | sampleRandomAct = function(state) { 27 | self$random_action = sample.int(self$host$act_cnt)[1L] 28 | }, 29 | 30 | predProbRank = function(state) { 31 | prob = order(self$host$vec.arm.q) 32 | action = sample.int(self$host$act_cnt, prob = prob)[1L] 33 | return(action) 34 | }, 35 | 36 | decay_geo = function() { 37 | temp = self$epsilon * self$decay_rate 38 | self$epsilon = max(temp, self$min_epsilon) 39 | }, 40 | 41 | decay_exp = function() { 42 | self$epsilon = self$min_epsilon + (self$max_epsilon - self$min_epsilon) * exp(self$decay_rate * self$gstep_idx) 43 | self$gstep_idx = self$gstep_idx + 1L 44 | }, 45 | 46 | decay_linear = function() { 47 | self$epsilon = self$max_epsilon - (self$gstep_idx / self$total_aneal_step) * (self$max_epsilon - self$min_epsilon) 48 | # if self$gstep_idx > self$total_aneal_step 49 | self$epsilon = max(self$epsilon, self$min_epsilon) 50 | self$gstep_idx = self$gstep_idx + 1L 51 | }, 52 | 53 | afterStep = function() { 54 | }, 55 | 56 | afterEpisode = function() { 57 | self$host$interact$toConsole("Epsilon%f \n", self$epsilon) 58 | self$host$glogger$log.nn$info("rand steps:%d \n", self$random_cnt) 59 | self$host$interact$toConsole("rand steps:%i \n", self$random_cnt) # same message to console 60 | self$random_cnt = 0L 61 | } 62 | ) 63 | ) 64 | 65 | 66 | PolicyProb = R6::R6Class("PolicyProb", 67 | inherit = Policy, 68 | public = list( 69 | act = function(state) { 70 | sample.int(self$host$act_cnt, prob = 
self$host$vec.arm.q, size = 1L) 71 | } 72 | ) 73 | ) 74 | 75 | 76 | 77 | PolicyEpsilonGreedy = R6::R6Class("PolicyEpsilonGreedy", 78 | inherit = Policy, 79 | public = list( 80 | initialize = function(host) { 81 | super$initialize(host) 82 | }, 83 | 84 | toss = function() { 85 | flag = runif(1L) < self$epsilon 86 | if (flag) { 87 | self$sampleRandomAct() 88 | self$action = self$random_action 89 | self$random_cnt = self$random_cnt + 1L 90 | self$host$glogger$log.nn$info("epsilon random action: %d", self$action) 91 | } 92 | }, 93 | 94 | act = function(state) { 95 | self$action = which.max(self$host$vec.arm.q) 96 | self$toss() 97 | return(self$action) 98 | }, 99 | 100 | afterStep = function() { 101 | self$fun_aneal() 102 | }, 103 | 104 | afterEpisode = function() { 105 | self$fun_aneal() # FIXME: not necessary here since we always decrease by step? 106 | super$afterEpisode() 107 | } 108 | ) 109 | ) 110 | 111 | PolicyEpsGreedTie = R6::R6Class("PolicyEpsGreedTie", 112 | inherit = PolicyEpsilonGreedy, 113 | public = list( 114 | sampleRandomAct = function() { 115 | self$random_action = sample(which(!is.na(self$host$vec.arm.q)), size = 1) 116 | }, 117 | 118 | act = function(state) { 119 | best_val = max(self$host$vec.arm.q, na.rm = T) 120 | best_arm = which(self$host$vec.arm.q == best_val) 121 | self$action = sample(best_arm, size = 1) 122 | self$toss() 123 | return(self$action) 124 | } 125 | ) 126 | ) 127 | 128 | 129 | 130 | 131 | PolicyProbEpsilon = R6::R6Class("PolicyProbEpsilon", 132 | inherit = PolicyEpsilonGreedy, 133 | public = list( 134 | initialize = function(host) { 135 | super$initialize(host) 136 | }, 137 | 138 | # all suboptimal arm probability sum up to epsilon with probability epsilon/act_cnt 139 | act = function(state) { 140 | prob = rep(self$epsilon, self$host$act_cnt) / (self$host$act_cnt) 141 | optarm = which.max(self$host$vec.arm.q) 142 | prob[optarm] = prob[optarm] + 1.0 - self$epsilon 143 | action = sample.int(self$host$act_cnt, prob = prob)[1L] 144 | if (optarm != action) self$random_cnt = self$random_cnt + 1L 145 | return(action) 146 | }, 147 | 148 | afterEpisode = function() { 149 | super$afterEpisode() 150 | } 151 | ) 152 | ) 153 | 154 | PolicySoftMax = R6::R6Class("PolicySoftMax", 155 | inherit = Policy, 156 | public = list( 157 | softmax_magnify = NULL, 158 | softmax_base = NULL, 159 | initialize = function(host) { 160 | super$initialize(host) 161 | self$softmax_base = self$host$conf$get("policy.softmax.base") 162 | self$softmax_magnify = self$host$conf$get("policy.softmax.magnify") 163 | }, 164 | 165 | # softmax will magnify the difference 166 | softmax = function(state) { 167 | z = self$host$vec.arm.q - max(self$host$vec.arm.q) # numerical stability 168 | prob = exp(self$softmax_magnify * z) 169 | prob = prob / sum(prob) 170 | action = sample.int(self$host$act_cnt, prob = prob)[1L] 171 | #action = rmultinom(n = 1L, size = self$host$act_cnt, prob = prob) # FIXME: any difference between multinomial and sample.int? 
172 | #action = which.max(action) 173 | if (action != which.max(self$host$vec.arm.q)) self$random_cnt = self$random_cnt + 1L 174 | return(action) 175 | }, 176 | 177 | act = function(state) { 178 | self$action = self$softmax(state) 179 | #self$toss() # epsilon chance 180 | return(self$action) 181 | }, 182 | 183 | afterEpisode = function() { 184 | self$host$interact$toConsole("softmax_base %f \n", self$softmax_base) 185 | self$softmax_base = self$softmax_magnify * self$softmax_base 186 | super$afterEpisode() 187 | } 188 | 189 | ) 190 | ) 191 | 192 | makePolicy = function(name, host) { 193 | fn = paste0("Policy", name) 194 | get(fn)$new(host = host) 195 | } 196 | -------------------------------------------------------------------------------- /R/replaymem_helpers.R: -------------------------------------------------------------------------------- 1 | ReplayMem$extractOldState = function(x) { 2 | return(x[[1L]]) 3 | } 4 | 5 | ReplayMem$extractAction = function(x) { 6 | return(x[[2L]]) 7 | } 8 | 9 | ReplayMem$extractReward = function(x) { 10 | return(x[[3L]]) 11 | } 12 | 13 | ReplayMem$extractNextState = function(x) { 14 | return(x[[4L]]) 15 | } 16 | ReplayMem$extractDone = function(x) { 17 | return(x[[5L]]) 18 | } 19 | ReplayMem$extractStep = function(x) { 20 | return(x[[6L]][["stepidx"]]) 21 | } 22 | -------------------------------------------------------------------------------- /R/replaymem_png.R: -------------------------------------------------------------------------------- 1 | ReplayMemPng = R6::R6Class( 2 | "ReplayMemPng", 3 | inherit = ReplayMemUniform, 4 | public = list( 5 | initialize = function(agent, conf) { 6 | super$initialize(agent, conf) 7 | }, 8 | 9 | mkInst = function(state.old, action, reward, state.new, done, info) { 10 | # transform/compress states into single string for DB entry 11 | if (length(self$agent$state_dim) == 1) { 12 | state.old %<>% paste(collapse = "_") 13 | state.new %<>% paste(collapse = "_") 14 | } else { 15 | state.old = (state.old / 255L) %>% (png::writePNG) %>% paste(collapse = "") 16 | state.new = (state.new / 255L) %>% (png::writePNG) %>% paste(collapse = "") 17 | } 18 | super$mkInst(state.old, action, reward, state.new, done, info) 19 | }, 20 | 21 | sample.fun = function(k) { 22 | k = min(k, self$size) 23 | self$replayed.idx = sample(self$size)[1L:k] 24 | # replay.samples = lapply(self$replayed.idx, function(x) self$samples[[x]]) 25 | replay.samples = self$samples[self$replayed.idx] 26 | #FIXME: IS THE Orientation of the array right! 
Critically Important 27 | list.replay = lapply(replay.samples, function(x) list( 28 | state.old = x$state.old %>% str_to_array_h %>% array(dim = self$agent$state_dim), 29 | action = x$action, 30 | reward = x$reward, 31 | state.new = x$state.new %>% str_to_array_h %>% array(dim = self$agent$state_dim), 32 | done = x$done, 33 | info = list( 34 | episode = x$episode, 35 | stepidx = x$stepidx, 36 | info = x$info 37 | ) 38 | )) 39 | list.replay # DEBUG: self$agent$env$showImage(list.replay[[64]][["state.new"]]) make sense 40 | #DEBUG from ctrl+c: only agent is available 41 | # indx = agent$mem$replayed.idx 42 | # replay.samples = agent$mem$samples[indx] 43 | # x = replay.samples[[2]] 44 | # image = x$state.old %>% str_to_array_h %>% array(dim = agent$state_dim) 45 | # image = x$state.new %>% str_to_array_h %>% array(dim = agent$state_dim) 46 | # agent$env$showImage(image[,,1]) 47 | # agent$env$showImage(image[,,2]) 48 | } 49 | ) 50 | ) 51 | 52 | 53 | 54 | change_storage = function(y) { 55 | storage.mode(y) = "integer" # change storage type to integer to save space 56 | y 57 | } 58 | 59 | str_to_array_h = function(string) { 60 | ( 61 | # magittr require () 62 | string %>% 63 | strsplit("") %>% # ABEF39 SPLIT into c("A", "B", "E", ...) 64 | (function(x) x[[1]]) %>% # return of split is a list 65 | (function(x) paste0(x[c(TRUE, FALSE)], x[c(FALSE, TRUE)])) %>% #combine to pairs, equivalent to zip: x[c(TRUE, FALSE)] takes the 1st,3st,5st and x[c(FALSE, TRUE)] take the 2st, 4st 66 | as.hexmode %>% # necessary for correct as.raw. For R to understand this is hexcode other than String. 67 | as.raw %>% # make it readable as PNG 68 | (png::readPNG) * 255 # png package assums image to have range 0-1 69 | ) %>% 70 | change_storage # float storage to int storage 71 | } 72 | -------------------------------------------------------------------------------- /R/surrogate_base.R: -------------------------------------------------------------------------------- 1 | Surrogate = R6::R6Class("Surrogate", 2 | public = list( 3 | act_cnt = NULL, 4 | state_dim = NULL, 5 | createModel.fun = NULL, 6 | model = NULL, 7 | initialize = function(actionCnt, state_dim, createModel.fun) { 8 | self$act_cnt = actionCnt 9 | self$state_dim = state_dim 10 | self$createModel.fun = createModel.fun 11 | }, 12 | 13 | train = function(X_train, Y_train, epochs) { 14 | stop("not implmented!") 15 | }, 16 | 17 | persist = function(path) { 18 | temp = self$clone() 19 | save(temp, file = path) 20 | }, 21 | 22 | pred = function(X) { 23 | stop("not implemented") 24 | } 25 | ) 26 | ) 27 | -------------------------------------------------------------------------------- /R/visualize.R: -------------------------------------------------------------------------------- 1 | visualize = function(tabular, env = c("cliff"), latex = FALSE) { 2 | 3 | left = if (latex) "$\\leftarrow$" else "<" 4 | right = if (latex) "$\\rightarrow$" else ">" 5 | up = if (latex) "$\\,\\,\\uparrow\\,\\,$" else "^" 6 | down = if (latex) "$\\,\\,\\downarrow\\,\\,$" else "v" 7 | 8 | parser_lake = function(x) { 9 | if (x == 0) left 10 | else if (x == 1) down 11 | else if (x == 2) right 12 | else if (x == 3) up 13 | } 14 | 15 | parser_cliff = function(x) { 16 | if (x == 0) up 17 | else if (x == 1) right 18 | else if (x == 2) down 19 | else if (x == 3) left 20 | } 21 | 22 | policy = data.frame(position = 1:nrow(tabular)) 23 | policy$action = sapply(policy$position, function(x) which.max(tabular[x, ]) - 1, USE.NAMES = FALSE) 24 | policy$action = sapply(policy$action, if (env == "lake") 
parser_lake else parser_cliff, USE.NAMES = FALSE) 25 | 26 | if (env == "lake" && latex) 27 | cat( "\\hline \n", 28 | paste(policy$action[1:4], collapse = " & "), "\\\\ \\hline \n", 29 | paste(policy$action[5:8], collapse = " & "), "\\\\ \\hline \n", 30 | paste(policy$action[9:12], collapse = " & "), "\\\\ \\hline \n", 31 | paste(policy$action[13:16], collapse = " & "), "\\\\ \\hline \n" 32 | ) 33 | else if (env == "cliff" && latex) 34 | cat( "\\hline \n", 35 | paste(policy$action[1:12], collapse = " & "), "\\\\ \\hline \n", 36 | paste(policy$action[13:24], collapse = " & "), "\\\\ \\hline \n", 37 | paste(policy$action[25:36], collapse = " & "), "\\\\ \\hline \n", 38 | paste(policy$action[37:48], collapse = " & "), "\\\\ \\hline \n" 39 | ) 40 | else if (env == "lake") 41 | cat( "\n", 42 | policy$action[1], policy$action[2], policy$action[3], policy$action[4], "\n", 43 | policy$action[5], policy$action[6], policy$action[7], policy$action[8], "\n", 44 | policy$action[9], policy$action[10], policy$action[11], policy$action[12], "\n", 45 | policy$action[13], policy$action[14], policy$action[15], policy$action[16], "\n" 46 | ) 47 | else 48 | cat( "\n", 49 | paste(policy$action[1:12], collapse = " "), "\n", 50 | paste(policy$action[13:24], collapse = " "), "\n", 51 | paste(policy$action[25:36], collapse = " "), "\n", 52 | paste(policy$action[37:48], collapse = " "), "\n" 53 | ) 54 | } 55 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | #' @import R6 2 | #' @import data.table 3 | #' @import checkmate 4 | #' @import data.table 5 | #' @import reticulate 6 | #' @import keras 7 | #' @import logging 8 | #' @import openssl 9 | #' @import ggplot2 10 | #' @import tensorflow 11 | #' @import abind 12 | #' @import foreach 13 | 14 | NULL # nocov 15 | 16 | .onAttach <- function(libname, pkgname) { 17 | try(expr = { 18 | packageStartupMessage("- type 'reticulate::py_discover_config()' to check default python") 19 | packageStartupMessage("- to use a different python path, execute the following immediately after package is loaded:") 20 | packageStartupMessage("reticulate::use_python('/path/to/your/python')") 21 | packageStartupMessage("\nor\n reticulate::use_conda_env('name-of-conda-env')") 22 | }, silent = TRUE) 23 | } 24 | 25 | #' @title List implemented Agents 26 | #' @description List all implemented Agents 27 | #' @export 28 | listAvailAgent = function() { 29 | all = getNamespaceExports("rlR") 30 | all = all[which(sapply(all, function(x) grepl("^Agent", x)))] 31 | kickout = c("Agent", "AgentArmed") 32 | all = setdiff(all, kickout) 33 | list_res = lapply(all, function(x) get(x)$info()) 34 | names(list_res) = all 35 | list_res 36 | } 37 | 38 | #' @title list environments from OPENAI gym 39 | #' @description List all Gym Environments without testing them 40 | #' @export 41 | listGymEnvs = function() { 42 | envs = reticulate::import("gym.envs") 43 | all_spec = envs$registry$env_specs 44 | res = sapply(all_spec, function(x) x$id) 45 | names(res) = NULL 46 | } 47 | 48 | 49 | #' @title Test if tensorflow works from R session 50 | #' 51 | #' @description Test if tensorflow works from R session 52 | #' 53 | #' @return TRUE if tensorflow works 54 | #' @export 55 | rlr_test_if_tensorflow_works = function() { 56 | res <- try({ 57 | tf = reticulate::import("tensorflow") 58 | sess = tf$Session() 59 | hello = tf$constant("Hello, TensorFlow!") 60 | sess$run(hello) 61 | }, silent = FALSE) 62 | if 
(class(res)[1L] == "try-error") return(FALSE) 63 | return(TRUE) 64 | } 65 | 66 | #' @title Test if gym is installed 67 | #' @description Test if gym is installed 68 | #' @return TRUE if success 69 | #' @export 70 | rlr_test_if_gym_works = function() { 71 | res <- try({ 72 | gym = reticulate::import("gym") 73 | gym.sp = reticulate::import("gym.spaces") 74 | gym$logger$set_level(40) # supress warning 75 | gym$logger$setLevel(40) 76 | genv = gym$make("CartPole-v0") 77 | genv$reset() 78 | }, silent = FALSE) 79 | if (class(res)[1L] == "try-error") return(FALSE) 80 | return(TRUE) 81 | } 82 | 83 | #' @title Check if python dependencies work 84 | #' @description Check if python dependencies work 85 | #' @return TRUE if all python dependencies work 86 | #' @export 87 | checkPyDep = function() { 88 | flag_tensorflow = rlr_test_if_tensorflow_works() 89 | flag_keras = rlr_test_if_keras_works() 90 | flag_gym = rlr_test_if_gym_works() 91 | cat(sprintf("\n tensorlfow: %s, keras: %s, gym:%s\n", flag_tensorflow, flag_keras, flag_gym)) 92 | return(flag_tensorflow && flag_keras && flag_gym) 93 | } 94 | 95 | #' @title Install dependencies into system virtual environment called r-tensorflow 96 | #' @param gpu If TRUE, will install gpu version of tensorflow. By default, FALSE 97 | #' @description Install Keras dependencies into system virtual environment called r-tensorflow 98 | #' @return NULL 99 | #' @export 100 | installDep2SysVirtualEnv = function(gpu = FALSE) { # nocov start 101 | cat(sprintf("\ninstalling dependencies using %s \n", Sys.which("virtualenv"))) 102 | # install_keras will install tensorflow along into the virtual environment called "r-tensorflow" 103 | if (gpu) { 104 | version = paste0("1.8.0", "-gpu") 105 | } else { 106 | version = "1.8.0" 107 | } 108 | keras::install_keras(method = "virtualenv", tensorflow = version, extra_packages = c("gym==0.10.5", "cmake==3.12.0", "atari-py==0.1.6")) 109 | #reticulate::py_install() 110 | # sudo pip instlal uwsgi 111 | # sudo apt-get install python3-pip 112 | } # nocov end 113 | 114 | #' @title Install dependencies into a conda virtual environment called r-tensorflow 115 | #' @param gpu If TRUE, will install gpu version of tensorflow. 
By default, FALSE 116 | #' @param conda_path The conda path in your system, default "auto" will search in system path 117 | #' @description Install Keras dependencies into a conda virtual environment called r-tensorflow 118 | #' @return NULL 119 | #' @export 120 | installDepConda = function(conda_path = "auto", gpu = FALSE) { # nocov start 121 | str4gpu = ifelse(gpu, "-gpu", "") 122 | if (conda_path == "auto") cat(sprintf("\ninstalling dependencies using %s \n", Sys.which("conda"))) 123 | tf_version = paste0("1.9.0", str4gpu) 124 | keras_version = "default" 125 | keras::install_keras(method = "conda", conda = conda_path, version = keras_version, tensorflow = tf_version, extra_packages = c("gym==0.10.5", "cmake==3.12.0", "atari-py==0.1.6")) 126 | } # nocov end 127 | 128 | 129 | #' @title Test if keras works 130 | #' @description Test if keras is installed 131 | #' @return TRUE if success 132 | #' @export 133 | rlr_test_if_keras_works = function() { 134 | requireNamespace("keras") 135 | res <- try({ 136 | model <- keras_model_sequential() 137 | model %>% 138 | layer_dense(units = 256, activation = 'relu', input_shape = c(784)) %>% 139 | layer_dropout(rate = 0.4) %>% 140 | layer_dense(units = 128, activation = 'relu') %>% 141 | layer_dropout(rate = 0.3) %>% 142 | layer_dense(units = 10, activation = 'softmax') 143 | }, silent = FALSE) 144 | if (class(res)[1L] == "try-error") return(FALSE) 145 | return(TRUE) 146 | } 147 | 148 | rlR.debug = FALSE # nocov 149 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.com/smilesun/rlR.svg?branch=master)](https://travis-ci.com/smilesun/rlR) 2 | [![Coverage Status](https://coveralls.io/repos/github/smilesun/rlR/badge.svg?branch=master)](https://coveralls.io/github/smilesun/rlR?branch=master) 3 | [![Build status](https://ci.appveyor.com/api/projects/status/d0oyb358bh3e8r7r?svg=true)](https://ci.appveyor.com/project/smilesun/rlr) 4 | 5 | [Documentation](https://smilesun.github.io/rlR/) 6 | 7 | # rlR: (Deep) Reinforcement learning in R 8 | 9 | ## Installation 10 | 11 | ### R package installation 12 | ```{r eval = FALSE} 13 | devtools::install_github("smilesun/rlR") 14 | ``` 15 | or 16 | 17 | ```{r eval = FALSE} 18 | devtools::install_github("smilesun/rlR", dependencies = TRUE) 19 | ``` 20 | 21 | ## Python dependency 22 | 23 | rlR use keras with tensorflow as its backend for neural network as functional approximator and OpenAI gym. 
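For example, assuming the Python dependencies live in a conda environment named `r-tensorflow` (as created by `rlR::installDepConda()` or `rlR::installDep2SysVirtualEnv()`), a minimal sketch to select that environment and verify the setup could look like the following; adapt the environment name or python path to your system:

```{r eval = FALSE}
library(rlR)
# as suggested by the package startup message, select the python to use
reticulate::use_condaenv("r-tensorflow")   # or: reticulate::use_python("/path/to/your/python")
checkPyDep()  # TRUE only if tensorflow, keras and gym are all usable
```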
24 | 25 | see [Python Dependencies Installation and Configuration](https://smilesun.github.io/rlR/articles/python_dependencies.html) 26 | 27 | ## Example of Neural Network as Functional Approximator 28 | 29 | ### Choose an environment to learn 30 | ```{r} 31 | library(rlR) 32 | env = makeGymEnv("CartPole-v0") 33 | env 34 | ``` 35 | 36 | If you have R package "imager" installed, you could get a snapshot of the environment by 37 | ```{r, eval=FALSE} 38 | env$snapshot(preprocess = F) 39 | ``` 40 | 41 | 42 | ### Initialize agent with the environment 43 | ```{r learn, eval=FALSE} 44 | agent = initAgent("AgentDQN", env) 45 | agent$learn(200L) 46 | ``` 47 | 48 | ### Look at the performance 49 | ```{r mplot, eval=FALSE,fig.path="inst/figures/", warning=FALSE, message=FALSE, eval=FALSE} 50 | agent$plotPerf(F) 51 | ``` 52 | 53 | ## Specify a task to be sovled by creating your own Environment 54 | 55 | see [Custom Environment](https://smilesun.github.io/rlR/articles/define_custom_environments.html) 56 | 57 | ## More Examples 58 | - [Configuration](https://smilesun.github.io/rlR/articles/custom_configuration.html) 59 | - [Tabular Learning](https://smilesun.github.io/rlR/articles/table_learning.html) 60 | - [Repeated Experiment](https://smilesun.github.io/rlR/articles/repeated_experiment.html) 61 | - Discover in [Documentation](https://smilesun.github.io/rlR/) 62 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.com/smilesun/rlR.svg?branch=master)](https://travis-ci.com/smilesun/rlR) 2 | [![Coverage Status](https://coveralls.io/repos/github/smilesun/rlR/badge.svg?branch=master)](https://coveralls.io/github/smilesun/rlR?branch=master) 3 | [![Build status](https://ci.appveyor.com/api/projects/status/d0oyb358bh3e8r7r?svg=true)](https://ci.appveyor.com/project/smilesun/rlr) 4 | 5 | [Documentation](https://smilesun.github.io/rlR/) 6 | 7 | # rlR: (Deep) Reinforcement learning in R 8 | 9 | ## Installation 10 | 11 | ### R package installation 12 | 13 | ```r 14 | devtools::install_github("smilesun/rlR") 15 | ``` 16 | or 17 | 18 | 19 | ```r 20 | devtools::install_github("smilesun/rlR", dependencies = TRUE) 21 | ``` 22 | 23 | ## Python dependency 24 | 25 | rlR use keras with tensorflow as its backend for neural network as functional approximator and OpenAI gym. 
26 | 27 | see [Python Dependencies Installation and Configuration](https://smilesun.github.io/rlR/articles/python_dependencies.html) 28 | 29 | ## Example of Neural Network as Functional Approximator 30 | 31 | ### Choose an environment to learn 32 | 33 | ```r 34 | library(rlR) 35 | env = makeGymEnv("CartPole-v0") 36 | env 37 | ``` 38 | 39 | ``` 40 | ## 41 | ## action cnt: 2 42 | ## state original dim: 4 43 | ## discrete action 44 | ``` 45 | 46 | If you have R package "imager" installed, you could get a snapshot of the environment by 47 | 48 | ```r 49 | env$snapshot(preprocess = F) 50 | ``` 51 | 52 | 53 | ### Initialize agent with the environment 54 | 55 | ```r 56 | agent = initAgent("AgentDQN", env) 57 | agent$learn(200L) 58 | ``` 59 | 60 | ### Look at the performance 61 | 62 | ```r 63 | agent$plotPerf(F) 64 | ``` 65 | 66 | ## Specify a task to be sovled by creating your own Environment 67 | 68 | see [Custom Environment](https://smilesun.github.io/rlR/articles/define_custom_environments.html) 69 | 70 | ## More Examples 71 | - [Configuration](https://smilesun.github.io/rlR/articles/custom_configuration.html) 72 | - [Tabular Learning](https://smilesun.github.io/rlR/articles/table_learning.html) 73 | - [Repeated Experiment](https://smilesun.github.io/rlR/articles/repeated_experiment.html) 74 | - Discover in [Documentation](https://smilesun.github.io/rlR/) 75 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | template: 2 | params: 3 | bootswatch: cosmo 4 | 5 | navbar: 6 | left: 7 | - text: Topics 8 | icon: fa-file-text-o 9 | menu: 10 | - text: Specify Custom Environment 11 | href: articles/define_custom_environments.html 12 | - text: Repeated Experiment 13 | href: articles/repeated_experiment.html 14 | - text: Customize Neural Network Functional Approximator 15 | href: articles/customized_brain_mountainCar.html 16 | - text: Play Atari Games 17 | href: articles/play_atari_games.html 18 | - text: Tabular Learning 19 | href: articles/table_learning.html 20 | - text: Reference 21 | icon: fa-book 22 | href: reference/index.html 23 | 24 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | init: 2 | ps: | 3 | $ErrorActionPreference = "Stop" 4 | Invoke-WebRequest http://raw.github.com/krlmlr/r-appveyor/master/scripts/appveyor-tool.ps1 -OutFile "..\appveyor-tool.ps1" 5 | Import-Module '..\appveyor-tool.ps1' 6 | 7 | install: 8 | ps: Bootstrap 9 | 10 | cache: 11 | - C:\RLibrary 12 | 13 | environment: 14 | global: 15 | USE_RTOOLS: true 16 | matrix: 17 | - R_VERSION: devel 18 | PKGTYPE: source 19 | 20 | - R_VERSION: release 21 | # - R_VERSION: oldrel 22 | # RTOOLS_VERSION: 32 23 | CRAN: http://cran.rstudio.com 24 | 25 | 26 | build_script: 27 | - travis-tool.sh install_deps 28 | 29 | test_script: 30 | - travis-tool.sh run_tests 31 | 32 | on_failure: 33 | - travis-tool.sh dump_logs 34 | 35 | artifacts: 36 | - path: '*.Rcheck\**\*.log' 37 | name: Logs 38 | 39 | - path: '*.Rcheck\**\*.out' 40 | name: Logs 41 | 42 | - path: '*.Rcheck\**\*.fail' 43 | name: Logs 44 | 45 | - path: '*.Rcheck\**\*.Rout' 46 | name: Logs 47 | 48 | - path: '\*_*.tar.gz' 49 | name: Bits 50 | 51 | - path: '\*_*.zip' 52 | name: Bits 53 | -------------------------------------------------------------------------------- /attr/arsenal_attr.R: 
-------------------------------------------------------------------------------- 1 | makeCompactableNetTF = function(state_dim, act_cnt) { 2 | hun = 10L 3 | requireNamespace(tensorflow) 4 | input = tf$placeholder(tf$float32, shape(NULL, state_dim)) 5 | W = tf$Variable(tf$zeros(shape(state_dim, hun))) 6 | b = tf$Variable(tf$zeros(shape(hun))) 7 | hidden = tf$nn$relu(tf$matmul(input, W) + b) 8 | w_critic = tf$Variable(tf$zeros(shape(hun, 1L))) 9 | b_critic = tf$Variable(tf$zeros(shape(1L))) 10 | w_actor = tf$Variable(tf$zeros(shape(hun, act_cnt))) 11 | b_actor = tf$Variable(tf$zeros(shape(act_cnt))) 12 | critic = tf$matmul(hidden, w_critic) + b_critic 13 | actor = tf$matmul(hidden, w_actor) + b_actor 14 | w_critic = tf$Variable(tf$zeros(shape(hun, 1L))) 15 | b_critic = tf$Variable(tf$zeros(shape(1L))) 16 | #loss_critic <- tf$reduce_mean(0.5 * (critic - critic_target) ^ 2) 17 | } 18 | 19 | 20 | -------------------------------------------------------------------------------- /attr/customized_brain_mountainCar.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Cutomized Neural Network for Mountain Car Problem" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Customized Neural Network for Mountain Car Problem} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | library(reticulate) 24 | #os = import("os") 25 | #os$environ[["TF_CPP_MIN_LOG_LEVEL"]]="3" 26 | ``` 27 | 28 | # Customized Brain for Mountain Car Problem 29 | 30 | ## Action cheat to Environment 31 | For the Mountain Car Senario, there are three valid actions: move left, do nothing and move right. Since do nothing does not help us in this environment, we could ignore this action. 32 | In rlR this is done by the following code. 33 | 34 | ```{r} 35 | library(rlR) 36 | env = makeGymEnv("MountainCar-v0", act_cheat = c(0, 2)) 37 | ``` 38 | act_cheat is a vector where the first element means the first action maps to the 0th action in the gym environment and the second element means the second action maps to the 2th action of the gym environment. But this definition, the 1th gym action is eliminated. Note that in gym the index is python convention where 0th means the 1th in R. 
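As a purely illustrative sketch of this mapping (hypothetical code, not rlR internals): the agent's i-th action is looked up in `act_cheat` and the resulting index is what the gym environment receives.

```{r eval = FALSE}
# hypothetical illustration of the act_cheat lookup, not code from rlR
act_cheat = c(0, 2)                   # agent action 1 -> gym action 0 (left), agent action 2 -> gym action 2 (right)
agent_action = 2                      # chosen by the agent (R side, 1-indexed)
gym_action = act_cheat[agent_action]  # 2, i.e. push right; gym action 1 ("do nothing") is never issued
```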
39 | 40 | ## Define custom neural network 41 | ```{r} 42 | net_fun = function(state_dim, act_cnt) { 43 | model = keras::keras_model_sequential() 44 | model %>% 45 | layer_dense(units = 8, activation = "relu", input_shape = c(state_dim)) %>% 46 | layer_dropout(rate = 0.25) %>% 47 | layer_dense(units = act_cnt, activation = "linear") 48 | model$compile(loss = "mse", optimizer = optimizer_rmsprop(lr = 0.001, clipnorm = 1.0)) 49 | model 50 | } 51 | ``` 52 | 53 | ## Learning 54 | ```{r} 55 | conf = getDefaultConf("AgentDQN") 56 | conf$set(console = TRUE, render = TRUE, policy.maxEpsilon = 1, policy.minEpsilon = 0, policy.decay = 1.0 / 1.01, replay.batchsize = 64, replay.epochs = 4, agent.lr.decay = 1, agent.gamma = 0.95) 57 | agent = initAgent("AgentDQN", env, conf, custom_brain = T) 58 | library(magrittr) 59 | library(keras) 60 | agent$customizeBrain(list(value_fun = net_fun)) 61 | agent$learn(1) 62 | ``` 63 | -------------------------------------------------------------------------------- /attr/play_atari_games.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Play Atari Games" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Play Atari Games} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | library(reticulate) 24 | os = import("os") 25 | os$environ[["TF_CPP_MIN_LOG_LEVEL"]]="3" 26 | ``` 27 | 28 | # rlR: play Atari games 29 | 30 | ## Convolutional Neural Network Structure 31 | 32 | ## Atari Environment 33 | For Atari Games, it makes more since to stack several recent frames since the agent need to know what is happening and with only one frame it is hard to judge the current situation. So we have the `observ_stack_len` parameter. 34 | 35 | ```{r} 36 | library(rlR) 37 | env = makeGymEnv("Seaquest-v0", observ_stack_len = 4L, state_preprocess = list(fun = rlR:::subsample)) 38 | ``` 39 | Since the input state space is RGB image, we would like to down sample the state space by the following function 40 | ```{r} 41 | rlR:::subsample 42 | ``` 43 | 44 | ```{r} 45 | env$overview() 46 | ``` 47 | 48 | ```{r eval=FALSE} 49 | env$snapshot(preprocess = T) 50 | env$snapshot(steps = 500, preprocess = F) 51 | ``` 52 | 53 | ```{r} 54 | conf = getDefaultConf("AgentFDQN") 55 | ``` 56 | 57 | The rlR package has been optimized to handle replay memory in a very efficient way, to ensure performance, 58 | you could also use the following parameters which has a bigger replay memory. 
59 | 60 | ```{r} 61 | conf$set(replay.batchsize = 32, 62 | replay.freq = 1L, 63 | console = TRUE, 64 | agent.lr.decay = 1, 65 | agent.lr = 0.00025, 66 | agent.update.target.freq = 1e4, 67 | replay.memname = "Png", 68 | render = F, 69 | policy.minEpsilon = 0.1, 70 | agent.start.learn = 5e4L, 71 | policy.aneal.steps = 1e6, 72 | replay.mem.size = 1e6, 73 | log = FALSE, 74 | agent.clip.td = TRUE, 75 | policy.decay.type = "decay_linear") 76 | ``` 77 | 78 | 79 | ```{r} 80 | makeCnnCritic = function(state_dim, act_cnt) { 81 | require("keras") 82 | text = paste("model <- keras_model_sequential();", 83 | 'model %>%', 84 | ' layer_conv_2d(filter = 16, kernel_size = c(8,8), strides = c(4, 4), 85 | padding = "same", input_shape = state_dim) %>%', 86 | 'layer_activation("relu") %>%', 87 | 'layer_conv_2d(filter = 32, kernel_size = c(4,4), strides = c(2, 2)) %>%', 88 | 'layer_activation("relu") %>%', 89 | 'layer_flatten() %>%', 90 | 'layer_dense(256) %>%', 91 | 'layer_activation("relu") %>%', 92 | 'layer_dense(act_cnt) %>%', 93 | 'layer_activation("linear");', 94 | 'opt <- optimizer_rmsprop(lr = 0.00025);', 95 | 'model %>% compile(loss = "mse", optimizer = opt, metrics = "accuracy")') 96 | model = eval(parse(text = text)) 97 | return(model) 98 | } 99 | ``` 100 | 101 | ```{r} 102 | agent = initAgent("AgentFDQN", env, conf, custom_brain = TRUE) 103 | agent$customizeBrain(list(value_fun = makeCnnCritic)) 104 | ``` 105 | 106 | ```{r} 107 | agent$learn(1L) 108 | ``` 109 | -------------------------------------------------------------------------------- /attr/repeated_experiment.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Repeated Experiment" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Repeated Experiment} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | library(reticulate) 24 | os = import("os") 25 | os$environ[["TF_CPP_MIN_LOG_LEVEL"]]="3" 26 | ``` 27 | 28 | # rlR: repeated experiment 29 | 30 | It make sense to repeatedly evaluate how an algorithm works for a particular scenario or environment. rlR provides the function `rlR::repExperiment` to serve this need. One could also use different cores to execute seperate experiment. 
31 | 32 | 33 | ```{r} 34 | library(doMC) 35 | registerDoMC(5) 36 | # or 37 | library(doParallel) 38 | cl <- makeCluster(5) 39 | registerDoParallel(cl) 40 | res = repExperiment(sname = "CartPole-v0", aname = "AgentDQN", conf = getDefaultConf("AgentDQN"), nrep = 5, nepi = 5) 41 | ``` 42 | -------------------------------------------------------------------------------- /benchmark/bt_algorithms.R: -------------------------------------------------------------------------------- 1 | nn4mountainCar = function(name, env) { 2 | model = keras_model_sequential() 3 | model %>% layer_dense(units = 10, activation = 'relu', input_shape = c(2)) %>% 4 | layer_dropout(rate = 0.25) %>% 5 | layer_dense(units = 3, activation = 'linear');model$compile(loss = 'mse', optimizer = optimizer_rmsprop(lr = 9e-4)) 6 | model 7 | } 8 | 9 | # instance is the return for problem 10 | rl_algo_dqn = function(data, job, instance) { 11 | env = makeGymEnv(name = instance) 12 | agent = initAgent("AgentDQN", env = env) 13 | if (instance == "MountainCar-v0") { 14 | model = nn4mountainCar() 15 | agent$customizeBrain(model) 16 | agent$updatePara(console = TRUE, render = TRUE, log = TRUE, policy.maxEpsilon = 0.15, policy.minEpsilon = 0.05, policy.decay = exp(-0.001), replay.batchsize = 10, replay.epochs = 4, agent.lr_decay = exp(-0.001), agent.gamma = 0.95) 17 | } 18 | perf = agent$learn(data$iteration) 19 | return(perf = perf) # key for table join 20 | } 21 | 22 | rl_algo_ddqn = function(data, job, instance) { 23 | env = makeGymEnv(name = instance) 24 | agent = initAgent("AgentDDQN", env = env) 25 | perf = agent$learn(data$iteration) 26 | return(perf = perf) # key for table join 27 | } 28 | 29 | rl_algo_fdqn = function(data, job, instance) { 30 | env = makeGymEnv(name = instance) 31 | agent = initAgent("AgentFDQN", env = env) 32 | perf = agent$learn(data$iteration) 33 | return(perf = perf) # key for table join 34 | } 35 | 36 | rl_algo_pg = function(data, job, instance) { 37 | env = makeGymEnv(name = instance) 38 | agent = initAgent("AgentPG", env = env) 39 | perf = agent$learn(data$iteration) 40 | return(perf = perf) # key for table join 41 | } 42 | 43 | rl_algo_pgb = function(data, job, instance) { 44 | env = makeGymEnv(name = instance) 45 | agent = initAgent("AgentPGBaseline", env = env) 46 | perf = agent$learn(data$iteration) 47 | return(perf = perf) # key for table join 48 | } 49 | 50 | rl_algo_pgac = function(data, job, instance) { 51 | env = makeGymEnv(name = instance) 52 | agent = initAgent("AgentActorCritic", env = env) 53 | perf = agent$learn(data$iteration) 54 | return(perf = perf) # key for table join 55 | } 56 | -------------------------------------------------------------------------------- /benchmark/bt_conf.R: -------------------------------------------------------------------------------- 1 | # Configuration for benchmarking with batchtools: only one global conf variable 2 | gbtconf = list() 3 | 4 | ## Dependencies 5 | gbtconf$preSource = c("bt_algorithms.R", "bt_conf.R", "bt_problem.R") 6 | gbtconf$prePackage = c("batchtools", "checkmate", "data.table", "R6", "reticulate", "keras", "logging", "BBmisc", "openssl", "ggplot2", "reshape2", "rlR") 7 | #gbtconf$prePackage = c("aslib") 8 | ## EVALUATION 9 | gbtconf$SEED_REGISTRY = 1273L # global seed for reg 10 | gbtconf$SEED_ADDPROBLEM = 1L # seed for each problem 11 | gbtconf$REPLS = 1L 12 | 13 | gbtconf$agent.name = c("AgentDQN", "AgentFDQN", "AgentDDQN", "AgentPG", "AgentPGBaseline", "AgentActorCritic") 14 | gbtconf$replay = c("ReplayMemUniform", "ReplayMemLatest", 
"ReplayMemPrioritizedRank") 15 | gbtconf$policy = c("PolicyEpsilonGreedy", "PolicyProbEpsilon") 16 | 17 | ## Experiment 18 | gbtconf$REG_FILE_DIR = "bt_reg_new" 19 | gbtconf$ALGO_RUN = c("rl_algo_dqn", "rl_algo_ddqn", "rl_algo_fdqn", "rl_algo_pg", "rl_algo_pgb", "rl_algo_pgac") 20 | gbtconf$PROB_RUN = c("rl_prob") 21 | gbtconf$PROB_LIST = list() 22 | #gbtconf$PROB_LIST[["rl_prob"]] = list(fun = "rl_prob", prob.data = c("MountainCar-v0", "CartPole-v0", "Amidar-ram-v0", "WizardOfWor-ram-v0", "Asteroids-ram-v0", "KungFuMaster-ram-v0", "JourneyEscape-ram-v0", "Acrobot-v1") 23 | #gbtconf$PROB_LIST[["rl_prob"]] = list(fun = "rl_prob", prob.data = c("Pong-ram-v0", "CartPole-v0", "Acrobot-v1") 24 | gbtconf$PROB_LIST[["rl_prob"]] = list(fun = "rl_prob", prob.data = c("Pong-ram-v0") 25 | ) 26 | gbtconf$iteration = 1000L 27 | -------------------------------------------------------------------------------- /benchmark/bt_experiment.R: -------------------------------------------------------------------------------- 1 | # addProblem, addAlgorithm, addExperiments(algo.design = ades, repls = REPLS) 2 | source("bt_conf.R") 3 | pp = readline("Are you really sure to delete the registry and restart? Y OR N") 4 | if (pp == "Y") unlink(gbtconf$REG_FILE_DIR, recursive = TRUE, force = TRUE) 5 | reg = batchtools::makeExperimentRegistry(file.dir = gbtconf$REG_FILE_DIR, 6 | source = c(gbtconf$preSource), 7 | packages = gbtconf$prePackage, 8 | seed = gbtconf$SEED_REGISTRY) 9 | 10 | 11 | 12 | lapply(gbtconf$prePackage, require, character.only = TRUE) 13 | lapply(gbtconf$preSource, source) 14 | 15 | # Cartesian product 16 | #des = expand.grid(lrn.cl = c("1", "2"), ft.extract.method = c("A", "B"), stringsAsFactors = FALSE) 17 | 18 | pdes = list() 19 | for (prob in gbtconf$PROB_RUN) { 20 | addProblem(name = prob, data = list(iteration = gbtconf$iteration), fun = get(gbtconf$PROB_LIST[[prob]]$fun), seed = gbtconf$SEED_ADDPROBLEM) 21 | pdes[[prob]] = data.frame(s.name = gbtconf$PROB_LIST[[prob]]$prob.data, stringsAsFactors = FALSE) 22 | } 23 | 24 | gbtconf$ALGO_LIST = list() 25 | 26 | 27 | #gbtconf$ALGO_LIST$rl_algo_dqn = list(fun = rl_algo, design = data.frame(agent.name = gbtconf$agent.name, stringsAsFactors = FALSE)) 28 | gbtconf$ALGO_LIST$rl_algo_dqn = list(fun = rl_algo_dqn) 29 | gbtconf$ALGO_LIST$rl_algo_fdqn = list(fun = rl_algo_fdqn) 30 | gbtconf$ALGO_LIST$rl_algo_ddqn = list(fun = rl_algo_ddqn) 31 | gbtconf$ALGO_LIST$rl_algo_pg = list(fun = rl_algo_pg) 32 | gbtconf$ALGO_LIST$rl_algo_pgb = list(fun = rl_algo_pgb) 33 | gbtconf$ALGO_LIST$rl_algo_pgac = list(fun = rl_algo_pgac) 34 | 35 | 36 | ades = list() 37 | 38 | for (algo in gbtconf$ALGO_RUN) { 39 | addAlgorithm(name = algo, fun = gbtconf$ALGO_LIST[[algo]]$fun) 40 | # ades[[algo]] = gbtconf$ALGO_LIST[[algo]]$design 41 | } 42 | addExperiments(prob.design = pdes, algo.design = NULL, repls = gbtconf$REPLS) 43 | unwrap(getJobPars()) 44 | -------------------------------------------------------------------------------- /benchmark/bt_problem.R: -------------------------------------------------------------------------------- 1 | # create configuration object 2 | rl_prob = function(data, job, s.name) { 3 | return(s.name) 4 | } 5 | -------------------------------------------------------------------------------- /benchmark/plotHelper.R: -------------------------------------------------------------------------------- 1 | library(ggplot2) 2 | 3 | 4 | 5 | # Multiple plot function 6 | # 7 | # ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects) 8 | # 
- cols: Number of columns in layout 9 | # - layout: A matrix specifying the layout. If present, 'cols' is ignored. 10 | # 11 | # If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE), 12 | # then plot 1 will go in the upper left, 2 will go in the upper right, and 13 | # 3 will go all the way across the bottom. 14 | # 15 | multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) { 16 | library(grid) 17 | 18 | # Make a list from the ... arguments and plotlist 19 | plots <- c(list(...), plotlist) 20 | 21 | numPlots = length(plots) 22 | 23 | # If layout is NULL, then use 'cols' to determine layout 24 | if (is.null(layout)) { 25 | # Make the panel 26 | # ncol: Number of columns of plots 27 | # nrow: Number of rows needed, calculated from # of cols 28 | layout <- matrix(seq(1, cols * ceiling(numPlots/cols)), 29 | ncol = cols, nrow = ceiling(numPlots/cols)) 30 | } 31 | 32 | if (numPlots==1) { 33 | print(plots[[1]]) 34 | 35 | } else { 36 | # Set up the page 37 | grid.newpage() 38 | pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout)))) 39 | 40 | # Make each plot, in the correct location 41 | for (i in 1:numPlots) { 42 | # Get the i,j matrix positions of the regions that contain this subplot 43 | matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE)) 44 | 45 | print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row, 46 | layout.pos.col = matchidx$col)) 47 | } 48 | } 49 | } 50 | 51 | 52 | 53 | 54 | # This example uses the ChickWeight dataset, which comes with ggplot2 55 | # First plot 56 | # p1 <- ggplot(ChickWeight, aes(x=Time, y=weight, colour=Diet, group=Chick)) + 57 | # geom_line() + 58 | # ggtitle("Growth curve for individual chicks") 59 | # 60 | # Second plot 61 | # p2 <- ggplot(ChickWeight, aes(x=Time, y=weight, colour=Diet)) + 62 | # geom_point(alpha=.3) + 63 | # geom_smooth(alpha=.2, size=1) + 64 | # ggtitle("Fitted growth curve per diet") 65 | # 66 | # Third plot 67 | # p3 <- ggplot(subset(ChickWeight, Time==21), aes(x=weight, colour=Diet)) + 68 | # geom_density() + 69 | # ggtitle("Final weight, by diet") 70 | # 71 | # Fourth plot 72 | # p4 <- ggplot(subset(ChickWeight, Time==21), aes(x=weight, fill=Diet)) + 73 | # geom_histogram(colour="black", binwidth=50) + 74 | # facet_grid(Diet ~ .) 
+ 75 | # ggtitle("Final weight, by diet") + 76 | # theme(legend.position="none") # No legend (redundant in this graph) 77 | # 78 | # multiplot(p1, p2, p3, p4, cols = 2) 79 | #> `geom_smooth()` using method = 'loess' 80 | -------------------------------------------------------------------------------- /benchmark/rl_h.R: -------------------------------------------------------------------------------- 1 | # this file is temporary before a package is made 2 | library(checkmate) 3 | library(data.table) 4 | library(R6) 5 | library(reticulate) 6 | library(keras) 7 | library(logging) 8 | library(BBmisc) 9 | library(openssl) 10 | library(ggplot2) 11 | library(reshape2) 12 | library(formattable) 13 | list.libs.imports = c("checkmate", "data.table", "R6", "reticulate", "keras", "logging", "BBmisc", "openssl", "ggplot2", "reshape2", "formattable") 14 | list.libs.suggest = c("checkmate", "data.table", "BBmisc", "openssl", "ggplot2", "reshape2", "formattable") 15 | lapply(list.libs.imports, function(x) devtools::use_package(x)) 16 | lapply(list.libs.imports, function(x) devtools::use_package(x, "Suggest")) 17 | set.seed(1L) 18 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | codecov: 3 | token: 06465217-2436-4008-85f9-9a56a3c6c785 4 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## Test environments 2 | * local OS X install, R 3.4.4 3 | * ubuntu 12.04 (on travis-ci), R 3.4.4 4 | * win-builder (devel and release) 5 | 6 | ## R CMD check results 7 | 8 | 0 errors | 0 warnings | 1 note 9 | 10 | * This is a new release. 11 | 12 | ## Reverse dependencies 13 | 14 | This is a new release, so there are no reverse dependencies. 15 | 16 | --- 17 | 18 | * I have run R CMD check on the NUMBER downstream dependencies. 19 | (Summary at ...). 20 | 21 | * FAILURE SUMMARY 22 | 23 | * All revdep maintainers were notified of the release on RELEASE DATE. 24 | -------------------------------------------------------------------------------- /cran_check.sh: -------------------------------------------------------------------------------- 1 | R CMD build . 2 | R CMD check --as-cran rlR_0.1.0.tar.gz 3 | -------------------------------------------------------------------------------- /docs/LICENSE-text.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | License • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
50 |
51 | 115 | 116 | 117 |
118 | 119 |
120 |
121 | 124 | 125 |
YEAR: 2018
126 | COPYRIGHT HOLDER: Xudong Sun
127 | 
128 | 129 |
130 | 131 |
132 | 133 | 134 |
135 | 138 | 139 |
140 |

Site built with pkgdown.

141 |
142 | 143 |
144 |
145 | 146 | 147 | 148 | 149 | 150 | 151 | -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Articles • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
50 |
51 | 115 | 116 | 117 |
118 | 119 |
120 |
121 | 124 | 125 |
126 |

All vignettes

127 |

128 | 129 | 136 |
137 |
138 |
139 | 140 |
141 | 144 | 145 |
146 |

Site built with pkgdown.

147 |
148 | 149 |
150 |
151 | 152 | 153 | 154 | 155 | 156 | 157 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Authors • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
Xudong Sun. Author, maintainer.

Sebastian Gruber. Contributor.
152 | 153 | 154 | 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | /* Sticky footer */ 2 | 3 | /** 4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ 5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css 6 | * 7 | * .Site -> body > .container 8 | * .Site-content -> body > .container .row 9 | * .footer -> footer 10 | * 11 | * Key idea seems to be to ensure that .container and __all its parents__ 12 | * have height set to 100% 13 | * 14 | */ 15 | 16 | html, body { 17 | height: 100%; 18 | } 19 | 20 | body > .container { 21 | display: flex; 22 | height: 100%; 23 | flex-direction: column; 24 | 25 | padding-top: 60px; 26 | } 27 | 28 | body > .container .row { 29 | flex: 1 0 auto; 30 | } 31 | 32 | footer { 33 | margin-top: 45px; 34 | padding: 35px 0 36px; 35 | border-top: 1px solid #e5e5e5; 36 | color: #666; 
37 | display: flex; 38 | flex-shrink: 0; 39 | } 40 | footer p { 41 | margin-bottom: 0; 42 | } 43 | footer div { 44 | flex: 1; 45 | } 46 | footer .pkgdown { 47 | text-align: right; 48 | } 49 | footer p { 50 | margin-bottom: 0; 51 | } 52 | 53 | img.icon { 54 | float: right; 55 | } 56 | 57 | img { 58 | max-width: 100%; 59 | } 60 | 61 | /* Typographic tweaking ---------------------------------*/ 62 | 63 | .contents h1.page-header { 64 | margin-top: calc(-60px + 1em); 65 | } 66 | 67 | /* Section anchors ---------------------------------*/ 68 | 69 | a.anchor { 70 | margin-left: -30px; 71 | display:inline-block; 72 | width: 30px; 73 | height: 30px; 74 | visibility: hidden; 75 | 76 | background-image: url(./link.svg); 77 | background-repeat: no-repeat; 78 | background-size: 20px 20px; 79 | background-position: center center; 80 | } 81 | 82 | .hasAnchor:hover a.anchor { 83 | visibility: visible; 84 | } 85 | 86 | @media (max-width: 767px) { 87 | .hasAnchor:hover a.anchor { 88 | visibility: hidden; 89 | } 90 | } 91 | 92 | 93 | /* Fixes for fixed navbar --------------------------*/ 94 | 95 | .contents h1, .contents h2, .contents h3, .contents h4 { 96 | padding-top: 60px; 97 | margin-top: -40px; 98 | } 99 | 100 | /* Static header placement on mobile devices */ 101 | @media (max-width: 767px) { 102 | .navbar-fixed-top { 103 | position: absolute; 104 | } 105 | .navbar { 106 | padding: 0; 107 | } 108 | } 109 | 110 | 111 | /* Sidebar --------------------------*/ 112 | 113 | #sidebar { 114 | margin-top: 30px; 115 | } 116 | #sidebar h2 { 117 | font-size: 1.5em; 118 | margin-top: 1em; 119 | } 120 | 121 | #sidebar h2:first-child { 122 | margin-top: 0; 123 | } 124 | 125 | #sidebar .list-unstyled li { 126 | margin-bottom: 0.5em; 127 | } 128 | 129 | .orcid { 130 | height: 16px; 131 | vertical-align: middle; 132 | } 133 | 134 | /* Reference index & topics ----------------------------------------------- */ 135 | 136 | .ref-index th {font-weight: normal;} 137 | 138 | .ref-index td {vertical-align: top;} 139 | .ref-index .alias {width: 40%;} 140 | .ref-index .title {width: 60%;} 141 | 142 | .ref-index .alias {width: 40%;} 143 | .ref-index .title {width: 60%;} 144 | 145 | .ref-arguments th {text-align: right; padding-right: 10px;} 146 | .ref-arguments th, .ref-arguments td {vertical-align: top;} 147 | .ref-arguments .name {width: 20%;} 148 | .ref-arguments .desc {width: 80%;} 149 | 150 | /* Nice scrolling for wide elements --------------------------------------- */ 151 | 152 | table { 153 | display: block; 154 | overflow: auto; 155 | } 156 | 157 | /* Syntax highlighting ---------------------------------------------------- */ 158 | 159 | pre { 160 | word-wrap: normal; 161 | word-break: normal; 162 | border: 1px solid #eee; 163 | } 164 | 165 | pre, code { 166 | background-color: #f8f8f8; 167 | color: #333; 168 | } 169 | 170 | pre code { 171 | overflow: auto; 172 | word-wrap: normal; 173 | white-space: pre; 174 | } 175 | 176 | pre .img { 177 | margin: 5px 0; 178 | } 179 | 180 | pre .img img { 181 | background-color: #fff; 182 | display: block; 183 | height: auto; 184 | } 185 | 186 | code a, pre a { 187 | color: #375f84; 188 | } 189 | 190 | a.sourceLine:hover { 191 | text-decoration: none; 192 | } 193 | 194 | .fl {color: #1514b5;} 195 | .fu {color: #000000;} /* function */ 196 | .ch,.st {color: #036a07;} /* string */ 197 | .kw {color: #264D66;} /* keyword */ 198 | .co {color: #888888;} /* comment */ 199 | 200 | .message { color: black; font-weight: bolder;} 201 | .error { color: orange; font-weight: bolder;} 202 | 
.warning { color: #6A0366; font-weight: bolder;} 203 | 204 | /* Clipboard --------------------------*/ 205 | 206 | .hasCopyButton { 207 | position: relative; 208 | } 209 | 210 | .btn-copy-ex { 211 | position: absolute; 212 | right: 0; 213 | top: 0; 214 | visibility: hidden; 215 | } 216 | 217 | .hasCopyButton:hover button.btn-copy-ex { 218 | visibility: visible; 219 | } 220 | 221 | /* mark.js ----------------------------*/ 222 | 223 | mark { 224 | background-color: rgba(255, 255, 51, 0.5); 225 | border-bottom: 2px solid rgba(255, 153, 51, 0.3); 226 | padding: 1px; 227 | } 228 | 229 | /* vertical spacing after htmlwidgets */ 230 | .html-widget { 231 | margin-bottom: 10px; 232 | } 233 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $("#sidebar") 6 | .stick_in_parent({offset_top: 40}) 7 | .on('sticky_kit:bottom', function(e) { 8 | $(this).parent().css('position', 'static'); 9 | }) 10 | .on('sticky_kit:unbottom', function(e) { 11 | $(this).parent().css('position', 'relative'); 12 | }); 13 | 14 | $('body').scrollspy({ 15 | target: '#sidebar', 16 | offset: 60 17 | }); 18 | 19 | $('[data-toggle="tooltip"]').tooltip(); 20 | 21 | var cur_path = paths(location.pathname); 22 | var links = $("#navbar ul li a"); 23 | var max_length = -1; 24 | var pos = -1; 25 | for (var i = 0; i < links.length; i++) { 26 | if (links[i].getAttribute("href") === "#") 27 | continue; 28 | var path = paths(links[i].pathname); 29 | 30 | var length = prefix_length(cur_path, path); 31 | if (length > max_length) { 32 | max_length = length; 33 | pos = i; 34 | } 35 | } 36 | 37 | // Add class to parent
  • , and enclosing
  • if in dropdown 38 | if (pos >= 0) { 39 | var menu_anchor = $(links[pos]); 40 | menu_anchor.parent().addClass("active"); 41 | menu_anchor.closest("li.dropdown").addClass("active"); 42 | } 43 | }); 44 | 45 | function paths(pathname) { 46 | var pieces = pathname.split("/"); 47 | pieces.shift(); // always starts with / 48 | 49 | var end = pieces[pieces.length - 1]; 50 | if (end === "index.html" || end === "") 51 | pieces.pop(); 52 | return(pieces); 53 | } 54 | 55 | function prefix_length(needle, haystack) { 56 | if (needle.length > haystack.length) 57 | return(0); 58 | 59 | // Special case for length-0 haystack, since for loop won't run 60 | if (haystack.length === 0) { 61 | return(needle.length === 0 ? 1 : 0); 62 | } 63 | 64 | for (var i = 0; i < haystack.length; i++) { 65 | if (needle[i] != haystack[i]) 66 | return(i); 67 | } 68 | 69 | return(haystack.length); 70 | } 71 | 72 | /* Clipboard --------------------------*/ 73 | 74 | function changeTooltipMessage(element, msg) { 75 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 76 | element.setAttribute('data-original-title', msg); 77 | $(element).tooltip('show'); 78 | element.setAttribute('data-original-title', tooltipOriginalTitle); 79 | } 80 | 81 | if(Clipboard.isSupported()) { 82 | $(document).ready(function() { 83 | var copyButton = ""; 84 | 85 | $(".examples, div.sourceCode").addClass("hasCopyButton"); 86 | 87 | // Insert copy buttons: 88 | $(copyButton).prependTo(".hasCopyButton"); 89 | 90 | // Initialize tooltips: 91 | $('.btn-copy-ex').tooltip({container: 'body'}); 92 | 93 | // Initialize clipboard: 94 | var clipboardBtnCopies = new Clipboard('[data-clipboard-copy]', { 95 | text: function(trigger) { 96 | return trigger.parentNode.textContent; 97 | } 98 | }); 99 | 100 | clipboardBtnCopies.on('success', function(e) { 101 | changeTooltipMessage(e.trigger, 'Copied!'); 102 | e.clearSelection(); 103 | }); 104 | 105 | clipboardBtnCopies.on('error', function() { 106 | changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 107 | }); 108 | }); 109 | } 110 | })(window.jQuery || window.$) 111 | -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 1.19.2.1 2 | pkgdown: 1.1.0.9000 3 | pkgdown_sha: ~ 4 | articles: 5 | custom_configuration: custom_configuration.html 6 | define_custom_environments: define_custom_environments.html 7 | python_dependencies: python_dependencies.html 8 | repeated_experiment: repeated_experiment.html 9 | table_learning: table_learning.html 10 | 11 | -------------------------------------------------------------------------------- /docs/reference/Agent.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Agent — Agent • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
An abstract R6Class to represent Agent

Agent

Format

R6Class object

Value

[Agent].

Methods

learn(iter) [function] Run iter episodes.
plotPerf() [function] Plot performance.
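A minimal usage sketch of the Agent interface, assembled from the example in paper/paper.md and the test suite (it assumes the Python dependencies for Gym and Keras are installed):

```r
library(rlR)
env = makeGymEnv("CartPole-v0")            # wrap an OpenAI Gym environment
conf = getDefaultConf("AgentDQN")          # default hyper-parameters for the DQN agent
conf$set(render = FALSE, console = FALSE)
agent = initAgent("AgentDQN", env, conf)   # returns an Agent subclass instance
perf = agent$learn(200L)                   # learn(iter): run 200 episodes
agent$plotPerf()                           # plotPerf(): plot the learning curve
```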
    183 | 184 | 185 | 186 | 187 | 188 | 189 | -------------------------------------------------------------------------------- /docs/reference/checkPyDep.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Check if python dependencies work — checkPyDep • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
Check if python dependencies work

checkPyDep()

Value

TRUE if all python dependencies work
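A short sketch of how this check can guard an experiment, following the pattern in tests/testthat/test_file_zzz.R:

```r
library(rlR)
if (checkPyDep()) {                 # TRUE only if gym, keras and tensorflow are usable from R
  env = makeGymEnv("CartPole-v0")
  agent = initAgent("AgentDQN", env)
  agent$learn(2L)
}
```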
    165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /docs/reference/listAvailAgent.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | List implemented Agents — listAvailAgent • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
List all implemented Agents

listAvailAgent()
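For example, one can inspect the available agents and pick one by name (a sketch; the agent names are those used throughout the tests, e.g. AgentDQN, AgentFDQN, AgentDDQN, AgentPG, AgentPGBaseline, AgentActorCritic):

```r
library(rlR)
listAvailAgent()                                                 # show the implemented agents
agent = initAgent(name = "AgentTable", env = "CliffWalking-v0")  # as in the tabular-learning vignette
```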
    159 | 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /docs/reference/listAvailConf.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | listAvailConf — listAvailConf • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
List the names of the default hyper-parameters

listAvailConf()
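The custom-configuration vignette uses this to look up which hyper-parameters exist and what they mean, e.g.:

```r
library(rlR)
listAvailConf()[, .(name, note)]   # data.table of parameter names and notes, as in the vignette
```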
    159 | 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /docs/reference/listGymEnvs.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | list environments from OPENAI gym — listGymEnvs • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
List all Gym Environments without testing them

listGymEnvs()
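A quick sketch (assuming the gym Python package is installed):

```r
library(rlR)
envs = listGymEnvs()              # environment names only; nothing is instantiated
env = makeGymEnv("CartPole-v0")   # instantiate one of them
env$overview()
```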
    159 | 160 | 161 | 162 | 163 | 164 | 165 | -------------------------------------------------------------------------------- /docs/reference/rlr_test_if_gym_works.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Test if gym is installed — rlr_test_if_gym_works • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
Test if gym is installed

rlr_test_if_gym_works()

Value

TRUE if success
    165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /docs/reference/rlr_test_if_keras_works.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Test if keras works — rlr_test_if_keras_works • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
Test if keras is installed

rlr_test_if_keras_works()

Value

TRUE if success
    165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /docs/reference/rlr_test_if_tensorflow_works.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Test if tensorflow works from R session — rlr_test_if_tensorflow_works • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
Test if tensorflow works from R session

rlr_test_if_tensorflow_works()

Value

TRUE if tensorflow works
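Together with rlr_test_if_gym_works() and rlr_test_if_keras_works() above, this helper mirrors the smoke test in tests/testthat/test_file_zzz.R:

```r
library(rlR)
rlr_test_if_gym_works()          # TRUE if gym is installed
rlr_test_if_keras_works()        # TRUE if keras is installed
rlr_test_if_tensorflow_works()   # TRUE if tensorflow is reachable from the R session
```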
    165 | 166 | 167 | 168 | 169 | 170 | 171 | -------------------------------------------------------------------------------- /docs/reference/showDefaultConf.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | show Default Configuration — showDefaultConf • rlR 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
List the default hyper-parameters in a data frame

showDefaultConf()

Examples

df = rlR::showDefaultConf()
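A minimal sketch of working with the defaults, following the configuration vignette and tests/testthat/test_file_conf.R:

```r
library(rlR)
df = showDefaultConf()              # all default hyper-parameters as a data frame
conf = getDefaultConf("AgentDQN")   # defaults for a single agent
conf$set(agent.lr = 0.1, render = FALSE, console = FALSE)
conf$show()
```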
    163 | 164 | 165 | 166 | 167 | 168 | 169 | -------------------------------------------------------------------------------- /inst/figures/ac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/inst/figures/ac.png -------------------------------------------------------------------------------- /inst/figures/ac300.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/inst/figures/ac300.png -------------------------------------------------------------------------------- /inst/figures/acrobat.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/inst/figures/acrobat.pdf -------------------------------------------------------------------------------- /inst/figures/dqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/inst/figures/dqn.png -------------------------------------------------------------------------------- /inst/figures/mplot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/inst/figures/mplot-1.png -------------------------------------------------------------------------------- /inst/repAtari200.R: -------------------------------------------------------------------------------- 1 | library(rlR) 2 | conf = getDefaultConf("AgentFDQN") 3 | conf$set(replay.batchsize = 32, 4 | replay.freq = 1L, 5 | console = TRUE, 6 | agent.lr.decay = 1, 7 | agent.lr = 0.00025, 8 | agent.update.target.freq = 1e4, 9 | replay.memname = "Png", 10 | render = F, 11 | policy.minEpsilon = 0.1, 12 | agent.start.learn = 5e4L, 13 | policy.aneal.steps = 4e5, 14 | replay.mem.size = 4e5, 15 | log = FALSE, 16 | agent.clip.td = TRUE, 17 | policy.decay.type = "decay_linear") 18 | 19 | makeCnnCritic = function(state_dim, act_cnt) { 20 | require("keras") 21 | text = paste("model <- keras_model_sequential();", 22 | 'model %>%', 23 | ' layer_conv_2d(filter = 16, kernel_size = c(8,8), strides = c(4, 4), 24 | padding = "same", input_shape = state_dim) %>%', 25 | 'layer_activation("relu") %>%', 26 | 'layer_conv_2d(filter = 32, kernel_size = c(4,4), strides = c(2, 2)) %>%', 27 | 'layer_activation("relu") %>%', 28 | 'layer_flatten() %>%', 29 | 'layer_dense(256) %>%', 30 | 'layer_activation("relu") %>%', 31 | 'layer_dense(act_cnt) %>%', 32 | 'layer_activation("linear");', 33 | 'opt <- optimizer_rmsprop(lr = 0.00025);', 34 | 'model %>% compile(loss = "mse", optimizer = opt, metrics = "accuracy")') 35 | model = eval(parse(text = text)) 36 | return(model) 37 | } 38 | 39 | library(doParallel) 40 | cl = makeCluster(5) 41 | registerDoParallel(cl) 42 | res = repExperiment(sname = "Seaquest-v0", aname = "AgentFDQN", conf = conf, nrep = 10, nepi = 200, value_fun = makeCnnCritic, observ_stack_len = 3L, state_preprocess = list(fun = rlR:::subsample)) 43 | -------------------------------------------------------------------------------- /paper/Makefile: -------------------------------------------------------------------------------- 1 | all: paper.pdf 2 | 3 | paper.pdf: paper.md paper.bib latex.template 4 | pandoc --filter pandoc-citeproc 
--bibliography paper.bib paper.md \ 5 | --template latex.template -o paper.pdf 6 | 7 | clean: 8 | rm paper.pdf 9 | 10 | .PHONY: clean 11 | -------------------------------------------------------------------------------- /paper/figures/ac.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/paper/figures/ac.png -------------------------------------------------------------------------------- /paper/figures/ac300.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/paper/figures/ac300.png -------------------------------------------------------------------------------- /paper/figures/acrobat.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/paper/figures/acrobat.pdf -------------------------------------------------------------------------------- /paper/figures/dqn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/paper/figures/dqn.png -------------------------------------------------------------------------------- /paper/figures/mplot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/smilesun/rlR/f066471ec4d0ccab3962eb4a1bebccfc60196211/paper/figures/mplot-1.png -------------------------------------------------------------------------------- /paper/paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: 'rlR: A R package for deep reinforcement learning' 3 | tags: 4 | - R 5 | - reinforcement learning 6 | - deep learning 7 | authors: 8 | - name: Xudong Sun 9 | orcid: 0000-0001-9234-4932 10 | affiliation: 1 11 | - name: Sebastian Gruber 12 | orcid: 0000-0002-8544-3470 13 | affiliation: 1 14 | - name: Markus Dumke 15 | orcid: 0000-0000-0000-0000 16 | affiliation: 1 17 | - name: Bernd Bischl 18 | orcid: 0000-0000-0000-0000 19 | affiliation: 1 20 | affiliations: 21 | - name: Ludwig-Maximillians-University of Munich 22 | index: 1 23 | date: 15 October 2018 24 | bibliography: paper.bib 25 | output: pdf_document 26 | --- 27 | 28 | # Summary 29 | 30 | Deep reinforcement learning has gained increasing attention in recent years due to its success in solving 31 | many complex scenarios including Atari Games [@Mnih2015], Continuous Robotic Control [@Lillicrap2016a], 32 | The game of Go [@Silver2016a] and so on. Although during our package development, we noticed some light-weight R packages occurs in between for doing reinforcement learning, most of them either only have tabular learning algorithm [@Nicolas2018], or lacks the ability to handle complicated state input like image series state input(Atari games for example) or contain only a single deep reinforcement learning algorithm [@Dumke2018]. More over, as a software package, it is not only important to show examples, but should also handle user defined environments at full fledge as input and the architecture design of the package should be loose coupling as possible to incorporate new algorithms. 
33 | 34 | The package rlR aims to address the drawbacks above by serving as a generic deep reinforcement learning solver: the user supplies a customized environment or scenario as input. Several deep reinforcement learning algorithms are included, and examples of how to use the library are well documented. We also wrap the OpenAI Gym environments, including the Atari games, so users can experiment with them. Tensorflow is used as the deep learning backend, serving as a universal function approximator. 35 | 36 | # Highlights 37 | 38 | The package rlR is written in an Aspect Oriented Programming fashion, which allows customized 39 | operations during the interaction between the agent and the environment. The package is also designed in an Object Oriented fashion, using various software-engineering design patterns, which makes it easy to extend with new algorithms. 40 | 41 | Most operations are configurable through a single configuration object, where the user can easily 42 | query the meaning of each configuration parameter instead of passing different arguments to 43 | different functions. This greatly facilitates reproducibility. 44 | 45 | Users can define an environment as an R6 class, which greatly increases the expressiveness of the 46 | customized environment. For example, the user can define the initialization of the 47 | environment and what to do after each step and each episode. 48 | 49 | # Example 50 | ``` 51 | env = makeGymEnv("CartPole-v1") 52 | env$overview() 53 | conf = getDefaultConf("AgentDQN") 54 | conf$show() 55 | conf$set(render = FALSE, console = FALSE) 56 | agent = initAgent("AgentDQN", env, conf) 57 | agent$learn(200L) 58 | agent$plotPerf() 59 | ``` 60 | ![CartPole Scenario Performance](figures/mplot-1.png) 61 | 62 | # Acknowledgements 63 | 64 | We acknowledge helpful suggestions from Janek Thomas and the support of the DFG. 65 | 66 | # References 67 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | ###### Requirements without Version Specifiers ###### 2 | #numpy 3 | # 4 | ###### Requirements with Version Specifiers ###### 5 | # See https://www.python.org/dev/peps/pep-0440/#version-specifiers 6 | tensorflow >= 1.8.0 # Minimum version 7 | keras == 2.1.6 8 | gym == 0.10.5 9 | cmake 10 | gym[atari]==0.10.5 11 | #coverage != 3.5 # Version Exclusion. Anything except version 3.5 12 | #Mopidy-Dirble ~= 1.1 # Compatible release.
Same as >= 1.1, == 1.efer to other requirements files ###### 13 | ###### Refer to other requirements files ###### 14 | #-r other-requirements.txt 15 | -------------------------------------------------------------------------------- /rlR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | StripTrailingWhitespace: Yes 16 | 17 | BuildType: Package 18 | PackageUseDevtools: Yes 19 | PackageInstallArgs: --no-multiarch --with-keep.source 20 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(rlR) 3 | 4 | test_check("rlR") 5 | -------------------------------------------------------------------------------- /tests/testthat/test_environment.R: -------------------------------------------------------------------------------- 1 | context("custom_environment") 2 | test_that("test custom environment EnvToy works", { 3 | env = EnvToy$new() 4 | env$overview() 5 | env$reset() 6 | env$step(1) 7 | env$afterAll() 8 | conf = getDefaultConf("AgentDQN") 9 | agent = initAgent("AgentDQN", env, conf) 10 | perf = agent$learn(3L) 11 | expect_class(perf, "Performance") 12 | }) 13 | 14 | 15 | test_that("Gym constructor Works", { 16 | env = makeGymEnv(name = "CartPole-v0") 17 | sr = Surrogate$new(3, c(2, 5, 7), createModel.fun = NULL) 18 | expect_error(sr$train()) 19 | expect_error(sr$pred()) 20 | }) 21 | -------------------------------------------------------------------------------- /tests/testthat/test_file_conf.R: -------------------------------------------------------------------------------- 1 | context("configuration") 2 | test_that("Conf object", { 3 | conf = getDefaultConf("AgentDQN") 4 | conf$get("agent.lr") 5 | conf$updatePara("agent.lr", 0.1) 6 | conf$set(agent.lr = 0.1) 7 | conf$show() 8 | expect_true(TRUE) 9 | }) 10 | 11 | context("naked conf") 12 | test_that("test conf", { 13 | conf = RLConf$new() 14 | expect_class(conf, "RLConf") 15 | RLLog$new(conf) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test_file_nnArsenal.R: -------------------------------------------------------------------------------- 1 | context("nnArsenal") 2 | test_that("check custom network", { 3 | fun = function(state_dim, act_cnt) { 4 | requireNamespace("keras") 5 | require("keras") 6 | model = keras_model_sequential() 7 | model %>% 8 | layer_dense(units = 256, activation = 'relu', input_shape = c(state_dim)) %>% 9 | layer_dropout(rate = 0.4) %>% 10 | layer_dense(units = 128, activation = 'relu') %>% 11 | layer_dropout(rate = 0.3) %>% 12 | layer_dense(units = act_cnt, activation = 'softmax') 13 | model 14 | } 15 | checkCustomNetwork(fun, 3, 3) 16 | expect_true(TRUE) 17 | }) 18 | 19 | test_that("default network works", { 20 | agent.nn.arch = list(nhidden = 64, act1 = "relu", act2 = "linear", loss = "mse", lr = 0.00025, kernel_regularizer = "regularizer_l2(l=0.0)", bias_regularizer = "regularizer_l2(l=0.0)") 21 | makeKerasModel(input_shape = 2, output_shape = 2, arch.list = agent.nn.arch) 22 | makeCnnActor(c(32, 32, 3), 10L) 23 | makeCnnCritic(c(32, 32, 3), 10L) 24 | #createActorNetwork(3, 2) 25 | 
#createCriticNetwork(3, 2) 26 | expect_true(TRUE) 27 | }) 28 | 29 | 30 | test_that("custom policy network works", { 31 | conf = getDefaultConf("AgentActorCritic") 32 | conf$set(console = TRUE) 33 | env = makeGymEnv("KungFuMaster-ram-v0", repeat_n_act = 4) 34 | agent = initAgent("AgentActorCritic", env, conf) 35 | mfun_val = function(state_dim, act_cnt) { 36 | requireNamespace("keras") 37 | model = keras::keras_model_sequential() 38 | model %>% 39 | layer_dense(units = 512, activation = "relu", 40 | input_shape = c(state_dim)) %>% 41 | layer_dropout(rate = 0.25) %>% 42 | layer_dense(units = 1, 43 | activation = "linear") 44 | model$compile(loss = "mse", 45 | optimizer = optimizer_rmsprop(lr = 0.001)) 46 | model 47 | } 48 | 49 | mfun_policy = function(state_dim, act_cnt) { 50 | requireNamespace("keras") 51 | model = keras::keras_model_sequential() 52 | model %>% 53 | layer_dense(units = 512, activation = "relu", 54 | input_shape = c(state_dim)) %>% 55 | layer_dropout(rate = 0.25) %>% 56 | layer_dense(units = act_cnt, 57 | activation = "softmax") 58 | model$compile(loss = "categorical_crossentropy", 59 | optimizer = optimizer_rmsprop(lr = 0.001)) 60 | model 61 | } 62 | agent$customizeBrain(list(value_fun = mfun_val, policy_fun = mfun_policy)) 63 | agent$learn(1L) 64 | expect_true(TRUE) 65 | }) 66 | -------------------------------------------------------------------------------- /tests/testthat/test_file_replay_mem.R: -------------------------------------------------------------------------------- 1 | # context("replay_mem") 2 | # test_that("test basic replay_mem works", { 3 | # conf = getDefaultConf("AgentFDQN") 4 | # env = rlR::Environment$new() 5 | # env$overview() 6 | # agent = initAgent("AgentFDQN", env) 7 | # mem = ReplayMem$new(agent, conf) 8 | # mem$reset() 9 | # ins = mem$mkInst(state.old = array(rep(1, 4)), action = c(1, 2), reward = 1, state.new = array(rep(2, 4)), done = TRUE, info = list()) 10 | # mem$add(ins) 11 | # expect_class(mem, "ReplayMem") 12 | # }) 13 | # 14 | # test_that("test stack replay_mem works", { 15 | # conf = getDefaultConf("AgentFDQN") 16 | # env = rlR::Environment$new() 17 | # env$overview() 18 | # env = makeGymEnv("Pong-v0", observ_stack_len = 4L, state_preprocess = list(fun = subsample)) 19 | # agent = initAgent("AgentFDQN", env, conf) 20 | # makeArray = function(i) array(rep(i, 61*80*4), dim = c(61,80,4)) 21 | # mem = agent$mem 22 | # mem$reset() 23 | # for (i in 1:70) { 24 | # ins = mem$mkInst(state.old = makeArray(i-1), action = 1, reward = i, state.new = makeArray(i), done = TRUE, info = list(episode = 1, stepidx = i)) 25 | # mem$add(ins) 26 | # } 27 | # for (i in 71:140) { 28 | # ins = mem$mkInst(state.old = makeArray(i-1), action = 1, reward = i, state.new = makeArray(i), done = TRUE, info = list(episode = 2, stepidx = i)) 29 | # mem$add(ins) 30 | # res = mem$sample.fun(64) 31 | # a = sapply(res, function(x) x$info$stepidx) 32 | # e = sapply(res, function(x) x$info$episode) 33 | # b = sapply(res, function(x) x$state.new[1]) 34 | # expect_true(all(a - b == 3L)) 35 | # } 36 | # expect_class(mem, "ReplayMem") 37 | # }) 38 | # 39 | # 40 | # test_that("test uniformStack_mem works", { 41 | # skip_on_cran() 42 | # conf = rlR.conf.DQN() 43 | # conf$set(replay.memname = "UniformStack", replay.mem.size = 70L) # bigger than batchsize 44 | # env = makeGymEnv("Pong-v0", repeat_n_act = 400, observ_stack_len = 2, state_preprocess = list(fun = subsample)) 45 | # env$overview() 46 | # agent = initAgent("AgentFDQN", env, conf) 47 | # agent$learn(3) 48 | # 
expect_class(agent, "AgentFDQN") 49 | # }) 50 | # 51 | context("interact") 52 | test_that("test interact base works", { 53 | inter = InteractionBase$new() 54 | expect_error(inter$run()) 55 | }) 56 | -------------------------------------------------------------------------------- /tests/testthat/test_file_zzz.R: -------------------------------------------------------------------------------- 1 | context("zzz") 2 | test_that("zzz", { 3 | rlr_test_if_tensorflow_works() 4 | checkPyDep() 5 | listGymEnvs() 6 | env = makeGymEnv("CartPole-v0") 7 | listAvailAgent() 8 | rlr_test_if_tensorflow_works() 9 | rlr_test_if_gym_works() 10 | rlr_test_if_keras_works() 11 | expect_true(TRUE) 12 | }) 13 | -------------------------------------------------------------------------------- /tests/testthat/test_gym_basic.R: -------------------------------------------------------------------------------- 1 | context("gym_basic") 2 | 3 | test_that("cran table", { 4 | agent = initAgent(name = "AgentTable", env = "CliffWalking-v0") 5 | agent$learn(1) 6 | expect_true(T) 7 | }) 8 | 9 | test_that("table", { 10 | skip_on_cran() 11 | agent = initAgent(name = "AgentTable", env = "CliffWalking-v0") 12 | agent$learn(500) 13 | expect_true(agent$interact$perf$getAccPerf() > -40.0) 14 | }) 15 | 16 | test_that("cran test initAgent works", { 17 | agent.names = c("AgentDQN", "AgentFDQN", "AgentDDQN", "AgentPG", "AgentPGBaseline", "AgentActorCritic") 18 | env = makeGymEnv("CartPole-v0") 19 | lapply(agent.names, function(name) initAgent(name, env, conf = getDefaultConf(name))) 20 | expect_true(TRUE) 21 | }) 22 | 23 | test_that("Basic test Cart-Pole could run with agents", { 24 | skip_on_cran() 25 | agent.names = c("AgentDQN", "AgentFDQN", "AgentDDQN", "AgentPG", "AgentPGBaseline", "AgentActorCritic") 26 | lapply(agent.names, function(agent.name) { 27 | env = makeGymEnv("CartPole-v0") 28 | agent = initAgent(agent.name, env) 29 | agent$learn(1L) 30 | expect_true(T, info = agent.name) 31 | }) 32 | }) 33 | 34 | test_that("test Cart-Pole works for each Policy Agent", { 35 | skip_on_cran() 36 | agent.names = c("AgentPG", "AgentPGBaseline", "AgentActorCritic") 37 | lapply(agent.names, function(agent.name) { 38 | print(agent.name) 39 | conf = getDefaultConf(agent.name) 40 | env = makeGymEnv("CartPole-v0") 41 | agent = initAgent(agent.name, env, conf) 42 | agent$learn(80) 43 | expect_true(agent$interact$perf$getAccPerf() > 20, info = agent.name) 44 | }) 45 | }) 46 | 47 | test_that("test Cart-Pole works for DQN Agent", { 48 | skip_on_cran() 49 | env = makeGymEnv("CartPole-v0") 50 | agent = initAgent("AgentDQN", env) 51 | agent$learn(100) 52 | expect_true(agent$interact$perf$getAccPerf() > 20, info = agent.name) 53 | }) 54 | 55 | test_that("test AgentFDQN works", { 56 | skip_on_cran() 57 | skip_on_travis() 58 | env = makeGymEnv("CartPole-v0") 59 | agent = initAgent("AgentFDQN", env) 60 | agent$learn(300) 61 | ave_reward = agent$interact$perf$getAccPerf() 62 | expect_true(ave_reward > 20, info = "AgentFDQN") 63 | }) 64 | 65 | 66 | test_that("test AgentDDQN works", { 67 | skip_on_cran() 68 | env = makeGymEnv("CartPole-v0") 69 | agent = initAgent("AgentDDQN", env) 70 | agent$learn(200) 71 | expect_true(agent$interact$perf$getAccPerf() > 20, info = agent.name) 72 | }) 73 | 74 | test_that("test rescue works each Policy based Agent", { 75 | skip_on_cran() 76 | agent.names = c("AgentPG", "AgentPGBaseline", "AgentActorCritic") 77 | lapply(agent.names, function(agent.name) { 78 | conf = getDefaultConf(agent.name) 79 | conf$set(agent.flag.reset.net = TRUE) 
80 | env = makeGymEnv("CartPole-v0") 81 | agent = initAgent(agent.name, env, conf) 82 | agent$learn(2) 83 | }) 84 | expect_true(TRUE) 85 | }) 86 | -------------------------------------------------------------------------------- /tests/testthat/test_gym_ddpg.R: -------------------------------------------------------------------------------- 1 | context("gym_continuous") 2 | test_that("test ddpg works", { 3 | skip_on_cran() 4 | skip("skipping ddpg") 5 | env = makeGymEnv("Pendulum-v0") 6 | conf = getDefaultConf("AgentDQN") 7 | agent = initAgent("AgentDDPG", env, conf) 8 | agent$learn(1) 9 | expect_true(T) 10 | }) 11 | -------------------------------------------------------------------------------- /tests/testthat/test_rep_experiment.R: -------------------------------------------------------------------------------- 1 | context("repeat experiment") 2 | test_that("travis repeat experiment", { 3 | skip_on_cran() 4 | skip_on_travis() 5 | skip("repeat experiment should be tested individually") 6 | doMC::registerDoMC(4) 7 | agent.names = c("AgentDQN") # too many agents takes too long #agent.names = c("AgentDQN", "AgentFDQN", "AgentDDQN", "AgentPG", "AgentPGBaseline", "AgentActorCritic") 8 | env = makeGymEnv("CartPole-v0") 9 | lapply(agent.names, function(name) repExperiment(sname = "CartPole-v0", aname = name, conf = getDefaultConf(name), nrep = 2, nepi = 2)) 10 | expect_true(TRUE) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/testthat/test_topic_atari.R: -------------------------------------------------------------------------------- 1 | context("atari intensive") 2 | test_that("test Seaquest improves with time", { 3 | skip_on_cran() 4 | skip_on_travis() 5 | skip("heavy computation") 6 | env = makeGymEnv("Seaquest-v0", observ_stack_len = 4L, state_preprocess = list(fun = rlR:::subsample)) 7 | conf = getDefaultConf("AgentDDQN") 8 | conf$set(replay.batchsize = 32, 9 | replay.freq = 1L, 10 | console = TRUE, 11 | agent.lr.decay = 1, 12 | agent.lr = 0.00025, 13 | agent.update.target.freq = 1e4, replay.memname = "Png", 14 | render = F, 15 | policy.minEpsilon = 0.1, 16 | agent.start.learn = 5e4L, 17 | policy.aneal.steps = 1e6, 18 | replay.mem.size = 1e6, 19 | log = FALSE, 20 | agent.clip.td = TRUE, 21 | policy.decay.type = "decay_linear") 22 | 23 | makeCnnCritic = function(state_dim, act_cnt) { 24 | require("keras") 25 | text = paste("model <- keras_model_sequential();", 26 | 'model %>%', 27 | ' layer_conv_2d(filter = 16, kernel_size = c(8,8), strides = c(4, 4), 28 | padding = "same", input_shape = state_dim) %>%', 29 | 'layer_activation("relu") %>%', 30 | 'layer_conv_2d(filter = 32, kernel_size = c(4,4), strides = c(2, 2)) %>%', 31 | 'layer_activation("relu") %>%', 32 | 'layer_flatten() %>%', 33 | 'layer_dense(256) %>%', 34 | 'layer_activation("relu") %>%', 35 | 'layer_dense(act_cnt) %>%', 36 | 'layer_activation("linear");', 37 | 'opt <- optimizer_rmsprop(lr = 0.00025);', 38 | 'model %>% compile(loss = "mse", optimizer = opt, metrics = "accuracy")') 39 | model = eval(parse(text = text)) 40 | return(model) 41 | } 42 | agent = initAgent("AgentFDQN", env, conf, custom_brain = TRUE) 43 | agent$customizeBrain(list(value_fun = makeCnnCritic)) 44 | agent$learn(2000L) 45 | }) 46 | -------------------------------------------------------------------------------- /tests/testthat/test_topic_cnn.R: -------------------------------------------------------------------------------- 1 | context("atari") 2 | test_that("test cnn stack input works for each value based 
agent", { 3 | agent.names = c("AgentDQN", "AgentFDQN", "AgentDDQN") 4 | lapply(agent.names, function(agent.name) { 5 | conf = getDefaultConf(agent.name) 6 | conf$set(replay.batchsize = 32, replay.freq = 40L, console = TRUE, agent.lr.decay = 1, agent.lr = 0.00025, replay.memname = "UniformStack") 7 | env = makeGymEnv("KungFuMaster-v0", repeat_n_act = 80L, observ_stack_len = 4L) 8 | agent = initAgent(agent.name, env, conf) 9 | perf = agent$learn(1) 10 | expect_class(perf, "Performance") 11 | }) 12 | }) 13 | 14 | #FIXME:Valueexpected conv2d_46_input to have shape (210, 160, 4) but got array with shape (210, 160, 12) 15 | 16 | # test_that("test cnn works for each policy based agent", { 17 | # agent.names = c("AgentPG", "AgentPGBaseline", "AgentActorCritic") 18 | # lapply(agent.names, function(agent.name) { 19 | # env = makeGymEnv("KungFuMaster-v0", repeat_n_act = 80L) 20 | # agent = initAgent(agent.name, env, conf) 21 | # agent$learn(1) 22 | # expect_true(T) 23 | # }) 24 | # }) 25 | -------------------------------------------------------------------------------- /vignettes/custom_configuration.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Custom Configuration" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Custom Configuration} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | ``` 24 | 25 | 26 | # Configure 27 | 28 | ```{r} 29 | options(width=1000) 30 | listAvailConf()[, .(name, note)] 31 | ``` 32 | 33 | ```{r} 34 | conf = getDefaultConf("AgentDQN") 35 | conf 36 | conf$set(render = FALSE, console = FALSE) 37 | ``` 38 | 39 | ```{r learn} 40 | env = makeGymEnv("CartPole-v0") 41 | agent = initAgent("AgentDQN", env, conf) 42 | agent$learn(2) 43 | ``` 44 | 45 | ```{r mplot, eval=FALSE,fig.path="inst/figures/", warning=FALSE, message=FALSE, eval=FALSE} 46 | agent$plotPerf(F) 47 | ``` 48 | -------------------------------------------------------------------------------- /vignettes/define_custom_environments.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Custom Learning Environment" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Define custom environment for deep reinforcement learn} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | library(reticulate) 24 | os = import("os") 25 | os$environ[["TF_CPP_MIN_LOG_LEVEL"]]="3" 26 | ``` 27 | # rlR: Define Custom Task to solve 28 | 29 | ## Environment class 30 | 31 | If you want to use this package for your self defined task, you need to implement your own R6 class to represent the environment which must inherit the `rlR::Environment` Class. 
You could define other public and private members as you like which do not collide with the names in `rlR::Environment`. Type the following to have a look at the documentation of `rlR::Environment` 32 | 33 | ```{r} 34 | help(topic="Environment", package = "rlR") 35 | ``` 36 | 37 | ## A toy Example 38 | 39 | ```{r} 40 | env = rlR:::EnvToy$new() 41 | ``` 42 | 43 | `rlR:::EnvToy` is an R6 class which inherit `rlR::Environment`. 44 | 45 | ```{r} 46 | class(env) 47 | ``` 48 | 49 | There are 3 methods you must override when defining your own Environment class. 50 | 51 | ```{r} 52 | env$initialize # the fields 'act_cnt' and 'state_dim' must be defined here 53 | ``` 54 | 55 | ```{r} 56 | env$reset # The return must be a list with fields state(must be an array), reward = NULL, done = FALSE, and info = list() 57 | ``` 58 | 59 | 60 | ```{r} 61 | env$step # The return must be a list with fields state(must be an array), reward(numeric), done(Boolean), and info (list of anything or empty list) 62 | ``` 63 | 64 | ## Testing 65 | 66 | Afterwards you could choose one of the available Agents to check if the newly defined environments works. 67 | 68 | ```{r} 69 | agent = initAgent("AgentDQN", env) 70 | agent$learn(3) 71 | ``` 72 | -------------------------------------------------------------------------------- /vignettes/python_dependencies.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Python Dependency" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Python Dependency} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | library(reticulate) 24 | ``` 25 | 26 | 27 | # Configure to connect to python 28 | To run the examples, you need to have the python packages `numpy-1.14.5`, `tensorflow-1.8.0`, `keras-2.1.6`, `gym-0.10.5` installed in the **same** python path. 29 | 30 | This python path can be your system default python path or a virtual environment(either system python virtual environment or anaconda virtual environment). 31 | 32 | Other package versions might also work but not tested. 33 | 34 | To look at all python paths you have, in a R session, run 35 | ```{r eval=FALSE} 36 | reticulate::py_discover_config() 37 | ``` 38 | 39 | Check which is your system default python: 40 | ```{r eval=FALSE} 41 | Sys.which("python") 42 | ``` 43 | 44 | If you want to use a python path other than this system default, run the following(replace the '/usr/bin/python' with the python path you want) before doing anything else with reticulate. 45 | ```{r eval=FALSE} 46 | reticulate::use_python("/usr/bin/python", required=TRUE) 47 | ``` 48 | **"Note that you can only load one Python interpreter per R session so the use_python call only applies before you actually initialize the interpreter."** Which means if you changed your mind, you have to close the current R session and open a new R session. 
49 | 50 | Confirm from the following if the first path is the one you wanted 51 | ```{r eval=FALSE} 52 | reticulate::py_config() 53 | ``` 54 | 55 | ### Python dependencies installation by rlR function 56 | It is not recommended to mix things up with the system python, so by default, the rlR facility will install the dependencies to virtual environment named 'r-tensorflow' either to your system virtualenv or Anaconda virtualenv. 57 | 58 | For Unix user 59 | - Ensure that you have **either** of the following available 60 | - Python Virtual Environment: 61 | ```{bash eval=F} 62 | pip install virtualenv 63 | ``` 64 | - Anaconda 65 | - Native system python that ships with your OS. (you have to install python libraries mannually in this case, see instructions below) 66 | - Install dependencies through 67 | - if you have python virtualenv available: 68 | ```{r eval=F} 69 | rlR::installDep2SysVirtualEnv(gpu = FALSE) 70 | ``` 71 | - if you have anaconda available: 72 | ```{r eval=FALSE} 73 | rlR::installDepConda(conda_path = "auto", gpu = FALSE) 74 | ``` 75 | 76 | For Windows user 77 | - Ensure that you have Anaconda available **or** a native local system python installed(in this case you also have to install python libraries mannually, see instructions below) 78 | - Install dependencies through `{r eval=FALSE} rlR::installDepConda(gpu = FALSE)` 79 | 80 | If you want to have gpu support, simply set the gpu argument to be true in the function call. 81 | 82 | ### Mannual python dependency installation 83 | You can also install python dependencies without using rlR facility function, for example, you can open an anaconda virtual environment "r-tensorflow" by `source activate r-tensorflow` 84 | 85 | All python libraries that are required could be installed either in a virtual environment or in system native python using pip: 86 | 87 | ```{bash, eval=F} 88 | pip install --upgrade pip # set your prefered path to the search path first 89 | pip install -r requirement.txt 90 | # or 91 | pip install tensorflow 92 | pip install keras 93 | pip install gym 94 | pip install cmake 95 | pip install gym[atari] # this need to be runned even you use require.txt for installation 96 | ``` 97 | where 'cmake' is required to build atari environments. 98 | 99 | 100 | 101 | # Independencies for visualization of environments 102 | The R package imager is required if you want to visualize different environments but the other functionality of rlR is not affected by this R package. 
For ubuntu, the R package imager depends on libraries which could be installed 103 | 104 | ```{bash, eval=F} 105 | sudo apt-get install -y libfftw3-dev libx11-dev libtiff-dev 106 | sudo apt-get install -y libcairo2-dev 107 | sudo apt-get install -y libxt-dev 108 | ``` 109 | -------------------------------------------------------------------------------- /vignettes/table_learning.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Table Learning on Toy-text" 3 | output: 4 | html_document: 5 | toc: true 6 | toc_float: 7 | collapsed: true 8 | smooth_scroll: false 9 | dev: svg 10 | vignette: > 11 | %\VignetteIndexEntry{Tablular Learning} 12 | %\VignetteEngine{knitr::rmarkdown} 13 | %\VignetteEncoding{UTF-8} 14 | --- 15 | 16 | ```{r setup, include = FALSE, cache = FALSE} 17 | library(rlR) 18 | set.seed(123) 19 | knitr::opts_chunk$set(cache = TRUE, collapse = FALSE, dev = "svg", fig.height = 3.5) 20 | knitr::knit_hooks$set(document = function(x){ 21 | gsub("```\n*```r*\n*", "", x) 22 | }) 23 | library(reticulate) 24 | os = import("os") 25 | os$environ[["TF_CPP_MIN_LOG_LEVEL"]]="3" 26 | ``` 27 | 28 | # Toy text and tabular learning 29 | 30 | ```{r} 31 | library(rlR) 32 | agent = initAgent(name = "AgentTable", env = "CliffWalking-v0") 33 | ``` 34 | 35 | ```{r} 36 | agent$learn(500) 37 | ``` 38 | 39 | ```{r eval=F} 40 | agent$plotPerf() 41 | ``` 42 | --------------------------------------------------------------------------------