├── tests
│   ├── data
│   │   ├── sample_submission.csv
│   │   ├── testocr.png
│   │   └── notebook.ipynb
│   ├── test_secrets.R
│   ├── test_bigquery.R
│   ├── test_mxnet.R
│   ├── test_graphics.R
│   ├── test_tidymodels.R
│   ├── test_tesseract.R
│   ├── test_topicmodels.R
│   ├── test_papermill.R
│   ├── test_patchwork.R
│   ├── test_tensorflow.R
│   ├── test_torch.R
│   ├── test_languageserver.R
│   ├── test_gg.R
│   ├── test_jupyterlab-lsp.R
│   ├── test_nbconvert.R
│   ├── test_imports.R
│   └── test_keras.R
├── install_iR.R
├── tools
│   └── r_list_versions.R
├── kaggle
│   ├── template_conf.json
│   ├── kaggle_secrets.R
│   └── kaggle_bigquery.R
├── nbconvert-extensions.tpl
├── testthat.R
├── clean-layer.sh
├── bioconductor_installs.R
├── ldpaths
├── RProfile.R
├── push
├── package_installs.R
├── diff
├── README.md
├── test
├── Dockerfile
├── gpu.Dockerfile
├── Jenkinsfile
└── LICENSE
/tests/data/sample_submission.csv:
--------------------------------------------------------------------------------
1 | id_code,diagnosis
2 | testocr,0
--------------------------------------------------------------------------------
/tests/data/testocr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kaggle/docker-rstats/HEAD/tests/data/testocr.png
--------------------------------------------------------------------------------
/tests/test_secrets.R:
--------------------------------------------------------------------------------
1 | context("user_secrets")
2 | 
3 | test_that("get_user_secret exists", {
4 |   exists('get_user_secret')
5 | })
6 | 
--------------------------------------------------------------------------------
/install_iR.R:
--------------------------------------------------------------------------------
1 | library(devtools)
2 | install_github('IRkernel/repr')
3 | install_github('IRkernel/IRdisplay')
4 | install_github('IRkernel/IRkernel')
5 | 
--------------------------------------------------------------------------------
/tests/test_bigquery.R:
--------------------------------------------------------------------------------
1 | context("bigquery")
2 | 
3 | test_that("bigquery exists", {
4 |   exists('TokenBigQueryKernel')
5 |   exists('query_exec')
6 | })
7 | 
--------------------------------------------------------------------------------
/tests/test_mxnet.R:
--------------------------------------------------------------------------------
1 | context("mxnet")
2 | 
3 | test_that("mxnet", {
4 |   library("xgboost")
5 |   library("mxnet")
6 |   a = mx.nd.ones(c(2,3))
7 | 
8 |   expect_equal(6, length(a))
9 | })
10 | 
--------------------------------------------------------------------------------
/tools/r_list_versions.R:
--------------------------------------------------------------------------------
1 | ip <- as.data.frame(installed.packages()[,c(1,3:4)])
2 | ip <- ip[is.na(ip$Priority),1:2,drop=FALSE]
3 | write.table(ip, quote=FALSE, sep="==", row.names=FALSE, col.names=FALSE)
--------------------------------------------------------------------------------
/tests/test_graphics.R:
--------------------------------------------------------------------------------
1 | context("graphics")
2 | 
3 | test_that("plot", {
4 |   testImage <- "/working/base_graphics_test.jpg"
5 |   jpeg(testImage)
6 |   plot(runif(10))
7 |   dev.off()
8 |   expect_true(file.exists(testImage))
9 | })
10 | 
--------------------------------------------------------------------------------
/tests/test_tidymodels.R:
--------------------------------------------------------------------------------
1 | context("tidymodels")
2 | 
3 | test_that("tidymodels exists", {
4 | 
library(tidymodels) 5 | }) 6 | 7 | test_that("broom", { 8 | library(broom) 9 | fit <- lm(mpg ~ wt, mtcars) 10 | expect_equal(ncol(tidy(fit)), 5) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/test_tesseract.R: -------------------------------------------------------------------------------- 1 | context("tesseract") 2 | 3 | test_that("ocr", { 4 | library(tesseract) 5 | eng <- tesseract("eng") 6 | fpath <- file.path('/input/tests/data/testocr.png') 7 | text <- tesseract::ocr(fpath, engine = eng) 8 | expect_match(text, "This is a lot of 12 point text") 9 | }) 10 | -------------------------------------------------------------------------------- /tests/test_topicmodels.R: -------------------------------------------------------------------------------- 1 | context("topicmodels") 2 | 3 | test_that("basic topic model example", { 4 | expect_error({ 5 | library(topicmodels) 6 | data("AssociatedPress") 7 | ap_lda <- LDA(AssociatedPress, k = 2, control = list(seed = 1234)) 8 | }, NA) # expect no error to be thrown 9 | }) 10 | -------------------------------------------------------------------------------- /kaggle/template_conf.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_template": "classic", 3 | "mimetypes": { 4 | "text/html": true 5 | }, 6 | "preprocessors": { 7 | "100-pygments": { 8 | "type": "nbconvert.preprocessors.CSSHTMLHeaderPreprocessor", 9 | "enabled": true, 10 | "style": "default" 11 | } 12 | } 13 | } -------------------------------------------------------------------------------- /tests/test_papermill.R: -------------------------------------------------------------------------------- 1 | context("papermill") 2 | 3 | test_that("papermill exists", { 4 | expect_error({ 5 | library(jsonlite) 6 | 7 | results <- system("papermill /input/tests/data/notebook.ipynb -", 8 | intern = TRUE) 9 | json <- fromJSON(results, simplifyVector = FALSE) 10 | expect_equal(json$cells[[1]]$outputs[[1]]$text[[1]], "[1] 999\n") 11 | }, NA) # expect no error to be thrown 12 | }) 13 | -------------------------------------------------------------------------------- /tests/test_patchwork.R: -------------------------------------------------------------------------------- 1 | context("patchwork") 2 | 3 | # https://github.com/thomasp85/patchwork#basic-example 4 | test_that("basic example", { 5 | expect_error({ 6 | library(ggplot2) 7 | library(patchwork) 8 | 9 | p1 <- ggplot(mtcars) + geom_point(aes(mpg, disp)) 10 | p2 <- ggplot(mtcars) + geom_boxplot(aes(gear, disp, group = gear)) 11 | 12 | p1 + p2 13 | }, NA) # expect no error to be thrown 14 | }) 15 | -------------------------------------------------------------------------------- /tests/test_tensorflow.R: -------------------------------------------------------------------------------- 1 | context("tensorflow") 2 | 3 | test_that("check gpu device", { 4 | check_gpu() 5 | 6 | library(tensorflow) 7 | gpus = tf$config$experimental$list_physical_devices('GPU') 8 | expect_gte(length(gpus), 1) 9 | }) 10 | 11 | test_that("tensorflow with gpu", { 12 | check_gpu() 13 | 14 | library(tensorflow) 15 | with(tf$device("/gpu:0"), { 16 | const <- tf$constant(42) 17 | expect_equal(42, as.integer(const)) 18 | }) 19 | }) 20 | -------------------------------------------------------------------------------- /tests/test_torch.R: -------------------------------------------------------------------------------- 1 | context("torch") 2 | 3 | test_that("cpu imports", { 4 | library(torch) 5 
| x <- array(runif(8), dim = c(2, 2, 2)) 6 | y <- torch_tensor(x, dtype = torch_float64()) 7 | expect_identical(x, as_array(y)) 8 | }) 9 | 10 | test_that("gpu imports", { 11 | check_gpu() 12 | 13 | library(torch) 14 | x <- array(runif(8), dim = c(2, 2, 2)) 15 | y <- torch_tensor(x, dtype = torch_float64(), device = "cuda") 16 | expect_identical(x, as_array(y$cpu())) 17 | }) 18 | 19 | -------------------------------------------------------------------------------- /nbconvert-extensions.tpl: -------------------------------------------------------------------------------- 1 | {# 2 | Jinja template to inject notebook cell metadata to enhance generated HTML output 3 | All cell metadata starting with '_kg_' will be included with its value ({key}-{value}) 4 | as a class in the cell's DIV container 5 | #} 6 | 7 | {% extends 'classic/index.html.j2'%} 8 | {% block any_cell %} 9 |
12 | {% endblock any_cell %}
--------------------------------------------------------------------------------
/testthat.R:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env Rscript
2 | # Usage: testthat.R [TEST_FILE]
3 | #
4 | # TEST_FILE    Run tests for the specified TEST_FILE (e.g. 'test_keras.R').
5 | #
6 | library("testthat")
7 | 
8 | args = commandArgs(trailingOnly=TRUE)
9 | 
10 | check_gpu <- function() {
11 |   if (Sys.getenv("CUDA_VERSION") == "") {
12 |     skip("Skipping GPU tests for CPU image")
13 |   }
14 | }
15 | 
16 | if (length(args)==1) {
17 |   testthat::test_file(paste("/input/tests", args[1], sep="/"))
18 | } else {
19 |   testthat::test_dir("/input/tests", stop_on_failure=TRUE)
20 | }
21 | 
--------------------------------------------------------------------------------
/tests/test_languageserver.R:
--------------------------------------------------------------------------------
1 | context("languageserver")
2 | 
3 | test_that("languageserver responds to commands", {
4 |   expect_error({
5 |     result <- system(
6 |       "R -e 'languageserver::run()'",
7 |       input="Content-Length: 38\n\n{ \"id\": \"123\", \"method\": \"shutdown\" }\n",
8 |       intern=TRUE)
9 | 
10 |     found_response <- FALSE
11 |     for (line in result) {
12 |       if (grepl("\"id\":\"123\"", line, fixed=TRUE) & grepl("\"result\":[]", line, fixed=TRUE)) {
13 |         found_response <- TRUE
14 |       }
15 |     }
16 | 
17 |     expect_true(found_response)
18 |   }, NA) # expect no error to be thrown
19 | })
--------------------------------------------------------------------------------
/clean-layer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # This script should be called at the end of each RUN command
4 | # in the Dockerfiles.
5 | #
6 | # Each RUN command creates a new layer that is stored separately.
7 | # At the end of each command, we should ensure we clean up downloaded
8 | # archives and source files used to produce binaries to reduce the size
9 | # of the layer.
10 | set -e
11 | set -x
12 | 
13 | # Delete files that pip caches when installing a package.
14 | rm -rf /root/.cache/pip/* 15 | # Delete old downloaded archive files 16 | apt-get autoremove -y 17 | # Delete downloaded archive files 18 | apt-get clean 19 | # Delete source files used for building binaries 20 | rm -rf /usr/local/src/* 21 | -------------------------------------------------------------------------------- /tests/data/notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "metadata": { 5 | "trusted": true 6 | }, 7 | "cell_type": "code", 8 | "source": "x <- 999\nprint(x)", 9 | "execution_count": null, 10 | "outputs": [] 11 | } 12 | ], 13 | "metadata": { 14 | "kernelspec": { 15 | "display_name": "R", 16 | "language": "R", 17 | "name": "ir" 18 | }, 19 | "language_info": { 20 | "mimetype": "text/x-r-source", 21 | "name": "R", 22 | "pygments_lexer": "r", 23 | "version": "3.4.2", 24 | "file_extension": ".r", 25 | "codemirror_mode": "r" 26 | } 27 | }, 28 | "nbformat": 4, 29 | "nbformat_minor": 4 30 | } -------------------------------------------------------------------------------- /bioconductor_installs.R: -------------------------------------------------------------------------------- 1 | options(repos = c("CRAN" = "http://cran.us.r-project.org")) 2 | options(Ncpus = parallel::detectCores()) 3 | 4 | if("devtools" %in% rownames(installed.packages()) == FALSE) 5 | install.packages("devtools") 6 | library(devtools) 7 | 8 | if (!requireNamespace("BiocManager", quietly = TRUE)) 9 | install.packages("BiocManager") 10 | 11 | BiocManager::install(update=FALSE, ask=FALSE) 12 | BiocManager::install("BiocGenerics", update=FALSE, ask=FALSE) 13 | install_version("locfit", version = "1.5.9.4", ask=FALSE) 14 | BiocManager::install("EBImage", update=FALSE, ask=FALSE) 15 | BiocManager::install("rhdf5", update=FALSE, ask=FALSE) 16 | BiocManager::install("limma", update=FALSE, ask=FALSE) 17 | -------------------------------------------------------------------------------- /tests/test_gg.R: -------------------------------------------------------------------------------- 1 | context("gg* packages") 2 | 3 | test_that("gganimate", { 4 | expect_error({ 5 | library("gganimate") 6 | library("gapminder") 7 | 8 | testPlot2 <- ggplot(gapminder, 9 | aes(gdpPercap, lifeExp, size = pop, color = continent, frame = year), 10 | transition_states(gear, transition_length = 2, state_length = 1)) + 11 | geom_point() + 12 | scale_x_log10() 13 | }, NA) # expect no error to be thrown 14 | }) 15 | 16 | test_that("ggplot", { 17 | testImage <- "/working/ggplot_test.png" 18 | library("ggplot2") 19 | testPlot1 <- ggplot(data.frame(x=1:10,y=runif(10))) + aes(x=x,y=y) + geom_line() 20 | ggsave(testPlot1, filename=testImage) 21 | expect_true(file.exists(testImage)) 22 | }) 23 | -------------------------------------------------------------------------------- /tests/test_jupyterlab-lsp.R: -------------------------------------------------------------------------------- 1 | context("jupyterlab-lsp") 2 | 3 | library(httr) 4 | 5 | test_that("jupyterlab-lsp is installed", { 6 | expect_error({ 7 | # Start a jupyterlab server and wait for it to initialize 8 | system( 9 | "/usr/local/bin/jupyter server --allow-root --no-browser --port 9999 --notebook-dir /tmp", 10 | wait=FALSE) 11 | 12 | code <- 0 13 | for (x in 1:5) { 14 | # Ping LSP endpoint, verify 200 response 15 | print("ping lsp server...") 16 | response <- try(GET("http://localhost:9999/lsp/status")) 17 | if (class(response) == "response") 18 | code <- status_code(response) 19 | 20 | if (code == 200) { 21 
| break 22 | } 23 | 24 | Sys.sleep(5) 25 | } 26 | expect_equal(code, 200) 27 | 28 | # Kill the server 29 | pid <- system("ps -ef | grep jupyter | grep 9999 | awk '{print $2}' | head -n 1", intern = TRUE) 30 | tools::pskill(pid) 31 | }, NA) # expect no error to be thrown 32 | }) 33 | -------------------------------------------------------------------------------- /ldpaths: -------------------------------------------------------------------------------- 1 | : ${JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64} 2 | : ${R_JAVA_LD_LIBRARY_PATH=${JAVA_HOME}/lib/server} 3 | if test -n "/usr/local/lib"; then 4 | : ${R_LD_LIBRARY_PATH=${R_HOME}/lib:/usr/local/lib} 5 | else 6 | : ${R_LD_LIBRARY_PATH=${R_HOME}/lib} 7 | fi 8 | if test -n "${R_JAVA_LD_LIBRARY_PATH}"; then 9 | R_LD_LIBRARY_PATH="${R_LD_LIBRARY_PATH}:${R_JAVA_LD_LIBRARY_PATH}" 10 | fi 11 | ## This is DYLD_FALLBACK_LIBRARY_PATH on Darwin (macOS) and 12 | ## LD_LIBRARY_PATH elsewhere. 13 | ## However, on macOS >=10.11 (if SIP is enabled, the default), the 14 | ## environment value will not be passed to a script such as R.sh, so 15 | ## would not seen here. 16 | if test -z "${LD_LIBRARY_PATH}"; then 17 | LD_LIBRARY_PATH="${R_LD_LIBRARY_PATH}" 18 | else 19 | LD_LIBRARY_PATH="${R_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}" 20 | fi 21 | if test -n "/usr/lib/x86_64-linux-gnu"; then 22 | : ${LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/lib/x86_64-linux-gnu} 23 | fi 24 | export LD_LIBRARY_PATH -------------------------------------------------------------------------------- /tests/test_nbconvert.R: -------------------------------------------------------------------------------- 1 | context("nbconvert") 2 | 3 | test_that("nbconvert to notebook", { 4 | expect_error({ 5 | library(jsonlite) 6 | 7 | results <- system("jupyter nbconvert --to notebook --template /opt/kaggle/nbconvert-extensions.tpl --execute --stdout /input/tests/data/notebook.ipynb", 8 | intern = TRUE) 9 | json <- fromJSON(results, simplifyVector = FALSE) 10 | expect_equal(json$cells[[1]]$outputs[[1]]$text[[1]], "[1] 999\n") 11 | }, NA) # expect no error to be thrown 12 | }) 13 | 14 | test_that("nbconvert to html", { 15 | expect_error({ 16 | results <- system("jupyter nbconvert --to html --stdout --template /opt/kaggle/nbconvert-extensions.tpl --Exporter.preprocessors=[\\\"nbconvert.preprocessors.ExtractOutputPreprocessor\\\"] \"/input/tests/data/notebook.ipynb\"", 17 | intern = TRUE) 18 | expect_match(toString(results), ".*>999<.*") # [...] x <- 999 [...] 
19 | }, NA) # expect no error to be thrown 20 | }) -------------------------------------------------------------------------------- /RProfile.R: -------------------------------------------------------------------------------- 1 | options(repos = list(CRAN = "http://cran.rstudio.com/")) 2 | 3 | options(device = function() png(width = 900)) 4 | 5 | # Suppressing package startup messages in package loads 6 | # WART: this appears dangerous and is likely the source of 7 | # future tough-to-debug bugs 8 | # (removing this for now as it caused issues with the gbm package) 9 | # env <- as.environment('package:base') 10 | # unlockBinding('library', env) 11 | # library.warn <- library 12 | # utils::assignInNamespace('library', function( 13 | # package, help, pos = 2, lib.loc = NULL, character.only = FALSE, 14 | # logical.return = FALSE, warn.conflicts = TRUE, quietly = FALSE, 15 | # verbose = getOption("verbose")) { 16 | # if (!character.only) { 17 | # package <- as.character(substitute(package)) 18 | # } 19 | 20 | # suppressPackageStartupMessages(library.warn( 21 | # package, help, pos, lib.loc, character.only = TRUE, 22 | # logical.return, warn.conflicts, quietly, verbose)) 23 | # }, ns="base") 24 | # lockBinding('library', env) 25 | 26 | # Needed to make plots in rendered iR notebooks display correctly 27 | options(jupyter.plot_mimetypes = "image/png") 28 | 29 | source("/kaggle/kaggle_bigquery.R") 30 | source("/kaggle/kaggle_secrets.R") 31 | # Ensure the file ends in a newline 32 | # https://yihui.name/en/2018/04/rprofile-trailing-newline/. 33 | -------------------------------------------------------------------------------- /tests/test_imports.R: -------------------------------------------------------------------------------- 1 | context("import") 2 | 3 | Library <- function(libname){ 4 | print(libname) 5 | suppressPackageStartupMessages(library(libname, character.only=TRUE)) 6 | } 7 | 8 | # Add packages to that list to ensure they are installed on the image 9 | # and prevent future regression. 10 | test_that("imports", { 11 | import_pkgs <- function() { 12 | Library("bitops") 13 | Library("colorspace") 14 | Library("dichromat") 15 | Library("digest") 16 | Library("dplyr") 17 | Library("fftw") 18 | Library("fslr") 19 | Library("ggforce") 20 | Library("ggrepel") 21 | Library("gtable") 22 | Library("hrbrthemes") 23 | Library("imager") 24 | Library("knitr") 25 | Library("labeling") 26 | Library("lightgbm") 27 | Library("mime") 28 | Library("munsell") 29 | Library("plyr") 30 | Library("proto") 31 | Library("randomForest") 32 | Library("RColorBrewer") 33 | Library("Rcpp") 34 | Library("RCurl") 35 | Library("readr") 36 | Library("reshape2") 37 | Library("rstan") 38 | Library("Rtsne") 39 | Library("scales") 40 | Library("seewave") 41 | Library("stringr") 42 | Library("tesseract") 43 | Library("tidyr") 44 | Library("xgboost") 45 | Library("zoo") 46 | 47 | # bioconductor 48 | Library("BiocGenerics") 49 | Library("EBImage") 50 | Library("limma") 51 | Library("rhdf5") 52 | } 53 | 54 | # expect no error to be thrown 55 | expect_error(import_pkgs(), NA) 56 | }) 57 | -------------------------------------------------------------------------------- /push: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | usage() { 5 | cat << EOF 6 | Usage: $0 [OPTIONS] [LABEL] 7 | Push a newly-built image with the given LABEL to gcr.io and DockerHub. 8 | Options: 9 | -g, --gpu Push the image with GPU support. 
10 | -s, --source-image IMAGE Tag for the source image. 11 | EOF 12 | } 13 | 14 | SOURCE_IMAGE_TAG='kaggle/rstats-build:latest' 15 | SOURCE_IMAGE_TAG_OVERRIDE='' 16 | TARGET_IMAGE='gcr.io/kaggle-images/rstats' 17 | 18 | while :; do 19 | case "$1" in 20 | -h|--help) 21 | usage 22 | exit 23 | ;; 24 | -g|--gpu) 25 | SOURCE_IMAGE_TAG='kaggle/rstats-gpu-build:latest' 26 | TARGET_IMAGE='gcr.io/kaggle-private-byod/rstats' 27 | ;; 28 | -s|--source-image) 29 | if [[ -z $2 ]]; then 30 | usage 31 | printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 32 | exit 33 | fi 34 | SOURCE_IMAGE_TAG_OVERRIDE=$2 35 | shift # skip the flag value 36 | ;; 37 | -?*) 38 | usage 39 | printf 'ERROR: Unknown option: %s\n' "$1" >&2 40 | exit 41 | ;; 42 | *) 43 | break 44 | esac 45 | 46 | shift 47 | done 48 | 49 | LABEL=${1:-testing} 50 | 51 | if [[ -n "$SOURCE_IMAGE_TAG_OVERRIDE" ]]; then 52 | SOURCE_IMAGE_TAG="$SOURCE_IMAGE_TAG_OVERRIDE" 53 | fi 54 | 55 | readonly SOURCE_IMAGE_TAG 56 | readonly TARGET_IMAGE 57 | readonly LABEL 58 | 59 | set -x 60 | 61 | docker tag "${SOURCE_IMAGE_TAG}" "${TARGET_IMAGE}:${LABEL}" 62 | gcloud docker -- push "${TARGET_IMAGE}:${LABEL}" 63 | -------------------------------------------------------------------------------- /kaggle/kaggle_secrets.R: -------------------------------------------------------------------------------- 1 | # This library adds support for User Secrets, which can be added to 2 | # the Notebook by selecting Add-Ons toolbar -> Secrets. 3 | # 4 | # Sample user code: 5 | # 6 | # paste(get_user_secret('r_secret')) 7 | 8 | get_user_secret <- function(label) { 9 | KAGGLE_USER_SECRETS_TOKEN <- Sys.getenv("KAGGLE_USER_SECRETS_TOKEN") 10 | KAGGLE_BASE_URL <- Sys.getenv("KAGGLE_URL_BASE") 11 | KAGGLE_IAP_TOKEN <- Sys.getenv("KAGGLE_IAP_TOKEN") 12 | GET_USER_SECRET_BY_LABEL_ENDPOINT = "/requests/GetUserSecretByLabelRequest" 13 | 14 | if (KAGGLE_USER_SECRETS_TOKEN == '') { 15 | stop("Expected KAGGLE_USER_SECRETS_TOKEN environment variable to be present.", call. = FALSE) 16 | } 17 | request_body <- list(Label = label) 18 | auth_header <- paste0("Bearer ", KAGGLE_USER_SECRETS_TOKEN) 19 | if (KAGGLE_IAP_TOKEN != '') { 20 | iap_auth_header <- paste0("Bearer ", KAGGLE_IAP_TOKEN) 21 | headers <- add_headers(c("X-Kaggle-Authorization" = auth_header, "Authorization" = iap_auth_header)) 22 | } else { 23 | headers <- add_headers(c("X-Kaggle-Authorization" = auth_header)) 24 | } 25 | response <- POST( 26 | paste0(KAGGLE_BASE_URL, GET_USER_SECRET_BY_LABEL_ENDPOINT), 27 | headers, 28 | # Reset the cookies on each request, since the server expects none. 29 | handle = handle(''), 30 | body = request_body, 31 | encode = "json" 32 | ) 33 | if (http_error(response) || !identical(content(response)$wasSuccessful, TRUE)) { 34 | err <- paste("Unable to get user secret. Please ensure you have internet enabled. Error: ", 35 | paste(content(response, "text", encoding = 'utf-8'))) 36 | stop(err, call. = FALSE) 37 | } 38 | response_body <- content(response) 39 | return(response_body$result$secret) 40 | } 41 | -------------------------------------------------------------------------------- /package_installs.R: -------------------------------------------------------------------------------- 1 | library(devtools) 2 | options(repos = c("CRAN" = "http://cran.us.r-project.org")) 3 | options(Ncpus = parallel::detectCores()) 4 | 5 | # Set download method, to avoid the default behavior of using 6 | # R's internal HTTP implementation, which doesn't support HTTPS connections. 
7 | # https://stackoverflow.com/questions/45061272/r-and-ssl-curl-on-ubuntu-linux-failed-ssl-connect-in-r-but-works-in-curl 8 | options(download.file.method = "libcurl") 9 | 10 | # Install the lightGBM installer package 11 | install_github("Laurae2/lgbdl") 12 | lgbdl::lgb.dl(compiler = "gcc", commit = "tags/v2.3.1") 13 | 14 | install_github("dgrtwo/widyr") 15 | install_github("ellisp/forecastxgb-r-package/pkg") 16 | install_github("rstudio/leaflet") 17 | # install_github fails for catboost. 18 | # Following direct installation instructions instead: https://tech.yandex.com/catboost/doc/dg/installation/r-installation-binary-installation-docpage/ 19 | install_url('https://github.com/catboost/catboost/releases/download/v0.23.2/catboost-R-Linux-0.23.2.tgz', INSTALL_opts = c("--no-multiarch")) 20 | install_github("sassalley/hexmapr") 21 | install_github("hadley/multidplyr") 22 | install_github("dselivanov/LSHR") 23 | 24 | # install latest sparklyr and Spark (for local mode) 25 | install_github("rstudio/sparklyr") 26 | sparklyr::spark_install() 27 | 28 | install.packages("genderdata", repos = "http://packages.ropensci.org") 29 | 30 | install.packages("openNLPmodels.en", 31 | repos = "http://datacube.wu.ac.at/", 32 | type = "source") 33 | 34 | install_github("davpinto/fastknn") 35 | install_github("mukul13/rword2vec") 36 | 37 | # b/232137539 Removed from RCRAN but required for Neurohacking in R coursera course 38 | install_github("muschellij2/neurobase") 39 | install_github("muschellij2/fslr") 40 | 41 | # These signal processing libraries are on CRAN, but they require apt-get dependences that are 42 | # handled in this image's Dockerfile. 43 | install.packages("fftw") 44 | 45 | # https://github.com/Kaggle/docker-rstats/issues/74 46 | install_github("thomasp85/patchwork") 47 | 48 | # https://github.com/Kaggle/docker-rstats/issues/73 49 | install.packages("topicmodels") 50 | 51 | install.packages("tesseract") 52 | 53 | # Try to reinstall igraph and imager her until fixed in rcran. 54 | install.packages("igraph") 55 | install.packages("imager") 56 | 57 | # Torch: install the full package upfront otherwise it will be installed on loading the package which doesn't work for kernels 58 | # without internet (competitions for example). 59 | install.packages("torch") 60 | library(torch) 61 | install_torch() 62 | 63 | install.packages(c('collections', 'languageserver'), dependencies=TRUE) 64 | 65 | # The tfhub package is added to the rcran image. 66 | library(tfhub) 67 | install_tfhub() 68 | -------------------------------------------------------------------------------- /diff: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | usage() { 5 | cat << EOF 6 | Usage: $0 [OPTIONS] 7 | Compare a given Docker image package versions against the prod image. 8 | 9 | Options: 10 | -g, --gpu Compare GPU images. 11 | -b, --base The base image to diff against. 12 | -t, --target The image to diff against the base image. 13 | Default is the locally built image. 
14 | EOF 15 | } 16 | 17 | 18 | BASE_IMAGE_TAG='gcr.io/kaggle-images/rstats:latest' 19 | BASE_IMAGE_TAG_OVERRIDE='' 20 | TARGET_IMAGE_TAG='kaggle/rstats-build' 21 | TARGET_IMAGE_TAG_OVERRIDE='' 22 | 23 | while :; do 24 | case "$1" in 25 | -h|--help) 26 | usage 27 | exit 28 | ;; 29 | -g|--gpu) 30 | BASE_IMAGE_TAG='gcr.io/kaggle-private-byod/rstats:latest' 31 | TARGET_IMAGE_TAG='kaggle/rstats-gpu-build' 32 | ;; 33 | -b|--base) 34 | if [[ -z "$2" ]]; then 35 | usage 36 | printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 37 | exit 38 | fi 39 | BASE_IMAGE_TAG_OVERRIDE="$2" 40 | shift # skip the flag value 41 | ;; 42 | -t|--target) 43 | if [[ -z "$2" ]]; then 44 | usage 45 | printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 46 | exit 47 | fi 48 | TARGET_IMAGE_TAG_OVERRIDE="$2" 49 | shift # skip the flag value 50 | ;; 51 | -?*) 52 | usage 53 | printf 'ERROR: Unknown option: %s\n' "$1" >&2 54 | exit 55 | ;; 56 | *) 57 | break 58 | esac 59 | 60 | shift 61 | done 62 | 63 | if [[ -n "$BASE_IMAGE_TAG_OVERRIDE" ]]; then 64 | BASE_IMAGE_TAG="$BASE_IMAGE_TAG_OVERRIDE" 65 | fi 66 | 67 | if [[ -n "$TARGET_IMAGE_TAG_OVERRIDE" ]]; then 68 | TARGET_IMAGE_TAG="$TARGET_IMAGE_TAG_OVERRIDE" 69 | fi 70 | 71 | readonly BASE_IMAGE_TAG 72 | readonly TARGET_IMAGE_TAG 73 | 74 | echo "Base: $BASE_IMAGE_TAG" 75 | echo "Target: $TARGET_IMAGE_TAG" 76 | 77 | if [[ "$BASE_IMAGE_TAG" == "gcr.io/"* ]]; then 78 | docker pull "$BASE_IMAGE_TAG" 79 | fi 80 | 81 | CMDS=("Rscript /tools/r_list_versions.R | sort" "pip freeze" 'cat /etc/os-release | grep -oP "PRETTY_NAME=\"\K([^\"]*)"' "uname -r" "dpkg --list | awk '{print \$2\"==\"\$3}'") 82 | for cmd in "${CMDS[@]}"; do 83 | echo "== Comparing $cmd ==" 84 | diff --suppress-common-lines --side-by-side \ 85 | <(docker run -v $PWD/tools:/tools --rm "$BASE_IMAGE_TAG" /bin/bash -c "$cmd") \ 86 | <(docker run -v $PWD/tools:/tools --rm "$TARGET_IMAGE_TAG" /bin/bash -c "$cmd") \ 87 | && echo 'No diff' || true 88 | done 89 | -------------------------------------------------------------------------------- /kaggle/kaggle_bigquery.R: -------------------------------------------------------------------------------- 1 | # This library adds support for BigQuery (via the bigrquery library), by using 2 | # Kaggle's UserSecrets service to retrieve an OAuth access token for the connected 3 | # credentials attached to the running Kernel. 4 | # 5 | # Sample user code: 6 | # 7 | # project <- "yes-theory-1" # put your project ID here 8 | # sql <- "SELECT year, month, day, weight_pounds FROM [publicdata:samples.natality] LIMIT 5" 9 | # query_exec(sql, project = project) 10 | 11 | KAGGLE_USER_SECRETS_TOKEN <- Sys.getenv("KAGGLE_USER_SECRETS_TOKEN") 12 | KAGGLE_BASE_URL <- Sys.getenv("KAGGLE_URL_BASE") 13 | KAGGLE_IAP_TOKEN <- Sys.getenv("KAGGLE_IAP_TOKEN") 14 | GET_USER_SECRET_ENDPOINT = "/requests/GetUserSecretRequest" 15 | 16 | # We create a Token2.0 Credential object (from httr library) and use bigrquery's set_access_cred 17 | # to override the interactive authentication (https://github.com/r-dbi/bigrquery/blob/master/R/auth.R). 18 | library(httr) 19 | TokenBigQueryKernel <- R6::R6Class("TokenBigQueryKernel", inherit = Token2.0, list( 20 | params = list(as_header = TRUE), 21 | endpoint = oauth_endpoints("google"), 22 | initialize = function() { 23 | }, 24 | can_refresh = function() { 25 | TRUE 26 | }, 27 | refresh = function() { 28 | if (KAGGLE_USER_SECRETS_TOKEN == '') { 29 | stop("Expected KAGGLE_USER_SECRETS_TOKEN environment variable to be present.", call. 
= FALSE) 30 | } 31 | request_body <- list(Target = 1) 32 | auth_header <- paste0("Bearer ", KAGGLE_USER_SECRETS_TOKEN) 33 | if (KAGGLE_IAP_TOKEN != '') { 34 | iap_auth_header <- paste0("Bearer ", KAGGLE_IAP_TOKEN) 35 | headers <- add_headers(c("X-Kaggle-Authorization" = auth_header, "Authorization" = iap_auth_header)) 36 | } else { 37 | headers <- add_headers(c("X-Kaggle-Authorization" = auth_header)) 38 | } 39 | response <- POST(paste0(KAGGLE_BASE_URL, GET_USER_SECRET_ENDPOINT), 40 | headers, 41 | # Reset the cookies on each request, since the server expects none. 42 | handle = handle(''), 43 | body = request_body, 44 | encode = "json") 45 | if (http_error(response) || !identical(content(response)$wasSuccessful, TRUE)) { 46 | err <- paste("Unable to refresh token. Please ensure you have a connected BigQuery account. Error: ", 47 | paste(content(response, "text", encoding = 'utf-8'))) 48 | stop(err, call. = FALSE) 49 | } 50 | response_body <- content(response) 51 | self$credentials$access_token <- response_body$result$secret 52 | self 53 | }, 54 | # Never cache 55 | cache = function(path) self, 56 | load_from_cache = function() self 57 | )) 58 | 59 | library(bigrquery) 60 | # A hack to allow users to use bigrquery directly. The "correct" way would be to use: 61 | # `bq_auth(scopes = NULL, token = TokenBigQueryKernel$new())`, but that would force auth immediately, 62 | # which would slow kernels starting and could cause errors on startup. 63 | auth <- getNamespace("bigrquery")$.auth 64 | auth$set_cred(TokenBigQueryKernel$new()) 65 | auth$set_auth_active(TRUE) 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # docker-rstats 2 | 3 | [Kaggle Notebooks](https://www.kaggle.com/notebooks) allow users to run scripts against our competitions and datasets without having to download data or set up their environment. 4 | 5 | Our R Docker images are stored on Google Container Registry at: 6 | 7 | * CPU-only: [gcr.io/kaggle-images/rstats](https://gcr.io/kaggle-images/rstats) 8 | * GPU: [gcr.io/kaggle-gpu-images/rstats](https://gcr.io/kaggle-gpu-images/rstats) 9 | 10 | Here's [an example](https://www.kaggle.com/benhamner/bike-sharing-demand/bike-rentals-by-time-and-temperature): 11 | 12 |  13 | 14 | This is the Dockerfile (etc.) used for building the image that runs R scripts on Kaggle. [Here's](https://registry.hub.docker.com/u/kaggle/rstats/) the Docker image on Dockerhub. 15 | 16 | ## Getting started 17 | 18 | To get started with this image, read our [guide](http://blog.kaggle.com/2016/02/05/how-to-get-started-with-data-science-in-containers/) to using it yourself, or browse [Kaggle Notebooks](https://www.kaggle.com/notebooks) for ideas. 19 | 20 | ## Requesting new features 21 | 22 | **We welcome pull requests** if there are any packages you'd like to add! 23 | 24 | We can merge your request quickly if you check that it builds correctly. Here's how to do that. 25 | 26 | ### New R libraries 27 | 28 | If you want a library that's, say, on GitHub but not yet on CRAN, then you can add it to [`package_installs.R`](https://github.com/Kaggle/docker-rstats/blob/master/package_installs.R). To check that it will work, you can follow this example, which shows how to add a library called `coolstuff` that's available from GitHub user `nerdcha`. 
29 | 30 | ```bash 31 | me@my-computer:/home$ docker run --rm -it kaggle/rstats 32 | R version 3.3.1 (2016-06-21) -- "Bug in Your Hair" 33 | [...etc...] 34 | > library(devtools) 35 | > install_github("nerdcha/coolstuff") 36 | Downloading GitHub repo nerdcha/coolstuff@master 37 | [...etc...] 38 | ** testing if installed package can be loaded 39 | * DONE (coolstuff) 40 | > library(coolstuff) 41 | > 42 | ``` 43 | 44 | Everything worked, so we can add the line `install_github("nerdcha/coolstuff")` to `package_installs.R` and submit the pull request. 45 | 46 | ### New libraries with complex dependencies 47 | 48 | Some libraries will need extra system support to work. Installing them follows a pretty similar pattern; just try whatever prerequisites the package maintainer says are needed for a Linux system. For example, if the `coolstuff` package says to run `apt-get install libcool-dev` first, then you can test it in the following way. 49 | 50 | ```bash 51 | me@my-computer:/home$ docker run --rm -it kaggle/rstats /bin/bash 52 | root@2dd4317c8799:/# apt-get update 53 | Ign:1 http://ftp.de.debian.org/debian jessie InRelease 54 | [...] 55 | root@2dd4317c8799:/# apt-get install libcool-dev 56 | Reading package lists... Done 57 | [...] 58 | root@2dd4317c8799:/# R 59 | R version 3.3.1 (2016-06-21) -- "Bug in Your Hair" 60 | [...] 61 | > library(devtools) 62 | > install_github("nerdcha/coolstuff") 63 | Downloading GitHub repo nerdcha/coolstuff@master 64 | [...] 65 | ** testing if installed package can be loaded 66 | * DONE (coolstuff) 67 | > library(coolstuff) 68 | > 69 | ``` 70 | 71 | If that's all working as expected, then you can add `apt-get install libcool-dev` to the end of the [`Dockerfile`](https://github.com/Kaggle/docker-rstats/blob/master/Dockerfile), and `install_github("nerdcha/coolstuff")` to `package_installs.R`. 72 | 73 | -------------------------------------------------------------------------------- /test: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | IMAGE_TAG='kaggle/rstats-build' 5 | IMAGE_TAG_OVERRIDE='' 6 | ADDITONAL_OPTS='' 7 | TEST_FILE='' 8 | 9 | usage() { 10 | cat << EOF 11 | Usage: $0 [OPTIONS] 12 | Run tests for a newly-built R Docker image. 13 | By default, it runs the tests for the CPU image. 14 | Options: 15 | -g, --gpu Run tests for the GPU image. 16 | -i, --image IMAGE Run tests against the specified image 17 | -t, --test_file FILENAME Run all tests for the specified file (e.g. 
test_keras.R) 18 | EOF 19 | } 20 | 21 | while :; do 22 | case "$1" in 23 | -h|--help) 24 | usage 25 | exit 26 | ;; 27 | -g|--gpu) 28 | IMAGE_TAG='kaggle/rstats-gpu-build' 29 | ADDITONAL_OPTS='-v /tmp/empty_dir:/usr/local/cuda/lib64/stubs:ro' 30 | ;; 31 | -i|--image) 32 | if [[ -z $2 ]]; then 33 | usage 34 | printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 35 | exit 36 | fi 37 | IMAGE_TAG_OVERRIDE=$2 38 | shift # skip the flag value 39 | ;; 40 | -t|--test_file) 41 | if [[ -z $2 ]]; then 42 | usage 43 | printf 'ERROR: No FILENAME specified after the %s flag.\n' "$1" >&2 44 | exit 45 | fi 46 | TEST_FILE=$2 47 | shift # skip the flag value 48 | ;; 49 | -?*) 50 | usage 51 | printf 'ERROR: Unknown option: %s\n' "$1" >&2 52 | exit 53 | ;; 54 | *) 55 | break 56 | esac 57 | 58 | shift 59 | done 60 | 61 | if [[ -n "$IMAGE_TAG_OVERRIDE" ]]; then 62 | IMAGE_TAG="$IMAGE_TAG_OVERRIDE" 63 | fi 64 | 65 | readonly IMAGE_TAG 66 | readonly ADDITONAL_OPTS 67 | readonly TEST_FILE 68 | 69 | set -x 70 | docker rm jupyter_test_r || true 71 | rm -rf /tmp/rstats-build 72 | mkdir -p /tmp/rstats-build/tmp 73 | mkdir -p /tmp/rstats-build/devshm 74 | mkdir -p /tmp/rstats-build/working 75 | 76 | # Check that Jupyter server can run; if it dies on startup, the `docker kill` command will throw an error 77 | docker run -d --name=jupyter_test_r --read-only --net=none \ 78 | -e HOME=/tmp \ 79 | -e NVIDIA_DISABLE_REQUIRE=1 \ 80 | -v $PWD:/input:ro -v /tmp/rstats-build/working:/working \ 81 | -v /tmp/rstats-build/tmp:/tmp -v /tmp/rstats-build/devshm:/dev/shm \ 82 | -w=/working \ 83 | "$IMAGE_TAG" jupyter notebook --allow-root --ip="*" 84 | sleep 3 85 | docker kill jupyter_test_r && docker rm jupyter_test_r 86 | 87 | # Check that papermill is installed in python (b/191304257). 88 | docker run --rm -e NVIDIA_DISABLE_REQUIRE=1 --name=papermill_test_r --read-only --net=none \ 89 | "$IMAGE_TAG" python -c 'import sys;import papermill as pm; print(pm.__version__)' 90 | 91 | 92 | # TF_FORCE_GPU_ALLOW_GROWTH is to prevent tensorflow from allocating the totality of a GPU memory. 
93 | # https://stackoverflow.com/questions/34199233/how-to-prevent-tensorflow-from-allocating-the-totality-of-a-gpu-memory/55541385#55541385 94 | docker run --rm -t --net=none \ 95 | -e HOME=/tmp \ 96 | -e TF_FORCE_GPU_ALLOW_GROWTH=true \ 97 | -e NVIDIA_DISABLE_REQUIRE=1 \ 98 | -v $PWD:/input:ro -v /tmp/rstats-build/working:/working \ 99 | -v /tmp/rstats-build/tmp:/tmp -v /tmp/rstats-build/devshm:/dev/shm \ 100 | -w=/working \ 101 | $ADDITONAL_OPTS \ 102 | "$IMAGE_TAG" \ 103 | /bin/bash -c "/input/testthat.R $TEST_FILE" 104 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_TAG=latest 2 | 3 | FROM gcr.io/kaggle-images/rcran:${BASE_TAG} 4 | 5 | ARG PYTHON_VERSION=3.10 6 | 7 | ADD clean-layer.sh /tmp/clean-layer.sh 8 | 9 | # Install Python 10 | RUN apt-get install -y software-properties-common && \ 11 | add-apt-repository ppa:deadsnakes/ppa -y && \ 12 | apt-get update && \ 13 | echo "MOD: python${PYTHON_VERSION}" && \ 14 | apt-get install -y python${PYTHON_VERSION} && \ 15 | ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python && \ 16 | curl -sS https://bootstrap.pypa.io/get-pip.py | python && \ 17 | /tmp/clean-layer.sh 18 | 19 | RUN apt-get update && \ 20 | apt-get install -y libzmq3-dev default-jdk && \ 21 | apt-get install -y python${PYTHON_VERSION}-dev python3-venv libcurl4-openssl-dev libssl-dev && \ 22 | pip install jupyter pycurl && \ 23 | # Install older tornado - https://github.com/jupyter/notebook/issues/4437 24 | pip install "tornado<6" && \ 25 | pip install notebook && \ 26 | pip install nbconvert && \ 27 | R -e 'IRkernel::installspec()' && \ 28 | # Build pyzmq from source instead of using a pre-built binary. 29 | yes | pip uninstall pyzmq && \ 30 | pip install pyzmq --no-binary pyzmq && \ 31 | cp -r /root/.local/share/jupyter/kernels/ir /usr/local/share/jupyter/kernels && \ 32 | # Make sure Jupyter won't try to "migrate" its junk in a read-only container 33 | mkdir -p /root/.jupyter/kernels && \ 34 | cp -r /root/.local/share/jupyter/kernels/ir /root/.jupyter/kernels && \ 35 | touch /root/.jupyter/jupyter_nbconvert_config.py && touch /root/.jupyter/migrated && \ 36 | # papermill can replace nbconvert for executing notebooks 37 | pip install papermill && \ 38 | # b/276358430 fix Jupyter lsp freezing up the jupyter server 39 | pip install jupyterlab-lsp "jupyter-lsp==1.5.1" && \ 40 | /tmp/clean-layer.sh 41 | 42 | # Miniconda 43 | ARG MINICONDA_PATH=/root/.local/share/r-miniconda 44 | ARG ENV_NAME=r-reticulate 45 | RUN R -e "reticulate::install_miniconda(path = \"${MINICONDA_PATH}\", update = TRUE, force = TRUE)" 46 | RUN R -e "reticulate::conda_create(envname = \"${ENV_NAME}\", conda = \"auto\", required = TRUE, python_version = \"${PYTHON_VERSION}\")" 47 | ENV RETICULATE_PYTHON="${MINICONDA_PATH}/envs/${ENV_NAME}/bin/python" 48 | 49 | # Tensorflow and Keras 50 | ARG TENSORFLOW_VERSION=2.12.0 51 | RUN R -e "keras::install_keras(tensorflow = \"${TENSORFLOW_VERSION}\", extra_packages = c(\"pandas\", \"numpy\", \"pycryptodome\"), method=\"conda\", envname=\"${ENV_NAME}\")" 52 | 53 | # Install kaggle libraries. 54 | # Do this at the end to avoid rebuilding everything when any change is made. 55 | ADD kaggle/ /kaggle/ 56 | # RProfile sources files from /kaggle/ so ensure this runs after ADDing it. 
57 | ENV R_HOME=/usr/local/lib/R 58 | ADD RProfile.R /usr/local/lib/R/etc/Rprofile.site 59 | ADD install_iR.R /tmp/install_iR.R 60 | ADD bioconductor_installs.R /tmp/bioconductor_installs.R 61 | ADD package_installs.R /tmp/package_installs.R 62 | ADD nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl 63 | ADD kaggle/template_conf.json /opt/kaggle/conf.json 64 | # Install with `--vanilla` flag to avoid conflict. https://support.bioconductor.org/p/57187/ 65 | RUN Rscript --vanilla /tmp/package_installs.R 66 | RUN Rscript --vanilla /tmp/bioconductor_installs.R 67 | RUN Rscript --vanilla /tmp/install_iR.R 68 | 69 | ARG GIT_COMMIT=unknown 70 | ARG BUILD_DATE_RSTATS=unknown 71 | 72 | LABEL git-commit=$GIT_COMMIT 73 | LABEL build-date=$BUILD_DATE_RSTATS 74 | 75 | # Find the current release git hash & build date inside the kernel editor. 76 | RUN echo "$GIT_COMMIT" > /etc/git_commit && echo "$BUILD_DATE_RSTATS" > /etc/build_date 77 | 78 | CMD ["R"] 79 | -------------------------------------------------------------------------------- /gpu.Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_TAG=staging 2 | FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu18.04 AS nvidia 3 | FROM gcr.io/kaggle-images/rstats:${BASE_TAG} 4 | ARG ncpus=1 5 | 6 | ADD clean-layer.sh /tmp/clean-layer.sh 7 | 8 | # Cuda support 9 | COPY --from=nvidia /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/ 10 | COPY --from=nvidia /etc/apt/trusted.gpg /etc/apt/trusted.gpg.d/cuda.gpg 11 | 12 | ENV CUDA_MAJOR_VERSION=11 13 | ENV CUDA_MINOR_VERSION=7 14 | ENV CUDA_PATCH_VERSION=0 15 | ENV CUDA_VERSION=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION.$CUDA_PATCH_VERSION 16 | ENV CUDA_PKG_VERSION=$CUDA_MAJOR_VERSION-$CUDA_MINOR_VERSION 17 | ENV CUDNN_VERSION=8.5.0.96 18 | ENV NCCL_VERSION=2.13.4-1 19 | LABEL com.nvidia.volumes.needed="nvidia_driver" 20 | LABEL com.nvidia.cuda.version="${CUDA_VERSION}" 21 | LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" 22 | ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} 23 | # The stub is useful to us both for built-time linking and run-time linking, on CPU-only systems. 24 | # When intended to be used with actual GPUs, make sure to (besides providing access to the host 25 | # CUDA user libraries, either manually or through the use of nvidia-docker) exclude them. One 26 | # convenient way to do so is to obscure its contents by a bind mount: 27 | # docker run .... -v /non-existing-directory:/usr/local/cuda/lib64/stubs:ro ... 
28 | ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs" 29 | ENV NVIDIA_VISIBLE_DEVICES=all 30 | ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility 31 | ENV NVIDIA_REQUIRE_CUDA="cuda>=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION" 32 | RUN apt-get update && apt-get install -y --no-install-recommends \ 33 | cuda-cupti-$CUDA_PKG_VERSION \ 34 | cuda-cudart-$CUDA_PKG_VERSION \ 35 | cuda-cudart-dev-$CUDA_PKG_VERSION \ 36 | cuda-libraries-$CUDA_PKG_VERSION \ 37 | cuda-libraries-dev-$CUDA_PKG_VERSION \ 38 | cuda-nvml-dev-$CUDA_PKG_VERSION \ 39 | cuda-minimal-build-$CUDA_PKG_VERSION \ 40 | cuda-command-line-tools-$CUDA_PKG_VERSION \ 41 | libcudnn8=$CUDNN_VERSION-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \ 42 | libcudnn8-dev=$CUDNN_VERSION-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \ 43 | libcublas-$CUDA_PKG_VERSION \ 44 | libcublas-dev-$CUDA_PKG_VERSION \ 45 | libnccl2=$NCCL_VERSION+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \ 46 | libnccl-dev=$NCCL_VERSION+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \ 47 | /tmp/clean-layer.sh 48 | 49 | ENV CUDA_HOME=/usr/local/cuda 50 | 51 | # Hack to fix R trying to use CUDA in `/usr/lib/x86_64-linux-gnu` directory instead 52 | # of `/usr/local/nvidia/lib64` (b/152401083). 53 | # For some reason, the CUDA file `libcuda.so.418.67` in the former directory is empty. 54 | # R's ldpaths modifies LD_LIBRARY_PATH on start by adding `/usr/lib/x86_64-linux-gnu` upfront. 55 | # Instead, this version of ldpaths adds it at the end. 56 | ADD ldpaths $R_HOME/etc/ldpaths 57 | 58 | # Install tensorflow with GPU support 59 | ARG TENSORFLOW_VERSION=2.11.0 60 | RUN R -e "keras::install_keras(version = \"${TENSORFLOW_VERSION}-gpu\", method = \"conda\", conda = \"auto\", envname=\"r-reticulate\")" && \ 61 | rm -rf /tmp/tensorflow_gpu && \ 62 | /tmp/clean-layer.sh 63 | 64 | # OpenCL for bayesCL, gpuR, ... 65 | RUN apt-get install -y --no-install-recommends ocl-icd-opencl-dev && \ 66 | mkdir -p /etc/OpenCL/vendors && \ 67 | echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd 68 | 69 | # Install GPU specific packages 70 | RUN CPATH=/usr/local/cuda/targets/x86_64-linux/include install2.r --error --ncpus $ncpus --repo http://cran.rstudio.com \ 71 | h2o4gpu 72 | 73 | # Torch: install the full package upfront otherwise it will be installed on loading the package which doesn't work for kernels 74 | # without internet (competitions for example). It will detect CUDA and install the proper version. 
75 | # Make Torch think we use CUDA 11.8 (https://github.com/mlverse/torch/issues/807) 76 | ENV CUDA=11.7 77 | RUN R -e 'install.packages("torch")' 78 | RUN R -e 'library(torch); install_torch()' 79 | 80 | CMD ["R"] 81 | -------------------------------------------------------------------------------- /tests/test_keras.R: -------------------------------------------------------------------------------- 1 | context("keras") 2 | 3 | test_that("model training", { 4 | library(keras) 5 | 6 | x_train <- matrix(rnorm(100 * 10), nrow = 100) 7 | y_train <- to_categorical(matrix(sample(0:2, 100, TRUE), ncol = 1), 3) 8 | 9 | model <- keras_model_sequential() 10 | model %>% 11 | layer_dense(units=100, activation='relu', input_shape=dim(x_train)[2]) %>% 12 | layer_dropout(rate=0.4) %>% 13 | layer_dense(unit=3, activation='softmax') 14 | 15 | optimizers <- keras::keras$optimizers 16 | 17 | model %>% compile( 18 | loss = 'categorical_crossentropy', 19 | optimizer = optimizers$RMSprop(), 20 | metrics = c('accuracy') 21 | ) 22 | 23 | history <- model %>% fit( 24 | x_train, y_train, 25 | epochs=5, batch_size = 8, 26 | validation_split=0.2 27 | ) 28 | 29 | expect_is(history, "keras_training_history") 30 | }) 31 | 32 | test_that("CNN model training", { 33 | library(keras) 34 | 35 | # Preprocess data 36 | train.label<- to_categorical(matrix(sample(0:9, 100, TRUE), ncol = 1), 10) 37 | train.feature<- matrix(sample(0:255, 28 * 28 * 100, TRUE), nrow = 100) 38 | dim(train.feature)<-c(nrow(train.feature), 28, 28, 1) 39 | 40 | # Build simple CNN 41 | model<-keras_model_sequential() 42 | 43 | model %>% 44 | layer_conv_2d(filters = 32, kernel_size = c(5,5),padding = 'Valid', activation = 'relu', input_shape = c(28,28,1)) %>% 45 | layer_batch_normalization() %>% 46 | layer_conv_2d(filters = 32, kernel_size = c(5,5),padding = 'Same', activation = 'relu') %>% 47 | layer_batch_normalization() %>% 48 | layer_max_pooling_2d(pool_size = c(2, 2)) %>% 49 | layer_dropout(rate = 0.2) %>% 50 | layer_conv_2d(filters = 64, kernel_size = c(3,3),padding = 'Same', activation = 'relu') %>% 51 | layer_batch_normalization()%>% 52 | layer_conv_2d(filters = 64, kernel_size = c(3,3),padding = 'Same', activation = 'relu') %>% 53 | layer_batch_normalization() %>% 54 | layer_max_pooling_2d(pool_size = c(2, 2)) %>% 55 | layer_dropout(rate = 0.2) %>% 56 | layer_flatten() %>% 57 | layer_dense(units=1024,activation='relu') %>% 58 | layer_dense(units=512,activation='relu') %>% 59 | layer_dense(units=256,activation='relu') %>% 60 | layer_dense(units=10,activation='softmax') 61 | 62 | model %>% compile( 63 | loss='categorical_crossentropy', 64 | optimizer='adam', 65 | metrics='accuracy' 66 | ) 67 | 68 | # Train model 69 | datagen <- image_data_generator( 70 | featurewise_center = F, 71 | samplewise_center=F, 72 | featurewise_std_normalization = F, 73 | samplewise_std_normalization=F, 74 | zca_whitening=F, 75 | horizontal_flip = F, 76 | vertical_flip = F, 77 | width_shift_range = 0.15, 78 | height_shift_range = 0.15, 79 | zoom_range = 0.15, 80 | rotation_range = 0.15, 81 | shear_range = 0.15 82 | ) 83 | 84 | datagen %>% fit_image_data_generator(train.feature) 85 | 86 | history <- model %>% 87 | fit( 88 | flow_images_from_data(train.feature, train.label, datagen, batch_size = 10), 89 | steps_per_epoch = nrow(train.feature) / 10, 90 | epochs = 1) 91 | 92 | expect_is(history, "keras_training_history") 93 | }) 94 | 95 | test_that("flow_images_from_dataframe", { 96 | library(keras) 97 | library(readr) 98 | 99 | base_dir <- '/input/tests/data' 100 | 
test_labels <- read_csv("/input/tests/data/sample_submission.csv") 101 | 102 | test_labels$filename <- paste0(test_labels$id_code, ".png") 103 | 104 | pred <- flow_images_from_dataframe( 105 | dataframe = test_labels, 106 | x_col = "filename", 107 | y_col = NULL, 108 | directory = base_dir, 109 | shuffle = FALSE, 110 | class_mode = NULL, 111 | target_size = c(224, 224)) 112 | 113 | batch <- generator_next(pred, completed = NULL) 114 | expect_gt(length(batch), 0) 115 | }) 116 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | String cron_string = BRANCH_NAME == "main" ? "H 12 * * 1,3" : "" 2 | 3 | pipeline { 4 | agent { label 'ephemeral-linux' } 5 | options { 6 | // The Build GPU stage depends on the image from the Push CPU stage 7 | disableConcurrentBuilds() 8 | } 9 | triggers { 10 | cron(cron_string) 11 | } 12 | environment { 13 | GIT_COMMIT_SHORT = sh(returnStdout: true, script:"git rev-parse --short=7 HEAD").trim() 14 | GIT_COMMIT_SUBJECT = sh(returnStdout: true, script:"git log --format=%s -n 1 HEAD").trim() 15 | GIT_COMMIT_AUTHOR = sh(returnStdout: true, script:"git log --format='%an' -n 1 HEAD").trim() 16 | GIT_COMMIT_SUMMARY = "`