├── tests
│   ├── data
│   │   ├── sample_submission.csv
│   │   ├── testocr.png
│   │   └── notebook.ipynb
│   ├── test_secrets.R
│   ├── test_bigquery.R
│   ├── test_mxnet.R
│   ├── test_graphics.R
│   ├── test_tidymodels.R
│   ├── test_tesseract.R
│   ├── test_topicmodels.R
│   ├── test_papermill.R
│   ├── test_patchwork.R
│   ├── test_tensorflow.R
│   ├── test_torch.R
│   ├── test_languageserver.R
│   ├── test_gg.R
│   ├── test_jupyterlab-lsp.R
│   ├── test_nbconvert.R
│   ├── test_imports.R
│   └── test_keras.R
├── install_iR.R
├── tools
│   └── r_list_versions.R
├── kaggle
│   ├── template_conf.json
│   ├── kaggle_secrets.R
│   └── kaggle_bigquery.R
├── nbconvert-extensions.tpl
├── testthat.R
├── clean-layer.sh
├── bioconductor_installs.R
├── ldpaths
├── RProfile.R
├── push
├── package_installs.R
├── diff
├── README.md
├── test
├── Dockerfile
├── gpu.Dockerfile
├── Jenkinsfile
└── LICENSE
/tests/data/sample_submission.csv:
--------------------------------------------------------------------------------
1 | id_code,diagnosis
2 | testocr,0
--------------------------------------------------------------------------------
/tests/data/testocr.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kaggle/docker-rstats/HEAD/tests/data/testocr.png
--------------------------------------------------------------------------------
/tests/test_secrets.R:
--------------------------------------------------------------------------------
1 | context("user_secrets")
2 | 
3 | test_that("get_user_secret exists", {
4 |   exists('get_user_secret')
5 | })
6 | 
--------------------------------------------------------------------------------
/install_iR.R:
--------------------------------------------------------------------------------
1 | library(devtools)
2 | install_github('IRkernel/repr')
3 | install_github('IRkernel/IRdisplay')
4 | install_github('IRkernel/IRkernel')
5 | 
--------------------------------------------------------------------------------
/tests/test_bigquery.R:
--------------------------------------------------------------------------------
1 | context("bigquery")
2 | 
3 | test_that("bigquery exists", {
4 |   exists('TokenBigQueryKernel')
5 |   exists('query_exec')
6 | })
7 | 
--------------------------------------------------------------------------------
/tests/test_mxnet.R:
--------------------------------------------------------------------------------
1 | context("mxnet")
2 | 
3 | test_that("mxnet", {
4 |   library("xgboost")
5 |   library("mxnet")
6 |   a = mx.nd.ones(c(2,3))
7 | 
8 |   expect_equal(6, length(a))
9 | })
10 | 
--------------------------------------------------------------------------------
/tools/r_list_versions.R:
--------------------------------------------------------------------------------
1 | ip <- as.data.frame(installed.packages()[,c(1,3:4)])
2 | ip <- ip[is.na(ip$Priority),1:2,drop=FALSE]
3 | write.table(ip, quote=FALSE, sep="==", row.names=FALSE, col.names=FALSE)
--------------------------------------------------------------------------------
/tests/test_graphics.R:
--------------------------------------------------------------------------------
1 | context("graphics")
2 | 
3 | test_that("plot", {
4 |   testImage <- "/working/base_graphics_test.jpg"
5 |   jpeg(testImage)
6 |   plot(runif(10))
7 |   dev.off()
8 |   expect_true(file.exists(testImage))
9 | })
10 | 
--------------------------------------------------------------------------------
/tests/test_tidymodels.R:
--------------------------------------------------------------------------------
1 | context("tidymodels")
2 | 
3 | test_that("tidymodels exists", {
4 | 
library(tidymodels) 5 | }) 6 | 7 | test_that("broom", { 8 | library(broom) 9 | fit <- lm(mpg ~ wt, mtcars) 10 | expect_equal(ncol(tidy(fit)), 5) 11 | }) 12 | -------------------------------------------------------------------------------- /tests/test_tesseract.R: -------------------------------------------------------------------------------- 1 | context("tesseract") 2 | 3 | test_that("ocr", { 4 | library(tesseract) 5 | eng <- tesseract("eng") 6 | fpath <- file.path('/input/tests/data/testocr.png') 7 | text <- tesseract::ocr(fpath, engine = eng) 8 | expect_match(text, "This is a lot of 12 point text") 9 | }) 10 | -------------------------------------------------------------------------------- /tests/test_topicmodels.R: -------------------------------------------------------------------------------- 1 | context("topicmodels") 2 | 3 | test_that("basic topic model example", { 4 | expect_error({ 5 | library(topicmodels) 6 | data("AssociatedPress") 7 | ap_lda <- LDA(AssociatedPress, k = 2, control = list(seed = 1234)) 8 | }, NA) # expect no error to be thrown 9 | }) 10 | -------------------------------------------------------------------------------- /kaggle/template_conf.json: -------------------------------------------------------------------------------- 1 | { 2 | "base_template": "classic", 3 | "mimetypes": { 4 | "text/html": true 5 | }, 6 | "preprocessors": { 7 | "100-pygments": { 8 | "type": "nbconvert.preprocessors.CSSHTMLHeaderPreprocessor", 9 | "enabled": true, 10 | "style": "default" 11 | } 12 | } 13 | } -------------------------------------------------------------------------------- /tests/test_papermill.R: -------------------------------------------------------------------------------- 1 | context("papermill") 2 | 3 | test_that("papermill exists", { 4 | expect_error({ 5 | library(jsonlite) 6 | 7 | results <- system("papermill /input/tests/data/notebook.ipynb -", 8 | intern = TRUE) 9 | json <- fromJSON(results, simplifyVector = FALSE) 10 | expect_equal(json$cells[[1]]$outputs[[1]]$text[[1]], "[1] 999\n") 11 | }, NA) # expect no error to be thrown 12 | }) 13 | -------------------------------------------------------------------------------- /tests/test_patchwork.R: -------------------------------------------------------------------------------- 1 | context("patchwork") 2 | 3 | # https://github.com/thomasp85/patchwork#basic-example 4 | test_that("basic example", { 5 | expect_error({ 6 | library(ggplot2) 7 | library(patchwork) 8 | 9 | p1 <- ggplot(mtcars) + geom_point(aes(mpg, disp)) 10 | p2 <- ggplot(mtcars) + geom_boxplot(aes(gear, disp, group = gear)) 11 | 12 | p1 + p2 13 | }, NA) # expect no error to be thrown 14 | }) 15 | -------------------------------------------------------------------------------- /tests/test_tensorflow.R: -------------------------------------------------------------------------------- 1 | context("tensorflow") 2 | 3 | test_that("check gpu device", { 4 | check_gpu() 5 | 6 | library(tensorflow) 7 | gpus = tf$config$experimental$list_physical_devices('GPU') 8 | expect_gte(length(gpus), 1) 9 | }) 10 | 11 | test_that("tensorflow with gpu", { 12 | check_gpu() 13 | 14 | library(tensorflow) 15 | with(tf$device("/gpu:0"), { 16 | const <- tf$constant(42) 17 | expect_equal(42, as.integer(const)) 18 | }) 19 | }) 20 | -------------------------------------------------------------------------------- /tests/test_torch.R: -------------------------------------------------------------------------------- 1 | context("torch") 2 | 3 | test_that("cpu imports", { 4 | library(torch) 5 
| x <- array(runif(8), dim = c(2, 2, 2)) 6 | y <- torch_tensor(x, dtype = torch_float64()) 7 | expect_identical(x, as_array(y)) 8 | }) 9 | 10 | test_that("gpu imports", { 11 | check_gpu() 12 | 13 | library(torch) 14 | x <- array(runif(8), dim = c(2, 2, 2)) 15 | y <- torch_tensor(x, dtype = torch_float64(), device = "cuda") 16 | expect_identical(x, as_array(y$cpu())) 17 | }) 18 | 19 | -------------------------------------------------------------------------------- /nbconvert-extensions.tpl: -------------------------------------------------------------------------------- 1 | {# 2 | Jinja template to inject notebook cell metadata to enhance generated HTML output 3 | All cell metadata starting with '_kg_' will be included with its value ({key}-{value}) 4 | as a class in the cell's DIV container 5 | #} 6 | 7 | {% extends 'classic/index.html.j2'%} 8 | {% block any_cell %} 9 |
12 | {% endblock any_cell %}
--------------------------------------------------------------------------------
/testthat.R:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env Rscript
2 | # Usage: testthat.R [TEST_FILE]
3 | #
4 | # TEST_FILE    Run tests for the specified TEST_FILE (e.g. 'test_keras.R').
5 | #
6 | library("testthat")
7 | 
8 | args = commandArgs(trailingOnly=TRUE)
9 | 
10 | check_gpu <- function() {
11 |   if (Sys.getenv("CUDA_VERSION") == "") {
12 |     skip("Skipping GPU tests for CPU image")
13 |   }
14 | }
15 | 
16 | if (length(args)==1) {
17 |   testthat::test_file(paste("/input/tests", args[1], sep="/"))
18 | } else {
19 |   testthat::test_dir("/input/tests", stop_on_failure=TRUE)
20 | }
21 | 
--------------------------------------------------------------------------------
/tests/test_languageserver.R:
--------------------------------------------------------------------------------
1 | context("languageserver")
2 | 
3 | test_that("languageserver responds to commands", {
4 |   expect_error({
5 |     result <- system(
6 |       "R -e 'languageserver::run()'",
7 |       input="Content-Length: 38\n\n{ \"id\": \"123\", \"method\": \"shutdown\" }\n",
8 |       intern=TRUE)
9 | 
10 |     found_response <- FALSE
11 |     for (line in result) {
12 |       if (grepl("\"id\":\"123\"", line, fixed=TRUE) & grepl("\"result\":[]", line, fixed=TRUE)) {
13 |         found_response <- TRUE
14 |       }
15 |     }
16 | 
17 |     expect_true(found_response)
18 |   }, NA) # expect no error to be thrown
19 | })
--------------------------------------------------------------------------------
/clean-layer.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # This script should be called at the end of each RUN command
4 | # in the Dockerfiles.
5 | #
6 | # Each RUN command creates a new layer that is stored separately.
7 | # At the end of each command, we should ensure we clean up downloaded
8 | # archives and source files used to produce binaries to reduce the size
9 | # of the layer.
10 | set -e
11 | set -x
12 | 
13 | # Delete files that pip caches when installing a package.
14 | rm -rf /root/.cache/pip/* 15 | # Delete old downloaded archive files 16 | apt-get autoremove -y 17 | # Delete downloaded archive files 18 | apt-get clean 19 | # Delete source files used for building binaries 20 | rm -rf /usr/local/src/* 21 | -------------------------------------------------------------------------------- /tests/data/notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "metadata": { 5 | "trusted": true 6 | }, 7 | "cell_type": "code", 8 | "source": "x <- 999\nprint(x)", 9 | "execution_count": null, 10 | "outputs": [] 11 | } 12 | ], 13 | "metadata": { 14 | "kernelspec": { 15 | "display_name": "R", 16 | "language": "R", 17 | "name": "ir" 18 | }, 19 | "language_info": { 20 | "mimetype": "text/x-r-source", 21 | "name": "R", 22 | "pygments_lexer": "r", 23 | "version": "3.4.2", 24 | "file_extension": ".r", 25 | "codemirror_mode": "r" 26 | } 27 | }, 28 | "nbformat": 4, 29 | "nbformat_minor": 4 30 | } -------------------------------------------------------------------------------- /bioconductor_installs.R: -------------------------------------------------------------------------------- 1 | options(repos = c("CRAN" = "http://cran.us.r-project.org")) 2 | options(Ncpus = parallel::detectCores()) 3 | 4 | if("devtools" %in% rownames(installed.packages()) == FALSE) 5 | install.packages("devtools") 6 | library(devtools) 7 | 8 | if (!requireNamespace("BiocManager", quietly = TRUE)) 9 | install.packages("BiocManager") 10 | 11 | BiocManager::install(update=FALSE, ask=FALSE) 12 | BiocManager::install("BiocGenerics", update=FALSE, ask=FALSE) 13 | install_version("locfit", version = "1.5.9.4", ask=FALSE) 14 | BiocManager::install("EBImage", update=FALSE, ask=FALSE) 15 | BiocManager::install("rhdf5", update=FALSE, ask=FALSE) 16 | BiocManager::install("limma", update=FALSE, ask=FALSE) 17 | -------------------------------------------------------------------------------- /tests/test_gg.R: -------------------------------------------------------------------------------- 1 | context("gg* packages") 2 | 3 | test_that("gganimate", { 4 | expect_error({ 5 | library("gganimate") 6 | library("gapminder") 7 | 8 | testPlot2 <- ggplot(gapminder, 9 | aes(gdpPercap, lifeExp, size = pop, color = continent, frame = year), 10 | transition_states(gear, transition_length = 2, state_length = 1)) + 11 | geom_point() + 12 | scale_x_log10() 13 | }, NA) # expect no error to be thrown 14 | }) 15 | 16 | test_that("ggplot", { 17 | testImage <- "/working/ggplot_test.png" 18 | library("ggplot2") 19 | testPlot1 <- ggplot(data.frame(x=1:10,y=runif(10))) + aes(x=x,y=y) + geom_line() 20 | ggsave(testPlot1, filename=testImage) 21 | expect_true(file.exists(testImage)) 22 | }) 23 | -------------------------------------------------------------------------------- /tests/test_jupyterlab-lsp.R: -------------------------------------------------------------------------------- 1 | context("jupyterlab-lsp") 2 | 3 | library(httr) 4 | 5 | test_that("jupyterlab-lsp is installed", { 6 | expect_error({ 7 | # Start a jupyterlab server and wait for it to initialize 8 | system( 9 | "/usr/local/bin/jupyter server --allow-root --no-browser --port 9999 --notebook-dir /tmp", 10 | wait=FALSE) 11 | 12 | code <- 0 13 | for (x in 1:5) { 14 | # Ping LSP endpoint, verify 200 response 15 | print("ping lsp server...") 16 | response <- try(GET("http://localhost:9999/lsp/status")) 17 | if (class(response) == "response") 18 | code <- status_code(response) 19 | 20 | if (code == 200) { 21 
| break 22 | } 23 | 24 | Sys.sleep(5) 25 | } 26 | expect_equal(code, 200) 27 | 28 | # Kill the server 29 | pid <- system("ps -ef | grep jupyter | grep 9999 | awk '{print $2}' | head -n 1", intern = TRUE) 30 | tools::pskill(pid) 31 | }, NA) # expect no error to be thrown 32 | }) 33 | -------------------------------------------------------------------------------- /ldpaths: -------------------------------------------------------------------------------- 1 | : ${JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64} 2 | : ${R_JAVA_LD_LIBRARY_PATH=${JAVA_HOME}/lib/server} 3 | if test -n "/usr/local/lib"; then 4 | : ${R_LD_LIBRARY_PATH=${R_HOME}/lib:/usr/local/lib} 5 | else 6 | : ${R_LD_LIBRARY_PATH=${R_HOME}/lib} 7 | fi 8 | if test -n "${R_JAVA_LD_LIBRARY_PATH}"; then 9 | R_LD_LIBRARY_PATH="${R_LD_LIBRARY_PATH}:${R_JAVA_LD_LIBRARY_PATH}" 10 | fi 11 | ## This is DYLD_FALLBACK_LIBRARY_PATH on Darwin (macOS) and 12 | ## LD_LIBRARY_PATH elsewhere. 13 | ## However, on macOS >=10.11 (if SIP is enabled, the default), the 14 | ## environment value will not be passed to a script such as R.sh, so 15 | ## would not seen here. 16 | if test -z "${LD_LIBRARY_PATH}"; then 17 | LD_LIBRARY_PATH="${R_LD_LIBRARY_PATH}" 18 | else 19 | LD_LIBRARY_PATH="${R_LD_LIBRARY_PATH}:${LD_LIBRARY_PATH}" 20 | fi 21 | if test -n "/usr/lib/x86_64-linux-gnu"; then 22 | : ${LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/lib/x86_64-linux-gnu} 23 | fi 24 | export LD_LIBRARY_PATH -------------------------------------------------------------------------------- /tests/test_nbconvert.R: -------------------------------------------------------------------------------- 1 | context("nbconvert") 2 | 3 | test_that("nbconvert to notebook", { 4 | expect_error({ 5 | library(jsonlite) 6 | 7 | results <- system("jupyter nbconvert --to notebook --template /opt/kaggle/nbconvert-extensions.tpl --execute --stdout /input/tests/data/notebook.ipynb", 8 | intern = TRUE) 9 | json <- fromJSON(results, simplifyVector = FALSE) 10 | expect_equal(json$cells[[1]]$outputs[[1]]$text[[1]], "[1] 999\n") 11 | }, NA) # expect no error to be thrown 12 | }) 13 | 14 | test_that("nbconvert to html", { 15 | expect_error({ 16 | results <- system("jupyter nbconvert --to html --stdout --template /opt/kaggle/nbconvert-extensions.tpl --Exporter.preprocessors=[\\\"nbconvert.preprocessors.ExtractOutputPreprocessor\\\"] \"/input/tests/data/notebook.ipynb\"", 17 | intern = TRUE) 18 | expect_match(toString(results), ".*>999<.*") # [...] x <- 999 [...] 
19 | }, NA) # expect no error to be thrown 20 | }) -------------------------------------------------------------------------------- /RProfile.R: -------------------------------------------------------------------------------- 1 | options(repos = list(CRAN = "http://cran.rstudio.com/")) 2 | 3 | options(device = function() png(width = 900)) 4 | 5 | # Suppressing package startup messages in package loads 6 | # WART: this appears dangerous and is likely the source of 7 | # future tough-to-debug bugs 8 | # (removing this for now as it caused issues with the gbm package) 9 | # env <- as.environment('package:base') 10 | # unlockBinding('library', env) 11 | # library.warn <- library 12 | # utils::assignInNamespace('library', function( 13 | # package, help, pos = 2, lib.loc = NULL, character.only = FALSE, 14 | # logical.return = FALSE, warn.conflicts = TRUE, quietly = FALSE, 15 | # verbose = getOption("verbose")) { 16 | # if (!character.only) { 17 | # package <- as.character(substitute(package)) 18 | # } 19 | 20 | # suppressPackageStartupMessages(library.warn( 21 | # package, help, pos, lib.loc, character.only = TRUE, 22 | # logical.return, warn.conflicts, quietly, verbose)) 23 | # }, ns="base") 24 | # lockBinding('library', env) 25 | 26 | # Needed to make plots in rendered iR notebooks display correctly 27 | options(jupyter.plot_mimetypes = "image/png") 28 | 29 | source("/kaggle/kaggle_bigquery.R") 30 | source("/kaggle/kaggle_secrets.R") 31 | # Ensure the file ends in a newline 32 | # https://yihui.name/en/2018/04/rprofile-trailing-newline/. 33 | -------------------------------------------------------------------------------- /tests/test_imports.R: -------------------------------------------------------------------------------- 1 | context("import") 2 | 3 | Library <- function(libname){ 4 | print(libname) 5 | suppressPackageStartupMessages(library(libname, character.only=TRUE)) 6 | } 7 | 8 | # Add packages to that list to ensure they are installed on the image 9 | # and prevent future regression. 10 | test_that("imports", { 11 | import_pkgs <- function() { 12 | Library("bitops") 13 | Library("colorspace") 14 | Library("dichromat") 15 | Library("digest") 16 | Library("dplyr") 17 | Library("fftw") 18 | Library("fslr") 19 | Library("ggforce") 20 | Library("ggrepel") 21 | Library("gtable") 22 | Library("hrbrthemes") 23 | Library("imager") 24 | Library("knitr") 25 | Library("labeling") 26 | Library("lightgbm") 27 | Library("mime") 28 | Library("munsell") 29 | Library("plyr") 30 | Library("proto") 31 | Library("randomForest") 32 | Library("RColorBrewer") 33 | Library("Rcpp") 34 | Library("RCurl") 35 | Library("readr") 36 | Library("reshape2") 37 | Library("rstan") 38 | Library("Rtsne") 39 | Library("scales") 40 | Library("seewave") 41 | Library("stringr") 42 | Library("tesseract") 43 | Library("tidyr") 44 | Library("xgboost") 45 | Library("zoo") 46 | 47 | # bioconductor 48 | Library("BiocGenerics") 49 | Library("EBImage") 50 | Library("limma") 51 | Library("rhdf5") 52 | } 53 | 54 | # expect no error to be thrown 55 | expect_error(import_pkgs(), NA) 56 | }) 57 | -------------------------------------------------------------------------------- /push: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | usage() { 5 | cat << EOF 6 | Usage: $0 [OPTIONS] [LABEL] 7 | Push a newly-built image with the given LABEL to gcr.io and DockerHub. 8 | Options: 9 | -g, --gpu Push the image with GPU support. 
10 | -s, --source-image IMAGE Tag for the source image. 11 | EOF 12 | } 13 | 14 | SOURCE_IMAGE_TAG='kaggle/rstats-build:latest' 15 | SOURCE_IMAGE_TAG_OVERRIDE='' 16 | TARGET_IMAGE='gcr.io/kaggle-images/rstats' 17 | 18 | while :; do 19 | case "$1" in 20 | -h|--help) 21 | usage 22 | exit 23 | ;; 24 | -g|--gpu) 25 | SOURCE_IMAGE_TAG='kaggle/rstats-gpu-build:latest' 26 | TARGET_IMAGE='gcr.io/kaggle-private-byod/rstats' 27 | ;; 28 | -s|--source-image) 29 | if [[ -z $2 ]]; then 30 | usage 31 | printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 32 | exit 33 | fi 34 | SOURCE_IMAGE_TAG_OVERRIDE=$2 35 | shift # skip the flag value 36 | ;; 37 | -?*) 38 | usage 39 | printf 'ERROR: Unknown option: %s\n' "$1" >&2 40 | exit 41 | ;; 42 | *) 43 | break 44 | esac 45 | 46 | shift 47 | done 48 | 49 | LABEL=${1:-testing} 50 | 51 | if [[ -n "$SOURCE_IMAGE_TAG_OVERRIDE" ]]; then 52 | SOURCE_IMAGE_TAG="$SOURCE_IMAGE_TAG_OVERRIDE" 53 | fi 54 | 55 | readonly SOURCE_IMAGE_TAG 56 | readonly TARGET_IMAGE 57 | readonly LABEL 58 | 59 | set -x 60 | 61 | docker tag "${SOURCE_IMAGE_TAG}" "${TARGET_IMAGE}:${LABEL}" 62 | gcloud docker -- push "${TARGET_IMAGE}:${LABEL}" 63 | -------------------------------------------------------------------------------- /kaggle/kaggle_secrets.R: -------------------------------------------------------------------------------- 1 | # This library adds support for User Secrets, which can be added to 2 | # the Notebook by selecting Add-Ons toolbar -> Secrets. 3 | # 4 | # Sample user code: 5 | # 6 | # paste(get_user_secret('r_secret')) 7 | 8 | get_user_secret <- function(label) { 9 | KAGGLE_USER_SECRETS_TOKEN <- Sys.getenv("KAGGLE_USER_SECRETS_TOKEN") 10 | KAGGLE_BASE_URL <- Sys.getenv("KAGGLE_URL_BASE") 11 | KAGGLE_IAP_TOKEN <- Sys.getenv("KAGGLE_IAP_TOKEN") 12 | GET_USER_SECRET_BY_LABEL_ENDPOINT = "/requests/GetUserSecretByLabelRequest" 13 | 14 | if (KAGGLE_USER_SECRETS_TOKEN == '') { 15 | stop("Expected KAGGLE_USER_SECRETS_TOKEN environment variable to be present.", call. = FALSE) 16 | } 17 | request_body <- list(Label = label) 18 | auth_header <- paste0("Bearer ", KAGGLE_USER_SECRETS_TOKEN) 19 | if (KAGGLE_IAP_TOKEN != '') { 20 | iap_auth_header <- paste0("Bearer ", KAGGLE_IAP_TOKEN) 21 | headers <- add_headers(c("X-Kaggle-Authorization" = auth_header, "Authorization" = iap_auth_header)) 22 | } else { 23 | headers <- add_headers(c("X-Kaggle-Authorization" = auth_header)) 24 | } 25 | response <- POST( 26 | paste0(KAGGLE_BASE_URL, GET_USER_SECRET_BY_LABEL_ENDPOINT), 27 | headers, 28 | # Reset the cookies on each request, since the server expects none. 29 | handle = handle(''), 30 | body = request_body, 31 | encode = "json" 32 | ) 33 | if (http_error(response) || !identical(content(response)$wasSuccessful, TRUE)) { 34 | err <- paste("Unable to get user secret. Please ensure you have internet enabled. Error: ", 35 | paste(content(response, "text", encoding = 'utf-8'))) 36 | stop(err, call. = FALSE) 37 | } 38 | response_body <- content(response) 39 | return(response_body$result$secret) 40 | } 41 | -------------------------------------------------------------------------------- /package_installs.R: -------------------------------------------------------------------------------- 1 | library(devtools) 2 | options(repos = c("CRAN" = "http://cran.us.r-project.org")) 3 | options(Ncpus = parallel::detectCores()) 4 | 5 | # Set download method, to avoid the default behavior of using 6 | # R's internal HTTP implementation, which doesn't support HTTPS connections. 
7 | # https://stackoverflow.com/questions/45061272/r-and-ssl-curl-on-ubuntu-linux-failed-ssl-connect-in-r-but-works-in-curl 8 | options(download.file.method = "libcurl") 9 | 10 | # Install the lightGBM installer package 11 | install_github("Laurae2/lgbdl") 12 | lgbdl::lgb.dl(compiler = "gcc", commit = "tags/v2.3.1") 13 | 14 | install_github("dgrtwo/widyr") 15 | install_github("ellisp/forecastxgb-r-package/pkg") 16 | install_github("rstudio/leaflet") 17 | # install_github fails for catboost. 18 | # Following direct installation instructions instead: https://tech.yandex.com/catboost/doc/dg/installation/r-installation-binary-installation-docpage/ 19 | install_url('https://github.com/catboost/catboost/releases/download/v0.23.2/catboost-R-Linux-0.23.2.tgz', INSTALL_opts = c("--no-multiarch")) 20 | install_github("sassalley/hexmapr") 21 | install_github("hadley/multidplyr") 22 | install_github("dselivanov/LSHR") 23 | 24 | # install latest sparklyr and Spark (for local mode) 25 | install_github("rstudio/sparklyr") 26 | sparklyr::spark_install() 27 | 28 | install.packages("genderdata", repos = "http://packages.ropensci.org") 29 | 30 | install.packages("openNLPmodels.en", 31 | repos = "http://datacube.wu.ac.at/", 32 | type = "source") 33 | 34 | install_github("davpinto/fastknn") 35 | install_github("mukul13/rword2vec") 36 | 37 | # b/232137539 Removed from RCRAN but required for Neurohacking in R coursera course 38 | install_github("muschellij2/neurobase") 39 | install_github("muschellij2/fslr") 40 | 41 | # These signal processing libraries are on CRAN, but they require apt-get dependences that are 42 | # handled in this image's Dockerfile. 43 | install.packages("fftw") 44 | 45 | # https://github.com/Kaggle/docker-rstats/issues/74 46 | install_github("thomasp85/patchwork") 47 | 48 | # https://github.com/Kaggle/docker-rstats/issues/73 49 | install.packages("topicmodels") 50 | 51 | install.packages("tesseract") 52 | 53 | # Try to reinstall igraph and imager her until fixed in rcran. 54 | install.packages("igraph") 55 | install.packages("imager") 56 | 57 | # Torch: install the full package upfront otherwise it will be installed on loading the package which doesn't work for kernels 58 | # without internet (competitions for example). 59 | install.packages("torch") 60 | library(torch) 61 | install_torch() 62 | 63 | install.packages(c('collections', 'languageserver'), dependencies=TRUE) 64 | 65 | # The tfhub package is added to the rcran image. 66 | library(tfhub) 67 | install_tfhub() 68 | -------------------------------------------------------------------------------- /diff: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | usage() { 5 | cat << EOF 6 | Usage: $0 [OPTIONS] 7 | Compare a given Docker image package versions against the prod image. 8 | 9 | Options: 10 | -g, --gpu Compare GPU images. 11 | -b, --base The base image to diff against. 12 | -t, --target The image to diff against the base image. 13 | Default is the locally built image. 
14 | EOF 15 | } 16 | 17 | 18 | BASE_IMAGE_TAG='gcr.io/kaggle-images/rstats:latest' 19 | BASE_IMAGE_TAG_OVERRIDE='' 20 | TARGET_IMAGE_TAG='kaggle/rstats-build' 21 | TARGET_IMAGE_TAG_OVERRIDE='' 22 | 23 | while :; do 24 | case "$1" in 25 | -h|--help) 26 | usage 27 | exit 28 | ;; 29 | -g|--gpu) 30 | BASE_IMAGE_TAG='gcr.io/kaggle-private-byod/rstats:latest' 31 | TARGET_IMAGE_TAG='kaggle/rstats-gpu-build' 32 | ;; 33 | -b|--base) 34 | if [[ -z "$2" ]]; then 35 | usage 36 | printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 37 | exit 38 | fi 39 | BASE_IMAGE_TAG_OVERRIDE="$2" 40 | shift # skip the flag value 41 | ;; 42 | -t|--target) 43 | if [[ -z "$2" ]]; then 44 | usage 45 | printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 46 | exit 47 | fi 48 | TARGET_IMAGE_TAG_OVERRIDE="$2" 49 | shift # skip the flag value 50 | ;; 51 | -?*) 52 | usage 53 | printf 'ERROR: Unknown option: %s\n' "$1" >&2 54 | exit 55 | ;; 56 | *) 57 | break 58 | esac 59 | 60 | shift 61 | done 62 | 63 | if [[ -n "$BASE_IMAGE_TAG_OVERRIDE" ]]; then 64 | BASE_IMAGE_TAG="$BASE_IMAGE_TAG_OVERRIDE" 65 | fi 66 | 67 | if [[ -n "$TARGET_IMAGE_TAG_OVERRIDE" ]]; then 68 | TARGET_IMAGE_TAG="$TARGET_IMAGE_TAG_OVERRIDE" 69 | fi 70 | 71 | readonly BASE_IMAGE_TAG 72 | readonly TARGET_IMAGE_TAG 73 | 74 | echo "Base: $BASE_IMAGE_TAG" 75 | echo "Target: $TARGET_IMAGE_TAG" 76 | 77 | if [[ "$BASE_IMAGE_TAG" == "gcr.io/"* ]]; then 78 | docker pull "$BASE_IMAGE_TAG" 79 | fi 80 | 81 | CMDS=("Rscript /tools/r_list_versions.R | sort" "pip freeze" 'cat /etc/os-release | grep -oP "PRETTY_NAME=\"\K([^\"]*)"' "uname -r" "dpkg --list | awk '{print \$2\"==\"\$3}'") 82 | for cmd in "${CMDS[@]}"; do 83 | echo "== Comparing $cmd ==" 84 | diff --suppress-common-lines --side-by-side \ 85 | <(docker run -v $PWD/tools:/tools --rm "$BASE_IMAGE_TAG" /bin/bash -c "$cmd") \ 86 | <(docker run -v $PWD/tools:/tools --rm "$TARGET_IMAGE_TAG" /bin/bash -c "$cmd") \ 87 | && echo 'No diff' || true 88 | done 89 | -------------------------------------------------------------------------------- /kaggle/kaggle_bigquery.R: -------------------------------------------------------------------------------- 1 | # This library adds support for BigQuery (via the bigrquery library), by using 2 | # Kaggle's UserSecrets service to retrieve an OAuth access token for the connected 3 | # credentials attached to the running Kernel. 4 | # 5 | # Sample user code: 6 | # 7 | # project <- "yes-theory-1" # put your project ID here 8 | # sql <- "SELECT year, month, day, weight_pounds FROM [publicdata:samples.natality] LIMIT 5" 9 | # query_exec(sql, project = project) 10 | 11 | KAGGLE_USER_SECRETS_TOKEN <- Sys.getenv("KAGGLE_USER_SECRETS_TOKEN") 12 | KAGGLE_BASE_URL <- Sys.getenv("KAGGLE_URL_BASE") 13 | KAGGLE_IAP_TOKEN <- Sys.getenv("KAGGLE_IAP_TOKEN") 14 | GET_USER_SECRET_ENDPOINT = "/requests/GetUserSecretRequest" 15 | 16 | # We create a Token2.0 Credential object (from httr library) and use bigrquery's set_access_cred 17 | # to override the interactive authentication (https://github.com/r-dbi/bigrquery/blob/master/R/auth.R). 18 | library(httr) 19 | TokenBigQueryKernel <- R6::R6Class("TokenBigQueryKernel", inherit = Token2.0, list( 20 | params = list(as_header = TRUE), 21 | endpoint = oauth_endpoints("google"), 22 | initialize = function() { 23 | }, 24 | can_refresh = function() { 25 | TRUE 26 | }, 27 | refresh = function() { 28 | if (KAGGLE_USER_SECRETS_TOKEN == '') { 29 | stop("Expected KAGGLE_USER_SECRETS_TOKEN environment variable to be present.", call. 
= FALSE) 30 | } 31 | request_body <- list(Target = 1) 32 | auth_header <- paste0("Bearer ", KAGGLE_USER_SECRETS_TOKEN) 33 | if (KAGGLE_IAP_TOKEN != '') { 34 | iap_auth_header <- paste0("Bearer ", KAGGLE_IAP_TOKEN) 35 | headers <- add_headers(c("X-Kaggle-Authorization" = auth_header, "Authorization" = iap_auth_header)) 36 | } else { 37 | headers <- add_headers(c("X-Kaggle-Authorization" = auth_header)) 38 | } 39 | response <- POST(paste0(KAGGLE_BASE_URL, GET_USER_SECRET_ENDPOINT), 40 | headers, 41 | # Reset the cookies on each request, since the server expects none. 42 | handle = handle(''), 43 | body = request_body, 44 | encode = "json") 45 | if (http_error(response) || !identical(content(response)$wasSuccessful, TRUE)) { 46 | err <- paste("Unable to refresh token. Please ensure you have a connected BigQuery account. Error: ", 47 | paste(content(response, "text", encoding = 'utf-8'))) 48 | stop(err, call. = FALSE) 49 | } 50 | response_body <- content(response) 51 | self$credentials$access_token <- response_body$result$secret 52 | self 53 | }, 54 | # Never cache 55 | cache = function(path) self, 56 | load_from_cache = function() self 57 | )) 58 | 59 | library(bigrquery) 60 | # A hack to allow users to use bigrquery directly. The "correct" way would be to use: 61 | # `bq_auth(scopes = NULL, token = TokenBigQueryKernel$new())`, but that would force auth immediately, 62 | # which would slow kernels starting and could cause errors on startup. 63 | auth <- getNamespace("bigrquery")$.auth 64 | auth$set_cred(TokenBigQueryKernel$new()) 65 | auth$set_auth_active(TRUE) 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # docker-rstats 2 | 3 | [Kaggle Notebooks](https://www.kaggle.com/notebooks) allow users to run scripts against our competitions and datasets without having to download data or set up their environment. 4 | 5 | Our R Docker images are stored on Google Container Registry at: 6 | 7 | * CPU-only: [gcr.io/kaggle-images/rstats](https://gcr.io/kaggle-images/rstats) 8 | * GPU: [gcr.io/kaggle-gpu-images/rstats](https://gcr.io/kaggle-gpu-images/rstats) 9 | 10 | Here's [an example](https://www.kaggle.com/benhamner/bike-sharing-demand/bike-rentals-by-time-and-temperature): 11 | 12 |  13 | 14 | This is the Dockerfile (etc.) used for building the image that runs R scripts on Kaggle. [Here's](https://registry.hub.docker.com/u/kaggle/rstats/) the Docker image on Dockerhub. 15 | 16 | ## Getting started 17 | 18 | To get started with this image, read our [guide](http://blog.kaggle.com/2016/02/05/how-to-get-started-with-data-science-in-containers/) to using it yourself, or browse [Kaggle Notebooks](https://www.kaggle.com/notebooks) for ideas. 19 | 20 | ## Requesting new features 21 | 22 | **We welcome pull requests** if there are any packages you'd like to add! 23 | 24 | We can merge your request quickly if you check that it builds correctly. Here's how to do that. 25 | 26 | ### New R libraries 27 | 28 | If you want a library that's, say, on GitHub but not yet on CRAN, then you can add it to [`package_installs.R`](https://github.com/Kaggle/docker-rstats/blob/master/package_installs.R). To check that it will work, you can follow this example, which shows how to add a library called `coolstuff` that's available from GitHub user `nerdcha`. 
29 | 30 | ```bash 31 | me@my-computer:/home$ docker run --rm -it kaggle/rstats 32 | R version 3.3.1 (2016-06-21) -- "Bug in Your Hair" 33 | [...etc...] 34 | > library(devtools) 35 | > install_github("nerdcha/coolstuff") 36 | Downloading GitHub repo nerdcha/coolstuff@master 37 | [...etc...] 38 | ** testing if installed package can be loaded 39 | * DONE (coolstuff) 40 | > library(coolstuff) 41 | > 42 | ``` 43 | 44 | Everything worked, so we can add the line `install_github("nerdcha/coolstuff")` to `package_installs.R` and submit the pull request. 45 | 46 | ### New libraries with complex dependencies 47 | 48 | Some libraries will need extra system support to work. Installing them follows a pretty similar pattern; just try whatever prerequisites the package maintainer says are needed for a Linux system. For example, if the `coolstuff` package says to run `apt-get install libcool-dev` first, then you can test it in the following way. 49 | 50 | ```bash 51 | me@my-computer:/home$ docker run --rm -it kaggle/rstats /bin/bash 52 | root@2dd4317c8799:/# apt-get update 53 | Ign:1 http://ftp.de.debian.org/debian jessie InRelease 54 | [...] 55 | root@2dd4317c8799:/# apt-get install libcool-dev 56 | Reading package lists... Done 57 | [...] 58 | root@2dd4317c8799:/# R 59 | R version 3.3.1 (2016-06-21) -- "Bug in Your Hair" 60 | [...] 61 | > library(devtools) 62 | > install_github("nerdcha/coolstuff") 63 | Downloading GitHub repo nerdcha/coolstuff@master 64 | [...] 65 | ** testing if installed package can be loaded 66 | * DONE (coolstuff) 67 | > library(coolstuff) 68 | > 69 | ``` 70 | 71 | If that's all working as expected, then you can add `apt-get install libcool-dev` to the end of the [`Dockerfile`](https://github.com/Kaggle/docker-rstats/blob/master/Dockerfile), and `install_github("nerdcha/coolstuff")` to `package_installs.R`. 72 | 73 | -------------------------------------------------------------------------------- /test: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | IMAGE_TAG='kaggle/rstats-build' 5 | IMAGE_TAG_OVERRIDE='' 6 | ADDITONAL_OPTS='' 7 | TEST_FILE='' 8 | 9 | usage() { 10 | cat << EOF 11 | Usage: $0 [OPTIONS] 12 | Run tests for a newly-built R Docker image. 13 | By default, it runs the tests for the CPU image. 14 | Options: 15 | -g, --gpu Run tests for the GPU image. 16 | -i, --image IMAGE Run tests against the specified image 17 | -t, --test_file FILENAME Run all tests for the specified file (e.g. 
test_keras.R) 18 | EOF 19 | } 20 | 21 | while :; do 22 | case "$1" in 23 | -h|--help) 24 | usage 25 | exit 26 | ;; 27 | -g|--gpu) 28 | IMAGE_TAG='kaggle/rstats-gpu-build' 29 | ADDITONAL_OPTS='-v /tmp/empty_dir:/usr/local/cuda/lib64/stubs:ro' 30 | ;; 31 | -i|--image) 32 | if [[ -z $2 ]]; then 33 | usage 34 | printf 'ERROR: No IMAGE specified after the %s flag.\n' "$1" >&2 35 | exit 36 | fi 37 | IMAGE_TAG_OVERRIDE=$2 38 | shift # skip the flag value 39 | ;; 40 | -t|--test_file) 41 | if [[ -z $2 ]]; then 42 | usage 43 | printf 'ERROR: No FILENAME specified after the %s flag.\n' "$1" >&2 44 | exit 45 | fi 46 | TEST_FILE=$2 47 | shift # skip the flag value 48 | ;; 49 | -?*) 50 | usage 51 | printf 'ERROR: Unknown option: %s\n' "$1" >&2 52 | exit 53 | ;; 54 | *) 55 | break 56 | esac 57 | 58 | shift 59 | done 60 | 61 | if [[ -n "$IMAGE_TAG_OVERRIDE" ]]; then 62 | IMAGE_TAG="$IMAGE_TAG_OVERRIDE" 63 | fi 64 | 65 | readonly IMAGE_TAG 66 | readonly ADDITONAL_OPTS 67 | readonly TEST_FILE 68 | 69 | set -x 70 | docker rm jupyter_test_r || true 71 | rm -rf /tmp/rstats-build 72 | mkdir -p /tmp/rstats-build/tmp 73 | mkdir -p /tmp/rstats-build/devshm 74 | mkdir -p /tmp/rstats-build/working 75 | 76 | # Check that Jupyter server can run; if it dies on startup, the `docker kill` command will throw an error 77 | docker run -d --name=jupyter_test_r --read-only --net=none \ 78 | -e HOME=/tmp \ 79 | -e NVIDIA_DISABLE_REQUIRE=1 \ 80 | -v $PWD:/input:ro -v /tmp/rstats-build/working:/working \ 81 | -v /tmp/rstats-build/tmp:/tmp -v /tmp/rstats-build/devshm:/dev/shm \ 82 | -w=/working \ 83 | "$IMAGE_TAG" jupyter notebook --allow-root --ip="*" 84 | sleep 3 85 | docker kill jupyter_test_r && docker rm jupyter_test_r 86 | 87 | # Check that papermill is installed in python (b/191304257). 88 | docker run --rm -e NVIDIA_DISABLE_REQUIRE=1 --name=papermill_test_r --read-only --net=none \ 89 | "$IMAGE_TAG" python -c 'import sys;import papermill as pm; print(pm.__version__)' 90 | 91 | 92 | # TF_FORCE_GPU_ALLOW_GROWTH is to prevent tensorflow from allocating the totality of a GPU memory. 
93 | # https://stackoverflow.com/questions/34199233/how-to-prevent-tensorflow-from-allocating-the-totality-of-a-gpu-memory/55541385#55541385 94 | docker run --rm -t --net=none \ 95 | -e HOME=/tmp \ 96 | -e TF_FORCE_GPU_ALLOW_GROWTH=true \ 97 | -e NVIDIA_DISABLE_REQUIRE=1 \ 98 | -v $PWD:/input:ro -v /tmp/rstats-build/working:/working \ 99 | -v /tmp/rstats-build/tmp:/tmp -v /tmp/rstats-build/devshm:/dev/shm \ 100 | -w=/working \ 101 | $ADDITONAL_OPTS \ 102 | "$IMAGE_TAG" \ 103 | /bin/bash -c "/input/testthat.R $TEST_FILE" 104 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_TAG=latest 2 | 3 | FROM gcr.io/kaggle-images/rcran:${BASE_TAG} 4 | 5 | ARG PYTHON_VERSION=3.10 6 | 7 | ADD clean-layer.sh /tmp/clean-layer.sh 8 | 9 | # Install Python 10 | RUN apt-get install -y software-properties-common && \ 11 | add-apt-repository ppa:deadsnakes/ppa -y && \ 12 | apt-get update && \ 13 | echo "MOD: python${PYTHON_VERSION}" && \ 14 | apt-get install -y python${PYTHON_VERSION} && \ 15 | ln -sf /usr/bin/python${PYTHON_VERSION} /usr/bin/python && \ 16 | curl -sS https://bootstrap.pypa.io/get-pip.py | python && \ 17 | /tmp/clean-layer.sh 18 | 19 | RUN apt-get update && \ 20 | apt-get install -y libzmq3-dev default-jdk && \ 21 | apt-get install -y python${PYTHON_VERSION}-dev python3-venv libcurl4-openssl-dev libssl-dev && \ 22 | pip install jupyter pycurl && \ 23 | # Install older tornado - https://github.com/jupyter/notebook/issues/4437 24 | pip install "tornado<6" && \ 25 | pip install notebook && \ 26 | pip install nbconvert && \ 27 | R -e 'IRkernel::installspec()' && \ 28 | # Build pyzmq from source instead of using a pre-built binary. 29 | yes | pip uninstall pyzmq && \ 30 | pip install pyzmq --no-binary pyzmq && \ 31 | cp -r /root/.local/share/jupyter/kernels/ir /usr/local/share/jupyter/kernels && \ 32 | # Make sure Jupyter won't try to "migrate" its junk in a read-only container 33 | mkdir -p /root/.jupyter/kernels && \ 34 | cp -r /root/.local/share/jupyter/kernels/ir /root/.jupyter/kernels && \ 35 | touch /root/.jupyter/jupyter_nbconvert_config.py && touch /root/.jupyter/migrated && \ 36 | # papermill can replace nbconvert for executing notebooks 37 | pip install papermill && \ 38 | # b/276358430 fix Jupyter lsp freezing up the jupyter server 39 | pip install jupyterlab-lsp "jupyter-lsp==1.5.1" && \ 40 | /tmp/clean-layer.sh 41 | 42 | # Miniconda 43 | ARG MINICONDA_PATH=/root/.local/share/r-miniconda 44 | ARG ENV_NAME=r-reticulate 45 | RUN R -e "reticulate::install_miniconda(path = \"${MINICONDA_PATH}\", update = TRUE, force = TRUE)" 46 | RUN R -e "reticulate::conda_create(envname = \"${ENV_NAME}\", conda = \"auto\", required = TRUE, python_version = \"${PYTHON_VERSION}\")" 47 | ENV RETICULATE_PYTHON="${MINICONDA_PATH}/envs/${ENV_NAME}/bin/python" 48 | 49 | # Tensorflow and Keras 50 | ARG TENSORFLOW_VERSION=2.12.0 51 | RUN R -e "keras::install_keras(tensorflow = \"${TENSORFLOW_VERSION}\", extra_packages = c(\"pandas\", \"numpy\", \"pycryptodome\"), method=\"conda\", envname=\"${ENV_NAME}\")" 52 | 53 | # Install kaggle libraries. 54 | # Do this at the end to avoid rebuilding everything when any change is made. 55 | ADD kaggle/ /kaggle/ 56 | # RProfile sources files from /kaggle/ so ensure this runs after ADDing it. 
57 | ENV R_HOME=/usr/local/lib/R 58 | ADD RProfile.R /usr/local/lib/R/etc/Rprofile.site 59 | ADD install_iR.R /tmp/install_iR.R 60 | ADD bioconductor_installs.R /tmp/bioconductor_installs.R 61 | ADD package_installs.R /tmp/package_installs.R 62 | ADD nbconvert-extensions.tpl /opt/kaggle/nbconvert-extensions.tpl 63 | ADD kaggle/template_conf.json /opt/kaggle/conf.json 64 | # Install with `--vanilla` flag to avoid conflict. https://support.bioconductor.org/p/57187/ 65 | RUN Rscript --vanilla /tmp/package_installs.R 66 | RUN Rscript --vanilla /tmp/bioconductor_installs.R 67 | RUN Rscript --vanilla /tmp/install_iR.R 68 | 69 | ARG GIT_COMMIT=unknown 70 | ARG BUILD_DATE_RSTATS=unknown 71 | 72 | LABEL git-commit=$GIT_COMMIT 73 | LABEL build-date=$BUILD_DATE_RSTATS 74 | 75 | # Find the current release git hash & build date inside the kernel editor. 76 | RUN echo "$GIT_COMMIT" > /etc/git_commit && echo "$BUILD_DATE_RSTATS" > /etc/build_date 77 | 78 | CMD ["R"] 79 | -------------------------------------------------------------------------------- /gpu.Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_TAG=staging 2 | FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu18.04 AS nvidia 3 | FROM gcr.io/kaggle-images/rstats:${BASE_TAG} 4 | ARG ncpus=1 5 | 6 | ADD clean-layer.sh /tmp/clean-layer.sh 7 | 8 | # Cuda support 9 | COPY --from=nvidia /etc/apt/sources.list.d/cuda.list /etc/apt/sources.list.d/ 10 | COPY --from=nvidia /etc/apt/trusted.gpg /etc/apt/trusted.gpg.d/cuda.gpg 11 | 12 | ENV CUDA_MAJOR_VERSION=11 13 | ENV CUDA_MINOR_VERSION=7 14 | ENV CUDA_PATCH_VERSION=0 15 | ENV CUDA_VERSION=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION.$CUDA_PATCH_VERSION 16 | ENV CUDA_PKG_VERSION=$CUDA_MAJOR_VERSION-$CUDA_MINOR_VERSION 17 | ENV CUDNN_VERSION=8.5.0.96 18 | ENV NCCL_VERSION=2.13.4-1 19 | LABEL com.nvidia.volumes.needed="nvidia_driver" 20 | LABEL com.nvidia.cuda.version="${CUDA_VERSION}" 21 | LABEL com.nvidia.cudnn.version="${CUDNN_VERSION}" 22 | ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} 23 | # The stub is useful to us both for built-time linking and run-time linking, on CPU-only systems. 24 | # When intended to be used with actual GPUs, make sure to (besides providing access to the host 25 | # CUDA user libraries, either manually or through the use of nvidia-docker) exclude them. One 26 | # convenient way to do so is to obscure its contents by a bind mount: 27 | # docker run .... -v /non-existing-directory:/usr/local/cuda/lib64/stubs:ro ... 
28 | ENV LD_LIBRARY_PATH="/usr/local/nvidia/lib64:/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs" 29 | ENV NVIDIA_VISIBLE_DEVICES=all 30 | ENV NVIDIA_DRIVER_CAPABILITIES=compute,utility 31 | ENV NVIDIA_REQUIRE_CUDA="cuda>=$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION" 32 | RUN apt-get update && apt-get install -y --no-install-recommends \ 33 | cuda-cupti-$CUDA_PKG_VERSION \ 34 | cuda-cudart-$CUDA_PKG_VERSION \ 35 | cuda-cudart-dev-$CUDA_PKG_VERSION \ 36 | cuda-libraries-$CUDA_PKG_VERSION \ 37 | cuda-libraries-dev-$CUDA_PKG_VERSION \ 38 | cuda-nvml-dev-$CUDA_PKG_VERSION \ 39 | cuda-minimal-build-$CUDA_PKG_VERSION \ 40 | cuda-command-line-tools-$CUDA_PKG_VERSION \ 41 | libcudnn8=$CUDNN_VERSION-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \ 42 | libcudnn8-dev=$CUDNN_VERSION-1+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \ 43 | libcublas-$CUDA_PKG_VERSION \ 44 | libcublas-dev-$CUDA_PKG_VERSION \ 45 | libnccl2=$NCCL_VERSION+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION \ 46 | libnccl-dev=$NCCL_VERSION+cuda$CUDA_MAJOR_VERSION.$CUDA_MINOR_VERSION && \ 47 | /tmp/clean-layer.sh 48 | 49 | ENV CUDA_HOME=/usr/local/cuda 50 | 51 | # Hack to fix R trying to use CUDA in `/usr/lib/x86_64-linux-gnu` directory instead 52 | # of `/usr/local/nvidia/lib64` (b/152401083). 53 | # For some reason, the CUDA file `libcuda.so.418.67` in the former directory is empty. 54 | # R's ldpaths modifies LD_LIBRARY_PATH on start by adding `/usr/lib/x86_64-linux-gnu` upfront. 55 | # Instead, this version of ldpaths adds it at the end. 56 | ADD ldpaths $R_HOME/etc/ldpaths 57 | 58 | # Install tensorflow with GPU support 59 | ARG TENSORFLOW_VERSION=2.11.0 60 | RUN R -e "keras::install_keras(version = \"${TENSORFLOW_VERSION}-gpu\", method = \"conda\", conda = \"auto\", envname=\"r-reticulate\")" && \ 61 | rm -rf /tmp/tensorflow_gpu && \ 62 | /tmp/clean-layer.sh 63 | 64 | # OpenCL for bayesCL, gpuR, ... 65 | RUN apt-get install -y --no-install-recommends ocl-icd-opencl-dev && \ 66 | mkdir -p /etc/OpenCL/vendors && \ 67 | echo "libnvidia-opencl.so.1" > /etc/OpenCL/vendors/nvidia.icd 68 | 69 | # Install GPU specific packages 70 | RUN CPATH=/usr/local/cuda/targets/x86_64-linux/include install2.r --error --ncpus $ncpus --repo http://cran.rstudio.com \ 71 | h2o4gpu 72 | 73 | # Torch: install the full package upfront otherwise it will be installed on loading the package which doesn't work for kernels 74 | # without internet (competitions for example). It will detect CUDA and install the proper version. 
75 | # Make Torch think we use CUDA 11.8 (https://github.com/mlverse/torch/issues/807) 76 | ENV CUDA=11.7 77 | RUN R -e 'install.packages("torch")' 78 | RUN R -e 'library(torch); install_torch()' 79 | 80 | CMD ["R"] 81 | -------------------------------------------------------------------------------- /tests/test_keras.R: -------------------------------------------------------------------------------- 1 | context("keras") 2 | 3 | test_that("model training", { 4 | library(keras) 5 | 6 | x_train <- matrix(rnorm(100 * 10), nrow = 100) 7 | y_train <- to_categorical(matrix(sample(0:2, 100, TRUE), ncol = 1), 3) 8 | 9 | model <- keras_model_sequential() 10 | model %>% 11 | layer_dense(units=100, activation='relu', input_shape=dim(x_train)[2]) %>% 12 | layer_dropout(rate=0.4) %>% 13 | layer_dense(unit=3, activation='softmax') 14 | 15 | optimizers <- keras::keras$optimizers 16 | 17 | model %>% compile( 18 | loss = 'categorical_crossentropy', 19 | optimizer = optimizers$RMSprop(), 20 | metrics = c('accuracy') 21 | ) 22 | 23 | history <- model %>% fit( 24 | x_train, y_train, 25 | epochs=5, batch_size = 8, 26 | validation_split=0.2 27 | ) 28 | 29 | expect_is(history, "keras_training_history") 30 | }) 31 | 32 | test_that("CNN model training", { 33 | library(keras) 34 | 35 | # Preprocess data 36 | train.label<- to_categorical(matrix(sample(0:9, 100, TRUE), ncol = 1), 10) 37 | train.feature<- matrix(sample(0:255, 28 * 28 * 100, TRUE), nrow = 100) 38 | dim(train.feature)<-c(nrow(train.feature), 28, 28, 1) 39 | 40 | # Build simple CNN 41 | model<-keras_model_sequential() 42 | 43 | model %>% 44 | layer_conv_2d(filters = 32, kernel_size = c(5,5),padding = 'Valid', activation = 'relu', input_shape = c(28,28,1)) %>% 45 | layer_batch_normalization() %>% 46 | layer_conv_2d(filters = 32, kernel_size = c(5,5),padding = 'Same', activation = 'relu') %>% 47 | layer_batch_normalization() %>% 48 | layer_max_pooling_2d(pool_size = c(2, 2)) %>% 49 | layer_dropout(rate = 0.2) %>% 50 | layer_conv_2d(filters = 64, kernel_size = c(3,3),padding = 'Same', activation = 'relu') %>% 51 | layer_batch_normalization()%>% 52 | layer_conv_2d(filters = 64, kernel_size = c(3,3),padding = 'Same', activation = 'relu') %>% 53 | layer_batch_normalization() %>% 54 | layer_max_pooling_2d(pool_size = c(2, 2)) %>% 55 | layer_dropout(rate = 0.2) %>% 56 | layer_flatten() %>% 57 | layer_dense(units=1024,activation='relu') %>% 58 | layer_dense(units=512,activation='relu') %>% 59 | layer_dense(units=256,activation='relu') %>% 60 | layer_dense(units=10,activation='softmax') 61 | 62 | model %>% compile( 63 | loss='categorical_crossentropy', 64 | optimizer='adam', 65 | metrics='accuracy' 66 | ) 67 | 68 | # Train model 69 | datagen <- image_data_generator( 70 | featurewise_center = F, 71 | samplewise_center=F, 72 | featurewise_std_normalization = F, 73 | samplewise_std_normalization=F, 74 | zca_whitening=F, 75 | horizontal_flip = F, 76 | vertical_flip = F, 77 | width_shift_range = 0.15, 78 | height_shift_range = 0.15, 79 | zoom_range = 0.15, 80 | rotation_range = 0.15, 81 | shear_range = 0.15 82 | ) 83 | 84 | datagen %>% fit_image_data_generator(train.feature) 85 | 86 | history <- model %>% 87 | fit( 88 | flow_images_from_data(train.feature, train.label, datagen, batch_size = 10), 89 | steps_per_epoch = nrow(train.feature) / 10, 90 | epochs = 1) 91 | 92 | expect_is(history, "keras_training_history") 93 | }) 94 | 95 | test_that("flow_images_from_dataframe", { 96 | library(keras) 97 | library(readr) 98 | 99 | base_dir <- '/input/tests/data' 100 | 
test_labels <- read_csv("/input/tests/data/sample_submission.csv") 101 | 102 | test_labels$filename <- paste0(test_labels$id_code, ".png") 103 | 104 | pred <- flow_images_from_dataframe( 105 | dataframe = test_labels, 106 | x_col = "filename", 107 | y_col = NULL, 108 | directory = base_dir, 109 | shuffle = FALSE, 110 | class_mode = NULL, 111 | target_size = c(224, 224)) 112 | 113 | batch <- generator_next(pred, completed = NULL) 114 | expect_gt(length(batch), 0) 115 | }) 116 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | String cron_string = BRANCH_NAME == "main" ? "H 12 * * 1,3" : "" 2 | 3 | pipeline { 4 | agent { label 'ephemeral-linux' } 5 | options { 6 | // The Build GPU stage depends on the image from the Push CPU stage 7 | disableConcurrentBuilds() 8 | } 9 | triggers { 10 | cron(cron_string) 11 | } 12 | environment { 13 | GIT_COMMIT_SHORT = sh(returnStdout: true, script:"git rev-parse --short=7 HEAD").trim() 14 | GIT_COMMIT_SUBJECT = sh(returnStdout: true, script:"git log --format=%s -n 1 HEAD").trim() 15 | GIT_COMMIT_AUTHOR = sh(returnStdout: true, script:"git log --format='%an' -n 1 HEAD").trim() 16 | GIT_COMMIT_SUMMARY = "`