├── .gitattributes ├── .github ├── .gitignore └── workflows │ └── R-CMD-check.yaml ├── LICENSE ├── .gitignore ├── tests ├── testthat.R └── testthat │ ├── iris.orig │ ├── logo.orig │ ├── posttypes │ ├── description.orig │ ├── test-encoding.R │ ├── test-parse.R │ └── test-echo.R ├── .Rbuildignore ├── src ├── register.c ├── split.c └── memmem.c ├── NAMESPACE ├── R ├── util.R ├── get_boundary.R ├── parse_query.R ├── parse_http.R ├── demo_httpuv.R ├── demo_rhttpd.R └── parse_multipart.R ├── man ├── demo_rhttpd.Rd ├── demo_httpuv.Rd ├── parse_query.Rd ├── parse_http.Rd └── parse_multipart.Rd ├── webutils.Rproj ├── NEWS ├── DESCRIPTION ├── README.md └── inst └── testpage.html /.gitattributes: -------------------------------------------------------------------------------- 1 | *.orig binary 2 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2017 2 | COPYRIGHT HOLDER: Jeroen Ooms 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | src/*.so 4 | src/*.o 5 | src/*.dll 6 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(webutils) 3 | 4 | test_check("webutils") 5 | -------------------------------------------------------------------------------- /tests/testthat/iris.orig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeroen/webutils/HEAD/tests/testthat/iris.orig -------------------------------------------------------------------------------- /tests/testthat/logo.orig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeroen/webutils/HEAD/tests/testthat/logo.orig -------------------------------------------------------------------------------- /tests/testthat/posttypes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jeroen/webutils/HEAD/tests/testthat/posttypes -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^appveyor\.yml$ 4 | ^\.travis\.yml$ 5 | ^README.md$ 6 | ^\.github$ 7 | -------------------------------------------------------------------------------- /src/register.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | void R_init_webutils(DllInfo* info) { 5 | R_registerRoutines(info, NULL, NULL, NULL, NULL); 6 | R_useDynamicSymbols(info, TRUE); 7 | } 8 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(demo_httpuv) 4 | export(demo_rhttpd) 5 | export(parse_http) 6 | export(parse_multipart) 7 | export(parse_query) 8 | importFrom(jsonlite,fromJSON) 9 | useDynLib(webutils,R_split_boundary) 10 | useDynLib(webutils,R_split_string) 11 | useDynLib(webutils,R_unquote) 12 | -------------------------------------------------------------------------------- /tests/testthat/description.orig: -------------------------------------------------------------------------------- 1 | Package: base 2 | Version: 3.3.3 3 | Priority: base 4 | Title: The R Base Package 5 | Author: R Core Team and contributors worldwide 6 | Maintainer: R Core Team 7 | Description: Base R functions. 8 | License: Part of R 3.3.3 9 | Suggests: methods 10 | Built: R 3.3.3; ; 2017-03-07 18:47:16 UTC; unix 11 | -------------------------------------------------------------------------------- /R/util.R: -------------------------------------------------------------------------------- 1 | # Override default for call. argument 2 | stop <- function(..., call. = FALSE){ 3 | base::stop(..., call. = FALSE) 4 | } 5 | 6 | # Strip trailing whitespace 7 | trail <- function(str){ 8 | str <- sub("\\s+$", "", str, perl = TRUE); 9 | sub("^\\s+", "", str, perl = TRUE); 10 | } 11 | 12 | rawToChar <- function(x){ 13 | out <- base::rawToChar(x) 14 | Encoding(out) <- 'UTF-8' 15 | out 16 | } 17 | -------------------------------------------------------------------------------- /man/demo_rhttpd.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/demo_rhttpd.R 3 | \name{demo_rhttpd} 4 | \alias{demo_rhttpd} 5 | \title{Demo multipart parser with rhttpd} 6 | \usage{ 7 | demo_rhttpd() 8 | } 9 | \description{ 10 | Starts the Rhttpd web server and hosts a simple form including a file 11 | upload to demo the multipart parser. 12 | } 13 | \seealso{ 14 | Other demo: 15 | \code{\link{demo_httpuv}()} 16 | } 17 | \concept{demo} 18 | -------------------------------------------------------------------------------- /tests/testthat/test-encoding.R: -------------------------------------------------------------------------------- 1 | context("encoding") 2 | 3 | test_that("Encoding is retained", { 4 | strings <- c( 5 | "Zürich", 6 | "北京填鴨们", 7 | "ผัดไทย", 8 | "寿司", 9 | rawToChar(as.raw(1:40)), 10 | "?foo&bar=baz!bla\n" 11 | ) 12 | encstr <- curl::curl_escape(strings) 13 | data <- paste(encstr, encstr, collapse = "&", sep = "=") 14 | out <- webutils::parse_query(data) 15 | expect_equal(names(out), strings) 16 | expect_equal(unlist(unname(out)), strings) 17 | }) 18 | -------------------------------------------------------------------------------- /webutils.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /man/demo_httpuv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/demo_httpuv.R 3 | \name{demo_httpuv} 4 | \alias{demo_httpuv} 5 | \title{Demo multipart parser with httpuv} 6 | \usage{ 7 | demo_httpuv(port = 9359) 8 | } 9 | \arguments{ 10 | \item{port}{which port number to run the http server} 11 | } 12 | \description{ 13 | Starts the httpuv web server and hosts a simple form including a file 14 | upload to demo the multipart parser. 15 | } 16 | \seealso{ 17 | Other demo: 18 | \code{\link{demo_rhttpd}()} 19 | } 20 | \concept{demo} 21 | -------------------------------------------------------------------------------- /R/get_boundary.R: -------------------------------------------------------------------------------- 1 | get_boundary <- function(content_type){ 2 | # Check for multipart header 3 | if(!grepl("multipart/form-data;", content_type, fixed = TRUE)) 4 | stop("Content type is not multipart/form-data: ", content_type) 5 | if(!grepl("boundary=", content_type, fixed = TRUE)) 6 | stop("Multipart content-type header without boundary: ", content_type) 7 | 8 | # Extract bounary 9 | m <- regexpr('boundary=[^; ]{2,}', content_type, ignore.case = TRUE) 10 | boundary <- sub('boundary=','',regmatches(content_type, m)[[1]]) 11 | sub('^"(.*)"$', "\\1", boundary) 12 | } 13 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | 1.2.1 2 | - Fix Rd link 'anchor' 3 | 4 | 1.2.0 5 | - Remove quotes from bounary if any (#8) 6 | - Use httpuv::randomPort() in tests and examples 7 | 8 | 1.1 9 | - Allow charset in multipart Content-Type (#3) 10 | 11 | 1.0 12 | - Ignore additional fields such as charset from the multipart boundary 13 | - Fix a unit test to avoid a bug in libcurl: https://github.com/curl/curl/issues/4246 14 | - Make curl_echo() unit test faster on Windows 15 | 16 | 0.6 17 | - Use curl_unescape() in parse_query() because utils::URLdecode() is broken for UTF-8 18 | - Assume UTF-8 for all strings in rawToChar() 19 | 20 | 0.5 21 | - Rewrite of multipart parser in C 22 | - Use the new curl::curl_echo() function to test the parser 23 | -------------------------------------------------------------------------------- /man/parse_query.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_query.R 3 | \name{parse_query} 4 | \alias{parse_query} 5 | \title{Parse query string} 6 | \usage{ 7 | parse_query(query) 8 | } 9 | \arguments{ 10 | \item{query}{a url-encoded query string} 11 | } 12 | \description{ 13 | Parse http parameters from a query string. This includes unescaping 14 | of url-encoded values. 15 | } 16 | \details{ 17 | For http GET requests, the query string is specified 18 | in the URL after the question mark. For http POST or PUT requests, the query 19 | string can be used in the request body when the \code{Content-Type} header 20 | is set to \code{application/x-www-form-urlencoded}. 21 | } 22 | \examples{ 23 | q <- "foo=1\%2B1\%3D2&bar=yin\%26yang" 24 | parse_query(q) 25 | } 26 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: webutils 2 | Type: Package 3 | Title: Utility Functions for Developing Web Applications 4 | Version: 1.2.2 5 | Authors@R: person("Jeroen", "Ooms", role = c("aut", "cre"), email = "jeroenooms@gmail.com", 6 | comment = c(ORCID = "0000-0002-4035-0289")) 7 | Description: Parses http request data in application/json, multipart/form-data, 8 | or application/x-www-form-urlencoded format. Includes example of hosting 9 | and parsing html form data in R using either 'httpuv' or 'Rhttpd'. 10 | License: MIT + file LICENSE 11 | URL: https://jeroen.r-universe.dev/webutils 12 | BugReports: https://github.com/jeroen/webutils/issues 13 | Imports: 14 | curl (>= 2.5), 15 | jsonlite 16 | Suggests: 17 | httpuv, 18 | testthat 19 | RoxygenNote: 7.3.2.9000 20 | Language: en-US 21 | Roxygen: list(markdown = TRUE) 22 | Encoding: UTF-8 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # webutils 2 | 3 | ##### *Utility Functions for Web Applications* 4 | 5 | [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/webutils)](http://cran.r-project.org/package=webutils) 6 | [![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/webutils)](http://cran.r-project.org/web/packages/webutils/index.html) 7 | 8 | > Utility functions for developing web applications. Includes parsers 9 | for application/x-www-form-urlencoded as well as multipart/form-data 10 | and examples of using the parser with either httpuv or rhttpd. 11 | 12 | ## Hello World 13 | 14 | ```r 15 | # Parse json encoded payload: 16 | parse_http('{"foo":123, "bar":true}', 'application/json') 17 | 18 | # Parse url-encoded payload 19 | parse_http("foo=1%2B1%3D2&bar=yin%26yang", "application/x-www-form-urlencoded") 20 | 21 | ## Use demo app to parse multipart/form-data payload 22 | demo_rhttpd() 23 | ``` 24 | -------------------------------------------------------------------------------- /R/parse_query.R: -------------------------------------------------------------------------------- 1 | #' Parse query string 2 | #' 3 | #' Parse http parameters from a query string. This includes unescaping 4 | #' of url-encoded values. 5 | #' 6 | #' For http GET requests, the query string is specified 7 | #' in the URL after the question mark. For http POST or PUT requests, the query 8 | #' string can be used in the request body when the `Content-Type` header 9 | #' is set to `application/x-www-form-urlencoded`. 10 | #' 11 | #' @export 12 | #' @param query a url-encoded query string 13 | #' @examples q <- "foo=1%2B1%3D2&bar=yin%26yang" 14 | #' parse_query(q) 15 | parse_query <- function(query){ 16 | if(is.raw(query)) 17 | query <- rawToChar(query); 18 | stopifnot(is.character(query)); 19 | 20 | #httpuv includes the question mark in query string 21 | query <- sub("^[?]", "", query) 22 | query <- chartr('+',' ', query) 23 | 24 | #split by & character 25 | argstr <- strsplit(query, "&", fixed = TRUE)[[1]] 26 | args <- lapply(argstr, function(x){ 27 | curl::curl_unescape(strsplit(x, "=", fixed = TRUE)[[1]]) 28 | }) 29 | values <- lapply(args, `[`, 2) 30 | names(values) <- vapply(args, `[`, character(1), 1) 31 | return(values) 32 | } 33 | -------------------------------------------------------------------------------- /man/parse_http.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_http.R 3 | \name{parse_http} 4 | \alias{parse_http} 5 | \title{Parse http request} 6 | \usage{ 7 | parse_http(body, content_type, ...) 8 | } 9 | \arguments{ 10 | \item{body}{request body of the http request} 11 | 12 | \item{content_type}{content-type http request header as specified by the client} 13 | 14 | \item{...}{additional arguments passed to parser function} 15 | } 16 | \description{ 17 | Parse the body of a http request, based on the \code{Content-Type} request 18 | header. Currently supports the three most important content types: 19 | \code{application/x-www-form-urlencoded} with \code{\link[=parse_query]{parse_query()}}, 20 | \code{multipart/form-data} with \code{\link[=parse_multipart]{parse_multipart()}}, and \code{application/json} 21 | with \code{\link[jsonlite:fromJSON]{jsonlite::fromJSON()}}. 22 | } 23 | \examples{ 24 | # Parse json encoded payload: 25 | parse_http('{"foo":123, "bar":true}', 'application/json') 26 | 27 | # Parse url-encoded payload 28 | parse_http("foo=1\%2B1\%3D2&bar=yin\%26yang", "application/x-www-form-urlencoded") 29 | 30 | \dontrun{use demo app to parse multipart/form-data payload 31 | demo_rhttpd() 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /tests/testthat/test-parse.R: -------------------------------------------------------------------------------- 1 | context("fixed post data") 2 | 3 | # Example with various types from 'curl' vignette 4 | test_that("parsing example post", { 5 | buf <- readBin("posttypes", raw(), 1e6) 6 | out <- parse_multipart(buf, "------------------------ef343c1f05a612c3") 7 | 8 | # foo = "blabla" 9 | expect_equal(rawToChar(out$foo$value), "blabla") 10 | expect_null(out$foo$content_type) 11 | 12 | # bar = charToRaw("boeboe") 13 | expect_equal(out$bar$value, charToRaw("boeboe")) 14 | expect_null(out$foo$content_type) 15 | 16 | # iris = form_data(serialize(iris, NULL), "application/rda"), 17 | expect_equal(out$iris$value, readBin('iris.orig', raw(), 1e5)) 18 | expect_equal(out$iris$content_type, "application/rda") 19 | 20 | # description = form_file(system.file("DESCRIPTION")), 21 | expect_equal(out$description$value, readBin('description.orig', raw(), 1e5)); 22 | expect_equal(out$description$content_type, "application/octet-stream") 23 | expect_equal(out$description$filename, "DESCRIPTION") 24 | 25 | # logo = form_file(file.path(Sys.getenv("R_DOC_DIR"), "html/logo.jpg"), "image/jpeg") 26 | expect_equal(out$logo$value, readBin('logo.orig', raw(), 1e5)); 27 | expect_equal(out$logo$content_type, "image/jpeg") 28 | expect_equal(out$logo$filename, "logo.jpg") 29 | }) 30 | -------------------------------------------------------------------------------- /man/parse_multipart.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/parse_multipart.R 3 | \name{parse_multipart} 4 | \alias{parse_multipart} 5 | \title{Parse a multipart/form-data request} 6 | \usage{ 7 | parse_multipart(body, boundary) 8 | } 9 | \arguments{ 10 | \item{body}{body of the HTTP request. Must be raw or character vector.} 11 | 12 | \item{boundary}{boundary string as specified in the \code{Content-Type} request header.} 13 | } 14 | \description{ 15 | Parse a multipart/form-data request, which is usually generated from a HTML form 16 | submission. The parameters can include both text values as well as binary files. 17 | They can be distinguished from the presence of a \code{filename} attribute. 18 | } 19 | \details{ 20 | A multipart/form-data request consists of a single body which contains one or more 21 | values plus meta-data, separated using a boundary string. This boundary string 22 | is chosen by the client (e.g. the browser) and specified in the \code{Content-Type} 23 | header of the HTTP request. There is no escaping; it is up to the client to choose 24 | a boundary string that does not appear in one of the values. 25 | 26 | The parser is written in pure R, but still pretty fast because it uses the regex 27 | engine. 28 | } 29 | \examples{ 30 | \dontrun{example form 31 | demo_rhttpd() 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | pull_request: 6 | 7 | name: R-CMD-check.yaml 8 | 9 | permissions: read-all 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macos-13, r: 'release'} 22 | - {os: macos-14, r: 'release'} 23 | - {os: windows-latest, r: '4.1'} 24 | - {os: windows-latest, r: '4.2'} 25 | - {os: windows-latest, r: 'release'} 26 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 27 | - {os: ubuntu-latest, r: 'release'} 28 | - {os: ubuntu-latest, r: 'oldrel-1'} 29 | 30 | env: 31 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 32 | R_KEEP_PKG_SOURCE: yes 33 | 34 | steps: 35 | - uses: actions/checkout@v4 36 | 37 | - uses: r-lib/actions/setup-pandoc@v2 38 | 39 | - uses: r-lib/actions/setup-r@v2 40 | with: 41 | r-version: ${{ matrix.config.r }} 42 | http-user-agent: ${{ matrix.config.http-user-agent }} 43 | use-public-rspm: true 44 | 45 | - uses: r-lib/actions/setup-r-dependencies@v2 46 | with: 47 | extra-packages: any::rcmdcheck 48 | needs: check 49 | 50 | - uses: r-lib/actions/check-r-package@v2 51 | -------------------------------------------------------------------------------- /inst/testpage.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Multipart test page 4 | 5 | 6 | 7 | 8 |
9 |

multipart/form-data test

10 | 11 |

A simple form to test the R multipart/form-data parser.

12 | 13 |
14 |
15 | 16 | 17 |
18 |
19 | 20 | 21 |
22 |
23 | 24 | 25 |
26 | 27 |
28 | 32 |
33 |
34 | 38 |
39 | 40 | 41 |
42 |
43 | 44 | 45 | -------------------------------------------------------------------------------- /R/parse_http.R: -------------------------------------------------------------------------------- 1 | #' Parse http request 2 | #' 3 | #' Parse the body of a http request, based on the `Content-Type` request 4 | #' header. Currently supports the three most important content types: 5 | #' `application/x-www-form-urlencoded` with [parse_query()], 6 | #' `multipart/form-data` with [parse_multipart()], and `application/json` 7 | #' with [jsonlite::fromJSON()]. 8 | #' 9 | #' @export 10 | #' @param body request body of the http request 11 | #' @param content_type content-type http request header as specified by the client 12 | #' @param ... additional arguments passed to parser function 13 | #' @importFrom jsonlite fromJSON 14 | #' @examples # Parse json encoded payload: 15 | #' parse_http('{"foo":123, "bar":true}', 'application/json') 16 | #' 17 | #' # Parse url-encoded payload 18 | #' parse_http("foo=1%2B1%3D2&bar=yin%26yang", "application/x-www-form-urlencoded") 19 | #' 20 | #' \dontrun{use demo app to parse multipart/form-data payload 21 | #' demo_rhttpd() 22 | #' } 23 | parse_http <- function(body, content_type, ...){ 24 | # Remove header name if present 25 | content_type <- sub("Content-Type: ?", "", content_type, ignore.case=TRUE); 26 | 27 | # Switch by content-type 28 | if(grepl("multipart/form-data;", content_type, fixed = TRUE)){ 29 | return(parse_multipart(body, get_boundary(content_type))) 30 | } else if(grepl("application/x-www-form-urlencoded", content_type, fixed=TRUE)){ 31 | return(parse_query(body)) 32 | } else if(grepl("(text|application)/json", content_type)){ 33 | if(is.raw(body)) 34 | body <- rawToChar(body) 35 | return(fromJSON(body, ...)) 36 | } else { 37 | stop("Unsupported Content-Type: ", content_type) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /R/demo_httpuv.R: -------------------------------------------------------------------------------- 1 | #' Demo multipart parser with httpuv 2 | #' 3 | #' Starts the httpuv web server and hosts a simple form including a file 4 | #' upload to demo the multipart parser. 5 | # 6 | #' @export 7 | #' @family demo 8 | #' @param port which port number to run the http server 9 | demo_httpuv <- function(port = 9359){ 10 | rook_handler <- function(env){ 11 | 12 | # See Rook spec 13 | content_type <- env[["CONTENT_TYPE"]] 14 | http_method <- env[["REQUEST_METHOD"]] 15 | body <- env[["rook.input"]]$read() 16 | path <- env[["PATH_INFO"]] 17 | 18 | # Show HTML page for GET requests. 19 | if(tolower(http_method) %in% c("post", "put")){ 20 | # Parse the multipart/form-data 21 | message("Received HTTP POST request.") 22 | postdata <- parse_http(body, content_type) 23 | 24 | # Print it to the R console (just for fun) 25 | utils::str(postdata) 26 | 27 | # process this form 28 | username <- rawToChar(as.raw(postdata$username$value)) 29 | email <- rawToChar(as.raw(postdata$email_address$value)) 30 | food <- rawToChar(as.raw(postdata$food$value)) 31 | picture <- file.path(getwd(), basename(postdata$picture$filename)) 32 | writeBin(postdata$picture$value, picture) 33 | 34 | # return summary to the client 35 | list( 36 | status = 200, 37 | body = paste0("User: ", username, "\nEmail: ", email, "\nPicture (copy): ", picture,"\nFood: ", food, "\n"), 38 | headers = c("Content-Type" = "text/plain") 39 | ) 40 | } else { 41 | message("Received HTTP GET request: ", path) 42 | testpage <- system.file("testpage.html", package="webutils"); 43 | stopifnot(file.exists(testpage)) 44 | list ( 45 | status = 200, 46 | body = paste(readLines(testpage), collapse="\n"), 47 | headers = c("Content-Type" = "text/html") 48 | ) 49 | } 50 | } 51 | 52 | # Start httpuv 53 | if(!length(port)) 54 | port <- httpuv::randomPort() 55 | server_id <- httpuv::startServer("0.0.0.0", port, list(call = rook_handler)) 56 | on.exit({ 57 | message("stopping server") 58 | httpuv::stopServer(server_id) 59 | }, add = TRUE) 60 | url <- paste0("http://localhost:", port, "/") 61 | message("Opening ", url) 62 | utils::browseURL(url) 63 | repeat { 64 | httpuv::service() 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /tests/testthat/test-echo.R: -------------------------------------------------------------------------------- 1 | context("echo with httpuv") 2 | 3 | # Example with various types from 'curl' vignette 4 | test_that("test echo from httpuv", { 5 | desc <- system.file("DESCRIPTION") 6 | logo <- file.path(Sys.getenv("R_DOC_DIR"), "html/logo.jpg") 7 | h <- curl::handle_setform(curl::new_handle(forbid_reuse = TRUE), 8 | foo = "blabla", 9 | bar = charToRaw("boeboe"), 10 | iris = curl::form_data(serialize(iris, NULL), "application/rda"), 11 | description = curl::form_file(desc), 12 | logo = curl::form_file(logo, "image/jpeg") 13 | ) 14 | req <- curl::curl_echo(h, port = httpuv::randomPort()) 15 | formdata <- parse_http(req$body, req$content_type) 16 | 17 | # foo = "blabla" 18 | expect_equal(rawToChar(formdata$foo$value), "blabla") 19 | expect_null(formdata$foo$content_type) 20 | 21 | # bar = charToRaw("boeboe") 22 | expect_equal(formdata$bar$value, charToRaw("boeboe")) 23 | expect_null(formdata$foo$content_type) 24 | 25 | # iris = form_data(serialize(iris, NULL), "application/rda"), 26 | expect_equal(formdata$iris$value, serialize(iris, NULL), "application/rda") 27 | expect_equal(formdata$iris$content_type, "application/rda") 28 | 29 | # description = form_file(system.file("DESCRIPTION")), 30 | expect_equal(formdata$description$value, readBin(desc, raw(), 1e5)) 31 | expect_equal(formdata$description$content_type, "application/octet-stream") 32 | expect_equal(formdata$description$filename, "DESCRIPTION") 33 | 34 | # logo = form_file(file.path(Sys.getenv("R_DOC_DIR"), "html/logo.jpg"), "image/jpeg") 35 | expect_equal(formdata$logo$value, readBin(logo, raw(), 1e5)); 36 | expect_equal(formdata$logo$content_type, "image/jpeg") 37 | expect_equal(formdata$logo$filename, "logo.jpg") 38 | }) 39 | 40 | test_that("Echo a big file", { 41 | # Create a random file (~30 MB) 42 | # Note: can test even bigger files but curl_echo() is a bit slow on Windows 43 | tmp <- tempfile() 44 | size <- if(.Platform$OS.type == "windows"){ 45 | 1e5 46 | } else { 47 | runif(1, 3e6, 4e6) 48 | } 49 | buf <- serialize(rnorm(size), NULL) 50 | writeBin(buf, tmp) 51 | on.exit(unlink(tmp)) 52 | 53 | # Roundtrip via httpuv 54 | h <- curl::handle_setform(curl::new_handle(forbid_reuse = TRUE), myfile = curl::form_file(tmp)) 55 | req <- curl::curl_echo(h, port = httpuv::randomPort()) 56 | formdata <- parse_http(req$body, req$content_type) 57 | 58 | # Tests 59 | expect_length(formdata$myfile$value, file.info(tmp)$size) 60 | expect_identical(formdata$myfile$filename, basename(tmp)) 61 | expect_identical(formdata$myfile$value, buf) 62 | }) 63 | -------------------------------------------------------------------------------- /R/demo_rhttpd.R: -------------------------------------------------------------------------------- 1 | #' Demo multipart parser with rhttpd 2 | #' 3 | #' Starts the Rhttpd web server and hosts a simple form including a file 4 | #' upload to demo the multipart parser. 5 | # 6 | #' @export 7 | #' @family demo 8 | demo_rhttpd <- function(){ 9 | rhttpd_handler <- function(reqpath, reqquery, reqbody, reqheaders){ 10 | 11 | # Extract HTTP content type and method from strange rhttpd format 12 | content_type <- grep("Content-Type:", strsplit(rawToChar(reqheaders), "\n")[[1]], ignore.case=TRUE, value=TRUE); 13 | content_type <- sub("Content-Type: ?", "", content_type, ignore.case=TRUE); 14 | http_method <- grep("Request-Method:", strsplit(rawToChar(reqheaders), "\n")[[1]], ignore.case=TRUE, value=TRUE); 15 | http_method <- sub("Request-Method: ?", "", http_method, ignore.case=TRUE); 16 | 17 | # Show HTML page for GET requests. 18 | if(tolower(http_method) %in% c("post", "put") && length(reqbody)){ 19 | 20 | # Parse the multipart/form-data 21 | message("Received HTTP POST request.") 22 | 23 | # Check for multipart() 24 | postdata <- parse_http(reqbody, content_type) 25 | 26 | # Print it to the R console (just for fun) 27 | utils::str(postdata) 28 | 29 | # process this form 30 | username <- rawToChar(as.raw(postdata$username$value)) 31 | email <- rawToChar(as.raw(postdata$email_address$value)) 32 | food <- rawToChar(as.raw(postdata$food$value)) 33 | picture <- file.path(getwd(), basename(postdata$picture$filename)) 34 | writeBin(as.raw(postdata$picture$value), picture) 35 | 36 | # return summary to the client 37 | list( 38 | "payload" = paste0("User: ", username, "\nEmail: ", email, "\nPicture (copy): ", picture,"\nFood: ", food, "\n"), 39 | "content-type" = "text/plain", 40 | "headers" = NULL, 41 | "status code" = 200 42 | ) 43 | } else { 44 | message("Received HTTP GET request: ", reqpath) 45 | testpage <- system.file("testpage.html", package="webutils"); 46 | stopifnot(file.exists(testpage)) 47 | list( 48 | "payload" = readBin(testpage, raw(), n=file.info(testpage)$size), 49 | "content-type" = "text/html", 50 | "headers" = NULL, 51 | "status code" = 200 52 | ) 53 | } 54 | } 55 | 56 | # Start rhttpd and get port 57 | port <- if(R.version[["svn rev"]] < 67550) { 58 | try(tools::startDynamicHelp(TRUE), silent=TRUE); 59 | utils::getFromNamespace("httpdPort", "tools"); 60 | } else { 61 | tools::startDynamicHelp(NA); 62 | } 63 | 64 | handlers_env <- utils::getFromNamespace(".httpd.handlers.env", "tools") 65 | assign("test", rhttpd_handler, handlers_env) 66 | url <- paste0("http://localhost:", port, "/custom/test") 67 | message("Opening ", url) 68 | utils::browseURL(url) 69 | } 70 | -------------------------------------------------------------------------------- /R/parse_multipart.R: -------------------------------------------------------------------------------- 1 | #' Parse a multipart/form-data request 2 | #' 3 | #' Parse a multipart/form-data request, which is usually generated from a HTML form 4 | #' submission. The parameters can include both text values as well as binary files. 5 | #' They can be distinguished from the presence of a `filename` attribute. 6 | #' 7 | #' A multipart/form-data request consists of a single body which contains one or more 8 | #' values plus meta-data, separated using a boundary string. This boundary string 9 | #' is chosen by the client (e.g. the browser) and specified in the `Content-Type` 10 | #' header of the HTTP request. There is no escaping; it is up to the client to choose 11 | #' a boundary string that does not appear in one of the values. 12 | #' 13 | #' The parser is written in pure R, but still pretty fast because it uses the regex 14 | #' engine. 15 | #' 16 | #' @export 17 | #' @param body body of the HTTP request. Must be raw or character vector. 18 | #' @param boundary boundary string as specified in the `Content-Type` request header. 19 | #' @examples \dontrun{example form 20 | #' demo_rhttpd() 21 | #' } 22 | parse_multipart <- function(body, boundary){ 23 | # Some HTTP daemons give the body as a string instead of raw. 24 | if(is.character(body)) 25 | body <- charToRaw(paste(body, collapse="")) 26 | 27 | if(is.character(boundary)) 28 | boundary <- charToRaw(boundary) 29 | 30 | # Heavy lifting in C 31 | stopifnot(is.raw(body), is.raw(boundary)) 32 | form_data <- split_by_boundary(body, boundary) 33 | 34 | # Output 35 | out <- lapply(form_data, function(val){ 36 | headers <- parse_header(val[[1]]) 37 | c(list( 38 | value = val[[2]] 39 | ), headers) 40 | }) 41 | 42 | names(out) <- sapply(out, `[[`, 'name'); 43 | out 44 | } 45 | 46 | parse_header <- function(buf){ 47 | headers <- strsplit(rawToChar(buf), "\r\n", fixed = TRUE)[[1]] 48 | out <- split_names(headers, ": ") 49 | if(length(out$content_disposition)){ 50 | pieces <- strsplit(out$content_disposition, "; ")[[1]] 51 | out$content_disposition <- pieces[1] 52 | out <- c(out, lapply(split_names(pieces[-1], "="), unquote)) 53 | } 54 | out 55 | } 56 | 57 | #' @useDynLib webutils R_split_boundary 58 | split_by_boundary <- function(body, boundary){ 59 | .Call(R_split_boundary, body, boundary) 60 | } 61 | 62 | #' @useDynLib webutils R_split_string 63 | split_by_string <- function(string, split = ":"){ 64 | .Call(R_split_string, string, split) 65 | } 66 | 67 | #' @useDynLib webutils R_unquote 68 | unquote <- function(string){ 69 | .Call(R_unquote, string) 70 | } 71 | 72 | split_names <- function(x, split){ 73 | matches <- lapply(x, split_by_string, split) 74 | names <- chartr("-", "_", tolower(sapply(matches, `[[`, 1))) 75 | values <- lapply(matches, `[[`, 2) 76 | structure(values, names = names); 77 | } 78 | -------------------------------------------------------------------------------- /src/split.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | 6 | //from memmem.c 7 | void * fallback_memmem(const void *h0, size_t k, const void *n0, size_t l); 8 | 9 | #if !defined(_WIN32) && !defined(__sun) 10 | #define my_memmem memmem 11 | #else 12 | #define my_memmem fallback_memmem 13 | #endif 14 | 15 | //split by first CRLF 16 | SEXP split_header(unsigned char * haystack, size_t n){ 17 | SEXP out = PROTECT(allocVector(VECSXP, 2)); 18 | unsigned char * cur = my_memmem(haystack, n, "\r\n\r\n", 4); 19 | if(cur){ 20 | size_t len = cur - haystack; 21 | SEXP header = allocVector(RAWSXP, len); 22 | memcpy(RAW(header), haystack, len); 23 | SET_VECTOR_ELT(out, 0, header); 24 | SEXP body = allocVector(RAWSXP, n - len - 4); 25 | memcpy(RAW(body), cur + 4, n - len - 4); 26 | SET_VECTOR_ELT(out, 1, body); 27 | haystack = cur + 4; 28 | n -= len + 4; 29 | } 30 | SEXP body = allocVector(RAWSXP, n); 31 | memcpy(RAW(body), haystack, n); 32 | SET_VECTOR_ELT(out, 1, body); 33 | UNPROTECT(1); 34 | return out; 35 | } 36 | 37 | //split by arbitrary string 38 | SEXP R_split_boundary(SEXP body, SEXP boundary){ 39 | unsigned char * haystack = RAW(body); 40 | unsigned char * needle = RAW(boundary); 41 | 42 | //expect no more than 1000 boundaries 43 | unsigned char * offsets[1000] = { 0 }; 44 | 45 | //initial values 46 | size_t n = Rf_length(body); 47 | size_t m = Rf_length(boundary); 48 | 49 | //find the needles 50 | int count = 0; 51 | unsigned char * cur = NULL; 52 | for(count = 0; (cur = my_memmem(haystack, n, needle, m)) && (n > m); count++){ 53 | offsets[count] = cur; 54 | n = n - (cur - haystack) - m; 55 | haystack = cur + m; 56 | } 57 | 58 | //extract the 59 | if(count < 2) 60 | return allocVector(VECSXP, 0); 61 | 62 | //extract the payloads 63 | SEXP out = PROTECT(allocVector(VECSXP, count - 1)); 64 | for(int i = 0; i < count - 1; i++){ 65 | unsigned char * start = offsets[i] + m + 2; //drop ending CRLF 66 | unsigned char * end = offsets[i+1] - 4; //drop beginning CRLF + boundary preamble "--" 67 | size_t len = end - start; 68 | SET_VECTOR_ELT(out, i, split_header(start, len)); 69 | } 70 | 71 | UNPROTECT(1); 72 | return out; 73 | } 74 | 75 | SEXP R_split_string(SEXP string, SEXP split){ 76 | const char * str = CHAR(STRING_ELT(string, 0)); 77 | const char * cut = CHAR(STRING_ELT(split, 0)); 78 | char * out = strstr(str, cut); 79 | if(!out) 80 | return string; 81 | SEXP res = PROTECT(allocVector(STRSXP, 2)); 82 | SET_STRING_ELT(res, 0, mkCharLen(str, out - str)); 83 | SET_STRING_ELT(res, 1, mkChar(out + strlen(cut))); 84 | UNPROTECT(1); 85 | return res; 86 | } 87 | 88 | SEXP R_unquote(SEXP string){ 89 | const char * str = CHAR(STRING_ELT(string, 0)); 90 | size_t len = strlen(str); 91 | if(len > 1 && str[0] == '"' && str[len-1] == '"') 92 | return ScalarString(mkCharLen(str + 1, len - 2)); 93 | return string; 94 | } 95 | -------------------------------------------------------------------------------- /src/memmem.c: -------------------------------------------------------------------------------- 1 | /*- 2 | * Copyright (c) 2005-2014 Rich Felker, et al. 3 | * 4 | * Permission is hereby granted, free of charge, to any person obtaining 5 | * a copy of this software and associated documentation files (the 6 | * "Software"), to deal in the Software without restriction, including 7 | * without limitation the rights to use, copy, modify, merge, publish, 8 | * distribute, sublicense, and/or sell copies of the Software, and to 9 | * permit persons to whom the Software is furnished to do so, subject to 10 | * the following conditions: 11 | * 12 | * The above copyright notice and this permission notice shall be 13 | * included in all copies or substantial portions of the Software. 14 | * 15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 16 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 17 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 18 | * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 19 | * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 20 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 21 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | */ 23 | 24 | #include 25 | #include 26 | 27 | static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) 28 | { 29 | uint16_t nw = n[0]<<8 | n[1], hw = h[0]<<8 | h[1]; 30 | for (h++, k--; k; k--, hw = hw<<8 | *++h) 31 | if (hw == nw) return (char *)h-1; 32 | return 0; 33 | } 34 | 35 | static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) 36 | { 37 | uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8; 38 | uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8; 39 | for (h+=2, k-=2; k; k--, hw = (hw|*++h)<<8) 40 | if (hw == nw) return (char *)h-2; 41 | return 0; 42 | } 43 | 44 | static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) 45 | { 46 | uint32_t nw = n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; 47 | uint32_t hw = h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; 48 | for (h+=3, k-=3; k; k--, hw = hw<<8 | *++h) 49 | if (hw == nw) return (char *)h-3; 50 | return 0; 51 | } 52 | 53 | #define MAX(a,b) ((a)>(b)?(a):(b)) 54 | #define MIN(a,b) ((a)<(b)?(a):(b)) 55 | 56 | #define BITOP(a,b,op) \ 57 | ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a)))) 58 | 59 | /* 60 | * Two Way string search algorithm, with a bad shift table applied to the last 61 | * byte of the window. A bit array marks which entries in the shift table are 62 | * initialized to avoid fully initializing a 1kb/2kb table. 63 | * 64 | * Reference: CROCHEMORE M., PERRIN D., 1991, Two-way string-matching, 65 | * Journal of the ACM 38(3):651-675 66 | */ 67 | static char *twoway_memmem(const unsigned char *h, const unsigned char *z, const unsigned char *n, size_t l) 68 | { 69 | size_t i, ip, jp, k, p, ms, p0, mem, mem0; 70 | size_t byteset[32 / sizeof(size_t)] = { 0 }; 71 | size_t shift[256]; 72 | 73 | /* Computing length of needle and fill shift table */ 74 | for (i=0; i n[jp+k]) { 86 | jp += k; 87 | k = 1; 88 | p = jp - ip; 89 | } else { 90 | ip = jp++; 91 | k = p = 1; 92 | } 93 | } 94 | ms = ip; 95 | p0 = p; 96 | 97 | /* And with the opposite comparison */ 98 | ip = -1; jp = 0; k = p = 1; 99 | while (jp+k ms+1) ms = ip; 115 | else p = p0; 116 | 117 | /* Periodic needle? */ 118 | if (memcmp(n, n+p, ms+1)) { 119 | mem0 = 0; 120 | p = MAX(ms, l-ms-1) + 1; 121 | } else mem0 = l-p; 122 | mem = 0; 123 | 124 | /* Search loop */ 125 | for (;;) { 126 | /* If remainder of haystack is shorter than needle, done */ 127 | if (z-h < l) return 0; 128 | 129 | /* Check last byte first; advance by shift on mismatch */ 130 | if (BITOP(byteset, h[l-1], &)) { 131 | k = l-shift[h[l-1]]; 132 | if (k) { 133 | if (mem0 && mem && k < p) k = l-p; 134 | h += k; 135 | mem = 0; 136 | continue; 137 | } 138 | } else { 139 | h += l; 140 | mem = 0; 141 | continue; 142 | } 143 | 144 | /* Compare right half */ 145 | for (k=MAX(ms+1,mem); kmem && n[k-1] == h[k-1]; k--); 153 | if (k <= mem) return (char *)h; 154 | h += p; 155 | mem = mem0; 156 | } 157 | } 158 | 159 | void * fallback_memmem(const void *h0, size_t k, const void *n0, size_t l) 160 | { 161 | const unsigned char *h = h0, *n = n0; 162 | 163 | /* Return immediately on empty needle */ 164 | if (!l) return (void *)h; 165 | 166 | /* Return immediately when needle is longer than haystack */ 167 | if (k