├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── NEWS ├── R ├── azureSchema.R ├── azureml-defunct.R ├── azureml-package.R ├── config.R ├── consume.R ├── datasets.R ├── deleteWebservice.R ├── discover.R ├── fetch.R ├── getsyms.R ├── internal.R ├── methods.R ├── publish.R ├── services.R ├── workspace.R ├── wrapper.R ├── zzz.R └── zzz_test_helpers.R ├── README.md ├── inst ├── COPYRIGHTS ├── doc │ ├── getting_started.R │ ├── getting_started.Rmd │ └── getting_started.html └── examples │ ├── example_datasets.R │ ├── example_download.R │ ├── example_experiments.R │ ├── example_publish.R │ └── example_upload.R ├── man ├── AzureML-deprecated.Rd ├── AzureML-package.Rd ├── consume.Rd ├── datasets.Rd ├── delete.datasets.Rd ├── deleteWebService.Rd ├── discoverSchema.Rd ├── download.datasets.Rd ├── download.intermediate.dataset.Rd ├── endpointHelp.Rd ├── endpoints.Rd ├── experiments.Rd ├── figures │ ├── authorizationToken.pdf │ ├── authorization_token.png │ ├── workspaceId.pdf │ └── workspace_id.png ├── getDetailsFromUrl.Rd ├── is.Dataset.Rd ├── is.Endpoint.Rd ├── is.Service.Rd ├── is.Workspace.Rd ├── publishWebService.Rd ├── read.AzureML.config.Rd ├── refresh.Rd ├── services.Rd ├── upload.dataset.Rd └── workspace.Rd ├── tests ├── testthat.R └── testthat │ ├── .gitignore │ ├── test-1-workspace-no-config.R │ ├── test-1-workspace.R │ ├── test-2-datasets-upload-download-delete.R │ ├── test-2-download-each-dataset-type.R │ ├── test-2-multiple-dataset-download.R │ ├── test-3-experiments-download.R │ ├── test-5-try_fetch.R │ ├── test-6-publish.R │ ├── test-7-discover-schema.R │ ├── test-7-discover.R │ └── test-99-final.R └── vignettes ├── Readme.md ├── authToken.png ├── getting_started.Rmd ├── getting_started.Rmd-original └── workspaceID.png /.Rbuildignore: -------------------------------------------------------------------------------- 1 | Readme.* 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | .travis.yml 5 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | *.RData 4 | *.Rproj 5 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: AzureML 2 | Type: Package 3 | Title: Interface with Azure Machine Learning Datasets, Experiments and Web Services 4 | Description: Functions and datasets to support Azure Machine Learning. This 5 | allows you to interact with datasets, as well as publish and consume R functions 6 | as API services. 7 | Version: 0.2.15 8 | Date: 2019-07-15 9 | Authors@R: c( 10 | person("Hong", "Ooi", role="cre", email="hongooi@microsoft.com"), 11 | person("Rich", "Calaway", role="ctb"), 12 | person("Andrie", "de Vries", role=c("aut"), email="apdevries@gmail.com"), 13 | person(family="Microsoft Corporation", role="cph"), 14 | person(family="Revolution Analytics", role="cph", comment="Code adapted from the foreach package") 15 | ) 16 | Copyright: COPYRIGHTS 17 | License: MIT + file LICENSE 18 | URL: https://github.com/RevolutionAnalytics/AzureML 19 | BugReports: https://github.com/RevolutionAnalytics/AzureML/issues 20 | LazyData: TRUE 21 | VignetteBuilder: knitr 22 | SystemRequirements: Requires external zip utility, available in path. On 23 | windows, it's sufficient to install RTools. 
24 | Imports: 25 | jsonlite(>= 0.9.16), 26 | curl(>= 0.8), 27 | foreign, 28 | codetools, 29 | base64enc, 30 | miniCRAN, 31 | uuid 32 | Suggests: 33 | testthat, 34 | knitr, 35 | rmarkdown, 36 | lme4, 37 | gbm, 38 | MASS, 39 | mockery 40 | RoxygenNote: 6.0.1 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2015-2016 2 | COPYRIGHT HOLDER: Microsoft Corporation -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(print,Datasets) 4 | S3method(print,Experiments) 5 | S3method(print,Workspace) 6 | S3method(print,discoverSchema) 7 | S3method(str,Workspace) 8 | export(consume) 9 | export(datasets) 10 | export(delete.datasets) 11 | export(deleteWebService) 12 | export(discoverSchema) 13 | export(download.datasets) 14 | export(download.intermediate.dataset) 15 | export(endpointHelp) 16 | export(endpoints) 17 | export(experiments) 18 | export(getEndpoints) 19 | export(getWebServices) 20 | export(is.Dataset) 21 | export(is.Endpoint) 22 | export(is.Service) 23 | export(is.Workspace) 24 | export(publishWebService) 25 | export(read.AzureML.config) 26 | export(refresh) 27 | export(services) 28 | export(updateWebService) 29 | export(upload.dataset) 30 | export(workspace) 31 | export(write.AzureML.config) 32 | import(codetools) 33 | importFrom(base64enc,base64encode) 34 | importFrom(curl,curl) 35 | importFrom(curl,curl_escape) 36 | importFrom(curl,curl_fetch_memory) 37 | importFrom(curl,handle_reset) 38 | importFrom(curl,handle_setheaders) 39 | importFrom(curl,handle_setopt) 40 | importFrom(curl,new_handle) 41 | importFrom(foreign,read.arff) 42 | importFrom(jsonlite,fromJSON) 43 | importFrom(jsonlite,toJSON) 44 | importFrom(miniCRAN,makeRepo) 45 | 
importFrom(miniCRAN,pkgDep) 46 | importFrom(stats,runif) 47 | importFrom(stats,setNames) 48 | importFrom(utils,capture.output) 49 | importFrom(utils,head) 50 | importFrom(utils,read.table) 51 | importFrom(utils,str) 52 | importFrom(utils,write.table) 53 | importFrom(utils,zip) 54 | importFrom(uuid,UUIDgenerate) 55 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | AzureML v0.2.13 2 | --------------- 3 | 4 | Fixes: 5 | 6 | * Fixes a bug that lead to a memory leak on the AzureML web service during consume(). Load exportenv only once (during first call) # 117 7 | 8 | 9 | AzureML v0.2.12 10 | --------------- 11 | 12 | This version was released to CRAN on 2017-07-12 13 | 14 | Fixes: 15 | 16 | * Upload packages from a local repository using `publishWebservice()` #109 17 | 18 | Enhancements 19 | 20 | * Produce more informative error messages from consume() (#57) 21 | * Better documentation and examples for endpoint settings, especially for regional AML instances (#105) 22 | 23 | This version also contains many other internal improvements that probably won't be visible to most users 24 | 25 | 26 | AzureML v0.2.11 Bug fix and refactor release 27 | --------------- 28 | 29 | This release fixes multiple internal issues: 30 | 31 | * Add additional skip logic to skip tests on CRAN and if no Internet connection tests (#114) 32 | * Fix unit tests and code for download.datasets() to deal with multiple datasets bug tests (#111) 33 | * Upload packages from a local repository using `publishWebservice()` enhancement (#109) 34 | * Missing workspace parameter on download.datasets() leads to cryptic error message bug (#93) 35 | * Fix bug where example for download.datasets() doesn't work (#104) 36 | 37 | 38 | 39 | -------------------------------------------------------------------------------- /R/azureSchema.R: 
# Copyright (c) 2015-2016 Microsoft Corporation
# All rights reserved.
#
# The MIT License (MIT)
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.


# Convert input schema to API expected format.
#
# Helper function to convert the user-friendly input and output schema
# parameters of publishWebService() to the format expected by the API.
#
# @param argList named list mapping argument names to R/AzureML type strings
# @return named list of AzureML schema entries, one per argument, each a
#   list with "type" and (usually) "format" fields
#
# @keywords internal
convertArgsToAMLschema <- function(argList) {
  form <- list()
  for (arg in names(argList)) {
    type <- argList[[arg]]
    # BUG FIX: class() can return a vector (e.g. c("ordered", "factor") for
    # ordered factors); the original passed the whole vector to `if`, which
    # is an error in R >= 4.2. Use the first (most specific) class.
    if (length(type) > 1) type <- type[[1L]]

    form[[arg]] <- if (type %in% c("numeric", "double")) {
      list("type" = "number", "format" = "double")
    } else if (type %in% c("date-time", "time-span")) {
      list("type" = "string", "format" = type)
    } else if (type %in% c("uint16", "int16", "uint32", "int32", "uint64", "int64")) {
      list("type" = "integer", "format" = type)
    } else if (type %in% c("integer")) {
      list("type" = "integer", "format" = "int32")
    } else if (type %in% c("logical", "bool", "boolean")) {
      list("type" = "boolean")
    } else if (type %in% c("character", "string", "factor", "ordered")) {
      list("type" = "string", "format" = "string")
    } else {
      # call. = TRUE was redundant in the original (it is the default)
      stop(sprintf("Error: data type \"%s\" not supported", type))
    }
  }
  return(form)
}

# TRUE if every element of x is one of the type names the AzureML schema
# converter understands, FALSE otherwise.
testAllowedTypes <- function(x){
  allowedTypes <- c("numeric", "double",
                    "date-time", "time-span",
                    "uint16", "int16", "uint32", "int32", "uint64", "int64", "integer",
                    "logical", "bool", "boolean",
                    "character", "string", "factor", "ordered")
  all(sapply(x, function(x) x %in% allowedTypes))
}


# TRUE if x is a data frame, or was one before azureSchema() converted it
# (azureSchema records the pre-conversion class in attr "original.class").
inputSchemaIsDataframe <- function(x){
  inherits(x, "data.frame") || "data.frame" %in% attr(x, "original.class")
}


# Build an AzureML schema description from an R object.
#
# NOTE: the original header here was a copy-paste of convertArgsToAMLschema's
# documentation (it described an `argList` parameter that does not exist).
# This function accepts a named list of type strings, a data frame, or a
# (possibly nested) list of inputs, and converts it to the schema format
# expected by the AzureML API. Inputs that are already of class "azureSchema"
# are returned unchanged.
#
# @param object a data frame, a named list of type strings, or a list of
#   data-frame / scalar inputs
# @return a list of class "azureSchema" with attribute "original.class"
#   recording the class of the input
#
# @keywords internal
# @examples
# azureSchema(list(x = "numeric"))
# azureSchema(iris)
# azureSchema(list(input1 = iris, input2 = cars))
azureSchema <- function(object){
  object_name <- substitute(object)
  if(inherits(object, "azureSchema")) return(object)

  # Scalar values already in azure schema format, probably hand coded
  if(all(sapply(object, typeof) != "list") && (testAllowedTypes(object))){
    z <- convertArgsToAMLschema(object)
    class(z) <- c("azureSchema", class(z))
    attr(z, "original.class") <- class(object)
    return(z)
  }

  # Scalar values, most likely a data frame
  if(all(sapply(object, typeof) != "list") && (!testAllowedTypes(object))){
    message(paste0("converting `", object_name, "` to data frame"))
    object <- as.data.frame(object)
  }

  if(!is.list(object)) stop("object must be a list")
  if(is.data.frame(object)) {
    z <- convertArgsToAMLschema(lapply(object, class))
    class(z) <- c("azureSchema", class(z))
    attr(z, "original.class") <- class(object)
    return(z)
  }
  z <- rapply(object, class, how = "replace")
  # vapply guarantees a logical vector even for empty input
  idx <- vapply(object, is.list, logical(1))
  # BUG FIX: the original tested `length(idx) > 0` and `length(!idx) > 0`,
  # which are the same condition (length is unchanged by negation). The
  # intent was clearly "are there any list / any non-list elements".
  if(any(idx)){
    z[idx] <- lapply(object[idx], function(x) convertArgsToAMLschema(lapply(x, class)))
  }
  if(any(!idx)){
    z[!idx] <- convertArgsToAMLschema(lapply(object[!idx], class))
  }
  class(z) <- c("azureSchema", class(z))
  attr(z, "original.class") <- class(object)
  z
}

# Print method: show the schema structure compactly via str().
print.azureSchema <- function(x, ...){
  str(x)
}
# -------------------------------------------------------------------------------- /R/azureml-defunct.R:
-------------------------------------------------------------------------------- 1 | #' Deprecated functions 2 | #' 3 | #' @rdname AzureML-deprecated 4 | consumeDataframe <- function(){ 5 | .Defunct("consume") 6 | } 7 | 8 | #' @rdname AzureML-deprecated 9 | consumeFile <- function(){ 10 | .Defunct("consume") 11 | } 12 | 13 | #' @rdname AzureML-deprecated 14 | consumeLists <- function(){ 15 | .Defunct("consume") 16 | } 17 | 18 | #' @rdname AzureML-deprecated 19 | getEPDetails <- function(){ 20 | .Defunct("endpoints") 21 | } 22 | 23 | #' @rdname AzureML-deprecated 24 | getWSDetails <- function(){ 25 | .Defunct("services") 26 | } 27 | -------------------------------------------------------------------------------- /R/azureml-package.R: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015-2016 Microsoft Corporation 2 | # All rights reserved. 3 | # 4 | # The MIT License (MIT) 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | 23 | 24 | 25 | #' Interface to Azure ML Studio datasets and experiments. 26 | #' 27 | #' Allows you to work with Azure ML Studio datasets and experiments directly from R. 28 | #' 29 | #' @section Summary of functions: 30 | #' 31 | #' 1. Create a reference to an Azure ML workspace 32 | #' 33 | #' \itemize{ 34 | #' \item Workspace: \code{\link{workspace}} 35 | #' } 36 | #' 37 | #' 2. Datasets 38 | #' 39 | #' \itemize{ 40 | #' \item List available datasets: \code{\link{datasets}} 41 | #' \item Download datasets: \code{\link{download.datasets}} 42 | #' \item Upload a dataset: \code{\link{upload.dataset}} 43 | #' \item Delete datasets: \code{\link{delete.datasets}} 44 | #' } 45 | #' 46 | #' 3. Experiments 47 | #' 48 | #' \itemize{ 49 | #' \item Get experiments: \code{\link{experiments}} 50 | #' \item Get data from an experiment port: \code{\link{download.intermediate.dataset}} 51 | #' } 52 | #' 53 | #' 4. Web Services 54 | #' 55 | #' \itemize{ 56 | #' \item List available services: \code{\link{services}} 57 | #' \item Consume a web service (run data through it and retrieve result): \code{\link{consume}} 58 | #' \item Publish an R function as a web service: \code{\link{publishWebService}} 59 | #' \item Update an existing web service: \code{\link{updateWebService}} 60 | #' \item List web service endpoints: \code{\link{endpoints}} 61 | #' } 62 | #' 63 | #' 5. Configure a settings file with your AzureML secrets 64 | #' 65 | #' The \code{\link{workspace}} function optionally reads your AzureML credentials from a settings file located at \code{~/.azureml/settings.json}. 
#' You can read and write this file using:
#'
#' \itemize{
#'   \item Write: \code{\link{write.AzureML.config}}
#'   \item Read: \code{\link{read.AzureML.config}}
#' }
#'
#' @name AzureML-package
#' @aliases AzureML
#' @docType package
#' @keywords package
#'
#' @importFrom stats runif setNames
#' @importFrom utils capture.output head read.table str write.table zip
NULL

# -------------------------------------------------------------------------------- /R/config.R:
# Validate that the AzureML settings path exists and is a regular file.
#
# Returns TRUE when the path is usable. On failure it either stops (when
# stopOnError = TRUE) or RETURNS a simpleError object without signalling it,
# so callers can inspect the problem with inherits(x, "error").
validate.AzureML.config <- function(config = getOption("AzureML.config"), stopOnError = FALSE){
  # Stop if the config file is missing
  if(!file.exists(config)) {
    msg <- sprintf("config file is missing: '%s'", config)
    if(stopOnError)
      stop(msg, call. = FALSE)
    else
      return(simpleError(msg))
  }

  # Stop if the config is a directory, not a file
  if(file.info(config)$isdir){
    msg <- paste(
      "The config argument should point to a file.",
      sprintf(" You provided a directory (%s)",
              normalizePath(config, winslash = "/", mustWork = FALSE)
      ), sep = "\n"
    )
    if(stopOnError)
      stop(msg, call. = FALSE)
    else
      return(simpleError(msg))
  }
  TRUE
}

#' Reads settings from configuration file in JSON format.
#'
#' @inheritParams workspace
#'
#' @export
#' @seealso write.AzureML.config
#' @seealso workspace
read.AzureML.config <- function(config = getOption("AzureML.config")){
  z <- tryCatch(fromJSON(file(config)),
                error = function(e) e
  )
  # Error check the settings file for invalid JSON
  if(inherits(z, "error")) {
    # BUG FIX: the original format string had no %s, so the offending file
    # path was never interpolated (and sprintf warns about the unused
    # argument in R >= 4.1).
    msg <- sprintf("Your config file ('%s') contains invalid json", config)
    msg <- paste(msg, z$message, sep = "\n\n")
    stop(msg, call. = FALSE)
  }
  z
}

#' Writes settings to configuration file.
#'
#' @inheritParams workspace
#' @param file either a character string naming a file or a connection open for writing. "" indicates output to the console.
#'
#' @rdname read.AzureML.config
#'
#' @export
#' @seealso write.AzureML.config
#' @seealso workspace
write.AzureML.config <- function(id = NULL, auth = NULL,
                                 api_endpoint = NULL,
                                 management_endpoint = NULL,
                                 file = ""){
  # Construct list of the fields AzureML's settings.json recognises
  x <- list(
    id = id,
    authorization_token = auth,
    api_endpoint = api_endpoint,
    management_endpoint = management_endpoint
  )
  # Remove null values so absent settings are omitted from the JSON
  conf <- list(
    workspace = x[!sapply(x, is.null)]
  )
  # Convert to JSON
  js <- jsonlite::toJSON(conf, pretty = TRUE)
  # NOTE: when `file` is left missing the JSON text is returned instead of
  # written, despite the "" default suggesting console output.
  if(!missing(file) && !is.null(file)) {
    writeLines(js, con = file)
  } else {
    js
  }
}
# -------------------------------------------------------------------------------- /R/consume.R:
# Copyright (c) 2015-2016 Microsoft Corporation
# All rights reserved.
#
# The MIT License (MIT)
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.



#' Use a web service to score data in list (key=value) format.
#'
#' Score data represented as lists where each list key represents a parameter of the web service.
#'
#' @export
#'
#' @inheritParams refresh
#' @inheritParams publishWebService
#' @param endpoint Either an AzureML web service endpoint returned by \code{\link{publishWebService}}, \code{\link{endpoints}}, or simply an AzureML web service from \code{\link{services}}; in the latter case the default endpoint for the service will be used.
#' @param ... variable number of requests entered as lists in key-value format; optionally a single data frame argument.
#' @param globalParam global parameters entered as a list, default value is an empty list
#' @param retryDelay the time in seconds to delay before retrying in case of a server error
#' @param output name of the output port to return usually 'output1' or 'output2'; set to NULL to return everything as raw results in JSON-encoded list form
#' @param .retry number of attempts before giving up on a failing request
#'
#' @return data frame containing results returned from web service call
#'
#' @note Set \code{...} to a list of key/value pairs corresponding to web service inputs. Optionally, set \code{...} to a single data frame with columns corresponding to web service variables. The data frame approach returns output from the evaluation of each row of the data frame (see the examples).
#'
#' @seealso \code{\link{publishWebService}} \code{\link{endpoints}} \code{\link{services}} \code{\link{workspace}}
#' @family consumption functions
#' @importFrom jsonlite fromJSON
#' @example inst/examples/example_publish.R
consume <- function(endpoint, ..., globalParam, retryDelay = 10, output = "output1", .retry = 5)
{
  if(is.Service(endpoint))
  {
    # A Service data frame may hold several rows; use the first service.
    if(nrow(endpoint) > 1) endpoint = endpoint[1, ]
    default <- endpoint$DefaultEndpointName
    endpoint <- endpoints(attr(endpoint, "workspace"), endpoint)
    # BUG FIX: the original used subset(endpoint, Name = default). `Name =`
    # passes `Name` through `...` as an unused argument, so NO filtering
    # happened and the first endpoint row was used regardless. `==` selects
    # the service's default endpoint as intended.
    endpoint <- subset(endpoint, Name == default)
  }

  if(!is.Endpoint(endpoint)) {
    # (message typo fixed: publishWebservice -> publishWebService)
    stop("Invalid endpoint. Use publishWebService() or endpoints() to create or obtain a service endpoint.")
  }

  apiKey <- endpoint$PrimaryKey
  requestUrl <- endpoint$ApiLocation

  if(missing(globalParam)) {
    # named empty list, so JSON serialization yields {}
    globalParam <- setNames(list(), character(0))
  }
  # Store variable number of lists entered as a list of lists
  requestsLists <- list(...)
  # Fail with a clear message instead of "subscript out of bounds" below
  if(length(requestsLists) == 0) {
    stop("No data supplied: pass key/value lists or a single data frame in `...`")
  }
  if(length(requestsLists) == 1 && is.data.frame(requestsLists[[1]])) {
    requestsLists <- requestsLists[[1]]
  } else {
    if(!is.list(requestsLists[[1]])) {
      requestsLists <- list(requestsLists)
    }
  }
  # Make API call with parameters
  result <- callAPI(apiKey, requestUrl, requestsLists, globalParam, retryDelay, .retry = .retry)
  if(inherits(result, "error")) stop("AzureML returned an error code")

  # Access output by converting from JSON into list and indexing into Results.
  # The endpoint help metadata tells us which columns are numeric/boolean so
  # the (string-typed) JSON results can be coerced back.
  if(!is.null(output) && output == "output1") {
    help <- endpointHelp(endpoint)$definitions$output1Item
    ans <- data.frame(result$Results$output1)
    nums <- which("number" == unlist(help)[grepl("\\.type$", names(unlist(help)))])
    logi <- which("boolean" == unlist(help)[grepl("\\.type$", names(unlist(help)))])
    if(length(nums) > 0) for(j in nums) ans[, j] <- as.numeric(ans[, j])
    if(length(logi) > 0) for(j in logi) ans[, j] <- as.logical(ans[, j])
    return(ans)
  }
  if(!is.null(output) && output == "output2") {
    return(fromJSON(result$Results$output2[[1]]))
  }
  result$Results
}



# Framework for making an Azure ML web service API call.
#
# Helper function that constructs and sends the API call to a Microsoft Azure
# Machine Learning web service, then receives and returns the response in
# JSON format.
#
# @param apiKey primary API key
# @param requestUrl API URL
# @param keyvalues the data to be passed to the web service
# @param globalParam the global parameters for the web service
# @param retryDelay number of seconds to wait after failing before trying again
# @param .retry the number of retry attempts
# @return the parsed JSON response
#
# @importFrom jsonlite toJSON
# @importFrom curl handle_setheaders new_handle handle_setopt curl_fetch_memory
# @keywords internal
callAPI <- function(apiKey, requestUrl, keyvalues, globalParam,
                    retryDelay = 10, .retry = 5) {
  result <- NULL
  # Construct request payload
  req <- list(
    Inputs = list(input1 = keyvalues),
    GlobalParameters = globalParam
  )
  # digits = 16 preserves double precision through the JSON round trip
  body <- charToRaw(paste(
    toJSON(req, auto_unbox = TRUE, digits = 16),
    collapse = "\n")
  )
  h <- new_handle()
  headers <- list(`User-Agent` = "R",
                  `Content-Type` = "application/json",
                  `Authorization` = paste0("Bearer ", apiKey))
  handle_setheaders(h, .list = headers)
  handle_setopt(h,
                .list = list(
                  post = TRUE,
                  postfieldsize = length(body),
                  postfields = body
                )
  )
  # 400s are client errors, so don't retry those
  r <- try_fetch(requestUrl, h, no_retry_on = 400, delay = retryDelay, .retry = .retry)
  result <- fromJSON(rawToChar(r$content))
  if(r$status_code >= 400) {
    stop(paste(capture.output(result), collapse = "\n"))
  }
  return(result)
}
# -------------------------------------------------------------------------------- /R/datasets.R:
# Copyright (c) 2015-2016 Microsoft Corporation
# All rights reserved.
3 | # 4 | # The MIT License (MIT) 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | 23 | # XXX make this an S3 method for the "Datasets" class? 24 | #' Download one or more datasets from an AzureML workspace. 25 | #' 26 | #' Download one or more datasets from an AzureML workspace into local R data frame or raw binary objects. 27 | #' @param dataset Either one or more rows from a \code{\link{datasets}} data frame in a workspace, 28 | #' or just a workspace from \code{\link{workspace}}. When \code{source} is a workspace, then 29 | #' the \code{name} parameter must also be specified. 30 | #' @param name Optional character vector of one or more dataset names to filter the \code{datasets} 31 | #' parameter list by. 32 | #' @param ... Optional arguments to pass to \code{read.table} for CSV or TSV DataTypeIds or to 33 | #' \code{readBin} for the ZIP DataTypeId. 
#' For example,
#' specify \code{stringsAsFactors=TRUE} if you wish, or any other valid argument to \code{read.table}.
#' @return If one dataset is specified (that is, one row from a workspace \code{datasets} data frame),
#' then a single data frame is returned.
#' If more than one dataset is specified (more than one row), then a list of data frames is returned.
#' @note TSV- and CSV-formatted datasets return data frame results with \code{stringsAsFactors=FALSE}
#' by default (independently of the global \code{stringsAsFactors} option).
#'
#' This function can download datasets with various CSV and TSV "DataTypeIds", or "DataTypeId"
#' of "ARFF", "PlainText" or "ZIP". Other "DataTypeIds" return an error. See the AzureML Data Format
#' Conversion modules to convert data to a supported format. Data with DataTypeId "ZIP" are returned
#' in a raw binary R vector, which could then be passed through \code{unzip}, for example.
#' @seealso \code{\link{workspace}}, \code{\link{datasets}}, \code{\link{read.table}},
#' \code{\link{download.intermediate.dataset}}
#' @export
#' @example inst/examples/example_download.R

download.datasets <- function(dataset, name, ...)
{

  # Stop with a clear message when any requested name is absent from the
  # Datasets data frame `ds`; returns TRUE otherwise.
  validateAllNamesInWorkspace <- function(nm, ds){
    z <- nm %in% ds$Name
    if(!all(z)) {
      nomatch <- nm[!z]
      msg <- sprintf("Datasets not found in workspace: %s", paste(nomatch, collapse = ", "))
      stop(msg)
    }
    TRUE
  }

  # Download every row of the `datasets` frame (captured from the enclosing
  # scope); a single result is unwrapped, multiple results become a named list.
  downloadDatasets <- function(){
    # BUG FIX: the original used 1:nrow(datasets), which yields c(1, 0) for a
    # zero-row frame; seq_len() is safe.
    ans <- lapply(seq_len(nrow(datasets)),
                  function(j) get_dataset(datasets[j, ], ...)
    )
    if(length(ans) == 1) return(ans[[1]])
    names(ans) <- datasets$Name
    ans
  }


  # *** Cases 1-3 involve both dataset and name arguments present. Case 4 is
  # where there is only a single argument (dataset) provided in the call. ***
  # Case 1: 1st arg (dataset) is ws, 2nd arg (name) is character vector
  # Case 2: 1st arg is a Datasets object, and 2nd arg (name) is character vector
  # Case 3: 1st arg (dataset) is ws, 2nd arg (name) is a Datasets object
  # Case 4: arg is a Datasets object (subset of datasets(ws))

  # Note: name is expected to be a vector of character strings
  if(missing(dataset)) {
    msg <- "Specify at least a dataset argument: see help file for `download.datasets`"
    stop(msg)
  }

  # Case 3: the name argument itself is a Datasets frame; use it directly
  if(!missing(name) && is.Dataset(name)){
    datasets <- name
    return(downloadDatasets())
  }

  # Cases 1: look the names up in the workspace
  if(is.Workspace(dataset)){
    ws <- dataset # make it clear it is a workspace
    datasets <- datasets(ws)
    if(missing(name)){
      msg <- "Specify the dataset names to download."
      stop(msg)
    }
    validateAllNamesInWorkspace(name, datasets)
    datasets <- datasets[match(name, datasets$Name), ]
  }

  # Cases 2 and 4: a Datasets frame, optionally filtered by name
  if(is.Dataset(dataset)){
    datasets <- dataset
    if(!missing(name) && is.character(name)) {
      # CONSISTENCY FIX: the original skipped validation here, so unknown
      # names produced NA rows and an obscure downstream failure instead of
      # the same clear error the workspace branch gives.
      validateAllNamesInWorkspace(name, datasets)
      datasets <- datasets[match(name, datasets$Name), ]
    }
  }

  if(!is.Workspace(dataset) && !is.Dataset(dataset)) {
    msg <- paste("You specified a dataset name that is not in the workspace.",
                 "See help file for `download.datasets`")
    stop(msg)
  }

  downloadDatasets()

}

#' Download a dataset from an AzureML experiment module.
#'
#' Allows you to download the data from certain types of modules in AzureML experiments. You can generate the information required from AzureML Studio by (right) clicking on a module output port and selecting the option "Generate Data Access Code...".
#'
#' @inheritParams refresh
#'
#' @param experiment AzureML experiment ID.
#' @param node_id Experiment node ID.
130 | #' @param port_name Experiment port name. The default is "Results dataset". 131 | #' @param data_type_id Experiment data type id. The default is "GenericCSV". See the note below for other types. 132 | #' @param ... Optional arguments to pass to \code{read.table} for CSV or TSV DataTypeIds. For example, specify \code{stringsAsFactors=TRUE} if you wish, or any other valid argument to \code{read.table}. 133 | #' 134 | #' @return In most cases a data frame. Exceptions are: a raw vector for \code{DataTypeId="Zip"} and character vector for \code{DataTypeId="PlainText"} 135 | #' 136 | #' @note TSV- and CSV-formatted datasets return data frame results with \code{stringsAsFactors=FALSE} by default (independently of the global \code{stringsAsFactors} option). 137 | #' 138 | #' \bold{Supported DataTypeId options} 139 | #' 140 | #' 141 | #' This function can download datasets with various CSV and TSV \code{DataTypeId} (with or without headers), in addition to "ARFF", "PlainText" and "Zip". Other "DataTypeIds" return an error. See the AzureML Data Format Conversion modules to convert data to a supported format. 142 | #' 143 | #' @seealso \code{\link{workspace}}, \code{\link{datasets}}, \code{\link[utils]{read.table}} and \code{\link{download.datasets}} 144 | #' 145 | #' @importFrom curl curl_escape new_handle handle_setheaders 146 | #' @importFrom jsonlite toJSON 147 | #' 148 | #' @export 149 | #' @family dataset functions 150 | #' @family experiment functions 151 | download.intermediate.dataset <- function(ws, experiment, node_id, 152 | port_name = "Results dataset", 153 | data_type_id = "GenericCSV", ...) 154 | { 155 | url = sprintf("%s/workspaces/%s/experiments/%s/outputdata/%s/%s", 156 | ws$.studioapi, curl_escape(ws$id), 157 | curl_escape(experiment), curl_escape(node_id), 158 | curl_escape(port_name)) 159 | h = new_handle() 160 | handle_setheaders(h, .list=ws$.headers) 161 | get_dataset(list(DataTypeId = data_type_id, DownloadLocation = url), h, ...) 
162 | } 163 | 164 | 165 | #' Upload an R data frame to an AzureML workspace. 166 | #' 167 | #' Upload any R data frame to an AzureML workspace using the \code{GenericTSV} format. 168 | #' 169 | #' @inheritParams refresh 170 | #' @param x An R data frame object 171 | #' @param name A character name for the new AzureML dataset (may not match an existing dataset name) 172 | #' @param description An optional character description of the dataset 173 | #' @param family_id An optional AzureML family identifier 174 | #' @param ... Optional additional options passed to \code{write.table} 175 | #' @note The additional \code{\link[utils]{write.table}} options may not include \code{sep} or \code{row.names} or \code{file}, but any other options are accepted. 176 | #' The AzureML API does not support uploads for _replacing_ datasets with new data by re-using a name. If you need to do this, first delete the dataset from the AzureML Studio interface, then upload a new version. 177 | #' 178 | #' @return A single-row data frame of "Datasets" class that corresponds to the uploaded object now available in ws$datasets. 179 | #' @importFrom curl curl_escape new_handle handle_setheaders handle_reset handle_setopt curl_fetch_memory 180 | #' @importFrom jsonlite fromJSON 181 | #' @export 182 | #' @family dataset functions 183 | #' @example inst/examples/example_upload.R 184 | upload.dataset <- function(x, ws, name, description = "", family_id="", ...) 185 | { 186 | stopIfNotWorkspace(ws) 187 | if(name %in% datasets(ws)$Name) { 188 | msg <- sprintf("A dataset with the name '%s' already exists in AzureML", name) 189 | stop(msg) 190 | } 191 | # Uploading data to AzureML is a two-step process. 192 | # 1. Upload raw data, retrieving an ID. 193 | # 2. Construct a DataSource metadata JSON object describing the data and 194 | # upload that. 
195 | 196 | # Step 1 197 | tsv = capture.output(write.table(x, file = "", sep = "\t", row.names = FALSE, ...)) 198 | url = sprintf("%s/resourceuploads/workspaces/%s/?userStorage=true&dataTypeId=GenericTSV", 199 | ws$.studioapi, curl_escape(ws$id)) 200 | h = new_handle() 201 | hdr = ws$.headers 202 | hdr["Content-Type"] = "text/plain" 203 | handle_setheaders(h, .list=hdr) 204 | body = charToRaw(paste(tsv, collapse="\n")) 205 | handle_setopt(h, post=TRUE, postfieldsize=length(body), postfields=body) 206 | step1 = try_fetch(url, handle=h) 207 | if(step1$status_code != 200) stop("HTTP ", step1$status_code, rawToChar(step1$content)) 208 | # Parse the response 209 | step1 = fromJSON(rawToChar(step1$content)) 210 | 211 | # Step 2 212 | metadata = toJSON( 213 | list( 214 | DataSource = 215 | list( 216 | Name = name, 217 | DataTypeId = "GenericTSV", 218 | Description = description, 219 | FamilyId = family_id, 220 | Owner = "R", 221 | SourceOrigin = "FromResourceUpload"), 222 | UploadId = step1$Id, # From Step 1 223 | UploadedFromFileName = "", 224 | ClientPoll = TRUE), auto_unbox=TRUE) 225 | 226 | url = sprintf("%s/workspaces/%s/datasources", 227 | ws$.studioapi, curl_escape(ws$id)) 228 | handle_reset(h) # Preserves connection, cookies 229 | handle_setheaders(h, .list=ws$.headers) 230 | body = charToRaw(paste(metadata, collapse="\n")) 231 | handle_setopt(h, post=TRUE, postfieldsize=length(body), postfields=body) 232 | step2 = try_fetch(url, handle=h) 233 | if(step2$status_code != 200) stop("HTTP ", step2$status_code, " ", rawToChar(step2$content)) 234 | id = gsub("\\\"","",rawToChar(step2$content)) 235 | 236 | # Success, refresh datasets 237 | refresh(ws, "datasets") 238 | 239 | # Return the row of ws$datasets corresponding to the uploaded data 240 | ws$datasets[ws$datasets$Id == id, ] 241 | } 242 | 243 | 244 | 245 | #' Delete datasets from an AzureML workspace. 
246 | #' 247 | #' @inheritParams refresh 248 | #' @param name Either one or more \code{Dataset} objects (rows from the workspace \code{datasets} data frame), or a character vector of dataset names to delete. 249 | #' @param host AzureML delete service endpoint 250 | #' @return A data frame with columns Name, Deleted, status_code indicating the HTTP status code and success/failure result of the delete operation for each dataset. 251 | #' @family dataset functions 252 | #' @export 253 | delete.datasets <- function(ws, name, host){ 254 | stopIfNotWorkspace(ws) 255 | # https://studioapi.azureml.net/api/workspaces//datasources/family/ HTTP/1.1 256 | datasets <- name 257 | refresh(ws, "datasets") 258 | if(!inherits(datasets, "Datasets")){ 259 | datasets <- datasets(ws) 260 | datasets <- datasets[datasets$Name %in% name, ] 261 | } 262 | h <- new_handle() 263 | handle_setheaders(h, .list = ws$.headers) 264 | handle_setopt(h, customrequest = "DELETE") 265 | delete_one <- function(familyId){ 266 | uri <- sprintf("%s/workspaces/%s/datasources/family/%s", 267 | ws$.studioapi, 268 | curl_escape(ws$id), 269 | curl_escape(familyId) 270 | ) 271 | z <- try_fetch(uri, h, .retry = 3, delay = 2) 272 | z$status_code 273 | } 274 | status_code <- vapply(datasets$FamilyId, delete_one, FUN.VALUE = numeric(1), USE.NAMES = FALSE) 275 | ans = data.frame( 276 | Name = datasets$Name, 277 | Deleted = status_code < 300, 278 | status_code = status_code, 279 | stringsAsFactors = FALSE 280 | ) 281 | refresh(ws, "datasets") 282 | ans 283 | } 284 | -------------------------------------------------------------------------------- /R/deleteWebservice.R: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015-2016 Microsoft Corporation 2 | # All rights reserved. 
3 | # 4 | # The MIT License (MIT) 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | 23 | 24 | 25 | #' Delete a Microsoft Azure Web Service 26 | #' 27 | #' Delete a Microsoft Azure Machine Learning web service from your workspace. 28 | #' 29 | #' @export 30 | #' 31 | #' @inheritParams refresh 32 | #' @param name Either one row from the workspace \code{services} data.frame corresponding to a service to delete, or simply a service name character string. 33 | #' @param refresh Set to \code{FALSE} to suppress automatic updating of the workspace list of services, 34 | #' useful when deleting many services in bulk. 35 | #' @note If more than one service matches the supplied \code{name}, the first listed service will be deleted. 36 | #' @return The updated data.frame of workspace services is invisibly returned.
37 | #' @seealso \code{\link{services}} \code{\link{publishWebService}} \code{\link{updateWebService}} 38 | #' @family publishing functions 39 | #' @example inst/examples/example_publish.R 40 | deleteWebService <- function(ws, name, refresh = TRUE) 41 | { 42 | #DELETE https://management.azureml.net/workspaces/{id}/webservices/{id}[/endpoints/{name}] 43 | 44 | stopIfNotWorkspace(ws) 45 | if(is.data.frame(name) || is.list(name)){ 46 | name = name$Id[1] 47 | } else { 48 | name = ws$services[ws$services$Name == name, "Id"][1] 49 | if(is.na(name)) stop("service not found") 50 | } 51 | h = new_handle() 52 | handle_setheaders(h, `Authorization`=sprintf("Bearer %s",ws$.auth), .list=ws$.headers) 53 | handle_setopt(h, customrequest="DELETE") 54 | uri = sprintf("%s/workspaces/%s/webservices/%s", 55 | ws$.management_endpoint, ws$id, name) 56 | s = curl_fetch_memory(uri, handle=h)$status_code 57 | if(s > 299) stop("HTTP error ",s) 58 | if(refresh) refresh(ws, "services") 59 | invisible(ws$services) 60 | } 61 | -------------------------------------------------------------------------------- /R/discover.R: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015-2016 Microsoft Corporation 2 | # All rights reserved. 3 | # 4 | # The MIT License (MIT) 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 
14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | 23 | 24 | #' Helper function to extract information from a help page URL 25 | #' 26 | #' Given a Microsoft Azure Machine Learning web service endpoint, extracts the endpoint ID and the workspace ID 27 | #' 28 | #' @param url the URL of a help page 29 | #' @return a vector containing the workspace ID, webservices ID and endpoint ID 30 | #' 31 | #' @keywords internal 32 | getDetailsFromUrl <- function(url) { 33 | ptn = ".*?/workspaces/([[:alnum:]]*)/webservices/([[:alnum:]]*)/endpoints/([[:alnum:]]*)/*.*$" 34 | if(!grepl(ptn, url)) stop("Invalid url") 35 | c( 36 | gsub(ptn, "\\1", url), 37 | gsub(ptn, "\\2", url), 38 | gsub(ptn, "\\3", url) 39 | 40 | ) 41 | } 42 | 43 | 44 | #' Discover web service schema. 
45 | #' 46 | #' Discover the expected input to a web service specified by its help page URL (which contains the workspace ID and web service endpoint ID), returning information specific to the consumption functions 47 | #' 48 | #' @param helpURL URL of the help page of the web service 49 | #' @param scheme the URI scheme 50 | #' @param host optional parameter that defaults to ussouthcentral.services.azureml.net 51 | #' @param api_version AzureML API version 52 | #' 53 | #' @return List containing the request URL of the webservice, column names of the data, sample input as well as the input schema 54 | #' 55 | #' @seealso \code{\link{publishWebService}} \code{\link{consume}} \code{\link{workspace}} \code{\link{services}} \code{\link{endpoints}} \code{\link{endpointHelp}} 56 | #' 57 | #' @family discovery functions 58 | #' @export 59 | discoverSchema <- function(helpURL, scheme = "https", 60 | host = "ussouthcentral.services.azureml.net", 61 | api_version = "2.0") 62 | { 63 | workspaceId = getDetailsFromUrl(helpURL)[1] 64 | endpointId = getDetailsFromUrl(helpURL)[3] 65 | # Construct swagger document URL using parameters 66 | # Use paste method without separator 67 | uri = paste0(scheme,"://", host, 68 | "/workspaces/", workspaceId, 69 | "/services/", endpointId, 70 | "/swagger.json") 71 | 72 | # parses the content and gets the swagger document 73 | r <- try_fetch(uri, handle = new_handle(), retry_on = "404") 74 | swagger <- fromJSON(rawToChar(r$content)) 75 | 76 | # Accesses the input schema in the swagger document 77 | inputSchema <- swagger$definition$input1Item 78 | 79 | # Accesses the example in the swagger document and converts it to JSON 80 | exampleJson <- toJSON(swagger$definitions$ExecutionRequest$example) 81 | 82 | # Accesses a single specific JSON object and formats it to be a request inputted as a list in R 83 | inputExample <- as.list((fromJSON((exampleJson)))$Inputs$input1) 84 | idx <- sapply(inputExample, class, USE.NAMES = FALSE) == "character" 85 | inputExample[idx] <- "Please input
valid String" 86 | 87 | # Accesses the names of the columns in the example 88 | # and stores it in a list of column names 89 | columnNames <- lapply(seq_along(inputExample), function(i)names(inputExample[i])) 90 | 91 | execPathNo <- grep("/execute\\?", names(swagger$paths)) 92 | if(is.numeric(execPathNo)) { 93 | executePath <- names(swagger$paths)[[execPathNo]] 94 | } else { 95 | "Path not found" 96 | } 97 | 98 | # Constructs the request URL with the parameters as well as execution path found. 99 | # The separator is set to an empty string 100 | requestUrl <- paste0(scheme,"://", host, 101 | "/workspaces/", workspaceId, 102 | "/services/", endpointId, 103 | executePath) 104 | 105 | # Access the HTTP method type e.g. GET/ POST and constructs an example request 106 | httpMethod <- toupper(names(swagger$paths[[2]])) 107 | httpRequest <- paste(httpMethod,requestUrl) 108 | 109 | # Warns user of characters and urges them to enter valid strings for them 110 | firstWarning = TRUE 111 | for(i in 1:length(inputExample)) { 112 | if(is.character(inputExample[[i]])) { 113 | if(firstWarning) { 114 | msg <- paste("The sample input does not contain sample values for characters.", 115 | "Please input valid strings for these fields:", 116 | sep = "\n") 117 | message(msg) 118 | } 119 | message(" - ", names(inputExample)[[i]]) 120 | firstWarning = FALSE 121 | } 122 | } 123 | 124 | #Returns what was discovered in the form of a list 125 | z <- list(requestUrl = requestUrl, 126 | columnNames = columnNames, 127 | sampleInput = inputExample, 128 | inputSchema = inputSchema 129 | ) 130 | class(z) <- "discoverSchema" 131 | z 132 | } 133 | 134 | #' @export 135 | print.discoverSchema <- function(x, ...){ 136 | str(x, ...) 
137 | invisible() 138 | } 139 | -------------------------------------------------------------------------------- /R/fetch.R: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015-2016 Microsoft Corporation 2 | # All rights reserved. 3 | # 4 | # The MIT License (MIT) 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
22 | 23 | 24 | # Used in experiment date parsing 25 | date_origin = "1970-1-1" 26 | 27 | 28 | validate_response <- function(r){ 29 | if(r$status_code >= 400){ 30 | # Some functions return response in JSON format, others not 31 | body <- tryCatch(fromJSON(rawToChar(r$content)), error = function(e)e) 32 | response_is_json <- !inherits(body, "error") 33 | 34 | if(response_is_json){ 35 | # If response is JSON, then we have a list with status code and error message 36 | msg <- paste( 37 | "AzureML returns error code:", 38 | sprintf("HTTP status code : %s", r$status_code), 39 | sprintf("AzureML error code : %s", body$error$code), 40 | "", 41 | body$error$message, 42 | body$error$details$message, 43 | sep = "\n" 44 | ) 45 | } else { 46 | # Response is plain text, with no list structure 47 | body <- rawToChar(r$content) 48 | msg <- switch( 49 | as.character(r$status_code), 50 | "400" = "400 (Bad request). Please check your workspace ID, auth and api_endpoint.", 51 | "401" = "401 (Unauthorised). Please check your workspace ID and auth codes.", 52 | "403" = "403 (Forbidden).", 53 | paste( 54 | "AzureML returns error code:", 55 | sprintf("HTTP status code : %s", r$status_code), 56 | sep = "\n" 57 | ) 58 | ) 59 | msg <- paste(msg, body, sep = "\n") 60 | } 61 | stop(msg, call. = FALSE) 62 | } 63 | } 64 | 65 | # Try to fetch a uri/handle, retrying on certain returned status codes after a timeout.
66 | # 67 | # @param uri the uri to fetch 68 | # @param handle a curl handle 69 | # @param retry_on HTTP status codes that result in retry 70 | # @param .retry number of tries before failing 71 | # @param delay in seconds between retries, subject to exponent 72 | # @param exponent increment each successive delay by delay^exponent 73 | # @param no_message_threshold Only show messages if delay is greater than this limit 74 | # 75 | # @keywords Internal 76 | # @return the result of curl_fetch_memory(uri, handle) 77 | # 78 | try_fetch <- function(uri, handle, 79 | retry_on = c(400, 401, 440, 503, 504, 509), 80 | no_retry_on, 81 | .retry = 6, 82 | delay = 1, exponent = 2, 83 | no_message_threshold = 1) 84 | { 85 | r = curl_fetch_memory(uri, handle) 86 | # if(r$status_code == 400){ 87 | # validate_response(r) 88 | # } 89 | if(!missing(no_retry_on) && !is.null(no_retry_on)){ 90 | retry_on <- setdiff(retry_on, no_retry_on) 91 | } 92 | if(!(r$status_code %in% retry_on)) { 93 | validate_response(r) 94 | return(r) 95 | } 96 | collisions = 1 97 | printed_message <- FALSE 98 | while(collisions < (.retry)) { 99 | r = curl_fetch_memory(uri, handle) 100 | if(!(r$status_code %in% retry_on)) { 101 | validate_response(r) 102 | return(r) 103 | } 104 | wait_time = delay * (2 ^ collisions - 1) 105 | wait_time <- runif(1, min = 0.001, max = wait_time) 106 | printed_message <- FALSE 107 | if(wait_time > no_message_threshold){ 108 | message(sprintf("Request failed with status %s. 
Waiting %3.1f seconds before retry", 109 | r$status_code, 110 | wait_time)) 111 | printed_message <- TRUE 112 | wait_time <- ceiling(wait_time) 113 | for(i in 1:wait_time){ 114 | message(".", appendLF = FALSE) 115 | Sys.sleep(1) 116 | } 117 | message("") 118 | } else { 119 | Sys.sleep(wait_time) 120 | } 121 | collisions = collisions + 1 122 | } 123 | if(printed_message) message("\n") 124 | validate_response(r) 125 | r 126 | } 127 | 128 | # urlAPIinsert <- function(x, text = "api"){ 129 | # gsub("(http.*?)(\\..*)", sprintf("\\1%s\\2", text), x) 130 | # } 131 | 132 | 133 | -------------------------------------------------------------------------------- /R/getsyms.R: -------------------------------------------------------------------------------- 1 | # Support functions adapted from the foreach package: 2 | # 3 | # Copyright (c) 2008-2010 Revolution Analytics 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | # These are all internal functions. 
19 | 20 | .getsyms <- function(ex) { 21 | fun <- function(x) { 22 | if (is.symbol(x)) 23 | as.character(x) 24 | else if (is.call(x)) 25 | .getsyms(x) 26 | else 27 | NULL 28 | } 29 | unlist(lapply(ex, fun)) 30 | } 31 | 32 | .gather <- function(x) { 33 | fun <- function(a, b) unique(c(a, b)) 34 | accum = list(good=character(0), bad=character(0)) 35 | for (e in x) { 36 | accum = mapply(fun, e, accum, SIMPLIFY=FALSE) 37 | } 38 | accum 39 | } 40 | 41 | .expandsyms <- function(syms, env, good, bad) { 42 | fun <- function(sym, good, bad) { 43 | if (sym %in% c(good, bad)) { 44 | # we already saw this symbol 45 | list(good=good, bad=bad) 46 | } else if (!nzchar(sym)) { 47 | # apparently a symbol can be converted into an empty string, 48 | # but it's an error to call "exists" with an empty string, 49 | # so we just declare it to be bad here 50 | list(good=good, bad=c(sym, bad)) 51 | } else if (exists(sym, env, mode='function', inherits=FALSE)) { 52 | # this is a function defined in this environment 53 | good = c(sym, good) 54 | f = get(sym, env, mode='function', inherits=FALSE) 55 | if (identical(environment(f), env)) { 56 | # it's a local function 57 | globs = findGlobals(f) 58 | if (length(globs) > 0) { 59 | # it's got free variables, so let's check them out 60 | .gather(lapply(globs, fun, good, bad)) 61 | } else { 62 | # it doesn't have free variables, so we're done 63 | list(good=good, bad=bad) 64 | } 65 | } else { 66 | # it's not a local function, so we're done 67 | list(good=good, bad=bad) 68 | } 69 | } else if (exists(sym, env, inherits=FALSE)) { 70 | # it's not a function, but it's defined in this environment 71 | list(good=c(sym, good), bad=bad) 72 | } else { 73 | # it's not defined in this environment 74 | list(good=good, bad=c(sym, bad)) 75 | } 76 | } 77 | .gather(lapply(syms, fun, good, bad))$good 78 | } 79 | 80 | .getexports <- function(ex, e, env, good=character(0), bad=character(0)) { 81 | syms = .getsyms(ex) 82 | syms = .expandsyms(syms, env, good, bad) 83 | 
for (s in syms) { 84 | if (s != '...') { 85 | val = get(s, env, inherits=FALSE) 86 | 87 | # if this is a function, check if we should change the 88 | # enclosing environment to be this new environment 89 | fenv = environment(val) 90 | if (is.function(val) && 91 | (identical(fenv, env) || identical(fenv, .GlobalEnv))) 92 | environment(val) = e 93 | 94 | assign(s, val, e) 95 | } 96 | } 97 | invisible(NULL) 98 | } 99 | -------------------------------------------------------------------------------- /R/internal.R: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015-2016 Microsoft Corporation 2 | # All rights reserved. 3 | # 4 | # The MIT License (MIT) 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
22 | 23 | 24 | urlconcat <- function(a,b) 25 | { 26 | ans = paste(gsub("/$", "", a), b, sep="/") 27 | ans = gsub(":/([^/])", "://\\1", ans) 28 | ans 29 | } 30 | 31 | # Internal function that retrieves datasets. 32 | # 33 | # @param ws A workspace object 34 | # @return a data.frame 35 | #' @importFrom curl handle_setheaders curl new_handle 36 | #' @importFrom jsonlite fromJSON 37 | # @keywords Internal 38 | get_datasets <- function(ws) { 39 | h = new_handle() 40 | handle_setheaders(h, .list = ws$.headers) 41 | uri <- sprintf("%s/workspaces/%s/datasources", ws$.studioapi, ws$id) 42 | r <- try_fetch(uri = uri, handle = h, delay = 0.25, .retry = 3) 43 | 44 | msg <- paste("No results returned from datasets(ws).", 45 | "Please check your workspace credentials and api_endpoint are correct.") 46 | if(inherits(r, "error")){ stop(msg) } 47 | if(r$status_code >= 400){ stop(msg) } 48 | 49 | x <- fromJSON(rawToChar(r$content)) 50 | if(is.null(x) || is.na(x$Name[1])){ 51 | x = data.frame() 52 | class(x) = c("Datasets", "data.frame") 53 | return(x) 54 | } 55 | # Use strict variable name matching to look up data 56 | d = x[, "DownloadLocation"] 57 | x$DownloadLocation = paste0(d[, "BaseUri"], 58 | d[, "Location"], 59 | d[, "AccessCredential"]) 60 | d = x[,"VisualizeEndPoint"] 61 | x$VisualizeEndPoint = paste0(d[, "BaseUri"], 62 | d[, "AccessCredential"]) 63 | d = x[,"SchemaEndPoint"] 64 | x$SchemaEndPoint = paste0(d[, "BaseUri"], 65 | d[, "Location"], 66 | d[, "AccessCredential"]) 67 | class(x) = c("Datasets", "data.frame") 68 | x 69 | } 70 | 71 | 72 | convertToDate <- function(x) { 73 | x = as.numeric(gsub("[^-0-9]", "", x)) /1000 74 | x = ifelse(x >= 0, x, NA) 75 | suppressWarnings( 76 | as.POSIXct(x, tz = "GMT", origin = date_origin) 77 | ) 78 | } 79 | 80 | 81 | # Internal function that retrieves experiments. 
82 | # 83 | # @param ws A workspace object 84 | # @return a data.frame 85 | #' @importFrom curl handle_setheaders curl new_handle 86 | #' @importFrom jsonlite fromJSON 87 | # @keywords Internal 88 | get_experiments <- function(ws) { 89 | h = new_handle() 90 | handle_setheaders(h, .list=ws$.headers) 91 | uri = sprintf("%s/workspaces/%s/experiments", ws$.studioapi, ws$id) 92 | r <- try_fetch(uri = uri, handle = h, delay = 0.25, .retry = 3) 93 | 94 | msg <- paste("No results returned from experiments(ws).", 95 | "Please check your workspace credentials and api_endpoint are correct.") 96 | if(inherits(r, "error")){ stop(msg) } 97 | if(r$status_code >= 400){ stop(msg) } 98 | 99 | # Use strict variable name matching to look up data 100 | x <- fromJSON(rawToChar(r$content)) 101 | x = cbind(x, x[,"Status"]) 102 | 103 | x$Status = c() 104 | x$EndTime = convertToDate(x[["EndTime"]]) 105 | x$StartTime = convertToDate(x[["StartTime"]]) 106 | x$CreationTime = convertToDate(x[["CreationTime"]]) 107 | class(x) = c("Experiments", "data.frame") 108 | x 109 | } 110 | 111 | # Internal function that retrieves a dataset from AzureML. 112 | # 113 | # @param x a list or data.frame with \code{DownloadLocation} and \code{DataTypeId} fields 114 | # @param h optional curl handle 115 | # @param quote passed to \code{\link[utils]{read.table}} 116 | # @param ... additional parameters to pass to \code{read.table} 117 | # @return a data.frame 118 | #' @importFrom foreign read.arff 119 | #' @importFrom curl new_handle curl 120 | # @keywords Internal 121 | get_dataset <- function(x, h, quote = "\"", ...) { 122 | # Set default stringsAsFactors to FALSE, but allow users to override in ... 123 | # Restore the option on function exit. 
124 | opts = options(stringsAsFactors = FALSE) 125 | on.exit(options(opts)) 126 | if(missing(h)) h = new_handle() 127 | conn = "r" 128 | if(tolower(x$DataTypeId) == "zip") conn = "rb" 129 | uri = curl(x$DownloadLocation, handle = h, open = conn) 130 | on.exit(tryCatch(close(uri), error = invisible), add = TRUE) 131 | 132 | # Existence of DataTypeId, DownloadLocation guaranteed by caller 133 | switch( 134 | tolower(x$DataTypeId), 135 | arff = read.arff(uri), 136 | plaintext = paste(readLines(uri, warn = FALSE), collapse="\n"), 137 | generictsvnoheader = read.table(uri, sep = "\t", header = FALSE, quote, ...), 138 | generictsv = read.table(uri, sep = "\t", header = TRUE, quote, ...), 139 | genericcsvnoheader = read.table(uri, sep = ",", header = FALSE, quote, ...), 140 | genericcsv = read.table(uri, sep = ",", header = TRUE, quote, ...), 141 | zip = readBin(uri, what = "raw", n = x$Size, ...), 142 | stop("unsupported data type: '",x$DataTypeId,"'") 143 | ) 144 | } 145 | 146 | 147 | # Checks if zip is available on system. 148 | # Required for packageEnv() 149 | zipAvailable <- function(){ 150 | z <- unname(Sys.which("zip")) 151 | z != "" 152 | } 153 | 154 | zipNotAvailableMessage = "Requires external zip utility. Please install zip, ensure it's on your path and try again."
# Package a Function and Dependencies into an Environment
#
# Saves `exportenv` to env.RData in a temp directory, optionally mirrors the
# required packages (and their dependencies) into a local repo beside it,
# zips the whole directory and returns the zip as a base64 string suitable
# for inclusion in the AzureML publish request body.
#
# @param exportenv R environment to package
# @param packages a character vector of required R package dependencies
# @param version optional R version string used to resolve binary packages
# @return A base64-encoded zip file containing the saved 'exportenv' environment
#' @import codetools
#' @importFrom base64enc base64encode
#' @importFrom miniCRAN makeRepo pkgDep
# @keywords Internal
packageEnv <- function(exportenv = new.env(), packages=NULL, version = getOption("default_r_version")) {
  if(!zipAvailable()) stop(zipNotAvailableMessage)

  # Record the requested packages inside the environment so the AzureML
  # wrapper can install/attach them at service run time.
  if(!is.null(packages)) assign("..packages", packages, envir = exportenv)
  td <- tempfile(pattern = "dir")
  on.exit(unlink(td, recursive=TRUE))
  tryCatch(dir.create(td), warning=function(e) stop(e))
  # zip, unfortunately a zip file is apparently an AzureML requirement.
  cwd = getwd()
  on.exit(setwd(cwd), add = TRUE)
  setwd(td)
  # save export environment to an RData file
  save(exportenv, file="env.RData")

  # Package up dependencies: mirror Windows binary packages (the AzureML
  # execution environment is Windows) into td/packages via miniCRAN.
  if(!is.null(packages))
  {
    re <- getOption("repos")
    if(is.null(re)){
      re <- c(CRAN = "http://cran.revolutionanalytics.com")
    }
    tp <- normalizePath(file.path(td, "packages"), winslash = "/", mustWork = FALSE)
    tryCatch(dir.create(tp), warning = function(e) stop(e))
    all_p <- pkgDep(packages,
                    repos = re,
                    type = "win.binary",
                    Rversion = version,
                    suggests = FALSE
    )
    tryCatch(
      z <- makeRepo(all_p,
                    path = tp,
                    repos = re,
                    type = "win.binary",
                    Rversion = version
      ),
      error=function(e) stop(e)
    )
    # makeRepo() returns the paths it wrote; each should live under tp.
    if(!all(grepl(tp, z))) {
      warning("Packages did not copy properly in to AzureML. 
Please ensure you have miniCRAN v0.2.7 or above installed.")
    }
  }

  z = try({
    zip(zipfile = "export.zip", files = dir(), flags = "-r9Xq")
  })
  # BUG FIX: try() marks failure with class "try-error", not "error", so the
  # old check `inherits(z, "error")` could never detect a failed zip() call.
  # A nonzero numeric status from the external zip tool also means failure.
  if(inherits(z, "try-error") || z > 0) stop("Unable to create zip file")
  setwd(cwd)
  base64encode(file.path(td, "export.zip", fsep="/"))
}
#' Test if an object is an Azure ML Workspace.
#'
#' @param x an R object
#' @return logical value, TRUE if \code{x} represents an Azure ML workspace.
#' @export
is.Workspace <- function(x) inherits(x, "Workspace")

#' Test if an object is an Azure ML Dataset.
#'
#' @param x an R object
#' @return logical value, TRUE if \code{x} represents an Azure ML Dataset.
#' @export
is.Dataset <- function(x) inherits(x, "Datasets")


#' Test if an object is an Azure ML Service.
#'
#' @param x an R object
#' @return logical value, TRUE if \code{x} represents an Azure ML web service
#' @export
is.Service <- function(x) inherits(x, "Service")

#' Test if an object is an Azure ML Endpoint.
#'
#' @param x an R object
#' @return logical value, TRUE if \code{x} represents an Azure ML web service endpoint
#' @export
is.Endpoint <- function(x) inherits(x, "Endpoint")
#' @export
print.Datasets <- function(x, ...)
{
  # Compact summary: truncate long names and append a "..." marker column
  # hinting that the underlying data.frame holds more variables.
  trailer <- if (nrow(x) > 0) "..." else character()
  summary_df <- data.frame(
    Name = substr(x[, "Name"], 1, 50),
    DataTypeId = x[, "DataTypeId"],
    Size = x[, "Size"],
    `...` = trailer
  )
  print(summary_df)
  cat("----------------------------------------------\n")
  cat("AzureML datasets data.frame variables include:\n")
  cat(paste(capture.output(names(x)), collapse = "\n"), "\n")
  summary_df
}
3 | # 4 | # The MIT License (MIT) 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | 23 | 24 | 25 | #' Publish a function as a Microsoft Azure Web Service. 26 | #' 27 | #' Publish a function to Microsoft Azure Machine Learning as a web service. The web service created is a standard Azure ML web service, and can be used from any web or mobile platform as long as the user knows the API key and URL. The function to be published is limited to inputs/outputs consisting of lists of scalar values or single data frames (see the notes below and examples). Requires a zip program to be installed (see note below). 28 | #' 29 | #' @export 30 | #' 31 | #' @inheritParams refresh 32 | #' @inheritParams workspace 33 | #' @param fun a function to publish; the function must have at least one argument. 
34 | #' @param name name of the new web service; ignored when \code{serviceId} is specified (when updating an existing web service). 35 | #' 36 | #' @param inputSchema either a list of \code{fun} input parameters and their AzureML types formatted as \code{list("arg1"="type", "arg2"="type", ...)}, or an example input data frame when \code{fun} takes a single data frame argument; see the note below for details. 37 | #' 38 | #' @param outputSchema list of \code{fun} outputs and AzureML types, formatted as \code{list("output1"="type", "output2"="type", ...)}, optional when \code{inputSchema} is an example input data frame. 39 | #' 40 | #' @param export optional character vector of variable names to explicitly export in the web service for use by the function. See the note below. 41 | #' @param noexport optional character vector of variable names to prevent from exporting in the web service. 42 | #' @param packages optional character vector of R packages to bundle in the web service, including their dependencies. 43 | #' @param version optional R version string for required packages (the version of R running in the AzureML Web Service). 44 | #' @param serviceId optional Azure web service ID; use to update an existing service (see Note below). 45 | #' @param host optional Azure regional host, defaulting to the global \code{management_endpoint} set in \code{\link{workspace}} 46 | #' @param data.frame \code{TRUE} indicates that the function \code{fun} accepts a data frame as input and returns a data frame output; automatically set to \code{TRUE} when \code{inputSchema} is a data frame. 47 | #' @param .retry number of tries before failing 48 | #' 49 | #' @return A data.frame describing the new service endpoints, cf. \code{\link{endpoints}}. The output can be directly used by the \code{\link{consume}} function. 50 | #' 51 | #' @note 52 | #' \bold{Data Types} 53 | #' 54 | #' AzureML data types are different from, but related to, R types. 
You may specify the R types \code{numeric, logical, integer,} and \code{character} and those will be specified as AzureML types \code{double, boolean, int32, string}, respectively. 55 | #' 56 | #' \bold{Input and output schemas} 57 | #' 58 | #' Function input must be: 59 | #' \enumerate{ 60 | #' \item named scalar arguments with names and types specified in \code{inputSchema} 61 | #' \item one or more lists of named scalar values 62 | #' \item a single data frame when \code{data.frame=TRUE} is specified; either explicitly specify the column names and types in \code{inputSchema} or provide an example input data frame as \code{inputSchema} 63 | #' } 64 | #' Function output is always returned as a data frame with column names and types specified in \code{outputSchema}. See the examples for example use of all three I/O options. 65 | #' 66 | #' \bold{Updating a web service} 67 | #' 68 | #' Leave the \code{serviceId} parameter undefined to create a new AzureML web service, or specify the ID of an existing web service to update it, replacing the function, \code{inputSchema}, \code{outputSchema}, and required R pacakges with new values. The \code{name} parameter is ignored \code{serviceId} is specified to update an existing web service. 69 | #' 70 | #' The \code{\link{updateWebService}} function is nearly an alias for \code{\link{publishWebService}}, differing only in that the \code{serviceId} parameter is required by \code{\link{updateWebService}}. 71 | #' 72 | #' The \code{publishWebService} function automatically exports objects required by the function to a working environment in the AzureML machine, including objects accessed within the function using lexical scoping rules. Use the \code{exports} parameter to explicitly include other objects that are needed. Use \code{noexport} to explicitly prevent objects from being exported. 73 | #' 74 | #' Note that it takes some time to update the AzureML service on the server. 
After updating the service, you may have to wait several seconds for the service to update. The time it takes will depend on a number of factors, including the complexity of your web service function. 75 | #' 76 | #' \bold{External zip program required} 77 | #' 78 | #' The function uses \code{\link[utils]{zip}} to compress information before transmission to AzureML. To use this, you need to have a zip program installed on your machine, and this program should be available in the path. The program should be called \code{zip} otherwise R may not find it. On windows, it is sufficient to install RTools (see \url{https://cran.r-project.org/bin/windows/Rtools/}) 79 | #' 80 | #' @seealso \code{\link{endpoints}}, \code{\link{discoverSchema}}, \code{\link{consume}} and \code{\link{services}}. 81 | #' @family publishing functions 82 | #' 83 | #' @example inst/examples/example_publish.R 84 | #' @importFrom jsonlite toJSON 85 | #' @importFrom uuid UUIDgenerate 86 | #' @importFrom curl new_handle handle_setheaders handle_setopt 87 | publishWebService <- function(ws, fun, name, 88 | inputSchema, outputSchema, 89 | `data.frame` = FALSE, 90 | export = character(0), 91 | noexport = character(0), 92 | packages, 93 | version = "3.1.0", 94 | serviceId, 95 | host = ws$.management_endpoint, 96 | .retry = 3) 97 | { 98 | # Perform validation on inputs 99 | stopIfNotWorkspace(ws) 100 | if(!zipAvailable()) stop(zipNotAvailableMessage) 101 | if(is.character(fun)) stop("You must specify 'fun' as a function, not a character") 102 | if(!is.function(fun)) stop("The argument 'fun' must be a function.") 103 | if(!is.list(inputSchema)) stop("You must specify inputSchema as either a list or a data.frame") 104 | 105 | if(missing(serviceId) && as.character(match.call()[1]) == "updateWebService") 106 | stop("updateWebService requires that the serviceId parameter is specified") 107 | if(missing(name) && !missing(serviceId)) name = "" # unused in this case 108 | if(missing(serviceId)) serviceId = 
gsub("-", "", UUIDgenerate(use.time = TRUE)) 109 | publishURL = sprintf("%s/workspaces/%s/webservices/%s", 110 | host, ws$id, serviceId) 111 | # Make sure schema matches function signature 112 | if(inputSchemaIsDataframe(inputSchema)){ 113 | `data.frame` <- TRUE 114 | } 115 | if(`data.frame`) { 116 | function_output <- match.fun(fun)(head(inputSchema)) 117 | inputSchema <- azureSchema(inputSchema) 118 | if(missing(outputSchema)) { 119 | if(is.data.frame(function_output) || is.list(function_output)) { 120 | outputSchema <- azureSchema(function_output) 121 | } else { 122 | outputSchema <- azureSchema(list(ans = class(function_output))) 123 | } 124 | } 125 | } else { 126 | # not a data frame 127 | inputSchema <- azureSchema(inputSchema) 128 | if(missing(outputSchema)) { 129 | function_output <- match.fun(fun)(inputSchema) 130 | outputSchema <- azureSchema(function_output)[[1]] 131 | } else { 132 | outputSchema <- azureSchema(outputSchema) 133 | } 134 | 135 | } 136 | 137 | ### Get and encode the dependencies 138 | 139 | if(missing(packages)) packages=NULL 140 | exportenv = new.env() 141 | .getexports(substitute(fun), 142 | exportenv, 143 | parent.frame(), 144 | good = export, 145 | bad = noexport 146 | ) 147 | 148 | ### Assign required objects in the export environment 149 | 150 | assign("..fun", fun, envir = exportenv) 151 | assign("..output_names", names(outputSchema), envir = exportenv) 152 | assign("..data.frame", `data.frame`, envir = exportenv) 153 | 154 | zipString = packageEnv(exportenv, packages = packages, version = version) 155 | 156 | ### Build the body of the request 157 | 158 | req = list( 159 | Name = name, 160 | Type = "Code", 161 | CodeBundle = list( 162 | InputSchema = inputSchema, 163 | OutputSchema = outputSchema, 164 | Language = "R-3.1-64", 165 | SourceCode = wrapper, 166 | ZipContents = zipString 167 | ) 168 | ) 169 | body = charToRaw( 170 | paste(toJSON(req, auto_unbox = TRUE), collapse = "\n") 171 | ) 172 | h = new_handle() 173 | httpheader = 
list( 174 | Authorization = paste("Bearer ", ws$.auth), 175 | `Content-Type` = "application/json", 176 | Accept = "application/json" 177 | ) 178 | opts = list( 179 | post = TRUE, 180 | postfieldsize = length(body), 181 | postfields = body, 182 | customrequest = "PUT" 183 | ) 184 | handle_setheaders(h, .list = httpheader) 185 | handle_setopt(h, .list = opts) 186 | r = try_fetch(publishURL, handle = h, .retry = .retry) 187 | result = rawToChar(r$content) 188 | if(r$status_code >= 400) stop(result) 189 | newService = fromJSON(result) 190 | 191 | ### refresh the workspace cache 192 | refresh(ws, "services") 193 | 194 | ### Use discovery functions to get endpoints for immediate use 195 | endpoints(ws, newService["Id"]) 196 | } 197 | 198 | 199 | #' @rdname publishWebService 200 | #' @export 201 | updateWebService = publishWebService 202 | 203 | 204 | -------------------------------------------------------------------------------- /R/services.R: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2015-2016 Microsoft Corporation 2 | # All rights reserved. 3 | # 4 | # The MIT License (MIT) 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | 23 | 24 | #' List Available Web Services. 25 | #' 26 | #' Return a list of web services available to the specified Microsoft Azure Machine Learning workspace. 27 | #' The result is cached in the workspace environment similarly to datasets and experiments. 28 | #' 29 | #' @inheritParams refresh 30 | #' @param service_id optional web service id. If supplied, return the web service information for just the specified service id. Leave undefined to return a data.frame of all services. 31 | #' @param name optional web service name. If supplied, return the web service information for services with matching names. Leave undefined to return all services. 32 | #' @param host the AzureML web services URI 33 | #' 34 | #' @return Returns a data.frame with variables: 35 | #' \itemize{ 36 | #' \item Id 37 | #' \item Name 38 | #' \item Description 39 | #' \item CreationTime 40 | #' \item WorkspaceId 41 | #' \item DefaultEndpointName 42 | #' } 43 | #' Each row of the returned data.frame corresponds to a service. 44 | #' @note \code{getWebServices} is an alias for \code{services}. 
#' @export
services <- function(ws, service_id, name, host = ws$.management_endpoint)
{
  stopIfNotWorkspace(ws)

  # Prepare an authenticated request handle asking for JSON.
  handle <- new_handle()
  request_headers <- c(ws$.headers,
                       `Authorization` = sprintf("Bearer %s",ws$.auth),
                       `Accept` = "application/json"
  )
  handle_setheaders(handle, .list = request_headers)

  # Either list every service, or fetch one by id.
  id_path <- if (missing(service_id)) "" else sprintf("/%s", service_id)

  uri <- sprintf("%s/workspaces/%s/webservices%s", host, ws$id, id_path)
  resp <- try_fetch(uri = uri, handle = handle, delay = 0.25, .retry = 3)

  parsed <- fromJSON(rawToChar(resp$content))
  if(inherits(parsed, "error") || is.null(parsed)) {
    msg <- "service not found"
    warning(msg, immediate. = TRUE)
    return(simpleError(msg))
  }

  attr(parsed, "workspace") <- ws
  # Optional client-side filter by exact service name.
  if(!missing(name)) parsed <- parsed[parsed$Name == name,]
  if(is.null(parsed)) parsed <- data.frame()
  class(parsed) <- c("Service", "data.frame")
  # Cache the full listing (not single-service lookups) in the workspace.
  if(id_path == "") ws$services <- parsed
  parsed
}


#' @rdname services
#' @export
getWebServices <- services
111 | #' 112 | #' @inheritParams refresh 113 | #' @param host The AzureML web services URI 114 | #' @param service_id A web service Id, for example returned by \code{\link{services}}; alternatively a row from the services data frame identifying the service. 115 | #' @param endpoint_id An optional endpoint id. If supplied, return the endpoint information for just that id. Leave undefined to return a data.frame of all end points associated with the service. 116 | #' 117 | #' @return Returns a data.frame with variables: 118 | #' \itemize{ 119 | #' \item Name 120 | #' \item Description 121 | #' \item CreationTime 122 | #' \item WorkspaceId 123 | #' \item WebServiceId 124 | #' \item HelpLocation 125 | #' \item PrimaryKey 126 | #' \item SecondaryKey 127 | #' \item ApiLocation 128 | #' \item Version 129 | #' \item MaxConcurrentCalls 130 | #' \item DiagnosticsTraceLevel 131 | #' \item ThrottleLevel 132 | #' } 133 | #' Each row of the data.frame corresponds to an end point. 134 | #' @note \code{getEndPoints} is an alias for \code{endpoints}. 135 | #' @family discovery functions 136 | #' @examples 137 | #' \dontrun{ 138 | #' workspace_id <- "" # Your AzureML workspace id 139 | #' authorization_token <- "" # Your AsureML authorization token 140 | #' 141 | #' ws <- workspace( 142 | #' id = workspace_id, 143 | #' auth = authorization_token 144 | #' ) 145 | #' 146 | #' s <- services(ws) 147 | #' endpoints(ws, s$Id[1]) 148 | #' 149 | #' # Note that you can alternatively just use the entire row that 150 | #' # describes the service. 
#' @export
endpoints <- function(ws, service_id, endpoint_id, host = ws$.management_endpoint)
{
  stopIfNotWorkspace(ws)
  # Accept either a service id string or a row from the services() result.
  if(is.Service(service_id)) service_id = service_id$Id[1]

  handle = new_handle()
  handle_setheaders(handle, .list = list(
    `User-Agent`="R",
    `Content-Type`="application/json;charset=UTF8",
    `Authorization`=sprintf("Bearer %s", ws$.auth),
    `Accept`="application/json"
  ))

  # Either list all endpoints of the service, or fetch one by id.
  endpoint_path = if (missing(endpoint_id)) "" else sprintf("/%s", endpoint_id)

  uri <- sprintf("%s/workspaces/%s/webservices/%s/endpoints%s",
                 host,
                 ws$id,
                 service_id,
                 endpoint_path
  )

  resp <- try_fetch(uri, handle = handle)
  result <- fromJSON(rawToChar(resp$content))

  # Adjust the returned API location for completeness:
  if(length(result) > 0) {
    result$ApiLocation <- paste0(
      result$ApiLocation,
      "/execute?api-version=2.0&details=true&format=swagger"
    )
  }
  class(result) <- c("Endpoint", "data.frame")
  result
}
#' @export
endpointHelp <- function(ep, type = c("apidocument", "r-snippet", "score", "jobs", "update"))
{
  if(!inherits(ep, "Endpoint")) stop("Object ep must be an endpoint")
  type <- match.arg(type)
  rsnip <- FALSE
  # The R code snippet is embedded in the "score" help page; remember to
  # extract it after download.
  if(type == "r-snippet") {
    type <- "score"
    rsnip <- TRUE
  }
  uri <- ep$HelpLocation[1]

  # XXX This is totally nuts, and not documented, but help hosts vary depending on type.
  # Arrghhh...
  if(type == "apidocument"){
    uri <- gsub("studio.azureml.net/apihelp", "management.azureml.net", uri)
    uri <- gsub("studio.azureml-int.net/apihelp", "management.azureml-int.net", uri)
  }

  uri <- paste(uri, type, sep = "/")
  r <- try_fetch(uri, handle = new_handle())
  txt = rawToChar(r$content)

  # Strip HTML tags from the downloaded help page.
  # BUG FIX: the pattern literal was truncated to "\\s]+))?)+\\s*|\\s*)/?>",
  # an unbalanced (invalid) regex that made gsub() error at runtime; restored
  # the full HTML-tag matching expression.
  pattern <- "</?\\w+((\\s+\\w+(\\s*=\\s*(?:\".*?\"|'.*?'|[^'\">\\s]+))?)+\\s*|\\s*)/?>"
  txt <- gsub(pattern, "\\1", txt)
  txt <- gsub("&.?quot;", "'", txt)
  txt <- paste(txt, collapse = "\n")
  if(rsnip) {
    # Keep only the text following the "code-snippet-r" marker.
    txt <- substr(txt,
                  grepRaw("code-snippet-r", txt) + nchar("code-snippet-r") + 2, nchar(txt)
    )
  }
  if(type == "apidocument"){
    fromJSON(txt)
  } else {
    txt
  }
}
2015-2016 Microsoft Corporation 2 | # All rights reserved. 3 | # 4 | # The MIT License (MIT) 5 | # Permission is hereby granted, free of charge, to any person obtaining a copy 6 | # of this software and associated documentation files (the "Software"), to deal 7 | # in the Software without restriction, including without limitation the rights 8 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | # copies of the Software, and to permit persons to whom the Software is 10 | # furnished to do so, subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included in 13 | # all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 
# Derive the default endpoint URLs (api, management, studioapi) for a given
# AzureML API endpoint, normalizing the well-known studio/studioapi hosts and
# handling the -int (internal/test) environment specially.
default_api <- function(api_endpoint = "https://studioapi.azureml.net"){
  requested <- api_endpoint

  # Map studio <-> studioapi aliases onto a canonical endpoint; unknown
  # (e.g. regional) endpoints pass through unchanged.
  canonical <- function(url){
    switch(url,
           "https://studio.azureml.net"        = "https://studioapi.azureml.net",
           "https://studioapi.azureml.net"     = "https://studioapi.azureml.net",
           "https://studio.azureml-int.net"    = "https://studio.azureml-int.net",
           "https://studioapi.azureml-int.net" = "https://studio.azureml-int.net",
           url
    )
  }

  # Management endpoint: fixed host for -int, otherwise substitute the
  # studio host portion with management.azureml.net.
  management_for <- function(url){
    if(canonical(url) == "https://studio.azureml-int.net")
      "https://management.azureml-int.net"
    else
      sub("studio(.*?).azureml(.*?).net", "management.azureml.net", url)
  }

  list(
    api_endpoint = canonical(requested),
    management_endpoint = management_for(requested),
    studioapi = if(canonical(requested) == "https://studio.azureml-int.net")
      "https://studioapi.azureml-int.net/api"
    else
      paste0(canonical(requested), "/api")
  )
}
63 | #' 64 | #' Workspace ID 65 | #' 66 | #' \if{html}{\figure{workspace_id.png}{options: width="60\%" alt="Figure: workspace_id.png"}} 67 | #' \if{latex}{\figure{workspaceId.pdf}{options: width=7cm}} 68 | #' 69 | #' 70 | #' 71 | #' Authorization token 72 | #' 73 | #' \if{html}{\figure{authorization_token.png}{options: width="60\%" alt="Figure: authorization_token.png"}} 74 | #' \if{latex}{\figure{authorizationToken.pdf}{options: width=7cm}} 75 | #' 76 | #' 77 | #' @section Using a settings.json file: 78 | #' If any of the \code{id}, \code{auth}, \code{api_endpoint} or \code{management_endpoint} arguments are missing, the function attempts to read values from the \code{config} file with JSON format: 79 | #' \preformatted{ 80 | #' {"workspace":{ 81 | #' "id": "enter your AzureML workspace id here", 82 | #' "authorization_token": "enter your AzureML authorization token here", 83 | #' "api_endpoint": "https://studioapi.azureml.net", 84 | #' }} 85 | #' } 86 | #' 87 | #' To explicitly add the management endpoint in the JSON file, use: 88 | #' \preformatted{ 89 | #' {"workspace":{ 90 | #' "id": "enter your AzureML workspace id here", 91 | #' "authorization_token": "enter your AzureML authorization token here", 92 | #' "api_endpoint": "https://studioapi.azureml.net", 93 | #' "management_endpoint": "https://management.azureml.net" 94 | #' }} 95 | #' } 96 | #' 97 | #' @section Using a workspace in different Azure Machine Learning regions: 98 | #' 99 | #' By default, the Azure Machine Learning workspace is located in US South Central, but it is possible to create a workspace in different regions, including Europe West and Asia Southeast. 
100 | #' 101 | #' To use a workspace in Asia Southeast, you can modify the api endpoint line in the JSON file: 102 | #' \preformatted{ 103 | #' {"workspace": { 104 | #' "api_endpoint": ["https://asiasoutheast.studio.azureml.net"] 105 | #' }} 106 | #' } 107 | #' 108 | #' Similarly, for a workspace in Europe West: 109 | #' \preformatted{ 110 | #' {"workspace": { 111 | #' "api_endpoint": ["https://europewest.studio.azureml.net"] 112 | #' }} 113 | #' } 114 | #' 115 | 116 | #' 117 | #' 118 | #' @param id Optional workspace id from ML studio -> settings -> WORKSPACE ID. See the section "Finding your AzureML credentials" for more details. 119 | #' @param auth Optional authorization token from ML studio -> settings -> AUTHORIZATION TOKENS. See the section "Finding your AzureML credentials" for more details. 120 | #' @param api_endpoint Optional AzureML API web service URI. Defaults to \code{https://studioapi.azureml.net} if not provided and not specified in config. See note. 121 | #' @param management_endpoint Optional AzureML management web service URI. Defaults to \code{https://management.azureml.net} if not provided and not specified in config. See note. 122 | #' @param config Optional settings file containing id and authorization info. Used if any of the other arguments are missing. The default config file is \code{~/.azureml/settings.json}, but you can change this location by setting \code{options(AzureML.config = "newlocation")}. See the section "Using a settings.json file" for more details. 123 | #' @param ... ignored 124 | #' @param .validate If TRUE, makes a request to the AzureML API to retrieve some data. This validates whether the workspace id and authorization token are valid. Specifically, the function calls \code{\link{datasets}}. This should normally be set to TRUE. Set this to FALSE for testing, or if you know that your credentials are correct and you don't want to retrieve the datasets. 
#'
#' @return An R environment of class \code{Workspace} containing at least the following objects:
#' \itemize{
#'   \item{experiments: Collection of experiments in the workspace represented as an \code{Experiments} object. See \code{\link{experiments}}}
#'   \item{datasets: Collection of datasets in the workspace represented as a \code{Datasets} object. See \code{\link{datasets}}}
#'   \item{services: Collection of web services in the workspace represented as a \code{Services} object. See \code{\link{services}}}
#' }
#'
#' @importFrom jsonlite fromJSON
#' @export
#' @family dataset functions
#' @family experiment functions
#' @family discovery functions
#' @family consumption functions
#' @family publishing functions
#' @seealso \code{\link{datasets}}, \code{\link{experiments}}, \code{\link{refresh}},
#' \code{\link{services}}, \code{\link{consume}}, \code{\link{publishWebService}}
workspace <- function(id, auth, api_endpoint, management_endpoint,
                      config = getOption("AzureML.config"), ..., .validate = TRUE)
{
  # Guard against the common mistake of passing 'validate' instead of '.validate'.
  args <- list(...)
  if(!is.null(args$validate)) {
    message("You used an argument 'validate'. Did you mean '.validate'?\nIgnoring this argument.")
  }

  # If id or auth are missing, read them from the config file. Stop if unavailable.
  if(missing(id) || missing(auth)) {
    x <- validate.AzureML.config(config, stopOnError = TRUE)
    if(inherits(x, "error")) stop(x$message)
    settings <- read.AzureML.config(config)

    if(missing(id)){
      id <- settings[["workspace"]][["id"]]
    }
    if(missing(auth)){
      auth <- settings[["workspace"]][["authorization_token"]]
    }
  }

  # If api_endpoint or management_endpoint are missing, read them from the
  # config file, if available. (Unlike id/auth above, a missing config is not
  # an error here because defaults exist for both endpoints.)
  if(missing(api_endpoint) || missing(management_endpoint)){
    x <- try(validate.AzureML.config(config, stopOnError = FALSE), silent = TRUE)
    # validate.AzureML.config() may either return an error object (class
    # "error") or signal, in which case try() yields a "try-error". The
    # original test for "error" alone missed the second case.
    if(!inherits(x, c("error", "try-error"))){
      settings <- read.AzureML.config(config)

      if(missing(api_endpoint)){
        api_endpoint <- settings[["workspace"]][["api_endpoint"]]
      }
      if(missing(management_endpoint)){
        management_endpoint <- settings[["workspace"]][["management_endpoint"]]
      }
    }
  }

  # Resolve endpoint defaults. Named so as not to shadow the default_api()
  # function (the original local was also called `default_api`).
  api_defaults <- if(missing(api_endpoint) || is.null(api_endpoint)) {
    default_api()
  } else {
    default_api(api_endpoint)
  }
  if(missing(api_endpoint) || is.null(api_endpoint)){
    api_endpoint <- api_defaults[["api_endpoint"]]
  }
  if(missing(management_endpoint) || is.null(management_endpoint)){
    management_endpoint <- api_defaults[["management_endpoint"]]
  }

  if(.validate){
    # Test to see if api_endpoint is a reachable url
    resp <- tryCatch(
      suppressWarnings(curl::curl_fetch_memory(api_endpoint)),
      error = function(e)e
    )
    if(inherits(resp, "error")) stop("Invalid api_endpoint: ", api_endpoint)

    # Test to see if management_endpoint is a reachable url
    resp <- tryCatch(
      suppressWarnings(curl::curl_fetch_memory(management_endpoint)),
      error = function(e)e
    )
    if(inherits(resp, "error")) stop("Invalid management_endpoint: ", management_endpoint)
  }

  # All checks passed. Construct the Workspace object: an environment whose
  # experiments/datasets/services members are lazily fetched on first access.
  e <- new.env()
  class(e) <- "Workspace"
  e$id <- id
  e$.auth <- auth
  e$.api_endpoint <- api_endpoint
  e$.management_endpoint <- management_endpoint
  e$.studioapi <- api_defaults[["studioapi"]]
  e$.headers <- list(
    `User-Agent` = "R",
    `Content-Type` = "application/json;charset=UTF8",
    `x-ms-client-session-id` = "DefaultSession",
    `x-ms-metaanalytics-authorizationtoken` = auth
  )
  delayedAssign("experiments", get_experiments(e), assign.env = e)
  delayedAssign("datasets", get_datasets(e), assign.env = e)
  delayedAssign("services", services(e), assign.env = e)

  if(.validate){
    # Force one datasets request so invalid credentials fail here, not later.
    e$datasets <- get_datasets(e)
  }

  e
}


#' Refresh data in an AzureML workspace object.
#'
#' Contact the AzureML web service and refresh/update data in an AzureML workspace object.
#'
#' @param ws An AzureML workspace reference returned by \code{\link{workspace}}.
#' @param what Select "everything" to update all cached data, or other values to selectively update those values.
#'
#' @return NULL is invisibly returned--this function updates data in the \code{ws} environment.
#' @seealso \code{\link{workspace}}
#' @export
refresh <- function(ws, what = c("everything", "datasets", "experiments", "services"))
{
  what <- match.arg(what)
  if(what %in% c("everything", "experiments")) ws$experiments <- get_experiments(ws)
  if(what %in% c("everything", "datasets"))    ws$datasets    <- get_datasets(ws)
  if(what %in% c("everything", "services"))    ws$services    <- services(ws)
  invisible()
}

#' List datasets in an AzureML workspace.
#'
#' List datasets in an AzureML workspace, optionally filtering on sample or my datasets.
#'
#' @inheritParams refresh
#' @param filter Optionally filter the result, returning all, mine, or sample datasets.
#'
#' @return A data.frame with class \code{Datasets} listing available datasets in the workspace.
#' @note \code{datasets(w)} is equivalent to \code{w$datasets}. Since \code{w$datasets} is simply
#' an R data.frame, you can alternatively filter on any variable as desired.
#' @seealso \code{\link{workspace}}, \code{\link{experiments}}, \code{\link{download.datasets}}
#'
#' @export
#' @family dataset functions
#' @example inst/examples/example_datasets.R
datasets <- function(ws, filter = c("all", "my datasets", "samples"))
{
  stopIfNotWorkspace(ws)
  filter <- match.arg(filter)
  if(filter == "all") return(suppressWarnings(ws$datasets))
  samples <- filter == "samples"
  # Datasets owned by this workspace have Ids prefixed with the workspace id.
  # Match as a literal prefix rather than grep("^<id>", ...): the id is user
  # data and may contain regular-expression metacharacters.
  ids <- as.character(ws$datasets[, "Id"])
  mine <- !is.na(ids) & startsWith(ids, ws$id)
  if(samples) ws$datasets[!mine, ] else ws$datasets[mine, ]
}

#' List experiments in an AzureML workspace.
#'
#' List experiments in an AzureML workspace, optionally filtering on sample or my experiments.
#'
#' @inheritParams datasets
#'
#' @return A data.frame with class \code{Experiments} listing available experiments in the workspace.
#' @note \code{experiments(w)} is equivalent to \code{w$experiments}. Since \code{w$experiments} is simply an R data.frame, you can alternatively filter on any variable as desired.
#' @seealso \code{\link{workspace}}, \code{\link{datasets}}, \code{\link{download.intermediate.dataset}}
#'
#' @export
#' @family experiment functions
#' @example inst/examples/example_experiments.R
experiments <- function(ws, filter = c("all", "my experiments", "samples"))
{
  # Validate the workspace argument up front, consistent with datasets().
  stopIfNotWorkspace(ws)
  filter <- match.arg(filter)
  if(filter == "all") return(suppressWarnings(ws$experiments))
  samples <- filter == "samples"
  # Experiments owned by this workspace have ExperimentIds prefixed with the
  # workspace id. Match as a literal prefix rather than grep("^<id>", ...):
  # the id is user data and may contain regular-expression metacharacters.
  ids <- as.character(ws$experiments[, "ExperimentId"])
  mine <- !is.na(ids) & startsWith(ids, ws$id)
  if(samples) ws$experiments[!mine, ] else ws$experiments[mine, ]
}

# --------------------------------------------------------------- /R/wrapper.R

# Copyright (c) 2015-2016 Microsoft Corporation
# All rights reserved.
#
# The MIT License (MIT)
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | # THE SOFTWARE. 22 | 23 | 24 | # `wrapper` is the expression executed in the AzureML R environment. The publishWebService function sets up the environment "exportenv" from which this expression follows. 25 | 26 | # Note that exposing wrapperFunction() and wrapper will cause R CMD BUILD failures 27 | # The workaround is to comment out the wrapper function, and replace it with the text. 28 | # To update the function, uncomment the following function. 29 | 30 | 31 | ### --- Do not remove this uncommented code ------------------------------------ 32 | 33 | # wrapperFunction <- function(){ 34 | # 35 | # inputDF <- maml.mapInputPort(1) 36 | # 37 | # if(!is.element("exportenv", search())) { 38 | # load('src/env.RData') 39 | # if(!is.null(exportenv$..packages)) { 40 | # lapply(exportenv$..packages, function(pkg){ 41 | # if(!require(pkg, character.only = TRUE, quietly = TRUE)) 42 | # install.packages(pkg, 43 | # repos = paste0('file:///', getwd(), '/src/packages'), 44 | # lib = getwd() 45 | # ) 46 | # }) 47 | # .libPaths(new = getwd()) 48 | # lapply(exportenv$..packages, require, 49 | # quietly = TRUE, character.only=TRUE) 50 | # } 51 | # parent.env(exportenv) = globalenv() 52 | # 53 | # attach(exportenv, warn.conflicts = FALSE) 54 | # } 55 | # 56 | # if(..data.frame){ 57 | # outputDF <- as.data.frame(..fun(inputDF)) 58 | # colnames(outputDF) <- ..output_names 59 | # } else { 60 | # outputDF <- matrix(nrow = nrow(inputDF), 61 | # ncol = length(..output_names) 62 | # ) 63 | # outputDF <- as.data.frame(outputDF) 64 | # names(outputDF) <- ..output_names 65 | # for(i in 1:nrow(inputDF)){ 66 | # outputDF[i, ] <- do.call(..fun, inputDF[i, ]) 67 | # } 68 | # } 69 | # maml.mapOutputPort("outputDF") 70 | # } 71 | # 
# wrapper <- paste(as.character(body(wrapperFunction)[-1]),
#                  collapse = "\n")

### --- End of Do not remove ---------------------------------------------------

# `wrapper` is the deparsed body of the commented-out wrapperFunction() above.
# It is shipped to AzureML as a runtime string, so its text must be kept in
# sync with that function and must not be reformatted.
wrapper <- "inputDF <- maml.mapInputPort(1)\nif (!is.element(\"exportenv\", search())) {\n load(\"src/env.RData\")\n if (!is.null(exportenv$..packages)) {\n lapply(exportenv$..packages, function(pkg) {\n if (!require(pkg, character.only = TRUE, quietly = TRUE)) \n install.packages(pkg, repos = paste0(\"file:///\", getwd(), \"/src/packages\"), lib = getwd())\n })\n .libPaths(new = getwd())\n lapply(exportenv$..packages, require, quietly = TRUE, character.only = TRUE)\n }\n parent.env(exportenv) = globalenv()\n attach(exportenv, warn.conflicts = FALSE)\n}\nif (..data.frame) {\n outputDF <- as.data.frame(..fun(inputDF))\n colnames(outputDF) <- ..output_names\n} else {\n outputDF <- matrix(nrow = nrow(inputDF), ncol = length(..output_names))\n outputDF <- as.data.frame(outputDF)\n names(outputDF) <- ..output_names\n for (i in 1:nrow(inputDF)) {\n outputDF[i, ] <- do.call(..fun, inputDF[i, ])\n }\n}\nmaml.mapOutputPort(\"outputDF\")"



# Test the AzureML wrapper locally, without contacting AzureML.
# @param fun a function to test
# @param inputDF data frame of test input; defaults to a local copy of head(iris, 6)
# @param data.frame i/o format: if TRUE, fun consumes a whole data frame;
#   otherwise it is called row by row with the columns as arguments
# @examples
# foo <- function(dat)head(dat, 10)
# test_wrapper(foo, iris)
test_wrapper <- function(fun = function(x)head(x, 3), inputDF, `data.frame` = TRUE)
{
  if(missing(inputDF) || is.null(inputDF)){
    # Replicate the first 6 rows of iris explicitly;
    # this is a workaround to pass R CMD check.
    iris <- data.frame(
      Sepal.Length = c(5.1, 4.9, 4.7, 4.6, 5, 5.4),
      Sepal.Width = c(3.5, 3, 3.2, 3.1, 3.6, 3.9),
      Petal.Length = c(1.4, 1.4, 1.3, 1.5, 1.4, 1.7),
      Petal.Width = c(0.2, 0.2, 0.2, 0.2, 0.2, 0.4),
      Species = factor(rep(1, 6),
                       levels = 1:3, labels = c("setosa", "versicolor", "virginica"))
    )
    inputDF <- iris
  }
  exportenv <- new.env()
  # Local stand-ins for the AzureML runtime entry points used by `wrapper`:
  maml.mapInputPort <- function(x) as.data.frame(inputDF)
  maml.mapOutputPort <- function(x) get(x)
  # Mask base::load so the wrapper does not try to read 'src/env.RData'.
  load <- function(x) invisible()
  exportenv$..fun <- fun
  exportenv$..output_names <- if(`data.frame`) {
    names(match.fun(fun)(inputDF))
  } else {
    do.call(match.fun(fun), inputDF)
  }
  exportenv$..data.frame <- `data.frame`
  # The wrapper attach()es 'exportenv' to the search path and never detaches
  # it. Detach on exit so repeated test runs neither accumulate attached
  # environments nor see stale values from a previous run (the wrapper skips
  # re-attaching when "exportenv" is already on the search path).
  on.exit(if("exportenv" %in% search()) detach("exportenv"), add = TRUE)
  eval(parse(text = wrapper))
}

# ------------------------------------------------------------------- /R/zzz.R

# Default location of the settings file holding AzureML credentials.
AzureML.config.default <- "~/.azureml/settings.json"

.onAttach <- function(libname, pkgname){
  # Point the package at the default settings file; users may override via
  # options(AzureML.config = "...").
  options(AzureML.config = AzureML.config.default)
}

# ------------------------------------------------------ /R/zzz_test_helpers.R

# This function is used in unit testing to skip tests if the config file is missing
#
skip_if_missing_config <- function(f){
  if(!file.exists(f)) {
    msg <- paste("To run tests, add a file ~/.azureml/settings.json containing AzureML keys.",
                 "See ?workspace for help",
                 sep = "\n")
    message(msg)
    testthat::skip("settings.json file is missing")
  }
}

# Skip a unit test when no network connection is available, probing
# https://mran.microsoft.com (falling back to plain http).
skip_if_offline <- function(){
  u <- tryCatch(url("https://mran.microsoft.com"),
                error = function(e)e)
  if(inherits(u, "error")){
    u <- url("http://mran.microsoft.com")
  }
  on.exit(close(u))
  z <- tryCatch(suppressWarnings(readLines(u, n = 1, warn = FALSE)),
                error = function(e)e)
  if(inherits(z, "error")){
    testthat::skip("Offline. Skipping test.")
  }
}

# ---------------------------------------------------------------- /README.md

# AzureML

[![Project Status: Inactive – The project has reached a stable, usable state but is no longer being actively developed; support/maintenance will be provided as time allows.](https://www.repostatus.org/badges/latest/inactive.svg)](https://www.repostatus.org/#inactive)[![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/AzureML)](http://cran.r-project.org/package=AzureML)
[![CRAN RStudio mirror downloads](http://cranlogs.r-pkg.org/badges/AzureML)](http://www.r-pkg.org/pkg/AzureML)

An R interface to [AzureML](https://studio.azureml.net/) experiments, datasets, and web services.

Use this package to upload and download datasets to and from AzureML, to interrogate experiments, to publish new R-based web services, and to run R data through existing web services and retrieve the output.


# Installation instructions

Install the development version of the package directly from GitHub with:

```r
# Install devtools
if(!require("devtools")) install.packages("devtools")
devtools::install_github("RevolutionAnalytics/AzureML")
```

The package has dependencies on the following R packages:

- `jsonlite`
- `curl`
- `miniCRAN`
- `base64enc`
- `uuid`

In addition, you need a zip utility installed and your path must include the location of this zip utility. On Linux machines this is usually included by default. However, on Windows, you may have to install this yourself, e.g. by installing RTools and editing your path to include the RTools location.


# Overview

This package provides an interface to publish web services on Microsoft Azure Machine Learning (Azure ML) from your local R environment.
The main functions in the package cover: 35 | 36 | - Workspace: connect to and manage AzureML workspaces 37 | - Datasets: upload and download datasets to and from AzureML workspaces 38 | - Publish: define a custom function or train a model and publish it as an Azure Web Service 39 | - Consume: use available web services from R in a variety of convenient formats 40 | 41 | 42 | # System requirements 43 | 44 | To publish web services, you need to have an external zip utility installed. This utility should be available in the path. See `?zip` for more details. 45 | 46 | On windows, it's sufficient to install [RTools](https://cran.r-project.org/bin/windows/Rtools/). 47 | 48 | Note: the utility should be called `zip`, since `zip()` looks for a file called `zip` in the path. Thus, `publishWebService()` may fail, even if you have a program like `7-zip` installed. 49 | 50 | # Wiki 51 | 52 | The [project wiki](https://github.com/RevolutionAnalytics/AzureML/wiki) contains additional information, such as [bug bash instructions](https://github.com/RevolutionAnalytics/AzureML/wiki/Bug-bash-instructions) 53 | 54 | 55 | # Vignette 56 | 57 | See the package vignette and help documentation for examples and more information. 58 | 59 | You can view the vignette at [Getting Started with the AzureML Package](https://htmlpreview.github.io/?https://github.com/RevolutionAnalytics/AzureML/blob/master/vignettes/getting_started.html). 60 | 61 | 62 | # Bug reports 63 | 64 | This is a technology preview. The APIs used by the package are still subject to change. Please report any issues at the [github issue tracker](https://github.com/RevolutionAnalytics/AzureML/issues). 65 | -------------------------------------------------------------------------------- /inst/COPYRIGHTS: -------------------------------------------------------------------------------- 1 | The AzureML package is Copyright (C) 2015 by Microsoft Corporation, licensed 2 | under the MIT license.
3 | 4 | Portions of the included file getsyms.R are Copyright (c) 2008-2010 Revolution 5 | Analytics and licensed under the Apache License, Version 2.0. 6 | -------------------------------------------------------------------------------- /inst/doc/getting_started.R: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /inst/examples/example_datasets.R: -------------------------------------------------------------------------------- 1 | \dontrun{ 2 | library(AzureML) 3 | 4 | # Use the default config file ~/azureml/settings.json with format: 5 | # {"workspace":{ 6 | # "id":"test_id", 7 | # "authorization_token": "test_token", 8 | # "api_endpoint":"api_endpoint", 9 | # "management_endpoint":"management_endpoint" 10 | # }} 11 | # or, optionally set the `id` and `auth` parameters in the workspace 12 | # function. 13 | ws <- workspace() 14 | 15 | # List datasets 16 | ws$datasets 17 | datasets(ws) 18 | 19 | dataset <- "New York weather" 20 | ds <- match(dataset, ws$datasets$Name) 21 | frame <- download.datasets(ws$datasets[ds, ]) 22 | head(frame) 23 | 24 | # Alternative approach: 25 | frame <- download.datasets(ws, name=dataset) 26 | head(frame) 27 | } 28 | -------------------------------------------------------------------------------- /inst/examples/example_download.R: -------------------------------------------------------------------------------- 1 | \dontrun{ 2 | library(AzureML) 3 | 4 | name <- "Blood donation data" 5 | 6 | ws <- workspace() 7 | 8 | # The following three alternatives produce the same output: 9 | frame1 <- download.datasets(ws, name) 10 | frame2 <- download.datasets(datasets(ws), name) 11 | 12 | # Note that one can examine all the names, sizes, etc. 
of the datasets 13 | # in ws by examining d: 14 | d <- datasets(ws) 15 | frame3 <- download.datasets(subset(d, Name == name)) 16 | 17 | head(frame1) 18 | } 19 | -------------------------------------------------------------------------------- /inst/examples/example_experiments.R: -------------------------------------------------------------------------------- 1 | \dontrun{ 2 | library(AzureML) 3 | 4 | experiment <- "dd01c7e4a424432c9a9f83142d5cfec4.f-id.d2f351dd4cec4c06a4592ac83f7af55a" 5 | node_id <- '2a472ae1-ecb1-4f40-ae4e-cd3cecb1003f-268' 6 | 7 | ws <- workspace() 8 | 9 | ws$experiments 10 | experiments(ws) 11 | frame <- download.intermediate.dataset(ws, experiment, node_id, 12 | port_name = "Results dataset", 13 | data_type_id = "GenericCSV") 14 | head(frame) 15 | } 16 | -------------------------------------------------------------------------------- /inst/examples/example_publish.R: -------------------------------------------------------------------------------- 1 | \dontrun{ 2 | # Use a default configuration in ~/.azureml, alternatively 3 | # see help for `?workspace`. 
4 | 5 | ws <- workspace() 6 | 7 | # Publish a simple model using the lme4::sleepdata --------------------------- 8 | 9 | library(lme4) 10 | set.seed(1) 11 | train <- sleepstudy[sample(nrow(sleepstudy), 120),] 12 | m <- lm(Reaction ~ Days + Subject, data = train) 13 | 14 | # Deine a prediction function to publish based on the model: 15 | sleepyPredict <- function(newdata){ 16 | predict(m, newdata=newdata) 17 | } 18 | 19 | ep <- publishWebService(ws, fun = sleepyPredict, name="sleepy lm", 20 | inputSchema = sleepstudy, 21 | data.frame=TRUE) 22 | 23 | # OK, try this out, and compare with raw data 24 | ans <- consume(ep, sleepstudy)$ans 25 | plot(ans, sleepstudy$Reaction) 26 | 27 | # Remove the service 28 | deleteWebService(ws, "sleepy lm") 29 | 30 | 31 | 32 | # Another data frame example ------------------------------------------------- 33 | 34 | # If your function can consume a whole data frame at once, you can also 35 | # supply data in that form, resulting in more efficient computation. 36 | # The following example builds a simple linear model on a subset of the 37 | # airquality data and publishes a prediction function based on the model. 38 | set.seed(1) 39 | m <- lm(Ozone ~ ., data=airquality[sample(nrow(airquality), 100),]) 40 | # Define a prediction function based on the model: 41 | fun <- function(newdata) 42 | { 43 | predict(m, newdata=newdata) 44 | } 45 | # Note the definition of inputSchema and use of the data.frame argument. 
46 | ep <- publishWebService(ws, fun=fun, name="Ozone", 47 | inputSchema = airquality, 48 | data.frame=TRUE) 49 | ans <- consume(ep, airquality)$ans 50 | plot(ans, airquality$Ozone) 51 | deleteWebService(ws, "Ozone") 52 | 53 | 54 | 55 | # Train a model using diamonds in ggplot2 ------------------------------------ 56 | # This example also demonstrates how to deal with factor in the data 57 | 58 | data(diamonds, package="ggplot2") 59 | set.seed(1) 60 | train_idx = sample.int(nrow(diamonds), 30000) 61 | test_idx = sample(setdiff(seq(1, nrow(diamonds)), train_idx), 500) 62 | train <- diamonds[train_idx, ] 63 | test <- diamonds[test_idx, ] 64 | 65 | model <- glm(price ~ carat + clarity + color + cut - 1, data = train, 66 | family = Gamma(link = "log")) 67 | 68 | diamondLevels <- diamonds[1, ] 69 | 70 | # The model works reasonably well, except for some outliers 71 | plot(exp(predict(model, test)) ~ test$price) 72 | 73 | # Create a prediction function that converts characters correctly to factors 74 | 75 | predictDiamonds <- function(x){ 76 | x$cut <- factor(x$cut, 77 | levels = levels(diamondLevels$cut), ordered = TRUE) 78 | x$clarity <- factor(x$clarity, 79 | levels = levels(diamondLevels$clarity), ordered = TRUE) 80 | x$color <- factor(x$color, 81 | levels = levels(diamondLevels$color), ordered = TRUE) 82 | exp(predict(model, newdata = x)) 83 | } 84 | 85 | 86 | # Publish the service 87 | 88 | ws <- workspace() 89 | ep <- publishWebService(ws, fun = predictDiamonds, name = "diamonds", 90 | inputSchema = test, 91 | data.frame = TRUE 92 | ) 93 | 94 | # Consume the service 95 | results <- consume(ep, test)$ans 96 | plot(results ~ test$price) 97 | 98 | deleteWebService(ws, "diamonds") 99 | 100 | 101 | 102 | # Simple example using scalar input ------------------------------------------ 103 | 104 | ws <- workspace() 105 | 106 | # Really simple example: 107 | add <- function(x,y) x + y 108 | endpoint <- publishWebService(ws, 109 | fun = add, 110 | name = "addme", 111 | 
inputSchema = list(x="numeric", 112 | y="numeric"), 113 | outputSchema = list(ans="numeric")) 114 | consume(endpoint, list(x=pi, y=2)) 115 | 116 | # Now remove the web service named "addme" that we just published 117 | deleteWebService(ws, "addme") 118 | 119 | 120 | 121 | # Send a custom R function for evaluation in AzureML ------------------------- 122 | 123 | # A neat trick to evaluate any expression in the Azure ML virtual 124 | # machine R session and view its output: 125 | ep <- publishWebService(ws, 126 | fun = function(expr) { 127 | paste(capture.output( 128 | eval(parse(text=expr))), collapse="\n") 129 | }, 130 | name="commander", 131 | inputSchema = list(x = "character"), 132 | outputSchema = list(ans = "character")) 133 | cat(consume(ep, list(x = "getwd()"))$ans) 134 | cat(consume(ep, list(x = ".packages(all=TRUE)"))$ans) 135 | cat(consume(ep, list(x = "R.Version()"))$ans) 136 | 137 | # Remove the service we just published 138 | deleteWebService(ws, "commander") 139 | 140 | 141 | 142 | # Understanding the scoping rules -------------------------------------------- 143 | 144 | # The following example illustrates scoping rules. Note that the function 145 | # refers to the variable y defined outside the function body. That value 146 | # will be exported with the service. 147 | y <- pi 148 | ep <- publishWebService(ws, 149 | fun = function(x) x + y, 150 | name = "lexical scope", 151 | inputSchema = list(x = "numeric"), 152 | outputSchema = list(ans = "numeric")) 153 | cat(consume(ep, list(x=2))$ans) 154 | 155 | # Remove the service we just published 156 | deleteWebService(ws, "lexical scope") 157 | 158 | 159 | # Demonstrate scalar inputs but sending a data frame for scoring ------------- 160 | 161 | # Example showing the use of consume to score all the rows of a data frame 162 | # at once, and other invocations for evaluating multiple sets of input 163 | # values. 
The columns of the data frame correspond to the input parameters 164 | # of the web service in this example: 165 | f <- function(a,b,c,d) list(sum = a+b+c+d, prod = a*b*c*d) 166 | ep <- publishWebService(ws, 167 | f, 168 | name = "rowSums", 169 | inputSchema = list( 170 | a = "numeric", 171 | b = "numeric", 172 | c = "numeric", 173 | d = "numeric" 174 | ), 175 | outputSchema = list( 176 | sum ="numeric", 177 | prod = "numeric") 178 | ) 179 | x <- head(iris[,1:4]) # First four columns of iris 180 | 181 | # Note the following will FAIL because of a name mismatch in the arguments 182 | # (with an informative error): 183 | consume(ep, x, retryDelay=1) 184 | # We need the columns of the data frame to match the inputSchema: 185 | names(x) <- letters[1:4] 186 | # Now we can evaluate all the rows of the data frame in one call: 187 | consume(ep, x) 188 | # output should look like: 189 | # sum prod 190 | # 1 10.2 4.998 191 | # 2 9.5 4.116 192 | # 3 9.4 3.9104 193 | # 4 9.4 4.278 194 | # 5 10.2 5.04 195 | # 6 11.4 14.3208 196 | 197 | # You can use consume to evaluate just a single set of input values with this 198 | # form: 199 | consume(ep, a=1, b=2, c=3, d=4) 200 | 201 | # or, equivalently, 202 | consume(ep, list(a=1, b=2, c=3, d=4)) 203 | 204 | # You can evaluate multiple sets of input values with a data frame input: 205 | consume(ep, data.frame(a=1:2, b=3:4, c=5:6, d=7:8)) 206 | 207 | # or, equivalently, with multiple lists: 208 | consume(ep, list(a=1, b=3, c=5, d=7), list(a=2, b=4, c=6, d=8)) 209 | 210 | # Remove the service we just published 211 | deleteWebService(ws, "rowSums") 212 | 213 | # A more efficient way to do the same thing using data frame input/output: 214 | f <- function(df) with(df, list(sum = a+b+c+d, prod = a*b*c*d)) 215 | ep = publishWebService(ws, f, name="rowSums2", 216 | inputSchema = data.frame(a = 0, b = 0, c = 0, d = 0)) 217 | consume(ep, data.frame(a=1:2, b=3:4, c=5:6, d=7:8)) 218 | deleteWebService(ws, "rowSums2") 219 | 220 | 221 | 222 | # 
Automatically discover dependencies ---------------------------------------- 223 | 224 | # The publishWebService function uses `miniCRAN` to include dependencies on 225 | # packages required by your function. The next example uses the `lmer` 226 | # function from the lme4 package, and also shows how to publish a function 227 | # that consumes a data frame by setting data.frame=TRUE. Note! This example 228 | # depends on a lot of packages and may take some time to upload to Azure. 229 | library(lme4) 230 | # Build a sample mixed effects model on just a subset of the sleepstudy data... 231 | set.seed(1) 232 | m <- lmer(Reaction ~ Days + (Days | Subject), 233 | data=sleepstudy[sample(nrow(sleepstudy), 120),]) 234 | # Deine a prediction function to publish based on the model: 235 | fun <- function(newdata) 236 | { 237 | predict(m, newdata=newdata) 238 | } 239 | ep <- publishWebService(ws, fun=fun, name="sleepy lmer", 240 | inputSchema= sleepstudy, 241 | packages="lme4", 242 | data.frame=TRUE) 243 | 244 | # OK, try this out, and compare with raw data 245 | ans = consume(ep, sleepstudy)$ans 246 | plot(ans, sleepstudy$Reaction) 247 | 248 | # Remove the service 249 | deleteWebService(ws, "sleepy lmer") 250 | } 251 | -------------------------------------------------------------------------------- /inst/examples/example_upload.R: -------------------------------------------------------------------------------- 1 | \dontrun{ 2 | library(AzureML) 3 | 4 | ws <- workspace() 5 | 6 | # Upload the R airquality data.frame to the workspace. 
7 | upload.dataset(airquality, ws, "airquality") 8 | 9 | # Example datasets (airquality should be among them now) 10 | head(datasets(ws)) 11 | 12 | # Now delete what we've just uploaded 13 | delete.datasets(ws, "airquality") 14 | } 15 | -------------------------------------------------------------------------------- /man/AzureML-deprecated.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/azureml-defunct.R 3 | \name{consumeDataframe} 4 | \alias{consumeDataframe} 5 | \alias{consumeFile} 6 | \alias{consumeLists} 7 | \alias{getEPDetails} 8 | \alias{getWSDetails} 9 | \title{Deprecated functions} 10 | \usage{ 11 | consumeDataframe() 12 | 13 | consumeFile() 14 | 15 | consumeLists() 16 | 17 | getEPDetails() 18 | 19 | getWSDetails() 20 | } 21 | \description{ 22 | Deprecated functions 23 | } 24 | -------------------------------------------------------------------------------- /man/AzureML-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/azureml-package.R 3 | \docType{package} 4 | \name{AzureML-package} 5 | \alias{AzureML-package} 6 | \alias{AzureML} 7 | \title{Interface to Azure ML Studio datasets and experiments.} 8 | \description{ 9 | Allows you to work with Azure ML Studio datasets and experiments directly from R. 10 | } 11 | \section{Summary of functions}{ 12 | 13 | 14 | 1. Create a reference to an Azure ML workspace 15 | 16 | \itemize{ 17 | \item Workspace: \code{\link{workspace}} 18 | } 19 | 20 | 2. Datasets 21 | 22 | \itemize{ 23 | \item List available datasets: \code{\link{datasets}} 24 | \item Download datasets: \code{\link{download.datasets}} 25 | \item Upload a dataset: \code{\link{upload.dataset}} 26 | \item Delete datasets: \code{\link{delete.datasets}} 27 | } 28 | 29 | 3. 
Experiments 30 | 31 | \itemize{ 32 | \item Get experiments: \code{\link{experiments}} 33 | \item Get data from an experiment port: \code{\link{download.intermediate.dataset}} 34 | } 35 | 36 | 4. Web Services 37 | 38 | \itemize{ 39 | \item List available services: \code{\link{services}} 40 | \item Consume a web service (run data through it and retrieve result): \code{\link{consume}} 41 | \item Publish an R function as a web service: \code{\link{publishWebService}} 42 | \item Update an existing web service: \code{\link{updateWebService}} 43 | \item List web service endpoints: \code{\link{endpoints}} 44 | } 45 | 46 | 5. Configure a settings file with your AzureML secrets 47 | 48 | The \code{\link{workspace}} function optionally reads your AzureML credentials from a settings file located at \code{~/.azureml/settings.json}. You can read and write this file using: 49 | 50 | \itemize{ 51 | \item Write: \code{\link{write.AzureML.config}} 52 | \item Read: \code{\link{read.AzureML.config}} 53 | } 54 | } 55 | 56 | \keyword{package} 57 | -------------------------------------------------------------------------------- /man/consume.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/consume.R 3 | \name{consume} 4 | \alias{consume} 5 | \title{Use a web service to score data in list (key=value) format.} 6 | \usage{ 7 | consume(endpoint, ..., globalParam, retryDelay = 10, output = "output1", 8 | .retry = 5) 9 | } 10 | \arguments{ 11 | \item{endpoint}{Either an AzureML web service endpoint returned by \code{\link{publishWebService}}, \code{\link{endpoints}}, or simply an AzureML web service from \code{\link{services}}; in the latter case the default endpoint for the service will be used.} 12 | 13 | \item{...}{variable number of requests entered as lists in key-value format; optionally a single data frame argument.} 14 | 15 | \item{globalParam}{global parameters 
entered as a list, default value is an empty list} 16 | 17 | \item{retryDelay}{the time in seconds to delay before retrying in case of a server error} 18 | 19 | \item{output}{name of the output port to return usually 'output1' or 'output2'; set to NULL to return everything as raw results in JSON-encoded list form} 20 | 21 | \item{.retry}{number of tries before failing} 22 | } 23 | \value{ 24 | data frame containing results returned from web service call 25 | } 26 | \description{ 27 | Score data represented as lists where each list key represents a parameter of the web service. 28 | } 29 | \note{ 30 | Set \code{...} to a list of key/value pairs corresponding to web service inputs. Optionally, set \code{...} to a single data frame with columns corresponding to web service variables. The data frame approach returns output from the evaluation of each row of the data frame (see the examples). 31 | } 32 | \examples{ 33 | \dontrun{ 34 | # Use a default configuration in ~/.azureml, alternatively 35 | # see help for `?workspace`. 
36 | 37 | ws <- workspace() 38 | 39 | # Publish a simple model using the lme4::sleepdata --------------------------- 40 | 41 | library(lme4) 42 | set.seed(1) 43 | train <- sleepstudy[sample(nrow(sleepstudy), 120),] 44 | m <- lm(Reaction ~ Days + Subject, data = train) 45 | 46 | # Deine a prediction function to publish based on the model: 47 | sleepyPredict <- function(newdata){ 48 | predict(m, newdata=newdata) 49 | } 50 | 51 | ep <- publishWebService(ws, fun = sleepyPredict, name="sleepy lm", 52 | inputSchema = sleepstudy, 53 | data.frame=TRUE) 54 | 55 | # OK, try this out, and compare with raw data 56 | ans <- consume(ep, sleepstudy)$ans 57 | plot(ans, sleepstudy$Reaction) 58 | 59 | # Remove the service 60 | deleteWebService(ws, "sleepy lm") 61 | 62 | 63 | 64 | # Another data frame example ------------------------------------------------- 65 | 66 | # If your function can consume a whole data frame at once, you can also 67 | # supply data in that form, resulting in more efficient computation. 68 | # The following example builds a simple linear model on a subset of the 69 | # airquality data and publishes a prediction function based on the model. 70 | set.seed(1) 71 | m <- lm(Ozone ~ ., data=airquality[sample(nrow(airquality), 100),]) 72 | # Define a prediction function based on the model: 73 | fun <- function(newdata) 74 | { 75 | predict(m, newdata=newdata) 76 | } 77 | # Note the definition of inputSchema and use of the data.frame argument. 
78 | ep <- publishWebService(ws, fun=fun, name="Ozone", 79 | inputSchema = airquality, 80 | data.frame=TRUE) 81 | ans <- consume(ep, airquality)$ans 82 | plot(ans, airquality$Ozone) 83 | deleteWebService(ws, "Ozone") 84 | 85 | 86 | 87 | # Train a model using diamonds in ggplot2 ------------------------------------ 88 | # This example also demonstrates how to deal with factor in the data 89 | 90 | data(diamonds, package="ggplot2") 91 | set.seed(1) 92 | train_idx = sample.int(nrow(diamonds), 30000) 93 | test_idx = sample(setdiff(seq(1, nrow(diamonds)), train_idx), 500) 94 | train <- diamonds[train_idx, ] 95 | test <- diamonds[test_idx, ] 96 | 97 | model <- glm(price ~ carat + clarity + color + cut - 1, data = train, 98 | family = Gamma(link = "log")) 99 | 100 | diamondLevels <- diamonds[1, ] 101 | 102 | # The model works reasonably well, except for some outliers 103 | plot(exp(predict(model, test)) ~ test$price) 104 | 105 | # Create a prediction function that converts characters correctly to factors 106 | 107 | predictDiamonds <- function(x){ 108 | x$cut <- factor(x$cut, 109 | levels = levels(diamondLevels$cut), ordered = TRUE) 110 | x$clarity <- factor(x$clarity, 111 | levels = levels(diamondLevels$clarity), ordered = TRUE) 112 | x$color <- factor(x$color, 113 | levels = levels(diamondLevels$color), ordered = TRUE) 114 | exp(predict(model, newdata = x)) 115 | } 116 | 117 | 118 | # Publish the service 119 | 120 | ws <- workspace() 121 | ep <- publishWebService(ws, fun = predictDiamonds, name = "diamonds", 122 | inputSchema = test, 123 | data.frame = TRUE 124 | ) 125 | 126 | # Consume the service 127 | results <- consume(ep, test)$ans 128 | plot(results ~ test$price) 129 | 130 | deleteWebService(ws, "diamonds") 131 | 132 | 133 | 134 | # Simple example using scalar input ------------------------------------------ 135 | 136 | ws <- workspace() 137 | 138 | # Really simple example: 139 | add <- function(x,y) x + y 140 | endpoint <- publishWebService(ws, 141 | fun = add, 
142 | name = "addme", 143 | inputSchema = list(x="numeric", 144 | y="numeric"), 145 | outputSchema = list(ans="numeric")) 146 | consume(endpoint, list(x=pi, y=2)) 147 | 148 | # Now remove the web service named "addme" that we just published 149 | deleteWebService(ws, "addme") 150 | 151 | 152 | 153 | # Send a custom R function for evaluation in AzureML ------------------------- 154 | 155 | # A neat trick to evaluate any expression in the Azure ML virtual 156 | # machine R session and view its output: 157 | ep <- publishWebService(ws, 158 | fun = function(expr) { 159 | paste(capture.output( 160 | eval(parse(text=expr))), collapse="\\n") 161 | }, 162 | name="commander", 163 | inputSchema = list(x = "character"), 164 | outputSchema = list(ans = "character")) 165 | cat(consume(ep, list(x = "getwd()"))$ans) 166 | cat(consume(ep, list(x = ".packages(all=TRUE)"))$ans) 167 | cat(consume(ep, list(x = "R.Version()"))$ans) 168 | 169 | # Remove the service we just published 170 | deleteWebService(ws, "commander") 171 | 172 | 173 | 174 | # Understanding the scoping rules -------------------------------------------- 175 | 176 | # The following example illustrates scoping rules. Note that the function 177 | # refers to the variable y defined outside the function body. That value 178 | # will be exported with the service. 179 | y <- pi 180 | ep <- publishWebService(ws, 181 | fun = function(x) x + y, 182 | name = "lexical scope", 183 | inputSchema = list(x = "numeric"), 184 | outputSchema = list(ans = "numeric")) 185 | cat(consume(ep, list(x=2))$ans) 186 | 187 | # Remove the service we just published 188 | deleteWebService(ws, "lexical scope") 189 | 190 | 191 | # Demonstrate scalar inputs but sending a data frame for scoring ------------- 192 | 193 | # Example showing the use of consume to score all the rows of a data frame 194 | # at once, and other invocations for evaluating multiple sets of input 195 | # values. 
The columns of the data frame correspond to the input parameters 196 | # of the web service in this example: 197 | f <- function(a,b,c,d) list(sum = a+b+c+d, prod = a*b*c*d) 198 | ep <- publishWebService(ws, 199 | f, 200 | name = "rowSums", 201 | inputSchema = list( 202 | a = "numeric", 203 | b = "numeric", 204 | c = "numeric", 205 | d = "numeric" 206 | ), 207 | outputSchema = list( 208 | sum ="numeric", 209 | prod = "numeric") 210 | ) 211 | x <- head(iris[,1:4]) # First four columns of iris 212 | 213 | # Note the following will FAIL because of a name mismatch in the arguments 214 | # (with an informative error): 215 | consume(ep, x, retryDelay=1) 216 | # We need the columns of the data frame to match the inputSchema: 217 | names(x) <- letters[1:4] 218 | # Now we can evaluate all the rows of the data frame in one call: 219 | consume(ep, x) 220 | # output should look like: 221 | # sum prod 222 | # 1 10.2 4.998 223 | # 2 9.5 4.116 224 | # 3 9.4 3.9104 225 | # 4 9.4 4.278 226 | # 5 10.2 5.04 227 | # 6 11.4 14.3208 228 | 229 | # You can use consume to evaluate just a single set of input values with this 230 | # form: 231 | consume(ep, a=1, b=2, c=3, d=4) 232 | 233 | # or, equivalently, 234 | consume(ep, list(a=1, b=2, c=3, d=4)) 235 | 236 | # You can evaluate multiple sets of input values with a data frame input: 237 | consume(ep, data.frame(a=1:2, b=3:4, c=5:6, d=7:8)) 238 | 239 | # or, equivalently, with multiple lists: 240 | consume(ep, list(a=1, b=3, c=5, d=7), list(a=2, b=4, c=6, d=8)) 241 | 242 | # Remove the service we just published 243 | deleteWebService(ws, "rowSums") 244 | 245 | # A more efficient way to do the same thing using data frame input/output: 246 | f <- function(df) with(df, list(sum = a+b+c+d, prod = a*b*c*d)) 247 | ep = publishWebService(ws, f, name="rowSums2", 248 | inputSchema = data.frame(a = 0, b = 0, c = 0, d = 0)) 249 | consume(ep, data.frame(a=1:2, b=3:4, c=5:6, d=7:8)) 250 | deleteWebService(ws, "rowSums2") 251 | 252 | 253 | 254 | # 
Automatically discover dependencies ---------------------------------------- 255 | 256 | # The publishWebService function uses `miniCRAN` to include dependencies on 257 | # packages required by your function. The next example uses the `lmer` 258 | # function from the lme4 package, and also shows how to publish a function 259 | # that consumes a data frame by setting data.frame=TRUE. Note! This example 260 | # depends on a lot of packages and may take some time to upload to Azure. 261 | library(lme4) 262 | # Build a sample mixed effects model on just a subset of the sleepstudy data... 263 | set.seed(1) 264 | m <- lmer(Reaction ~ Days + (Days | Subject), 265 | data=sleepstudy[sample(nrow(sleepstudy), 120),]) 266 | # Deine a prediction function to publish based on the model: 267 | fun <- function(newdata) 268 | { 269 | predict(m, newdata=newdata) 270 | } 271 | ep <- publishWebService(ws, fun=fun, name="sleepy lmer", 272 | inputSchema= sleepstudy, 273 | packages="lme4", 274 | data.frame=TRUE) 275 | 276 | # OK, try this out, and compare with raw data 277 | ans = consume(ep, sleepstudy)$ans 278 | plot(ans, sleepstudy$Reaction) 279 | 280 | # Remove the service 281 | deleteWebService(ws, "sleepy lmer") 282 | } 283 | } 284 | \seealso{ 285 | \code{\link{publishWebService}} \code{\link{endpoints}} \code{\link{services}} \code{\link{workspace}} 286 | 287 | Other consumption functions: \code{\link{workspace}} 288 | } 289 | -------------------------------------------------------------------------------- /man/datasets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/workspace.R 3 | \name{datasets} 4 | \alias{datasets} 5 | \title{List datasets in an AzureML workspace.} 6 | \usage{ 7 | datasets(ws, filter = c("all", "my datasets", "samples")) 8 | } 9 | \arguments{ 10 | \item{ws}{An AzureML workspace reference returned by \code{\link{workspace}}.} 11 | 12 | 
\item{filter}{Optionally filter result, returning all, mine, or sample datasets.} 13 | } 14 | \value{ 15 | A data.frame with class \code{Datasets} listing available datasets in the workspace. 16 | } 17 | \description{ 18 | List datasets in an AzureML workspace, optionally filtering on sample or my datasets. 19 | } 20 | \note{ 21 | \code{datasets(w)} is equivalent to \code{w$datasets}. Since \code{w$datasets} is simply 22 | an R data.frame, you can alternatively filter on any variable as desired. 23 | } 24 | \examples{ 25 | \dontrun{ 26 | library(AzureML) 27 | 28 | # Use the default config file ~/.azureml/settings.json with format: 29 | # {"workspace":{ 30 | # "id":"test_id", 31 | # "authorization_token": "test_token", 32 | # "api_endpoint":"api_endpoint", 33 | # "management_endpoint":"management_endpoint" 34 | # }} 35 | # or, optionally set the `id` and `auth` parameters in the workspace 36 | # function. 37 | ws <- workspace() 38 | 39 | # List datasets 40 | ws$datasets 41 | datasets(ws) 42 | 43 | dataset <- "New York weather" 44 | ds <- match(dataset, ws$datasets$Name) 45 | frame <- download.datasets(ws$datasets[ds, ]) 46 | head(frame) 47 | 48 | # Alternative approach: 49 | frame <- download.datasets(ws, name=dataset) 50 | head(frame) 51 | } 52 | } 53 | \seealso{ 54 | \code{\link{workspace}}, \code{\link{experiments}}, \code{\link{download.datasets}} 55 | 56 | Other dataset functions: \code{\link{delete.datasets}}, 57 | \code{\link{download.intermediate.dataset}}, 58 | \code{\link{upload.dataset}}, \code{\link{workspace}} 59 | } 60 | -------------------------------------------------------------------------------- /man/delete.datasets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{delete.datasets} 4 | \alias{delete.datasets} 5 | \title{Delete datasets from an AzureML workspace.} 6 | \usage{ 7 | delete.datasets(ws, name, host) 
8 | } 9 | \arguments{ 10 | \item{ws}{An AzureML workspace reference returned by \code{\link{workspace}}.} 11 | 12 | \item{name}{Either one or more \code{Dataset} objects (rows from the workspace \code{datasets} data frame), or a character vector of dataset names to delete.} 13 | 14 | \item{host}{AzureML delete service endpoint} 15 | } 16 | \value{ 17 | A data frame with columns Name, Deleted, status_code indicating the HTTP status code and success/failure result of the delete operation for each dataset. 18 | } 19 | \description{ 20 | Delete datasets from an AzureML workspace. 21 | } 22 | \seealso{ 23 | Other dataset functions: \code{\link{datasets}}, 24 | \code{\link{download.intermediate.dataset}}, 25 | \code{\link{upload.dataset}}, \code{\link{workspace}} 26 | } 27 | -------------------------------------------------------------------------------- /man/deleteWebService.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/deleteWebservice.R 3 | \name{deleteWebService} 4 | \alias{deleteWebService} 5 | \title{Delete a Microsoft Azure Web Service} 6 | \usage{ 7 | deleteWebService(ws, name, refresh = TRUE) 8 | } 9 | \arguments{ 10 | \item{ws}{An AzureML workspace reference returned by \code{\link{workspace}}.} 11 | 12 | \item{name}{Either one row from the workspace \code{services} data.frame corresponding to a service to delete, or simply a service name character string.} 13 | 14 | \item{refresh}{Set to \code{FALSE} to supress automatic updating of the workspace list of services, 15 | useful when deleting many services in bulk.} 16 | } 17 | \value{ 18 | The updated data.frame of workspace services is invisibly returned. 19 | } 20 | \description{ 21 | Delete a Microsoft Azure Machine Learning web service from your workspace. 22 | } 23 | \note{ 24 | If more than one service matches the supplied \code{name}, the first listed service will be deleted. 
25 | } 26 | \examples{ 27 | \dontrun{ 28 | # Use a default configuration in ~/.azureml, alternatively 29 | # see help for `?workspace`. 30 | 31 | ws <- workspace() 32 | 33 | # Publish a simple model using the lme4::sleepdata --------------------------- 34 | 35 | library(lme4) 36 | set.seed(1) 37 | train <- sleepstudy[sample(nrow(sleepstudy), 120),] 38 | m <- lm(Reaction ~ Days + Subject, data = train) 39 | 40 | # Deine a prediction function to publish based on the model: 41 | sleepyPredict <- function(newdata){ 42 | predict(m, newdata=newdata) 43 | } 44 | 45 | ep <- publishWebService(ws, fun = sleepyPredict, name="sleepy lm", 46 | inputSchema = sleepstudy, 47 | data.frame=TRUE) 48 | 49 | # OK, try this out, and compare with raw data 50 | ans <- consume(ep, sleepstudy)$ans 51 | plot(ans, sleepstudy$Reaction) 52 | 53 | # Remove the service 54 | deleteWebService(ws, "sleepy lm") 55 | 56 | 57 | 58 | # Another data frame example ------------------------------------------------- 59 | 60 | # If your function can consume a whole data frame at once, you can also 61 | # supply data in that form, resulting in more efficient computation. 62 | # The following example builds a simple linear model on a subset of the 63 | # airquality data and publishes a prediction function based on the model. 64 | set.seed(1) 65 | m <- lm(Ozone ~ ., data=airquality[sample(nrow(airquality), 100),]) 66 | # Define a prediction function based on the model: 67 | fun <- function(newdata) 68 | { 69 | predict(m, newdata=newdata) 70 | } 71 | # Note the definition of inputSchema and use of the data.frame argument. 
72 | ep <- publishWebService(ws, fun=fun, name="Ozone", 73 | inputSchema = airquality, 74 | data.frame=TRUE) 75 | ans <- consume(ep, airquality)$ans 76 | plot(ans, airquality$Ozone) 77 | deleteWebService(ws, "Ozone") 78 | 79 | 80 | 81 | # Train a model using diamonds in ggplot2 ------------------------------------ 82 | # This example also demonstrates how to deal with factor in the data 83 | 84 | data(diamonds, package="ggplot2") 85 | set.seed(1) 86 | train_idx = sample.int(nrow(diamonds), 30000) 87 | test_idx = sample(setdiff(seq(1, nrow(diamonds)), train_idx), 500) 88 | train <- diamonds[train_idx, ] 89 | test <- diamonds[test_idx, ] 90 | 91 | model <- glm(price ~ carat + clarity + color + cut - 1, data = train, 92 | family = Gamma(link = "log")) 93 | 94 | diamondLevels <- diamonds[1, ] 95 | 96 | # The model works reasonably well, except for some outliers 97 | plot(exp(predict(model, test)) ~ test$price) 98 | 99 | # Create a prediction function that converts characters correctly to factors 100 | 101 | predictDiamonds <- function(x){ 102 | x$cut <- factor(x$cut, 103 | levels = levels(diamondLevels$cut), ordered = TRUE) 104 | x$clarity <- factor(x$clarity, 105 | levels = levels(diamondLevels$clarity), ordered = TRUE) 106 | x$color <- factor(x$color, 107 | levels = levels(diamondLevels$color), ordered = TRUE) 108 | exp(predict(model, newdata = x)) 109 | } 110 | 111 | 112 | # Publish the service 113 | 114 | ws <- workspace() 115 | ep <- publishWebService(ws, fun = predictDiamonds, name = "diamonds", 116 | inputSchema = test, 117 | data.frame = TRUE 118 | ) 119 | 120 | # Consume the service 121 | results <- consume(ep, test)$ans 122 | plot(results ~ test$price) 123 | 124 | deleteWebService(ws, "diamonds") 125 | 126 | 127 | 128 | # Simple example using scalar input ------------------------------------------ 129 | 130 | ws <- workspace() 131 | 132 | # Really simple example: 133 | add <- function(x,y) x + y 134 | endpoint <- publishWebService(ws, 135 | fun = add, 136 | 
name = "addme", 137 | inputSchema = list(x="numeric", 138 | y="numeric"), 139 | outputSchema = list(ans="numeric")) 140 | consume(endpoint, list(x=pi, y=2)) 141 | 142 | # Now remove the web service named "addme" that we just published 143 | deleteWebService(ws, "addme") 144 | 145 | 146 | 147 | # Send a custom R function for evaluation in AzureML ------------------------- 148 | 149 | # A neat trick to evaluate any expression in the Azure ML virtual 150 | # machine R session and view its output: 151 | ep <- publishWebService(ws, 152 | fun = function(expr) { 153 | paste(capture.output( 154 | eval(parse(text=expr))), collapse="\\n") 155 | }, 156 | name="commander", 157 | inputSchema = list(x = "character"), 158 | outputSchema = list(ans = "character")) 159 | cat(consume(ep, list(x = "getwd()"))$ans) 160 | cat(consume(ep, list(x = ".packages(all=TRUE)"))$ans) 161 | cat(consume(ep, list(x = "R.Version()"))$ans) 162 | 163 | # Remove the service we just published 164 | deleteWebService(ws, "commander") 165 | 166 | 167 | 168 | # Understanding the scoping rules -------------------------------------------- 169 | 170 | # The following example illustrates scoping rules. Note that the function 171 | # refers to the variable y defined outside the function body. That value 172 | # will be exported with the service. 173 | y <- pi 174 | ep <- publishWebService(ws, 175 | fun = function(x) x + y, 176 | name = "lexical scope", 177 | inputSchema = list(x = "numeric"), 178 | outputSchema = list(ans = "numeric")) 179 | cat(consume(ep, list(x=2))$ans) 180 | 181 | # Remove the service we just published 182 | deleteWebService(ws, "lexical scope") 183 | 184 | 185 | # Demonstrate scalar inputs but sending a data frame for scoring ------------- 186 | 187 | # Example showing the use of consume to score all the rows of a data frame 188 | # at once, and other invocations for evaluating multiple sets of input 189 | # values. 
The columns of the data frame correspond to the input parameters 190 | # of the web service in this example: 191 | f <- function(a,b,c,d) list(sum = a+b+c+d, prod = a*b*c*d) 192 | ep <- publishWebService(ws, 193 | f, 194 | name = "rowSums", 195 | inputSchema = list( 196 | a = "numeric", 197 | b = "numeric", 198 | c = "numeric", 199 | d = "numeric" 200 | ), 201 | outputSchema = list( 202 | sum ="numeric", 203 | prod = "numeric") 204 | ) 205 | x <- head(iris[,1:4]) # First four columns of iris 206 | 207 | # Note the following will FAIL because of a name mismatch in the arguments 208 | # (with an informative error): 209 | consume(ep, x, retryDelay=1) 210 | # We need the columns of the data frame to match the inputSchema: 211 | names(x) <- letters[1:4] 212 | # Now we can evaluate all the rows of the data frame in one call: 213 | consume(ep, x) 214 | # output should look like: 215 | # sum prod 216 | # 1 10.2 4.998 217 | # 2 9.5 4.116 218 | # 3 9.4 3.9104 219 | # 4 9.4 4.278 220 | # 5 10.2 5.04 221 | # 6 11.4 14.3208 222 | 223 | # You can use consume to evaluate just a single set of input values with this 224 | # form: 225 | consume(ep, a=1, b=2, c=3, d=4) 226 | 227 | # or, equivalently, 228 | consume(ep, list(a=1, b=2, c=3, d=4)) 229 | 230 | # You can evaluate multiple sets of input values with a data frame input: 231 | consume(ep, data.frame(a=1:2, b=3:4, c=5:6, d=7:8)) 232 | 233 | # or, equivalently, with multiple lists: 234 | consume(ep, list(a=1, b=3, c=5, d=7), list(a=2, b=4, c=6, d=8)) 235 | 236 | # Remove the service we just published 237 | deleteWebService(ws, "rowSums") 238 | 239 | # A more efficient way to do the same thing using data frame input/output: 240 | f <- function(df) with(df, list(sum = a+b+c+d, prod = a*b*c*d)) 241 | ep = publishWebService(ws, f, name="rowSums2", 242 | inputSchema = data.frame(a = 0, b = 0, c = 0, d = 0)) 243 | consume(ep, data.frame(a=1:2, b=3:4, c=5:6, d=7:8)) 244 | deleteWebService(ws, "rowSums2") 245 | 246 | 247 | 248 | # 
Automatically discover dependencies ---------------------------------------- 249 | 250 | # The publishWebService function uses `miniCRAN` to include dependencies on 251 | # packages required by your function. The next example uses the `lmer` 252 | # function from the lme4 package, and also shows how to publish a function 253 | # that consumes a data frame by setting data.frame=TRUE. Note! This example 254 | # depends on a lot of packages and may take some time to upload to Azure. 255 | library(lme4) 256 | # Build a sample mixed effects model on just a subset of the sleepstudy data... 257 | set.seed(1) 258 | m <- lmer(Reaction ~ Days + (Days | Subject), 259 | data=sleepstudy[sample(nrow(sleepstudy), 120),]) 260 | # Deine a prediction function to publish based on the model: 261 | fun <- function(newdata) 262 | { 263 | predict(m, newdata=newdata) 264 | } 265 | ep <- publishWebService(ws, fun=fun, name="sleepy lmer", 266 | inputSchema= sleepstudy, 267 | packages="lme4", 268 | data.frame=TRUE) 269 | 270 | # OK, try this out, and compare with raw data 271 | ans = consume(ep, sleepstudy)$ans 272 | plot(ans, sleepstudy$Reaction) 273 | 274 | # Remove the service 275 | deleteWebService(ws, "sleepy lmer") 276 | } 277 | } 278 | \seealso{ 279 | \code{\link{services}} \code{\link{publishWebService}} \code{\link{updateWebService}} 280 | 281 | Other publishing functions: \code{\link{publishWebService}}, 282 | \code{\link{workspace}} 283 | } 284 | -------------------------------------------------------------------------------- /man/discoverSchema.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/discover.R 3 | \name{discoverSchema} 4 | \alias{discoverSchema} 5 | \title{Discover web service schema.} 6 | \usage{ 7 | discoverSchema(helpURL, scheme = "https", 8 | host = "ussouthcentral.services.azureml.net", api_version = "2.0") 9 | } 10 | \arguments{ 11 | 
\item{helpURL}{URL of the help page of the web service} 12 | 13 | \item{scheme}{the URI scheme} 14 | 15 | \item{host}{optional parameter that defaults to ussouthcentral.services.azureml.net} 16 | 17 | \item{api_version}{AzureML API version} 18 | } 19 | \value{ 20 | List containing the request URL of the webservice, column names of the data, sample input as well as the input schema 21 | } 22 | \description{ 23 | Discover the expected input to a web service specified by a web service ID. Using the workspace ID and web service ID, this returns information specific to the consumption functions 24 | } 25 | \seealso{ 26 | \code{\link{publishWebService}} \code{\link{consume}} \code{\link{workspace}} \code{\link{services}} \code{\link{endpoints}} \code{\link{endpointHelp}} 27 | 28 | Other discovery functions: \code{\link{endpointHelp}}, 29 | \code{\link{endpoints}}, \code{\link{services}}, 30 | \code{\link{workspace}} 31 | } 32 | -------------------------------------------------------------------------------- /man/download.datasets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{download.datasets} 4 | \alias{download.datasets} 5 | \title{Download one or more datasets from an AzureML workspace.} 6 | \usage{ 7 | download.datasets(dataset, name, ...) 8 | } 9 | \arguments{ 10 | \item{dataset}{Either one or more rows from a \code{\link{datasets}} data frame in a workspace, 11 | or just a workspace from \code{\link{workspace}}. When \code{source} is a workspace, then 12 | the \code{name} parameter must also be specified.} 13 | 14 | \item{name}{Optional character vector of one or more dataset names to filter the \code{datasets} 15 | parameter list by.} 16 | 17 | \item{...}{Optional arguments to pass to \code{read.table} for CSV or TSV DataTypeIds or to 18 | \code{readBin} for the ZIP DataTypeId. 
For example, 19 | specify \code{stringsAsFactors=TRUE} if you wish, or any other valid argument to \code{read.table}.} 20 | } 21 | \value{ 22 | If one dataset is specified (that is, one row from a workspace \code{datasets} data frame), 23 | then a single data frame is returned. 24 | If more than one dataset is specified (more than one row), then a list of data frames is returned. 25 | } 26 | \description{ 27 | Download one or more datasets from an AzureML workspace into local R data frame or raw binary objects. 28 | } 29 | \note{ 30 | TSV- and CSV-formatted datasets return data frame results with \code{stringsAsFactors=FALSE} 31 | by default (independently of the global \code{stringsAsFactors} option). 32 | 33 | This function can download datasets with various CSV and TSV "DataTypeIds", or "DataTypeId" 34 | of "ARFF", "PlainText" or "ZIP". Other "DataTypeIds" return an error. See the AzureML Data Format 35 | Conversion modules to convert data to a supported format. Data with DataTypeId "ZIP" are returned 36 | in a raw binary R vector, which could then be passed through \code{unzip}, for example. 37 | } 38 | \examples{ 39 | \dontrun{ 40 | library(AzureML) 41 | 42 | name <- "Blood donation data" 43 | 44 | ws <- workspace() 45 | 46 | # The following three alternatives produce the same output: 47 | frame1 <- download.datasets(ws, name) 48 | frame2 <- download.datasets(datasets(ws), name) 49 | 50 | # Note that one can examine all the names, sizes, etc. 
of the datasets 51 | # in ws by examining d: 52 | d <- datasets(ws) 53 | frame3 <- download.datasets(subset(d, Name == name)) 54 | 55 | head(frame1) 56 | } 57 | } 58 | \seealso{ 59 | \code{\link{workspace}}, \code{\link{datasets}}, \code{\link{read.table}}, 60 | \code{\link{download.intermediate.dataset}} 61 | } 62 | -------------------------------------------------------------------------------- /man/download.intermediate.dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{download.intermediate.dataset} 4 | \alias{download.intermediate.dataset} 5 | \title{Download a dataset from an AzureML experiment module.} 6 | \usage{ 7 | download.intermediate.dataset(ws, experiment, node_id, 8 | port_name = "Results dataset", data_type_id = "GenericCSV", ...) 9 | } 10 | \arguments{ 11 | \item{ws}{An AzureML workspace reference returned by \code{\link{workspace}}.} 12 | 13 | \item{experiment}{AzureML experiment ID.} 14 | 15 | \item{node_id}{Experiment node ID.} 16 | 17 | \item{port_name}{Experiment port name. The default is "Results dataset".} 18 | 19 | \item{data_type_id}{Experiment data type id. The default is "GenericCSV". See the note below for other types.} 20 | 21 | \item{...}{Optional arguments to pass to \code{read.table} for CSV or TSV DataTypeIds. For example, specify \code{stringsAsFactors=TRUE} if you wish, or any other valid argument to \code{read.table}.} 22 | } 23 | \value{ 24 | In most cases a data frame. Exceptions are: a raw vector for \code{DataTypeId="Zip"} and character vector for \code{DataTypeId="PlainText"} 25 | } 26 | \description{ 27 | Allows you to download the data from certain types of modules in AzureML experiments. You can generate the information required from AzureML Studio by (right) clicking on a module output port and selecting the option "Generate Data Access Code...". 
28 | } 29 | \note{ 30 | TSV- and CSV-formatted datasets return data frame results with \code{stringsAsFactors=FALSE} by default (independently of the global \code{stringsAsFactors} option). 31 | 32 | \bold{Supported DataTypeId options} 33 | 34 | 35 | This function can download datasets with various CSV and TSV \code{DataTypeId} (with or without headers), in addition to "ARFF", "PlainText" and "Zip". Other "DataTypeIds" return an error. See the AzureML Data Format Conversion modules to convert data to a supported format. 36 | } 37 | \seealso{ 38 | \code{\link{workspace}}, \code{\link{datasets}}, \code{\link[utils]{read.table}} and \code{\link{download.datasets}} 39 | 40 | Other dataset functions: \code{\link{datasets}}, 41 | \code{\link{delete.datasets}}, 42 | \code{\link{upload.dataset}}, \code{\link{workspace}} 43 | 44 | Other experiment functions: \code{\link{experiments}}, 45 | \code{\link{workspace}} 46 | } 47 | -------------------------------------------------------------------------------- /man/endpointHelp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/services.R 3 | \name{endpointHelp} 4 | \alias{endpointHelp} 5 | \title{Display AzureML Web Service Endpoint Help Screens.} 6 | \usage{ 7 | endpointHelp(ep, type = c("apidocument", "r-snippet", "score", "jobs", 8 | "update")) 9 | } 10 | \arguments{ 11 | \item{ep}{an AzureML web service endpoint from the \code{\link{endpoints}} function.} 12 | 13 | \item{type}{the type of help to display.} 14 | } 15 | \value{ 16 | Returns the help text. If \code{type = "apidocument"}, then returns the help as a list from a parsed JSON document describing the service. 17 | } 18 | \description{ 19 | Download and return help for the specified AzureML web service endpoint. 
20 | }
21 | \examples{
22 | \dontrun{
23 | ws <- workspace()
24 | 
25 | s <- services(ws)
26 | e <- endpoints(ws, s[1,])
27 | endpointHelp(e)
28 | 
29 | # Particularly useful way to see expected service input and output:
30 | endpointHelp(e)$definitions
31 | 
32 | }
33 | }
34 | \seealso{
35 | Other discovery functions: \code{\link{discoverSchema}},
36 | \code{\link{endpoints}}, \code{\link{services}},
37 | \code{\link{workspace}}
38 | }
39 | -------------------------------------------------------------------------------- /man/endpoints.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/services.R
3 | \name{endpoints}
4 | \alias{endpoints}
5 | \alias{getEndpoints}
6 | \title{List AzureML Web Service Endpoints}
7 | \usage{
8 | endpoints(ws, service_id, endpoint_id, host = ws$.management_endpoint)
9 | 
10 | getEndpoints(ws, service_id, endpoint_id, host = ws$.management_endpoint)
11 | }
12 | \arguments{
13 | \item{ws}{An AzureML workspace reference returned by \code{\link{workspace}}.}
14 | 
15 | \item{service_id}{A web service Id, for example returned by \code{\link{services}}; alternatively a row from the services data frame identifying the service.}
16 | 
17 | \item{endpoint_id}{An optional endpoint id. If supplied, return the endpoint information for just that id. Leave undefined to return a data.frame of all end points associated with the service.}
18 | 
19 | \item{host}{The AzureML web services URI}
20 | }
21 | \value{
22 | Returns a data.frame with variables:
23 | \itemize{
24 | \item Name
25 | \item Description
26 | \item CreationTime
27 | \item WorkspaceId
28 | \item WebServiceId
29 | \item HelpLocation
30 | \item PrimaryKey
31 | \item SecondaryKey
32 | \item ApiLocation
33 | \item Version
34 | \item MaxConcurrentCalls
35 | \item DiagnosticsTraceLevel
36 | \item ThrottleLevel
37 | }
38 | Each row of the data.frame corresponds to an end point.
39 | }
40 | \description{
41 | Return a list of web services endpoints for the specified web service id.
42 | }
43 | \note{
44 | \code{getEndpoints} is an alias for \code{endpoints}.
45 | }
46 | \examples{
47 | \dontrun{
48 | workspace_id <- "" # Your AzureML workspace id
49 | authorization_token <- "" # Your AzureML authorization token
50 | 
51 | ws <- workspace(
52 | id = workspace_id,
53 | auth = authorization_token
54 | )
55 | 
56 | s <- services(ws)
57 | endpoints(ws, s$Id[1])
58 | 
59 | # Note that you can alternatively just use the entire row that
60 | # describes the service.
61 | endpoints(ws, s[1,])
62 | 
63 | # Equivalent:
64 | getEndpoints(ws, s$Id[1])
65 | }
66 | }
67 | \seealso{
68 | Other discovery functions: \code{\link{discoverSchema}},
69 | \code{\link{endpointHelp}}, \code{\link{services}},
70 | \code{\link{workspace}}
71 | }
72 | -------------------------------------------------------------------------------- /man/experiments.Rd: --------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/workspace.R
3 | \name{experiments}
4 | \alias{experiments}
5 | \title{List experiments in an AzureML workspace.}
6 | \usage{
7 | experiments(ws, filter = c("all", "my experiments", "samples"))
8 | }
9 | \arguments{
10 | \item{ws}{An AzureML workspace reference returned by \code{\link{workspace}}.}
11 | 
12 | \item{filter}{Optionally filter result, returning all, mine, or sample datasets.}
13 | }
14 | \value{
15 | A data.frame with class \code{Experiments} listing available experiments in the workspace.
16 | }
17 | \description{
18 | List experiments in an AzureML workspace, optionally filtering on sample or my experiments.
19 | }
20 | \note{
21 | \code{experiments(w)} is equivalent to \code{w$experiments}. Since \code{w$experiments} is simply an R data.frame, you can alternatively filter on any variable as desired.
22 | } 23 | \examples{ 24 | \dontrun{ 25 | library(AzureML) 26 | 27 | experiment <- "dd01c7e4a424432c9a9f83142d5cfec4.f-id.d2f351dd4cec4c06a4592ac83f7af55a" 28 | node_id <- '2a472ae1-ecb1-4f40-ae4e-cd3cecb1003f-268' 29 | 30 | ws <- workspace() 31 | 32 | ws$experiments 33 | experiments(ws) 34 | frame <- download.intermediate.dataset(ws, experiment, node_id, 35 | port_name = "Results dataset", 36 | data_type_id = "GenericCSV") 37 | head(frame) 38 | } 39 | } 40 | \seealso{ 41 | \code{\link{workspace}}, \code{\link{datasets}}, \code{\link{download.intermediate.dataset}} 42 | 43 | Other experiment functions: \code{\link{download.intermediate.dataset}}, 44 | \code{\link{workspace}} 45 | } 46 | -------------------------------------------------------------------------------- /man/figures/authorizationToken.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RevolutionAnalytics/AzureML/1266f42c7493bd7874affb4fc969f9c3118ee13c/man/figures/authorizationToken.pdf -------------------------------------------------------------------------------- /man/figures/authorization_token.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RevolutionAnalytics/AzureML/1266f42c7493bd7874affb4fc969f9c3118ee13c/man/figures/authorization_token.png -------------------------------------------------------------------------------- /man/figures/workspaceId.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RevolutionAnalytics/AzureML/1266f42c7493bd7874affb4fc969f9c3118ee13c/man/figures/workspaceId.pdf -------------------------------------------------------------------------------- /man/figures/workspace_id.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RevolutionAnalytics/AzureML/1266f42c7493bd7874affb4fc969f9c3118ee13c/man/figures/workspace_id.png -------------------------------------------------------------------------------- /man/getDetailsFromUrl.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/discover.R 3 | \name{getDetailsFromUrl} 4 | \alias{getDetailsFromUrl} 5 | \title{Helper function to extract information from a help page URL} 6 | \usage{ 7 | getDetailsFromUrl(url) 8 | } 9 | \arguments{ 10 | \item{url}{the URL of a help page} 11 | } 12 | \value{ 13 | a vector containing the workspace ID, webservices ID and endpoint ID 14 | } 15 | \description{ 16 | Given a Microsoft Azure Machine Learning web service endpoint, extracts the endpoint ID and the workspace ID 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/is.Dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.R 3 | \name{is.Dataset} 4 | \alias{is.Dataset} 5 | \title{Test if an object is an Azure ML Dataset.} 6 | \usage{ 7 | is.Dataset(x) 8 | } 9 | \arguments{ 10 | \item{x}{an R object} 11 | } 12 | \value{ 13 | logical value, TRUE if \code{x} represents an Azure ML Dataset. 14 | } 15 | \description{ 16 | Test if an object is an Azure ML Dataset. 
17 | } 18 | -------------------------------------------------------------------------------- /man/is.Endpoint.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.R 3 | \name{is.Endpoint} 4 | \alias{is.Endpoint} 5 | \title{Test if an object is an Azure ML Endpoint.} 6 | \usage{ 7 | is.Endpoint(x) 8 | } 9 | \arguments{ 10 | \item{x}{an R object} 11 | } 12 | \value{ 13 | logical value, TRUE if \code{x} represents an Azure ML web service endpoint 14 | } 15 | \description{ 16 | Test if an object is an Azure ML Endpoint. 17 | } 18 | -------------------------------------------------------------------------------- /man/is.Service.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.R 3 | \name{is.Service} 4 | \alias{is.Service} 5 | \title{Test if an object is an Azure ML Service.} 6 | \usage{ 7 | is.Service(x) 8 | } 9 | \arguments{ 10 | \item{x}{an R object} 11 | } 12 | \value{ 13 | logical value, TRUE if \code{x} represents an Azure ML web service 14 | } 15 | \description{ 16 | Test if an object is an Azure ML Service. 17 | } 18 | -------------------------------------------------------------------------------- /man/is.Workspace.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/methods.R 3 | \name{is.Workspace} 4 | \alias{is.Workspace} 5 | \title{Test if an object is an Azure ML Workspace.} 6 | \usage{ 7 | is.Workspace(x) 8 | } 9 | \arguments{ 10 | \item{x}{an R object} 11 | } 12 | \value{ 13 | logical value, TRUE if \code{x} represents an Azure ML workspace. 14 | } 15 | \description{ 16 | Test if an object is an Azure ML Workspace. 
17 | } 18 | -------------------------------------------------------------------------------- /man/publishWebService.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/publish.R 3 | \name{publishWebService} 4 | \alias{publishWebService} 5 | \alias{updateWebService} 6 | \title{Publish a function as a Microsoft Azure Web Service.} 7 | \usage{ 8 | publishWebService(ws, fun, name, inputSchema, outputSchema, 9 | data.frame = FALSE, export = character(0), noexport = character(0), 10 | packages, version = "3.1.0", serviceId, host = ws$.management_endpoint, 11 | .retry = 3) 12 | 13 | updateWebService(ws, fun, name, inputSchema, outputSchema, data.frame = FALSE, 14 | export = character(0), noexport = character(0), packages, 15 | version = "3.1.0", serviceId, host = ws$.management_endpoint, 16 | .retry = 3) 17 | } 18 | \arguments{ 19 | \item{ws}{An AzureML workspace reference returned by \code{\link{workspace}}.} 20 | 21 | \item{fun}{a function to publish; the function must have at least one argument.} 22 | 23 | \item{name}{name of the new web service; ignored when \code{serviceId} is specified (when updating an existing web service).} 24 | 25 | \item{inputSchema}{either a list of \code{fun} input parameters and their AzureML types formatted as \code{list("arg1"="type", "arg2"="type", ...)}, or an example input data frame when \code{fun} takes a single data frame argument; see the note below for details.} 26 | 27 | \item{outputSchema}{list of \code{fun} outputs and AzureML types, formatted as \code{list("output1"="type", "output2"="type", ...)}, optional when \code{inputSchema} is an example input data frame.} 28 | 29 | \item{data.frame}{\code{TRUE} indicates that the function \code{fun} accepts a data frame as input and returns a data frame output; automatically set to \code{TRUE} when \code{inputSchema} is a data frame.} 30 | 31 | \item{export}{optional 
character vector of variable names to explicitly export in the web service for use by the function. See the note below.} 32 | 33 | \item{noexport}{optional character vector of variable names to prevent from exporting in the web service.} 34 | 35 | \item{packages}{optional character vector of R packages to bundle in the web service, including their dependencies.} 36 | 37 | \item{version}{optional R version string for required packages (the version of R running in the AzureML Web Service).} 38 | 39 | \item{serviceId}{optional Azure web service ID; use to update an existing service (see Note below).} 40 | 41 | \item{host}{optional Azure regional host, defaulting to the global \code{management_endpoint} set in \code{\link{workspace}}} 42 | 43 | \item{.retry}{number of tries before failing} 44 | } 45 | \value{ 46 | A data.frame describing the new service endpoints, cf. \code{\link{endpoints}}. The output can be directly used by the \code{\link{consume}} function. 47 | } 48 | \description{ 49 | Publish a function to Microsoft Azure Machine Learning as a web service. The web service created is a standard Azure ML web service, and can be used from any web or mobile platform as long as the user knows the API key and URL. The function to be published is limited to inputs/outputs consisting of lists of scalar values or single data frames (see the notes below and examples). Requires a zip program to be installed (see note below). 50 | } 51 | \note{ 52 | \bold{Data Types} 53 | 54 | AzureML data types are different from, but related to, R types. You may specify the R types \code{numeric, logical, integer,} and \code{character} and those will be specified as AzureML types \code{double, boolean, int32, string}, respectively. 
55 | 
56 | \bold{Input and output schemas}
57 | 
58 | Function input must be:
59 | \enumerate{
60 | \item named scalar arguments with names and types specified in \code{inputSchema}
61 | \item one or more lists of named scalar values
62 | \item a single data frame when \code{data.frame=TRUE} is specified; either explicitly specify the column names and types in \code{inputSchema} or provide an example input data frame as \code{inputSchema}
63 | }
64 | Function output is always returned as a data frame with column names and types specified in \code{outputSchema}. See the examples for example use of all three I/O options.
65 | 
66 | \bold{Updating a web service}
67 | 
68 | Leave the \code{serviceId} parameter undefined to create a new AzureML web service, or specify the ID of an existing web service to update it, replacing the function, \code{inputSchema}, \code{outputSchema}, and required R packages with new values. The \code{name} parameter is ignored when \code{serviceId} is specified to update an existing web service.
69 | 
70 | The \code{\link{updateWebService}} function is nearly an alias for \code{\link{publishWebService}}, differing only in that the \code{serviceId} parameter is required by \code{\link{updateWebService}}.
71 | 
72 | The \code{publishWebService} function automatically exports objects required by the function to a working environment in the AzureML machine, including objects accessed within the function using lexical scoping rules. Use the \code{export} parameter to explicitly include other objects that are needed. Use \code{noexport} to explicitly prevent objects from being exported.
73 | 
74 | Note that it takes some time to update the AzureML service on the server. After updating the service, you may have to wait several seconds for the service to update. The time it takes will depend on a number of factors, including the complexity of your web service function.
75 | 76 | \bold{External zip program required} 77 | 78 | The function uses \code{\link[utils]{zip}} to compress information before transmission to AzureML. To use this, you need to have a zip program installed on your machine, and this program should be available in the path. The program should be called \code{zip} otherwise R may not find it. On windows, it is sufficient to install RTools (see \url{https://cran.r-project.org/bin/windows/Rtools/}) 79 | } 80 | \examples{ 81 | \dontrun{ 82 | # Use a default configuration in ~/.azureml, alternatively 83 | # see help for `?workspace`. 84 | 85 | ws <- workspace() 86 | 87 | # Publish a simple model using the lme4::sleepdata --------------------------- 88 | 89 | library(lme4) 90 | set.seed(1) 91 | train <- sleepstudy[sample(nrow(sleepstudy), 120),] 92 | m <- lm(Reaction ~ Days + Subject, data = train) 93 | 94 | # Deine a prediction function to publish based on the model: 95 | sleepyPredict <- function(newdata){ 96 | predict(m, newdata=newdata) 97 | } 98 | 99 | ep <- publishWebService(ws, fun = sleepyPredict, name="sleepy lm", 100 | inputSchema = sleepstudy, 101 | data.frame=TRUE) 102 | 103 | # OK, try this out, and compare with raw data 104 | ans <- consume(ep, sleepstudy)$ans 105 | plot(ans, sleepstudy$Reaction) 106 | 107 | # Remove the service 108 | deleteWebService(ws, "sleepy lm") 109 | 110 | 111 | 112 | # Another data frame example ------------------------------------------------- 113 | 114 | # If your function can consume a whole data frame at once, you can also 115 | # supply data in that form, resulting in more efficient computation. 116 | # The following example builds a simple linear model on a subset of the 117 | # airquality data and publishes a prediction function based on the model. 
118 | set.seed(1) 119 | m <- lm(Ozone ~ ., data=airquality[sample(nrow(airquality), 100),]) 120 | # Define a prediction function based on the model: 121 | fun <- function(newdata) 122 | { 123 | predict(m, newdata=newdata) 124 | } 125 | # Note the definition of inputSchema and use of the data.frame argument. 126 | ep <- publishWebService(ws, fun=fun, name="Ozone", 127 | inputSchema = airquality, 128 | data.frame=TRUE) 129 | ans <- consume(ep, airquality)$ans 130 | plot(ans, airquality$Ozone) 131 | deleteWebService(ws, "Ozone") 132 | 133 | 134 | 135 | # Train a model using diamonds in ggplot2 ------------------------------------ 136 | # This example also demonstrates how to deal with factor in the data 137 | 138 | data(diamonds, package="ggplot2") 139 | set.seed(1) 140 | train_idx = sample.int(nrow(diamonds), 30000) 141 | test_idx = sample(setdiff(seq(1, nrow(diamonds)), train_idx), 500) 142 | train <- diamonds[train_idx, ] 143 | test <- diamonds[test_idx, ] 144 | 145 | model <- glm(price ~ carat + clarity + color + cut - 1, data = train, 146 | family = Gamma(link = "log")) 147 | 148 | diamondLevels <- diamonds[1, ] 149 | 150 | # The model works reasonably well, except for some outliers 151 | plot(exp(predict(model, test)) ~ test$price) 152 | 153 | # Create a prediction function that converts characters correctly to factors 154 | 155 | predictDiamonds <- function(x){ 156 | x$cut <- factor(x$cut, 157 | levels = levels(diamondLevels$cut), ordered = TRUE) 158 | x$clarity <- factor(x$clarity, 159 | levels = levels(diamondLevels$clarity), ordered = TRUE) 160 | x$color <- factor(x$color, 161 | levels = levels(diamondLevels$color), ordered = TRUE) 162 | exp(predict(model, newdata = x)) 163 | } 164 | 165 | 166 | # Publish the service 167 | 168 | ws <- workspace() 169 | ep <- publishWebService(ws, fun = predictDiamonds, name = "diamonds", 170 | inputSchema = test, 171 | data.frame = TRUE 172 | ) 173 | 174 | # Consume the service 175 | results <- consume(ep, test)$ans 176 | 
plot(results ~ test$price) 177 | 178 | deleteWebService(ws, "diamonds") 179 | 180 | 181 | 182 | # Simple example using scalar input ------------------------------------------ 183 | 184 | ws <- workspace() 185 | 186 | # Really simple example: 187 | add <- function(x,y) x + y 188 | endpoint <- publishWebService(ws, 189 | fun = add, 190 | name = "addme", 191 | inputSchema = list(x="numeric", 192 | y="numeric"), 193 | outputSchema = list(ans="numeric")) 194 | consume(endpoint, list(x=pi, y=2)) 195 | 196 | # Now remove the web service named "addme" that we just published 197 | deleteWebService(ws, "addme") 198 | 199 | 200 | 201 | # Send a custom R function for evaluation in AzureML ------------------------- 202 | 203 | # A neat trick to evaluate any expression in the Azure ML virtual 204 | # machine R session and view its output: 205 | ep <- publishWebService(ws, 206 | fun = function(expr) { 207 | paste(capture.output( 208 | eval(parse(text=expr))), collapse="\\n") 209 | }, 210 | name="commander", 211 | inputSchema = list(x = "character"), 212 | outputSchema = list(ans = "character")) 213 | cat(consume(ep, list(x = "getwd()"))$ans) 214 | cat(consume(ep, list(x = ".packages(all=TRUE)"))$ans) 215 | cat(consume(ep, list(x = "R.Version()"))$ans) 216 | 217 | # Remove the service we just published 218 | deleteWebService(ws, "commander") 219 | 220 | 221 | 222 | # Understanding the scoping rules -------------------------------------------- 223 | 224 | # The following example illustrates scoping rules. Note that the function 225 | # refers to the variable y defined outside the function body. That value 226 | # will be exported with the service. 
227 | y <- pi 228 | ep <- publishWebService(ws, 229 | fun = function(x) x + y, 230 | name = "lexical scope", 231 | inputSchema = list(x = "numeric"), 232 | outputSchema = list(ans = "numeric")) 233 | cat(consume(ep, list(x=2))$ans) 234 | 235 | # Remove the service we just published 236 | deleteWebService(ws, "lexical scope") 237 | 238 | 239 | # Demonstrate scalar inputs but sending a data frame for scoring ------------- 240 | 241 | # Example showing the use of consume to score all the rows of a data frame 242 | # at once, and other invocations for evaluating multiple sets of input 243 | # values. The columns of the data frame correspond to the input parameters 244 | # of the web service in this example: 245 | f <- function(a,b,c,d) list(sum = a+b+c+d, prod = a*b*c*d) 246 | ep <- publishWebService(ws, 247 | f, 248 | name = "rowSums", 249 | inputSchema = list( 250 | a = "numeric", 251 | b = "numeric", 252 | c = "numeric", 253 | d = "numeric" 254 | ), 255 | outputSchema = list( 256 | sum ="numeric", 257 | prod = "numeric") 258 | ) 259 | x <- head(iris[,1:4]) # First four columns of iris 260 | 261 | # Note the following will FAIL because of a name mismatch in the arguments 262 | # (with an informative error): 263 | consume(ep, x, retryDelay=1) 264 | # We need the columns of the data frame to match the inputSchema: 265 | names(x) <- letters[1:4] 266 | # Now we can evaluate all the rows of the data frame in one call: 267 | consume(ep, x) 268 | # output should look like: 269 | # sum prod 270 | # 1 10.2 4.998 271 | # 2 9.5 4.116 272 | # 3 9.4 3.9104 273 | # 4 9.4 4.278 274 | # 5 10.2 5.04 275 | # 6 11.4 14.3208 276 | 277 | # You can use consume to evaluate just a single set of input values with this 278 | # form: 279 | consume(ep, a=1, b=2, c=3, d=4) 280 | 281 | # or, equivalently, 282 | consume(ep, list(a=1, b=2, c=3, d=4)) 283 | 284 | # You can evaluate multiple sets of input values with a data frame input: 285 | consume(ep, data.frame(a=1:2, b=3:4, c=5:6, d=7:8)) 286 | 
287 | # or, equivalently, with multiple lists: 288 | consume(ep, list(a=1, b=3, c=5, d=7), list(a=2, b=4, c=6, d=8)) 289 | 290 | # Remove the service we just published 291 | deleteWebService(ws, "rowSums") 292 | 293 | # A more efficient way to do the same thing using data frame input/output: 294 | f <- function(df) with(df, list(sum = a+b+c+d, prod = a*b*c*d)) 295 | ep = publishWebService(ws, f, name="rowSums2", 296 | inputSchema = data.frame(a = 0, b = 0, c = 0, d = 0)) 297 | consume(ep, data.frame(a=1:2, b=3:4, c=5:6, d=7:8)) 298 | deleteWebService(ws, "rowSums2") 299 | 300 | 301 | 302 | # Automatically discover dependencies ---------------------------------------- 303 | 304 | # The publishWebService function uses `miniCRAN` to include dependencies on 305 | # packages required by your function. The next example uses the `lmer` 306 | # function from the lme4 package, and also shows how to publish a function 307 | # that consumes a data frame by setting data.frame=TRUE. Note! This example 308 | # depends on a lot of packages and may take some time to upload to Azure. 309 | library(lme4) 310 | # Build a sample mixed effects model on just a subset of the sleepstudy data... 311 | set.seed(1) 312 | m <- lmer(Reaction ~ Days + (Days | Subject), 313 | data=sleepstudy[sample(nrow(sleepstudy), 120),]) 314 | # Deine a prediction function to publish based on the model: 315 | fun <- function(newdata) 316 | { 317 | predict(m, newdata=newdata) 318 | } 319 | ep <- publishWebService(ws, fun=fun, name="sleepy lmer", 320 | inputSchema= sleepstudy, 321 | packages="lme4", 322 | data.frame=TRUE) 323 | 324 | # OK, try this out, and compare with raw data 325 | ans = consume(ep, sleepstudy)$ans 326 | plot(ans, sleepstudy$Reaction) 327 | 328 | # Remove the service 329 | deleteWebService(ws, "sleepy lmer") 330 | } 331 | } 332 | \seealso{ 333 | \code{\link{endpoints}}, \code{\link{discoverSchema}}, \code{\link{consume}} and \code{\link{services}}. 
334 | 335 | Other publishing functions: \code{\link{deleteWebService}}, 336 | \code{\link{workspace}} 337 | } 338 | -------------------------------------------------------------------------------- /man/read.AzureML.config.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/config.R 3 | \name{read.AzureML.config} 4 | \alias{read.AzureML.config} 5 | \alias{write.AzureML.config} 6 | \title{Reads settings from configuration file in JSON format.} 7 | \usage{ 8 | read.AzureML.config(config = getOption("AzureML.config")) 9 | 10 | write.AzureML.config(id = NULL, auth = NULL, api_endpoint = NULL, 11 | management_endpoint = NULL, file = "") 12 | } 13 | \arguments{ 14 | \item{config}{Optional settings file containing id and authorization info. Used if any of the other arguments are missing. The default config file is \code{~/.azureml/settings.json}, but you can change this location by setting \code{options(AzureML.config = "newlocation")}. See the section "Using a settings.json file" for more details.} 15 | 16 | \item{id}{Optional workspace id from ML studio -> settings -> WORKSPACE ID. See the section "Finding your AzureML credentials" for more details.} 17 | 18 | \item{auth}{Optional authorization token from ML studio -> settings -> AUTHORIZATION TOKENS. See the section "Finding your AzureML credentials" for more details.} 19 | 20 | \item{api_endpoint}{Optional AzureML API web service URI. Defaults to \code{https://studioapi.azureml.net} if not provided and not specified in config. See note.} 21 | 22 | \item{management_endpoint}{Optional AzureML management web service URI. Defaults to \code{https://management.azureml.net} if not provided and not specified in config. See note.} 23 | 24 | \item{file}{either a character string naming a file or a connection open for writing. 
"" indicates output to the console.} 25 | } 26 | \description{ 27 | Reads settings from configuration file in JSON format. 28 | 29 | Writes settings to configuration file. 30 | } 31 | \seealso{ 32 | write.AzureML.config 33 | 34 | workspace 35 | 36 | write.AzureML.config 37 | 38 | workspace 39 | } 40 | -------------------------------------------------------------------------------- /man/refresh.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/workspace.R 3 | \name{refresh} 4 | \alias{refresh} 5 | \title{Refresh data in an AzureML workspace object.} 6 | \usage{ 7 | refresh(ws, what = c("everything", "datasets", "experiments", "services")) 8 | } 9 | \arguments{ 10 | \item{ws}{An AzureML workspace reference returned by \code{\link{workspace}}.} 11 | 12 | \item{what}{Select "everything" to update all cached data, or other values to selectively update those values.} 13 | } 14 | \value{ 15 | NULL is invisibly returned--this function updates data in the \code{w} environment. 16 | } 17 | \description{ 18 | Contact the AzureML web service and refresh/update data in an AzureML workspace object. 19 | } 20 | \seealso{ 21 | \code{\link{workspace}} 22 | } 23 | -------------------------------------------------------------------------------- /man/services.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/services.R 3 | \name{services} 4 | \alias{services} 5 | \alias{getWebServices} 6 | \title{List Available Web Services.} 7 | \usage{ 8 | services(ws, service_id, name, host = ws$.management_endpoint) 9 | 10 | getWebServices(ws, service_id, name, host = ws$.management_endpoint) 11 | } 12 | \arguments{ 13 | \item{ws}{An AzureML workspace reference returned by \code{\link{workspace}}.} 14 | 15 | \item{service_id}{optional web service id. 
If supplied, return the web service information for just the specified service id. Leave undefined to return a data.frame of all services.} 16 | 17 | \item{name}{optional web service name. If supplied, return the web service information for services with matching names. Leave undefined to return all services.} 18 | 19 | \item{host}{the AzureML web services URI} 20 | } 21 | \value{ 22 | Returns a data.frame with variables: 23 | \itemize{ 24 | \item Id 25 | \item Name 26 | \item Description 27 | \item CreationTime 28 | \item WorkspaceId 29 | \item DefaultEndpointName 30 | } 31 | Each row of the returned data.frame corresponds to a service. 32 | } 33 | \description{ 34 | Return a list of web services available to the specified Microsoft Azure Machine Learning workspace. 35 | The result is cached in the workspace environment similarly to datasets and experiments. 36 | } 37 | \note{ 38 | \code{getWebServices} is an alias for \code{services}. 39 | } 40 | \examples{ 41 | \dontrun{ 42 | workspace_id <- "" # Your AzureML workspace id 43 | authorization_token <- "" # Your AzureML authorization token 44 | 45 | ws <- workspace( 46 | id = workspace_id, 47 | auth = authorization_token 48 | ) 49 | 50 | # Equivalent: 51 | services(ws) 52 | getWebServices(ws) 53 | } 54 | } 55 | \seealso{ 56 | Other discovery functions: \code{\link{discoverSchema}}, 57 | \code{\link{endpointHelp}}, \code{\link{endpoints}}, 58 | \code{\link{workspace}} 59 | } 60 | -------------------------------------------------------------------------------- /man/upload.dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/datasets.R 3 | \name{upload.dataset} 4 | \alias{upload.dataset} 5 | \title{Upload an R data frame to an AzureML workspace.} 6 | \usage{ 7 | upload.dataset(x, ws, name, description = "", family_id = "", ...) 
8 | } 9 | \arguments{ 10 | \item{x}{An R data frame object} 11 | 12 | \item{ws}{An AzureML workspace reference returned by \code{\link{workspace}}.} 13 | 14 | \item{name}{A character name for the new AzureML dataset (may not match an existing dataset name)} 15 | 16 | \item{description}{An optional character description of the dataset} 17 | 18 | \item{family_id}{An optional AzureML family identifier} 19 | 20 | \item{...}{Optional additional options passed to \code{write.table}} 21 | } 22 | \value{ 23 | A single-row data frame of "Datasets" class that corresponds to the uploaded object now available in ws$datasets. 24 | } 25 | \description{ 26 | Upload any R data frame to an AzureML workspace using the \code{GenericTSV} format. 27 | } 28 | \note{ 29 | The additional \code{\link[utils]{write.table}} options may not include \code{sep} or \code{row.names} or \code{file}, but any other options are accepted. 30 | The AzureML API does not support uploads for _replacing_ datasets with new data by re-using a name. If you need to do this, first delete the dataset from the AzureML Studio interface, then upload a new version. 31 | } 32 | \examples{ 33 | \dontrun{ 34 | library(AzureML) 35 | 36 | ws <- workspace() 37 | 38 | # Upload the R airquality data.frame to the workspace. 
39 | upload.dataset(airquality, ws, "airquality") 40 | 41 | # Example datasets (airquality should be among them now) 42 | head(datasets(ws)) 43 | 44 | # Now delete what we've just uploaded 45 | delete.datasets(ws, "airquality") 46 | } 47 | } 48 | \seealso{ 49 | Other dataset functions: \code{\link{datasets}}, 50 | \code{\link{delete.datasets}}, 51 | \code{\link{download.intermediate.dataset}}, 52 | \code{\link{workspace}} 53 | } 54 | -------------------------------------------------------------------------------- /man/workspace.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/workspace.R 3 | \name{workspace} 4 | \alias{workspace} 5 | \title{Create a reference to an AzureML Studio workspace.} 6 | \usage{ 7 | workspace(id, auth, api_endpoint, management_endpoint, 8 | config = getOption("AzureML.config"), ..., .validate = TRUE) 9 | } 10 | \arguments{ 11 | \item{id}{Optional workspace id from ML studio -> settings -> WORKSPACE ID. See the section "Finding your AzureML credentials" for more details.} 12 | 13 | \item{auth}{Optional authorization token from ML studio -> settings -> AUTHORIZATION TOKENS. See the section "Finding your AzureML credentials" for more details.} 14 | 15 | \item{api_endpoint}{Optional AzureML API web service URI. Defaults to \code{https://studioapi.azureml.net} if not provided and not specified in config. See note.} 16 | 17 | \item{management_endpoint}{Optional AzureML management web service URI. Defaults to \code{https://management.azureml.net} if not provided and not specified in config. See note.} 18 | 19 | \item{config}{Optional settings file containing id and authorization info. Used if any of the other arguments are missing. The default config file is \code{~/.azureml/settings.json}, but you can change this location by setting \code{options(AzureML.config = "newlocation")}. 
See the section "Using a settings.json file" for more details.} 20 | 21 | \item{...}{ignored} 22 | 23 | \item{.validate}{If TRUE, makes a request to the AzureML API to retrieve some data. This validates whether the workspace id and authorization token are valid. Specifically, the function calls \code{\link{datasets}}. This should normally be set to TRUE. Set this to FALSE for testing, or if you know that your credentials are correct and you don't want to retrieve the datasets.} 24 | } 25 | \value{ 26 | An R environment of class \code{Workspace} containing at least the following objects: 27 | \itemize{ 28 | \item{experiments: Collection of experiments in the workspace represented as an \code{Experiments} object. See \code{\link{experiments}}} 29 | \item{datasets: Collection of datasets in the workspace represented as a \code{Datasets} object. See \code{\link{datasets}}} 30 | \item{services: Collection of web services in the workspace represented as a \code{Services} object. See \code{\link{services}}} 31 | } 32 | } 33 | \description{ 34 | Create a reference to an AzureML Studio workspace, returning a \code{Workspace} object that is an R environment containing details and data associated with the AzureML work space. Data corresponding to services, experiments, and datasets in the workspace are cached in the result object environment. See \code{\link{refresh}} about updating cached data. 35 | } 36 | \section{Finding your AzureML credentials}{ 37 | 38 | You can find your Azure Machine Learning \code{workspace id} and \code{authorization token} in the Azure Machine Learning Studio interface. 
39 | 40 | Workspace ID 41 | 42 | \if{html}{\figure{workspace_id.png}{options: width="60\%" alt="Figure: workspace_id.png"}} 43 | \if{latex}{\figure{workspaceId.pdf}{options: width=7cm}} 44 | 45 | 46 | 47 | Authorization token 48 | 49 | \if{html}{\figure{authorization_token.png}{options: width="60\%" alt="Figure: authorization_token.png"}} 50 | \if{latex}{\figure{authorizationToken.pdf}{options: width=7cm}} 51 | } 52 | 53 | \section{Using a settings.json file}{ 54 | 55 | If any of the \code{id}, \code{auth}, \code{api_endpoint} or \code{management_endpoint} arguments are missing, the function attempts to read values from the \code{config} file with JSON format: 56 | \preformatted{ 57 | {"workspace":{ 58 | "id": "enter your AzureML workspace id here", 59 | "authorization_token": "enter your AzureML authorization token here", 60 | "api_endpoint": "https://studioapi.azureml.net" 61 | }} 62 | } 63 | 64 | To explicitly add the management endpoint in the JSON file, use: 65 | \preformatted{ 66 | {"workspace":{ 67 | "id": "enter your AzureML workspace id here", 68 | "authorization_token": "enter your AzureML authorization token here", 69 | "api_endpoint": "https://studioapi.azureml.net", 70 | "management_endpoint": "https://management.azureml.net" 71 | }} 72 | } 73 | } 74 | 75 | \section{Using a workspace in different Azure Machine Learning regions}{ 76 | 77 | 78 | By default, the Azure Machine Learning workspace is located in US South Central, but it is possible to create a workspace in different regions, including Europe West and Asia Southeast. 
79 | 80 | To use a workspace in Asia Southeast, you can modify the api endpoint line in the JSON file: 81 | \preformatted{ 82 | {"workspace": { 83 | "api_endpoint": ["https://asiasoutheast.studio.azureml.net"] 84 | }} 85 | } 86 | 87 | Similarly, for a workspace in Europe West: 88 | \preformatted{ 89 | {"workspace": { 90 | "api_endpoint": ["https://europewest.studio.azureml.net"] 91 | }} 92 | } 93 | } 94 | 95 | \seealso{ 96 | \code{\link{datasets}}, \code{\link{experiments}}, \code{\link{refresh}}, 97 | \code{\link{services}}, \code{\link{consume}}, \code{\link{publishWebService}} 98 | 99 | Other dataset functions: \code{\link{datasets}}, 100 | \code{\link{delete.datasets}}, 101 | \code{\link{download.intermediate.dataset}}, 102 | \code{\link{upload.dataset}} 103 | 104 | Other experiment functions: \code{\link{download.intermediate.dataset}}, 105 | \code{\link{experiments}} 106 | 107 | Other discovery functions: \code{\link{discoverSchema}}, 108 | \code{\link{endpointHelp}}, \code{\link{endpoints}}, 109 | \code{\link{services}} 110 | 111 | Other consumption functions: \code{\link{consume}} 112 | 113 | Other publishing functions: \code{\link{deleteWebService}}, 114 | \code{\link{publishWebService}} 115 | } 116 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | Sys.setenv("R_TESTS" = "") 2 | library(testthat, quietly = TRUE) 3 | if (identical(Sys.getenv("NOT_CRAN"), "true")) { 4 | # NOT_CRAN 5 | # run all tests 6 | test_check("AzureML") 7 | 8 | } else { 9 | # CRAN 10 | # skip some tests on CRAN, to comply with timing directive and other policy 11 | test_check("AzureML", filter = "1-workspace-no-config") 12 | test_check("AzureML", filter = "7-discover-schema") 13 | } 14 | -------------------------------------------------------------------------------- /tests/testthat/.gitignore: 
-------------------------------------------------------------------------------- 1 | config.json -------------------------------------------------------------------------------- /tests/testthat/test-1-workspace-no-config.R: -------------------------------------------------------------------------------- 1 | if(interactive()) library("testthat") 2 | 3 | 4 | settingsFile <- AzureML.config.default 5 | workspace <- function(..., .validate = FALSE) AzureML::workspace(..., .validate = .validate) 6 | 7 | # ------------------------------------------------------------------------ 8 | 9 | context("workspace - connect to workspace with no config file") 10 | 11 | test_that("Can connect to workspace with no config file", { 12 | 13 | opts <- getOption("AzureML.config") 14 | options(AzureML.config = tempfile(fileext = ".tmp")) 15 | on.exit(options(AzureML.config = opts)) 16 | 17 | expect_error( 18 | ws <- workspace(), 19 | "config file is missing" 20 | ) 21 | expect_is(workspace("x", "y"), "Workspace") 22 | expect_equal({ws <- workspace("x", "y"); ls(ws)}, 23 | c("datasets", "experiments", "id", "services")) 24 | }) 25 | -------------------------------------------------------------------------------- /tests/testthat/test-1-workspace.R: -------------------------------------------------------------------------------- 1 | if(interactive()) library("testthat") 2 | 3 | 4 | settingsFile <- AzureML.config.default 5 | workspace <- function(..., .validate = FALSE) AzureML::workspace(..., .validate = .validate) 6 | 7 | # ------------------------------------------------------------------------ 8 | 9 | context("workspace - connect to workspace") 10 | 11 | test_that("Can connect to workspace with supplied id and auth", { 12 | AzureML:::skip_if_missing_config(settingsFile) 13 | 14 | js <- read.AzureML.config(settingsFile) 15 | id <- js$workspace$id 16 | auth <- js$workspace$authorization_token 17 | 18 | expect_true(!is.null(id)) 19 | expect_true(!is.null(auth)) 20 | 21 | ws <- workspace(id, 
auth) 22 | 23 | expect_is(ws, c("Workspace")) 24 | expect_equal(ls(ws), c("datasets", "experiments", "id", "services")) 25 | expect_equal(ws$id, id) 26 | }) 27 | 28 | test_that("Can connect to workspace with config file", { 29 | AzureML:::skip_if_missing_config(settingsFile) 30 | 31 | ws <- workspace() 32 | 33 | expect_is(ws, c("Workspace")) 34 | expect_equal(ls(ws), c("datasets", "experiments", "id", "services")) 35 | }) 36 | 37 | 38 | test_that("workspace() throws helpful 401 error with invalid id", { 39 | # AzureML:::skip_if_missing_config(settingsFile) 40 | 41 | .catchError <- function(expr){ 42 | tryCatch(expr, error = function(e)e)$message 43 | } 44 | .expect_error_in <- function(object, msgs){ 45 | if(missing(object) || is.null(object)) return(FALSE) 46 | ptn <- sprintf("[%s]", paste(sprintf("(%s)", msgs), collapse = "|")) 47 | expect_true(grepl(ptn, object)) 48 | } 49 | 50 | m <- .catchError(workspace(id = "x", auth = "y", .validate = TRUE)) 51 | msg <- c("invalid workspaceId", 52 | "401 (Unauthorised). Please check your workspace ID and auth codes." 
53 | ) 54 | 55 | .expect_error_in(m, msg = msg) 56 | 57 | }) 58 | 59 | 60 | 61 | 62 | # ------------------------------------------------------------------------ 63 | 64 | context("workspace - reading from settings.json file") 65 | 66 | test_that("workspace() adds api_endpoint and management_endpoint if missing from config", { 67 | tf <- tempfile(fileext = ".json") 68 | on.exit(unlink(tf)) 69 | write.AzureML.config("x", "y", file = tf) 70 | ws <- workspace(config = tf) 71 | expect_equal(ws$id, "x") 72 | expect_equal( 73 | ws$.api_endpoint, 74 | default_api(ws$.api_endpoint)[["api_endpoint"]] 75 | ) 76 | expect_equal( 77 | ws$.management_endpoint, 78 | default_api(ws$.api_endpoint)[["management_endpoint"]] 79 | ) 80 | }) 81 | 82 | test_that("workspace() throws helpful error if config file does not exist", { 83 | expect_error( 84 | workspace(config = "file_does_not_exist"), 85 | "config file is missing: 'file_does_not_exist'" 86 | ) 87 | }) 88 | 89 | test_that("workspace() throws helpful error if config is invalid json", { 90 | tf <- tempfile(fileext = ".json") 91 | on.exit(unlink(tf)) 92 | writeLines("garbage", con = tf) 93 | msg <- tryCatch(workspace(config = tf), error = function(e)e)$message 94 | expect_true( 95 | grepl("Your config file contains invalid json", msg) 96 | ) 97 | }) 98 | 99 | -------------------------------------------------------------------------------- /tests/testthat/test-2-datasets-upload-download-delete.R: -------------------------------------------------------------------------------- 1 | if(interactive()) library("testthat") 2 | 3 | settingsFile <- AzureML.config.default 4 | 5 | context("Upload and delete dataset") 6 | 7 | test_that("datasets(ws) returns results", { 8 | AzureML:::skip_if_missing_config(settingsFile) 9 | AzureML:::skip_if_offline() 10 | 11 | ws <<- workspace() 12 | 13 | x <- datasets(ws) 14 | expect_is(x, "data.frame") 15 | }) 16 | 17 | timestamped_name <- paste0("dataset-test-upload-", 18 | format(Sys.time(), 
format="%Y-%m-%d--%H-%M-%S")) 19 | 20 | test_that("Can upload dataset to workspace", { 21 | AzureML:::skip_if_missing_config(settingsFile) 22 | AzureML:::skip_if_offline() 23 | 24 | upload.dataset(airquality, ws, timestamped_name) 25 | ds <- datasets(ws, filter = "my") 26 | expect_true(timestamped_name %in% ds$Name) 27 | }) 28 | 29 | test_that("Uploading dataset with duplicate name gives helpful error", { 30 | AzureML:::skip_if_missing_config(settingsFile) 31 | AzureML:::skip_if_offline() 32 | 33 | expect_error(upload.dataset(airquality, ws, timestamped_name), 34 | sprintf("A dataset with the name '%s' already exists in AzureML", timestamped_name) 35 | ) 36 | }) 37 | 38 | test_that("Can download dataset", { 39 | AzureML:::skip_if_missing_config(settingsFile) 40 | AzureML:::skip_if_offline() 41 | 42 | dl <- download.datasets(ws, name = timestamped_name) 43 | expect_equal(dl, airquality) 44 | }) 45 | 46 | test_that("Can delete dataset from workspace", { 47 | AzureML:::skip_if_missing_config(settingsFile) 48 | AzureML:::skip_if_offline() 49 | 50 | z <- delete.datasets(ws, timestamped_name) 51 | expect_true(timestamped_name %in% z$Name && z$Deleted[z$Name == timestamped_name]) 52 | # Force refresh - sometime this fails in non-interactive 53 | max_wait <- 15 54 | wait_period <- 3 55 | i <- 0 56 | ds <- datasets(ws, filter = "my") 57 | while(i < max_wait && nrow(ds) > 0 && timestamped_name %in% ds$Name) { 58 | Sys.sleep(wait_period) 59 | i <- i + wait_period 60 | refresh(ws, what = "datasets") 61 | ds <- datasets(ws, filter = "my") 62 | } 63 | if(nrow(ds) > 0 || timestamped_name %in% ds$Name) skip("skip waiting for delete") 64 | expect_true(nrow(ds) == 0 || !timestamped_name %in% ds$Name) 65 | }) 66 | 67 | 68 | 69 | test_that("Invalid input throws helpful error", { 70 | expect_error(download.datasets('HSAFundsData.csv'), 71 | "You specified a dataset name that is not in the workspace. 
See help file for `download.datasets`" 72 | ) 73 | }) 74 | 75 | 76 | 77 | 78 | 79 | 80 | -------------------------------------------------------------------------------- /tests/testthat/test-2-download-each-dataset-type.R: -------------------------------------------------------------------------------- 1 | if(interactive()) library("testthat") 2 | 3 | settingsFile <- AzureML.config.default 4 | context("Download one file of each DataTypeId") 5 | 6 | test_that("setup global variables", { 7 | AzureML:::skip_if_missing_config(settingsFile) 8 | AzureML:::skip_if_offline() 9 | 10 | ws <<- workspace() 11 | ds <- datasets(ws, filter = "samples") 12 | testIdx <- grepl("[Tt]est", ds$Name) 13 | ds <- ds[!testIdx, ] 14 | unique(ds$DataTypeId) 15 | 16 | oneOfEach <- do.call( 17 | rbind, 18 | lapply( 19 | split(ds, ds$DataTypeId), 20 | function(x){ 21 | x[which.min(x$Size), ] 22 | } 23 | ) 24 | ) 25 | 26 | Zip <<- oneOfEach[oneOfEach$DataTypeId %in% c("Zip"), ] 27 | oneOfEach <<- oneOfEach[!oneOfEach$DataTypeId %in% c("Zip"), ] 28 | # oneOfEach$DataTypeId 29 | expect_is(oneOfEach, "Datasets") 30 | }) 31 | 32 | if(exists("oneOfEach")){ 33 | # oneOfEach will only exist if the previous test ran successfully, 34 | # i.e. 
if the settings.json file could be found 35 | for(type in oneOfEach$DataTypeId){ 36 | test_that(sprintf("Can download dataset of type %s", type), { 37 | AzureML:::skip_if_missing_config(settingsFile) 38 | AzureML:::skip_if_offline() 39 | 40 | dl <- suppressWarnings( 41 | download.datasets(ws, name = oneOfEach$Name[oneOfEach$DataTypeId == type]) 42 | ) 43 | expect_is(dl, "data.frame") 44 | expect_true(nrow(dl) > 0) 45 | }) 46 | } 47 | } 48 | 49 | type <- "zip" 50 | test_that(sprintf("Can download dataset of type %s", type), { 51 | AzureML:::skip_if_missing_config(settingsFile) 52 | AzureML:::skip_if_offline() 53 | 54 | dl <- download.datasets(ws, Zip) 55 | class(dl) 56 | expect_is(dl, "raw") 57 | }) 58 | 59 | 60 | 61 | test_that("Multiple file download", { 62 | AzureML:::skip_if_missing_config(settingsFile) 63 | AzureML:::skip_if_offline() 64 | 65 | multiple <- oneOfEach[order(oneOfEach$Size, decreasing = FALSE), ][1:3, ] 66 | names <- multiple$Name 67 | 68 | res <- suppressWarnings(download.datasets(ws, names)) 69 | expect_equal(names(res), names) 70 | 71 | res <- suppressWarnings(download.datasets(datasets(ws), names)) 72 | expect_equal(names(res), names) 73 | 74 | res <- suppressWarnings(download.datasets(multiple)) 75 | expect_equal(names(res), names) 76 | }) 77 | 78 | -------------------------------------------------------------------------------- /tests/testthat/test-2-multiple-dataset-download.R: -------------------------------------------------------------------------------- 1 | if(interactive()) library("testthat") 2 | 3 | settingsFile <- AzureML.config.default 4 | 5 | context("Download multiple datasets") 6 | 7 | test_that("datasets(ws) returns results", { 8 | AzureML:::skip_if_missing_config(settingsFile) 9 | AzureML:::skip_if_offline() 10 | 11 | ws <<- workspace() 12 | 13 | x <- datasets(ws) 14 | expect_is(x, "data.frame") 15 | }) 16 | 17 | ### Additional tests of download.datasets(.). 
We could expand the same dataset formats 18 | ### to other tests (upload, delete, etc). 19 | 20 | ## csv and .tsv files: 21 | test_that("Can download multiple .csv and .tsv files", { 22 | AzureML:::skip_if_missing_config(settingsFile) 23 | AzureML:::skip_if_offline() 24 | 25 | # ds <- datasets(ws, filter = "samples") 26 | # ds[grep("[CT]SV", ds$DataTypeId), ] 27 | names <- c("Time Series Dataset", 28 | "Sample Named Entity Recognition Articles") 29 | 30 | res <- suppressWarnings(download.datasets(ws, names)) 31 | expect_equal(names(res), names) 32 | 33 | res <- suppressWarnings(download.datasets(datasets(ws), names)) 34 | expect_equal(names(res), names) 35 | 36 | }) 37 | 38 | test_that("Can download .zip files", { 39 | AzureML:::skip_if_missing_config(settingsFile) 40 | AzureML:::skip_if_offline() 41 | 42 | # ds <- datasets(ws, filter = "samples") 43 | # ds[ds$DataTypeId == "Zip", ] 44 | names <- c("text.preprocessing.zip", "fraudTemplateUtil.zip") 45 | 46 | res <- download.datasets(ws, names) 47 | expect_equal(names(res), names) 48 | 49 | res <- download.datasets(datasets(ws), names) 50 | expect_equal(names(res), names) 51 | }) 52 | 53 | 54 | test_that("Can download .arff files", { 55 | AzureML:::skip_if_missing_config(settingsFile) 56 | AzureML:::skip_if_offline() 57 | 58 | # ds <- datasets(ws, filter = "samples") 59 | # ds[ds$DataTypeId == "ARFF", ] 60 | names <- c("Breast cancer data", "Forest fires data", "Iris Two Class Data") 61 | 62 | res <- download.datasets(ws, names) 63 | expect_equal(names(res), names) 64 | 65 | res <- download.datasets(datasets(ws), names) 66 | expect_equal(names(res), names) 67 | }) 68 | 69 | -------------------------------------------------------------------------------- /tests/testthat/test-3-experiments-download.R: -------------------------------------------------------------------------------- 1 | # This is written as a rather bogus test as it requires a specific exp_id which is unlikely to be generally available. 
2 | # This is a hard test to configure. 3 | # 4 | 5 | if(interactive()) library("testthat") 6 | 7 | settingsFile <- AzureML.config.default 8 | 9 | 10 | context("Read dataset from experiment") 11 | 12 | test_that("Can read intermediate dataset from workspace", { 13 | AzureML:::skip_if_missing_config(settingsFile) 14 | AzureML:::skip_if_offline() 15 | 16 | settingsFile <- AzureML:::AzureML.config.default 17 | js <- jsonlite::fromJSON(settingsFile) 18 | id <- js$workspace$id 19 | auth <- js$workspace$authorization_token 20 | exp_id <- js$workspace$exp_id 21 | node_id <- js$workspace$node_id 22 | 23 | if(is.null(exp_id) || is.null(node_id)) skip("exp_id or node_id not available") 24 | 25 | ws <- workspace() 26 | 27 | we <- experiments(ws) 28 | expect_is(we, "Experiments") 29 | expect_is(we, "data.frame") 30 | 31 | expect_identical(we, ws$experiments) 32 | 33 | 34 | en <- we$Description 35 | expect_is(en, "character") 36 | expect_true(length(en) > 0) 37 | 38 | expect_true(exp_id %in% we$ExperimentId) 39 | idx <- match(exp_id, we$ExperimentId) 40 | experiment = experiments(ws)[idx, ] 41 | class(experiment) 42 | expect_is(experiment, "Experiments") 43 | expect_is(experiment, "data.frame") 44 | 45 | frame <- download.intermediate.dataset(ws, experiment = exp_id, node_id = node_id, 46 | port_name='Results dataset', 47 | data_type_id='GenericCSV') 48 | 49 | expect_is(frame, "data.frame") 50 | expect_true(nrow(frame) > 1) 51 | }) 52 | 53 | -------------------------------------------------------------------------------- /tests/testthat/test-5-try_fetch.R: -------------------------------------------------------------------------------- 1 | if(interactive()) library(testthat) 2 | 3 | context("try_fetch") 4 | library(mockery) 5 | 6 | test_that("try_fetch() gives exponential retry messages",{ 7 | set.seed(1) 8 | mockery::stub(try_fetch, "curl_fetch_memory", function(...){ 9 | retry_on = c(400, 401, 440, 503, 504, 509) 10 | status_code <- if(runif(1) > 0.26) sample(retry_on, 1) 
else 200 11 | list(status_code = status_code, contents = NA) 12 | }) 13 | msg <- "Request failed with status 509. Waiting 0.0 seconds before retry\n" 14 | expect_message( 15 | try_fetch(delay = 0.1, no_message_threshold = 0), 16 | msg 17 | ) 18 | 19 | }) 20 | -------------------------------------------------------------------------------- /tests/testthat/test-6-publish.R: -------------------------------------------------------------------------------- 1 | if(interactive()) library(testthat) 2 | 3 | context("Publish API") 4 | settingsFile <- AzureML.config.default 5 | 6 | 7 | test_that(".getexports finds function and creates zip string", { 8 | AzureML:::skip_if_missing_config(settingsFile) 9 | AzureML:::skip_if_offline() 10 | 11 | ws <<- workspace() 12 | endpoint <<- NA 13 | 14 | 15 | funEnv <- new.env() 16 | assign("add", function(x, y) x + y, envir = funEnv) 17 | 18 | exportEnv = new.env() 19 | AzureML:::.getexports(substitute(add), e = exportEnv, env = funEnv) 20 | 21 | expect_equal( 22 | ls(exportEnv), 23 | "add" 24 | ) 25 | 26 | za <- AzureML:::zipAvailable() 27 | if(!za) skip(AzureML:::zipNotAvailableMessage) 28 | expect_true(za) 29 | 30 | z <- AzureML:::packageEnv(exportEnv) 31 | expect_is(z, "character") 32 | expect_true(nchar(z) > 1) 33 | 34 | }) 35 | 36 | 37 | 38 | test_that("publishWebService throws error if fun is not a function", { 39 | AzureML:::skip_if_missing_config(settingsFile) 40 | AzureML:::skip_if_offline() 41 | 42 | add <- function(x,y) x + y 43 | 44 | timestamped_name <- paste0("webservice-test-publish-", 45 | format(Sys.time(), format="%Y-%m-%d--%H-%M-%S")) 46 | 47 | expect_error({ 48 | endpoint <- publishWebService(ws, 49 | fun = "add", 50 | name = timestamped_name, 51 | inputSchema = list(x="numeric", 52 | y="numeric"), 53 | outputSchema = list(ans="numeric") 54 | ) 55 | if(is.Endpoint(endpoint)) deleteWebService(ws, timestamped_name) 56 | }, 57 | "You must specify 'fun' as a function, not a character" 58 | ) 59 | }) 60 | 61 | 
timestamped_name <- paste0("webservice-test-publish-", 62 | format(Sys.time(), format="%Y-%m-%d--%H-%M-%S")) 63 | 64 | 65 | 66 | test_that("publishWebService works with simple function", { 67 | AzureML:::skip_if_missing_config(settingsFile) 68 | AzureML:::skip_if_offline() 69 | 70 | add <- function(x,y) x + y 71 | 72 | endpoint <- publishWebService(ws, 73 | fun = add, 74 | name = timestamped_name, 75 | inputSchema = list(x="numeric", 76 | y="numeric"), 77 | outputSchema = list(ans="numeric") 78 | ) 79 | 80 | endpoint <<- endpoint # Used to test updateWebservice in next test 81 | 82 | 83 | expect_is(endpoint, "data.frame") 84 | expect_is(endpoint, "Endpoint") 85 | expect_is(endpoint$WorkspaceId, "character") 86 | expect_is(endpoint$WebServiceId, "character") 87 | expect_equal(ws$id, endpoint$WorkspaceId) 88 | 89 | # Now test if we can consume the service we just published 90 | res <- consume(endpoint, x=pi, y=2) 91 | expect_is(res, "data.frame") 92 | expect_equal(res$ans, pi + 2, tolerance = 1e-8) 93 | }) 94 | 95 | 96 | test_that("updateWebService works with simple function", { 97 | # Now test updateWebService 98 | AzureML:::skip_if_missing_config(settingsFile) 99 | AzureML:::skip_if_offline() 100 | 101 | endpoint <- updateWebService(ws, 102 | serviceId = endpoint$WebServiceId, 103 | fun = function(x, y) x - y, 104 | inputSchema = list(x="numeric", 105 | y="numeric"), 106 | outputSchema = list(ans="numeric")) 107 | 108 | # Now test if we can consume the service we just updated 109 | for(i in 1:10){ 110 | Sys.sleep(3) # Allow some time for the service to update and refresh 111 | res <- consume(endpoint, x=pi, y=2) 112 | if(isTRUE(all.equal(res$ans, pi - 2, tolerance = 1e-8))) break 113 | } 114 | expect_is(res, "data.frame") 115 | expect_equal(res$ans, pi - 2, tolerance = 1e-8) 116 | 117 | deleteWebService(ws, timestamped_name) 118 | }) 119 | 120 | 121 | test_that("publishWebService works with data frame input", { 122 | AzureML:::skip_if_missing_config(settingsFile) 
123 | AzureML:::skip_if_offline() 124 | 125 | timestamped_name <- paste0("webservice-test-publish-", 126 | format(Sys.time(), format="%Y-%m-%d--%H-%M-%S")) 127 | 128 | if(!require("lme4")) skip("You need to install lme4 to run this test") 129 | 130 | set.seed(1) 131 | train <- sleepstudy[sample(nrow(sleepstudy), 120),] 132 | m <- lm(Reaction ~ Days + Subject, data = train) 133 | 134 | # Deine a prediction function to publish based on the model: 135 | sleepyPredict <- function(newdata){ 136 | predict(m, newdata=newdata) 137 | } 138 | 139 | endpoint <- publishWebService(ws, fun = sleepyPredict, 140 | name = timestamped_name, 141 | inputSchema = sleepstudy) 142 | 143 | expect_is(endpoint, "data.frame") 144 | expect_is(endpoint, "Endpoint") 145 | expect_is(endpoint$WorkspaceId, "character") 146 | expect_is(endpoint$WebServiceId, "character") 147 | expect_equal(ws$id, endpoint$WorkspaceId) 148 | 149 | 150 | # Now test if we can consume the service we just published 151 | res <- consume(endpoint, sleepstudy)$ans 152 | expect_is(res, "numeric") 153 | expect_equal(length(res), nrow(sleepstudy)) 154 | 155 | deleteWebService(ws, timestamped_name) 156 | }) 157 | 158 | -------------------------------------------------------------------------------- /tests/testthat/test-7-discover-schema.R: -------------------------------------------------------------------------------- 1 | if(interactive()) library(testthat) 2 | 3 | context("Discover API schema") 4 | 5 | 6 | test_that("discoverSchema() returns help page information", { 7 | AzureML:::skip_if_offline() 8 | 9 | schemaUrl <- "https://studio.azureml.net/apihelp/workspaces/xxxxx/webservices/yyyyy/endpoints/zzzzz" 10 | expect_equal( 11 | getDetailsFromUrl(schemaUrl), 12 | c("xxxxx", 13 | "yyyyy", 14 | "zzzzz") 15 | ) 16 | 17 | schemaUrl <- "https://studio.azureml.net/apihelp/workspaces/f5e8e9bc4eed4034b78567449cfca779/webservices/d42667a354e34a3f98888ba86300fc2f/endpoints/b4caf0f0ebfd451bbc187741894e213b/score" 18 | 19 | 
expect_equal( 20 | getDetailsFromUrl(schemaUrl), 21 | c("f5e8e9bc4eed4034b78567449cfca779", 22 | "d42667a354e34a3f98888ba86300fc2f", 23 | "b4caf0f0ebfd451bbc187741894e213b") 24 | ) 25 | 26 | url <- "https://ussouthcentral.services.azureml.net/workspaces/f5e8e9bc4eed4034b78567449cfca779/services/b4caf0f0ebfd451bbc187741894e213b/execute?api-version=2.0&format=swagger" 27 | expect_error( 28 | getDetailsFromUrl(url) 29 | ) 30 | 31 | schema <- discoverSchema(schemaUrl) 32 | schema$sampleInput$Gender <- "male" 33 | schema$sampleInput$PortEmbarkation <- "C" 34 | 35 | expect_equal(length(schema), 4) 36 | expect_equivalent(schema$requestUrl, url) 37 | expect_equivalent(schema$columnNames, 38 | list("Survived", 39 | "PassengerClass", 40 | "Gender", 41 | "Age", 42 | "SiblingSpouse", 43 | "ParentChild", 44 | "FarePrice", 45 | "PortEmbarkation") 46 | ) 47 | expect_equivalent(schema$sampleInput, 48 | list(Survived = 1, 49 | PassengerClass = 1, 50 | Gender = "male", 51 | Age = 1, 52 | SiblingSpouse = 1, 53 | ParentChild = 1, 54 | FarePrice = 1, 55 | PortEmbarkation = "C")) 56 | }) 57 | 58 | -------------------------------------------------------------------------------- /tests/testthat/test-7-discover.R: -------------------------------------------------------------------------------- 1 | if(interactive()) library(testthat) 2 | 3 | context("Discover API") 4 | settingsFile <- "~/.azureml/settings.json" 5 | 6 | test_that("discoverSchema() can discover endpoints starting from workspace ID", { 7 | 8 | AzureML:::skip_if_missing_config(settingsFile) 9 | AzureML:::skip_if_offline() 10 | 11 | ws <<- workspace() 12 | timestamped_name <<- paste0("webservice-test-publish-", 13 | format(Sys.time(), format="%Y-%m-%d--%H-%M-%S")) 14 | 15 | add <- function(x, y) x + y 16 | 17 | publishWebService(ws, 18 | fun = add, 19 | name = timestamped_name, 20 | inputSchema = list(x = "numeric", 21 | y = "numeric"), 22 | outputSchema = list(ans = "numeric") 23 | ) 24 | ss <- services(ws) 25 | 26 | 
expect_is(ss, "Service") 27 | expect_is(ss, "data.frame") 28 | 29 | Sys.sleep(3) 30 | testWS <- services(ws, name = timestamped_name) 31 | testEP_1 <- endpoints(ws, service_id = testWS) 32 | testEP_2 <- endpoints(ws, service_id = testWS)[1, ] 33 | 34 | expect_is(testEP_1, "Endpoint") 35 | 36 | expect_true(nrow(ss) >= 1) 37 | expect_true(length(testWS) >= 6) 38 | expect_equal(length(endpoints), 1) 39 | 40 | expect_identical(testEP_1, testEP_2) 41 | expect_true(length(testEP_1) >= 13) 42 | expect_equal(testWS$Id, testEP_1$WebServiceId) 43 | }) 44 | 45 | 46 | test_that("API location is returned and able to be used immediately", { 47 | AzureML:::skip_if_missing_config(settingsFile) 48 | AzureML:::skip_if_offline() 49 | 50 | ss <- services(ws, name = timestamped_name) 51 | testEP <- endpoints(ws, ss)[1, ] 52 | res <- consume(testEP, list(x=pi, y=2), retryDelay = 2) 53 | expect_is(res, "data.frame") 54 | expect_equal(res$ans, pi + 2, tolerance = 1e-5) 55 | 56 | deleteWebService(ws, timestamped_name) 57 | }) 58 | 59 | 60 | test_that("Discovery function handles error correctly", { 61 | AzureML:::skip_if_missing_config(settingsFile) 62 | AzureML:::skip_if_offline() 63 | 64 | expect_error( 65 | services(ws, "foo-does-not-exist"), 66 | "Invalid web service ID provided. Verify the web service ID is correct and try again." 67 | ) 68 | }) 69 | 70 | -------------------------------------------------------------------------------- /tests/testthat/test-99-final.R: -------------------------------------------------------------------------------- 1 | if(!file.exists("~/.azureml/settings.json")){ 2 | message("To run tests, add a file ~/.azureml/settings.json containing AzureML keys") 3 | message("Some tests skipped. 
See ?workspace for help") 4 | } -------------------------------------------------------------------------------- /vignettes/Readme.md: -------------------------------------------------------------------------------- 1 | To update the vignette: 2 | 3 | * Use the original `.rmd` document to build the vignette 4 | - Rename `getting_started.Rmd-original` to `getting_started.Rmd` 5 | - Use `knitr` to build the vignette, keeping the intermediate `.md` output 6 | * Copy the intermediate contents into a new `.rmd` file 7 | - Call this `getting_started.Rmd` 8 | * Remove intermediate artefacts 9 | 10 | -------------------------------------------------------------------------------- /vignettes/authToken.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RevolutionAnalytics/AzureML/1266f42c7493bd7874affb4fc969f9c3118ee13c/vignettes/authToken.png -------------------------------------------------------------------------------- /vignettes/workspaceID.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RevolutionAnalytics/AzureML/1266f42c7493bd7874affb4fc969f9c3118ee13c/vignettes/workspaceID.png --------------------------------------------------------------------------------