├── .Rbuildignore
├── .gitignore
├── .travis.yml
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── R
    ├── dataset.R
    ├── dataupload.R
    ├── experiment.R
    ├── main.R
    ├── params.R
    ├── prediction.R
    ├── prediction_download.R
    ├── predictjob.R
    ├── projects.R
    ├── result.R
    └── utils.R
├── README.md
├── man
    ├── add_dataset_if_not_exists.Rd
    ├── add_experiment_if_not_exists.Rd
    ├── add_new_dataset.Rd
    ├── create_experiment.Rd
    ├── create_project.Rd
    ├── delete_dataset.Rd
    ├── delete_project.Rd
    ├── get_all_models.Rd
    ├── get_dataset.Rd
    ├── get_datasets.Rd
    ├── get_experiment.Rd
    ├── get_experiments.Rd
    ├── get_model.Rd
    ├── get_prediction.Rd
    ├── get_project.Rd
    ├── get_projects.Rd
    ├── get_results.Rd
    ├── mljar_fit.Rd
    ├── mljar_predict.Rd
    ├── prediction_download.Rd
    ├── print_all_projects.Rd
    ├── submit_predict_job.Rd
    └── upload_file.Rd
├── mljar.Rproj
└── tests
    ├── testthat.R
    └── testthat
        ├── binary_part_iris_converted.csv
        ├── test_dataset.R
        ├── test_dataupload.R
        ├── test_experiment.R
        ├── test_main.R
        ├── test_project.R
        ├── test_results.R
        └── test_utils.R


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^.*\.Rproj$
 2 | ^\.Rproj\.user$
 3 | ^\.travis\.yml$
 4 | ^\.httr-oauth$
 5 | ^cran-comments\.md$
 6 | ^README\.Rmd$
 7 | ^revdep$
 8 | ^codecov\.yml$
 9 | ^appveyor\.yml$
10 | ^Doxyfile$
11 | ^clion-test\.R$
12 | ^API$
13 | ^ISSUE_TEMPLATE\.md$
14 | ^LICENSE\.md$
15 | ^BROWSE$
16 | ^GPATH$
17 | ^GRTAGS$
18 | ^GTAGS$
19 | ^TAGS$
20 | ^\.dir-locals\.el$
21 | ^vignettes/rsconnect$
22 | ^docs$
23 | ^_pkgdown\.yml$
24 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # History files
 2 | .Rhistory
 3 | .Rapp.history
 4 | 
 5 | # Session Data files
 6 | .RData
 7 | 
 8 | # Example code in package build process
 9 | *-Ex.R
10 | 
11 | # Output files from R CMD build
12 | /*.tar.gz
13 | 
14 | # Output files from R CMD check
15 | /*.Rcheck/
16 | 
17 | # RStudio files
18 | .Rproj.user/
19 | 
20 | # produced vignettes
21 | vignettes/*.html
22 | vignettes/*.pdf
23 | 
24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3
25 | .httr-oauth
26 | 
27 | # knitr and R markdown default cache directories
28 | /*_cache/
29 | /cache/
30 | 
31 | # Temporary files created by R markdown
32 | *.utf8.md
33 | *.knit.md
34 | .Rproj.user
35 | 
36 | .DS_Store
37 | tests/.DS_Store
38 | tests/testthat/.DS_Store
39 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: r
 2 | 
 3 | dist: trusty
 4 | 
 5 | r_github_packages:
 6 |   - jimhester/covr
 7 | 
 8 | after_success:
 9 |   - Rscript -e 'covr::codecov()' --verbose
10 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: mljar
 2 | Title: R API for MLJAR
 3 | Version: 0.1.2
 4 | Author: Dominik Krzemiński <raymon92@gmail.com>, Piotr Płoński <contact@mljar.com>
 5 | Maintainer: Piotr Płoński <contact@mljar.com>
 6 | Description: Provides an R API wrapper for 'mljar.com', a web service allowing for on-line training for machine learning models (see <https://mljar.com> for more information).
 7 | License: MIT + file LICENSE
 8 | URL: http://mljar.com, https://github.com/mljar/mljar-api-R
 9 | BugReports: https://github.com/mljar/mljar-api-R/issues
10 | Encoding: UTF-8
11 | LazyData: true
12 | RoxygenNote: 6.0.1
13 | Suggests:
14 |     testthat,
15 |     covr
16 | Depends: 
17 |     R (>= 3.1.2)
18 | Imports:
19 |     httr,
20 |     jsonlite,
21 |     readr
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2018
2 | COPYRIGHT HOLDER: MLJAR Inc
3 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | export(add_dataset_if_not_exists)
 2 | export(add_new_dataset)
 3 | export(add_experiment_if_not_exists)
 4 | export(create_experiment)
 5 | export(create_project)
 6 | export(delete_dataset)
 7 | export(delete_project)
 8 | export(get_dataset)
 9 | export(get_datasets)
10 | export(get_experiment)
11 | export(get_experiments)
12 | export(get_prediction)
13 | export(get_project)
14 | export(get_projects)
15 | export(print_all_projects)
16 | export(get_results)
17 | export(get_model)
18 | export(mljar_fit)
19 | export(mljar_predict)
20 | export(prediction_download)
21 | export(submit_predict_job)
22 | export(upload_file)
23 | export(get_all_models)
24 | importFrom(httr,GET)
25 | importFrom(httr,POST)
26 | importFrom(httr,PUT)
27 | importFrom(httr,DELETE)
28 | importFrom(httr,add_headers)
29 | importFrom(httr,content)
30 | importFrom(httr,status_code)
31 | importFrom(jsonlite,fromJSON)
32 | importFrom(jsonlite,toJSON)
33 | importFrom("stats", "runif")
34 | importFrom("utils", "read.csv", "str", "tail", "write.csv")
35 | 


--------------------------------------------------------------------------------
/R/dataset.R:
--------------------------------------------------------------------------------
  1 | #' Gets list of available datasets
  2 | #'
  3 | #' @param project_hid character with project identifier
  4 | #'
  5 | #' @return structure with parsed datasets and response
  6 | #' @export
  7 | get_datasets <- function(project_hid) {
  8 |   #' Gets list of available datasets
  9 |   api_url_datasets <- paste(MLAR_API_PATH, API_VERSION, "/datasets?project_id=", project_hid, sep = "")
 10 |   rp <- .get_json_from_get_query(api_url_datasets)
 11 |   resp <- rp$resp
 12 |   parsed <- rp$parsed
 13 |   structure(
 14 |     list(
 15 |       datasets = parsed,
 16 |       response = resp
 17 |     ),
 18 |     class = "get_datasets"
 19 |   )
 20 | }
 21 | 
 22 | print.get_datasets <- function(x, ...) {
 23 |   cat("<MLJAR datasets >\n", sep = "")
 24 |   str(x$datasets)
 25 |   invisible(x)
 26 | }
 27 | 
 28 | #' Gets dataset
 29 | #'
 30 | #' @param dataset_hid character with dataset identifier
 31 | #'
 32 | #' @return structure with parsed dataset and response
 33 | #' @export
 34 | get_dataset <- function(dataset_hid) {
 35 |   api_url_dataset_hid <- paste(MLAR_API_PATH, API_VERSION, "/datasets/", dataset_hid, sep="")
 36 |   rp <- .get_json_from_get_query(api_url_dataset_hid)
 37 |   resp <- rp$resp
 38 |   parsed <- rp$parsed
 39 |   structure(
 40 |     list(
 41 |       dataset = parsed,
 42 |       response = resp
 43 |     ),
 44 |     class = "get_dataset"
 45 |   )
 46 | }
 47 | 
 48 | print.get_dataset <- function(x, ...) {
 49 |   cat("<MLJAR dataset >\n", sep = "")
 50 |   str(x$dataset)
 51 |   invisible(x)
 52 | }
 53 | 
 54 | #' Deletes dataset
 55 | #'
 56 | #' @param dataset_hid character with dataset identifier
 57 | #' @export
 58 | delete_dataset <-function(dataset_hid){
 59 |   token <- .get_token()
 60 |   api_url_dataset_hid <- paste(MLAR_API_PATH, API_VERSION, "/datasets/", dataset_hid, sep="")
 61 |   resp <- DELETE(api_url_dataset_hid, add_headers(Authorization = paste("Token", token)))
 62 |   if (status_code(resp)==204 || status_code(resp)==200){
 63 |     sprintf("Dataset <%s> succesfully deleted!", dataset_hid)
 64 |   }
 65 | }
 66 | 
 67 | #' Adds new dataset
 68 | #'
 69 | #' @param project_hid character with project identifier
 70 | #' @param filename character with filename containing data
 71 | #' @param title title of dataset
 72 | #' @param prediction_only boolean determining if data is used only for prediction
 73 | #'
 74 | #' @return parsed by toJSON dataset details
 75 | #' @export
 76 | #'
 77 | #' @importFrom httr POST
 78 | #' @importFrom jsonlite toJSON
 79 | add_new_dataset <- function(project_hid, filename, title, prediction_only=FALSE){
 80 |   dst_path <- upload_file(project_hid, filename)
 81 | 
 82 |   prediction_only <- as.integer(prediction_only)
 83 | 
 84 |   token <- .get_token()
 85 |   api_url_new_dataset <- paste(MLAR_API_PATH, API_VERSION, "/datasets" , sep="")
 86 |   data <- list(
 87 |     title = title,
 88 |     file_path = dst_path,
 89 |     file_name = filename,
 90 |     file_size = round(file.info(filename)$size/1024, 2),
 91 |     derived = 0,
 92 |     valid = 0,
 93 |     parent_project = project_hid,
 94 |     meta = '',
 95 |     data_type = "tabular",
 96 |     scope = "private",
 97 |     prediction_only = prediction_only
 98 |   )
 99 |   resp <- POST(api_url_new_dataset, add_headers(Authorization = paste("Token", token)),
100 |                body = data, encode = "form")
101 |   .check_response_status(resp, 201)
102 |   if (status_code(resp)==201){
103 |     print(sprintf("Dataset <%s> created!", title))
104 |   }
105 |   dataset_details <- jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8"),
106 |                                         simplifyVector = FALSE)
107 |   return(dataset_details)
108 | }
109 | 
110 | 
111 | #' Wait till all datasets are valid
112 | #'
113 | #'  Waits till all datasets is valid. If all valid it returns no error,
114 | #' if wait time is exceeded and there is any dataset not valid then
115 | #' it returns TRUE.
116 | #'
117 | #' @param project_hid hid of the project
118 | #'
119 | #' @return TRUE if all datasets are valid
120 | #'
121 | .wait_till_all_datasets_are_valid <-function(project_hid){
122 |   total_checks  <- 120
123 |   time_interval <- 5 # sleep for 5 sec every iteration
124 |   for (i in 1:total_checks){
125 |     datasets_list <- get_datasets(project_hid)
126 |     if (length(datasets_list$datasets) == 0){
127 |       sprintf("No datasets")
128 |       return(TRUE)
129 |     } else {
130 |       tmpcnt = 0
131 |       for (k in 1:length(datasets_list$datasets)){
132 |         tmpcnt = tmpcnt + datasets_list$datasets[[k]]$valid
133 |       }
134 |       if (tmpcnt == length(datasets_list$datasets)){
135 |         sprintf("All datasets are valid")
136 |         return(TRUE)
137 |       }
138 |     }
139 |     Sys.sleep(time_interval)
140 |   }
141 |   stop("Some datasets are invalid.")
142 | }
143 | 
144 | #' Verify if columns have correct structure
145 | #'
146 | #' At least one column must be Target and this is verified on server site.
147 | #'
148 | #' @param dataset_hid dataset hid code
149 | #'
150 | #' @return TRUE if correct, FALSE if not
151 | #'
152 | .accept_dataset_column_usage <- function(dataset_hid){
153 |   token <- .get_token()
154 |   api_url_new_dataset <- paste(MLAR_API_PATH, API_VERSION, "/accept_column_usage/" , sep="")
155 |   data <- list(dataset_id = dataset_hid)
156 |   resp <- POST(api_url_new_dataset, add_headers(Authorization = paste("Token", token)),
157 |                body = data, encode = "form")
158 |   return(ifelse(status_code(resp)==200, TRUE, FALSE))
159 | }
160 | 
161 | #' Add dataset if not exists
162 | #'
163 | #' Checks parameters before adding new dataset and verifies
164 | #' if it doesn't exists already.
165 | #'
166 | #' @param project_hid character with project identifier
167 | #' @param filename character with filename containing data
168 | #' @param title title of dataset
169 | #' @param prediction_only boolean determining if data is used only for prediction
170 | #'
171 | #' @return parsed dataset details
172 | #' @export
173 | add_dataset_if_not_exists <- function(project_hid, filename, title, prediction_only=FALSE){
174 |   .wait_till_all_datasets_are_valid(project_hid)
175 |   ds <- get_datasets(project_hid)
176 |   if (length(ds$datasets)>0) {
177 |     for(i in 1:length(ds$datasets)) {
178 |       if (ds$datasets[[i]]$title == title) {
179 |         warning(sprintf("Dataset with the same name already exists: <%s>",
180 |                         title))
181 |         existing_ds <- list(dataset=ds$datasets[[i]], resp=NULL)
182 |         return(existing_ds)
183 |       }
184 |     }
185 |   }
186 |   dataset_details <- add_new_dataset(project_hid, filename, title, prediction_only)
187 |   .wait_till_all_datasets_are_valid(project_hid)
188 |   if (!.accept_dataset_column_usage(dataset_details$hid)){
189 |     stop("There was a problem with accept column usage for your dataset.")
190 |   }
191 |   new_dataset <- get_dataset(dataset_details$hid)
192 |   if (!new_dataset$dataset$valid){
193 |     stop("Sorry, your dataset cannot be read by MLJAR.\nPlease report this to us - we will fix it")
194 |   }
195 |   if (is.null(new_dataset$dataset$column_usage_min)){
196 |     stop("Something bad happend! There is no attributes usage defined for your dataset")
197 |   }
198 |   return(new_dataset)
199 | }
200 | 


--------------------------------------------------------------------------------
/R/dataupload.R:
--------------------------------------------------------------------------------
 1 | #' Uploads file into MLJAR
 2 | #'
 3 | #' It uploads file into MLJAR and returns destination path.
 4 | #'
 5 | #' @param project_hid character with project identifier
 6 | #' @param filepath character with path to file
 7 | #'
 8 | #' @return character with destination path
 9 | #'
10 | #' @importFrom httr PUT
11 | #' @export
12 | upload_file <- function(project_hid, filepath){
13 | 
14 |   parsed <- .get_signed_url(project_hid, filepath)
15 |   signed_url <- parsed$signed_url
16 |   dst_path <- parsed$destination_path
17 |   plain_text_data <- readr::read_file(filepath)
18 |   resp <- PUT(signed_url, body=plain_text_data)
19 |   .check_response_status(resp, 200, "Upload into MLJAR failed")
20 |   return(dst_path)
21 | }
22 | 
23 | #' Get signed url
24 | #'
25 | #' From given project hid and filepath returns signed url for uploading.
26 | #'
27 | #' @param project_hid character with project identifier
28 | #' @param filepath path to the file
29 | #'
30 | #' @return parsed htt response from MLJAR s3policy (check mljar api for more)
31 | .get_signed_url <- function(project_hid, filepath){
32 |   api_url_signed_url <- paste(MLAR_API_PATH, API_VERSION, "/s3policy/" , sep="")
33 |   fname = tail(strsplit(filepath, "/")[[1]], n=1)
34 |   data <- list(project_hid = project_hid,
35 |                fname = fname)
36 |   rp <- .get_json_from_post_query(api_url_signed_url, data)
37 |   resp <- rp$resp
38 |   parsed <- rp$parsed
39 |   return(parsed)
40 | }
41 | 
42 | 


--------------------------------------------------------------------------------
/R/experiment.R:
--------------------------------------------------------------------------------
  1 | #' Gets list of available experiments for given project
  2 | #'
  3 | #' @param project_hid character with project identifier
  4 | #'
  5 | #' @return  structure with parsed experiments and http response
  6 | #' @export
  7 | get_experiments <- function(project_hid){
  8 |   api_url_experiments <- paste(MLAR_API_PATH, API_VERSION, "/experiments",
  9 |                                "?project_id=", project_hid, sep="")
 10 |   rp <- .get_json_from_get_query(api_url_experiments)
 11 |   resp <- rp$resp
 12 |   parsed <- rp$parsed
 13 | 
 14 |   structure(
 15 |     list(
 16 |       experiments = parsed,
 17 |       response = resp
 18 |     ),
 19 |     class = "get_experiments"
 20 |   )
 21 | }
 22 | 
 23 | print.get_experiments <- function(x, ...) {
 24 |   cat("<MLJAR experiments >\n", sep = "")
 25 |   str(x$experiments)
 26 |   invisible(x)
 27 | }
 28 | 
 29 | #' Gets experiment details
 30 | #'
 31 | #' @param experiment_hid character with experiment identifier
 32 | #'
 33 | #' @return structure with parsed experiment and http response
 34 | #' @export
 35 | get_experiment <- function(experiment_hid){
 36 |   api_url_experiment <- paste(MLAR_API_PATH, API_VERSION, "/experiments/",
 37 |                                experiment_hid, sep="")
 38 |   rp <- .get_json_from_get_query(api_url_experiment)
 39 |   resp <- rp$resp
 40 |   parsed <- rp$parsed
 41 | 
 42 |   structure(
 43 |     list(
 44 |       experiment = parsed,
 45 |       response = resp
 46 |     ),
 47 |     class = "get_experiment"
 48 |   )
 49 | }
 50 | 
 51 | print.get_experiment <- function(x, ...) {
 52 |   cat("<MLJAR experiment >\n", sep = "")
 53 |   str(x$experiment)
 54 |   invisible(x)
 55 | }
 56 | 
 57 | #' Creates experiment from given parameters
 58 | #'
 59 | #' @param data list of experiment parameters
 60 | #'
 61 | #' @return experiment details parsed by fromJSON
 62 | #' @export
 63 | #'
 64 | #' @importFrom httr POST
 65 | #' @importFrom jsonlite fromJSON
 66 | create_experiment <- function(data){
 67 |   token <- .get_token()
 68 |   api_url_create_experiment <- paste(MLAR_API_PATH, API_VERSION, "/experiments" , sep="")
 69 |   resp <- POST(api_url_create_experiment, add_headers(Authorization = paste("Token", token)),
 70 |                body = data, encode = "form")
 71 |   .check_response_status(resp, 201)
 72 |   if (status_code(resp)==201){
 73 |     print(sprintf("Experiment <%s> succesfully created!", data$title))
 74 |   }
 75 |   experiment_details <- jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8"),
 76 |                                            simplifyVector = FALSE)
 77 |   return(experiment_details)
 78 | }
 79 | 
 80 | #' Add experiment if not exists
 81 | #'
 82 | #' Check if experiment exists, verifies parameters, creates data
 83 | #' to create_experiment function and finally starts creation of
 84 | #' MLJAR experiment.
 85 | #'
 86 | #' @param project_hid character with project identifier
 87 | #' @param train_dataset character with path to training dataset
 88 | #' @param valid_dataset character with path to validation dataset
 89 | #' @param experiment_title character with experiment title
 90 | #' @param project_task character with project task
 91 | #' @param validation_kfolds number of folds to be used in validation
 92 | #' @param validation_shuffle boolean which specify if shuffle samples before training
 93 | #' @param validation_stratify boolean which decides whether samples will be
 94 | #' divided into folds with the same class distribution
 95 | #' @param validation_train_split ratio how to split training dataset into train and validation
 96 | #' @param algorithms list of algorithms to use
 97 | #' @param metric charcater with metric
 98 | #' @param tuning_mode tuning mode
 99 | #' @param time_constraint numeric with time limit to calculate algorithm
100 | #' @param create_ensemble whether or not to create ensemble
101 | #'
102 | #' @return experiment details structure
103 | #' @export
104 | add_experiment_if_not_exists <- function(project_hid, train_dataset,
105 |                                          valid_dataset, experiment_title,
106 |                                          project_task, validation_kfolds,
107 |                                          validation_shuffle, validation_stratify,
108 |                                          validation_train_split, algorithms,
109 |                                          metric, tuning_mode, time_constraint,
110 |                                          create_ensemble) {
111 |   if (!is.null(valid_dataset)) {
112 |     # check validation parameters
113 |     validation = "With dataset"
114 |   } else {
115 |     if (!is.null(validation_train_split)) {
116 |       percents = round(validation_train_split * 100)
117 |       validation = paste0("Split ", percents , "/", 100-percents)
118 |     } else {
119 |       validation = paste0(validation_kfolds, "-fold CV")
120 |     }
121 |     if (validation_shuffle) {
122 |       validation = paste0(validation, ", Shuffle")
123 |     }
124 |     if (validation_stratify && project_task == 'bin_class') {
125 |       validation = paste0(validation, ", Stratify")
126 |     }
127 |     if (validation_stratify && project_task != 'bin_class') {
128 |       warning("Cannot use stratify in validation for your project task.
129 |               Omitting this option in validation.")
130 |     }
131 |   }
132 |   # check metric parameters
133 |   if (is.null(metric) || metric == "" || !(metric %in% names(MLJAR_METRICS))) {
134 |     metric = MLJAR_DEFAULT_METRICS[project_task]
135 |   }
136 |   # check tuning mode parameters
137 |   if (is.null(tuning_mode) || tuning_mode == "" || !(tuning_mode %in% names(MLJAR_TUNING_MODES))){
138 |     tuning_mode = MLJAR_DEFAULT_TUNING_MODE
139 |   }
140 |   # check algorithms parameters
141 |   if (is.null(algorithms) || length(algorithms) == 0 || algorithms == "") {
142 |     algorithms = MLJAR_DEFAULT_ALGORITHMS[project_task]
143 |   }
144 |   # set default preprocessing if needed
145 |   dataset_preproc <- list()
146 |   if (length(train_dataset$column_usage_min[["cols_to_fill_na"]]) > 0) {
147 |     dataset_preproc$na_fill <- "na_fill_median"
148 |   }
149 |   if (length(train_dataset$column_usage_min[["cols_to_convert_categorical"]]) > 0) {
150 |     dataset_preproc$convert_categorical <- "categorical_to_int"
151 |   }
152 |   if (length(dataset_preproc) == 0) dataset_preproc={}
153 |   if (length(algorithms) == 1) algorithms = c(algorithms,"")
154 |   expt_params <- list(
155 |     train_dataset = list(id = train_dataset$hid, title = train_dataset$title),
156 |     algs = algorithms,
157 |     preproc = dataset_preproc,
158 |     single_limit = time_constraint,
159 |     ensemble = create_ensemble,
160 |     random_start_cnt = MLJAR_TUNING_MODES[[tuning_mode]][["random_start_cnt"]],
161 |     hill_climbing_cnt =  MLJAR_TUNING_MODES[[tuning_mode]][["hill_climbing_cnt"]]
162 |   )
163 |   if (!is.null(valid_dataset)){
164 |     expt_params$vald_dataset = list(id = valid_dataset$hid, title = valid_dataset$title)
165 |   }
166 | 
167 |   # checks whether title of experiment is different
168 |   all_experiments = get_experiments(project_hid)
169 |   if (length(all_experiments$experiments) > 0) {
170 |     for(i in 1:length(all_experiments$experiments)) {
171 |       if (all_experiments$experiments[[i]]$title == experiment_title) {
172 |         stop("The experiment with specified title already exists\nPlease rename your new experiment with new parameters setup.")
173 |       }
174 |     }
175 |   }
176 |   params <- jsonlite::toJSON(expt_params, auto_unbox =TRUE)
177 |   #' if everything is fine untill this point we can create data list to
178 |   #' build a new experiment
179 |   experiment_data <- list(title =  experiment_title,
180 |                           description = "",
181 |                           metric = metric,
182 |                           validation_scheme = validation,
183 |                           task = project_task,
184 |                           compute_now = 1,
185 |                           parent_project = project_hid,
186 |                           params = params
187 |                           )
188 |   create_experiment(experiment_data)
189 | }
190 | 


--------------------------------------------------------------------------------
/R/main.R:
--------------------------------------------------------------------------------
  1 | #' Get results statistics
  2 | #'
  3 | #' @param results results structure
  4 | #'
  5 | #' @return list with numbers of jobs: initiated, learning, done, error
  6 | .get_results_stats <- function(results){
  7 |   resstats <- list()
  8 |   resstats$initiated_cnt <- 0
  9 |   resstats$learning_cnt  <- 0
 10 |   resstats$done_cnt      <- 0
 11 |   resstats$error_cnt     <- 0
 12 |   for (r in results$results){
 13 |     if (r$status == "Initiated"){
 14 |       resstats$initiated_cnt = resstats$initiated_cnt + 1
 15 |     } else if (r$status == "Learning"){
 16 |       resstats$learning_cnt = resstats$learning_cnt + 1
 17 |     } else if (r$status == "Done"){
 18 |       resstats$done_cnt = resstats$done_cnt + 1
 19 |     } else {
 20 |       resstats$error_cnt = resstats$error_cnt + 1
 21 |     }
 22 |   }
 23 |   return(resstats)
 24 | }
 25 | 
 26 | #' Gives info about remaining training time
 27 | #'
 28 | #' @param exp experiment structure
 29 | #' @param res_stats results statistics structure
 30 | #'
 31 | #' @return numeric with estimated time
 32 | .asses_total_training_time <- function(exp, res_stats){
 33 |   single_alg_limit <- exp$experiment$params$single_limit
 34 |   if (is.null(single_alg_limit)){
 35 |     single_alg_limit <- 5
 36 |   } else {
 37 |     single_alg_limit <- as.numeric(single_alg_limit)
 38 |   }
 39 |   total <- (res_stats$initiated_cnt * single_alg_limit) / max(c(res_stats$learning_cnt,1))
 40 |   total <- total + 0.5 * single_alg_limit
 41 |   return(total)
 42 | }
 43 | 
 44 | #' Get best result
 45 | #'
 46 | #' Returns best result from given experiment and results stats.
 47 | #'
 48 | #' @param exp experiment structure
 49 | #' @param curr_results currect results structure
 50 | #'
 51 | #' @return results structure with best results
 52 | .get_best_result <- function(exp, curr_results){
 53 |   the_best_result <- NULL
 54 |   min_value       <- 10e12
 55 |   if (exp$experiment$compute_now == 1 || exp$experiment$compute_now == 2) {
 56 |     if (!(exp$experiment$metric %in% MLJAR_OPT_MAXIMIZE)){
 57 |       opt_direction <- 1
 58 |     } else {
 59 |       opt_direction <- -1
 60 |     }
 61 |     for(res in curr_results$results){
 62 |       if(is.null(res$metric_value)) next
 63 |       if(res$metric_value * opt_direction < min_value){
 64 |         min_value <- res$metric_value*opt_direction
 65 |         the_best_result <- res
 66 |       }
 67 |     }
 68 |   }
 69 |   return(the_best_result)
 70 | }
 71 | 
 72 | #' Wait till all models trained
 73 | #'
 74 | #' Waits untill all models are trained and returns best model.
 75 | #'
 76 | #' @param project_hid character with project identifier
 77 | #' @param experiment_hid character with experiment identifier
 78 | #'
 79 | #' @return best model structure
 80 | .wait_till_all_models_trained <- function(project_hid, experiment_hid){
 81 |   WAIT_INTERVAL    <- 10.0
 82 |   loop_max_counter <- 24*360 # 24 hours of maximum waiting
 83 |   results          <- NULL
 84 |   while(loop_max_counter > 0){
 85 |     loop_max_counter <- loop_max_counter - 1
 86 |     rtry <- try({
 87 |       curr_results <- get_results(project_hid, experiment_hid)
 88 |       exp <- get_experiment(experiment_hid)
 89 |       if (exp$experiment$compute_now == 2){
 90 |         break
 91 |       }
 92 |       res_stats <- .get_results_stats(curr_results)
 93 |       # printing out info about training process
 94 |       eta <- .asses_total_training_time(exp, res_stats)
 95 |       if (res_stats$initiated_cnt + res_stats$learning_cnt +
 96 |           res_stats$done_cnt + res_stats$error_cnt == 0) {
 97 |         eta <- "estimating"
 98 |       } else {
 99 |         eta = round(eta, 2)
100 |       }
101 |       cat("\r", sprintf(
102 |       "initiated: %s, learning: %s, done: %s, error: %s | ETA: %s minutes               ",
103 |           res_stats$initiated_cnt, res_stats$learning_cnt, res_stats$done_cnt,
104 |           res_stats$error_cnt, eta))
105 |       Sys.sleep(WAIT_INTERVAL)
106 | 
107 |     }, silent=TRUE)
108 |     if(class(rtry) == "try-error"){
109 |       warning(paste("There were some problems with your model: ", geterrmessage()))
110 |     }
111 |   }
112 |   best_result <- .get_best_result(exp, curr_results)
113 |   return(best_result)
114 | }
115 | 
116 | #' Starts experiment and returns best model
117 | #'
118 | #' But before verifies if given input data is correct.
119 | #'
120 | #' @param x data.frame/matrix with training data
121 | #' @param y data.frame/matrix with training labels
122 | #' @param validx data.frame/matrix with validation data
123 | #' @param validy data.frame/matrix with validation labels
124 | #' @param proj_title charcater with project title
125 | #' @param exp_title charcater with experiment title
126 | #' @param dataset_title charcater with dataset title
127 | #' @param val_dataset_title charcater with validation dataset title
128 | #' @param metric charcater with metric
129 | #' @param algorithms list of algorithms to use
130 | #' @param validation_kfolds number of folds to be used in validation
131 | #' @param validation_shuffle boolean which specify if shuffle samples before training
132 | #' @param validation_stratify boolean which decides whether samples will be
133 | #' divided into folds with the same class distribution
134 | #' @param validation_train_split ratio how to split training dataset into train and validation
135 | #' @param tuning_mode tuning mode
136 | #' @param create_ensemble whether or not to create ensemble
137 | #' @param single_algorithm_time_limit numeric with time limit to calculate algorithm
138 | #'
139 | #' @return structure with the best model
140 | .start_experiment <- function(x, y, validx, validy, proj_title, exp_title,
141 |                               dataset_title, val_ds_title, metric,
142 |                               algorithms, validation_kfolds, validation_shuffle,
143 |                               validation_stratify, validation_train_split,
144 |                               tuning_mode, create_ensemble, single_algorithm_time_limit){
145 |   task <- .obtain_task(y)
146 |   if (length(algorithms) == 0) {
147 |     algorithms <- ifelse(task == "reg",
148 |                          MLJAR_DEFAULT_ALGORITHMS$regression,
149 |                          MLJAR_DEFAULT_ALGORITHMS$bin_class)
150 |     warning(sprintf("You did not specify algorithms: defaults for task %s are %s",
151 |                     task, paste(algorithms, collapse=" ")))
152 |   }
153 |   if (nchar(metric) == 0) {
154 |     metric <- ifelse(task == "reg",
155 |                      MLJAR_DEFAULT_METRICS$regression,
156 |                      MLJAR_DEFAULT_METRICS$bin_class)
157 |     warning(sprintf("You did not specify metric: defaults for task %s are %s",
158 |                     task, paste(metric, collapse=" ")))
159 |   }
160 |   # create project and datasets
161 |   tmp_data_filename <- .data_to_file(x, y)
162 |   tmp_proj_hid <- .check_if_project_exists(proj_title)
163 |   if (is.null(tmp_proj_hid))
164 |     project_details <- create_project(proj_title, task)
165 |   else {
166 |     print(sprintf("Project <%s> exists.", proj_title))
167 |     project_details <- get_project(tmp_proj_hid)$project
168 |   }
169 |   ds_title <- ifelse(is.null(dataset_title),
170 |                      paste0("Dataset", round(runif(1, 1, 999))),
171 |                      dataset_title )
172 |   dataset <- add_dataset_if_not_exists(project_details$hid,
173 |                                        tmp_data_filename, ds_title)
174 |   if (!is.null(validx) && !is.null(validy)){
175 |     tmp_valid_data_filename <- .data_to_file(validx, validy)
176 |     val_title <- ifelse(is.null(val_ds_title),
177 |                         paste0("Val_dataset", round(runif(1, 1, 999))),
178 |                         val_ds_title)
179 |     valdataset <- add_dataset_if_not_exists(project_details$hid, tmp_valid_data_filename, val_title)
180 |   } else {
181 |     valdataset <- NULL
182 |   }
183 |   # add experiment
184 |   exp_details <- add_experiment_if_not_exists(project_details$hid, dataset$dataset,
185 |                                               valdataset$dataset, exp_title, task,
186 |                                               validation_kfolds, validation_shuffle,
187 |                                               validation_stratify, validation_train_split,
188 |                                               algorithms, metric, tuning_mode,
189 |                                               single_algorithm_time_limit, create_ensemble)
190 |   best_model <- .wait_till_all_models_trained(project_details$hid, exp_details$hid)
191 |   return(best_model)
192 | }
193 | 
194 | #' MLJAR FIT
195 | #'
196 | #' Verifies parameters and data and tries to run experiment.
197 | #'
198 | #' @param x data.frame/matrix with training data
199 | #' @param y data.frame/matrix with training labels
200 | #' @param validx data.frame/matrix with validation data
201 | #' @param validy data.frame/matrix with validation labels
202 | #' @param proj_title charcater with project title
203 | #' @param exp_title charcater with experiment title
204 | #' @param dataset_title charcater with dataset name
205 | #' @param val_dataset_title charcater with validation dataset name
206 | #' @param metric charcater with metric
207 | #' For binary classification there are metrics:
208 | #' "auc" which is for Area Under ROC Curve,
209 | #' "logloss" which is for Logarithmic Loss.
210 | #' For regression tasks:
211 | #' "rmse" which is Root Mean Square Error,
212 | #' "mse" which is for Mean Square Error,
213 | #' "mase" which is for Mean Absolute Error.
214 | #' @param wait_till_all_done boolean saying whether function should wait
215 | #' till all models are done
216 | #' @param algorithms list of algorithms to use
217 | #' For binary classification task available algorithm are:
218 | #' "xgb" which is for Xgboost,
219 | #' "lgb" which is for LightGBM
220 | #' "mlp" which is for Neural Network,
221 | #' "rfc" which is for Random Forest,
222 | #' "etc" which is for Extra Trees,
223 | #' "rgfc" which is for Regularized Greedy Forest,
224 | #' "knnc" which is for k-Nearest Neighbors,
225 | #' "logreg" which is for Logistic Regression.
226 | #' For regression task there are available algorithms:
227 | #' "xgbr" which is for Xgboost,
228 | #' "lgbr" which is for LightGBM,
229 | #' "rgfr" which is for Regularized Greedy Forest,
230 | #' "rfr" which is for Random Forest,
231 | #' "etr" which is for Extra Trees.
232 | #' @param validation_kfolds number of folds to be used in validation
233 | #' @param validation_shuffle boolean which specify if shuffle samples before training
234 | #' @param validation_stratify boolean which decides whether samples will be
235 | #' divided into folds with the same class distribution
236 | #' @param validation_train_split ratio how to split training dataset into train and validation
237 | #' @param tuning_mode tuning mode
238 | #' @param create_ensemble whether or not to create ensemble
239 | #' @param single_algorithm_time_limit numeric with time limit to calculate algorithm
240 | #'
241 | #' @return structure with the best model
242 | #' @export
243 | mljar_fit <- function(x, y, validx=NULL, validy=NULL,
244 |                       proj_title=NULL, exp_title=NULL,
245 |                       dataset_title=NULL, val_dataset_title=NULL,
246 |                       algorithms = c(), metric = "",
247 |                       wait_till_all_done = TRUE,
248 |                       validation_kfolds = MLJAR_DEFAULT_FOLDS,
249 |                       validation_shuffle = MLJAR_DEFAULT_SHUFFLE,
250 |                       validation_stratify = MLJAR_DEFAULT_STRATIFY,
251 |                       validation_train_split = MLJAR_DEFAULT_TRAIN_SPLIT,
252 |                       tuning_mode = MLJAR_DEFAULT_TUNING_MODE,
253 |                       create_ensemble  = MLJAR_DEFAULT_ENSEMBLE,
254 |                       single_algorithm_time_limit = MLJAR_DEFAULT_TIME_CONSTRAINT){
255 |   if (is.null(proj_title)){
256 |     proj_title <- paste0("Project", round(runif(1, 1, 999)))
257 |   }
258 |   if (is.null(exp_title)){
259 |     proj_title <- paste0("Experiment", round(runif(1, 1, 999)))
260 |   }
261 |   model <- .start_experiment(x, y, validx, validy, proj_title, exp_title,
262 |                              dataset_title, val_dataset_title, metric,
263 |                              algorithms, validation_kfolds, validation_shuffle,
264 |                              validation_stratify, validation_train_split,
265 |                              tuning_mode, create_ensemble,
266 |                              single_algorithm_time_limit)
267 |   class(model) <- "mljar_model"
268 |   return(model)
269 | }
270 | 
271 | #' MLJAR PREDICT
272 | #'
273 | #' Makes prediction basing on trained model.
274 | #'
275 | #' @param model model Id or MLJAR result structure
276 | #' @param x_pred data.frame/matrix data to predict
277 | #' @param project_title character with project title
278 | #'
279 | #' @return data.frame with preditction
280 | #' @export
281 | mljar_predict <- function(model, x_pred, project_title){
282 |   if (is.null(model)) {
283 |     stop("Model cannot be null.")
284 |   }
285 |   if (is.atomic(model)) {
286 |     model <- list(hid = model)
287 |   }
288 |   # checking if prediction data is ok
289 |   x_pred <- as.data.frame(x_pred)
290 |   if (is.null(x_pred)) {
291 |     stop("NULL data")
292 |   }
293 |   # look for project
294 |   proj_hid <- .check_if_project_exists(project_title)
295 |   if (is.null(proj_hid)) stop("Project not found! Check title and try again.")
296 |   # adding prediction dataset
297 |   tmp_data_filename <- .data_to_file(x_pred)
298 |   dspred_title <- paste0("Pred_dataset", round(runif(1, 1, 999)))
299 |   pred_ds  <- add_dataset_if_not_exists(proj_hid, tmp_data_filename, dspred_title, TRUE)
300 |   total_checks <- 1000
301 |   cat("Prediction download started")
302 |   for (i in 1:total_checks){
303 |     prediction <- get_prediction(proj_hid, pred_ds$dataset$hid, model$hid)
304 |     cat("\r", sprintf("Downloading prediction - %s          ", i))
305 |     # for first iteration we send dataset for prediction
306 |     if (i == 1 && length(prediction$prediction) == 0) {
307 |       submit_predict_job(proj_hid, pred_ds$dataset$hid, model$hid)
308 |     }
309 |     if (length(prediction$prediction) > 0) {
310 |       pred <- prediction_download(prediction$prediction[[1]]$hid)
311 |       delete_dataset(pred_ds$dataset$hid)
312 |       return(pred)
313 |     }
314 |     Sys.sleep(10)
315 |   }
316 |   return(NULL)
317 | }
318 | 
319 | #' Gives data.frame with basic data of all models
320 | #'
321 | #' You can later get some specific model by calling
322 | #' e.g. \code{mod <- get_model(project_title, experiment_title, model_hid)}.
323 | #'
324 | #' @param project_title character with project title
325 | #' @param exp_title character with experiment title
326 | #'
327 | #' @return data.frame with model's "hid", "model_type", "metric_value",
328 | #' "metric_type"
329 | #'
330 | #' @export
331 | get_all_models <- function(project_title, exp_title) {
332 |   # Look for project title
333 |   flag.proj.title <- FALSE
334 |   prj_hid <- .check_if_project_exists(project_title)
335 |   if (is.null(prj_hid))
336 |     stop("MLJAR cannot find a project with such a title. Check and try again.")
337 |   # Look for experiment title
338 |   flag.proj.exp <- FALSE
339 |   ge <- get_experiments(prj_hid)
340 |   if (length(ge$experiments) == 0) stop("No experiments found.")
341 |   for(i in 1:length(ge$experiments)) {
342 |     if (ge$experiments[[i]]$title == exp_title){
343 |       flag.proj.exp <- TRUE
344 |       break
345 |     }
346 |   }
347 |   if (flag.proj.exp == FALSE)
348 |     stop("MLJAR cannot find an experiment with such a title. Check and try again.")
349 |   exp_hid <- ge$experiments[[i]]$hid
350 |   exp <- get_experiment(exp_hid)
351 |   if (exp$experiment$compute_now != 2)
352 |     stop("Experiment still in progress. Wait till its done!")
353 |   curr_results <- get_results(prj_hid, exp_hid)
354 |   column.names <- c("hid", "model_type", "metric_value",
355 |                     "metric_type", "validation_scheme")
356 |   filter_curr_res <- curr_results$results[unlist(lapply(curr_results$results,
357 |                                           function(x) x$experiment==exp_title))]
358 |   tmp_sa <- sapply(filter_curr_res,
359 |              function(x) c(x$hid, x$model_type, x$metric_value,
360 |                            x$metric_type, x$validation_scheme),
361 |              simplify = FALSE, USE.NAMES = TRUE)
362 |   df_res <- t(as.data.frame(tmp_sa,
363 |                             row.names = column.names,
364 |                             col.names = 1:length(tmp_sa)))
365 |   df_res <- data.frame(df_res, row.names = NULL)
366 |   return(df_res)
367 | }
368 | 
369 | 


--------------------------------------------------------------------------------
/R/params.R:
--------------------------------------------------------------------------------
 1 | # MLJAR Constants
 2 | #################
 3 | 
 4 | MLAR_API_PATH <- "https://mljar.com/api/"
 5 | API_VERSION   <- "v1"
 6 | 
 7 | MLJAR_TASKS <- list( bin_class = 'Binary Classification',
 8 |                      regression = 'Regression'
 9 |                      )
10 | 
11 | MLJAR_METRICS  <- list(auc = 'Area Under Curve',
12 |                        logloss = 'Logarithmic Loss',
13 |                        rmse = 'Root Mean Square Error',
14 |                        mse = 'Mean Square Error',
15 |                        mae = 'Mean Absolute Error')
16 | 
17 | MLJAR_DEFAULT_FOLDS = 5
18 | MLJAR_DEFAULT_SHUFFLE = TRUE
19 | MLJAR_DEFAULT_STRATIFY = TRUE
20 | MLJAR_DEFAULT_TRAIN_SPLIT = NULL
21 | 
22 | MLJAR_BIN_CLASS  <- list(xgb = 'Extreme Gradient Boosting',
23 |                          lgb = 'LightGBM',
24 |                          rfc = 'Random Forest',
25 |                          rgfc = 'Regularized Greedy Forest',
26 |                          etc = 'Extra Trees',
27 |                          knnc = 'k-Nearest Neighbor',
28 |                          logreg = 'Logistic Regression',
29 |                          mlp = 'Neural Network'
30 |                          )
31 | 
32 | MLJAR_REGRESSION   <- list(xgbr = 'Extreme Gradient Boosting',
33 |                            lgbr = 'LightGBM',
34 |                            rfr = 'Random Forest',
35 |                            rgfr = 'Regularized Greedy Forest',
36 |                            etr = 'Extra Trees'
37 |                            )
38 | 
39 | MLJAR_TUNING_MODES <- list(Normal = list(random_start_cnt = 5, hill_climbing_cnt = 1),
40 |                            Sport = list(random_start_cnt = 10, hill_climbing_cnt = 2),
41 |                            Insane = list(random_start_cnt = 15, hill_climbing_cnt = 3)
42 |                            )
43 | 
44 | # MLJAR Defaults
45 | #################
46 | 
47 | MLJAR_DEFAULT_METRICS  <- list(bin_class = "logloss",
48 |                                regression = "rmse")
49 | 
50 | MLJAR_DEFAULT_ALGORITHMS <- list( bin_class = c("xgb", "lgb"),
51 |                                   regression = c("xgbr", "lgbr")
52 |                                 )
53 | 
54 | MLJAR_DEFAULT_ENSEMBLE        = TRUE
55 | MLJAR_DEFAULT_TUNING_MODE     = 'Normal'
56 | MLJAR_DEFAULT_TIME_CONSTRAINT = '5' # minutes
57 | 
58 | MLJAR_OPT_MAXIMIZE = c('auc')
59 | 


--------------------------------------------------------------------------------
/R/prediction.R:
--------------------------------------------------------------------------------
 1 | #' Gets MLJAR predictions
 2 | #'
 3 | #' @param project_hid character with project identifier
 4 | #' @param dataset_hid character with dataset identifier
 5 | #' @param result_hid character with result identifier
 6 | #'
 7 | #' @return structure with parsed prediction and http response
 8 | #' @export
 9 | get_prediction <- function(project_hid, dataset_hid, result_hid){
10 |   api_url_prediction <- paste(MLAR_API_PATH, API_VERSION, "/predictions",
11 |                                "?project_id=", project_hid, "&dataset_id=",
12 |                                dataset_hid, "&result_id=", result_hid, sep="")
13 |   rp <- .get_json_from_get_query(api_url_prediction)
14 |   resp <- rp$resp
15 |   parsed <- rp$parsed
16 | 
17 |   structure(
18 |     list(
19 |       prediction = parsed,
20 |       response = resp
21 |     ),
22 |     class = "get_prediction"
23 |   )
24 | }
25 | 
26 | print.get_prediction <- function(x, ...) {
27 |   cat("<MLJAR prediction >\n", sep = "")
28 |   str(x$prediction)
29 |   invisible(x)
30 | }
31 | 


--------------------------------------------------------------------------------
/R/prediction_download.R:
--------------------------------------------------------------------------------
 1 | #' Function to get predictions from MLJAR.
 2 | #'
 3 | #' @param prediction_hid prediction identifier
 4 | #'
 5 | #' @return data.frame with prediction
 6 | #'
 7 | #' @importFrom httr POST
 8 | #'
 9 | #' @export
10 | prediction_download <- function(prediction_hid){
11 |   token <- .get_token()
12 |   api_url_preddown <- paste(MLAR_API_PATH, API_VERSION, "/download/prediction/" , sep="")
13 |   data <- list( prediction_id =  prediction_hid)
14 |   resp <- POST(api_url_preddown, add_headers(Authorization = paste("Token", token)),
15 |                body = data, encode = "form")
16 |   .check_response_status(resp, 200, "Error in prediction download!")
17 |   tmpfilepath <- paste0(tempfile(),".csv")
18 |   file.create(tmpfilepath)
19 |   write(content(resp, encoding = "UTF-8"), file = tmpfilepath)
20 |   prediction <- read.csv(tmpfilepath)
21 |   file.remove(tmpfilepath)
22 |   return(prediction)
23 | }
24 | 


--------------------------------------------------------------------------------
/R/predictjob.R:
--------------------------------------------------------------------------------
 1 | #' Submits dataset for MLJAR prediction
 2 | #'
 3 | #' @param project_hid character with project identifier
 4 | #' @param dataset_hid character with dataset identifier
 5 | #' @param result_hid character with result identifier
 6 | #'
 7 | #' @importFrom httr POST
 8 | #' @importFrom jsonlite toJSON
 9 | #' @export
10 | submit_predict_job <- function(project_hid, dataset_hid, result_hid){
11 |   token <- .get_token()
12 |   data <- list(predict_params = jsonlite::toJSON(list(project_id =  project_hid,
13 |                                             project_hardware = 'cloud',
14 |                                             algorithms_ids = list(result_hid),
15 |                                             dataset_id = dataset_hid,
16 |                                             cv_models = 1),
17 |                                        auto_unbox =TRUE)
18 |               )
19 |   query <- paste(MLAR_API_PATH, API_VERSION, "/predict/" , sep="")
20 |   resp <- POST(query, add_headers(Authorization = paste("Token", token)),
21 |                body = data, encode = "form")
22 |   .check_response_status(resp, 200, "Predict MLJAR job failed")
23 | }
24 | 


--------------------------------------------------------------------------------
/R/projects.R:
--------------------------------------------------------------------------------
  1 | #' Get projects
  2 | #'
  3 | #' Gets list of available projects
  4 | #'
  5 | #' @return structure with parsed projects and http response
  6 | #' @export
  7 | get_projects <- function() {
  8 |   api_url_projects <- paste(MLAR_API_PATH, API_VERSION, "/projects" , sep="")
  9 |   rp <- .get_json_from_get_query(api_url_projects)
 10 |   resp <- rp$resp
 11 |   parsed <- rp$parsed
 12 |   structure(
 13 |     list(
 14 |       projects = parsed,
 15 |       response = resp
 16 |     ),
 17 |     class = "get_projects"
 18 |   )
 19 | }
 20 | 
 21 | print.get_projects <- function(x, ...) {
 22 |   cat("<MLJAR projects >\n", sep = "")
 23 |   str(x$projects)
 24 |   invisible(x)
 25 | }
 26 | 
 27 | #' Print all projects
 28 | #'
 29 | #' Gives data.frame with basic information about existing projects
 30 | #'
 31 | #' @return data.frame with projects
 32 | #' @export
 33 | print_all_projects <- function() {
 34 |   columns = c("hid", "title", "task", "description")
 35 |   projects <- get_projects()
 36 |   if (length(projects$projects) == 0) return(data.frame())
 37 |   tmp_sa <- sapply(projects$projects,
 38 |                    function(x) c(x$hid, x$title, x$task,
 39 |                                  ifelse(!is.null(x$description), x$description, "")),
 40 |                    simplify = FALSE, USE.NAMES = TRUE)
 41 |   df_proj <- t(as.data.frame(tmp_sa,
 42 |                             row.names = columns,
 43 |                             col.names = 1:length(tmp_sa)))
 44 |   df_proj <- data.frame(df_proj, row.names = NULL)
 45 |   return(df_proj)
 46 | }
 47 | 
 48 | #' Get project
 49 | #'
 50 | #' Get data from a project of specified hid
 51 | #'
 52 | #' @param hid character with project unique identifier
 53 | #'
 54 | #' @return structure with parsed project and http response
 55 | #' @export
 56 | get_project <- function(hid) {
 57 |   api_url_project_hid <- paste(MLAR_API_PATH, API_VERSION, "/projects/", hid, sep="")
 58 |   rp <- .get_json_from_get_query(api_url_project_hid)
 59 |   resp <- rp$resp
 60 |   parsed <- rp$parsed
 61 | 
 62 |   structure(
 63 |     list(
 64 |       project = parsed,
 65 |       response = resp
 66 |     ),
 67 |     class = "get_project"
 68 |   )
 69 | }
 70 | 
 71 | print.get_project <- function(x, ...) {
 72 |   cat("<MLJAR project >\n", sep = "")
 73 |   str(x$project)
 74 |   invisible(x)
 75 | }
 76 | 
 77 | #' Creates a new project
 78 | #'
 79 | #' @param title character with project title
 80 | #' @param task character with project task
 81 | #' @param description optional description
 82 | #'
 83 | #' @return project details structure
 84 | #' @export
 85 | create_project <-function(title, task, description=""){
 86 |   .verify_if_project_exists(title, task)
 87 |   token <- .get_token()
 88 |   api_url_projects <- paste(MLAR_API_PATH, API_VERSION, "/projects" , sep="")
 89 |   data <- list(title = title,
 90 |                hardware = 'cloud',
 91 |                scope = 'private',
 92 |                task = task,
 93 |                compute_now = 0,
 94 |                description = description)
 95 |   resp <- POST(api_url_projects, add_headers(Authorization = paste("Token", token)),
 96 |                body = data, encode = "form")
 97 |   .check_response_status(resp, 201)
 98 |   if (status_code(resp)==201){
 99 |     print(sprintf("Project <%s> succesfully created!", title))
100 |   }
101 |   project_details <- jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8"), simplifyVector = FALSE)
102 |   return(project_details)
103 | }
104 | 
105 | #' Delete project
106 | #'
107 | #' @param hid charceter with project identifier
108 | #'
109 | #' @export
110 | #' @importFrom httr DELETE status_code
111 | delete_project <-function(hid){
112 |   token <- .get_token()
113 |   api_url_project_hid <- paste(MLAR_API_PATH, API_VERSION, "/projects/", hid, sep="")
114 |   resp <- DELETE(api_url_project_hid, add_headers(Authorization = paste("Token", token)))
115 |   if (status_code(resp)==204 || status_code(resp)==200){
116 |     print(sprintf("Project <%s> succesfully deleted!", hid))
117 |   }
118 | }
119 | 
120 | # Helper project functions
121 | 
122 | #' Verify if project exists
123 | #'
124 | #' Checks if there is no project with the same name and task.
125 | #'
126 | #' @param projtitle character with project title
127 | #' @param task characeter with project task
128 | #'
129 | #' @return TRUE if okay, stops if such a project exists.
130 | .verify_if_project_exists <- function(projtitle, task){
131 |   gp <- get_projects()
132 |   for (proj in gp$projects){
133 |     if (proj$title==projtitle && proj$task==task){
134 |       stop("Project with the same title and task already exists, change name.")
135 |     }
136 |   }
137 |   return(TRUE)
138 | }
139 | 
140 | #' Checks if project exists
141 | #'
142 | #' It bases only on title and returns project's hid if it exists.
143 | #'
144 | #' @param project_title character with project title
145 | #'
146 | #' @return character of project with its identifier or NULL
147 | .check_if_project_exists <- function(project_title) {
148 |   projects <- get_projects()
149 |   proj_hid <- NULL
150 |   if (length(projects$projects) == 0) return(NULL)
151 |   for(i in 1:length(projects$projects)) {
152 |     if (projects$projects[[i]]$title == project_title){
153 |       proj_hid <- projects$projects[[i]]$hid
154 |       break
155 |     }
156 |   }
157 |   return(proj_hid)
158 | }
159 | 


--------------------------------------------------------------------------------
/R/result.R:
--------------------------------------------------------------------------------
 1 | #' Get results of MLJAR training
 2 | #'
 3 | #' @param project_hid character with project identifier
 4 | #' @param experiment_hid character with experiment identifier
 5 | #'
 6 | #' @return structure with parsed results and http response
 7 | #'
 8 | #' @importFrom httr POST
 9 | #'
10 | #' @export
11 | get_results <- function(project_hid, experiment_hid){
12 |   token <- .get_token()
13 |   api_url_results <- paste(MLAR_API_PATH, API_VERSION, "/results/" , sep="")
14 |   datares <- list( project_id =  project_hid,
15 |                    experiment_id =  experiment_hid)
16 |   resp <- POST(api_url_results, add_headers(Authorization = paste("Token", token)),
17 |                body = datares, encode = "form")
18 |   .check_response_status(resp, 200)
19 |   parsed <- jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8"), simplifyVector = FALSE)
20 |   structure(
21 |     list(
22 |       results = parsed,
23 |       response = resp
24 |     ),
25 |     class = "get_results"
26 |   )
27 | }
28 | 
29 | print.get_results <- function(x, ...) {
30 |   cat("<MLJAR results >\n", sep = "")
31 |   str(x$results)
32 |   invisible(x)
33 | }
34 | 
35 | 
36 | #' Get model
37 | #'
38 | #' Gets model only if experiment finished and project with such
39 | #' a title and having such an experiment exists.
40 | #'
41 | #' @param project_title character with project title
42 | #' @param exp_title character with experiment title
43 | #' @param model_hid character with experiment identifier
44 | #'
45 | #' @return structure with model parameters
46 | #' @export
47 | get_model <- function(project_title, exp_title, model_hid) {
48 |   # Look for project title
49 |   flag.proj.title <- FALSE
50 |   prj_hid <- .check_if_project_exists(project_title)
51 |   if (is.null(prj_hid))
52 |     stop("MLJAR cannot find a project with such a title. Check and try again.")
53 |   # Look for experiment title
54 |   flag.proj.exp <- FALSE
55 |   ge <- get_experiments(prj_hid)
56 |   if (length(ge$experiments) == 0) stop("No experiments found.")
57 |   for(i in 1:length(ge$experiments)) {
58 |     if (ge$experiments[[i]]$title == exp_title){
59 |       flag.proj.exp <- TRUE
60 |       break
61 |     }
62 |   }
63 |   if (flag.proj.exp == FALSE)
64 |     stop("MLJAR cannot find an experiment with such a title. Check and try again.")
65 |   exp_hid <- ge$experiments[[i]]$hid
66 |   exp <- get_experiment(exp_hid)
67 |   if (exp$experiment$compute_now != 2)
68 |     stop("Experiment still in progress. Wait till its done!")
69 |   flag.mod <- FALSE
70 |   curr_results <- get_results(prj_hid, exp_hid)
71 |   for(res in curr_results$results) {
72 |     if (res$hid == model_hid){
73 |       flag.mod <- TRUE
74 |       break
75 |     }
76 |   }
77 |   if (flag.mod == FALSE)
78 |     stop("MLJAR cannot find an experiment with such a title. Check and try again.")
79 |   return(res)
80 | }
81 | 


--------------------------------------------------------------------------------
/R/utils.R:
--------------------------------------------------------------------------------
  1 | # MLJAR Helper Functions
  2 | 
  3 | #' Get json from post query
  4 | #'
  5 | #' Returns api response and parsed output from POST query given data
  6 | #'
  7 | #' @param query character with http query
  8 | #' @param data list with body data
  9 | #'
 10 | #' @importFrom httr POST add_headers content
 11 | #' @importFrom jsonlite fromJSON
 12 | #' @return list with response and parsed response from json
 13 | .get_json_from_post_query <- function(query, data){
 14 |   token <- .get_token()
 15 |   resp <- POST(query, add_headers(Authorization = paste("Token", token)),
 16 |                body = data, encode = "form")
 17 |   parsed <- jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8"),
 18 |                                simplifyVector = FALSE)
 19 |   .check_response_status(resp, 200)
 20 |   return(list(resp=resp, parsed=parsed))
 21 | }
 22 | 
 23 | #' Get json from get query
 24 | #'
 25 | #' Returns api response and parsed output
 26 | #'
 27 | #' @param query character with http query
 28 | #'
 29 | #' @importFrom httr GET add_headers content
 30 | #' @importFrom jsonlite fromJSON
 31 | #' @return list with response and parsed response from json
 32 | .get_json_from_get_query <- function(query){
 33 |   token <- .get_token()
 34 |   resp <- GET(query, add_headers(Authorization = paste("Token", token)))
 35 |   parsed <- jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8"),
 36 |                                simplifyVector = FALSE)
 37 |   .check_response_status(resp, 200)
 38 |   return(list(resp=resp, parsed=parsed))
 39 | }
 40 | 
 41 | #' Get token
 42 | #'
 43 | #' Gets token from MLJAR_TOKEN env variable.
 44 | #'
 45 | #' @return returns token defined in enviromental variable MLJAR_TOKEN
 46 | #' @examples
 47 | #' .get_token()
 48 | .get_token <- function(){
 49 |   token <- Sys.getenv("MLJAR_TOKEN")
 50 |   if (identical(token, "")) {
 51 |     stop("Specify MLJAR_TOKEN env variable", call. = FALSE)
 52 |   }
 53 |   return(token)
 54 | }
 55 | 
 56 | #' Check response status
 57 | #'
 58 | #' Verifies if response status is correct.
 59 | #' If not it stops execution with message.
 60 | #'
 61 | #' @param resp httr response
 62 | #' @param expected_code numeric with expected code e.g. 201
 63 | #' @param error_message character with error message
 64 | #'
 65 | #' @importFrom httr status_code
 66 | .check_response_status <- function(resp, expected_code,
 67 |                                    error_message="MLJAR API request failed"){
 68 |   # compares response status with expeced_code and returns error_message if not equal
 69 |   if (status_code(resp) != expected_code) {
 70 |     stop(
 71 |       sprintf(
 72 |         paste(error_message, "[%s]\n"),
 73 |         status_code(resp)
 74 |       ),
 75 |       call. = FALSE
 76 |     )
 77 |   }
 78 | }
 79 | 
 80 | #' Checks if data is in good format.
 81 | #'
 82 | #' If not it stops execution.
 83 | #'
 84 | #' @param x preferably a matrix or data frame.
 85 | #' If not, it is attempted to coerce x to a data frame.
 86 | #' @param y preferably a matrix or data frame.
 87 | #' If not, it is attempted to coerce x to a data frame.
 88 | .data_check <- function(x, y){
 89 |   x <- as.data.frame(x)
 90 |   y <- as.data.frame(y)
 91 |   if (is.null(x) || is.null(y)){
 92 |     stop("NULL data")
 93 |   }
 94 |   if(length(dim(y))>1 && dim(y)[2]>1){
 95 |     stop("Sorry, multiple outputs are not supported in MLJAR")
 96 |   }
 97 |   if(dim(y)[1]!=dim(x)[1]){
 98 |     stop("Sorry, there is a missmatch between X and y matrices shapes")
 99 |   }
100 | }
101 | 
102 | #' Stores data in temporary CSV file
103 | #'
104 | #' @param x preferably a matrix or data frame.
105 | #' If not, it is attempted to coerce x to a data frame.
106 | #' @param y preferably a matrix or data frame.
107 | #' If not, it is attempted to coerce x to a data frame.
108 | #'
109 | #' @return tmpfilepath character with path to temporary file
110 | #'
111 | #' @example
112 | #' .data_to_file(c(1,2))
113 | .data_to_file <- function(x, y=NULL){
114 |   if (!is.null(y)){
115 |     # first we check if data is valid
116 |     .data_check(x, y)
117 |     # now it's time to convert to data frame
118 |     dataxy <- as.data.frame(x)
119 |     dataxy["target"] <- y
120 |   } else {
121 |     if (is.null(x)) stop("NULL data")
122 |     dataxy <- as.data.frame(x)
123 |   }
124 |   # temporary csv file is created
125 |   tmpfilepath <- paste0(tempfile(),".csv")
126 |   file.create(tmpfilepath)
127 |   write.csv(dataxy, file = tmpfilepath, row.names = F)
128 |   return(tmpfilepath)
129 | }
130 | 
131 | #' Obtain task
132 | #'
133 | #' Determines what kind of task is that basing on y.
134 | #' @param y target vector/data.frame
135 | #'
136 | #' @return "reg" or "bin_class" depending on kind of task
137 | #' @examples
138 | #' .obtain_task(c(1, 0, 0, 1))
139 | .obtain_task <- function(y){
140 |   return(ifelse(nrow(as.data.frame(unique(y))) > 2, "reg", "bin_class"))
141 | }
142 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | [![Build Status](https://travis-ci.org/mljar/mljar-api-R.svg?branch=master)](https://travis-ci.org/mljar/mljar-api-R)
 2 | [![codecov](https://codecov.io/gh/mljar/mljar-api-R/branch/master/graph/badge.svg)](https://codecov.io/gh/mljar/mljar-api-R)
 3 | 
 4 | # mljar-api-R
 5 | 
 6 | A simple R wrapper for **mljar.com** API. It allows MLJAR users to create Machine Learning models with few lines of code:
 7 | 
 8 | ```R
 9 | library(mljar)
10 | 
11 | model <- mljar_fit(x.training, y.training, validx=x.validation, validy=y.validation,
12 |                 proj_title="Project title", exp_title="experiment title",
13 |                 algorithms = c("logreg"), metric = "logloss")
14 | 
15 | predicted_values <- mljar_predict(model, x.to.predict, "Project title")
16 | ```
17 | 
18 | That's all folks! Yeah, I know, this makes Machine Learning super easy! You can use this code for following Machine Learning tasks:
19 |  * Binary classification (your target has only two unique values)
20 |  * Regression (your target value is continuous)
21 |  * More is coming soon!
22 | 
23 | ## How to install
24 | 
25 | You can install mljar directly from **CRAN**:
26 | 
27 |     install.packages("mljar")
28 | 
29 | Alternatively, you can install the latest development version from GitHub using `devtools`:
30 | 
31 |     devtools::install_github("mljar/mljar-api-R")
32 | 
33 | ## How to use it
34 | 
35 |  1. Create an account at mljar.com and login.
36 |  2. Please go to your users settings (top, right corner).
37 |  3. Get your token, for example 'exampleexampleexample'.
38 |  4. Set environment variable `MLJAR_TOKEN` with your token value in shell:
39 | ```
40 | export MLJAR_TOKEN=exampleexampleexample
41 | ```
42 | or directly in RStudio:
43 | ```
44 | Sys.setenv(MLJAR_TOKEN="examplexampleexample")
45 | ```
46 | 
47 |  5. That's all, you are ready to use MLJAR in your R code!
48 | 
49 | ## What's going on?
50 | 
51 |  * This wrapper allows you to search through different Machine Learning algorithms and tune each of the algorithm.
52 |  * By searching and tuning ML algorithm to your data you will get very accurate model.
53 |  * By calling function `mljar_fit` you create new project and start experiment with models training.
54 |  All your results will be accessible from your mljar.com account - this makes Machine Learning super easy and
55 |  keeps all your models and results in beautiful order. So, you will never miss anything.
56 |  * All computations are done in MLJAR Cloud, they are executed in parallel. So after calling `mljar_fit` method you can switch
57 |  your computer off and MLJAR will do the job for you!
58 |  * I think this is really amazing! What do you think? Please let us know at `contact@mljar.com`.
59 | 
60 | ## Examples
61 | 
62 | Soon
63 | 
64 | ## Testing
65 | 
66 | To run tests use simple command in your R session:
67 | 
68 | ```R
69 | devtools::test()
70 | ```
71 | 


--------------------------------------------------------------------------------
/man/add_dataset_if_not_exists.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataset.R
 3 | \name{add_dataset_if_not_exists}
 4 | \alias{add_dataset_if_not_exists}
 5 | \title{Add dataset if not exists}
 6 | \usage{
 7 | add_dataset_if_not_exists(project_hid, filename, title,
 8 |   prediction_only = FALSE)
 9 | }
10 | \arguments{
11 | \item{project_hid}{character with project identifier}
12 | 
13 | \item{filename}{character with filename containing data}
14 | 
15 | \item{title}{title of dataset}
16 | 
17 | \item{prediction_only}{boolean determining if data is used only for prediction}
18 | }
19 | \value{
20 | parsed dataset details
21 | }
22 | \description{
23 | Checks parameters before adding new dataset and verifies
24 | if it doesn't exists already.
25 | }
26 | 


--------------------------------------------------------------------------------
/man/add_experiment_if_not_exists.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/experiment.R
 3 | \name{add_experiment_if_not_exists}
 4 | \alias{add_experiment_if_not_exists}
 5 | \title{Add experiment if not exists}
 6 | \usage{
 7 | add_experiment_if_not_exists(project_hid, train_dataset, valid_dataset,
 8 |   experiment_title, project_task, validation_kfolds, validation_shuffle,
 9 |   validation_stratify, validation_train_split, algorithms, metric, tuning_mode,
10 |   time_constraint, create_ensemble)
11 | }
12 | \arguments{
13 | \item{project_hid}{character with project identifier}
14 | 
15 | \item{train_dataset}{character with path to training dataset}
16 | 
17 | \item{valid_dataset}{character with path to validation dataset}
18 | 
19 | \item{experiment_title}{character with experiment title}
20 | 
21 | \item{project_task}{character with project task}
22 | 
23 | \item{validation_kfolds}{number of folds to be used in validation}
24 | 
25 | \item{validation_shuffle}{boolean which specify if shuffle samples before training}
26 | 
27 | \item{validation_stratify}{boolean which decides whether samples will be
28 | divided into folds with the same class distribution}
29 | 
30 | \item{validation_train_split}{ratio how to split training dataset into train and validation}
31 | 
32 | \item{algorithms}{list of algorithms to use}
33 | 
34 | \item{metric}{charcater with metric}
35 | 
36 | \item{tuning_mode}{tuning mode}
37 | 
38 | \item{time_constraint}{numeric with time limit to calculate algorithm}
39 | 
40 | \item{create_ensemble}{whether or not to create ensemble}
41 | }
42 | \value{
43 | experiment details structure
44 | }
45 | \description{
46 | Check if experiment exists, verifies parameters, creates data
47 | to create_experiment function and finally starts creation of
48 | MLJAR experiment.
49 | }
50 | 


--------------------------------------------------------------------------------
/man/add_new_dataset.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataset.R
 3 | \name{add_new_dataset}
 4 | \alias{add_new_dataset}
 5 | \title{Adds new dataset}
 6 | \usage{
 7 | add_new_dataset(project_hid, filename, title, prediction_only = FALSE)
 8 | }
 9 | \arguments{
10 | \item{project_hid}{character with project identifier}
11 | 
12 | \item{filename}{character with filename containing data}
13 | 
14 | \item{title}{title of dataset}
15 | 
16 | \item{prediction_only}{boolean determining if data is used only for prediction}
17 | }
18 | \value{
19 | parsed by toJSON dataset details
20 | }
21 | \description{
22 | Adds new dataset
23 | }
24 | 


--------------------------------------------------------------------------------
/man/create_experiment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/experiment.R
 3 | \name{create_experiment}
 4 | \alias{create_experiment}
 5 | \title{Creates experiment from given parameters}
 6 | \usage{
 7 | create_experiment(data)
 8 | }
 9 | \arguments{
10 | \item{data}{list of experiment parameters}
11 | }
12 | \value{
13 | experiment details parsed by fromJSON
14 | }
15 | \description{
16 | Creates experiment from given parameters
17 | }
18 | 


--------------------------------------------------------------------------------
/man/create_project.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/projects.R
 3 | \name{create_project}
 4 | \alias{create_project}
 5 | \title{Creates a new project}
 6 | \usage{
 7 | create_project(title, task, description = "")
 8 | }
 9 | \arguments{
10 | \item{title}{character with project title}
11 | 
12 | \item{task}{character with project task}
13 | 
14 | \item{description}{optional description}
15 | }
16 | \value{
17 | project details structure
18 | }
19 | \description{
20 | Creates a new project
21 | }
22 | 


--------------------------------------------------------------------------------
/man/delete_dataset.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataset.R
 3 | \name{delete_dataset}
 4 | \alias{delete_dataset}
 5 | \title{Deletes dataset}
 6 | \usage{
 7 | delete_dataset(dataset_hid)
 8 | }
 9 | \arguments{
10 | \item{dataset_hid}{character with dataset identifier}
11 | }
12 | \description{
13 | Deletes dataset
14 | }
15 | 


--------------------------------------------------------------------------------
/man/delete_project.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/projects.R
 3 | \name{delete_project}
 4 | \alias{delete_project}
 5 | \title{Delete project}
 6 | \usage{
 7 | delete_project(hid)
 8 | }
 9 | \arguments{
10 | \item{hid}{charceter with project identifier}
11 | }
12 | \description{
13 | Delete project
14 | }
15 | 


--------------------------------------------------------------------------------
/man/get_all_models.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/main.R
 3 | \name{get_all_models}
 4 | \alias{get_all_models}
 5 | \title{Gives data.frame with basic data of all models}
 6 | \usage{
 7 | get_all_models(project_title, exp_title)
 8 | }
 9 | \arguments{
10 | \item{project_title}{character with project title}
11 | 
12 | \item{exp_title}{character with experiment title}
13 | }
14 | \value{
15 | data.frame with model's "hid", "model_type", "metric_value",
16 | "metric_type"
17 | }
18 | \description{
19 | You can later get some specific model by calling
20 | e.g. \code{mod <- get_model(project_title, experiment_title, model_hid)}.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/get_dataset.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataset.R
 3 | \name{get_dataset}
 4 | \alias{get_dataset}
 5 | \title{Gets dataset}
 6 | \usage{
 7 | get_dataset(dataset_hid)
 8 | }
 9 | \arguments{
10 | \item{dataset_hid}{character with dataset identifier}
11 | }
12 | \value{
13 | structure with parsed dataset and response
14 | }
15 | \description{
16 | Gets dataset
17 | }
18 | 


--------------------------------------------------------------------------------
/man/get_datasets.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataset.R
 3 | \name{get_datasets}
 4 | \alias{get_datasets}
 5 | \title{Gets list of available datasets}
 6 | \usage{
 7 | get_datasets(project_hid)
 8 | }
 9 | \arguments{
10 | \item{project_hid}{character with project identifier}
11 | }
12 | \value{
13 | structure with parsed datasets and response
14 | }
15 | \description{
16 | Gets list of available datasets
17 | }
18 | 


--------------------------------------------------------------------------------
/man/get_experiment.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/experiment.R
 3 | \name{get_experiment}
 4 | \alias{get_experiment}
 5 | \title{Gets experiment details}
 6 | \usage{
 7 | get_experiment(experiment_hid)
 8 | }
 9 | \arguments{
10 | \item{experiment_hid}{character with experiment identifier}
11 | }
12 | \value{
13 | structure with parsed experiment and http response
14 | }
15 | \description{
16 | Gets experiment details
17 | }
18 | 


--------------------------------------------------------------------------------
/man/get_experiments.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/experiment.R
 3 | \name{get_experiments}
 4 | \alias{get_experiments}
 5 | \title{Gets list of available experiments for given project}
 6 | \usage{
 7 | get_experiments(project_hid)
 8 | }
 9 | \arguments{
10 | \item{project_hid}{character with project identifier}
11 | }
12 | \value{
13 | structure with parsed experiments and http response
14 | }
15 | \description{
16 | Gets list of available experiments for given project
17 | }
18 | 


--------------------------------------------------------------------------------
/man/get_model.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/result.R
 3 | \name{get_model}
 4 | \alias{get_model}
 5 | \title{Get model}
 6 | \usage{
 7 | get_model(project_title, exp_title, model_hid)
 8 | }
 9 | \arguments{
10 | \item{project_title}{character with project title}
11 | 
12 | \item{exp_title}{character with experiment title}
13 | 
14 | \item{model_hid}{character with experiment identifier}
15 | }
16 | \value{
17 | structure with model parameters
18 | }
19 | \description{
20 | Gets model only if experiment finished and project with such
21 | a title and having such an experiment exists.
22 | }
23 | 


--------------------------------------------------------------------------------
/man/get_prediction.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/prediction.R
 3 | \name{get_prediction}
 4 | \alias{get_prediction}
 5 | \title{Gets MLJAR predictions}
 6 | \usage{
 7 | get_prediction(project_hid, dataset_hid, result_hid)
 8 | }
 9 | \arguments{
10 | \item{project_hid}{character with project identifier}
11 | 
12 | \item{dataset_hid}{character with dataset identifier}
13 | 
14 | \item{result_hid}{character with result identifier}
15 | }
16 | \value{
17 | structure with parsed prediction and http response
18 | }
19 | \description{
20 | Gets MLJAR predictions
21 | }
22 | 


--------------------------------------------------------------------------------
/man/get_project.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/projects.R
 3 | \name{get_project}
 4 | \alias{get_project}
 5 | \title{Get project}
 6 | \usage{
 7 | get_project(hid)
 8 | }
 9 | \arguments{
10 | \item{hid}{character with project unique identifier}
11 | }
12 | \value{
13 | structure with parsed project and http response
14 | }
15 | \description{
16 | Get data from a project of specified hid
17 | }
18 | 


--------------------------------------------------------------------------------
/man/get_projects.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/projects.R
 3 | \name{get_projects}
 4 | \alias{get_projects}
 5 | \title{Get projects}
 6 | \usage{
 7 | get_projects()
 8 | }
 9 | \value{
10 | structure with parsed projects and http response
11 | }
12 | \description{
13 | Gets list of available projects
14 | }
15 | 


--------------------------------------------------------------------------------
/man/get_results.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/result.R
 3 | \name{get_results}
 4 | \alias{get_results}
 5 | \title{Get results of MLJAR training}
 6 | \usage{
 7 | get_results(project_hid, experiment_hid)
 8 | }
 9 | \arguments{
10 | \item{project_hid}{character with project identifier}
11 | 
12 | \item{experiment_hid}{character with experiment identifier}
13 | }
14 | \value{
15 | structure with parsed results and http response
16 | }
17 | \description{
18 | Get results of MLJAR training
19 | }
20 | 


--------------------------------------------------------------------------------
/man/mljar_fit.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/main.R
 3 | \name{mljar_fit}
 4 | \alias{mljar_fit}
 5 | \title{MLJAR FIT}
 6 | \usage{
 7 | mljar_fit(x, y, validx = NULL, validy = NULL, proj_title = NULL,
 8 |   exp_title = NULL, dataset_title = NULL, val_dataset_title = NULL,
 9 |   algorithms = c(), metric = "", wait_till_all_done = TRUE,
10 |   validation_kfolds = MLJAR_DEFAULT_FOLDS,
11 |   validation_shuffle = MLJAR_DEFAULT_SHUFFLE,
12 |   validation_stratify = MLJAR_DEFAULT_STRATIFY,
13 |   validation_train_split = MLJAR_DEFAULT_TRAIN_SPLIT,
14 |   tuning_mode = MLJAR_DEFAULT_TUNING_MODE,
15 |   create_ensemble = MLJAR_DEFAULT_ENSEMBLE,
16 |   single_algorithm_time_limit = MLJAR_DEFAULT_TIME_CONSTRAINT)
17 | }
18 | \arguments{
19 | \item{x}{data.frame/matrix with training data}
20 | 
21 | \item{y}{data.frame/matrix with training labels}
22 | 
23 | \item{validx}{data.frame/matrix with validation data}
24 | 
25 | \item{validy}{data.frame/matrix with validation labels}
26 | 
27 | \item{proj_title}{charcater with project title}
28 | 
29 | \item{exp_title}{charcater with experiment title}
30 | 
31 | \item{dataset_title}{charcater with dataset name}
32 | 
33 | \item{val_dataset_title}{charcater with validation dataset name}
34 | 
35 | \item{algorithms}{list of algorithms to use
36 | For binary classification task available algorithm are:
37 | "xgb" which is for Xgboost,
38 | "lgb" which is for LightGBM
39 | "mlp" which is for Neural Network,
40 | "rfc" which is for Random Forest,
41 | "etc" which is for Extra Trees,
42 | "rgfc" which is for Regularized Greedy Forest,
43 | "knnc" which is for k-Nearest Neighbors,
44 | "logreg" which is for Logistic Regression.
45 | For regression task there are available algorithms:
46 | "xgbr" which is for Xgboost,
47 | "lgbr" which is for LightGBM,
48 | "rgfr" which is for Regularized Greedy Forest,
49 | "rfr" which is for Random Forest,
50 | "etr" which is for Extra Trees.}
51 | 
52 | \item{metric}{charcater with metric
53 | For binary classification there are metrics:
54 | "auc" which is for Area Under ROC Curve,
55 | "logloss" which is for Logarithmic Loss.
56 | For regression tasks:
57 | "rmse" which is Root Mean Square Error,
58 | "mse" which is for Mean Square Error,
59 | "mase" which is for Mean Absolute Error.}
60 | 
61 | \item{wait_till_all_done}{boolean saying whether function should wait
62 | till all models are done}
63 | 
64 | \item{validation_kfolds}{number of folds to be used in validation}
65 | 
66 | \item{validation_shuffle}{boolean which specify if shuffle samples before training}
67 | 
68 | \item{validation_stratify}{boolean which decides whether samples will be
69 | divided into folds with the same class distribution}
70 | 
71 | \item{validation_train_split}{ratio how to split training dataset into train and validation}
72 | 
73 | \item{tuning_mode}{tuning mode}
74 | 
75 | \item{create_ensemble}{whether or not to create ensemble}
76 | 
77 | \item{single_algorithm_time_limit}{numeric with time limit to calculate algorithm}
78 | }
79 | \value{
80 | structure with the best model
81 | }
82 | \description{
83 | Verifies parameters and data and tries to run experiment.
84 | }
85 | 


--------------------------------------------------------------------------------
/man/mljar_predict.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/main.R
 3 | \name{mljar_predict}
 4 | \alias{mljar_predict}
 5 | \title{MLJAR PREDICT}
 6 | \usage{
 7 | mljar_predict(model, x_pred, project_title)
 8 | }
 9 | \arguments{
10 | \item{model}{model or MLJAR result structure}
11 | 
12 | \item{x_pred}{data.frame/matrix data to predict}
13 | 
14 | \item{project_title}{character with project title}
15 | }
16 | \value{
17 | data.frame with preditction
18 | }
19 | \description{
20 | Makes prediction basing on trained model.
21 | }
22 | 


--------------------------------------------------------------------------------
/man/prediction_download.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/prediction_download.R
 3 | \name{prediction_download}
 4 | \alias{prediction_download}
 5 | \title{Function to get predictions from MLJAR.}
 6 | \usage{
 7 | prediction_download(prediction_hid)
 8 | }
 9 | \arguments{
10 | \item{prediction_hid}{prediction identifier}
11 | }
12 | \value{
13 | data.frame with prediction
14 | }
15 | \description{
16 | Function to get predictions from MLJAR.
17 | }
18 | 


--------------------------------------------------------------------------------
/man/print_all_projects.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/projects.R
 3 | \name{print_all_projects}
 4 | \alias{print_all_projects}
 5 | \title{Print all projects}
 6 | \usage{
 7 | print_all_projects()
 8 | }
 9 | \value{
10 | data.frame with projects
11 | }
12 | \description{
13 | Gives data.frame with basic information about existing projects
14 | }
15 | 


--------------------------------------------------------------------------------
/man/submit_predict_job.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/predictjob.R
 3 | \name{submit_predict_job}
 4 | \alias{submit_predict_job}
 5 | \title{Submits dataset for MLJAR prediction}
 6 | \usage{
 7 | submit_predict_job(project_hid, dataset_hid, result_hid)
 8 | }
 9 | \arguments{
10 | \item{project_hid}{character with project identifier}
11 | 
12 | \item{dataset_hid}{character with dataset identifier}
13 | 
14 | \item{result_hid}{character with result identifier}
15 | }
16 | \description{
17 | Submits dataset for MLJAR prediction
18 | }
19 | 


--------------------------------------------------------------------------------
/man/upload_file.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dataupload.R
 3 | \name{upload_file}
 4 | \alias{upload_file}
 5 | \title{Uploads file into MLJAR}
 6 | \usage{
 7 | upload_file(project_hid, filepath)
 8 | }
 9 | \arguments{
10 | \item{project_hid}{character with project identifier}
11 | 
12 | \item{filepath}{character with path to file}
13 | }
14 | \value{
15 | character with destination path
16 | }
17 | \description{
18 | It uploads file into MLJAR and returns destination path.
19 | }
20 | 


--------------------------------------------------------------------------------
/mljar.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace
22 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(mljar)
 3 | 
 4 | # token needed by CRAN
 5 | Sys.setenv(MLJAR_TOKEN="10bc57e737c2ca5516bb01ab29549978b53d83a4")
 6 | gp <- get_projects()
 7 | if (length(gp$projects)>0) {
 8 |   for (pr in gp$projects){
 9 |     delete_project(pr$hid)
10 |   }
11 | }
12 | test_check("mljar")
13 | Sys.unsetenv("MLJAR_TOKEN")
14 | 


--------------------------------------------------------------------------------
/tests/testthat/binary_part_iris_converted.csv:
--------------------------------------------------------------------------------
  1 | sepal length,sepal width,petal length,petal width,class
  2 | 5.1,3.5,1.4,0.2,0
  3 | 4.9,3.0,1.4,0.2,0
  4 | 4.7,3.2,1.3,0.2,0
  5 | 4.6,3.1,1.5,0.2,0
  6 | 5.0,3.6,1.4,0.2,0
  7 | 5.4,3.9,1.7,0.4,0
  8 | 4.6,3.4,1.4,0.3,0
  9 | 5.0,3.4,1.5,0.2,0
 10 | 4.4,2.9,1.4,0.2,0
 11 | 4.9,3.1,1.5,0.1,0
 12 | 5.4,3.7,1.5,0.2,0
 13 | 4.8,3.4,1.6,0.2,0
 14 | 4.8,3.0,1.4,0.1,0
 15 | 4.3,3.0,1.1,0.1,0
 16 | 5.8,4.0,1.2,0.2,0
 17 | 5.7,4.4,1.5,0.4,0
 18 | 5.4,3.9,1.3,0.4,0
 19 | 5.1,3.5,1.4,0.3,0
 20 | 5.7,3.8,1.7,0.3,0
 21 | 5.1,3.8,1.5,0.3,0
 22 | 5.4,3.4,1.7,0.2,0
 23 | 5.1,3.7,1.5,0.4,0
 24 | 4.6,3.6,1.0,0.2,0
 25 | 5.1,3.3,1.7,0.5,0
 26 | 4.8,3.4,1.9,0.2,0
 27 | 5.0,3.0,1.6,0.2,0
 28 | 5.0,3.4,1.6,0.4,0
 29 | 5.2,3.5,1.5,0.2,0
 30 | 5.2,3.4,1.4,0.2,0
 31 | 4.7,3.2,1.6,0.2,0
 32 | 4.8,3.1,1.6,0.2,0
 33 | 5.4,3.4,1.5,0.4,0
 34 | 5.2,4.1,1.5,0.1,0
 35 | 5.5,4.2,1.4,0.2,0
 36 | 4.9,3.1,1.5,0.1,0
 37 | 5.0,3.2,1.2,0.2,0
 38 | 5.5,3.5,1.3,0.2,0
 39 | 4.9,3.1,1.5,0.1,0
 40 | 4.4,3.0,1.3,0.2,0
 41 | 5.1,3.4,1.5,0.2,0
 42 | 5.0,3.5,1.3,0.3,0
 43 | 4.5,2.3,1.3,0.3,0
 44 | 4.4,3.2,1.3,0.2,0
 45 | 5.0,3.5,1.6,0.6,0
 46 | 5.1,3.8,1.9,0.4,0
 47 | 4.8,3.0,1.4,0.3,0
 48 | 5.1,3.8,1.6,0.2,0
 49 | 4.6,3.2,1.4,0.2,0
 50 | 5.3,3.7,1.5,0.2,0
 51 | 5.0,3.3,1.4,0.2,0
 52 | 7.0,3.2,4.7,1.4,1
 53 | 6.4,3.2,4.5,1.5,1
 54 | 6.9,3.1,4.9,1.5,1
 55 | 5.5,2.3,4.0,1.3,1
 56 | 6.5,2.8,4.6,1.5,1
 57 | 5.7,2.8,4.5,1.3,1
 58 | 6.3,3.3,4.7,1.6,1
 59 | 4.9,2.4,3.3,1.0,1
 60 | 6.6,2.9,4.6,1.3,1
 61 | 5.2,2.7,3.9,1.4,1
 62 | 5.0,2.0,3.5,1.0,1
 63 | 5.9,3.0,4.2,1.5,1
 64 | 6.0,2.2,4.0,1.0,1
 65 | 6.1,2.9,4.7,1.4,1
 66 | 5.6,2.9,3.6,1.3,1
 67 | 6.7,3.1,4.4,1.4,1
 68 | 5.6,3.0,4.5,1.5,1
 69 | 5.8,2.7,4.1,1.0,1
 70 | 6.2,2.2,4.5,1.5,1
 71 | 5.6,2.5,3.9,1.1,1
 72 | 5.9,3.2,4.8,1.8,1
 73 | 6.1,2.8,4.0,1.3,1
 74 | 6.3,2.5,4.9,1.5,1
 75 | 6.1,2.8,4.7,1.2,1
 76 | 6.4,2.9,4.3,1.3,1
 77 | 6.6,3.0,4.4,1.4,1
 78 | 6.8,2.8,4.8,1.4,1
 79 | 6.7,3.0,5.0,1.7,1
 80 | 6.0,2.9,4.5,1.5,1
 81 | 5.7,2.6,3.5,1.0,1
 82 | 5.5,2.4,3.8,1.1,1
 83 | 5.5,2.4,3.7,1.0,1
 84 | 5.8,2.7,3.9,1.2,1
 85 | 6.0,2.7,5.1,1.6,1
 86 | 5.4,3.0,4.5,1.5,1
 87 | 6.0,3.4,4.5,1.6,1
 88 | 6.7,3.1,4.7,1.5,1
 89 | 6.3,2.3,4.4,1.3,1
 90 | 5.6,3.0,4.1,1.3,1
 91 | 5.5,2.5,4.0,1.3,1
 92 | 5.5,2.6,4.4,1.2,1
 93 | 6.1,3.0,4.6,1.4,1
 94 | 5.8,2.6,4.0,1.2,1
 95 | 5.0,2.3,3.3,1.0,1
 96 | 5.6,2.7,4.2,1.3,1
 97 | 5.7,3.0,4.2,1.2,1
 98 | 5.7,2.9,4.2,1.3,1
 99 | 6.2,2.9,4.3,1.3,1
100 | 5.1,2.5,3.0,1.1,1
101 | 5.7,2.8,4.1,1.3,1
102 | 


--------------------------------------------------------------------------------
/tests/testthat/test_dataset.R:
--------------------------------------------------------------------------------
 1 | library(mljar)
 2 | context("Test API dataset")
 3 | 
 4 | task <- "bin_class"
 5 | pr <- create_project("ds", task, "some description")
 6 | hid <- pr$hid
 7 | 
 8 | 
 9 | test_that("test add_new_dataset", {
10 |   file_from_resources <- "binary_part_iris_converted.csv"
11 |   expect_error(add_new_dataset(hid, file_from_resources, "title"), NA)
12 | })
13 | 
14 | 
15 | test_that("test get_datasets", {
16 |   ds <- get_datasets(hid)
17 |   expect_equal(length(get_datasets(hid)$datasets), 1)
18 | })
19 | 
20 | test_that("test get_dataset", {
21 |   ds_hid <- get_datasets(hid)$datasets[[1]]$hid
22 |   expect_equal(get_dataset(ds_hid)$dataset$hid, ds_hid)
23 | })
24 | 
25 | test_that("test .wait_till_all_datasets_are_valid", {
26 |   expect_true(.wait_till_all_datasets_are_valid(hid))
27 | })
28 | 
29 | test_that("test .accept_dataset_column_usage", {
30 |   ds_hid <- get_datasets(hid)$datasets[[1]]$hid
31 |   expect_true(.accept_dataset_column_usage(ds_hid))
32 | })
33 | 
34 | test_that("test delete_dataset", {
35 |   ds_hid <- get_datasets(hid)$datasets[[1]]$hid
36 |   delete_dataset(ds_hid)
37 |   expect_equal(length(get_datasets(hid)$datasets), 0)
38 | })
39 | 
40 | test_that( "test add_dataset_if_not_exists", {
41 |   file_from_resources <- "binary_part_iris_converted.csv"
42 |   expect_error(add_dataset_if_not_exists(hid, file_from_resources, "title-1"),
43 |                NA)
44 |   expect_warning(add_dataset_if_not_exists(hid, file_from_resources, "title-1"),
45 |                "Dataset with the same name already exists: <title-1>")
46 | })
47 | 
48 | delete_project(hid)
49 | 


--------------------------------------------------------------------------------
/tests/testthat/test_dataupload.R:
--------------------------------------------------------------------------------
 1 | library(mljar)
 2 | context("Test API data upload")
 3 | 
 4 | test_that("test data_upload", {
 5 |   task <- "Binary Classification"
 6 |   create_project('a', task, 'some description')
 7 |   gp <- get_projects()
 8 |   tmpfilepath <- tempfile()
 9 |   file.create(tmpfilepath)
10 |   write.csv(c(1.0,2.0,1.1), file = tmpfilepath)
11 |   project_hid <- gp$projects[[1]]$hid
12 |   up <- upload_file(project_hid, tmpfilepath)
13 |   expect_gt(nchar(up), 1)
14 |   delete_project(project_hid)
15 |   file.remove(tmpfilepath)
16 | })
17 | 


--------------------------------------------------------------------------------
/tests/testthat/test_experiment.R:
--------------------------------------------------------------------------------
 1 | library(mljar)
 2 | context("Test experiment")
 3 | 
 4 | task <- "bin_class"
 5 | pr <- create_project('ds', task, 'some description')
 6 | hid <- pr$hid
 7 | 
 8 | pr_task <- pr$task
 9 | file_from_resources <- "binary_part_iris_converted.csv"
10 | dataset1 <- add_dataset_if_not_exists(hid, file_from_resources, "test-exp1")
11 | 
12 | validation_kfolds <- 5
13 | validation_shuffle <- TRUE
14 | validation_stratify <- TRUE
15 | validation_train_split <- NULL
16 | validation <- "5-fold CV, Shuffle, Stratify"
17 | algorithms <- c("xgb")
18 | metric <- "logloss"
19 | tuning_mode <- "Normal"
20 | time_constraint <- 1
21 | create_ensemble <- FALSE
22 | dataset_preproc <- {}
23 | 
24 | test_that("test create_experiment", {
25 |   params <- list(
26 |     train_dataset = list(id = dataset1$dataset$hid, title = dataset1$dataset$title),
27 |     algs = c(algorithms,""),
28 |     preproc = dataset_preproc,
29 |     single_limit = time_constraint,
30 |     ensemble = create_ensemble,
31 |     random_start_cnt = MLJAR_TUNING_MODES[[tuning_mode]][["random_start_cnt"]],
32 |     hill_climbing_cnt =  MLJAR_TUNING_MODES[[tuning_mode]][["hill_climbing_cnt"]]
33 |   )
34 |   params <- jsonlite::toJSON(params, auto_unbox =TRUE)
35 |   exp_data <- list( title =  "exp-1",
36 |                     description = "",
37 |                     metric = metric,
38 |                     validation_scheme = validation,
39 |                     task = pr_task,
40 |                     compute_now = 1,
41 |                     parent_project = hid,
42 |                     params = params
43 |                   )
44 |   expect_error(create_experiment(exp_data), NA)
45 | 
46 | })
47 | 
48 | test_that("test get_experiments", {
49 |   ds <- get_experiments(hid)
50 |   expect_equal(length(get_experiments(hid)$experiments), 1)
51 | })
52 | 
53 | test_that("test get_experiment", {
54 |   ex_hid <- get_experiments(hid)$experiments[[1]]$hid
55 |   expect_equal(get_experiment(ex_hid)$experiment$hid, ex_hid)
56 | })
57 | 
58 | test_that("test add_experiment_if_not_exists", {
59 |   expect_error(add_experiment_if_not_exists(hid, dataset1$dataset, NULL, "exp-2",
60 |                                pr_task, validation_kfolds, validation_shuffle,
61 |                                validation_stratify, validation_train_split, algorithms, metric,
62 |                                tuning_mode, time_constraint, create_ensemble), NA)
63 | 
64 |   expect_error(add_experiment_if_not_exists(hid, dataset1$dataset, NULL, "exp-2",
65 |                                pr_task, validation_kfolds, validation_shuffle,
66 |                                validation_stratify, validation_train_split, algorithms, metric,
67 |                                tuning_mode, time_constraint, create_ensemble),
68 |                "Please rename your new experiment with new parameters setup.")
69 | })
70 | 
71 | delete_project(hid)
72 | 


--------------------------------------------------------------------------------
/tests/testthat/test_main.R:
--------------------------------------------------------------------------------
 1 | library(mljar)
 2 | context("Test main")
 3 | 
 4 | file_from_resources <- "binary_part_iris_converted.csv"
 5 | irisdata <- read.csv(file_from_resources)
 6 | dx <- irisdata[-5]
 7 | dy <- irisdata[5]
 8 | 
 9 | irisdata2 <- irisdata[sample(nrow(irisdata)),]
10 | x.tr <- irisdata2[1:80,-5]
11 | y.tr <- irisdata2[1:80,5]
12 | x.vl <- irisdata2[81:100,-5]
13 | y.vl <- irisdata2[81:100,5]
14 | 
15 | expname <- "fullexp1"
16 | 
17 | test_that("test mljar_fit reactions to bad arguments",{
18 |   expect_error(mljar_fit(NULL, NULL, validx=NULL, validy=NULL,
19 |                          proj_title="fullproject1", exp_title="fullexp2",
20 |                          algorithms = c("xgb"), metric = "logloss"),
21 |                "NULL data"
22 |   )
23 | })
24 | 
25 | test_that("test mljar_fit and mljar_predict integration test",{
26 |   bs <- mljar_fit(x.tr, y.tr, validx=x.vl, validy=y.vl,
27 |                   proj_title="fullproject2", exp_title=expname,
28 |                   algorithms = c("logreg"), metric = "logloss")
29 |   expect_equal(bs$experiment, expname)
30 |   expect_equal(bs$status, "Done")
31 |   expect_error(predvals <- mljar_predict(bs, x.vl, "fullproject2"), NA)
32 |   expect_equal(as.numeric(predvals > 0.5), y.vl)
33 |   # test running predict with model id
34 |   model_hid <- bs$hid
35 |   expect_error(predvals <- mljar_predict(model_hid, x.vl, "fullproject2"), NA)
36 |   expect_equal(as.numeric(predvals > 0.5), y.vl)
37 | })
38 | 
39 | test_that("test get_all_models integration test",{
40 |   expect_error(get_all_models("fullproject2", "x"),
41 |      "MLJAR cannot find an experiment with such a title. Check and try again.")
42 |   expect_error(get_all_models("f", "x"),
43 |      "MLJAR cannot find a project with such a title. Check and try again.")
44 |   df <- get_all_models("fullproject2", expname)
45 |   expect_equal(colnames(df), c("hid", "model_type", "metric_value",
46 |                                "metric_type", "validation_scheme"))
47 | })
48 | 
49 | projects <- get_projects()
50 | delete_project(projects$projects[[1]]$hid)
51 | 


--------------------------------------------------------------------------------
/tests/testthat/test_project.R:
--------------------------------------------------------------------------------
 1 | library(mljar)
 2 | context("Test API projects")
 3 | 
 4 | test_that("test get_projects", {
 5 |   gp <- get_projects()
 6 |   expect_equal(length(gp$projects), 0)
 7 | })
 8 | 
 9 | test_that("test create_project and get_projects", {
10 |   task <- "bin_class"
11 |   pr_a <- create_project("a", task, "description a")
12 |   expect_match(pr_a$title, "a")
13 |   pr_a <- create_project("b", task, "description b")
14 |   expect_match(pr_a$title, "b")
15 |   gp <- get_projects()
16 |   expect_equal(length(gp$projects), 2)
17 | })
18 | 
19 | test_that("test .verify_if_project_exists", {
20 |   task <- "bin_class"
21 |   .verify_if_project_exists
22 |   expect_error(.verify_if_project_exists("a", task),
23 |                "Project with the same title and task already exists, change name.")
24 | })
25 | 
26 | test_that("test delete_project and get_projects", {
27 |   gp <- get_projects()
28 |   # here we search for project named a
29 |   for(i in 1:length(gp$projects)) {
30 |     if (gp$projects[[i]]$title=="a"){
31 |       break
32 |     }
33 |   }
34 |   hid <- gp$projects[[i]]$hid
35 |   expect_match(delete_project(hid), "succesfully deleted!")
36 |   gp <- get_projects()
37 |   expect_equal(length(gp$projects), 1)
38 | })
39 | 
40 | test_that("test print_all_projects", {
41 |   df <- print_all_projects()
42 |   expect_equal(colnames(df), c("hid", "title", "task", "description"))
43 | })
44 | 
45 | test_that("test get_project, delete_project and get_projects", {
46 |   gp <- get_projects()
47 |   hid <- gp$projects[[1]]$hid
48 |   pr <- get_project(hid)
49 |   expect_equal(pr$project$title, "b")
50 |   expect_match(delete_project(hid), "succesfully deleted!")
51 |   gp <- get_projects()
52 |   expect_equal(length(gp$projects), 0)
53 | })
54 | 


--------------------------------------------------------------------------------
/tests/testthat/test_results.R:
--------------------------------------------------------------------------------
 1 | library(mljar)
 2 | context("Test experiment")
 3 | 
 4 | pr_task <- "bin_class"
 5 | pr_title <- "ds"
 6 | pr <- create_project(pr_title, pr_task, 'some description')
 7 | hid <- pr$hid
 8 | 
 9 | file_from_resources <- "binary_part_iris_converted.csv"
10 | dataset1 <- add_dataset_if_not_exists(hid, file_from_resources, "test-exp1")
11 | 
12 | validation_kfolds <- 5
13 | validation_shuffle <- TRUE
14 | validation_stratify <-TRUE
15 | validation_train_split <- NULL
16 | validation <- "5-fold CV, Shuffle, Stratify"
17 | algorithms <- c("logreg")
18 | metric <- "logloss"
19 | tuning_mode <- "Normal"
20 | time_constraint <- 1
21 | create_ensemble <- FALSE
22 | dataset_preproc <- {}
23 | 
24 | exp <- add_experiment_if_not_exists(hid, dataset1$dataset, NULL, "exp",
25 |                                     pr_task, validation_kfolds, validation_shuffle,
26 |                                     validation_stratify, validation_train_split, algorithms, metric,
27 |                                     tuning_mode, time_constraint, create_ensemble)
28 | test_that("test get_results", {
29 |   wait <- 5
30 |   for (i in 1:wait){
31 |     Sys.sleep(4) # wait till experiment is initiated
32 |     exp_dd <- get_experiment(exp$hid)
33 |     if (exp_dd$experiment$compute_now == 2) {
34 |       r <- get_results(hid, exp$hid)
35 |       expect_equal(length(r$results), 5)
36 |       break
37 |     }
38 |   }
39 | })
40 | 
41 | test_that("test get_model for bad arguments", {
42 | expect_error(get_model("xasxasdasda", "a", "a"),
43 |              "MLJAR cannot find a project with such a title. Check and try again.")
44 | })
45 | 
46 | test_that("test get_model for right arguments", {
47 |   exp_dd <- get_experiment(exp$hid)
48 |   if (exp_dd$experiment$compute_now == 2) {
49 |     rs <- get_results(hid, exp_dd$experiment$hid)
50 |     model <- get_model(pr_title, exp_dd$experiment$title, rs$results[[1]]$hid)
51 |     expect_equal(model$hid, rs$results[[1]]$hid)
52 |   }
53 | })
54 | 
55 | delete_project(hid)
56 | 


--------------------------------------------------------------------------------
/tests/testthat/test_utils.R:
--------------------------------------------------------------------------------
 1 | library(mljar)
 2 | context("Test utils")
 3 | 
 4 | test_that("test .get_token", {
 5 |   tok <- .get_token()
 6 |   expect_type(tok, "character")
 7 | })
 8 | 
 9 | test_that("test .get_json_from_get_query", {
10 |   query <- paste0(MLAR_API_PATH, API_VERSION, "/projects")
11 |   r <- .get_json_from_get_query(query)
12 |   expect_equal(names(r), c("resp", "parsed"))
13 | })
14 | 
15 | test_that("test .check_response_status", {
16 |   query <- paste0(MLAR_API_PATH, API_VERSION, "/projects")
17 |   r <- .get_json_from_get_query(query)
18 |   expect_error(.check_response_status(r$resp, 200), NA)
19 |   expect_error(.check_response_status(r$resp, 222, "omg"), "omg")
20 | })
21 | 
22 | test_that("test .obtain_task", {
23 |   expect_equal(.obtain_task(c(1,0,0,0)), "bin_class")
24 |   expect_equal(.obtain_task(c(1,2,3)), "reg")
25 | })
26 | 
27 | test_that("test .data_check", {
28 |   expect_error(.data_check(c(1,2,3), data.frame(a=c(1,2), b=c(2,1))),
29 |                "Sorry, multiple outputs are not supported in MLJAR")
30 |   expect_error(.data_check(as.data.frame(c(1,2,3)), data.frame(a=c(1,2))),
31 |                "Sorry, there is a missmatch between X and y matrices shapes")
32 |   expect_error(.data_check(as.data.frame(c(1,2)), data.frame(a=c(1,2))),
33 |                NA)
34 | })
35 | 
36 | test_that("test .data_to_file", {
37 |   tmpf <- .data_to_file(c(1,2))
38 |   expect_type(tmpf, "character")
39 |   expect_equal(unlist(strsplit(tmpf,"[.]"))[[2]], "csv")
40 | })
41 | 


--------------------------------------------------------------------------------