├── .Rbuildignore ├── .gitignore ├── .travis.yml ├── DESCRIPTION ├── LICENSE ├── NAMESPACE ├── R ├── dataset.R ├── dataupload.R ├── experiment.R ├── main.R ├── params.R ├── prediction.R ├── prediction_download.R ├── predictjob.R ├── projects.R ├── result.R └── utils.R ├── README.md ├── man ├── add_dataset_if_not_exists.Rd ├── add_experiment_if_not_exists.Rd ├── add_new_dataset.Rd ├── create_experiment.Rd ├── create_project.Rd ├── delete_dataset.Rd ├── delete_project.Rd ├── get_all_models.Rd ├── get_dataset.Rd ├── get_datasets.Rd ├── get_experiment.Rd ├── get_experiments.Rd ├── get_model.Rd ├── get_prediction.Rd ├── get_project.Rd ├── get_projects.Rd ├── get_results.Rd ├── mljar_fit.Rd ├── mljar_predict.Rd ├── prediction_download.Rd ├── print_all_projects.Rd ├── submit_predict_job.Rd └── upload_file.Rd ├── mljar.Rproj └── tests ├── testthat.R └── testthat ├── binary_part_iris_converted.csv ├── test_dataset.R ├── test_dataupload.R ├── test_experiment.R ├── test_main.R ├── test_project.R ├── test_results.R └── test_utils.R /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^\.travis\.yml$ 4 | ^\.httr-oauth$ 5 | ^cran-comments\.md$ 6 | ^README\.Rmd$ 7 | ^revdep$ 8 | ^codecov\.yml$ 9 | ^appveyor\.yml$ 10 | ^Doxyfile$ 11 | ^clion-test\.R$ 12 | ^API$ 13 | ^ISSUE_TEMPLATE\.md$ 14 | ^LICENSE\.md$ 15 | ^BROWSE$ 16 | ^GPATH$ 17 | ^GRTAGS$ 18 | ^GTAGS$ 19 | ^TAGS$ 20 | ^\.dir-locals\.el$ 21 | ^vignettes/rsconnect$ 22 | ^docs$ 23 | ^_pkgdown\.yml$ 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 
| # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | .Rproj.user 35 | 36 | .DS_Store 37 | tests/.DS_Store 38 | tests/testthat/.DS_Store 39 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | 3 | dist: trusty 4 | 5 | r_github_packages: 6 | - jimhester/covr 7 | 8 | after_success: 9 | - Rscript -e 'covr::codecov()' --verbose 10 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: mljar 2 | Title: R API for MLJAR 3 | Version: 0.1.2 4 | Author: Dominik Krzemiński , Piotr Płoński 5 | Maintainer: Piotr Płoński 6 | Description: Provides an R API wrapper for 'mljar.com', a web service allowing for on-line training for machine learning models (see for more information). 
7 | License: MIT + file LICENSE 8 | URL: http://mljar.com, https://github.com/mljar/mljar-api-R 9 | BugReports: https://github.com/mljar/mljar-api-R/issues 10 | Encoding: UTF-8 11 | LazyData: true 12 | RoxygenNote: 6.0.1 13 | Suggests: 14 | testthat, 15 | covr 16 | Depends: 17 | R (>= 3.1.2) 18 | Imports: 19 | httr, 20 | jsonlite, 21 | readr 22 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2018 2 | COPYRIGHT HOLDER: MLJAR Inc 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | export(add_dataset_if_not_exists) 2 | export(add_new_dataset) 3 | export(add_experiment_if_not_exists) 4 | export(create_experiment) 5 | export(create_project) 6 | export(delete_dataset) 7 | export(delete_project) 8 | export(get_dataset) 9 | export(get_datasets) 10 | export(get_experiment) 11 | export(get_experiments) 12 | export(get_prediction) 13 | export(get_project) 14 | export(get_projects) 15 | export(print_all_projects) 16 | export(get_results) 17 | export(get_model) 18 | export(mljar_fit) 19 | export(mljar_predict) 20 | export(prediction_download) 21 | export(submit_predict_job) 22 | export(upload_file) 23 | export(get_all_models) 24 | importFrom(httr,GET) 25 | importFrom(httr,POST) 26 | importFrom(httr,PUT) 27 | importFrom(httr,DELETE) 28 | importFrom(httr,add_headers) 29 | importFrom(httr,content) 30 | importFrom(httr,status_code) 31 | importFrom(jsonlite,fromJSON) 32 | importFrom(jsonlite,toJSON) 33 | importFrom("stats", "runif") 34 | importFrom("utils", "read.csv", "str", "tail", "write.csv") 35 | -------------------------------------------------------------------------------- /R/dataset.R: -------------------------------------------------------------------------------- 1 | #' Gets list of available datasets 2 | #' 3 
#' Gets list of available datasets
#'
#' @param project_hid character with project identifier
#'
#' @return structure with parsed datasets and response
#' @export
get_datasets <- function(project_hid) {
  api_url_datasets <- paste(MLAR_API_PATH, API_VERSION,
                            "/datasets?project_id=", project_hid, sep = "")
  rp <- .get_json_from_get_query(api_url_datasets)
  # FIX: removed a stray roxygen line that sat inside the function body
  structure(
    list(
      datasets = rp$parsed,
      response = rp$resp
    ),
    class = "get_datasets"
  )
}

print.get_datasets <- function(x, ...) {
  cat("\n", sep = "")
  str(x$datasets)
  invisible(x)
}

#' Gets dataset
#'
#' @param dataset_hid character with dataset identifier
#'
#' @return structure with parsed dataset and response
#' @export
get_dataset <- function(dataset_hid) {
  api_url_dataset_hid <- paste(MLAR_API_PATH, API_VERSION, "/datasets/",
                               dataset_hid, sep = "")
  rp <- .get_json_from_get_query(api_url_dataset_hid)
  structure(
    list(
      dataset = rp$parsed,
      response = rp$resp
    ),
    class = "get_dataset"
  )
}

print.get_dataset <- function(x, ...) {
  cat("\n", sep = "")
  str(x$dataset)
  invisible(x)
}

#' Deletes dataset
#'
#' @param dataset_hid character with dataset identifier
#' @export
delete_dataset <- function(dataset_hid) {
  token <- .get_token()
  api_url_dataset_hid <- paste(MLAR_API_PATH, API_VERSION, "/datasets/",
                               dataset_hid, sep = "")
  resp <- DELETE(api_url_dataset_hid,
                 add_headers(Authorization = paste("Token", token)))
  # Both 204 (No Content) and 200 indicate successful deletion.
  if (status_code(resp) == 204 || status_code(resp) == 200) {
    # FIX: the original sprintf() result was silently discarded;
    # message() actually reports the confirmation (typo fixed too).
    message(sprintf("Dataset <%s> successfully deleted!", dataset_hid))
  }
  invisible(NULL)
}

#' Adds new dataset
#'
#' @param project_hid character with project identifier
#' @param filename character with filename containing data
#' @param title title of dataset
#' @param prediction_only boolean determining if data is used only for prediction
#'
#' @return parsed by toJSON dataset details
#' @export
#'
#' @importFrom httr POST
#' @importFrom jsonlite toJSON
add_new_dataset <- function(project_hid, filename, title, prediction_only = FALSE) {
  # upload the raw file first; the API then registers it as a dataset
  dst_path <- upload_file(project_hid, filename)
  # API expects 0/1, not TRUE/FALSE
  prediction_only <- as.integer(prediction_only)
  token <- .get_token()
  api_url_new_dataset <- paste(MLAR_API_PATH, API_VERSION, "/datasets", sep = "")
  data <- list(
    title = title,
    file_path = dst_path,
    file_name = filename,
    file_size = round(file.info(filename)$size / 1024, 2),  # size in KB
    derived = 0,
    valid = 0,
    parent_project = project_hid,
    meta = "",
    data_type = "tabular",
    scope = "private",
    prediction_only = prediction_only
  )
  resp <- POST(api_url_new_dataset,
               add_headers(Authorization = paste("Token", token)),
               body = data, encode = "form")
  .check_response_status(resp, 201)
  if (status_code(resp) == 201) {
    print(sprintf("Dataset <%s> created!", title))
  }
  dataset_details <- jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8"),
                                        simplifyVector = FALSE)
  return(dataset_details)
}


#' Wait till all datasets are valid
#'
#' Waits till all datasets are valid. If all are valid it returns TRUE;
#' if the wait time is exceeded and any dataset is still not valid,
#' it stops with an error.
#'
#' @param project_hid hid of the project
#'
#' @return TRUE if all datasets are valid
#'
.wait_till_all_datasets_are_valid <- function(project_hid) {
  total_checks <- 120
  time_interval <- 5  # sleep for 5 sec every iteration
  for (i in seq_len(total_checks)) {
    datasets_list <- get_datasets(project_hid)
    if (length(datasets_list$datasets) == 0) {
      # FIX: sprintf() result was discarded; message() emits it
      message("No datasets")
      return(TRUE)
    } else {
      # count how many datasets report valid == 1
      valid_cnt <- 0
      for (k in seq_along(datasets_list$datasets)) {
        valid_cnt <- valid_cnt + datasets_list$datasets[[k]]$valid
      }
      if (valid_cnt == length(datasets_list$datasets)) {
        message("All datasets are valid")
        return(TRUE)
      }
    }
    Sys.sleep(time_interval)
  }
  stop("Some datasets are invalid.")
}

#' Verify if columns have correct structure
#'
#' At least one column must be Target and this is verified on server side.
#'
#' @param dataset_hid dataset hid code
#'
#' @return TRUE if correct, FALSE if not
#'
.accept_dataset_column_usage <- function(dataset_hid) {
  token <- .get_token()
  api_url_new_dataset <- paste(MLAR_API_PATH, API_VERSION,
                               "/accept_column_usage/", sep = "")
  data <- list(dataset_id = dataset_hid)
  resp <- POST(api_url_new_dataset,
               add_headers(Authorization = paste("Token", token)),
               body = data, encode = "form")
  return(status_code(resp) == 200)
}
165 | #' 166 | #' @param project_hid character with project identifier 167 | #' @param filename character with filename containing data 168 | #' @param title title of dataset 169 | #' @param prediction_only boolean determining if data is used only for prediction 170 | #' 171 | #' @return parsed dataset details 172 | #' @export 173 | add_dataset_if_not_exists <- function(project_hid, filename, title, prediction_only=FALSE){ 174 | .wait_till_all_datasets_are_valid(project_hid) 175 | ds <- get_datasets(project_hid) 176 | if (length(ds$datasets)>0) { 177 | for(i in 1:length(ds$datasets)) { 178 | if (ds$datasets[[i]]$title == title) { 179 | warning(sprintf("Dataset with the same name already exists: <%s>", 180 | title)) 181 | existing_ds <- list(dataset=ds$datasets[[i]], resp=NULL) 182 | return(existing_ds) 183 | } 184 | } 185 | } 186 | dataset_details <- add_new_dataset(project_hid, filename, title, prediction_only) 187 | .wait_till_all_datasets_are_valid(project_hid) 188 | if (!.accept_dataset_column_usage(dataset_details$hid)){ 189 | stop("There was a problem with accept column usage for your dataset.") 190 | } 191 | new_dataset <- get_dataset(dataset_details$hid) 192 | if (!new_dataset$dataset$valid){ 193 | stop("Sorry, your dataset cannot be read by MLJAR.\nPlease report this to us - we will fix it") 194 | } 195 | if (is.null(new_dataset$dataset$column_usage_min)){ 196 | stop("Something bad happend! There is no attributes usage defined for your dataset") 197 | } 198 | return(new_dataset) 199 | } 200 | -------------------------------------------------------------------------------- /R/dataupload.R: -------------------------------------------------------------------------------- 1 | #' Uploads file into MLJAR 2 | #' 3 | #' It uploads file into MLJAR and returns destination path. 
#'
#' @param project_hid character with project identifier
#' @param filepath character with path to file
#'
#' @return character with destination path
#'
#' @importFrom httr PUT
#' @export
upload_file <- function(project_hid, filepath) {
  # ask the API for a pre-signed URL, then PUT the file contents there
  parsed <- .get_signed_url(project_hid, filepath)
  plain_text_data <- readr::read_file(filepath)
  resp <- PUT(parsed$signed_url, body = plain_text_data)
  .check_response_status(resp, 200, "Upload into MLJAR failed")
  return(parsed$destination_path)
}

#' Get signed url
#'
#' From given project hid and filepath returns signed url for uploading.
#'
#' @param project_hid character with project identifier
#' @param filepath path to the file
#'
#' @return parsed http response from MLJAR s3policy (check mljar api for more)
.get_signed_url <- function(project_hid, filepath) {
  api_url_signed_url <- paste(MLAR_API_PATH, API_VERSION, "/s3policy/", sep = "")
  # last path component is the file name sent to the API
  fname <- tail(strsplit(filepath, "/")[[1]], n = 1)
  data <- list(project_hid = project_hid,
               fname = fname)
  rp <- .get_json_from_post_query(api_url_signed_url, data)
  return(rp$parsed)
}

#' Gets list of available experiments for given project
#'
#' @param project_hid character with project identifier
#'
#' @return structure with parsed experiments and http response
#' @export
get_experiments <- function(project_hid) {
  api_url_experiments <- paste(MLAR_API_PATH, API_VERSION, "/experiments",
                               "?project_id=", project_hid, sep = "")
  rp <- .get_json_from_get_query(api_url_experiments)
  structure(
    list(
      experiments = rp$parsed,
      response = rp$resp
    ),
    class = "get_experiments"
  )
}

print.get_experiments <- function(x, ...) {
  cat("\n", sep = "")
  str(x$experiments)
  invisible(x)
}

#' Gets experiment details
#'
#' @param experiment_hid character with experiment identifier
#'
#' @return structure with parsed experiment and http response
#' @export
get_experiment <- function(experiment_hid) {
  api_url_experiment <- paste(MLAR_API_PATH, API_VERSION, "/experiments/",
                              experiment_hid, sep = "")
  rp <- .get_json_from_get_query(api_url_experiment)
  structure(
    list(
      experiment = rp$parsed,
      response = rp$resp
    ),
    class = "get_experiment"
  )
}

print.get_experiment <- function(x, ...) {
  cat("\n", sep = "")
  str(x$experiment)
  invisible(x)
}

#' Creates experiment from given parameters
#'
#' @param data list of experiment parameters
#'
#' @return experiment details parsed by fromJSON
#' @export
#'
#' @importFrom httr POST
#' @importFrom jsonlite fromJSON
create_experiment <- function(data) {
  token <- .get_token()
  api_url_create_experiment <- paste(MLAR_API_PATH, API_VERSION, "/experiments", sep = "")
  resp <- POST(api_url_create_experiment,
               add_headers(Authorization = paste("Token", token)),
               body = data, encode = "form")
  .check_response_status(resp, 201)
  if (status_code(resp) == 201) {
    # FIX: typo "succesfully" -> "successfully" in user-facing message
    print(sprintf("Experiment <%s> successfully created!", data$title))
  }
  experiment_details <- jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8"),
                                           simplifyVector = FALSE)
  return(experiment_details)
}

#' Add experiment if not exists
#'
#' Check if experiment exists, verifies parameters, creates data
#' for create_experiment and finally starts creation of
#' MLJAR experiment.
#'
#' @param project_hid character with project identifier
#' @param train_dataset character with path to training dataset
#' @param valid_dataset character with path to validation dataset
#' @param experiment_title character with experiment title
#' @param project_task character with project task
#' @param validation_kfolds number of folds to be used in validation
#' @param validation_shuffle boolean which specify if shuffle samples before training
#' @param validation_stratify boolean which decides whether samples will be
#' divided into folds with the same class distribution
#' @param validation_train_split ratio how to split training dataset into train and validation
#' @param algorithms list of algorithms to use
#' @param metric character with metric
#' @param tuning_mode tuning mode
#' @param time_constraint numeric with time limit to calculate algorithm
#' @param create_ensemble whether or not to create ensemble
#'
#' @return experiment details structure
#' @export
add_experiment_if_not_exists <- function(project_hid, train_dataset,
                                         valid_dataset, experiment_title,
                                         project_task, validation_kfolds,
                                         validation_shuffle, validation_stratify,
                                         validation_train_split, algorithms,
                                         metric, tuning_mode, time_constraint,
                                         create_ensemble) {
  # build the human-readable validation scheme string expected by the API
  if (!is.null(valid_dataset)) {
    validation <- "With dataset"
  } else {
    if (!is.null(validation_train_split)) {
      percents <- round(validation_train_split * 100)
      validation <- paste0("Split ", percents, "/", 100 - percents)
    } else {
      validation <- paste0(validation_kfolds, "-fold CV")
    }
    if (validation_shuffle) {
      validation <- paste0(validation, ", Shuffle")
    }
    if (validation_stratify && project_task == "bin_class") {
      validation <- paste0(validation, ", Stratify")
    }
    if (validation_stratify && project_task != "bin_class") {
      warning("Cannot use stratify in validation for your project task.
              Omitting this option in validation.")
    }
  }
  # fall back to task defaults for unknown/empty metric
  if (is.null(metric) || metric == "" || !(metric %in% names(MLJAR_METRICS))) {
    metric <- MLJAR_DEFAULT_METRICS[project_task]
  }
  # fall back to default tuning mode if not recognised
  if (is.null(tuning_mode) || tuning_mode == "" || !(tuning_mode %in% names(MLJAR_TUNING_MODES))) {
    tuning_mode <- MLJAR_DEFAULT_TUNING_MODE
  }
  # FIX: `algorithms == ""` inside `||` errors for vectors of length > 1
  # in R >= 4.3; `all(algorithms == "")` keeps the intent and is safe.
  if (is.null(algorithms) || length(algorithms) == 0 || all(algorithms == "")) {
    algorithms <- MLJAR_DEFAULT_ALGORITHMS[project_task]
  }
  # set default preprocessing if needed
  dataset_preproc <- list()
  if (length(train_dataset$column_usage_min[["cols_to_fill_na"]]) > 0) {
    dataset_preproc$na_fill <- "na_fill_median"
  }
  if (length(train_dataset$column_usage_min[["cols_to_convert_categorical"]]) > 0) {
    dataset_preproc$convert_categorical <- "categorical_to_int"
  }
  # NOTE: original code assigned `{}` here, which evaluates to NULL;
  # keep that behavior explicitly (NULL drops `preproc` from the params list)
  if (length(dataset_preproc) == 0) dataset_preproc <- NULL
  # a single algorithm must still be serialized as a JSON array
  if (length(algorithms) == 1) algorithms <- c(algorithms, "")
  expt_params <- list(
    train_dataset = list(id = train_dataset$hid, title = train_dataset$title),
    algs = algorithms,
    preproc = dataset_preproc,
    single_limit = time_constraint,
    ensemble = create_ensemble,
    random_start_cnt = MLJAR_TUNING_MODES[[tuning_mode]][["random_start_cnt"]],
    hill_climbing_cnt = MLJAR_TUNING_MODES[[tuning_mode]][["hill_climbing_cnt"]]
  )
  if (!is.null(valid_dataset)) {
    expt_params$vald_dataset <- list(id = valid_dataset$hid, title = valid_dataset$title)
  }

  # checks whether title of experiment is different
  all_experiments <- get_experiments(project_hid)
  if (length(all_experiments$experiments) > 0) {
    for (i in seq_along(all_experiments$experiments)) {
      if (all_experiments$experiments[[i]]$title == experiment_title) {
        stop("The experiment with specified title already exists\nPlease rename your new experiment with new parameters setup.")
      }
    }
  }
  params <- jsonlite::toJSON(expt_params, auto_unbox = TRUE)
  # FIX: these were stray roxygen (#') comments inside the function body;
  # if everything is fine until this point we can create the data list
  # to build a new experiment
  experiment_data <- list(title = experiment_title,
                          description = "",
                          metric = metric,
                          validation_scheme = validation,
                          task = project_task,
                          compute_now = 1,
                          parent_project = project_hid,
                          params = params
                          )
  create_experiment(experiment_data)
}

#' Get results statistics
#'
#' @param results results structure
#'
#' @return list with numbers of jobs: initiated, learning, done, error
.get_results_stats <- function(results) {
  resstats <- list(initiated_cnt = 0, learning_cnt = 0,
                   done_cnt = 0, error_cnt = 0)
  for (r in results$results) {
    if (r$status == "Initiated") {
      resstats$initiated_cnt <- resstats$initiated_cnt + 1
    } else if (r$status == "Learning") {
      resstats$learning_cnt <- resstats$learning_cnt + 1
    } else if (r$status == "Done") {
      resstats$done_cnt <- resstats$done_cnt + 1
    } else {
      # anything else (e.g. "Error") counts as a failed job
      resstats$error_cnt <- resstats$error_cnt + 1
    }
  }
  return(resstats)
}
#' Gives info about remaining training time
#'
#' @param exp experiment structure
#' @param res_stats results statistics structure
#'
#' @return numeric with estimated time (minutes)
.asses_total_training_time <- function(exp, res_stats) {
  single_alg_limit <- exp$experiment$params$single_limit
  if (is.null(single_alg_limit)) {
    single_alg_limit <- 5  # default per-algorithm limit when unset
  } else {
    single_alg_limit <- as.numeric(single_alg_limit)
  }
  # queued jobs divided by currently learning jobs (at least one),
  # plus half a limit as a safety margin
  total <- (res_stats$initiated_cnt * single_alg_limit) / max(c(res_stats$learning_cnt, 1))
  total <- total + 0.5 * single_alg_limit
  return(total)
}

#' Get best result
#'
#' Returns best result from given experiment and results stats.
#'
#' @param exp experiment structure
#' @param curr_results current results structure
#'
#' @return results structure with best results
.get_best_result <- function(exp, curr_results) {
  the_best_result <- NULL
  # guard: nothing to pick from if the experiment was never fetched
  if (is.null(exp)) return(the_best_result)
  min_value <- 10e12
  if (exp$experiment$compute_now == 1 || exp$experiment$compute_now == 2) {
    # metrics in MLJAR_OPT_MAXIMIZE are maximized; flip the sign so
    # that a single "smaller is better" comparison works for both
    if (!(exp$experiment$metric %in% MLJAR_OPT_MAXIMIZE)) {
      opt_direction <- 1
    } else {
      opt_direction <- -1
    }
    for (res in curr_results$results) {
      if (is.null(res$metric_value)) next
      if (res$metric_value * opt_direction < min_value) {
        min_value <- res$metric_value * opt_direction
        the_best_result <- res
      }
    }
  }
  return(the_best_result)
}

#' Wait till all models trained
#'
#' Waits until all models are trained and returns best model.
#'
#' @param project_hid character with project identifier
#' @param experiment_hid character with experiment identifier
#'
#' @return best model structure
.wait_till_all_models_trained <- function(project_hid, experiment_hid) {
  WAIT_INTERVAL <- 10.0
  loop_max_counter <- 24 * 360  # 24 hours of maximum waiting
  # FIX: predefine so .get_best_result() is safe even if the very first
  # iteration of the try() block fails
  curr_results <- NULL
  exp <- NULL
  while (loop_max_counter > 0) {
    loop_max_counter <- loop_max_counter - 1
    rtry <- try({
      curr_results <- get_results(project_hid, experiment_hid)
      exp <- get_experiment(experiment_hid)
      if (exp$experiment$compute_now == 2) {  # 2 == computation finished
        break
      }
      res_stats <- .get_results_stats(curr_results)
      # printing out info about training process
      eta <- .asses_total_training_time(exp, res_stats)
      if (res_stats$initiated_cnt + res_stats$learning_cnt +
          res_stats$done_cnt + res_stats$error_cnt == 0) {
        eta <- "estimating"
      } else {
        eta <- round(eta, 2)
      }
      cat("\r", sprintf(
        "initiated: %s, learning: %s, done: %s, error: %s | ETA: %s minutes ",
        res_stats$initiated_cnt, res_stats$learning_cnt, res_stats$done_cnt,
        res_stats$error_cnt, eta))
      Sys.sleep(WAIT_INTERVAL)

    }, silent = TRUE)
    # FIX: inherits() instead of class() == "try-error" (robust to
    # multi-class objects)
    if (inherits(rtry, "try-error")) {
      warning(paste("There were some problems with your model: ", geterrmessage()))
    }
  }
  best_result <- .get_best_result(exp, curr_results)
  return(best_result)
}

#' Starts experiment and returns best model
#'
#' But before verifies if given input data is correct.
#'
#' @param x data.frame/matrix with training data
#' @param y data.frame/matrix with training labels
#' @param validx data.frame/matrix with validation data
#' @param validy data.frame/matrix with validation labels
#' @param proj_title character with project title
#' @param exp_title character with experiment title
#' @param dataset_title character with dataset title
#' @param val_ds_title character with validation dataset title
#' @param metric character with metric
#' @param algorithms list of algorithms to use
#' @param validation_kfolds number of folds to be used in validation
#' @param validation_shuffle boolean which specify if shuffle samples before training
#' @param validation_stratify boolean which decides whether samples will be
#' divided into folds with the same class distribution
#' @param validation_train_split ratio how to split training dataset into train and validation
#' @param tuning_mode tuning mode
#' @param create_ensemble whether or not to create ensemble
#' @param single_algorithm_time_limit numeric with time limit to calculate algorithm
#'
#' @return structure with the best model
.start_experiment <- function(x, y, validx, validy, proj_title, exp_title,
                              dataset_title, val_ds_title, metric,
                              algorithms, validation_kfolds, validation_shuffle,
                              validation_stratify, validation_train_split,
                              tuning_mode, create_ensemble, single_algorithm_time_limit) {
  task <- .obtain_task(y)
  if (length(algorithms) == 0) {
    # FIX: ifelse() returns only the FIRST element of a vector argument,
    # silently dropping all but one default algorithm; use if/else instead.
    if (task == "reg") {
      algorithms <- MLJAR_DEFAULT_ALGORITHMS$regression
    } else {
      algorithms <- MLJAR_DEFAULT_ALGORITHMS$bin_class
    }
    warning(sprintf("You did not specify algorithms: defaults for task %s are %s",
                    task, paste(algorithms, collapse = " ")))
  }
  if (nchar(metric) == 0) {
    # same ifelse() pitfall as above — keep the full default value
    if (task == "reg") {
      metric <- MLJAR_DEFAULT_METRICS$regression
    } else {
      metric <- MLJAR_DEFAULT_METRICS$bin_class
    }
    warning(sprintf("You did not specify metric: defaults for task %s are %s",
                    task, paste(metric, collapse = " ")))
  }
  # create project and datasets
  tmp_data_filename <- .data_to_file(x, y)
  tmp_proj_hid <- .check_if_project_exists(proj_title)
  if (is.null(tmp_proj_hid)) {
    project_details <- create_project(proj_title, task)
  } else {
    print(sprintf("Project <%s> exists.", proj_title))
    project_details <- get_project(tmp_proj_hid)$project
  }
  # generate a random dataset title when none was supplied
  ds_title <- if (is.null(dataset_title)) {
    paste0("Dataset", round(runif(1, 1, 999)))
  } else {
    dataset_title
  }
  dataset <- add_dataset_if_not_exists(project_details$hid,
                                       tmp_data_filename, ds_title)
  if (!is.null(validx) && !is.null(validy)) {
    tmp_valid_data_filename <- .data_to_file(validx, validy)
    val_title <- if (is.null(val_ds_title)) {
      paste0("Val_dataset", round(runif(1, 1, 999)))
    } else {
      val_ds_title
    }
    valdataset <- add_dataset_if_not_exists(project_details$hid,
                                            tmp_valid_data_filename, val_title)
  } else {
    valdataset <- NULL
  }
  # add experiment (NULL valdataset$dataset is handled downstream)
  exp_details <- add_experiment_if_not_exists(project_details$hid, dataset$dataset,
                                              valdataset$dataset, exp_title, task,
                                              validation_kfolds, validation_shuffle,
                                              validation_stratify, validation_train_split,
                                              algorithms, metric, tuning_mode,
                                              single_algorithm_time_limit, create_ensemble)
  best_model <- .wait_till_all_models_trained(project_details$hid, exp_details$hid)
  return(best_model)
}

#' MLJAR FIT
#'
#' Verifies parameters and data and tries to run experiment.
#'
#' @param x data.frame/matrix with training data
#' @param y data.frame/matrix with training labels
#' @param validx data.frame/matrix with validation data
#' @param validy data.frame/matrix with validation labels
#' @param proj_title character with project title
#' @param exp_title character with experiment title
#' @param dataset_title character with dataset name
#' @param val_dataset_title character with validation dataset name
#' @param metric character with metric
#' For binary classification there are metrics:
#' "auc" which is for Area Under ROC Curve,
#' "logloss" which is for Logarithmic Loss.
#' For regression tasks:
#' "rmse" which is Root Mean Square Error,
#' "mse" which is for Mean Square Error,
#' "mase" which is for Mean Absolute Error.
#' @param wait_till_all_done boolean saying whether function should wait
#' till all models are done
#' @param algorithms list of algorithms to use
#' For binary classification task available algorithm are:
#' "xgb" which is for Xgboost,
#' "lgb" which is for LightGBM
#' "mlp" which is for Neural Network,
#' "rfc" which is for Random Forest,
#' "etc" which is for Extra Trees,
#' "rgfc" which is for Regularized Greedy Forest,
#' "knnc" which is for k-Nearest Neighbors,
#' "logreg" which is for Logistic Regression.
#' For regression task there are available algorithms:
#' "xgbr" which is for Xgboost,
#' "lgbr" which is for LightGBM,
#' "rgfr" which is for Regularized Greedy Forest,
#' "rfr" which is for Random Forest,
#' "etr" which is for Extra Trees.
#' @param validation_kfolds number of folds to be used in validation
#' @param validation_shuffle boolean which specify if shuffle samples before training
#' @param validation_stratify boolean which decides whether samples will be
#' divided into folds with the same class distribution
#' @param validation_train_split ratio how to split training dataset into train and validation
#' @param tuning_mode tuning mode
#' @param create_ensemble whether or not to create ensemble
#' @param single_algorithm_time_limit numeric with time limit to calculate algorithm
#'
#' @return structure with the best model
#' @export
mljar_fit <- function(x, y, validx = NULL, validy = NULL,
                      proj_title = NULL, exp_title = NULL,
                      dataset_title = NULL, val_dataset_title = NULL,
                      algorithms = c(), metric = "",
                      wait_till_all_done = TRUE,
                      validation_kfolds = MLJAR_DEFAULT_FOLDS,
                      validation_shuffle = MLJAR_DEFAULT_SHUFFLE,
                      validation_stratify = MLJAR_DEFAULT_STRATIFY,
                      validation_train_split = MLJAR_DEFAULT_TRAIN_SPLIT,
                      tuning_mode = MLJAR_DEFAULT_TUNING_MODE,
                      create_ensemble = MLJAR_DEFAULT_ENSEMBLE,
                      single_algorithm_time_limit = MLJAR_DEFAULT_TIME_CONSTRAINT) {
  if (is.null(proj_title)) {
    proj_title <- paste0("Project", round(runif(1, 1, 999)))
  }
  if (is.null(exp_title)) {
    # BUG FIX: the original assigned the generated experiment title to
    # proj_title, leaving exp_title NULL downstream.
    exp_title <- paste0("Experiment", round(runif(1, 1, 999)))
  }
  model <- .start_experiment(x, y, validx, validy, proj_title, exp_title,
                             dataset_title, val_dataset_title, metric,
                             algorithms, validation_kfolds, validation_shuffle,
                             validation_stratify, validation_train_split,
                             tuning_mode, create_ensemble,
                             single_algorithm_time_limit)
  class(model) <- "mljar_model"
  return(model)
}

#' MLJAR PREDICT
#'
#' Makes prediction based on trained model.
#'
#' @param model model hid (character) or MLJAR model structure
#' (as returned by \code{mljar_fit})
#' @param x_pred data.frame/matrix data to predict
#' @param project_title character with project title
#'
#' @return data.frame with prediction
#' @export
mljar_predict <- function(model, x_pred, project_title){
  if (is.null(model)) {
    stop("Model cannot be null.")
  }
  # Accept a bare model hid as well as a full model structure.
  if (is.atomic(model)) {
    model <- list(hid = model)
  }
  # BUG FIX: validate before coercion -- as.data.frame(NULL) returns an empty
  # data.frame, so the original post-coercion NULL check was dead code.
  if (is.null(x_pred)) {
    stop("NULL data")
  }
  x_pred <- as.data.frame(x_pred)
  # look for project
  proj_hid <- .check_if_project_exists(project_title)
  if (is.null(proj_hid)) stop("Project not found! Check title and try again.")
  # upload the data under a throwaway prediction-only dataset
  tmp_data_filename <- .data_to_file(x_pred)
  dspred_title <- paste0("Pred_dataset", round(runif(1, 1, 999)))
  pred_ds <- add_dataset_if_not_exists(proj_hid, tmp_data_filename, dspred_title, TRUE)
  total_checks <- 1000
  cat("Prediction download started")
  # Poll the API until the prediction appears (10 s between checks).
  for (i in seq_len(total_checks)){
    prediction <- get_prediction(proj_hid, pred_ds$dataset$hid, model$hid)
    cat("\r", sprintf("Downloading prediction - %s ", i))
    # for first iteration we send dataset for prediction
    if (i == 1 && length(prediction$prediction) == 0) {
      submit_predict_job(proj_hid, pred_ds$dataset$hid, model$hid)
    }
    if (length(prediction$prediction) > 0) {
      pred <- prediction_download(prediction$prediction[[1]]$hid)
      # clean up the temporary prediction dataset on the server
      delete_dataset(pred_ds$dataset$hid)
      return(pred)
    }
    Sys.sleep(10)
  }
  # polling budget exhausted without a prediction becoming available
  return(NULL)
}

#' Gives data.frame with basic data of all models
#'
#' You can later get some specific model by calling
#' e.g. \code{mod <- get_model(project_title, experiment_title, model_hid)}.
#'
#' @param project_title character with project title
#' @param exp_title character with experiment title
#'
#' @return data.frame with model's "hid", "model_type", "metric_value",
#' "metric_type"
#'
#' @export
get_all_models <- function(project_title, exp_title) {
  # Look for project title
  prj_hid <- .check_if_project_exists(project_title)
  if (is.null(prj_hid))
    stop("MLJAR cannot find a project with such a title. Check and try again.")
  # Look for experiment title
  flag.proj.exp <- FALSE
  ge <- get_experiments(prj_hid)
  if (length(ge$experiments) == 0) stop("No experiments found.")
  for(i in seq_along(ge$experiments)) {
    if (ge$experiments[[i]]$title == exp_title){
      flag.proj.exp <- TRUE
      break
    }
  }
  if (flag.proj.exp == FALSE)
    stop("MLJAR cannot find an experiment with such a title. Check and try again.")
  exp_hid <- ge$experiments[[i]]$hid
  exp <- get_experiment(exp_hid)
  # compute_now == 2 marks a finished experiment
  # BUG FIX: the stop() message was a multi-line string literal embedding a
  # newline and source indentation in the user-facing error; collapsed to one line.
  if (exp$experiment$compute_now != 2)
    stop("Experiment still in progress. Wait till its done!")
  curr_results <- get_results(prj_hid, exp_hid)
  column.names <- c("hid", "model_type", "metric_value",
                    "metric_type", "validation_scheme")
  # keep only results belonging to the requested experiment
  filter_curr_res <- curr_results$results[unlist(lapply(curr_results$results,
                                          function(x) x$experiment==exp_title))]
  tmp_sa <- sapply(filter_curr_res,
                   function(x) c(x$hid, x$model_type, x$metric_value,
                                 x$metric_type, x$validation_scheme),
                   simplify = FALSE, USE.NAMES = TRUE)
  # transpose list-of-rows into a data.frame with one row per model
  df_res <- t(as.data.frame(tmp_sa,
                            row.names = column.names,
                            col.names = 1:length(tmp_sa)))
  df_res <- data.frame(df_res, row.names = NULL)
  return(df_res)
}

-------------------------------------------------------------------------------- /R/params.R: --------------------------------------------------------------------------------
# MLJAR Constants
#################

MLAR_API_PATH <- "https://mljar.com/api/"
API_VERSION <- "v1"

MLJAR_TASKS <- list( bin_class = 'Binary Classification',
                     regression = 'Regression'
)

MLJAR_METRICS <- list(auc = 'Area Under Curve',
                      logloss = 'Logarithmic Loss',
                      rmse = 'Root Mean Square Error',
                      mse = 'Mean Square Error',
                      mae = 'Mean Absolute Error')

MLJAR_DEFAULT_FOLDS <- 5
MLJAR_DEFAULT_SHUFFLE <- TRUE
MLJAR_DEFAULT_STRATIFY <- TRUE
MLJAR_DEFAULT_TRAIN_SPLIT <- NULL

MLJAR_BIN_CLASS <- list(xgb = 'Extreme Gradient Boosting',
                        lgb = 'LightGBM',
                        rfc = 'Random Forest',
                        rgfc = 'Regularized Greedy Forest',
                        etc = 'Extra Trees',
                        knnc = 'k-Nearest Neighbor',
                        logreg = 'Logistic Regression',
                        mlp = 'Neural Network'
)

MLJAR_REGRESSION <- list(xgbr = 'Extreme Gradient Boosting',
                         lgbr = 'LightGBM',
                         rfr = 'Random Forest',
                         rgfr = 'Regularized Greedy Forest',
                         etr = 'Extra Trees'
)

MLJAR_TUNING_MODES <- list(Normal = list(random_start_cnt = 5,
hill_climbing_cnt = 1), 40 | Sport = list(random_start_cnt = 10, hill_climbing_cnt = 2), 41 | Insane = list(random_start_cnt = 15, hill_climbing_cnt = 3) 42 | ) 43 | 44 | # MLJAR Defaults 45 | ################# 46 | 47 | MLJAR_DEFAULT_METRICS <- list(bin_class = "logloss", 48 | regression = "rmse") 49 | 50 | MLJAR_DEFAULT_ALGORITHMS <- list( bin_class = c("xgb", "lgb"), 51 | regression = c("xgbr", "lgbr") 52 | ) 53 | 54 | MLJAR_DEFAULT_ENSEMBLE = TRUE 55 | MLJAR_DEFAULT_TUNING_MODE = 'Normal' 56 | MLJAR_DEFAULT_TIME_CONSTRAINT = '5' # minutes 57 | 58 | MLJAR_OPT_MAXIMIZE = c('auc') 59 | -------------------------------------------------------------------------------- /R/prediction.R: -------------------------------------------------------------------------------- 1 | #' Gets MLJAR predictions 2 | #' 3 | #' @param project_hid character with project identifier 4 | #' @param dataset_hid character with dataset identifier 5 | #' @param result_hid character with result identifier 6 | #' 7 | #' @return structure with parsed prediction and http response 8 | #' @export 9 | get_prediction <- function(project_hid, dataset_hid, result_hid){ 10 | api_url_prediction <- paste(MLAR_API_PATH, API_VERSION, "/predictions", 11 | "?project_id=", project_hid, "&dataset_id=", 12 | dataset_hid, "&result_id=", result_hid, sep="") 13 | rp <- .get_json_from_get_query(api_url_prediction) 14 | resp <- rp$resp 15 | parsed <- rp$parsed 16 | 17 | structure( 18 | list( 19 | prediction = parsed, 20 | response = resp 21 | ), 22 | class = "get_prediction" 23 | ) 24 | } 25 | 26 | print.get_prediction <- function(x, ...) { 27 | cat("\n", sep = "") 28 | str(x$prediction) 29 | invisible(x) 30 | } 31 | -------------------------------------------------------------------------------- /R/prediction_download.R: -------------------------------------------------------------------------------- 1 | #' Function to get predictions from MLJAR. 
#'
#' @param prediction_hid prediction identifier
#'
#' @return data.frame with prediction
#'
#' @importFrom httr POST
#'
#' @export
prediction_download <- function(prediction_hid){
  token <- .get_token()
  api_url_preddown <- paste(MLAR_API_PATH, API_VERSION, "/download/prediction/" , sep="")
  data <- list( prediction_id = prediction_hid)
  resp <- POST(api_url_preddown, add_headers(Authorization = paste("Token", token)),
               body = data, encode = "form")
  .check_response_status(resp, 200, "Error in prediction download!")
  # Write the CSV payload to a temporary file and parse it back.
  # BUG FIX: register cleanup via on.exit() so the temp file is removed even
  # when read.csv() fails; file.create() was redundant (write() creates it).
  tmpfilepath <- paste0(tempfile(), ".csv")
  on.exit(unlink(tmpfilepath), add = TRUE)
  write(content(resp, encoding = "UTF-8"), file = tmpfilepath)
  prediction <- read.csv(tmpfilepath)
  return(prediction)
}
-------------------------------------------------------------------------------- /R/predictjob.R: --------------------------------------------------------------------------------
#' Submits dataset for MLJAR prediction
#'
#' @param project_hid character with project identifier
#' @param dataset_hid character with dataset identifier
#' @param result_hid character with result identifier
#'
#' @importFrom httr POST
#' @importFrom jsonlite toJSON
#' @export
submit_predict_job <- function(project_hid, dataset_hid, result_hid){
  token <- .get_token()
  # predict_params is a JSON string carried inside a form-encoded body
  data <- list(predict_params = jsonlite::toJSON(list(project_id = project_hid,
                                                      project_hardware = 'cloud',
                                                      algorithms_ids = list(result_hid),
                                                      dataset_id = dataset_hid,
                                                      cv_models = 1),
                                                 auto_unbox = TRUE)
  )
  query <- paste(MLAR_API_PATH, API_VERSION, "/predict/" , sep="")
  resp <- POST(query, add_headers(Authorization = paste("Token", token)),
               body = data, encode = "form")
  .check_response_status(resp, 200, "Predict MLJAR job failed")
}
-------------------------------------------------------------------------------- /R/projects.R: --------------------------------------------------------------------------------
#' Get projects
#'
#' Gets list of available projects
#'
#' @return structure with parsed projects and http response
#' @export
get_projects <- function() {
  # GET /projects; wrap the parsed JSON and raw response in an S3 object
  api_url_projects <- paste(MLAR_API_PATH, API_VERSION, "/projects" , sep="")
  rp <- .get_json_from_get_query(api_url_projects)
  resp <- rp$resp
  parsed <- rp$parsed
  structure(
    list(
      projects = parsed,
      response = resp
    ),
    class = "get_projects"
  )
}

# S3 print method: display the parsed project list structure
print.get_projects <- function(x, ...) {
  cat("\n", sep = "")
  str(x$projects)
  invisible(x)
}

#' Print all projects
#'
#' Gives data.frame with basic information about existing projects
#'
#' @return data.frame with projects
#' @export
print_all_projects <- function() {
  columns = c("hid", "title", "task", "description")
  projects <- get_projects()
  # no projects -> empty data.frame
  if (length(projects$projects) == 0) return(data.frame())
  # build one character vector per project; description may be NULL
  tmp_sa <- sapply(projects$projects,
                   function(x) c(x$hid, x$title, x$task,
                                 ifelse(!is.null(x$description), x$description, "")),
                   simplify = FALSE, USE.NAMES = TRUE)
  # transpose the list-of-rows into a data.frame, one row per project
  df_proj <- t(as.data.frame(tmp_sa,
                             row.names = columns,
                             col.names = 1:length(tmp_sa)))
  df_proj <- data.frame(df_proj, row.names = NULL)
  return(df_proj)
}

#' Get project
#'
#' Get data from a project of specified hid
#'
#' @param hid character with project unique identifier
#'
#' @return structure with parsed project and http response
#' @export
get_project <- function(hid) {
  api_url_project_hid <- paste(MLAR_API_PATH, API_VERSION, "/projects/", hid, sep="")
  rp <- .get_json_from_get_query(api_url_project_hid)
  resp <- rp$resp
  parsed <- rp$parsed

  structure(
    list(
      project = parsed,
      response = resp
    ),
    class = "get_project"
  )
}

# S3 print method: display the parsed project structure
print.get_project <- function(x, ...) {
  cat("\n", sep = "")
  str(x$project)
  invisible(x)
}

#' Creates a new project
#'
#' @param title character with project title
#' @param task character with project task
#' @param description optional description
#'
#' @return project details structure
#' @export
create_project <-function(title, task, description=""){
  # fail early if a project with this title+task pair already exists
  .verify_if_project_exists(title, task)
  token <- .get_token()
  api_url_projects <- paste(MLAR_API_PATH, API_VERSION, "/projects" , sep="")
  data <- list(title = title,
               hardware = 'cloud',
               scope = 'private',
               task = task,
               compute_now = 0,
               description = description)
  resp <- POST(api_url_projects, add_headers(Authorization = paste("Token", token)),
               body = data, encode = "form")
  # expect 201 Created; .check_response_status() stops otherwise
  .check_response_status(resp, 201)
  if (status_code(resp)==201){
    print(sprintf("Project <%s> succesfully created!", title))
  }
  project_details <- jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8"), simplifyVector = FALSE)
  return(project_details)
}

#' Delete project
#'
#' @param hid character with project identifier
#'
#' @export
#' @importFrom httr DELETE status_code
delete_project <-function(hid){
  token <- .get_token()
  api_url_project_hid <- paste(MLAR_API_PATH, API_VERSION, "/projects/", hid, sep="")
  resp <- DELETE(api_url_project_hid, add_headers(Authorization = paste("Token", token)))
  # NOTE(review): non-success statuses are silently ignored here (best-effort
  # delete); only success is reported -- confirm this is intentional.
  if (status_code(resp)==204 || status_code(resp)==200){
    print(sprintf("Project <%s> succesfully deleted!", hid))
  }
}

# Helper project functions

#' Verify if project exists
#'
#' Checks if there is no project with the same name and task.
#'
#' @param projtitle character with project title
#' @param task character with project task
#'
#' @return TRUE if okay, stops if such a project exists.
.verify_if_project_exists <- function(projtitle, task){
  # Scan every known project; a title+task collision is fatal.
  for (proj in get_projects()$projects){
    if (proj$title==projtitle && proj$task==task){
      stop("Project with the same title and task already exists, change name.")
    }
  }
  return(TRUE)
}

#' Checks if project exists
#'
#' It bases only on title and returns project's hid if it exists.
#'
#' @param project_title character with project title
#'
#' @return character of project with its identifier or NULL
.check_if_project_exists <- function(project_title) {
  # Return the hid of the first project whose title matches; NULL when no
  # project matches (including the no-projects-at-all case).
  for (proj in get_projects()$projects) {
    if (proj$title == project_title){
      return(proj$hid)
    }
  }
  return(NULL)
}
-------------------------------------------------------------------------------- /R/result.R: --------------------------------------------------------------------------------
#' Get results of MLJAR training
#'
#' @param project_hid character with project identifier
#' @param experiment_hid character with experiment identifier
#'
#' @return structure with parsed results and http response
#'
#' @importFrom httr POST
#'
#' @export
get_results <- function(project_hid, experiment_hid){
  token <- .get_token()
  api_url_results <- paste(MLAR_API_PATH, API_VERSION, "/results/" , sep="")
  datares <- list( project_id = project_hid,
                   experiment_id = experiment_hid)
  resp <- POST(api_url_results, add_headers(Authorization = paste("Token", token)),
               body = datares, encode = "form")
.check_response_status(resp, 200) 19 | parsed <- jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8"), simplifyVector = FALSE) 20 | structure( 21 | list( 22 | results = parsed, 23 | response = resp 24 | ), 25 | class = "get_results" 26 | ) 27 | } 28 | 29 | print.get_results <- function(x, ...) { 30 | cat("\n", sep = "") 31 | str(x$results) 32 | invisible(x) 33 | } 34 | 35 | 36 | #' Get model 37 | #' 38 | #' Gets model only if experiment finished and project with such 39 | #' a title and having such an experiment exists. 40 | #' 41 | #' @param project_title character with project title 42 | #' @param exp_title character with experiment title 43 | #' @param model_hid character with experiment identifier 44 | #' 45 | #' @return structure with model parameters 46 | #' @export 47 | get_model <- function(project_title, exp_title, model_hid) { 48 | # Look for project title 49 | flag.proj.title <- FALSE 50 | prj_hid <- .check_if_project_exists(project_title) 51 | if (is.null(prj_hid)) 52 | stop("MLJAR cannot find a project with such a title. Check and try again.") 53 | # Look for experiment title 54 | flag.proj.exp <- FALSE 55 | ge <- get_experiments(prj_hid) 56 | if (length(ge$experiments) == 0) stop("No experiments found.") 57 | for(i in 1:length(ge$experiments)) { 58 | if (ge$experiments[[i]]$title == exp_title){ 59 | flag.proj.exp <- TRUE 60 | break 61 | } 62 | } 63 | if (flag.proj.exp == FALSE) 64 | stop("MLJAR cannot find an experiment with such a title. Check and try again.") 65 | exp_hid <- ge$experiments[[i]]$hid 66 | exp <- get_experiment(exp_hid) 67 | if (exp$experiment$compute_now != 2) 68 | stop("Experiment still in progress. Wait till its done!") 69 | flag.mod <- FALSE 70 | curr_results <- get_results(prj_hid, exp_hid) 71 | for(res in curr_results$results) { 72 | if (res$hid == model_hid){ 73 | flag.mod <- TRUE 74 | break 75 | } 76 | } 77 | if (flag.mod == FALSE) 78 | stop("MLJAR cannot find an experiment with such a title. 
Check and try again.") 79 | return(res) 80 | } 81 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | # MLJAR Helper Functions 2 | 3 | #' Get json from post query 4 | #' 5 | #' Returns api response and parsed output from POST query given data 6 | #' 7 | #' @param query character with http query 8 | #' @param data list with body data 9 | #' 10 | #' @importFrom httr POST add_headers content 11 | #' @importFrom jsonlite fromJSON 12 | #' @return list with response and parsed response from json 13 | .get_json_from_post_query <- function(query, data){ 14 | token <- .get_token() 15 | resp <- POST(query, add_headers(Authorization = paste("Token", token)), 16 | body = data, encode = "form") 17 | parsed <- jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8"), 18 | simplifyVector = FALSE) 19 | .check_response_status(resp, 200) 20 | return(list(resp=resp, parsed=parsed)) 21 | } 22 | 23 | #' Get json from get query 24 | #' 25 | #' Returns api response and parsed output 26 | #' 27 | #' @param query character with http query 28 | #' 29 | #' @importFrom httr GET add_headers content 30 | #' @importFrom jsonlite fromJSON 31 | #' @return list with response and parsed response from json 32 | .get_json_from_get_query <- function(query){ 33 | token <- .get_token() 34 | resp <- GET(query, add_headers(Authorization = paste("Token", token))) 35 | parsed <- jsonlite::fromJSON(content(resp, "text", encoding = "UTF-8"), 36 | simplifyVector = FALSE) 37 | .check_response_status(resp, 200) 38 | return(list(resp=resp, parsed=parsed)) 39 | } 40 | 41 | #' Get token 42 | #' 43 | #' Gets token from MLJAR_TOKEN env variable. 
#'
#' @return returns token defined in environmental variable MLJAR_TOKEN
#' @examples
#' \dontrun{
#' .get_token()
#' }
.get_token <- function(){
  token <- Sys.getenv("MLJAR_TOKEN")
  if (identical(token, "")) {
    stop("Specify MLJAR_TOKEN env variable", call. = FALSE)
  }
  return(token)
}

#' Check response status
#'
#' Verifies if response status is correct.
#' If not it stops execution with message.
#'
#' @param resp httr response
#' @param expected_code numeric with expected code e.g. 201
#' @param error_message character with error message
#'
#' @importFrom httr status_code
.check_response_status <- function(resp, expected_code,
                                   error_message="MLJAR API request failed"){
  # compares response status with expected_code and stops with error_message
  # (plus the actual status code) when they differ
  if (status_code(resp) != expected_code) {
    stop(
      sprintf(
        paste(error_message, "[%s]\n"),
        status_code(resp)
      ),
      call. = FALSE
    )
  }
}

#' Checks if data is in good format.
#'
#' If not it stops execution.
#'
#' @param x preferably a matrix or data frame.
#' If not, it is attempted to coerce x to a data frame.
#' @param y preferably a matrix or data frame.
#' If not, it is attempted to coerce y to a data frame.
.data_check <- function(x, y){
  x <- as.data.frame(x)
  y <- as.data.frame(y)
  if (is.null(x) || is.null(y)){
    stop("NULL data")
  }
  # only a single target column is supported
  if(length(dim(y))>1 && dim(y)[2]>1){
    stop("Sorry, multiple outputs are not supported in MLJAR")
  }
  # x and y must describe the same number of samples
  # BUG FIX: corrected "missmatch" typo in the user-facing error message
  if(dim(y)[1]!=dim(x)[1]){
    stop("Sorry, there is a mismatch between X and y matrices shapes")
  }
}

#' Stores data in temporary CSV file
#'
#' @param x preferably a matrix or data frame.
#' If not, it is attempted to coerce x to a data frame.
#' @param y preferably a matrix or data frame.
#' If not, it is attempted to coerce y to a data frame.
#'
#' @return tmpfilepath character with path to temporary file
#'
#' @examples
#' .data_to_file(c(1,2))
.data_to_file <- function(x, y=NULL){
  # BUG FIX: the roxygen tag above was "@example" (takes a file path); code
  # examples require "@examples".
  if (!is.null(y)){
    # first we check if data is valid
    .data_check(x, y)
    # now it's time to convert to data frame, target appended as last column
    dataxy <- as.data.frame(x)
    dataxy["target"] <- y
  } else {
    if (is.null(x)) stop("NULL data")
    dataxy <- as.data.frame(x)
  }
  # temporary csv file; write.csv creates the file itself
  tmpfilepath <- paste0(tempfile(), ".csv")
  write.csv(dataxy, file = tmpfilepath, row.names = FALSE)
  return(tmpfilepath)
}

#' Obtain task
#'
#' Determines what kind of task is that basing on y.
#' @param y target vector/data.frame
#'
#' @return "reg" or "bin_class" depending on kind of task
#' @examples
#' .obtain_task(c(1, 0, 0, 1))
.obtain_task <- function(y){
  # More than two unique target values -> regression, else binary class.
  # NOTE(review): returns "reg", while the lookup tables in params.R key on
  # "regression" -- confirm the downstream consumer uses the same key.
  return(ifelse(nrow(as.data.frame(unique(y))) > 2, "reg", "bin_class"))
}
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
[![Build Status](https://travis-ci.org/mljar/mljar-api-R.svg?branch=master)](https://travis-ci.org/mljar/mljar-api-R)
[![codecov](https://codecov.io/gh/mljar/mljar-api-R/branch/master/graph/badge.svg)](https://codecov.io/gh/mljar/mljar-api-R)

# mljar-api-R

A simple R wrapper for **mljar.com** API.
It allows MLJAR users to create Machine Learning models with few lines of code: 7 | 8 | ```R 9 | library(mljar) 10 | 11 | model <- mljar_fit(x.training, y.training, validx=x.validation, validy=y.validation, 12 | proj_title="Project title", exp_title="experiment title", 13 | algorithms = c("logreg"), metric = "logloss") 14 | 15 | predicted_values <- mljar_predict(model, x.to.predict, "Project title") 16 | ``` 17 | 18 | That's all folks! Yeah, I know, this makes Machine Learning super easy! You can use this code for following Machine Learning tasks: 19 | * Binary classification (your target has only two unique values) 20 | * Regression (your target value is continuous) 21 | * More is coming soon! 22 | 23 | ## How to install 24 | 25 | You can install mljar directly from **CRAN**: 26 | 27 | install.packages("mljar") 28 | 29 | Alternatively, you can install the latest development version from GitHub using `devtools`: 30 | 31 | devtools::install_github("mljar/mljar-api-R") 32 | 33 | ## How to use it 34 | 35 | 1. Create an account at mljar.com and login. 36 | 2. Please go to your users settings (top, right corner). 37 | 3. Get your token, for example 'exampleexampleexample'. 38 | 4. Set environment variable `MLJAR_TOKEN` with your token value in shell: 39 | ``` 40 | export MLJAR_TOKEN=exampleexampleexample 41 | ``` 42 | or directly in RStudio: 43 | ``` 44 | Sys.setenv(MLJAR_TOKEN="examplexampleexample") 45 | ``` 46 | 47 | 5. That's all, you are ready to use MLJAR in your R code! 48 | 49 | ## What's going on? 50 | 51 | * This wrapper allows you to search through different Machine Learning algorithms and tune each of the algorithm. 52 | * By searching and tuning ML algorithm to your data you will get very accurate model. 53 | * By calling function `mljar_fit` you create new project and start experiment with models training. 
All your results will be accessible from your mljar.com account - this makes Machine Learning super easy and
keeps all your models and results in beautiful order. So, you will never miss anything.
* All computations are done in MLJAR Cloud, they are executed in parallel. So after calling `mljar_fit` method you can switch
your computer off and MLJAR will do the job for you!
* I think this is really amazing! What do you think? Please let us know at `contact@mljar.com`.

## Examples

Soon

## Testing

To run tests use simple command in your R session:

```R
devtools::test()
```
-------------------------------------------------------------------------------- /man/add_dataset_if_not_exists.Rd: --------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataset.R
\name{add_dataset_if_not_exists}
\alias{add_dataset_if_not_exists}
\title{Add dataset if not exists}
\usage{
add_dataset_if_not_exists(project_hid, filename, title,
  prediction_only = FALSE)
}
\arguments{
\item{project_hid}{character with project identifier}

\item{filename}{character with filename containing data}

\item{title}{title of dataset}

\item{prediction_only}{boolean determining if data is used only for prediction}
}
\value{
parsed dataset details
}
\description{
Checks parameters before adding new dataset and verifies
if it doesn't exist already.
}
-------------------------------------------------------------------------------- /man/add_experiment_if_not_exists.Rd: --------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/experiment.R
\name{add_experiment_if_not_exists}
\alias{add_experiment_if_not_exists}
\title{Add experiment if not exists}
\usage{
add_experiment_if_not_exists(project_hid, train_dataset, valid_dataset,
  experiment_title, project_task, validation_kfolds, validation_shuffle,
  validation_stratify, validation_train_split, algorithms, metric, tuning_mode,
  time_constraint, create_ensemble)
}
\arguments{
\item{project_hid}{character with project identifier}

\item{train_dataset}{character with path to training dataset}

\item{valid_dataset}{character with path to validation dataset}

\item{experiment_title}{character with experiment title}

\item{project_task}{character with project task}

\item{validation_kfolds}{number of folds to be used in validation}

\item{validation_shuffle}{boolean which specify if shuffle samples before training}

\item{validation_stratify}{boolean which decides whether samples will be
divided into folds with the same class distribution}

\item{validation_train_split}{ratio how to split training dataset into train and validation}

\item{algorithms}{list of algorithms to use}

\item{metric}{character with metric}

\item{tuning_mode}{tuning mode}

\item{time_constraint}{numeric with time limit to calculate algorithm}

\item{create_ensemble}{whether or not to create ensemble}
}
\value{
experiment details structure
}
\description{
Check if experiment exists, verifies parameters, creates data
to create_experiment function and finally starts creation of
MLJAR experiment.
}
-------------------------------------------------------------------------------- /man/add_new_dataset.Rd: --------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataset.R
\name{add_new_dataset}
\alias{add_new_dataset}
\title{Adds new dataset}
\usage{
add_new_dataset(project_hid, filename, title, prediction_only = FALSE)
}
\arguments{
\item{project_hid}{character with project identifier}

\item{filename}{character with filename containing data}

\item{title}{title of dataset}

\item{prediction_only}{boolean determining if data is used only for prediction}
}
\value{
parsed by toJSON dataset details
}
\description{
Adds new dataset
}
-------------------------------------------------------------------------------- /man/create_experiment.Rd: --------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/experiment.R
\name{create_experiment}
\alias{create_experiment}
\title{Creates experiment from given parameters}
\usage{
create_experiment(data)
}
\arguments{
\item{data}{list of experiment parameters}
}
\value{
experiment details parsed by fromJSON
}
\description{
Creates experiment from given parameters
}
-------------------------------------------------------------------------------- /man/create_project.Rd: --------------------------------------------------------------------------------
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/projects.R
\name{create_project}
\alias{create_project}
\title{Creates a new project}
\usage{
create_project(title, task, description = "")
}
\arguments{
\item{title}{character with project title} 11 | 12 | \item{task}{character with project task} 13 | 14 | \item{description}{optional description} 15 | } 16 | \value{ 17 | project details structure 18 | } 19 | \description{ 20 | Creates a new project 21 | } 22 | -------------------------------------------------------------------------------- /man/delete_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dataset.R 3 | \name{delete_dataset} 4 | \alias{delete_dataset} 5 | \title{Deletes dataset} 6 | \usage{ 7 | delete_dataset(dataset_hid) 8 | } 9 | \arguments{ 10 | \item{dataset_hid}{character with dataset identifier} 11 | } 12 | \description{ 13 | Deletes dataset 14 | } 15 | -------------------------------------------------------------------------------- /man/delete_project.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/projects.R 3 | \name{delete_project} 4 | \alias{delete_project} 5 | \title{Delete project} 6 | \usage{ 7 | delete_project(hid) 8 | } 9 | \arguments{ 10 | \item{hid}{charceter with project identifier} 11 | } 12 | \description{ 13 | Delete project 14 | } 15 | -------------------------------------------------------------------------------- /man/get_all_models.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/main.R 3 | \name{get_all_models} 4 | \alias{get_all_models} 5 | \title{Gives data.frame with basic data of all models} 6 | \usage{ 7 | get_all_models(project_title, exp_title) 8 | } 9 | \arguments{ 10 | \item{project_title}{character with project title} 11 | 12 | \item{exp_title}{character with experiment title} 13 | } 14 | \value{ 15 | data.frame with model's "hid", "model_type", 
"metric_value", 16 | "metric_type" 17 | } 18 | \description{ 19 | You can later get some specific model by calling 20 | e.g. \code{mod <- get_model(project_title, experiment_title, model_hid)}. 21 | } 22 | -------------------------------------------------------------------------------- /man/get_dataset.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dataset.R 3 | \name{get_dataset} 4 | \alias{get_dataset} 5 | \title{Gets dataset} 6 | \usage{ 7 | get_dataset(dataset_hid) 8 | } 9 | \arguments{ 10 | \item{dataset_hid}{character with dataset identifier} 11 | } 12 | \value{ 13 | structure with parsed dataset and response 14 | } 15 | \description{ 16 | Gets dataset 17 | } 18 | -------------------------------------------------------------------------------- /man/get_datasets.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dataset.R 3 | \name{get_datasets} 4 | \alias{get_datasets} 5 | \title{Gets list of available datasets} 6 | \usage{ 7 | get_datasets(project_hid) 8 | } 9 | \arguments{ 10 | \item{project_hid}{character with project identifier} 11 | } 12 | \value{ 13 | structure with parsed datasets and response 14 | } 15 | \description{ 16 | Gets list of available datasets 17 | } 18 | -------------------------------------------------------------------------------- /man/get_experiment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/experiment.R 3 | \name{get_experiment} 4 | \alias{get_experiment} 5 | \title{Gets experiment details} 6 | \usage{ 7 | get_experiment(experiment_hid) 8 | } 9 | \arguments{ 10 | \item{experiment_hid}{character with experiment identifier} 11 | } 12 | \value{ 13 | structure with parsed 
experiment and http response 14 | } 15 | \description{ 16 | Gets experiment details 17 | } 18 | -------------------------------------------------------------------------------- /man/get_experiments.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/experiment.R 3 | \name{get_experiments} 4 | \alias{get_experiments} 5 | \title{Gets list of available experiments for given project} 6 | \usage{ 7 | get_experiments(project_hid) 8 | } 9 | \arguments{ 10 | \item{project_hid}{character with project identifier} 11 | } 12 | \value{ 13 | structure with parsed experiments and http response 14 | } 15 | \description{ 16 | Gets list of available experiments for given project 17 | } 18 | -------------------------------------------------------------------------------- /man/get_model.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/result.R 3 | \name{get_model} 4 | \alias{get_model} 5 | \title{Get model} 6 | \usage{ 7 | get_model(project_title, exp_title, model_hid) 8 | } 9 | \arguments{ 10 | \item{project_title}{character with project title} 11 | 12 | \item{exp_title}{character with experiment title} 13 | 14 | \item{model_hid}{character with experiment identifier} 15 | } 16 | \value{ 17 | structure with model parameters 18 | } 19 | \description{ 20 | Gets model only if experiment finished and project with such 21 | a title and having such an experiment exists. 
22 | } 23 | -------------------------------------------------------------------------------- /man/get_prediction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prediction.R 3 | \name{get_prediction} 4 | \alias{get_prediction} 5 | \title{Gets MLJAR predictions} 6 | \usage{ 7 | get_prediction(project_hid, dataset_hid, result_hid) 8 | } 9 | \arguments{ 10 | \item{project_hid}{character with project identifier} 11 | 12 | \item{dataset_hid}{character with dataset identifier} 13 | 14 | \item{result_hid}{character with result identifier} 15 | } 16 | \value{ 17 | structure with parsed prediction and http response 18 | } 19 | \description{ 20 | Gets MLJAR predictions 21 | } 22 | -------------------------------------------------------------------------------- /man/get_project.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/projects.R 3 | \name{get_project} 4 | \alias{get_project} 5 | \title{Get project} 6 | \usage{ 7 | get_project(hid) 8 | } 9 | \arguments{ 10 | \item{hid}{character with project unique identifier} 11 | } 12 | \value{ 13 | structure with parsed project and http response 14 | } 15 | \description{ 16 | Get data from a project of specified hid 17 | } 18 | -------------------------------------------------------------------------------- /man/get_projects.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/projects.R 3 | \name{get_projects} 4 | \alias{get_projects} 5 | \title{Get projects} 6 | \usage{ 7 | get_projects() 8 | } 9 | \value{ 10 | structure with parsed projects and http response 11 | } 12 | \description{ 13 | Gets list of available projects 14 | } 15 | 
-------------------------------------------------------------------------------- /man/get_results.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/result.R 3 | \name{get_results} 4 | \alias{get_results} 5 | \title{Get results of MLJAR training} 6 | \usage{ 7 | get_results(project_hid, experiment_hid) 8 | } 9 | \arguments{ 10 | \item{project_hid}{character with project identifier} 11 | 12 | \item{experiment_hid}{character with experiment identifier} 13 | } 14 | \value{ 15 | structure with parsed results and http response 16 | } 17 | \description{ 18 | Get results of MLJAR training 19 | } 20 | -------------------------------------------------------------------------------- /man/mljar_fit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/main.R 3 | \name{mljar_fit} 4 | \alias{mljar_fit} 5 | \title{MLJAR FIT} 6 | \usage{ 7 | mljar_fit(x, y, validx = NULL, validy = NULL, proj_title = NULL, 8 | exp_title = NULL, dataset_title = NULL, val_dataset_title = NULL, 9 | algorithms = c(), metric = "", wait_till_all_done = TRUE, 10 | validation_kfolds = MLJAR_DEFAULT_FOLDS, 11 | validation_shuffle = MLJAR_DEFAULT_SHUFFLE, 12 | validation_stratify = MLJAR_DEFAULT_STRATIFY, 13 | validation_train_split = MLJAR_DEFAULT_TRAIN_SPLIT, 14 | tuning_mode = MLJAR_DEFAULT_TUNING_MODE, 15 | create_ensemble = MLJAR_DEFAULT_ENSEMBLE, 16 | single_algorithm_time_limit = MLJAR_DEFAULT_TIME_CONSTRAINT) 17 | } 18 | \arguments{ 19 | \item{x}{data.frame/matrix with training data} 20 | 21 | \item{y}{data.frame/matrix with training labels} 22 | 23 | \item{validx}{data.frame/matrix with validation data} 24 | 25 | \item{validy}{data.frame/matrix with validation labels} 26 | 27 | \item{proj_title}{charcater with project title} 28 | 29 | \item{exp_title}{charcater with 
experiment title} 30 | 31 | \item{dataset_title}{charcater with dataset name} 32 | 33 | \item{val_dataset_title}{charcater with validation dataset name} 34 | 35 | \item{algorithms}{list of algorithms to use 36 | For binary classification task available algorithm are: 37 | "xgb" which is for Xgboost, 38 | "lgb" which is for LightGBM 39 | "mlp" which is for Neural Network, 40 | "rfc" which is for Random Forest, 41 | "etc" which is for Extra Trees, 42 | "rgfc" which is for Regularized Greedy Forest, 43 | "knnc" which is for k-Nearest Neighbors, 44 | "logreg" which is for Logistic Regression. 45 | For regression task there are available algorithms: 46 | "xgbr" which is for Xgboost, 47 | "lgbr" which is for LightGBM, 48 | "rgfr" which is for Regularized Greedy Forest, 49 | "rfr" which is for Random Forest, 50 | "etr" which is for Extra Trees.} 51 | 52 | \item{metric}{charcater with metric 53 | For binary classification there are metrics: 54 | "auc" which is for Area Under ROC Curve, 55 | "logloss" which is for Logarithmic Loss. 
56 | For regression tasks: 57 | "rmse" which is Root Mean Square Error, 58 | "mse" which is for Mean Square Error, 59 | "mase" which is for Mean Absolute Error.} 60 | 61 | \item{wait_till_all_done}{boolean saying whether function should wait 62 | till all models are done} 63 | 64 | \item{validation_kfolds}{number of folds to be used in validation} 65 | 66 | \item{validation_shuffle}{boolean which specify if shuffle samples before training} 67 | 68 | \item{validation_stratify}{boolean which decides whether samples will be 69 | divided into folds with the same class distribution} 70 | 71 | \item{validation_train_split}{ratio how to split training dataset into train and validation} 72 | 73 | \item{tuning_mode}{tuning mode} 74 | 75 | \item{create_ensemble}{whether or not to create ensemble} 76 | 77 | \item{single_algorithm_time_limit}{numeric with time limit to calculate algorithm} 78 | } 79 | \value{ 80 | structure with the best model 81 | } 82 | \description{ 83 | Verifies parameters and data and tries to run experiment. 84 | } 85 | -------------------------------------------------------------------------------- /man/mljar_predict.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/main.R 3 | \name{mljar_predict} 4 | \alias{mljar_predict} 5 | \title{MLJAR PREDICT} 6 | \usage{ 7 | mljar_predict(model, x_pred, project_title) 8 | } 9 | \arguments{ 10 | \item{model}{model or MLJAR result structure} 11 | 12 | \item{x_pred}{data.frame/matrix data to predict} 13 | 14 | \item{project_title}{character with project title} 15 | } 16 | \value{ 17 | data.frame with preditction 18 | } 19 | \description{ 20 | Makes prediction basing on trained model. 
21 | } 22 | -------------------------------------------------------------------------------- /man/prediction_download.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prediction_download.R 3 | \name{prediction_download} 4 | \alias{prediction_download} 5 | \title{Function to get predictions from MLJAR.} 6 | \usage{ 7 | prediction_download(prediction_hid) 8 | } 9 | \arguments{ 10 | \item{prediction_hid}{prediction identifier} 11 | } 12 | \value{ 13 | data.frame with prediction 14 | } 15 | \description{ 16 | Function to get predictions from MLJAR. 17 | } 18 | -------------------------------------------------------------------------------- /man/print_all_projects.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/projects.R 3 | \name{print_all_projects} 4 | \alias{print_all_projects} 5 | \title{Print all projects} 6 | \usage{ 7 | print_all_projects() 8 | } 9 | \value{ 10 | data.frame with projects 11 | } 12 | \description{ 13 | Gives data.frame with basic information about existing projects 14 | } 15 | -------------------------------------------------------------------------------- /man/submit_predict_job.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/predictjob.R 3 | \name{submit_predict_job} 4 | \alias{submit_predict_job} 5 | \title{Submits dataset for MLJAR prediction} 6 | \usage{ 7 | submit_predict_job(project_hid, dataset_hid, result_hid) 8 | } 9 | \arguments{ 10 | \item{project_hid}{character with project identifier} 11 | 12 | \item{dataset_hid}{character with dataset identifier} 13 | 14 | \item{result_hid}{character with result identifier} 15 | } 16 | \description{ 17 | Submits dataset for MLJAR 
prediction 18 | } 19 | -------------------------------------------------------------------------------- /man/upload_file.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dataupload.R 3 | \name{upload_file} 4 | \alias{upload_file} 5 | \title{Uploads file into MLJAR} 6 | \usage{ 7 | upload_file(project_hid, filepath) 8 | } 9 | \arguments{ 10 | \item{project_hid}{character with project identifier} 11 | 12 | \item{filepath}{character with path to file} 13 | } 14 | \value{ 15 | character with destination path 16 | } 17 | \description{ 18 | It uploads file into MLJAR and returns destination path. 19 | } 20 | -------------------------------------------------------------------------------- /mljar.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(mljar) 3 | 4 | # token needed by CRAN 5 | Sys.setenv(MLJAR_TOKEN="10bc57e737c2ca5516bb01ab29549978b53d83a4") 6 | gp <- get_projects() 7 | if (length(gp$projects)>0) { 8 | for (pr in gp$projects){ 9 | delete_project(pr$hid) 10 | } 11 | } 12 | test_check("mljar") 13 | Sys.unsetenv("MLJAR_TOKEN") 14 | -------------------------------------------------------------------------------- 
/tests/testthat/binary_part_iris_converted.csv: -------------------------------------------------------------------------------- 1 | sepal length,sepal width,petal length,petal width,class 2 | 5.1,3.5,1.4,0.2,0 3 | 4.9,3.0,1.4,0.2,0 4 | 4.7,3.2,1.3,0.2,0 5 | 4.6,3.1,1.5,0.2,0 6 | 5.0,3.6,1.4,0.2,0 7 | 5.4,3.9,1.7,0.4,0 8 | 4.6,3.4,1.4,0.3,0 9 | 5.0,3.4,1.5,0.2,0 10 | 4.4,2.9,1.4,0.2,0 11 | 4.9,3.1,1.5,0.1,0 12 | 5.4,3.7,1.5,0.2,0 13 | 4.8,3.4,1.6,0.2,0 14 | 4.8,3.0,1.4,0.1,0 15 | 4.3,3.0,1.1,0.1,0 16 | 5.8,4.0,1.2,0.2,0 17 | 5.7,4.4,1.5,0.4,0 18 | 5.4,3.9,1.3,0.4,0 19 | 5.1,3.5,1.4,0.3,0 20 | 5.7,3.8,1.7,0.3,0 21 | 5.1,3.8,1.5,0.3,0 22 | 5.4,3.4,1.7,0.2,0 23 | 5.1,3.7,1.5,0.4,0 24 | 4.6,3.6,1.0,0.2,0 25 | 5.1,3.3,1.7,0.5,0 26 | 4.8,3.4,1.9,0.2,0 27 | 5.0,3.0,1.6,0.2,0 28 | 5.0,3.4,1.6,0.4,0 29 | 5.2,3.5,1.5,0.2,0 30 | 5.2,3.4,1.4,0.2,0 31 | 4.7,3.2,1.6,0.2,0 32 | 4.8,3.1,1.6,0.2,0 33 | 5.4,3.4,1.5,0.4,0 34 | 5.2,4.1,1.5,0.1,0 35 | 5.5,4.2,1.4,0.2,0 36 | 4.9,3.1,1.5,0.1,0 37 | 5.0,3.2,1.2,0.2,0 38 | 5.5,3.5,1.3,0.2,0 39 | 4.9,3.1,1.5,0.1,0 40 | 4.4,3.0,1.3,0.2,0 41 | 5.1,3.4,1.5,0.2,0 42 | 5.0,3.5,1.3,0.3,0 43 | 4.5,2.3,1.3,0.3,0 44 | 4.4,3.2,1.3,0.2,0 45 | 5.0,3.5,1.6,0.6,0 46 | 5.1,3.8,1.9,0.4,0 47 | 4.8,3.0,1.4,0.3,0 48 | 5.1,3.8,1.6,0.2,0 49 | 4.6,3.2,1.4,0.2,0 50 | 5.3,3.7,1.5,0.2,0 51 | 5.0,3.3,1.4,0.2,0 52 | 7.0,3.2,4.7,1.4,1 53 | 6.4,3.2,4.5,1.5,1 54 | 6.9,3.1,4.9,1.5,1 55 | 5.5,2.3,4.0,1.3,1 56 | 6.5,2.8,4.6,1.5,1 57 | 5.7,2.8,4.5,1.3,1 58 | 6.3,3.3,4.7,1.6,1 59 | 4.9,2.4,3.3,1.0,1 60 | 6.6,2.9,4.6,1.3,1 61 | 5.2,2.7,3.9,1.4,1 62 | 5.0,2.0,3.5,1.0,1 63 | 5.9,3.0,4.2,1.5,1 64 | 6.0,2.2,4.0,1.0,1 65 | 6.1,2.9,4.7,1.4,1 66 | 5.6,2.9,3.6,1.3,1 67 | 6.7,3.1,4.4,1.4,1 68 | 5.6,3.0,4.5,1.5,1 69 | 5.8,2.7,4.1,1.0,1 70 | 6.2,2.2,4.5,1.5,1 71 | 5.6,2.5,3.9,1.1,1 72 | 5.9,3.2,4.8,1.8,1 73 | 6.1,2.8,4.0,1.3,1 74 | 6.3,2.5,4.9,1.5,1 75 | 6.1,2.8,4.7,1.2,1 76 | 6.4,2.9,4.3,1.3,1 77 | 6.6,3.0,4.4,1.4,1 78 | 6.8,2.8,4.8,1.4,1 79 | 6.7,3.0,5.0,1.7,1 80 | 6.0,2.9,4.5,1.5,1 
81 | 5.7,2.6,3.5,1.0,1 82 | 5.5,2.4,3.8,1.1,1 83 | 5.5,2.4,3.7,1.0,1 84 | 5.8,2.7,3.9,1.2,1 85 | 6.0,2.7,5.1,1.6,1 86 | 5.4,3.0,4.5,1.5,1 87 | 6.0,3.4,4.5,1.6,1 88 | 6.7,3.1,4.7,1.5,1 89 | 6.3,2.3,4.4,1.3,1 90 | 5.6,3.0,4.1,1.3,1 91 | 5.5,2.5,4.0,1.3,1 92 | 5.5,2.6,4.4,1.2,1 93 | 6.1,3.0,4.6,1.4,1 94 | 5.8,2.6,4.0,1.2,1 95 | 5.0,2.3,3.3,1.0,1 96 | 5.6,2.7,4.2,1.3,1 97 | 5.7,3.0,4.2,1.2,1 98 | 5.7,2.9,4.2,1.3,1 99 | 6.2,2.9,4.3,1.3,1 100 | 5.1,2.5,3.0,1.1,1 101 | 5.7,2.8,4.1,1.3,1 102 | -------------------------------------------------------------------------------- /tests/testthat/test_dataset.R: -------------------------------------------------------------------------------- 1 | library(mljar) 2 | context("Test API dataset") 3 | 4 | task <- "bin_class" 5 | pr <- create_project("ds", task, "some description") 6 | hid <- pr$hid 7 | 8 | 9 | test_that("test add_new_dataset", { 10 | file_from_resources <- "binary_part_iris_converted.csv" 11 | expect_error(add_new_dataset(hid, file_from_resources, "title"), NA) 12 | }) 13 | 14 | 15 | test_that("test get_datasets", { 16 | ds <- get_datasets(hid) 17 | expect_equal(length(get_datasets(hid)$datasets), 1) 18 | }) 19 | 20 | test_that("test get_dataset", { 21 | ds_hid <- get_datasets(hid)$datasets[[1]]$hid 22 | expect_equal(get_dataset(ds_hid)$dataset$hid, ds_hid) 23 | }) 24 | 25 | test_that("test .wait_till_all_datasets_are_valid", { 26 | expect_true(.wait_till_all_datasets_are_valid(hid)) 27 | }) 28 | 29 | test_that("test .accept_dataset_column_usage", { 30 | ds_hid <- get_datasets(hid)$datasets[[1]]$hid 31 | expect_true(.accept_dataset_column_usage(ds_hid)) 32 | }) 33 | 34 | test_that("test delete_dataset", { 35 | ds_hid <- get_datasets(hid)$datasets[[1]]$hid 36 | delete_dataset(ds_hid) 37 | expect_equal(length(get_datasets(hid)$datasets), 0) 38 | }) 39 | 40 | test_that( "test add_dataset_if_not_exists", { 41 | file_from_resources <- "binary_part_iris_converted.csv" 42 | expect_error(add_dataset_if_not_exists(hid, 
file_from_resources, "title-1"), 43 | NA) 44 | expect_warning(add_dataset_if_not_exists(hid, file_from_resources, "title-1"), 45 | "Dataset with the same name already exists: ") 46 | }) 47 | 48 | delete_project(hid) 49 | -------------------------------------------------------------------------------- /tests/testthat/test_dataupload.R: -------------------------------------------------------------------------------- 1 | library(mljar) 2 | context("Test API data upload") 3 | 4 | test_that("test data_upload", { 5 | task <- "Binary Classification" 6 | create_project('a', task, 'some description') 7 | gp <- get_projects() 8 | tmpfilepath <- tempfile() 9 | file.create(tmpfilepath) 10 | write.csv(c(1.0,2.0,1.1), file = tmpfilepath) 11 | project_hid <- gp$projects[[1]]$hid 12 | up <- upload_file(project_hid, tmpfilepath) 13 | expect_gt(nchar(up), 1) 14 | delete_project(project_hid) 15 | file.remove(tmpfilepath) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test_experiment.R: -------------------------------------------------------------------------------- 1 | library(mljar) 2 | context("Test experiment") 3 | 4 | task <- "bin_class" 5 | pr <- create_project('ds', task, 'some description') 6 | hid <- pr$hid 7 | 8 | pr_task <- pr$task 9 | file_from_resources <- "binary_part_iris_converted.csv" 10 | dataset1 <- add_dataset_if_not_exists(hid, file_from_resources, "test-exp1") 11 | 12 | validation_kfolds <- 5 13 | validation_shuffle <- TRUE 14 | validation_stratify <- TRUE 15 | validation_train_split <- NULL 16 | validation <- "5-fold CV, Shuffle, Stratify" 17 | algorithms <- c("xgb") 18 | metric <- "logloss" 19 | tuning_mode <- "Normal" 20 | time_constraint <- 1 21 | create_ensemble <- FALSE 22 | dataset_preproc <- {} 23 | 24 | test_that("test create_experiment", { 25 | params <- list( 26 | train_dataset = list(id = dataset1$dataset$hid, title = dataset1$dataset$title), 27 | algs = c(algorithms,""), 28 | preproc = 
dataset_preproc, 29 | single_limit = time_constraint, 30 | ensemble = create_ensemble, 31 | random_start_cnt = MLJAR_TUNING_MODES[[tuning_mode]][["random_start_cnt"]], 32 | hill_climbing_cnt = MLJAR_TUNING_MODES[[tuning_mode]][["hill_climbing_cnt"]] 33 | ) 34 | params <- jsonlite::toJSON(params, auto_unbox =TRUE) 35 | exp_data <- list( title = "exp-1", 36 | description = "", 37 | metric = metric, 38 | validation_scheme = validation, 39 | task = pr_task, 40 | compute_now = 1, 41 | parent_project = hid, 42 | params = params 43 | ) 44 | expect_error(create_experiment(exp_data), NA) 45 | 46 | }) 47 | 48 | test_that("test get_experiments", { 49 | ds <- get_experiments(hid) 50 | expect_equal(length(get_experiments(hid)$experiments), 1) 51 | }) 52 | 53 | test_that("test get_experiment", { 54 | ex_hid <- get_experiments(hid)$experiments[[1]]$hid 55 | expect_equal(get_experiment(ex_hid)$experiment$hid, ex_hid) 56 | }) 57 | 58 | test_that("test add_experiment_if_not_exists", { 59 | expect_error(add_experiment_if_not_exists(hid, dataset1$dataset, NULL, "exp-2", 60 | pr_task, validation_kfolds, validation_shuffle, 61 | validation_stratify, validation_train_split, algorithms, metric, 62 | tuning_mode, time_constraint, create_ensemble), NA) 63 | 64 | expect_error(add_experiment_if_not_exists(hid, dataset1$dataset, NULL, "exp-2", 65 | pr_task, validation_kfolds, validation_shuffle, 66 | validation_stratify, validation_train_split, algorithms, metric, 67 | tuning_mode, time_constraint, create_ensemble), 68 | "Please rename your new experiment with new parameters setup.") 69 | }) 70 | 71 | delete_project(hid) 72 | -------------------------------------------------------------------------------- /tests/testthat/test_main.R: -------------------------------------------------------------------------------- 1 | library(mljar) 2 | context("Test main") 3 | 4 | file_from_resources <- "binary_part_iris_converted.csv" 5 | irisdata <- read.csv(file_from_resources) 6 | dx <- irisdata[-5] 7 | dy 
<- irisdata[5] 8 | 9 | irisdata2 <- irisdata[sample(nrow(irisdata)),] 10 | x.tr <- irisdata2[1:80,-5] 11 | y.tr <- irisdata2[1:80,5] 12 | x.vl <- irisdata2[81:100,-5] 13 | y.vl <- irisdata2[81:100,5] 14 | 15 | expname <- "fullexp1" 16 | 17 | test_that("test mljar_fit reactions to bad arguments",{ 18 | expect_error(mljar_fit(NULL, NULL, validx=NULL, validy=NULL, 19 | proj_title="fullproject1", exp_title="fullexp2", 20 | algorithms = c("xgb"), metric = "logloss"), 21 | "NULL data" 22 | ) 23 | }) 24 | 25 | test_that("test mljar_fit and mljar_predict integration test",{ 26 | bs <- mljar_fit(x.tr, y.tr, validx=x.vl, validy=y.vl, 27 | proj_title="fullproject2", exp_title=expname, 28 | algorithms = c("logreg"), metric = "logloss") 29 | expect_equal(bs$experiment, expname) 30 | expect_equal(bs$status, "Done") 31 | expect_error(predvals <- mljar_predict(bs, x.vl, "fullproject2"), NA) 32 | expect_equal(as.numeric(predvals > 0.5), y.vl) 33 | # test running predict with model id 34 | model_hid <- bs$hid 35 | expect_error(predvals <- mljar_predict(model_hid, x.vl, "fullproject2"), NA) 36 | expect_equal(as.numeric(predvals > 0.5), y.vl) 37 | }) 38 | 39 | test_that("test get_all_models integration test",{ 40 | expect_error(get_all_models("fullproject2", "x"), 41 | "MLJAR cannot find an experiment with such a title. Check and try again.") 42 | expect_error(get_all_models("f", "x"), 43 | "MLJAR cannot find a project with such a title. 
Check and try again.") 44 | df <- get_all_models("fullproject2", expname) 45 | expect_equal(colnames(df), c("hid", "model_type", "metric_value", 46 | "metric_type", "validation_scheme")) 47 | }) 48 | 49 | projects <- get_projects() 50 | delete_project(projects$projects[[1]]$hid) 51 | -------------------------------------------------------------------------------- /tests/testthat/test_project.R: -------------------------------------------------------------------------------- 1 | library(mljar) 2 | context("Test API projects") 3 | 4 | test_that("test get_projects", { 5 | gp <- get_projects() 6 | expect_equal(length(gp$projects), 0) 7 | }) 8 | 9 | test_that("test create_project and get_projects", { 10 | task <- "bin_class" 11 | pr_a <- create_project("a", task, "description a") 12 | expect_match(pr_a$title, "a") 13 | pr_a <- create_project("b", task, "description b") 14 | expect_match(pr_a$title, "b") 15 | gp <- get_projects() 16 | expect_equal(length(gp$projects), 2) 17 | }) 18 | 19 | test_that("test .verify_if_project_exists", { 20 | task <- "bin_class" 21 | .verify_if_project_exists 22 | expect_error(.verify_if_project_exists("a", task), 23 | "Project with the same title and task already exists, change name.") 24 | }) 25 | 26 | test_that("test delete_project and get_projects", { 27 | gp <- get_projects() 28 | # here we search for project named a 29 | for(i in 1:length(gp$projects)) { 30 | if (gp$projects[[i]]$title=="a"){ 31 | break 32 | } 33 | } 34 | hid <- gp$projects[[i]]$hid 35 | expect_match(delete_project(hid), "succesfully deleted!") 36 | gp <- get_projects() 37 | expect_equal(length(gp$projects), 1) 38 | }) 39 | 40 | test_that("test print_all_projects", { 41 | df <- print_all_projects() 42 | expect_equal(colnames(df), c("hid", "title", "task", "description")) 43 | }) 44 | 45 | test_that("test get_project, delete_project and get_projects", { 46 | gp <- get_projects() 47 | hid <- gp$projects[[1]]$hid 48 | pr <- get_project(hid) 49 | 
expect_equal(pr$project$title, "b") 50 | expect_match(delete_project(hid), "succesfully deleted!") 51 | gp <- get_projects() 52 | expect_equal(length(gp$projects), 0) 53 | }) 54 | -------------------------------------------------------------------------------- /tests/testthat/test_results.R: -------------------------------------------------------------------------------- 1 | library(mljar) 2 | context("Test experiment") 3 | 4 | pr_task <- "bin_class" 5 | pr_title <- "ds" 6 | pr <- create_project(pr_title, pr_task, 'some description') 7 | hid <- pr$hid 8 | 9 | file_from_resources <- "binary_part_iris_converted.csv" 10 | dataset1 <- add_dataset_if_not_exists(hid, file_from_resources, "test-exp1") 11 | 12 | validation_kfolds <- 5 13 | validation_shuffle <- TRUE 14 | validation_stratify <-TRUE 15 | validation_train_split <- NULL 16 | validation <- "5-fold CV, Shuffle, Stratify" 17 | algorithms <- c("logreg") 18 | metric <- "logloss" 19 | tuning_mode <- "Normal" 20 | time_constraint <- 1 21 | create_ensemble <- FALSE 22 | dataset_preproc <- {} 23 | 24 | exp <- add_experiment_if_not_exists(hid, dataset1$dataset, NULL, "exp", 25 | pr_task, validation_kfolds, validation_shuffle, 26 | validation_stratify, validation_train_split, algorithms, metric, 27 | tuning_mode, time_constraint, create_ensemble) 28 | test_that("test get_results", { 29 | wait <- 5 30 | for (i in 1:wait){ 31 | Sys.sleep(4) # wait till experiment is initiated 32 | exp_dd <- get_experiment(exp$hid) 33 | if (exp_dd$experiment$compute_now == 2) { 34 | r <- get_results(hid, exp$hid) 35 | expect_equal(length(r$results), 5) 36 | break 37 | } 38 | } 39 | }) 40 | 41 | test_that("test get_model for bad arguments", { 42 | expect_error(get_model("xasxasdasda", "a", "a"), 43 | "MLJAR cannot find a project with such a title. 
Check and try again.") 44 | }) 45 | 46 | test_that("test get_model for right arguments", { 47 | exp_dd <- get_experiment(exp$hid) 48 | if (exp_dd$experiment$compute_now == 2) { 49 | rs <- get_results(hid, exp_dd$experiment$hid) 50 | model <- get_model(pr_title, exp_dd$experiment$title, rs$results[[1]]$hid) 51 | expect_equal(model$hid, rs$results[[1]]$hid) 52 | } 53 | }) 54 | 55 | delete_project(hid) 56 | -------------------------------------------------------------------------------- /tests/testthat/test_utils.R: -------------------------------------------------------------------------------- 1 | library(mljar) 2 | context("Test utils") 3 | 4 | test_that("test .get_token", { 5 | tok <- .get_token() 6 | expect_type(tok, "character") 7 | }) 8 | 9 | test_that("test .get_json_from_get_query", { 10 | query <- paste0(MLAR_API_PATH, API_VERSION, "/projects") 11 | r <- .get_json_from_get_query(query) 12 | expect_equal(names(r), c("resp", "parsed")) 13 | }) 14 | 15 | test_that("test .check_response_status", { 16 | query <- paste0(MLAR_API_PATH, API_VERSION, "/projects") 17 | r <- .get_json_from_get_query(query) 18 | expect_error(.check_response_status(r$resp, 200), NA) 19 | expect_error(.check_response_status(r$resp, 222, "omg"), "omg") 20 | }) 21 | 22 | test_that("test .obtain_task", { 23 | expect_equal(.obtain_task(c(1,0,0,0)), "bin_class") 24 | expect_equal(.obtain_task(c(1,2,3)), "reg") 25 | }) 26 | 27 | test_that("test .data_check", { 28 | expect_error(.data_check(c(1,2,3), data.frame(a=c(1,2), b=c(2,1))), 29 | "Sorry, multiple outputs are not supported in MLJAR") 30 | expect_error(.data_check(as.data.frame(c(1,2,3)), data.frame(a=c(1,2))), 31 | "Sorry, there is a missmatch between X and y matrices shapes") 32 | expect_error(.data_check(as.data.frame(c(1,2)), data.frame(a=c(1,2))), 33 | NA) 34 | }) 35 | 36 | test_that("test .data_to_file", { 37 | tmpf <- .data_to_file(c(1,2)) 38 | expect_type(tmpf, "character") 39 | expect_equal(unlist(strsplit(tmpf,"[.]"))[[2]], 
"csv") 40 | }) 41 | --------------------------------------------------------------------------------