21 | IMAGE_NAME: ${{ github.repository }}
22 | BRANCH_TAG: ${{ github.ref == 'refs/heads/devel' && 'unbiased-dev' || 'latest' }}
23 |
24 |
25 | jobs:
26 | build:
27 |
28 | runs-on: ubuntu-latest
29 | permissions:
30 | contents: read
31 | packages: write
32 | # This is used to complete the identity challenge
33 | # with sigstore/fulcio when running outside of PRs.
34 | id-token: write
35 |
36 | steps:
37 | - name: Checkout repository
38 | uses: actions/checkout@v4
39 |
40 | # Install the cosign tool except on PR
41 | # https://github.com/sigstore/cosign-installer
42 | - name: Install cosign
43 | if: github.event_name != 'pull_request'
44 | uses: sigstore/cosign-installer@main
45 |
46 | # Set up BuildKit Docker container builder to be able to build
47 | # multi-platform images and export cache
48 | # https://github.com/docker/setup-buildx-action
49 | - name: Set up Docker Buildx
50 | uses: docker/setup-buildx-action@f95db51fddba0c2d1ec667646a06c2ce06100226 # v3.0.0
51 |
52 | # Login against a Docker registry except on PR
53 | # https://github.com/docker/login-action
54 | - name: Log into registry ${{ env.REGISTRY }}
55 | if: github.event_name != 'pull_request'
56 | uses: docker/login-action@343f7c4344506bcbf9b4de18042ae17996df046d # v3.0.0
57 | with:
58 | registry: ${{ env.REGISTRY }}
59 | username: ${{ github.actor }}
60 | password: ${{ secrets.GITHUB_TOKEN }}
61 |
62 | # Extract metadata (tags, labels) for Docker
63 | # https://github.com/docker/metadata-action
64 | - name: Extract Docker metadata
65 | id: meta
66 | uses: docker/metadata-action@96383f45573cb7f253c731d3b3ab81c87ef81934 # v5.0.0
67 | with:
68 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
69 |
70 | # Build and push Docker image with Buildx (don't push on PR)
71 | # https://github.com/docker/build-push-action
72 | - name: Build and push Docker image
73 | id: build-and-push
74 | uses: docker/build-push-action@0565240e2d4ab88bba5387d719585280857ece09 # v5.0.0
75 | with:
76 | context: .
77 | push: ${{ github.event_name != 'pull_request' }}
78 | tags: ${{ steps.meta.outputs.tags }}
79 | labels: ${{ steps.meta.outputs.labels }}
80 | cache-from: type=gha
81 | cache-to: type=gha,mode=max
82 | provenance: false
83 | sbom: false
84 | build-args: github_sha=${{ github.sha }}
85 |
86 | # Sign the resulting Docker image digest except on PRs.
87 | # This will only write to the public Rekor transparency log when the Docker
88 | # repository is public to avoid leaking data. If you would like to publish
89 | # transparency data even for private images, pass --force to cosign below.
90 | # https://github.com/sigstore/cosign
91 | - name: Sign the published Docker image
92 | if: ${{ github.event_name != 'pull_request' }}
93 | env:
94 | # https://docs.github.com/en/actions/security-guides/security-hardening-for-github-actions#using-an-intermediate-environment-variable
95 | TAGS: ${{ steps.meta.outputs.tags }}
96 | DIGEST: ${{ steps.build-and-push.outputs.digest }}
97 | # This step uses the identity token to provision an ephemeral certificate
98 | # against the sigstore community Fulcio instance.
99 | run: echo "${TAGS}" | xargs -I {} cosign sign --yes {}@${DIGEST}
100 |
--------------------------------------------------------------------------------
/R/error-handling.R:
--------------------------------------------------------------------------------
1 | # hack to make sure we can mock the globalCallingHandlers
2 | # this method needs to be present in the package environment for mocking to work
3 | # linter disabled intentionally since this is internal method and cannot be renamed
4 | globalCallingHandlers <- NULL # nolint
5 |
6 | #' setup_sentry function
7 | #'
8 | #' This function is used to configure Sentry, a service for real-time error tracking.
9 | #' It uses the sentryR package to set up Sentry based on environment variables.
10 | #'
11 | #' @param None
12 | #' @noRd
13 | #'
14 | #' @return None. If the SENTRY_DSN environment variable is not set, the function will
15 | #' return a message and stop execution.
16 | #'
17 | #' @examples
18 | #' setup_sentry()
19 | #'
20 | #' @details
21 | #' The function first checks if the SENTRY_DSN environment variable is set. If not, it
22 | #' returns a message and stops execution.
23 | #' If SENTRY_DSN is set, it uses the sentryR::configure_sentry function to set up Sentry with
24 | #' the following parameters:
25 | #' - dsn: The Data Source Name (DSN) is retrieved from the SENTRY_DSN environment variable.
26 | #' - app_name: The application name is set to "unbiased".
27 | #' - app_version: The application version is retrieved from the GITHUB_SHA environment variable.
28 | #' If not set, it defaults to "unspecified".
29 | #' - environment: The environment is retrieved from the SENTRY_ENVIRONMENT environment variable.
30 | #' If not set, it defaults to "development".
31 | #' - release: The release is retrieved from the SENTRY_RELEASE environment variable.
32 | #' If not set, it defaults to "unspecified".
33 | #'
34 | #' @seealso \url{https://docs.sentry.io/}
35 | setup_sentry <- function() {
36 | sentry_dsn <- Sys.getenv("SENTRY_DSN")
37 | if (sentry_dsn == "") {
38 | message("SENTRY_DSN not set, skipping Sentry setup")
39 | return()
40 | }
41 |
42 | sentryR::configure_sentry(
43 | dsn = sentry_dsn,
44 | app_name = "unbiased",
45 | app_version = Sys.getenv("GITHUB_SHA", "unspecified"),
46 | environment = Sys.getenv("SENTRY_ENVIRONMENT", "development"),
47 | release = Sys.getenv("SENTRY_RELEASE", "unspecified")
48 | )
49 |
50 | globalCallingHandlers(
51 | error = global_calling_handler
52 | )
53 | }
54 |
55 | global_calling_handler <- function(error) {
56 | error$function_calls <- sys.calls()
57 | sentryR::capture_exception(error)
58 | signalCondition(error)
59 | }
60 |
61 | wrap_endpoint <- function(z) {
62 | f <- function(...) {
63 | return(withCallingHandlers(z(...), error = rlang::entrace))
64 | }
65 | return(f)
66 | }
67 |
68 | setup_invalid_json_handler <- function(api) {
69 | api |>
70 | plumber::pr_filter("validate_input_json", \(req, res) {
71 | if (length(req$bodyRaw) > 0) {
72 | request_body <- req$bodyRaw |> rawToChar()
73 | e <- tryCatch(
74 | {
75 | jsonlite::fromJSON(request_body)
76 | NULL
77 | },
78 | error = \(e) e
79 | )
80 | if (!is.null(e)) {
81 | print(glue::glue("Invalid JSON; requested endpoint: {req$PATH_INFO}"))
82 | audit_log_set_event_type("malformed_request", req)
83 | res$status <- 400
84 | return(list(
85 | error = jsonlite::unbox("Invalid JSON"),
86 | details = e$message |> strsplit("\n") |> unlist()
87 | ))
88 | }
89 | }
90 |
91 | plumber::forward()
92 | })
93 | }
94 |
95 | # nocov start
96 | default_error_handler <- function(req, res, error) {
97 | print(error, simplify = "branch")
98 |
99 | if (sentryR::is_sentry_configured()) {
100 | if ("trace" %in% names(error)) {
101 | error$function_calls <- error$trace$call
102 | } else if (!("function_calls" %in% names(error))) {
103 | error$function_calls <- sys.calls()
104 | }
105 |
106 | sentryR::capture_exception(error)
107 | }
108 |
109 | res$status <- 500
110 |
111 | list(
112 | error = "500 - Internal server error"
113 | )
114 | }
115 | # nocov end
116 |
117 | with_err_handler <- function(expr) {
118 | withCallingHandlers(
119 | expr = expr,
120 | error = rlang::entrace, bottom = rlang::caller_env()
121 | )
122 | }
123 |
--------------------------------------------------------------------------------
/man/randomize_minimisation_pocock.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/randomize-minimisation-pocock.R
3 | \name{randomize_minimisation_pocock}
4 | \alias{randomize_minimisation_pocock}
5 | \title{Patient Randomization Using Minimization Method}
6 | \usage{
7 | randomize_minimisation_pocock(
8 | arms,
9 | current_state,
10 | weights,
11 | ratio,
12 | method = "var",
13 | p = 0.85
14 | )
15 | }
16 | \arguments{
17 | \item{arms}{\code{character()}\cr
18 | Arm names.}
19 |
20 | \item{current_state}{\code{tibble()}\cr
21 | table of covariates and current arm assignments in column \code{arm},
22 | last row contains the new patient with empty string for \code{arm}}
23 |
24 | \item{weights}{\code{numeric()}\cr
25 | vector of positive weights, equal in length to number of covariates,
26 | numbered after covariates, defaults to equal weights}
27 |
28 | \item{ratio}{\code{integer()}\cr
29 | Vector of positive integers (0 is allowed), equal in length to number
30 | of arms, named after arms, defaults to equal weight}
31 |
32 | \item{method}{\code{character()}\cr
33 | Function used to compute within-arm variability, must be one of:
34 | \code{sd}, \code{var}, \code{range}, defaults to \code{var}}
35 |
36 | \item{p}{\code{numeric()}\cr
37 | single value, proportion of randomness (0, 1) in the randomization
38 | vs determinism, defaults to 85\% deterministic}
39 | }
40 | \value{
41 | \code{character()}\cr
42 | name of the arm assigned to the patient
43 | }
44 | \description{
45 | \loadmathjax
46 | The \code{randomize_dynamic} function implements the dynamic randomization
47 | algorithm using the minimization method proposed by Pocock (Pocock and Simon,
48 | 1975). It requires defining basic study parameters: the number of arms (K),
49 | number of covariates (C), patient allocation ratios (\(a_{k}\))
50 | (where k = 1,2,…., K), weights for the covariates (\(w_{i}\))
51 | (where i = 1,2,…., C), and the maximum probability (p) of assigning a patient
52 | to the group with the smallest total unbalance multiplied by
53 | the respective weights (\(G_{k}\)). As the total unbalance for the first
54 | patient is the same regardless of the assigned arm, this patient is randomly
55 | allocated to a given arm. Subsequent patients are randomized based on the
56 | calculation of the unbalance depending on the selected method: "range",
57 | "var" (variance), or "sd" (standard deviation). In the case of two arms,
58 | the "range" method is equivalent to the "sd" method.
59 | }
60 | \details{
61 | Initially, the algorithm creates a matrix of results comparing a newly
62 | randomized patient with the current balance of patients based on the defined
63 | covariates. In the next step, for each arm and specified covariate,
64 | various scenarios of patient allocation are calculated. The existing results
65 | (n) are updated with the new patient, and then, considering the ratio
66 | coefficients, the results are divided by the specific allocation ratio.
67 | Depending on the method, the total unbalance is then calculated,
68 | taking into account the weights, and the number of covariates using one
69 | of three methods (“sd”, “range”, “var”).
70 | Based on the number of defined arms, the minimum value of (\(G_{k}\))
71 | (defined as the weighted sum of the level-based imbalance) selects the arm to
72 | which the patient will be assigned with a predefined probability (p). The
73 | probability that a patient will be assigned to any other arm will then be
74 | equal (1-p)/(K-1)
75 | for each of the remaining arms.
76 | }
77 | \note{
78 | This function's implementation is a refactored adaptation
79 | of the codebase from the 'Minirand' package.
80 | }
81 | \examples{
82 | n_at_the_moment <- 10
83 | arms <- c("control", "active low", "active high")
84 | sex <- sample(c("F", "M"),
85 | n_at_the_moment + 1,
86 | replace = TRUE,
87 | prob = c(0.4, 0.6)
88 | )
89 | diabetes <-
90 | sample(c("diabetes", "no diabetes"),
91 | n_at_the_moment + 1,
92 | replace = TRUE,
93 | prob = c(0.2, 0.8)
94 | )
95 | arm <-
96 | sample(arms,
97 | n_at_the_moment,
98 | replace = TRUE,
99 | prob = c(0.4, 0.4, 0.2)
100 | ) |>
101 | c("")
102 | covar_df <- tibble::tibble(sex, diabetes, arm)
103 | covar_df
104 |
105 | randomize_minimisation_pocock(arms = arms, current_state = covar_df)
106 | randomize_minimisation_pocock(
107 | arms = arms, current_state = covar_df,
108 | ratio = c(
109 | "control" = 1,
110 | "active low" = 2,
111 | "active high" = 2
112 | ),
113 | weights = c(
114 | "sex" = 0.5,
115 | "diabetes" = 1
116 | )
117 | )
118 |
119 | }
120 | \references{
121 | Pocock, S. J., & Simon, R. (1975). Minimization: A new method
122 | of assigning patients to treatment and control groups in clinical trials.
123 |
124 | Minirand Package: Man Jin, Adam Polis, Jonathan Hartzel.
125 | (https://CRAN.R-project.org/package=Minirand)
126 | }
127 |
--------------------------------------------------------------------------------
/tests/testthat/test-E2E-get-study.R:
--------------------------------------------------------------------------------
1 | test_that("correct request to reads studies with the structure of the returned result", {
2 | source("./test-helpers.R")
3 | source("./audit-log-test-helpers.R")
4 |
5 | conn <- pool::localCheckout(
6 | get("db_connection_pool", envir = globalenv())
7 | )
8 | with_db_fixtures("fixtures/example_db.yml")
9 |
10 | # this endpoint should not be logged
11 | assert_audit_trail_for_test(c())
12 |
13 | response <- request(api_url) |>
14 | req_url_path("study", "") |>
15 | req_method("GET") |>
16 | req_perform()
17 |
18 | response_body <-
19 | response |>
20 | resp_body_json()
21 |
22 | testthat::expect_equal(response$status_code, 200)
23 |
24 | checkmate::expect_names(
25 | names(response_body[[1]]),
26 | identical.to = c("study_id", "identifier", "name", "method", "last_edited")
27 | )
28 |
29 | checkmate::expect_list(
30 | response_body[[1]],
31 | any.missing = TRUE,
32 | null.ok = FALSE,
33 | len = 5,
34 | type = c("numeric", "character", "character", "character", "character")
35 | )
36 |
37 | # Compliance of the number of tests
38 |
39 | n_studies <-
40 | dplyr::tbl(db_connection_pool, "study") |>
41 | collect() |>
42 | nrow()
43 |
44 | testthat::expect_equal(length(response_body), n_studies)
45 | })
46 |
47 | test_that("requests to reads records for chosen study_id with the structure of the returned result", {
48 | response <- request(api_url) |>
49 | req_url_path("study", "minimisation_pocock") |>
50 | req_method("POST") |>
51 | req_body_json(
52 | data = list(
53 | identifier = "ABC-X",
54 | name = "Study ABC-X",
55 | method = "var",
56 | p = 0.85,
57 | arms = list(
58 | "placebo" = 1,
59 | "active" = 1
60 | ),
61 | covariates = list(
62 | sex = list(
63 | weight = 1,
64 | levels = c("female", "male")
65 | ),
66 | weight = list(
67 | weight = 1,
68 | levels = c("up to 60kg", "61-80 kg", "81 kg or more")
69 | )
70 | )
71 | )
72 | ) |>
73 | req_perform()
74 |
75 | response_body <-
76 | response |>
77 | resp_body_json()
78 |
79 | response_study <-
80 | request(api_url) |>
81 | req_url_path("study", response_body$study$id) |>
82 | req_method("GET") |>
83 | req_perform()
84 |
85 | response_study_body <-
86 | response_study |>
87 | resp_body_json()
88 |
89 | testthat::expect_equal(response$status_code, 200)
90 |
91 | checkmate::expect_names(
92 | names(response_study_body),
93 | identical.to = c("study_id", "name", "randomization_method", "last_edited", "p", "method", "strata", "arms")
94 | )
95 |
96 | checkmate::expect_list(
97 | response_study_body,
98 | any.missing = TRUE,
99 | null.ok = TRUE,
100 | len = 8,
101 | type = c("numeric", "character", "character", "character", "numeric", "character", "list", "character")
102 | )
103 |
104 | response_study_id <-
105 | tryCatch(
106 | {
107 | request(api_url) |>
108 | req_url_path("study", response_body$study$id + 1) |>
109 | req_method("GET") |>
110 | req_perform()
111 | },
112 | error = function(e) e
113 | )
114 |
115 | testthat::expect_equal(response_study_id$status, 404)
116 | })
117 |
118 | test_that("correct request to reads randomization list with the structure of the returned result", {
119 | source("./test-helpers.R")
120 |
121 | conn <- pool::localCheckout(
122 | get("db_connection_pool", envir = globalenv())
123 | )
124 |
125 | with_db_fixtures("fixtures/example_db.yml")
126 |
127 | response <-
128 | request(api_url) |>
129 | req_url_path("/study/1/randomization_list") |>
130 | req_method("GET") |>
131 | req_perform()
132 |
133 | response_body <-
134 | response |>
135 | resp_body_json()
136 |
137 | testthat::expect_equal(response$status_code, 200)
138 |
139 | checkmate::expect_names(
140 | names(response_body[[1]]),
141 | identical.to = c("patient_id", "arm", "used", "sys_period")
142 | )
143 |
144 | checkmate::expect_set_equal(
145 | x = response_body |>
146 | dplyr::bind_rows() |>
147 | dplyr::pull(patient_id),
148 | y = c(1, 2, 3, 4)
149 | )
150 | })
151 |
152 | test_that("incorrect input study_id to reads randomization list", {
153 | source("./test-helpers.R")
154 |
155 | conn <- pool::localCheckout(
156 | get("db_connection_pool", envir = globalenv())
157 | )
158 | with_db_fixtures("fixtures/example_db.yml")
159 |
160 | response <-
161 | tryCatch(
162 | {
163 | request(api_url) |>
164 | req_url_path("study/100/randomization_list") |>
165 | req_method("GET") |>
166 | req_perform()
167 | },
168 | error = function(e) e
169 | )
170 |
171 | testthat::expect_equal(response$status, 404)
172 | })
173 |
--------------------------------------------------------------------------------
/R/db.R:
--------------------------------------------------------------------------------
1 | #' Defines methods for interacting with the study in the database
2 |
3 | #' Create a database connection pool
4 | #'
5 | #' This function creates a connection pool to a PostgreSQL database. It uses
6 | #' environment variables to get the necessary connection parameters. If the
7 | #' connection fails, it will retry up to 5 times with a delay of 2 seconds
8 | #' between each attempt.
9 | #'
10 | #' @return A pool object representing the connection pool to the database.
11 | #' @noRd
12 | #' @examples
13 | #' \dontrun{
14 | #' pool <- create_db_connection_pool()
15 | #' }
16 | create_db_connection_pool <- purrr::insistently(function() {
17 | dbname <- Sys.getenv("POSTGRES_DB")
18 | host <- Sys.getenv("POSTGRES_HOST")
19 | port <- Sys.getenv("POSTGRES_PORT", 5432)
20 | user <- Sys.getenv("POSTGRES_USER")
21 | password <- Sys.getenv("POSTGRES_PASSWORD")
22 | print(
23 | glue::glue("Creating database connection pool to {dbname} at {host}:{port} as {user}")
24 | )
25 | pool::dbPool(
26 | RPostgres::Postgres(),
27 | dbname = dbname,
28 | host = host,
29 | port = port,
30 | user = user,
31 | password = password
32 | )
33 | }, rate = purrr::rate_delay(1, max_times = 15), quiet = FALSE)
34 |
35 |
36 | get_similar_studies <- function(name, identifier) {
37 | db_connection_pool <- get("db_connection_pool")
38 | similar <-
39 | dplyr::tbl(db_connection_pool, "study") |>
40 | dplyr::select(id, name, identifier) |>
41 | dplyr::filter(name == !!name | identifier == !!identifier) |>
42 | dplyr::collect()
43 | similar
44 | }
45 |
46 | check_study_exist <- function(study_id) {
47 | db_connection_pool <- get("db_connection_pool")
48 | study_exists <- dplyr::tbl(db_connection_pool, "study") |>
49 | dplyr::filter(id == !!study_id) |>
50 | dplyr::collect() |>
51 | nrow() > 0
52 | study_exists
53 | }
54 |
55 | create_study <- function(
56 | name, identifier, method, parameters, arms, strata) {
57 | db_connection_pool <- get("db_connection_pool", envir = .GlobalEnv)
58 | connection <- pool::localCheckout(db_connection_pool)
59 |
60 | DBI::dbWithTransaction(
61 | connection,
62 | {
63 | study_record <- list(
64 | name = name,
65 | identifier = identifier,
66 | method = method,
67 | parameters = jsonlite::toJSON(parameters, auto_unbox = TRUE)
68 | |> as.character()
69 | )
70 |
71 | study <- DBI::dbGetQuery(
72 | connection,
73 | "INSERT INTO study (name, identifier, method, parameters)
74 | VALUES ($1, $2, $3, $4)
75 | RETURNING id, name, identifier, method, parameters",
76 | unname(study_record)
77 | )
78 |
79 | study <- as.list(study)
80 | study$parameters <- jsonlite::fromJSON(study$parameters)
81 |
82 | arm_records <- arms |>
83 | purrr::imap(\(x, name) list(name = name, ratio = x)) |>
84 | purrr::map(tibble::as_tibble) |>
85 | purrr::list_c()
86 | arm_records$study_id <- study$id
87 |
88 | DBI::dbWriteTable(
89 | connection,
90 | "arm",
91 | arm_records,
92 | append = TRUE,
93 | row.names = FALSE
94 | )
95 |
96 | created_arms <- DBI::dbGetQuery(
97 | connection,
98 | "SELECT id, study_id, name, ratio
99 | FROM arm
100 | WHERE study_id = $1",
101 | study$id
102 | )
103 |
104 | study$arms <- created_arms
105 |
106 | stratum_records <- strata |>
107 | purrr::imap(\(x, name) list(name = name, value_type = x$value_type)) |>
108 | purrr::map(tibble::as_tibble) |>
109 | purrr::list_c()
110 | stratum_records$study_id <- study$id
111 |
112 | DBI::dbWriteTable(
113 | connection,
114 | "stratum",
115 | stratum_records,
116 | append = TRUE,
117 | row.names = FALSE
118 | )
119 |
120 | created_strata <- DBI::dbGetQuery(
121 | connection,
122 | "SELECT id, study_id, name, value_type
123 | FROM stratum
124 | WHERE study_id = $1",
125 | study$id
126 | )
127 |
128 | factor_constraints <- strata |>
129 | purrr::imap(\(x, name) tibble::as_tibble(x)) |>
130 | purrr::list_c() |>
131 | dplyr::filter(.data$value_type == "factor") |>
132 | dplyr::select(name, levels) |>
133 | dplyr::left_join(created_strata, dplyr::join_by("name")) |>
134 | dplyr::select(id, levels) |>
135 | dplyr::rename(value = levels, stratum_id = id)
136 |
137 | DBI::dbWriteTable(
138 | connection,
139 | "factor_constraint",
140 | factor_constraints,
141 | append = TRUE,
142 | row.names = FALSE
143 | )
144 |
145 | list(study = study)
146 | }
147 | )
148 | }
149 |
150 | save_patient <- function(study_id, arm_id, used) {
151 | DBI::dbGetQuery(
152 | db_connection_pool,
153 | "INSERT INTO patient (arm_id, study_id, used)
154 | VALUES ($1, $2, $3)
155 | RETURNING id, arm_id, used",
156 | list(arm_id, study_id, used)
157 | )
158 | }
159 |
--------------------------------------------------------------------------------
/man/AuditLog.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/audit-trail.R
3 | \name{AuditLog}
4 | \alias{AuditLog}
5 | \title{AuditLog Class}
6 | \description{
7 | This class is used internally to store audit logs for each request.
8 | }
9 | \section{Methods}{
10 | \subsection{Public methods}{
11 | \itemize{
12 | \item \href{#method-AuditLog-new}{\code{AuditLog$new()}}
13 | \item \href{#method-AuditLog-disable}{\code{AuditLog$disable()}}
14 | \item \href{#method-AuditLog-is_enabled}{\code{AuditLog$is_enabled()}}
15 | \item \href{#method-AuditLog-set_request_body}{\code{AuditLog$set_request_body()}}
16 | \item \href{#method-AuditLog-set_response_body}{\code{AuditLog$set_response_body()}}
17 | \item \href{#method-AuditLog-set_ip_address}{\code{AuditLog$set_ip_address()}}
18 | \item \href{#method-AuditLog-set_user_agent}{\code{AuditLog$set_user_agent()}}
19 | \item \href{#method-AuditLog-set_event_type}{\code{AuditLog$set_event_type()}}
20 | \item \href{#method-AuditLog-set_study_id}{\code{AuditLog$set_study_id()}}
21 | \item \href{#method-AuditLog-set_response_code}{\code{AuditLog$set_response_code()}}
22 | \item \href{#method-AuditLog-validate_log}{\code{AuditLog$validate_log()}}
23 | \item \href{#method-AuditLog-persist}{\code{AuditLog$persist()}}
24 | \item \href{#method-AuditLog-clone}{\code{AuditLog$clone()}}
25 | }
26 | }
27 | \if{html}{\out{
}}
28 | \if{html}{\out{}}
29 | \if{latex}{\out{\hypertarget{method-AuditLog-new}{}}}
30 | \subsection{Method \code{new()}}{
31 | \subsection{Usage}{
32 | \if{html}{\out{}}\preformatted{AuditLog$new(request_method, endpoint_url)}\if{html}{\out{
}}
33 | }
34 |
35 | }
36 | \if{html}{\out{
}}
37 | \if{html}{\out{}}
38 | \if{latex}{\out{\hypertarget{method-AuditLog-disable}{}}}
39 | \subsection{Method \code{disable()}}{
40 | \subsection{Usage}{
41 | \if{html}{\out{}}\preformatted{AuditLog$disable()}\if{html}{\out{
}}
42 | }
43 |
44 | }
45 | \if{html}{\out{
}}
46 | \if{html}{\out{}}
47 | \if{latex}{\out{\hypertarget{method-AuditLog-is_enabled}{}}}
48 | \subsection{Method \code{is_enabled()}}{
49 | \subsection{Usage}{
50 | \if{html}{\out{}}\preformatted{AuditLog$is_enabled()}\if{html}{\out{
}}
51 | }
52 |
53 | }
54 | \if{html}{\out{
}}
55 | \if{html}{\out{}}
56 | \if{latex}{\out{\hypertarget{method-AuditLog-set_request_body}{}}}
57 | \subsection{Method \code{set_request_body()}}{
58 | \subsection{Usage}{
59 | \if{html}{\out{}}\preformatted{AuditLog$set_request_body(request_body)}\if{html}{\out{
}}
60 | }
61 |
62 | }
63 | \if{html}{\out{
}}
64 | \if{html}{\out{}}
65 | \if{latex}{\out{\hypertarget{method-AuditLog-set_response_body}{}}}
66 | \subsection{Method \code{set_response_body()}}{
67 | \subsection{Usage}{
68 | \if{html}{\out{}}\preformatted{AuditLog$set_response_body(response_body)}\if{html}{\out{
}}
69 | }
70 |
71 | }
72 | \if{html}{\out{
}}
73 | \if{html}{\out{}}
74 | \if{latex}{\out{\hypertarget{method-AuditLog-set_ip_address}{}}}
75 | \subsection{Method \code{set_ip_address()}}{
76 | \subsection{Usage}{
77 | \if{html}{\out{}}\preformatted{AuditLog$set_ip_address(ip_address)}\if{html}{\out{
}}
78 | }
79 |
80 | }
81 | \if{html}{\out{
}}
82 | \if{html}{\out{}}
83 | \if{latex}{\out{\hypertarget{method-AuditLog-set_user_agent}{}}}
84 | \subsection{Method \code{set_user_agent()}}{
85 | \subsection{Usage}{
86 | \if{html}{\out{}}\preformatted{AuditLog$set_user_agent(user_agent)}\if{html}{\out{
}}
87 | }
88 |
89 | }
90 | \if{html}{\out{
}}
91 | \if{html}{\out{}}
92 | \if{latex}{\out{\hypertarget{method-AuditLog-set_event_type}{}}}
93 | \subsection{Method \code{set_event_type()}}{
94 | \subsection{Usage}{
95 | \if{html}{\out{}}\preformatted{AuditLog$set_event_type(event_type)}\if{html}{\out{
}}
96 | }
97 |
98 | }
99 | \if{html}{\out{
}}
100 | \if{html}{\out{}}
101 | \if{latex}{\out{\hypertarget{method-AuditLog-set_study_id}{}}}
102 | \subsection{Method \code{set_study_id()}}{
103 | \subsection{Usage}{
104 | \if{html}{\out{}}\preformatted{AuditLog$set_study_id(study_id)}\if{html}{\out{
}}
105 | }
106 |
107 | }
108 | \if{html}{\out{
}}
109 | \if{html}{\out{}}
110 | \if{latex}{\out{\hypertarget{method-AuditLog-set_response_code}{}}}
111 | \subsection{Method \code{set_response_code()}}{
112 | \subsection{Usage}{
113 | \if{html}{\out{}}\preformatted{AuditLog$set_response_code(response_code)}\if{html}{\out{
}}
114 | }
115 |
116 | }
117 | \if{html}{\out{
}}
118 | \if{html}{\out{}}
119 | \if{latex}{\out{\hypertarget{method-AuditLog-validate_log}{}}}
120 | \subsection{Method \code{validate_log()}}{
121 | \subsection{Usage}{
122 | \if{html}{\out{}}\preformatted{AuditLog$validate_log()}\if{html}{\out{
}}
123 | }
124 |
125 | }
126 | \if{html}{\out{
}}
127 | \if{html}{\out{}}
128 | \if{latex}{\out{\hypertarget{method-AuditLog-persist}{}}}
129 | \subsection{Method \code{persist()}}{
130 | \subsection{Usage}{
131 | \if{html}{\out{}}\preformatted{AuditLog$persist()}\if{html}{\out{
}}
132 | }
133 |
134 | }
135 | \if{html}{\out{
}}
136 | \if{html}{\out{}}
137 | \if{latex}{\out{\hypertarget{method-AuditLog-clone}{}}}
138 | \subsection{Method \code{clone()}}{
139 | The objects of this class are cloneable with this method.
140 | \subsection{Usage}{
141 | \if{html}{\out{}}\preformatted{AuditLog$clone(deep = FALSE)}\if{html}{\out{
}}
142 | }
143 |
144 | \subsection{Arguments}{
145 | \if{html}{\out{}}
146 | \describe{
147 | \item{\code{deep}}{Whether to make a deep clone.}
148 | }
149 | \if{html}{\out{
}}
150 | }
151 | }
152 | }
153 |
--------------------------------------------------------------------------------
/tests/testthat/test-DB-study.R:
--------------------------------------------------------------------------------
1 | source("./test-helpers.R")
2 |
3 | pool <- get("db_connection_pool", envir = globalenv())
4 |
5 | test_that("it is enough to provide a name, an identifier, and a method id", {
6 | conn <- pool::localCheckout(pool)
7 | with_db_fixtures("fixtures/example_db.yml")
8 | testthat::expect_no_error({
9 | dplyr::tbl(conn, "study") |>
10 | dplyr::rows_append(
11 | tibble::tibble(
12 | identifier = "FINE",
13 | name = "Correctly working study",
14 | method = "minimisation_pocock"
15 | ),
16 | copy = TRUE, in_place = TRUE
17 | )
18 | })
19 | })
20 |
21 | # first study id is 1
22 | new_study_id <- as.integer(1)
23 |
24 | test_that("deleting archivizes a study", {
25 | conn <- pool::localCheckout(pool)
26 | with_db_fixtures("fixtures/example_db.yml")
27 | testthat::expect_no_error({
28 | dplyr::tbl(conn, "study") |>
29 | dplyr::rows_delete(
30 | tibble::tibble(id = new_study_id),
31 | copy = TRUE, in_place = TRUE, unmatched = "ignore"
32 | )
33 | })
34 |
35 | testthat::expect_identical(
36 | dplyr::tbl(conn, "study_history") |>
37 | dplyr::filter(id == new_study_id) |>
38 | dplyr::select(-parameters, -sys_period, -timestamp) |>
39 | dplyr::collect(),
40 | tibble::tibble(
41 | id = new_study_id,
42 | identifier = "TEST",
43 | name = "Test Study",
44 | method = "minimisation_pocock"
45 | )
46 | )
47 | })
48 |
49 | test_that("can't push arm with negative ratio", {
50 | conn <- pool::localCheckout(pool)
51 | with_db_fixtures("fixtures/example_db.yml")
52 | testthat::expect_error(
53 | {
54 | dplyr::tbl(conn, "arm") |>
55 | dplyr::rows_append(
56 | tibble::tibble(
57 | study_id = 1,
58 | name = "Exception-throwing arm",
59 | ratio = -1
60 | ),
61 | copy = TRUE, in_place = TRUE
62 | )
63 | },
64 | regexp = "violates check constraint"
65 | )
66 | })
67 |
68 | test_that("can't push stratum other than factor or numeric", {
69 | conn <- pool::localCheckout(pool)
70 | with_db_fixtures("fixtures/example_db.yml")
71 | testthat::expect_error(
72 | {
73 | tbl(conn, "stratum") |>
74 | rows_append(
75 | tibble(
76 | study_id = 1,
77 | name = "failing stratum",
78 | value_type = "array"
79 | ),
80 | copy = TRUE, in_place = TRUE
81 | )
82 | },
83 | regexp = "violates check constraint"
84 | )
85 | })
86 |
87 | test_that("can't push stratum level outside of defined levels", {
88 | conn <- pool::localCheckout(pool)
89 | with_db_fixtures("fixtures/example_db.yml")
90 | # create a new patient
91 | return <-
92 | testthat::expect_no_error({
93 | dplyr::tbl(conn, "patient") |>
94 | dplyr::rows_append(
95 | tibble::tibble(
96 | study_id = 1,
97 | arm_id = 1,
98 | used = TRUE
99 | ),
100 | copy = TRUE, in_place = TRUE, returning = id
101 | ) |>
102 | dbplyr::get_returned_rows()
103 | })
104 |
105 | added_patient_id <- return$id
106 |
107 | testthat::expect_error(
108 | {
109 | dplyr::tbl(conn, "patient_stratum") |>
110 | dplyr::rows_append(
111 | tibble::tibble(
112 | patient_id = added_patient_id,
113 | stratum_id = 1,
114 | fct_value = "Female"
115 | ),
116 | copy = TRUE, in_place = TRUE
117 | )
118 | },
119 | regexp = "Factor value not specified as allowed"
120 | )
121 |
122 | # add legal value
123 | testthat::expect_no_error({
124 | dplyr::tbl(conn, "patient_stratum") |>
125 | dplyr::rows_append(
126 | tibble::tibble(
127 | patient_id = added_patient_id,
128 | stratum_id = 1,
129 | fct_value = "F"
130 | ),
131 | copy = TRUE, in_place = TRUE
132 | )
133 | })
134 | })
135 |
136 | test_that("numerical constraints are enforced", {
137 | conn <- pool::localCheckout(pool)
138 | with_db_fixtures("fixtures/example_db.yml")
139 | added_patient_id <- as.integer(1)
140 | return <-
141 | testthat::expect_no_error({
142 | dplyr::tbl(conn, "stratum") |>
143 | dplyr::rows_append(
144 | tibble::tibble(
145 | study_id = 1,
146 | name = "age",
147 | value_type = "numeric"
148 | ),
149 | copy = TRUE, in_place = TRUE, returning = id
150 | ) |>
151 | dbplyr::get_returned_rows()
152 | })
153 |
154 | added_stratum_id <- return$id
155 |
156 | testthat::expect_no_error({
157 | dplyr::tbl(conn, "numeric_constraint") |>
158 | dplyr::rows_append(
159 | tibble::tibble(
160 | stratum_id = added_stratum_id,
161 | min_value = 18,
162 | max_value = 64
163 | ),
164 | copy = TRUE, in_place = TRUE
165 | )
166 | })
167 |
168 | # and you can't add an illegal value
169 | testthat::expect_error(
170 | {
171 | dplyr::tbl(conn, "patient_stratum") |>
172 | dplyr::rows_append(
173 | tibble::tibble(
174 | patient_id = added_patient_id,
175 | stratum_id = added_stratum_id,
176 | num_value = 16
177 | ),
178 | copy = TRUE, in_place = TRUE
179 | )
180 | },
181 | regexp = "New value is lower than minimum"
182 | )
183 |
184 | # you can add valid value
185 | testthat::expect_no_error({
186 | dplyr::tbl(conn, "patient_stratum") |>
187 | dplyr::rows_append(
188 | dplyr::tibble(
189 | patient_id = added_patient_id,
190 | stratum_id = added_stratum_id,
191 | num_value = 23
192 | ),
193 | copy = TRUE, in_place = TRUE
194 | )
195 | })
196 |
197 | # but you cannot add two values for one patient one stratum
198 | testthat::expect_error(
199 | {
200 | dplyr::tbl(conn, "patient_stratum") |>
201 | dplyr::rows_append(
202 | tibble::tibble(
203 | patient_id = added_patient_id,
204 | stratum_id = added_stratum_id,
205 | num_value = 24
206 | ),
207 | copy = TRUE, in_place = TRUE
208 | )
209 | },
210 | regexp = "duplicate key value violates unique constraint"
211 | )
212 | })
213 |
--------------------------------------------------------------------------------
/R/audit-trail.R:
--------------------------------------------------------------------------------
1 | #' AuditLog Class
2 | #'
3 | #' This class is used internally to store audit logs for each request.
4 | AuditLog <- R6::R6Class( # nolint: object_name_linter.
5 | "AuditLog",
6 | public = list(
7 | initialize = function(request_method, endpoint_url) {
8 | private$request_id <- uuid::UUIDgenerate()
9 | private$request_method <- request_method
10 | private$endpoint_url <- endpoint_url
11 | },
12 | disable = function() {
13 | private$disabled <- TRUE
14 | },
15 | is_enabled = function() {
16 | !private$disabled
17 | },
18 | set_request_body = function(request_body) {
19 | if (typeof(request_body) == "list") {
20 | request_body <- jsonlite::toJSON(request_body, auto_unbox = TRUE) |> as.character()
21 | } else if (!is.character(request_body)) {
22 | request_body <- NA
23 | }
24 | private$request_body <- request_body
25 | },
26 | set_response_body = function(response_body) {
27 | checkmate::assert_false(
28 | typeof(response_body) == "list"
29 | )
30 | private$response_body <- response_body
31 | },
32 | set_ip_address = function(ip_address) {
33 | private$ip_address <- ip_address
34 | },
35 | set_user_agent = function(user_agent) {
36 | private$user_agent <- user_agent
37 | },
38 | set_event_type = function(event_type) {
39 | private$event_type <- event_type
40 | },
41 | set_study_id = function(study_id) {
42 | private$study_id <- study_id
43 | },
44 | set_response_code = function(response_code) {
45 | private$response_code <- response_code
46 | },
47 | validate_log = function() {
48 | checkmate::assert(
49 | !private$disabled
50 | )
51 | if (is.null(private$event_type)) {
52 | if (private$response_code == 404) {
53 | # "soft" validation failure for 404 errors
54 | # it might be just invalid endpoint
55 | # so we don't want to fail the request
56 | return(FALSE)
57 | } else {
58 | stop("Event type not set for audit log. Please set the event type using `audit_log_event_type`")
59 | }
60 | }
61 | return(TRUE)
62 | },
63 | persist = function() {
64 | checkmate::assert(
65 | !private$disabled
66 | )
67 | db_conn <- pool::localCheckout(db_connection_pool)
68 | values <- list(
69 | private$request_id,
70 | private$event_type,
71 | private$study_id,
72 | private$endpoint_url,
73 | private$request_method,
74 | private$request_body,
75 | private$response_code,
76 | private$response_body,
77 | private$ip_address,
78 | private$user_agent
79 | )
80 |
81 | values <- purrr::map(values, \(x) ifelse(is.null(x), NA, x))
82 |
83 | DBI::dbGetQuery(
84 | db_conn,
85 | "INSERT INTO audit_log (
86 | request_id,
87 | event_type,
88 | study_id,
89 | endpoint_url,
90 | request_method,
91 | request_body,
92 | response_code,
93 | response_body,
94 | ip_address,
95 | user_agent
96 | )
97 | VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)",
98 | values
99 | )
100 | }
101 | ),
102 | private = list(
103 | disabled = FALSE,
104 | request_id = NULL,
105 | event_type = NULL,
106 | study_id = NULL,
107 | endpoint_url = NULL,
108 | request_method = NULL,
109 | response_code = NULL,
110 | request_body = NULL,
111 | response_body = NULL,
112 | ip_address = NULL,
113 | user_agent = NULL
114 | )
115 | )
116 |
117 |
118 | #' Set up audit trail
119 | #'
120 | #' This function sets up an audit trail for a given process. It uses plumber's hooks to log
121 | #' information before routing (preroute) and after serializing the response (postserialize).
122 | #'
123 | #' This function modifies the plumber router in place and returns the updated router.
124 | #'
125 | #' @param pr A plumber router for which the audit trail is to be set up.
126 | #' @param endpoints A list of regex patterns for which the audit trail should be enabled.
127 | #' @noRd
128 | #' @return Returns the updated plumber router with the audit trail hooks.
129 | #' @examples
130 | #' pr <- plumber::plumb("your-api-definition.R") |>
131 | #' setup_audit_trail()
132 | setup_audit_trail <- function(pr, endpoints = list()) {
133 | checkmate::assert_list(endpoints, types = "character")
134 | is_enabled_for_request <- function(req) {
135 | any(sapply(endpoints, \(endpoint) grepl(endpoint, req$PATH_INFO)))
136 | }
137 |
138 | hooks <- list(
139 | preroute = function(req, res) {
140 | with_err_handler({
141 | if (!is_enabled_for_request(req)) {
142 | return()
143 | }
144 | audit_log <- AuditLog$new(
145 | request_method = req$REQUEST_METHOD,
146 | endpoint_url = req$PATH_INFO
147 | )
148 | req$.internal.audit_log <- audit_log
149 | })
150 | },
151 | postserialize = function(req, res) {
152 | with_err_handler({
153 | audit_log <- req$.internal.audit_log
154 | if (is.null(audit_log) || !audit_log$is_enabled()) {
155 | return()
156 | }
157 | audit_log$set_response_code(res$status)
158 | audit_log$set_request_body(req$body)
159 | audit_log$set_response_body(res$body)
160 | audit_log$set_ip_address(req$REMOTE_ADDR)
161 | audit_log$set_user_agent(req$HTTP_USER_AGENT)
162 |
163 | log_valid <- audit_log$validate_log()
164 |
165 | if (log_valid) {
166 | audit_log$persist()
167 | }
168 | })
169 | }
170 | )
171 | pr |>
172 | plumber::pr_hooks(hooks)
173 | }
174 |
175 | #' Set Audit Log Event Type
176 | #'
177 | #' This function sets the event type for an audit log. It retrieves the audit log from the request's
178 | #' internal data, and then calls the audit log's set_event_type method with the provided event type.
179 | #'
180 | #' @param event_type The event type to be set for the audit log.
181 | #' @param req The request object, which should contain an audit log in its internal data.
182 | #' @return Returns nothing as it modifies the audit log in-place.
183 | audit_log_set_event_type <- function(event_type, req) {
184 | audit_log <- req$.internal.audit_log
185 | if (!is.null(audit_log)) {
186 | audit_log$set_event_type(event_type)
187 | }
188 | }
189 |
190 | #' Set Audit Log Study ID
191 | #'
192 | #' This function sets the study ID for an audit log. It retrieves the audit log from the request's
193 | #' internal data, and then calls the audit log's set_study_id method with the provided study ID.
194 | #'
195 | #' @param study_id The study ID to be set for the audit log.
196 | #' @param req The request object, which should contain an audit log in its internal data.
197 | #' @return Returns nothing as it modifies the audit log in-place.
198 | audit_log_set_study_id <- function(study_id, req) {
199 | checkmate::assert(!is.null(study_id) && is.numeric(study_id), "Study ID must be a number")
200 | audit_log <- req$.internal.audit_log
201 | if (!is.null(audit_log)) {
202 | audit_log$set_study_id(study_id)
203 | }
204 | }
205 |
206 | audit_log_disable_for_request <- function(req) {
207 | audit_log <- req$.internal.audit_log
208 | if (!is.null(audit_log)) {
209 | audit_log$disable()
210 | }
211 | }
212 |
--------------------------------------------------------------------------------
/tests/testthat/setup-testing-environment.R:
--------------------------------------------------------------------------------
1 | library(checkmate)
2 | library(dplyr)
3 | library(dbplyr)
4 | library(httr2)
5 |
6 | run_psql <- function(statement) {
7 | withr::local_envvar(
8 | PGPASSWORD = Sys.getenv("POSTGRES_PASSWORD")
9 | )
10 |
11 | # Construct the command
12 | command <- paste(
13 | "psql",
14 | "--host", shQuote(Sys.getenv("POSTGRES_HOST")),
15 | "--port", shQuote(Sys.getenv("POSTGRES_PORT")),
16 | "--username", shQuote(Sys.getenv("POSTGRES_USER")),
17 | "--dbname", shQuote(Sys.getenv("POSTGRES_DB")),
18 | "--command", shQuote(statement),
19 | sep = " "
20 | )
21 |
22 | system(command, intern = TRUE)
23 | }
24 |
25 | run_migrations <- function() {
26 | # Construct the connection string
27 | user <- Sys.getenv("POSTGRES_USER")
28 | password <- Sys.getenv("POSTGRES_PASSWORD")
29 | host <- Sys.getenv("POSTGRES_HOST")
30 | port <- Sys.getenv("POSTGRES_PORT", "5432")
31 | db <- Sys.getenv("POSTGRES_DB")
32 |
33 | print(
34 | glue::glue(
35 | "Running migrations on database {db} at {host}:{port}"
36 | )
37 | )
38 |
39 | migrations_path <- glue::glue(
40 | "{root_repo_directory}/inst/db/migrations"
41 | )
42 | if (!dir.exists(migrations_path)) {
43 | # If the migrations directory does not exist
44 | # we will assume that the package is installed
45 | # and inst directory content is copied to the root directory
46 | migrations_path <- glue::glue(
47 | "{root_repo_directory}/db/migrations"
48 | )
49 | }
50 |
51 | db_connection_string <-
52 | glue::glue(
53 | "postgres://{user}:{password}@{host}:{port}/{db}?sslmode=disable"
54 | )
55 | command <- "migrate"
56 | args <- c(
57 | "-database",
58 | db_connection_string,
59 | "-path",
60 | migrations_path,
61 | "up"
62 | )
63 |
64 | system2(command, args)
65 | }
66 |
67 | create_database <- function(db_name) {
68 | # make sure we are not creating the database that we are using for connection
69 | assert(
70 | db_name != Sys.getenv("POSTGRES_DB"),
71 | "Cannot create the database that is used for connection"
72 | )
73 | print(
74 | glue::glue(
75 | "Creating database {db_name}"
76 | )
77 | )
78 | run_psql(
79 | glue::glue(
80 | "CREATE DATABASE {db_name}"
81 | )
82 | )
83 | }
84 |
85 | drop_database <- function(db_name) {
86 | # make sure we are not dropping the database that we are using for connection
87 | assert(
88 | db_name != Sys.getenv("POSTGRES_DB"),
89 | "Cannot drop the database that is used for connection"
90 | )
91 | # first, terminate all connections to the database
92 | print(
93 | glue::glue(
94 | "Terminating all connections to the database {db_name}"
95 | )
96 | )
97 | run_psql(
98 | glue::glue(
99 | "SELECT pg_terminate_backend(pg_stat_activity.pid)
100 | FROM pg_stat_activity
101 | WHERE pg_stat_activity.datname = '{db_name}'
102 | AND pid <> pg_backend_pid();"
103 | )
104 | )
105 | print(
106 | glue::glue(
107 | "Dropping database {db_name}"
108 | )
109 | )
110 | run_psql(
111 | glue::glue(
112 | "DROP DATABASE {db_name}"
113 | )
114 | )
115 | }
116 |
117 | setup_test_db_connection_pool <- function(envir = parent.frame()) {
118 | # We will create a connection pool to the database
119 | # and store it in the global environment
120 | # so that we can use it in the tests
121 | # without having to pass it around
122 | db_connection_pool <- unbiased:::create_db_connection_pool()
123 | assign("db_connection_pool", db_connection_pool, envir = globalenv())
124 | withr::defer(
125 | {
126 | print("Closing database connection pool")
127 | db_connection_pool$close()
128 | assign("db_connection_pool", NULL, envir = globalenv())
129 | },
130 | envir = envir
131 | )
132 | }
133 |
134 | # Make sure to disable Sentry during testing
135 | withr::local_envvar(
136 | SENTRY_DSN = NULL
137 | )
138 |
139 | # We will always run the API on the localhost
140 | # and on a random port
141 | api_host <- "127.0.0.1"
142 | api_port <- httpuv::randomPort()
143 |
144 | api_url <- glue::glue("http://{api_host}:{api_port}")
145 | print(glue::glue("API URL: {api_url}"))
146 |
147 | # make sure we are in the root directory of the repository
148 | # this is necessary to run the database migrations
149 | # as well as to run the plumber API
150 | current_working_dir <- getwd()
151 | root_repo_directory <-
152 | glue::glue(current_working_dir, "/../../") |>
153 | normalizePath()
154 | setwd(root_repo_directory)
155 |
156 | # append __test suffix to the database name
157 | # we will use this as a convention to create a test database
158 | # we have to avoid messing with the original database
159 | db_name <- Sys.getenv("POSTGRES_DB")
160 | db_name_test <- glue::glue("{db_name}__test")
161 |
162 | # create the test database using connection with the original database
163 | create_database(db_name_test)
164 |
165 | # now that the database is created, we can set the environment variable
166 | # to the test database name
167 | # we will be working on the test database from now on
168 | withr::local_envvar(
169 | list(
170 | POSTGRES_DB = db_name_test
171 | )
172 | )
173 |
174 | # drop the test database upon exiting
175 | withr::defer(
176 | {
177 | # make sure db_name_test ends with __test before dropping it
178 | assert(
179 | stringr::str_detect(db_name_test, "__test$"),
180 | "db_name_test should end with __test"
181 | )
182 | setwd(root_repo_directory)
183 | drop_database(db_name_test)
184 | },
185 | teardown_env()
186 | )
187 |
188 | # run migrations
189 | exit_code <- run_migrations()
190 | if (exit_code != 0) {
191 | stop(
192 | glue::glue(
193 | "Failed to run database migrations",
194 | "exit code: {exit_code}"
195 | )
196 | )
197 | }
198 |
199 | # We will run the unbiased API in the background
200 | # and wait until it starts
201 | # We are setting the environment variables
202 | # so that the unbiased API will start an HTTP server
203 | # on the specified host and port without coliision
204 | # with the main API that might be running on the same machine
205 | withr::local_envvar(
206 | list(
207 | UNBIASED_HOST = api_host,
208 | UNBIASED_PORT = api_port
209 | )
210 | )
211 |
212 | # Mock GITHUB_SHA as valid sha if it is not set
213 | github_sha <- Sys.getenv(
214 | "GITHUB_SHA",
215 | "6e21b5b689cc9737ba0d24147ed4b634c7146a28"
216 | )
217 | if (github_sha == "") {
218 | github_sha <- "6e21b5b689cc9737ba0d24147ed4b634c7146a28"
219 | }
220 | withr::local_envvar(
221 | list(
222 | GITHUB_SHA = github_sha
223 | )
224 | )
225 |
226 | stdout_file <- withr::local_tempfile(
227 | fileext = ".log",
228 | .local_envir = teardown_env()
229 | )
230 |
231 | stderr_file <- withr::local_tempfile(
232 | fileext = ".log",
233 | .local_envir = teardown_env()
234 | )
235 |
236 | plumber_process <- callr::r_bg(
237 | \() {
238 | if (!requireNamespace("unbiased", quietly = TRUE)) {
239 | # There is no installed unbiased package
240 | # In that case, we will assume that we are running
241 | # on the development machine
242 | # and we will load the package using devtools
243 | print("Installing unbiased package using devtools")
244 | devtools::load_all()
245 | }
246 |
247 | unbiased:::run_unbiased()
248 | },
249 | supervise = TRUE,
250 | stdout = stdout_file,
251 | stderr = stderr_file,
252 | )
253 |
254 | withr::defer(
255 | {
256 | print("Server STDOUT:")
257 | lines <- readLines(stdout_file)
258 | writeLines(lines)
259 | print("Server STDERR:")
260 | lines <- readLines(stderr_file)
261 | writeLines(lines)
262 | print("Sending SIGINT to plumber process")
263 | plumber_process$interrupt()
264 |
265 | print("Waiting for plumber process to exit")
266 | plumber_process$wait()
267 | },
268 | teardown_env()
269 | )
270 |
271 | # go back to the original working directory
272 | # that is used by the testthat package
273 | setwd(current_working_dir)
274 |
275 | setup_test_db_connection_pool(envir = teardown_env())
276 |
277 | # Retry a request until the API starts
278 | print("Waiting for the API to start...")
279 | request(api_url) |>
280 | # Endpoint that should be always available
281 | req_url_path("meta", "sha") |>
282 | req_method("GET") |>
283 | req_retry(
284 | max_seconds = 30,
285 | backoff = \(x) 1
286 | ) |>
287 | req_perform()
288 | print("API started, running tests...")
289 |
--------------------------------------------------------------------------------
/vignettes/articles/references.bib:
--------------------------------------------------------------------------------
1 | % Encoding: UTF-8
2 |
3 | @article{lim2019randomization,
4 | title={Randomization in clinical studies},
5 | author={Lim, Chi-Yeon and In, Junyong},
6 | journal={Korean journal of anesthesiology},
7 | volume={72},
8 | number={3},
9 | pages={221--232},
10 | year={2019},
11 | publisher={Korean Society of Anesthesiologists}
12 | }
13 |
14 | @article{goldfeld2020simstudy,
15 | title = {simstudy: Illuminating research methods through data generation},
16 | author = {Keith Goldfeld and Jacob Wujciak-Jens},
17 | publisher = {The Open Journal},
18 | journal = {Journal of Open Source Software},
19 | year = {2020},
20 | volume = {5},
21 | number = {54},
22 | pages = {2763},
23 | url = {https://doi.org/10.21105/joss.02763},
24 | doi = {10.21105/joss.02763},
25 | }
26 |
27 | @article{mrozikiewicz2023allogenic,
28 | title={Allogenic Adipose-Derived Stem Cells in Diabetic Foot Ulcer Treatment: Clinical Effectiveness, Safety, Survival in the Wound Site, and Proteomic Impact},
29 | author={Mrozikiewicz-Rakowska, Beata and Szab{\l}owska-Gadomska, Ilona and Cysewski, Dominik and Rudzi{\'n}ski, Stefan and P{\l}oski, Rafa{\l} and Gasperowicz, Piotr and Konarzewska, Magdalena and Zieli{\'n}ski, Jakub and Mieczkowski, Mateusz and Sie{\'n}ko, Damian and others},
30 | journal={International Journal of Molecular Sciences},
31 | volume={24},
32 | number={2},
33 | pages={1472},
34 | year={2023},
35 | publisher={MDPI}
36 | }
37 |
38 | @article{pocock1975sequential,
39 | title={Sequential treatment assignment with balancing for prognostic factors in the controlled clinical trial},
40 | author={Pocock, Stuart J and Simon, Richard},
41 | journal={Biometrics},
42 | pages={103--115},
43 | year={1975},
44 | publisher={JSTOR}
45 | }
46 |
47 | @book{rosenberger2015randomization,
48 | title={Randomization in clinical trials: theory and practice},
49 | author={Rosenberger, William F and Lachin, John M},
50 | year={2015},
51 | publisher={John Wiley \& Sons}
52 | }
53 |
54 | @article{lee2021estimating,
55 | title={Estimating COVID-19 infection and severity risks in patients with chronic rhinosinusitis: a Korean nationwide cohort study},
56 | author={Lee, Seung Won and Kim, So Young and Moon, Sung Yong and Yang, Jee Myung and Ha, Eun Kyo and Jee, Hye Mi and Shin, Jae Il and Cho, Seong Ho and Yon, Dong Keon and Suh, Dong In},
57 | journal={The Journal of Allergy and Clinical Immunology: In Practice},
58 | volume={9},
59 | number={6},
60 | pages={2262--2271},
61 | year={2021},
62 | publisher={Elsevier}
63 | }
64 |
65 | @article{austin2009balance,
66 | title={Balance diagnostics for comparing the distribution of baseline covariates between treatment groups in propensity-score matched samples},
67 | author={Austin, Peter C},
68 | journal={Statistics in medicine},
69 | volume={28},
70 | number={25},
71 | pages={3083--3107},
72 | year={2009},
73 | publisher={Wiley Online Library}
74 | }
75 |
76 | @article{doah2021impact,
77 | title={The impact of primary tumor resection on survival in asymptomatic colorectal cancer patients with unresectable metastases},
78 | author={Doah, Ki Yoon and Shin, Ui Sup and Jeon, Byong Ho and Cho, Sang Sik and Moon, Sun Mi},
79 | journal={Annals of Coloproctology},
80 | volume={37},
81 | number={2},
82 | pages={94},
83 | year={2021},
84 | publisher={Korean Society of Coloproctology}
85 | }
86 |
87 | @article{brown2020novel,
88 | title={A novel approach for propensity score matching and stratification for multiple treatments: Application to an electronic health record--derived study},
89 | author={Brown, Derek W and DeSantis, Stacia M and Greene, Thomas J and Maroufy, Vahed and Yaseen, Ashraf and Wu, Hulin and Williams, George and Swartz, Michael D},
90 | journal={Statistics in medicine},
91 | volume={39},
92 | number={17},
93 | pages={2308--2323},
94 | year={2020},
95 | publisher={Wiley Online Library}
96 | }
97 |
98 | @article{nguyen2017double,
99 | title={Double-adjustment in propensity score matching analysis: choosing a threshold for considering residual imbalance},
100 | author={Nguyen, Tri-Long and Collins, Gary S and Spence, Jessica and Daur{\`e}s, Jean-Pierre and Devereaux, PJ and Landais, Paul and Le Manach, Yannick},
101 | journal={BMC medical research methodology},
102 | volume={17},
103 | pages={1--8},
104 | year={2017},
105 | publisher={Springer}
106 | }
107 |
108 | @article{sanchez2003effect,
109 | title={Effect-size indices for dichotomized outcomes in meta-analysis.},
110 | author={S{\'a}nchez-Meca, Julio and Mar{\'\i}n-Mart{\'\i}nez, Fulgencio and Chac{\'o}n-Moscoso, Salvador},
111 | journal={Psychological methods},
112 | volume={8},
113 | number={4},
114 | pages={448},
115 | year={2003},
116 | publisher={American Psychological Association}
117 | }
118 |
119 | @article{lee2022propensity,
120 | title={Propensity score matching for causal inference and reducing the confounding effects: statistical standard and guideline of Life Cycle Committee},
121 | author={Lee, Seung Won and Acharya, Krishna Prasad and others},
122 | journal={Life Cycle},
123 | volume={2},
124 | year={2022},
125 | publisher={Life Cycle}
126 | }
127 |
128 | @article{zhang2019balance,
129 | title={Balance diagnostics after propensity score matching},
130 | author={Zhang, Zhongheng and Kim, Hwa Jung and Lonjon, Guillaume and Zhu, Yibing and others},
131 | journal={Annals of translational medicine},
132 | volume={7},
133 | number={1},
134 | year={2019},
135 | publisher={AME Publications}
136 | }
137 |
138 | @Manual{truncnorm,
139 | title = {truncnorm: Truncated Normal Distribution},
140 | author = {Olaf Mersmann and Heike Trautmann and Detlef Steuer and Björn Bornkamp},
141 | year = {2023},
142 | note = {R package version 1.0-9},
143 | url = {https://github.com/olafmersmann/truncnorm},
144 | }
145 |
146 | @article{burkardt2014truncated,
147 | title={The truncated normal distribution},
148 | author={Burkardt, John},
149 | journal={Department of Scientific Computing Website, Florida State University},
150 | volume={1},
151 | pages={35},
152 | year={2014}
153 | }
154 |
155 | @Manual{tableone,
156 | title = {tableone: Create 'Table 1' to Describe Baseline Characteristics with or
157 | without Propensity Score Weights},
158 | author = {Kazuki Yoshida and Alexander Bartel},
159 | year = {2022},
160 | note = {R package version 0.13.2},
161 | url = {https://github.com/kaz-yos/tableone},
162 | }
163 | @article{randomizeR,
164 | title = {{randomizeR}: An {R} Package for the Assessment and Implementation of Randomization in Clinical Trials},
165 | author = {Diane Uschner and David Schindler and Ralf-Dieter Hilgers and Nicole Heussen},
166 | journal = {Journal of Statistical Software},
167 | year = {2018},
168 | volume = {85},
169 | number = {8},
170 | pages = {1--22},
171 | doi = {10.18637/jss.v085.i08},
172 | }
173 |
174 |
175 | @article{gtsummary,
176 | author = {Daniel D. Sjoberg and Karissa Whiting and Michael Curry and Jessica A. Lavery and Joseph Larmarange},
177 | title = {Reproducible Summary Tables with the gtsummary Package},
178 | journal = {{The R Journal}},
179 | year = {2021},
180 | url = {https://doi.org/10.32614/RJ-2021-053},
181 | doi = {10.32614/RJ-2021-053},
182 | volume = {13},
183 | issue = {1},
184 | pages = {570-580},
185 | }
186 |
187 | @article{berger2021roadmap,
188 | title={A roadmap to using randomization in clinical trials},
189 | author={Berger, Vance W and Bour, Louis Joseph and Carter, Kerstine and Chipman, Jonathan J and Everett, Colin C and Heussen, Nicole and Hewitt, Catherine and Hilgers, Ralf-Dieter and Luo, Yuqun Abigail and Renteria, Jone and others},
190 | journal={BMC Medical Research Methodology},
191 | volume={21},
192 | pages={1--24},
193 | year={2021},
194 | publisher={Springer}
195 | }
196 |
197 | @article{kang2008issues,
198 | title={Issues in outcomes research: an overview of randomization techniques for clinical trials},
199 | author={Kang, Minsoo and Ragan, Brian G and Park, Jae-Hyeon},
200 | journal={Journal of athletic training},
201 | volume={43},
202 | number={2},
203 | pages={215--221},
204 | year={2008},
205 | publisher={The National Athletic Trainers' Association, Inc c/o Hughston Sports~…}
206 | }
207 |
208 | @Manual{truncnorm,
209 | title = {truncnorm: Truncated Normal Distribution},
210 | author = {Olaf Mersmann and Heike Trautmann and Detlef Steuer and Björn Bornkamp},
211 | year = {2023},
212 | note = {R package version 1.0-9},
213 | url = {https://github.com/olafmersmann/truncnorm},
214 | }
215 |
216 | @Manual{unbiased,
217 | title = {unbiased: Diverse Randomization Algorithms for Clinical Trials},
218 | author = {Kamil Sijko and Kinga Sałata and Aleksandra Duda and Łukasz Wałejko},
219 | year = {2024},
220 | note = {R package version 1.0.2},
221 | url = {https://ttscience.github.io/unbiased/},
222 | }
223 |
--------------------------------------------------------------------------------
/inst/db/migrations/20240129082653_create_tables.up.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE study (
2 | id SERIAL PRIMARY KEY,
3 | identifier VARCHAR(12) NOT NULL,
4 | name VARCHAR(255) NOT NULL,
5 | method VARCHAR(255) NOT NULL,
6 | parameters JSONB,
7 | timestamp TIMESTAMPTZ NOT NULL DEFAULT now(),
8 | sys_period TSTZRANGE NOT NULL
9 | );
10 |
11 | COMMENT ON TABLE study IS 'Stores information about various studies conducted.';
12 | COMMENT ON COLUMN study.id IS 'An auto-incrementing primary key uniquely identifying each study.';
13 | COMMENT ON COLUMN study.identifier IS 'A unique, short textual identifier for the study (max 12 characters).';
14 | COMMENT ON COLUMN study.name IS 'Provides the full name or title of the study.';
15 | COMMENT ON COLUMN study.method IS 'A randomization method name.';
16 | COMMENT ON COLUMN study.parameters IS 'JSONB column to store parameters related to the study.';
17 | COMMENT ON COLUMN study.timestamp IS 'Timestamp of when the record was created, defaults to current time.';
18 | COMMENT ON COLUMN study.sys_period IS 'TSTZRANGE type used for temporal versioning to track the validity period of each record.';
19 |
20 | CREATE TABLE arm (
21 | id SERIAL PRIMARY KEY,
22 | study_id INT NOT NULL,
23 | name VARCHAR(255) NOT NULL,
24 | ratio INT NOT NULL DEFAULT 1,
25 | sys_period TSTZRANGE NOT NULL,
26 | CONSTRAINT arm_study
27 | FOREIGN KEY (study_id)
28 | REFERENCES study (id) ON DELETE CASCADE,
29 | CONSTRAINT uc_arm_study
30 | UNIQUE (id, study_id),
31 | CONSTRAINT ratio_positive
32 | CHECK (ratio > 0)
33 | );
34 |
35 | COMMENT ON TABLE arm IS 'Represents the treatment arms within each study.';
36 | COMMENT ON COLUMN arm.id IS 'An auto-incrementing primary key that uniquely identifies each arm.';
37 | COMMENT ON COLUMN arm.study_id IS 'A foreign key that links each arm to its corresponding study.';
38 | COMMENT ON COLUMN arm.name IS 'Provides a descriptive name for the treatment arm.';
39 | COMMENT ON COLUMN arm.ratio IS 'Specifies the proportion of patients allocated to this arm. It defaults to 1 and must always be positive.';
40 | COMMENT ON COLUMN arm.sys_period IS 'TSTZRANGE type used for temporal versioning to track the validity period of each record.';
41 |
42 | CREATE TABLE stratum (
43 | id SERIAL PRIMARY KEY,
44 | study_id INT NOT NULL,
45 | name VARCHAR(255) NOT NULL,
46 | value_type VARCHAR(12),
47 | sys_period TSTZRANGE NOT NULL,
48 | CONSTRAINT fk_study
49 | FOREIGN KEY (study_id)
50 | REFERENCES study (id) ON DELETE CASCADE,
51 | CONSTRAINT chk_value_type
52 | CHECK (value_type IN ('factor', 'numeric'))
53 | );
54 |
55 | COMMENT ON TABLE stratum IS 'Defines the strata for patient categorization within each study.';
56 |
57 | COMMENT ON COLUMN stratum.id IS 'An auto-incrementing primary key that uniquely identifies each stratum.';
58 | COMMENT ON COLUMN stratum.study_id IS 'A foreign key that links the stratum to a specific study.';
59 | COMMENT ON COLUMN stratum.name IS 'Provides a descriptive name for the stratum, such as a particular demographic or clinical characteristic.';
60 | COMMENT ON COLUMN stratum.value_type IS 'Indicates the type of value the stratum represents, limited to two types: ''factor'' or ''numeric''. ''factor'' represents categorical data, while ''numeric'' represents numerical data. This distinction is crucial as it informs the data validation logic applied in the system.';
61 | COMMENT ON COLUMN stratum.sys_period IS 'TSTZRANGE type used for temporal versioning to track the validity period of each record.';
62 |
63 | CREATE TABLE stratum_level (
64 | stratum_id INT NOT NULL,
65 | level VARCHAR(255) NOT NULL,
66 | CONSTRAINT fk_stratum_level
67 | FOREIGN KEY (stratum_id)
68 | REFERENCES stratum (id) ON DELETE CASCADE,
69 | CONSTRAINT uc_stratum_level
70 | UNIQUE (stratum_id, level)
71 | );
72 | COMMENT ON TABLE stratum_level IS 'Keeps allowed stratum factor levels.';
73 |
74 | COMMENT ON COLUMN stratum_level.stratum_id IS 'A foreign key that links the stratum level to a specific stratum.';
75 | COMMENT ON COLUMN stratum_level.level IS 'Level label, has to be unique within stratum.';
76 |
77 | CREATE TABLE factor_constraint (
78 | stratum_id INT NOT NULL,
79 | value VARCHAR(255) NOT NULL,
80 | sys_period TSTZRANGE NOT NULL,
81 | CONSTRAINT factor_stratum
82 | FOREIGN KEY (stratum_id)
83 | REFERENCES stratum (id) ON DELETE CASCADE,
84 | CONSTRAINT uc_stratum_value
85 | UNIQUE (stratum_id, value)
86 | );
87 |
88 | COMMENT ON TABLE factor_constraint IS 'Defines constraints for strata of the ''factor'' type in studies. This table stores allowable values for each factor stratum, ensuring data consistency and integrity.';
89 |
90 | COMMENT ON COLUMN factor_constraint.stratum_id IS 'A foreign key that links the constraint to a specific stratum in the ''stratum'' table.';
91 | COMMENT ON COLUMN factor_constraint.value IS 'Represents the specific allowable value for the factor stratum. This could be a categorical label like ''male'' or ''female'' for a gender stratum, for example.';
92 | COMMENT ON COLUMN factor_constraint.sys_period IS 'TSTZRANGE type used for temporal versioning to track the validity period of each record.';
93 |
94 | CREATE TABLE numeric_constraint (
95 | stratum_id INT NOT NULL,
96 | min_value FLOAT,
97 | max_value FLOAT,
98 | sys_period TSTZRANGE NOT NULL,
99 | CONSTRAINT numeric_stratum
100 | FOREIGN KEY (stratum_id)
101 | REFERENCES stratum (id) ON DELETE CASCADE,
102 | CONSTRAINT uc_stratum
103 | UNIQUE (stratum_id),
104 | CONSTRAINT chk_min_max
105 | -- NULL is ok in checks, no need to test for it
106 | CHECK (min_value <= max_value)
107 | );
108 |
109 | COMMENT ON TABLE numeric_constraint IS 'Specifies constraints for strata of the ''numeric'' type in studies. This table defines the permissible range (minimum and maximum values) for each numeric stratum.';
110 |
111 | COMMENT ON COLUMN numeric_constraint.stratum_id IS 'A foreign key that links the constraint to a specific numeric stratum in the ''stratum'' table.';
112 | COMMENT ON COLUMN numeric_constraint.min_value IS 'Defines the minimum allowable value for the stratum''s numeric values. Can be NULL, indicating that there is no lower bound.';
113 | COMMENT ON COLUMN numeric_constraint.max_value IS 'Defines the maximum allowable value for the stratum''s numeric values. Can be NULL, indicating that there is no upper bound.';
114 | COMMENT ON COLUMN numeric_constraint.sys_period IS 'TSTZRANGE type used for temporal versioning to track the validity period of each record.';
115 |
116 | CREATE TABLE patient (
117 | id SERIAL PRIMARY KEY,
118 | study_id INT NOT NULL,
119 | arm_id INT,
120 | used BOOLEAN NOT NULL DEFAULT false,
121 | -- timestamp TIMESTAMPTZ NOT NULL DEFAULT now(),
122 | sys_period TSTZRANGE NOT NULL,
123 | CONSTRAINT patient_arm_study
124 | FOREIGN KEY (arm_id, study_id)
125 | REFERENCES arm (id, study_id) ON DELETE CASCADE,
126 | CONSTRAINT used_with_arm
127 | CHECK (NOT used OR arm_id IS NOT NULL)
128 | );
129 |
130 |
131 | COMMENT ON TABLE patient IS 'Represents individual patients participating in the studies.';
132 | COMMENT ON COLUMN patient.id IS 'An auto-incrementing primary key that uniquely identifies each patient.';
133 | COMMENT ON COLUMN patient.study_id IS 'A foreign key linking the patient to a specific study.';
134 | COMMENT ON COLUMN patient.arm_id IS 'An optional foreign key that links the patient to a specific treatment arm within the study.';
135 | COMMENT ON COLUMN patient.used IS 'A boolean flag indicating the state of the patient in the randomization process.';
136 | COMMENT ON COLUMN patient.sys_period IS 'Type TSTZRANGE, used for temporal versioning to track the validity period of each record.';
137 | COMMENT ON CONSTRAINT patient_arm_study ON patient IS 'Ensures referential integrity between patients, studies, and arms. It also cascades deletions to maintain consistency when a study or arm is deleted.';
138 | COMMENT ON CONSTRAINT used_with_arm ON patient IS 'Ensures logical consistency by allowing ''used'' to be true only if the patient is assigned to an arm (i.e., ''arm_id'' is not NULL). This prevents scenarios where a patient is marked as used but not assigned to any treatment arm.';
139 |
140 |
141 | CREATE TABLE patient_stratum (
142 | patient_id INT NOT NULL,
143 | stratum_id INT NOT NULL,
144 | fct_value VARCHAR(255),
145 | num_value FLOAT,
146 | sys_period TSTZRANGE NOT NULL,
147 | CONSTRAINT fk_patient
148 | FOREIGN KEY (patient_id)
149 | REFERENCES patient (id) ON DELETE CASCADE,
150 | CONSTRAINT fk_stratum_2
151 | FOREIGN KEY (stratum_id)
152 | REFERENCES stratum (id) ON DELETE CASCADE,
153 | CONSTRAINT chk_value_exists
154 | -- Either factor or numeric value must be given
155 | CHECK (fct_value IS NOT NULL OR num_value IS NOT NULL),
156 | CONSTRAINT chk_one_value_only
157 | -- Can't give both factor and numeric value
158 | CHECK (fct_value IS NULL OR num_value IS NULL),
159 | CONSTRAINT uc_patient_stratum
160 | UNIQUE (patient_id, stratum_id)
161 | );
162 |
163 |
164 | COMMENT ON TABLE patient_stratum IS 'Associates patients with specific strata and records the corresponding stratum values.';
165 | COMMENT ON COLUMN patient_stratum.patient_id IS 'A foreign key that links to the ''patient'' table, identifying the patient.';
166 | COMMENT ON COLUMN patient_stratum.stratum_id IS 'A foreign key that links to the ''stratum'' table, identifying the stratum to which the patient belongs.';
167 | COMMENT ON COLUMN patient_stratum.fct_value IS 'Stores the categorical (factor) value for the patient in the corresponding stratum, if applicable.';
168 | COMMENT ON COLUMN patient_stratum.num_value IS 'Stores the numerical value for the patient in the corresponding stratum, if applicable.';
169 | COMMENT ON COLUMN patient_stratum.sys_period IS 'Type TSTZRANGE, used for temporal versioning to track the validity period of each record.';
170 | COMMENT ON CONSTRAINT fk_patient ON patient_stratum IS 'Links each patient-stratum pairing to the respective tables.';
171 | COMMENT ON CONSTRAINT fk_stratum_2 ON patient_stratum IS 'Links each patient-stratum pairing to the respective tables.';
172 | COMMENT ON CONSTRAINT chk_value_exists ON patient_stratum IS 'Ensures that either a factor or numeric value is provided for each record, aligning with the nature of the stratum.';
173 | COMMENT ON CONSTRAINT chk_one_value_only ON patient_stratum IS 'Ensures that each record has either a factor or a numeric value, but not both, maintaining the integrity of the data by ensuring it matches the stratum type (factor or numeric).';
174 | COMMENT ON CONSTRAINT uc_patient_stratum ON patient_stratum IS 'Ensures that each patient-stratum pairing is unique.';
175 |
--------------------------------------------------------------------------------
/R/randomize-minimisation-pocock.R:
--------------------------------------------------------------------------------
1 | #' Compare rows of two dataframes
2 | #'
3 | #' Takes dataframe all_patients (presumably with one row / patient) and
4 | #' compares it to all rows of new_patients (presumably already randomized
5 | #' patients)
6 | #'
7 | #' @param all_patients data.frame with all patients
8 | #' @param new_patients data.frame with new patient
9 | #' @noRd
10 | #'
11 | #' @return data.frame with columns as in all_patients and new_patients,
12 | #' filled with TRUE if there is match in covariate and FALSE if not
13 | compare_rows <- function(all_patients, new_patients) {
14 | # Find common column names
15 | common_cols <- intersect(names(all_patients), names(new_patients))
16 |
17 | # Compare each common column of A with B
18 | comparisons <- lapply(common_cols, function(col) {
19 | all_patients[[col]] == new_patients[[col]]
20 | })
21 |
22 | # Combine the comparisons into a new dataframe
23 | comparison_df <- data.frame(comparisons)
24 | names(comparison_df) <- common_cols
25 | tibble::as_tibble(comparison_df)
26 | }
27 |
28 |
29 |
30 | #' Patient Randomization Using Minimization Method
31 | #'
32 | #' \loadmathjax
33 | #' The `randomize_dynamic` function implements the dynamic randomization
34 | #' algorithm using the minimization method proposed by Pocock (Pocock and Simon,
35 | #' 1975). It requires defining basic study parameters: the number of arms (K),
36 | #' number of covariates (C), patient allocation ratios (\(a_{k}\))
37 | #' (where k = 1,2,…., K), weights for the covariates (\(w_{i}\))
38 | #' (where i = 1,2,…., C), and the maximum probability (p) of assigning a patient
39 | #' to the group with the smallest total unbalance multiplied by
40 | #' the respective weights (\(G_{k}\)). As the total unbalance for the first
41 | #' patient is the same regardless of the assigned arm, this patient is randomly
42 | #' allocated to a given arm. Subsequent patients are randomized based on the
43 | #' calculation of the unbalance depending on the selected method: "range",
44 | #' "var" (variance), or "sd" (standard deviation). In the case of two arms,
45 | #' the "range" method is equivalent to the "sd" method.
46 | #'
47 | #' Initially, the algorithm creates a matrix of results comparing a newly
48 | #' randomized patient with the current balance of patients based on the defined
49 | #' covariates. In the next step, for each arm and specified covariate,
50 | #' various scenarios of patient allocation are calculated. The existing results
51 | #' (n) are updated with the new patient, and then, considering the ratio
52 | #' coefficients, the results are divided by the specific allocation ratio.
53 | #' Depending on the method, the total unbalance is then calculated,
54 | #' taking into account the weights, and the number of covariates using one
55 | #' of three methods (“sd”, “range”, “var”).
56 | #' Based on the number of defined arms, the minimum value of (\(G_{k}\))
57 | #' (defined as the weighted sum of the level-based imbalance) selects the arm to
58 | #' which the patient will be assigned with a predefined probability (p). The
59 | #' probability that a patient will be assigned to any other arm will then be
60 | #' equal (1-p)/(K-1)
61 | #' for each of the remaining arms.
62 |
63 | #' @references Pocock, S. J., & Simon, R. (1975). Minimization: A new method
64 | #' of assigning patients to treatment and control groups in clinical trials.
65 | #' @references Minirand Package: Man Jin, Adam Polis, Jonathan Hartzel.
66 | #' (https://CRAN.R-project.org/package=Minirand)
67 | #' @note This function's implementation is a refactored adaptation
68 | #' of the codebase from the 'Minirand' package.
69 | #'
70 | #' @inheritParams randomize_simple
71 | #'
72 | #' @param current_state `tibble()`\cr
73 | #' table of covariates and current arm assignments in column `arm`,
74 | #' last row contains the new patient with empty string for `arm`
75 | #' @param weights `numeric()`\cr
76 | #' vector of positive weights, equal in length to number of covariates,
77 | #' numbered after covariates, defaults to equal weights
78 | #' @param method `character()`\cr
79 | #' Function used to compute within-arm variability, must be one of:
80 | #' `sd`, `var`, `range`, defaults to `var`
81 | #' @param p `numeric()`\cr
82 | #' single value, proportion of randomness (0, 1) in the randomization
83 | #' vs determinism, defaults to 85% deterministic
84 | #'
85 | #' @return `character()`\cr
86 | #' name of the arm assigned to the patient
87 | #' @examples
88 | #' n_at_the_moment <- 10
89 | #' arms <- c("control", "active low", "active high")
90 | #' sex <- sample(c("F", "M"),
91 | #' n_at_the_moment + 1,
92 | #' replace = TRUE,
93 | #' prob = c(0.4, 0.6)
94 | #' )
95 | #' diabetes <-
96 | #' sample(c("diabetes", "no diabetes"),
97 | #' n_at_the_moment + 1,
98 | #' replace = TRUE,
99 | #' prob = c(0.2, 0.8)
100 | #' )
101 | #' arm <-
102 | #' sample(arms,
103 | #' n_at_the_moment,
104 | #' replace = TRUE,
105 | #' prob = c(0.4, 0.4, 0.2)
106 | #' ) |>
107 | #' c("")
108 | #' covar_df <- tibble::tibble(sex, diabetes, arm)
109 | #' covar_df
110 | #'
111 | #' randomize_minimisation_pocock(arms = arms, current_state = covar_df)
112 | #' randomize_minimisation_pocock(
113 | #' arms = arms, current_state = covar_df,
114 | #' ratio = c(
115 | #' "control" = 1,
116 | #' "active low" = 2,
117 | #' "active high" = 2
118 | #' ),
119 | #' weights = c(
120 | #' "sex" = 0.5,
121 | #' "diabetes" = 1
122 | #' )
123 | #' )
124 | #'
125 | #' @export
126 | randomize_minimisation_pocock <-
127 | function(arms,
128 | current_state,
129 | weights,
130 | ratio,
131 | method = "var",
132 | p = 0.85) {
133 | # Assertions
134 | checkmate::assert_character(
135 | arms,
136 | min.len = 2,
137 | min.chars = 1,
138 | unique = TRUE
139 | )
140 |
141 | # Define a custom range function
142 | custom_range <- function(x) {
143 | max(x, na.rm = TRUE) - min(x, na.rm = TRUE)
144 | }
145 |
146 | supported_methods <- list(
147 | "range" = custom_range,
148 | "var" = var,
149 | "sd" = sd
150 | )
151 |
152 | checkmate::assert_choice(
153 | method,
154 | choices = names(supported_methods),
155 | )
156 | checkmate::assert_tibble(
157 | current_state,
158 | any.missing = FALSE,
159 | min.cols = 2,
160 | min.rows = 1,
161 | null.ok = FALSE
162 | )
163 | checkmate::assert_names(
164 | colnames(current_state),
165 | must.include = "arm"
166 | )
167 | checkmate::assert_character(
168 | current_state$arm[nrow(current_state)],
169 | max.chars = 0, .var.name = "Last value of 'arm'"
170 | )
171 |
172 | n_covariates <-
173 | (ncol(current_state) - 1)
174 | n_arms <-
175 | length(arms)
176 |
177 | checkmate::assert_subset(
178 | unique(current_state$arm),
179 | choices = c(arms, ""),
180 | .var.name = "'arm' variable in dataframe"
181 | )
182 | # Validate argument presence and revert to defaults if not provided
183 | if (rlang::is_missing(ratio)) {
184 | ratio <- rep(1L, n_arms)
185 | names(ratio) <- arms
186 | }
187 | if (rlang::is_missing(weights)) {
188 | weights <- rep(1 / n_covariates, n_covariates)
189 | names(weights) <-
190 | colnames(current_state)[colnames(current_state) != "arm"]
191 | }
192 |
193 | checkmate::assert_numeric(
194 | weights,
195 | any.missing = FALSE,
196 | len = n_covariates,
197 | null.ok = FALSE,
198 | lower = 0,
199 | finite = TRUE,
200 | all.missing = FALSE
201 | )
202 | checkmate::assert_names(
203 | names(weights),
204 | must.include =
205 | colnames(current_state)[colnames(current_state) != "arm"]
206 | )
207 | checkmate::assert_integerish(
208 | ratio,
209 | any.missing = FALSE,
210 | len = n_arms,
211 | null.ok = FALSE,
212 | lower = 0,
213 | all.missing = FALSE,
214 | names = "named"
215 | )
216 | checkmate::assert_names(
217 | names(ratio),
218 | must.include = arms
219 | )
220 | checkmate::assert_number(
221 | p,
222 | na.ok = FALSE,
223 | lower = 0,
224 | upper = 1,
225 | null.ok = FALSE
226 | )
227 |
228 | # Computations
229 | n_at_the_moment <- nrow(current_state) - 1
230 |
231 | if (n_at_the_moment == 0) {
232 | return(randomize_simple(arms, ratio))
233 | }
234 |
235 | arms_similarity <-
236 | # compare new subject to all old subjects
237 | compare_rows(
238 | current_state[-nrow(current_state), names(current_state) != "arm"],
239 | current_state[nrow(current_state), names(current_state) != "arm"]
240 | ) |>
241 | split(current_state$arm[1:n_at_the_moment]) |> # split by arm
242 | lapply(colSums) |> # and compute number of similarities in each arm
243 | dplyr::bind_rows(.id = "arm") |>
244 | # make sure that every arm has a metric, even if not present in data yet
245 | tidyr::complete(arm = arms) |>
246 | dplyr::mutate(dplyr::across(
247 | dplyr::where(is.numeric),
248 | ~ tidyr::replace_na(.x, 0)
249 | ))
250 |
251 | imbalance <- sapply(arms, function(x) {
252 | arms_similarity |>
253 | # compute scenario where each arm (x) gets new subject
254 | dplyr::mutate(dplyr::across(
255 | dplyr::where(is.numeric),
256 | ~ dplyr::if_else(arm == x, .x + 1, .x) /
257 | ratio[arm]
258 | )) |>
259 | # compute dispersion across each covariate
260 | dplyr::summarise(dplyr::across(
261 | dplyr::where(is.numeric),
262 | ~ supported_methods[[method]](.x)
263 | )) |>
264 | # multiply each covariate dispersion by covariate weight
265 | dplyr::mutate(dplyr::across(
266 | dplyr::everything(),
267 | ~ . * weights[dplyr::cur_column()]
268 | )) |>
269 | # sum all covariate outcomes
270 | dplyr::summarize(total = sum(dplyr::c_across(dplyr::everything()))) |>
271 | dplyr::pull("total")
272 | })
273 |
274 | high_prob_arms <- names(which(imbalance == min(imbalance)))
275 | low_prob_arms <- arms[!arms %in% high_prob_arms]
276 |
277 | if (length(high_prob_arms) == n_arms) {
278 | return(randomize_simple(arms, ratio))
279 | }
280 |
281 | sample(
282 | c(high_prob_arms, low_prob_arms), 1,
283 | prob = c(
284 | rep(
285 | p / length(high_prob_arms),
286 | length(high_prob_arms)
287 | ),
288 | rep(
289 | (1 - p) / length(low_prob_arms),
290 | length(low_prob_arms)
291 | )
292 | )
293 | )
294 | }
295 |
--------------------------------------------------------------------------------
/tests/testthat/test-randomize-minimisation-pocock.R:
--------------------------------------------------------------------------------
1 | set.seed(seed = "345345")
2 | n_at_the_moment <- 10
3 | arms <- c("control", "active low", "active high")
4 | sex <- sample(c("F", "M"),
5 | n_at_the_moment + 1,
6 | replace = TRUE,
7 | prob = c(0.4, 0.6)
8 | )
9 | diabetes <-
10 | sample(c("diabetes", "no diabetes"),
11 | n_at_the_moment + 1,
12 | replace = TRUE,
13 | prob = c(0.2, 0.8)
14 | )
15 | arm <-
16 | sample(arms,
17 | n_at_the_moment,
18 | replace = TRUE,
19 | prob = c(0.4, 0.4, 0.2)
20 | ) |>
21 | c("")
22 | covar_df <- tibble::tibble(sex, diabetes, arm)
23 |
24 | test_that("You can call function and it returns arm", {
25 | expect_subset(
26 | randomize_minimisation_pocock(arms = arms, current_state = covar_df),
27 | choices = arms
28 | )
29 | })
30 |
31 | test_that("Assertions work", {
32 | expect_error(
33 | randomize_minimisation_pocock(
34 | arms = c(1, 2), current_state = covar_df
35 | ),
36 | regexp = "Must be of type 'character'"
37 | )
38 | expect_error(
39 | randomize_minimisation_pocock(
40 | arms = arms, current_state = covar_df,
41 | method = "nonexistent"
42 | ),
43 | regexp = "Must be element of set .'range','var','sd'., but is 'nonexistent'"
44 | )
45 | expect_error(
46 | randomize_minimisation_pocock(
47 | arms = arms,
48 | current_state = "5 patietns OK"
49 | ),
50 | regexp =
51 | "Assertion on 'current_state' failed: Must be a tibble, not character"
52 | )
53 | expect_error(
54 | randomize_minimisation_pocock(
55 | arms = arms,
56 | current_state = covar_df[, 1:2]
57 | ),
58 | regexp = "Names must include the elements .'arm'."
59 | )
60 | # Last subject already randomized
61 | expect_error(
62 | randomize_minimisation_pocock(arms = arms, current_state = covar_df[1:3, ]),
63 | regexp = "must have at most 0 characters"
64 | )
65 | expect_error(
66 | randomize_minimisation_pocock(
67 | arms = c("foo", "bar"),
68 | current_state = covar_df
69 | ),
70 | regexp = "Must be a subset of .'foo','bar',''."
71 | )
72 | expect_error(
73 | randomize_minimisation_pocock(
74 | arms = arms, current_state = covar_df,
75 | weights = c("sex" = -1, "diabetes" = 2)
76 | ),
77 | regexp = "Element 1 is not >= 0"
78 | )
79 | expect_error(
80 | randomize_minimisation_pocock(
81 | arms = arms, current_state = covar_df,
82 | weights = c("wrong" = 1, "diabetes" = 2)
83 | ),
84 | regexp = "is missing elements .'sex'."
85 | )
86 | expect_error(
87 | randomize_minimisation_pocock(
88 | arms = arms, current_state = covar_df,
89 | ratio = c(
90 | "control" = 1.5,
91 | "active low" = 2,
92 | "active high" = 1
93 | )
94 | ),
95 | regexp = "element 1 is not close to an integer"
96 | )
97 | expect_error(
98 | randomize_minimisation_pocock(
99 | arms = arms, current_state = covar_df,
100 | ratio = c(
101 | "control" = 1L,
102 | "active high" = 1L
103 | )
104 | ),
105 | regexp = "Must have length 3, but has length 2"
106 | )
107 | expect_error(
108 | randomize_minimisation_pocock(
109 | arms = arms, current_state = covar_df,
110 | p = 12
111 | ),
112 | regexp = "Assertion on 'p' failed: Element 1 is not <= 1"
113 | )
114 | })
115 |
116 | test_that("Function randomizes first patient randomly", {
117 | randomized <-
118 | sapply(1:100, function(x) {
119 | randomize_minimisation_pocock(
120 | arms = arms,
121 | current_state = covar_df[nrow(covar_df), ]
122 | )
123 | })
124 | test <- prop.test(
125 | x = sum(randomized == "control"),
126 | n = length(randomized),
127 | p = 1 / 3,
128 | conf.level = 0.95,
129 | correct = FALSE
130 | )
131 | expect_gt(test$p.value, 0.05)
132 | })
133 |
134 | test_that("Function randomizes second patient deterministically", {
135 | arms <- c("A", "B")
136 | situation <- tibble::tibble(
137 | sex = c("F", "F"),
138 | arm = c("A", "")
139 | )
140 | randomized <-
141 | randomize_minimisation_pocock(
142 | arms = arms,
143 | current_state = situation,
144 | p = 1
145 | )
146 |
147 | expect_equal(randomized, "B")
148 | })
149 |
150 | test_that("Setting proportion of randomness works", {
151 | arms <- c("A", "B")
152 | situation <- tibble::tibble(
153 | sex = c("F", "F"),
154 | arm = c("A", "")
155 | )
156 |
157 | randomized <-
158 | sapply(1:100, function(x) {
159 | randomize_minimisation_pocock(
160 | arms = arms,
161 | current_state = situation,
162 | p = 0.60
163 | )
164 | })
165 | # 60% to minimization arm (B) 40% to other arm (in this case A)
166 |
167 | test <- prop.test(table(randomized), p = 0.4, correct = FALSE)
168 |
169 | expect_gt(test$p.value, 0.05)
170 | })
171 |
172 | test_that("Method 'range' works properly", {
173 | arms <- c("A", "B", "C")
174 | situation <- tibble::tibble(
175 | sex = c("F", "M", "F"),
176 | diabetes_type = c("type2", "type2", "type2"),
177 | arm = c("A", "B", "")
178 | )
179 | randomized <-
180 | randomize_minimisation_pocock(
181 | arms = arms,
182 | current_state = situation,
183 | p = 1,
184 | method = "range"
185 | )
186 |
187 | testthat::expect_equal(randomized, "C")
188 | })
189 |
190 | test_that("minimisation respects ratio", {
191 | arms <- c("control", "active low", "active high")
192 | ratio <- c("control" = 3L, "active low" = 1L, "active high" = 1L)
193 |
194 | draws <- replicate(100, randomize_minimisation_pocock(
195 | arms = arms,
196 | current_state = covar_df,
197 | ratio = ratio,
198 | p = 0.85,
199 | method = "var",
200 | weights = c("sex" = 0, "diabetes" = 0)
201 | ))
202 |
203 | obs <- table(factor(draws, levels = arms))
204 | x <- suppressWarnings(chisq.test(x = obs, p = ratio / sum(ratio), simulate.p.value = FALSE, rescale.p = TRUE))
205 | expect_gt(x$p.value, 0.01)
206 | })
207 |
208 | test_that("minimisation respects ratio", {
209 | arms <- c("control", "active low", "active high")
210 | ratio <- c("control" = 3L, "active low" = 1L, "active high" = 1L)
211 |
212 | # Construct a state where each arm has exactly one match with the new patient
213 | # Covariate column 'group'; last row is the new patient (arm == "")
214 | covar_df <- tibble::tibble(
215 | group = c("X","X","X","Y","Y","Y","X"),
216 | arm = c("control","active low","active high","control","active low","active high","")
217 | )
218 |
219 | # Deterministic choice: p = 1
220 | chosen <- randomize_minimisation_pocock(
221 | arms = arms,
222 | current_state = covar_df,
223 | ratio = ratio,
224 | method = "range",
225 | p = 1
226 | )
227 |
228 | expect_equal(chosen, "control")
229 | })
230 |
231 | test_that("minimisation maintains 3:1:1 allocation over a sequence (non-tie path)", {
232 | set.seed(123)
233 | arms <- c("control", "active low", "active high")
234 | ratio <- c("control" = 3L, "active low" = 1L, "active high" = 1L)
235 |
236 | # Generate covariates to avoid ties; sequence evolves (non-tie branch exercised)
237 | n <- 20
238 | sex <- sample(c("F","M"), n, replace = TRUE, prob = c(0.55, 0.45))
239 | diabetes <- sample(c("diabetes","no diabetes"), n, replace = TRUE, prob = c(0.2, 0.8))
240 |
241 | assigned <- character(n)
242 | for (i in seq_len(n)) {
243 | current_state <- tibble::tibble(
244 | sex = c(head(sex, i - 1), sex[i]),
245 | diabetes = c(head(diabetes, i - 1), diabetes[i]),
246 | arm = c(if (i > 1) assigned[1:(i - 1)] else character(0), "")
247 | )
248 | assigned[i] <- randomize_minimisation_pocock(
249 | arms = arms,
250 | current_state = current_state,
251 | ratio = ratio,
252 | method = "var",
253 | p = 1
254 | )
255 | }
256 |
257 | obs <- table(factor(assigned, levels = arms))
258 | x <- suppressWarnings(chisq.test(x = obs, p = ratio / sum(ratio), rescale.p = TRUE))
259 | expect_gt(x$p.value, 0.01)
260 | })
261 |
262 | test_that("ratio invariance to scaling (deterministic, repeated)", {
263 | arms <- c("control", "active low", "active high")
264 | ratio_small <- c("control" = 3L, "active low" = 1L, "active high" = 1L)
265 | ratio_large <- c("control" = 300L, "active low" = 100L, "active high" = 100L)
266 |
267 | # Construct a state with a unique minimal-imbalance arm
268 | covar_df <- tibble::tibble(
269 | group = c("X","X","X","Y","Y","Y","X"),
270 | arm = c("control","active low","active high","control","active low","active high","")
271 | )
272 |
273 | pick <- function(rat) {
274 | randomize_minimisation_pocock(
275 | arms = arms,
276 | current_state = covar_df,
277 | ratio = rat,
278 | method = "range",
279 | p = 1
280 | )
281 | }
282 |
283 | chosen_small <- replicate(10, pick(ratio_small))
284 | chosen_large <- replicate(10, pick(ratio_large))
285 |
286 | # Deterministic across repeats (no ties, p=1)
287 | expect_length(unique(chosen_small), 1)
288 | expect_length(unique(chosen_large), 1)
289 |
290 | # Invariance to scaling
291 | expect_equal(chosen_small[1], chosen_large[1])
292 |
293 | # Always picks the same arm: control
294 | expect_true(all(chosen_small == "control"))
295 | expect_true(all(chosen_large == "control"))
296 | })
297 |
298 | test_that("weights change the chosen arm (two-arm, deterministic, repeated)", {
299 | arms <- c("A", "B")
300 | ratio <- c("A" = 1L, "B" = 1L)
301 |
302 | covar_df <- tibble::tibble(
303 | c1 = c("Y","Y","X","X","X"),
304 | c2 = c("U","U","V","V","U"),
305 | arm = c("A","A","B","B","")
306 | )
307 |
308 | pick <- function(w) {
309 | randomize_minimisation_pocock(
310 | arms = arms,
311 | current_state = covar_df,
312 | ratio = ratio,
313 | method = "range",
314 | p = 1,
315 | weights = w
316 | )
317 | }
318 |
319 | chosen_c1 <- replicate(10, pick(c("c1" = 1, "c2" = 0)))
320 | chosen_c2 <- replicate(10, pick(c("c1" = 0, "c2" = 1)))
321 |
322 | expect_true(all(chosen_c1 == "A"))
323 | expect_true(all(chosen_c2 == "B"))
324 | })
325 |
326 | test_that("weight scaling invariance (two-arm, deterministic, repeated)", {
327 | arms <- c("A", "B")
328 | ratio <- c("A" = 1L, "B" = 1L)
329 |
330 | covar_df <- tibble::tibble(
331 | c1 = c("Y","Y","X","X","X"),
332 | c2 = c("U","U","V","V","U"),
333 | arm = c("A","A","B","B","")
334 | )
335 |
336 | choose_with <- function(w) {
337 | randomize_minimisation_pocock(
338 | arms = arms,
339 | current_state = covar_df,
340 | ratio = ratio,
341 | method = "range",
342 | p = 1,
343 | weights = w
344 | )
345 | }
346 |
347 | c_small <- replicate(10, choose_with(c("c1" = 3, "c2" = 1)))
348 | c_large <- replicate(10, choose_with(c("c1" = 30, "c2" = 10)))
349 |
350 | expect_length(unique(c_small), 1)
351 | expect_length(unique(c_large), 1)
352 | expect_equal(c_small[1], c_large[1])
353 | })
354 |
355 | test_that("methods can yield different choices for three arms (deterministic search)", {
356 | set.seed(456)
357 | arms <- c("A", "B", "C")
358 | ratio <- c("A" = 1L, "B" = 1L, "C" = 1L)
359 |
360 | found_difference <- FALSE
361 | for (i in 1:300) {
362 | n <- sample(6:12, 1)
363 | c1 <- sample(c("X","Y","Z"), n, replace = TRUE, prob = c(0.6, 0.3, 0.1))
364 | c2 <- sample(c("U","V"), n, replace = TRUE, prob = c(0.7, 0.3))
365 | assigned <- sample(arms, n - 1, replace = TRUE, prob = c(0.5, 0.3, 0.2))
366 | current_state <- tibble::tibble(
367 | c1 = c(head(c1, n - 1), c1[n]),
368 | c2 = c(head(c2, n - 1), c2[n]),
369 | arm = c(assigned, "")
370 | )
371 |
372 | pick_range <- randomize_minimisation_pocock(
373 | arms = arms, current_state = current_state, ratio = ratio,
374 | method = "range", p = 1
375 | )
376 | pick_sd <- randomize_minimisation_pocock(
377 | arms = arms, current_state = current_state, ratio = ratio,
378 | method = "sd", p = 1
379 | )
380 | pick_var <- randomize_minimisation_pocock(
381 | arms = arms, current_state = current_state, ratio = ratio,
382 | method = "var", p = 1
383 | )
384 |
385 | if (!(pick_range == pick_sd && pick_sd == pick_var)) {
386 | found_difference <- TRUE
387 | break
388 | }
389 | }
390 |
391 | expect_true(found_difference) # at least one state where methods disagree
392 | })
393 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # unbiased
2 | 
3 | 
4 | 
5 | [](https://codecov.io/gh/ttscience/unbiased)
6 |
7 | > An API-based solution for Clinical Trial Randomization
8 |
9 |
10 | In clinical trials, the fair and efficient allocation of participants is essential for achieving reliable results. While there are many excellent R randomization packages available, none, to our knowledge, provide a dedicated API for this purpose. The **unbiased** package fills this gap by featuring a production-ready REST API designed for seamless integration. This unique combination allows for easy connection with electronic Case Report Forms (eCRF), enhancing data management and streamlining participant allocation.
11 |
12 | ## Why choose **unbiased**?
13 |
14 | Our goal in creating **unbiased** was to provide a user-friendly yet powerful tool that addresses the nuanced demands of clinical trial randomization. It offers:
15 |
16 | - **Production-Ready REST API**: Built for seamless integration with eCRF/EDC systems, facilitating real-time randomization and automation.
17 | - **Extensive Database Integration**: Supports robust data management practices, ensuring that participant information and randomization outcomes are securely managed and easily accessible.
18 | - **Commitment to Quality**: Emphasizes development best practices, including comprehensive code coverage, to deliver a reliable and trustworthy solution.
19 | - **Adaptability**: Whether for small-scale studies or large, multi-center trials, **unbiased** scales to meet your needs.
20 | - **Comprehensive Documentation**: To support you in applying the package effectively.
21 |
22 | By choosing **unbiased**, you're adopting a sophisticated approach to trial randomization, ensuring fair and efficient participant allocation across your studies and support of the broader objectives of clinical research through technology.
23 |
24 | ## Table of Contents
25 |
26 | 1. [Quickstart Guide](#quickstart-guide)
27 | - [Quick Setup with Docker Compose](#quick-setup-with-docker-compose)
28 | - [Quick Setup with Docker](#quick-setup-with-docker)
29 | - [API Server Configuration](#api-server-configuration)
30 | - [Alternative Installation Method](#alternative-installation-method)
31 | 2. [Getting started with **unbiased**](#getting-started-with-unbiased)
32 | - [API Endpoints](#api-endpoints)
33 | - [Study Creation](#study-creation)
34 | - [Patient Randomization](#patient-randomization)
35 | - [Study List](#study-list)
36 | - [Study Details](#study-details)
37 | - [Randomization List](#randomization-list)
38 | - [Audit Log](#audit-log)
39 | 3. [Technical Implementation](#technical-implementation)
40 | - [Quality Assurance Measures](#quality-assurance-measures)
41 | - [Running Tests](#running-tests)
42 | - [Executing Tests from an R Interactive Session](#executing-tests-from-an-r-interactive-session)
43 | - [Executing Tests from the Command Line](#executing-tests-from-the-command-line)
44 | - [Running Tests with Docker Compose](#running-tests-with-docker-compose)
45 | - [Code Coverage](#code-coverage)
46 | - [Configuring Sentry](#configuring-sentry)
47 | 4. [Background](#background)
48 | - [Purpose and Scope for Clinical Trial Randomization](#purpose-and-scope-for-clinical-trial-randomization)
49 | - [Comparison with Other Solutions](#comparison-with-other-solutions)
50 |
51 | # Quickstart Guide
52 |
53 | Initiating your work with **unbiased** involves simple setup steps. Whether you're integrating it into your R environment or deploying its API, we aim to equip you with a reliable tool that enhances the integrity and efficiency of your clinical trials.
54 |
55 | ## Quick Setup with Docker Compose
56 |
57 | We have prepared a simple Docker Compose setup that allows you to run **unbiased** with minimal configuration. This method is ideal for users who want to quickly test **unbiased** locally or in a development environment. To get started, follow these steps:
58 |
59 | 1. Clone the **unbiased** repository to your local machine or just copy the `docker-compose.yml` file from the repository.
60 | 2. Navigate to the directory containing the `docker-compose.yml` file.
61 | 3. Run the following command to start **unbiased**:
62 |
63 | ```sh
64 | docker compose pull
65 | docker compose up
66 | ```
67 |
68 | This command will start the **unbiased** API server, making it accessible on the specified port. PostgreSQL will also be started, ensuring that the API can connect to the database.
69 |
70 | You can use this `docker-compose.yml` file as a starting point for your own deployment, modifying it to suit your specific requirements.
71 |
72 | ### Configuration
73 |
74 | The `docker-compose.yml` file contains the necessary configuration for running **unbiased** with Docker Compose. You can override the default environment variables by creating a `.env` file in the same directory as the `docker-compose.yml` file. The following environment variables can be set:
75 |
76 | - Server configuration:
77 | - `UNBIASED_PORT`: The port on which the API will listen. Defaults to `3838` if not provided.
78 | - `POSTGRES_USER`: The username for authentication with the PostgreSQL database. Defaults to `postgres` if not provided.
79 | - `POSTGRES_PASSWORD`: The password for authentication with the PostgreSQL database. Warning: It can only be set on the first run, as it will not work if the database already exists. Defaults to `postgres` if not provided.
80 | - Versions:
81 | - `UNBIASED_VERSION`: The version of the **unbiased** Docker image to use. Defaults to `latest` if not provided.
82 | - `POSTGRES_VERSION`: The version of the PostgreSQL Docker image to use. Defaults to `latest` if not provided.
83 |
84 | ### Building the Docker Image Locally
85 |
86 | If you cloned the repository, you can also build the Docker image locally using the following command:
87 |
88 | ```sh
89 | docker compose build
90 | ```
91 |
92 | ## Quick Setup with Docker
93 |
94 | The most straightforward way to deploy **unbiased** is through our Docker images. This ensures that you can get **unbiased** up and running with minimal setup, regardless of your local environment. To use **unbiased**, pull the latest Docker image:
95 |
96 | ```sh
97 | docker pull ghcr.io/ttscience/unbiased
98 | ```
99 |
100 | To run **unbiased** with Docker, ensuring you have set the necessary environment variables:
101 |
102 | ```sh
103 | docker run -e POSTGRES_DB=mydb -e POSTGRES_USER=myuser -e POSTGRES_PASSWORD=mypassword -e UNBIASED_PORT=3838 ghcr.io/ttscience/unbiased
104 | ```
105 |
106 | This command starts the **unbiased** API, making it accessible on the specified port. It's crucial to have your PostgreSQL database ready, as **unbiased** will automatically configure the necessary database structures upon startup. PostgreSQL can be run in a separate container or on your local machine. Make sure to use a PostgreSQL instance that have `temporal_tables` extension available. You can use our `ghcr.io/ttscience/postgres-temporal-tables/postgres-temporal-tables:latest` image to run PostgreSQL with `temporal_tables` extension.
107 |
108 | ## API Server configuration
109 |
110 | The **unbiased** API server can be configured using environment variables. The following environment variables need to be set for the server to start:
111 |
112 | - `POSTGRES_DB`: The name of the PostgreSQL database to connect to.
113 | - `POSTGRES_HOST`: The host of the PostgreSQL database. This could be a hostname, such as `localhost` or `database.example.com`, or an IP address.
114 | - `POSTGRES_PORT`: The port on which the PostgreSQL database is listening. Defaults to `5432` if not provided.
115 | - `POSTGRES_USER`: The username for authentication with the PostgreSQL database.
116 | - `POSTGRES_PASSWORD`: The password for authentication with the PostgreSQL database.
117 | - `UNBIASED_HOST`: The host on which the API will run. Defaults to `0.0.0.0` if not provided.
118 | - `UNBIASED_PORT`: The port on which the API will listen. Defaults to `3838` if not provided.
119 |
120 | ## Alternative Installation Method
121 |
122 | For those preferring to work directly within the R environment, the **unbiased** package offers an alternative installation method via GitHub. This approach allows users to easily integrate **unbiased** into their R projects. To proceed with this method, utilize the `devtools` package for installation by executing the following command:
123 |
124 | ```R
125 | devtools::install_github("ttscience/unbiased")
126 | ```
127 |
128 | Following the package installation, the **unbiased** API can be launched within R. Simply invoke the `run_unbiased()` function to start the API:
129 |
130 | ```R
131 | unbiased::run_unbiased()
132 | ```
133 |
134 | This initiates the API server, by default, on your local machine (http://localhost:3838), making it accessible for interaction through various HTTP clients, including curl, Postman, or R's `httr` package.
135 |
136 | # Getting started with **unbiased**
137 |
138 | The **unbiased** package offers functions for randomizing participants in clinical trials, ensuring a fair and transparent process.
139 |
140 | Complete documentation for the implemented methodology and examples of how to use them are available on our GitHub Pages, providing all the information you need to integrate **unbiased** into your trial management workflow. Below, we present the basic steps for using **unbiased** through the API.
141 |
142 | ## API Endpoints
143 |
144 | The **unbiased** API is designed to facilitate clinical trial management through a set of endpoints:
145 |
146 | - **Study Management**: Create and configure new studies, including specifying randomization parameters and treatment arms.
147 | - **Participant Randomization**: Dynamically randomize participants to treatment groups based on the study's configuration and existing participant data.
148 | - **Study List**: List all previously defined studies.
149 | - **Study Details**: Show details about the selected study.
150 | - **Randomization List**: Generate a list of randomized patients for the selected study.
151 | - **Audit Log**: Show a audit log for the selected study.
152 |
153 |
154 | ### Study Creation
155 |
156 | To initialize a study using Pocock's minimization method, use the `POST /minimisation_pocock` endpoint. The required JSON payload should detail the study, including treatment groups, allocation ratios, and covariates.
157 |
158 | This endpoint sets up the study and returns an ID for accessing further study-related endpoints.
159 |
160 | ### Patient Randomization
161 |
162 | The `POST /{study_id}/patient` endpoint assigns a new patient to a treatment group, requiring patient details and covariate information in the JSON payload.
163 |
164 | This endpoint determines the patient's treatment group.
165 |
166 | ### Study List
167 |
168 | The `GET /study/` endpoint allow to list all previously defined studies. It returns information such as:
169 |
170 | - Study ID
171 | - Identifier
172 | - Name of study
173 | - Randomization method
174 | - Last edit date
175 |
176 | ### Study Details
177 | The `GET /study/{study_id}` endpoint allows to retrieve details about a selected study. The response body return:
178 |
179 | - Name of study
180 | - Randomization method
181 | - Last edit date
182 | - Input parameters
183 | - Strata
184 |
185 | ### Randomization List
186 | The `GET /study/{study_id}/randomization_list` endpoint allows to generate a list of randomized patients along with their assigned study arms.
187 |
188 | ### Audit Log
189 |
190 | The `GET /study/{study_id}/audit` endpoint allows to print all records in the audit log for a selected study.
191 | The response body includes the following information:
192 |
193 | - Log ID
194 | - Creation date
195 | - Type of event
196 | - Request ID
197 | - Study ID
198 | - Endpoint URL
199 | - Request method
200 | - Request body with study definition
201 | - Response code
202 | - Response body with study details
203 |
204 | The endpoint facilitates tracking the history of requests sent to the database, along with their corresponding responses. This enables us to trace all actions involving the API.
205 |
206 | # Technical details
207 |
208 | ## Running Tests
209 |
210 | Unbiased provides an extensive collection of tests to ensure correct functionality.
211 |
212 | ### Executing Tests from an R Interactive Session
213 |
214 | To execute tests using an interactive R session, run the following commands:
215 |
216 | ```R
217 | devtools::load_all()
218 | devtools::test()
219 | ```
220 |
221 | Make sure that `devtools` package is installed in your environment.
222 |
223 | Ensure that the necessary database connection environment variables are set before running these tests. You can set environment variables using methods such as `Sys.setenv`.
224 |
225 | Running these tests will start the Unbiased API on a random port.
226 |
227 | ### Executing Tests from the Command Line
228 |
229 | Use the helper script `run_tests.sh` to execute tests from the command line. Remember to set the database connection environment variables before running the tests.
230 |
231 | ### Running Tests with Docker Compose
232 |
233 | Docker Compose can be used to build the Unbiased Docker image and execute all tests. This can be done using the provided `docker-compose.test.yml` file. This method ensures a consistent testing environment and simplifies the setup process.
234 |
235 | ```bash
236 | docker compose -f docker-compose.test.yml build
237 | docker compose -f docker-compose.test.yml run tests
238 | ```
239 |
240 | ### Code Coverage
241 |
242 | Unbiased supports code coverage analysis through the `covr` package. This allows you to measure the effectiveness of your tests by showing which parts of your R code in the `R` directory are actually being tested.
243 |
244 | To calculate code coverage, you will need to install the `covr` package. Once installed, you can use the following methods:
245 |
246 | - `covr::report()`: This method runs all tests and generates a detailed coverage report in HTML format.
247 | - `covr::package_coverage()`: This method provides a simpler, text-based code coverage report.
248 |
249 | Alternatively, you can use the provided `run_tests_with_coverage.sh` script to run Unbiased tests with code coverage.
250 |
251 | ### Configuring Sentry
252 | The Unbiased server offers robust error reporting capabilities through the integration of the Sentry service. To activate Sentry, simply set the `SENTRY_DSN` environment variable. Additionally, you have the flexibility to customize the setup further by configuring the following environment variables:
253 |
254 | * `SENTRY_ENVIRONMENT` This is used to set the environment (e.g., "production", "staging", "development"). If not set, the environment defaults to "development".
255 |
256 | * `SENTRY_RELEASE` This is used to set the release in Sentry. If not set, the release defaults to "unspecified".
257 |
258 | # Background
259 |
260 | ## Purpose and Scope for Clinical Trial Randomization
261 |
262 | Randomization is the gold standard for conducting clinical trials and a fundamental aspect of clinical trials, in studies comparing two or more arms. In most cases randomization is a desirable technique that will ensure that patients are randomly allocated to defined groups. This is essential for maintaining the integrity of the trial and ensuring that the results are reliable, and blinding of research personnel. However, there are situations where it is desirable for studies to balance patients in terms of numbers in each group or, in addition, to achieve balance with respect to other relevant factors, such as sex or diabetes type. Adequate selection of randomization methods allows the intended randomization goals to be realized.
263 |
264 | **Unbiased** compared to standard and most commonly used randomization methods, e.g. the simple method or the block method, apart from these methods, additionally offers enhanced features of more flexible adaptive methods, which are based on current information about the allocation of patients in the trial. Compared to, for example, block randomization, adaptive randomization not only ensures relatively equal allocation to patient groups, but also allows the groups to be balanced on the basis of certain important covariates, which is its key advantage. This randomization requires predefined criteria, such as the probability with which a given patient will be assigned to a group based on minimizing the total imbalance, or weights that can be assigned personally for each individual covariate. Its advanced algorithmic approach sets it apart from others by minimizing selection bias and improving the overall efficiency of the randomization process in clinical trials.
265 |
266 | **Unbiased** allows the use of simple, block and adaptive minimization randomization methods relevant to the conduct of clinical trials, so package caters to the needs of clinical trial randomization.
267 | ...
268 |
269 | To find out more on differences in randomization methods, read our vignette on [Comparative Analysis of Randomization Methods](vignettes/articles/minimization_randomization_comparison.Rmd).
270 |
271 | ## Comparison with other solutions
272 |
273 | There are many packages that perform specific randomization methods in R. Most of them are designed for stratified randomization and permuted blocks, such as [blockrand](https://CRAN.R-project.org/package=blockrand) and [randomizeR](https://CRAN.R-project.org/package=randomizeR). Some of them also utilize the options for using minimization randomization - e.g. [randpack]( https://bioconductor.org/packages/randPack/) or [Minirand]( https://CRAN.R-project.org/package=Minirand).
274 |
275 | Our unique contribution to the landscape is the integration of a comprehensive API and a commitment to rigorous testing. This dual focus ensures that **unbiased** not only supports the practical needs of clinical trials, but also aligns with the technical requirements of modern clinical research environments. By prioritizing these aspects, **unbiased** addresses a critical gap in the market: the need for an eCRF-compatible randomization solution that is both dependable and easily integrated into existing workflows. This, together with the implementation of minimization techniques, sets **unbiased** apart as a novel, comprehensive tool.
--------------------------------------------------------------------------------