├── vignettes ├── .gitignore ├── images │ ├── gis1.avif │ ├── gis2.avif │ ├── Figure1.avif │ ├── popdef1.avif │ ├── popdef2.avif │ ├── popdef3.avif │ ├── popdef4.avif │ ├── popdef5.avif │ ├── popdef6.avif │ ├── atlasplp1.avif │ ├── atlasplp2.avif │ ├── atlasplp3.avif │ ├── atlasplp4.avif │ ├── problems.avif │ ├── shinyroc.avif │ ├── sparseRoc.avif │ ├── shinysummary.avif │ ├── studydesign.avif │ ├── atlasdownload1.avif │ ├── atlasdownload2.avif │ ├── learningCurve.avif │ ├── precisionRecall.avif │ ├── preferencePDF.avif │ ├── generalizability.avif │ ├── learningCurveBias.avif │ ├── learningCurvePlot.avif │ ├── smoothCalibration.avif │ ├── sparseCalibration.avif │ ├── demographicSummary.avif │ ├── variableScatterplot.avif │ ├── learningCurveVariance.avif │ └── predictionDistribution.avif ├── example1 │ ├── ATLAS_O.avif │ ├── ATLAS_O.webp │ ├── ATLAS_T.avif │ └── ATLAS_T.webp ├── example2 │ ├── angioedema.avif │ ├── angioedema.webp │ ├── aceinhibitors.avif │ └── aceinhibitors.webp ├── PatientLevelPredictionFigures.pptx └── ClinicalModels.Rmd ├── data └── simulationProfile.rda ├── inst ├── sql │ ├── sql_server │ │ ├── RemoveCohortTempTables.sql │ │ ├── UpdateVersionNumber.sql │ │ ├── GetCohorts.sql │ │ └── migrations │ │ │ └── Migration_1-store_version.sql │ ├── sqlite │ │ └── migrations │ │ │ └── Migration_1-store_version.sql │ └── postgresql │ │ └── migrations │ │ └── Migration_1-store_version.sql ├── CITATION ├── shinyConfigUpdate.json └── shinyConfig.json ├── tests ├── testthat.R └── testthat │ ├── test-sklearnClassifierHelpers.R │ ├── helper-expectations.R │ ├── test-helperfunctions.R │ ├── test-PredictionDistribution.R │ ├── test-fitting.R │ ├── test-featureImportance.R │ ├── test-andromedahelperfunctions.R │ ├── test-getCalibration.R │ └── test-demographicSummary.R ├── extras └── PatientLevelPrediction.pdf ├── demo ├── 00Index ├── SingleModelDemo.R ├── LearningCurveDemo.R └── EnsembleModelDemo.R ├── .Rbuildignore ├── .github ├── workflows │ ├── 
nightly_cleanup_Hades.yml │ ├── pkgdown.yaml │ └── revdeps.yml └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── man ├── calibrationInLarge.Rd ├── listAppend.Rd ├── createTempModelLoc.Rd ├── listCartesian.Rd ├── print.plpData.Rd ├── pmmFit.Rd ├── simpleImpute.Rd ├── brierScore.Rd ├── iterativeImpute.Rd ├── summary.plpData.Rd ├── createDefaultExecuteSettings.Rd ├── print.summary.plpData.Rd ├── averagePrecision.Rd ├── calibrationLine.Rd ├── minMaxNormalize.Rd ├── getPredictionDistribution_binary.Rd ├── removeRareFeatures.Rd ├── setNaiveBayes.Rd ├── simulationProfile.Rd ├── createFeatureEngineeringSettings.Rd ├── createSimpleImputer.Rd ├── computeAuc.Rd ├── loadPlpData.Rd ├── migrateDataModel.Rd ├── ici.Rd ├── loadPlpResult.Rd ├── createRandomForestFeatureSelection.Rd ├── sklearnToJson.Rd ├── createSplineSettings.Rd ├── loadPrediction.Rd ├── setPythonEnvironment.Rd ├── PatientLevelPrediction.Rd ├── modelBasedConcordance.Rd ├── loadPlpModel.Rd ├── createValidationSettings.Rd ├── createUnivariateFeatureSelection.Rd ├── simulatePlpData.Rd ├── computeGridPerformance.Rd ├── savePlpData.Rd ├── getPredictionDistribution.Rd ├── savePlpModel.Rd ├── getThresholdSummary.Rd ├── loadPlpShareable.Rd ├── sklearnFromJson.Rd ├── createExistingSplitSettings.Rd ├── loadPlpAnalysesJson.Rd ├── configurePython.Rd ├── savePrediction.Rd ├── predictGlm.Rd ├── createStratifiedImputationSettings.Rd ├── plotPlp.Rd ├── savePlpResult.Rd ├── MapIds.Rd ├── predictPlp.Rd ├── createNormalizer.Rd ├── createExecuteSettings.Rd ├── viewPlp.Rd ├── createPreprocessSettings.Rd ├── predictCyclops.Rd ├── savePlpAnalysesJson.Rd ├── setCoxModel.Rd ├── setAdaBoost.Rd ├── createIterativeImputer.Rd ├── plotVariableScatterplot.Rd ├── recalibratePlpRefit.Rd ├── createRareFeatureRemover.Rd ├── plotSparseCalibration.Rd ├── preprocessData.Rd ├── savePlpShareable.Rd ├── createLogSettings.Rd ├── plotSparseCalibration2.Rd ├── robustNormalize.Rd ├── plotSparseRoc.Rd ├── setIterativeHardThresholding.Rd ├── 
plotDemographicSummary.Rd ├── plotF1Measure.Rd ├── plotPrecisionRecall.Rd ├── createSampleSettings.Rd ├── plotPredictionDistribution.Rd ├── plotPredictedPDF.Rd ├── createValidationDesign.Rd ├── diagnoseMultiplePlp.Rd ├── plotPreferencePDF.Rd ├── covariateSummary.Rd ├── getDemographicSummary.Rd ├── outcomeSurvivalPlot.Rd ├── setLassoLogisticRegression.Rd ├── plotGeneralizability.Rd ├── evaluatePlp.Rd ├── setGradientBoostingMachine.Rd ├── toSparseM.Rd ├── pfi.Rd ├── createDatabaseSchemaSettings.Rd ├── plotLearningCurve.Rd ├── setSVM.Rd ├── viewMultiplePlp.Rd ├── plotNetBenefit.Rd ├── createDefaultSplitSetting.Rd ├── fitPlp.Rd ├── createStudyPopulation.Rd ├── getCalibrationSummary.Rd ├── extractDatabaseToCsv.Rd ├── insertResultsToSqlite.Rd ├── recalibratePlp.Rd ├── validateExternal.Rd ├── getEunomiaPlpData.Rd ├── createCohortCovariateSettings.Rd └── splitData.Rd ├── PatientLevelPrediction.Rproj ├── .gitattributes ├── compare_versions ├── .gitignore ├── deploy.sh ├── R ├── SklearnClassifierHelpers.R └── PatientLevelPrediction.R ├── .settings └── org.eclipse.cdt.managedbuilder.core.prefs └── DESCRIPTION /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /data/simulationProfile.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/data/simulationProfile.rda -------------------------------------------------------------------------------- /inst/sql/sql_server/RemoveCohortTempTables.sql: -------------------------------------------------------------------------------- 1 | TRUNCATE TABLE #cohort_person; 2 | 3 | DROP TABLE #cohort_person; 4 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | 
library(testthat) 2 | library(PatientLevelPrediction) 3 | test_check("PatientLevelPrediction") 4 | -------------------------------------------------------------------------------- /vignettes/images/gis1.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/gis1.avif -------------------------------------------------------------------------------- /vignettes/images/gis2.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/gis2.avif -------------------------------------------------------------------------------- /vignettes/images/Figure1.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/Figure1.avif -------------------------------------------------------------------------------- /vignettes/images/popdef1.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/popdef1.avif -------------------------------------------------------------------------------- /vignettes/images/popdef2.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/popdef2.avif -------------------------------------------------------------------------------- /vignettes/images/popdef3.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/popdef3.avif -------------------------------------------------------------------------------- /vignettes/images/popdef4.avif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/popdef4.avif -------------------------------------------------------------------------------- /vignettes/images/popdef5.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/popdef5.avif -------------------------------------------------------------------------------- /vignettes/images/popdef6.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/popdef6.avif -------------------------------------------------------------------------------- /vignettes/example1/ATLAS_O.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example1/ATLAS_O.avif -------------------------------------------------------------------------------- /vignettes/example1/ATLAS_O.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example1/ATLAS_O.webp -------------------------------------------------------------------------------- /vignettes/example1/ATLAS_T.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example1/ATLAS_T.avif -------------------------------------------------------------------------------- /vignettes/example1/ATLAS_T.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example1/ATLAS_T.webp 
-------------------------------------------------------------------------------- /vignettes/images/atlasplp1.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/atlasplp1.avif -------------------------------------------------------------------------------- /vignettes/images/atlasplp2.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/atlasplp2.avif -------------------------------------------------------------------------------- /vignettes/images/atlasplp3.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/atlasplp3.avif -------------------------------------------------------------------------------- /vignettes/images/atlasplp4.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/atlasplp4.avif -------------------------------------------------------------------------------- /vignettes/images/problems.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/problems.avif -------------------------------------------------------------------------------- /vignettes/images/shinyroc.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/shinyroc.avif -------------------------------------------------------------------------------- /vignettes/images/sparseRoc.avif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/sparseRoc.avif -------------------------------------------------------------------------------- /extras/PatientLevelPrediction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/extras/PatientLevelPrediction.pdf -------------------------------------------------------------------------------- /vignettes/example2/angioedema.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example2/angioedema.avif -------------------------------------------------------------------------------- /vignettes/example2/angioedema.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example2/angioedema.webp -------------------------------------------------------------------------------- /vignettes/images/shinysummary.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/shinysummary.avif -------------------------------------------------------------------------------- /vignettes/images/studydesign.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/studydesign.avif -------------------------------------------------------------------------------- /vignettes/example2/aceinhibitors.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example2/aceinhibitors.avif -------------------------------------------------------------------------------- 
/vignettes/example2/aceinhibitors.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example2/aceinhibitors.webp -------------------------------------------------------------------------------- /vignettes/images/atlasdownload1.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/atlasdownload1.avif -------------------------------------------------------------------------------- /vignettes/images/atlasdownload2.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/atlasdownload2.avif -------------------------------------------------------------------------------- /vignettes/images/learningCurve.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/learningCurve.avif -------------------------------------------------------------------------------- /vignettes/images/precisionRecall.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/precisionRecall.avif -------------------------------------------------------------------------------- /vignettes/images/preferencePDF.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/preferencePDF.avif -------------------------------------------------------------------------------- /vignettes/images/generalizability.avif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/generalizability.avif -------------------------------------------------------------------------------- /vignettes/images/learningCurveBias.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/learningCurveBias.avif -------------------------------------------------------------------------------- /vignettes/images/learningCurvePlot.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/learningCurvePlot.avif -------------------------------------------------------------------------------- /vignettes/images/smoothCalibration.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/smoothCalibration.avif -------------------------------------------------------------------------------- /vignettes/images/sparseCalibration.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/sparseCalibration.avif -------------------------------------------------------------------------------- /vignettes/images/demographicSummary.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/demographicSummary.avif -------------------------------------------------------------------------------- /vignettes/images/variableScatterplot.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/variableScatterplot.avif 
-------------------------------------------------------------------------------- /vignettes/PatientLevelPredictionFigures.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/PatientLevelPredictionFigures.pptx -------------------------------------------------------------------------------- /vignettes/images/learningCurveVariance.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/learningCurveVariance.avif -------------------------------------------------------------------------------- /vignettes/images/predictionDistribution.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/predictionDistribution.avif -------------------------------------------------------------------------------- /demo/00Index: -------------------------------------------------------------------------------- 1 | SingleModelDemo Develop a single model on simulated data 2 | LearningCurveDemo Generate a learning curve on simulated data 3 | EnsembleModelDemo Generate an ensemble model on simulated data 4 | -------------------------------------------------------------------------------- /inst/sql/sql_server/UpdateVersionNumber.sql: -------------------------------------------------------------------------------- 1 | {DEFAULT @package_version = package_version} 2 | {DEFAULT @version_number = '6.0.10'} 3 | 4 | DELETE FROM @database_schema.@table_prefix@package_version; 5 | INSERT INTO @database_schema.@table_prefix@package_version (version_number) VALUES ('@version_number'); 6 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | 
^renv$ 2 | ^renv\.lock$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ^.vscode$ 6 | standalone 7 | deploy.sh 8 | extras/* 9 | man-roxygen 10 | compare_versions 11 | .github 12 | docs/* 13 | ^pyproject.toml$ 14 | ^.python-version$ 15 | ^uv.lock$ 16 | _pkgdown.yml 17 | ^vignettes/articles$ 18 | ^vignettes/Videos.Rmd 19 | ^doc$ 20 | ^Meta$ 21 | ^CRAN-RELEASE$ 22 | ^cran-comments\.md$ 23 | ^Dockerfile$ 24 | ^.venv$ 25 | ^.notVenv$ 26 | -------------------------------------------------------------------------------- /inst/sql/sql_server/GetCohorts.sql: -------------------------------------------------------------------------------- 1 | {DEFAULT @cdm_version = '5'} 2 | 3 | SELECT cast(row_id as int) row_id, 4 | subject_id, 5 | {@cdm_version == "4"} ? { 6 | cohort_concept_id AS target_id, 7 | } : { 8 | cohort_definition_id AS target_id, 9 | } 10 | cohort_start_date, 11 | days_from_obs_start, 12 | days_to_cohort_end, 13 | days_to_obs_end, 14 | age_year, 15 | gender 16 | FROM #cohort_person cohort 17 | ORDER BY subject_id 18 | -------------------------------------------------------------------------------- /.github/workflows/nightly_cleanup_Hades.yml: -------------------------------------------------------------------------------- 1 | name: 'nightly artifacts cleanup' 2 | on: 3 | schedule: 4 | - cron: '0 1 * * *' # every night at 1 am UTC 5 | 6 | jobs: 7 | remove-old-artifacts: 8 | runs-on: ubuntu-latest 9 | timeout-minutes: 10 10 | 11 | steps: 12 | - name: Remove old artifacts 13 | uses: c-hive/gha-remove-artifacts@v1 14 | with: 15 | age: '7 days' 16 | # Optional inputs 17 | # skip-tags: true 18 | skip-recent: 1 -------------------------------------------------------------------------------- /man/calibrationInLarge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluationSummary.R 3 | \name{calibrationInLarge} 4 | \alias{calibrationInLarge} 5 | 
\title{Calculate the calibration in large} 6 | \usage{ 7 | calibrationInLarge(prediction) 8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction dataframe} 11 | } 12 | \value{ 13 | data.frame with meanPredictionRisk, observedRisk, and N 14 | } 15 | \description{ 16 | Calculate the calibration in large 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /PatientLevelPrediction.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: 6acb9f49-7428-4e24-8a2a-6b10f35b95e2 3 | 4 | RestoreWorkspace: No 5 | SaveWorkspace: No 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: knitr 14 | LaTeX: pdfLaTeX 15 | 16 | BuildType: Package 17 | PackageUseDevtools: Yes 18 | PackageInstallArgs: --no-multiarch --with-keep.source 19 | PackageBuildArgs: --compact-vignettes=both 20 | PackageCheckArgs: --as-cran 21 | PackageRoxygenize: rd,namespace 22 | -------------------------------------------------------------------------------- /man/listAppend.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/HelperFunctions.R 3 | \name{listAppend} 4 | \alias{listAppend} 5 | \title{join two lists} 6 | \usage{ 7 | listAppend(a, b) 8 | } 9 | \arguments{ 10 | \item{a}{A list} 11 | 12 | \item{b}{Another list} 13 | } 14 | \value{ 15 | the joined list 16 | } 17 | \description{ 18 | join two lists 19 | } 20 | \details{ 21 | This function joins two lists 22 | } 23 | \examples{ 24 | a <- list(a = 1, b = 2) 25 | b <- list(c = 3, d = 4) 26 | listAppend(a, b) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/createTempModelLoc.Rd: -------------------------------------------------------------------------------- 1 
| % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/HelperFunctions.R 3 | \name{createTempModelLoc} 4 | \alias{createTempModelLoc} 5 | \title{Create a temporary model location} 6 | \usage{ 7 | createTempModelLoc() 8 | } 9 | \value{ 10 | A string for the location of the temporary model location 11 | } 12 | \description{ 13 | Create a temporary model location 14 | } 15 | \examples{ 16 | modelLoc <- createTempModelLoc() 17 | dir.exists(modelLoc) 18 | # clean up 19 | unlink(modelLoc, recursive = TRUE) 20 | } 21 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Explicitly declare text files you want to always be normalized and converted 5 | # to native line endings on checkout. 6 | DESCRIPTION text 7 | NAMESPACE text 8 | *.R text 9 | *.Rd text 10 | .cproject text 11 | .project text 12 | .settings/* text 13 | 14 | # Declare files that will always have CRLF line endings on checkout. 15 | *.sln text eol=crlf 16 | 17 | # Denote all files that are truly binary and should not be modified. 
18 | *.png binary 19 | *.jpg binary 20 | -------------------------------------------------------------------------------- /man/listCartesian.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnClassifierHelpers.R 3 | \name{listCartesian} 4 | \alias{listCartesian} 5 | \title{Cartesian product} 6 | \usage{ 7 | listCartesian(allList) 8 | } 9 | \arguments{ 10 | \item{allList}{a list of lists} 11 | } 12 | \value{ 13 | A list with all possible combinations from the input list of lists 14 | } 15 | \description{ 16 | Computes the Cartesian product of all the combinations of elements in a list 17 | } 18 | \examples{ 19 | listCartesian(list(list(1, 2), list(3, 4))) 20 | } 21 | -------------------------------------------------------------------------------- /man/print.plpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExtractData.R 3 | \name{print.plpData} 4 | \alias{print.plpData} 5 | \title{Print a plpData object} 6 | \usage{ 7 | \method{print}{plpData}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{The plpData object to print} 11 | 12 | \item{...}{Additional arguments} 13 | } 14 | \value{ 15 | A message describing the object 16 | } 17 | \description{ 18 | Print a plpData object 19 | } 20 | \examples{ 21 | 22 | data("simulationProfile") 23 | plpData <- simulatePlpData(simulationProfile, n = 10, seed = 42) 24 | print(plpData) 25 | } 26 | -------------------------------------------------------------------------------- /man/pmmFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Imputation.R 3 | \name{pmmFit} 4 | \alias{pmmFit} 5 | \title{Predictive mean matching using lasso} 6 | \usage{ 7 | pmmFit(data, k = 5) 8 | } 9 | \arguments{ 10 | \item{data}{An andromeda object with the following fields: 11 | xObs: covariates table for observed data 12 | xMiss: covariates table for missing data 13 | yObs: outcome variable that we want to impute} 14 | 15 | \item{k}{The number of donors to use for matching (default 5)} 16 | } 17 | \description{ 18 | Predictive mean matching using lasso 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/simpleImpute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Imputation.R 3 | \name{simpleImpute} 4 | \alias{simpleImpute} 5 | \title{Simple Imputation} 6 | \usage{ 7 | simpleImpute(trainData, featureEngineeringSettings, done = FALSE) 8 | } 9 | \arguments{ 10 | \item{trainData}{The data to be imputed} 11 | 12 | \item{featureEngineeringSettings}{The settings for the imputation} 13 | 14 | \item{done}{Whether the imputation has already been done (bool)} 15 | } 16 | \value{ 17 | The imputed data 18 | } 19 | \description{ 20 | This function does single imputation with the mean or median 21 | } 22 | 
\keyword{internal} 23 | -------------------------------------------------------------------------------- /compare_versions: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | open(R_VERSION, "grep 'Version' DESCRIPTION |"); 4 | $version = ; 5 | close(R_VERSION); 6 | 7 | $version =~ /(\d+)\.(\d+)\.(\d+)/; 8 | $r_major = $1; 9 | $r_minor = $2; 10 | $r_mod = $3; 11 | 12 | open(GIT_VERSION, "git describe --tags |"); 13 | $git = ; 14 | close(GIT_VERSION); 15 | 16 | $git =~ /v(\d+)\.(\d+)\.(\d+)/; 17 | $git_major = $1; 18 | $git_minor = $2; 19 | $git_mod = $3; 20 | 21 | if ($r_major > $git_major || $r_minor > $git_minor || $r_mod > $git_mod) { 22 | $new_version = "v$r_major.$r_minor.$r_mod"; 23 | } else { 24 | $new_version = ""; 25 | } 26 | 27 | print($new_version); 28 | -------------------------------------------------------------------------------- /man/brierScore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluationSummary.R 3 | \name{brierScore} 4 | \alias{brierScore} 5 | \title{brierScore} 6 | \usage{ 7 | brierScore(prediction) 8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction dataframe} 11 | } 12 | \value{ 13 | A list containing the brier score and the scaled brier score 14 | } 15 | \description{ 16 | brierScore 17 | } 18 | \details{ 19 | Calculates the brierScore from prediction object 20 | } 21 | \examples{ 22 | prediction <- data.frame( 23 | value = c(0.1, 0.2, 0.3, 0.4, 0.5), 24 | outcomeCount = c(0, 1, 0, 1, 1)) 25 | brierScore(prediction) 26 | } 27 | -------------------------------------------------------------------------------- /man/iterativeImpute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Imputation.R 3 | \name{iterativeImpute} 4 | 
\alias{iterativeImpute} 5 | \title{Imputation} 6 | \usage{ 7 | iterativeImpute(trainData, featureEngineeringSettings, done = FALSE) 8 | } 9 | \arguments{ 10 | \item{trainData}{The data to be imputed} 11 | 12 | \item{featureEngineeringSettings}{The settings for the imputation} 13 | 14 | \item{done}{Whether the imputation has already been done (bool)} 15 | } 16 | \value{ 17 | The imputed data 18 | } 19 | \description{ 20 | This function does single imputation with predictive mean matchin 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | # Example code in package build process 4 | *-Ex.R 5 | # R data files from past sessions 6 | .Rdata 7 | # R environ 8 | .Renviron 9 | # RStudio files 10 | .Rproj.user/ 11 | .Rproj.user 12 | # SqlRender 13 | statement_*.sql 14 | errorReport.txt 15 | #C++ objects 16 | src/*.o 17 | src/*.so 18 | src/*.dll 19 | /Debug 20 | standalone/build/* 21 | #models 22 | /plpmodels/* 23 | /python_models/* 24 | /mycache/* 25 | /inst/shiny/DiagnosticsExplorer/rsconnect/* 26 | /doc/ 27 | /Meta/ 28 | /extras/ 29 | /results/ 30 | /.vscode/ 31 | /sql/ 32 | .project 33 | .cproject 34 | /docs/ 35 | .python-version 36 | pyproject.toml 37 | uv.lock 38 | -------------------------------------------------------------------------------- /inst/sql/sqlite/migrations/Migration_1-store_version.sql: -------------------------------------------------------------------------------- 1 | -- Database migrations for verion 6.0.10 2 | -- This migration updates the schema: 3 | -- 1. to store the patient level prediction version 4 | -- 2. 
Add a migrations table for supporting database migrations 5 | 6 | {DEFAULT @package_version = package_version} 7 | {DEFAULT @migration = migration} 8 | {DEFAULT @table_prefix = ''} 9 | 10 | -- Create table indicating version number of ddl 11 | DROP TABLE IF EXISTS @database_schema.@table_prefix@package_version; 12 | 13 | --HINT DISTRIBUTE ON RANDOM 14 | CREATE TABLE @database_schema.@table_prefix@package_version ( 15 | version_number VARCHAR(50) PRIMARY KEY 16 | ); -------------------------------------------------------------------------------- /inst/sql/postgresql/migrations/Migration_1-store_version.sql: -------------------------------------------------------------------------------- 1 | -- Database migrations for verion 6.0.10 2 | -- This migration updates the schema: 3 | -- 1. to store the patient level prediction version 4 | -- 2. Add a migrations table for supporting database migrations 5 | 6 | {DEFAULT @package_version = package_version} 7 | {DEFAULT @migration = migration} 8 | {DEFAULT @table_prefix = ''} 9 | 10 | -- Create table indicating version number of ddl 11 | DROP TABLE IF EXISTS @database_schema.@table_prefix@package_version; 12 | 13 | --HINT DISTRIBUTE ON RANDOM 14 | CREATE TABLE @database_schema.@table_prefix@package_version ( 15 | version_number VARCHAR(50) PRIMARY KEY 16 | ); -------------------------------------------------------------------------------- /inst/sql/sql_server/migrations/Migration_1-store_version.sql: -------------------------------------------------------------------------------- 1 | -- Database migrations for verion 6.0.10 2 | -- This migration updates the schema: 3 | -- 1. to store the patient level prediction version 4 | -- 2. 
Add a migrations table for supporting database migrations 5 | 6 | {DEFAULT @package_version = package_version} 7 | {DEFAULT @migration = migration} 8 | {DEFAULT @table_prefix = ''} 9 | 10 | -- Create table indicating version number of ddl 11 | DROP TABLE IF EXISTS @database_schema.@table_prefix@package_version; 12 | 13 | --HINT DISTRIBUTE ON RANDOM 14 | CREATE TABLE @database_schema.@table_prefix@package_version ( 15 | version_number VARCHAR(50) PRIMARY KEY 16 | ); -------------------------------------------------------------------------------- /man/summary.plpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExtractData.R 3 | \name{summary.plpData} 4 | \alias{summary.plpData} 5 | \title{Summarize a plpData object} 6 | \usage{ 7 | \method{summary}{plpData}(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{The plpData object to summarize} 11 | 12 | \item{...}{Additional arguments} 13 | } 14 | \value{ 15 | A summary of the object containing the number of people, outcomes and covariates 16 | } 17 | \description{ 18 | Summarize a plpData object 19 | } 20 | \examples{ 21 | data("simulationProfile") 22 | plpData <- simulatePlpData(simulationProfile, n = 10, seed = 42) 23 | summary(plpData) 24 | } 25 | -------------------------------------------------------------------------------- /man/createDefaultExecuteSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunPlpHelpers.R 3 | \name{createDefaultExecuteSettings} 4 | \alias{createDefaultExecuteSettings} 5 | \title{Creates default list of settings specifying what parts of runPlp to execute} 6 | \usage{ 7 | createDefaultExecuteSettings() 8 | } 9 | \value{ 10 | list with TRUE for split, preprocess, model development and covariate summary 11 | } 12 | \description{ 13 | 
Creates default list of settings specifying what parts of runPlp to execute 14 | } 15 | \details{ 16 | runs split, preprocess, model development and covariate summary 17 | } 18 | \examples{ 19 | createDefaultExecuteSettings() 20 | } 21 | -------------------------------------------------------------------------------- /man/print.summary.plpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExtractData.R 3 | \name{print.summary.plpData} 4 | \alias{print.summary.plpData} 5 | \title{Print a summary.plpData object} 6 | \usage{ 7 | \method{print}{summary.plpData}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{The summary.plpData object to print} 11 | 12 | \item{...}{Additional arguments} 13 | } 14 | \value{ 15 | A message describing the object 16 | } 17 | \description{ 18 | Print a summary.plpData object 19 | } 20 | \examples{ 21 | data("simulationProfile") 22 | plpData <- simulatePlpData(simulationProfile, n = 10, seed = 42) 23 | summary <- summary(plpData) 24 | print(summary) 25 | } 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Set up (please run in R "sessionInfo()" and copy the output here):** 14 | copy the system set up details including the R version and operating system details... 
15 | 16 | **To Reproduce** 17 | Enter the code you tried to run here (with sensitive information such as passwords removed) 18 | 19 | **PLP Log File** 20 | Enter the log file contents here (plplog.txt) 21 | 22 | **Additional context** 23 | Add any other context about the problem here. 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /man/averagePrecision.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluationSummary.R 3 | \name{averagePrecision} 4 | \alias{averagePrecision} 5 | \title{Calculate the average precision} 6 | \usage{ 7 | averagePrecision(prediction) 8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction object} 11 | } 12 | \value{ 13 | The average precision value 14 | } 15 | \description{ 16 | Calculate the average precision 17 | } 18 | \details{ 19 | Calculates the average precision from a predition object 20 | } 21 | \examples{ 22 | prediction <- data.frame( 23 | value = c(0.1, 0.2, 0.3, 0.4, 0.5), 24 | outcomeCount = c(0, 1, 0, 1, 1) 25 | ) 26 | averagePrecision(prediction) 27 | } 28 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite PatientLevelPrediction in publications use:") 2 | 3 | bibentry(bibtype = "Article", 4 | author = c(as.person("J. M. Reps"), 5 | as.person("M. J. Schuemie"), 6 | as.person("M. A. Suchard"), 7 | as.person("P. B. Ryan"), 8 | as.person("P. 
Rijnbeek")), 9 | title = "Design and implementation of a standardized framework to generate and evaluate patient-level prediction models using observational healthcare data", 10 | journal = "Journal of the American Medical Informatics Association", 11 | volume = "25", 12 | number = "8", 13 | pages = "969-975", 14 | year = "2018", 15 | url = "https://doi.org/10.1093/jamia/ocy032" 16 | ) 17 | -------------------------------------------------------------------------------- /inst/shinyConfigUpdate.json: -------------------------------------------------------------------------------- 1 | { 2 | "shinyModules": [ 3 | { 4 | "id": "about", 5 | "tabName": "About", 6 | "tabText": "About", 7 | "shinyModulePackage": "OhdsiShinyModules", 8 | "uiFunction": "aboutViewer", 9 | "serverFunction": "aboutServer", 10 | "infoBoxFile": "aboutHelperFile()", 11 | "icon": "info", 12 | "order": 1 13 | }, 14 | { 15 | "id": "prediction", 16 | "tabName": "Prediction", 17 | "tabText": "Prediction", 18 | "shinyModulePackage": "OhdsiShinyModules", 19 | "uiFunction": "patientLevelPredictionViewer", 20 | "serverFunction": "patientLevelPredictionServer", 21 | "infoBoxFile": "patientLevelPredictionHelperFile()", 22 | "icon": "chart-line", 23 | "order": 2 24 | } 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /man/calibrationLine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluationSummary.R 3 | \name{calibrationLine} 4 | \alias{calibrationLine} 5 | \title{calibrationLine} 6 | \usage{ 7 | calibrationLine(prediction, numberOfStrata = 10) 8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction object} 11 | 12 | \item{numberOfStrata}{The number of groups to split the prediction into} 13 | } 14 | \value{ 15 | A list containing the calibrationLine coefficients, the aggregate data used 16 | to fit the line and the Hosmer-Lemeshow 
goodness of fit test 17 | } 18 | \description{ 19 | calibrationLine 20 | } 21 | \examples{ 22 | prediction <- data.frame( 23 | value = c(0.1, 0.2, 0.3, 0.4, 0.5), 24 | outcomeCount = c(0, 1, 0, 1, 1)) 25 | calibrationLine(prediction, numberOfStrata = 1) 26 | } 27 | -------------------------------------------------------------------------------- /man/minMaxNormalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{minMaxNormalize} 4 | \alias{minMaxNormalize} 5 | \title{A function that normalizes continous features to have values between 0 and 1} 6 | \usage{ 7 | minMaxNormalize(trainData, featureEngineeringSettings, done = FALSE) 8 | } 9 | \arguments{ 10 | \item{trainData}{The training data to be normalized} 11 | 12 | \item{featureEngineeringSettings}{The settings for the normalization} 13 | 14 | \item{done}{Whether the data has already been normalized (bool)} 15 | } 16 | \value{ 17 | The normalized data 18 | } 19 | \description{ 20 | A function that normalizes continous features to have values between 0 and 1 21 | } 22 | \details{ 23 | uses value - min / (max - min) to normalize the data 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/getPredictionDistribution_binary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictionDistribution.R 3 | \name{getPredictionDistribution_binary} 4 | \alias{getPredictionDistribution_binary} 5 | \title{Calculates the prediction distribution} 6 | \usage{ 7 | getPredictionDistribution_binary(prediction, evalColumn, ...) 
8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction object} 11 | 12 | \item{evalColumn}{A column that is used to stratify the results} 13 | 14 | \item{...}{Other inputs} 15 | } 16 | \value{ 17 | The 0.00, 0.1, 0.25, 0.5, 0.75, 0.9, 1.00 quantile pf the prediction, 18 | the mean and standard deviation per class 19 | } 20 | \description{ 21 | Calculates the prediction distribution 22 | } 23 | \details{ 24 | Calculates the quantiles from a predition object 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/removeRareFeatures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{removeRareFeatures} 4 | \alias{removeRareFeatures} 5 | \title{A function that removes rare features from the data} 6 | \usage{ 7 | removeRareFeatures(trainData, featureEngineeringSettings, done = FALSE) 8 | } 9 | \arguments{ 10 | \item{trainData}{The data to be normalized} 11 | 12 | \item{featureEngineeringSettings}{The settings for the normalization} 13 | 14 | \item{done}{Whether to find and remove rare features or remove them only (bool)} 15 | } 16 | \value{ 17 | The data with rare features removed 18 | } 19 | \description{ 20 | A function that removes rare features from the data 21 | } 22 | \details{ 23 | removes features that are present in less than a certain fraction of the population 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit -o nounset 3 | addToDrat(){ 4 | PKG_REPO=$PWD 5 | 6 | ## Build package tar ball 7 | export PKG_TARBALL=$(ls *.tar.gz) 8 | 9 | cd ..; mkdir drat; cd drat 10 | 11 | ## Set up Repo parameters 12 | git init 13 | git config user.name "Martijn Schuemie" 
14 | git config user.email "schuemie@ohdsi.org" 15 | git config --global push.default simple 16 | 17 | ## Get drat repo 18 | git remote add upstream "https://$GH_TOKEN@github.com/OHDSI/drat.git" 19 | git fetch upstream 2>err.txt 20 | git checkout gh-pages 21 | 22 | ## Link to local R packages 23 | echo 'R_LIBS=~/Rlib' > .Renviron 24 | 25 | Rscript -e "drat::insertPackage('$PKG_REPO/$PKG_TARBALL', \ 26 | repodir = '.', \ 27 | commit='GitHub Actions release: $PKG_TARBALL run $GITHUB_RUN_ID')" 28 | git push 29 | 30 | } 31 | addToDrat 32 | -------------------------------------------------------------------------------- /man/setNaiveBayes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnClassifierSettings.R 3 | \name{setNaiveBayes} 4 | \alias{setNaiveBayes} 5 | \title{Create setting for naive bayes model with python} 6 | \usage{ 7 | setNaiveBayes() 8 | } 9 | \value{ 10 | a modelSettings object 11 | } 12 | \description{ 13 | Create setting for naive bayes model with python 14 | } 15 | \examples{ 16 | \dontshow{ # dontrun reason: requires python's scikit-learn, checkSklearn() will error without it } 17 | \dontrun{ 18 | plpData <- getEunomiaPlpData() 19 | model <- setNaiveBayes() 20 | analysisId <- "naiveBayes" 21 | saveLocation <- file.path(tempdir(), analysisId) 22 | results <- runPlp(plpData, modelSettings = model, 23 | saveDirectory = saveLocation, 24 | analysisId = analysisId) 25 | # clean up 26 | unlink(saveLocation, recursive = TRUE) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /man/simulationProfile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PatientLevelPrediction.R 3 | \docType{data} 4 | \name{simulationProfile} 5 | \alias{simulationProfile} 6 | \title{A 
simulation profile for generating synthetic patient level prediction data} 7 | \format{ 8 | A data frame containing the following elements: 9 | \describe{ 10 | \item{covariatePrevalence}{prevalence of all covariates} 11 | \item{outcomeModels}{regression model parameters to simulate outcomes} 12 | \item{metaData}{settings used to simulate the profile} 13 | \item{covariateRef}{covariateIds and covariateNames} 14 | \item{timePrevalence}{time window} 15 | \item{exclusionPrevalence}{prevalence of exclusion of covariates} 16 | } 17 | } 18 | \usage{ 19 | data(simulationProfile) 20 | } 21 | \description{ 22 | A simulation profile for generating synthetic patient level prediction data 23 | } 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /man/createFeatureEngineeringSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createFeatureEngineeringSettings} 4 | \alias{createFeatureEngineeringSettings} 5 | \title{Create the settings for defining any feature engineering that will be done} 6 | \usage{ 7 | createFeatureEngineeringSettings(type = "none") 8 | } 9 | \arguments{ 10 | \item{type}{(character) Choice of: \itemize{ 11 | \item'none' No feature engineering - this is the default 12 | }} 13 | } 14 | \value{ 15 | An object of class \code{featureEngineeringSettings} 16 | } 17 | \description{ 18 | Create the settings for defining any feature engineering that will be done 19 | } 20 | \details{ 21 | Returns an object of class \code{featureEngineeringSettings} that specifies the sampling function that will be called and the settings 22 | } 23 | \examples{ 24 | createFeatureEngineeringSettings(type = "none") 25 | } 26 | -------------------------------------------------------------------------------- /man/createSimpleImputer.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Imputation.R 3 | \name{createSimpleImputer} 4 | \alias{createSimpleImputer} 5 | \title{Create Simple Imputer settings} 6 | \usage{ 7 | createSimpleImputer(method = "mean", missingThreshold = 0.3) 8 | } 9 | \arguments{ 10 | \item{method}{The method to use for imputation, either "mean" or "median"} 11 | 12 | \item{missingThreshold}{The threshold for missing values to be imputed vs removed} 13 | } 14 | \value{ 15 | The settings for the single imputer of class \code{featureEngineeringSettings} 16 | } 17 | \description{ 18 | This function creates the settings for a simple imputer 19 | which imputes missing values with the mean or median 20 | } 21 | \examples{ 22 | # create imputer to impute values with missingness less than 10\% using the median 23 | # of observed values 24 | createSimpleImputer(method = "median", missingThreshold = 0.10) 25 | } 26 | -------------------------------------------------------------------------------- /man/computeAuc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluationSummary.R 3 | \name{computeAuc} 4 | \alias{computeAuc} 5 | \title{Compute the area under the ROC curve} 6 | \usage{ 7 | computeAuc(prediction, confidenceInterval = FALSE) 8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction object as generated using the 11 | \code{\link{predict}} functions.} 12 | 13 | \item{confidenceInterval}{Should 95 percebt confidence intervals be computed?} 14 | } 15 | \value{ 16 | A data.frame containing the AUC and optionally the 95\% confidence interval 17 | } 18 | \description{ 19 | Compute the area under the ROC curve 20 | } 21 | \details{ 22 | Computes the area under the ROC curve for the predicted probabilities, given the true observed 23 | outcomes. 
24 | } 25 | \examples{ 26 | prediction <- data.frame( 27 | value = c(0.1, 0.2, 0.3, 0.4, 0.5), 28 | outcomeCount = c(0, 1, 0, 1, 1)) 29 | computeAuc(prediction) 30 | } 31 | -------------------------------------------------------------------------------- /man/loadPlpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{loadPlpData} 4 | \alias{loadPlpData} 5 | \title{Load the plpData from a folder} 6 | \usage{ 7 | loadPlpData(file, readOnly = TRUE) 8 | } 9 | \arguments{ 10 | \item{file}{The name of the folder containing the data.} 11 | 12 | \item{readOnly}{If true, the data is opened read only.} 13 | } 14 | \value{ 15 | An object of class plpData. 16 | } 17 | \description{ 18 | \code{loadPlpData} loads an object of type plpData from a folder in the file 19 | system. 20 | } 21 | \details{ 22 | The data will be written to a set of files in the folder specified by the user. 
23 | } 24 | \examples{ 25 | data("simulationProfile") 26 | plpData <- simulatePlpData(simulationProfile, n = 500, seed = 42) 27 | saveLoc <- file.path(tempdir(), "loadPlpData") 28 | savePlpData(plpData, saveLoc) 29 | dir(saveLoc) 30 | # clean up 31 | unlink(saveLoc, recursive = TRUE) 32 | } 33 | -------------------------------------------------------------------------------- /man/migrateDataModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DatabaseMigration.R 3 | \name{migrateDataModel} 4 | \alias{migrateDataModel} 5 | \title{Migrate Data model} 6 | \usage{ 7 | migrateDataModel(connectionDetails, databaseSchema, tablePrefix = "") 8 | } 9 | \arguments{ 10 | \item{connectionDetails}{DatabaseConnector connection details object} 11 | 12 | \item{databaseSchema}{String schema where database schema lives} 13 | 14 | \item{tablePrefix}{(Optional) Use if a table prefix is used before table names (e.g. "cd_")} 15 | } 16 | \value{ 17 | Nothing. Is called for side effects of migrating data model in the 18 | database 19 | } 20 | \description{ 21 | Migrate data from current state to next state 22 | 23 | It is strongly advised that you have a backup of all data (either sqlite files, a backup database (in the case you 24 | are using a postgres backend) or have kept the csv/zip files from your data generation. 
25 | } 26 | -------------------------------------------------------------------------------- /man/ici.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluationSummary.R 3 | \name{ici} 4 | \alias{ici} 5 | \title{Calculate the Integrated Calibration Index from Austin and Steyerberg 6 | https://onlinelibrary.wiley.com/doi/full/10.1002/sim.8281} 7 | \usage{ 8 | ici(prediction) 9 | } 10 | \arguments{ 11 | \item{prediction}{the prediction object found in the plpResult object} 12 | } 13 | \value{ 14 | Integrated Calibration Index value or NULL if the calculation fails 15 | } 16 | \description{ 17 | Calculate the Integrated Calibration Index from Austin and Steyerberg 18 | https://onlinelibrary.wiley.com/doi/full/10.1002/sim.8281 19 | } 20 | \details{ 21 | Calculate the Integrated Calibration Index 22 | } 23 | \examples{ 24 | prediction <- data.frame(rowId = 1:100, 25 | outcomeCount = stats::rbinom(1:100, 1, prob=0.5), 26 | value = runif(100), 27 | evaluation = rep("Train", 100)) 28 | ici(prediction) 29 | } 30 | -------------------------------------------------------------------------------- /man/loadPlpResult.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{loadPlpResult} 4 | \alias{loadPlpResult} 5 | \title{Loads the evalaution dataframe} 6 | \usage{ 7 | loadPlpResult(dirPath) 8 | } 9 | \arguments{ 10 | \item{dirPath}{The directory where the evaluation was saved} 11 | } 12 | \value{ 13 | \if{html}{\out{
}}\preformatted{ The runPlp object 14 | }\if{html}{\out{
}} 15 | } 16 | \description{ 17 | Loads the evaluation dataframe 18 | } 19 | \details{ 20 | Loads the evaluation 21 | } 22 | \examples{ 23 | \donttest{ \dontshow{ # takes too long } 24 | data("simulationProfile") 25 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 26 | saveLoc <- file.path(tempdir(), "loadPlpResult") 27 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 28 | savePlpResult(results, saveLoc) 29 | loadedResults <- loadPlpResult(saveLoc) 30 | # clean up 31 | unlink(saveLoc, recursive = TRUE) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /man/createRandomForestFeatureSelection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createRandomForestFeatureSelection} 4 | \alias{createRandomForestFeatureSelection} 5 | \title{Create the settings for random forest based feature selection} 6 | \usage{ 7 | createRandomForestFeatureSelection(ntrees = 2000, maxDepth = 17) 8 | } 9 | \arguments{ 10 | \item{ntrees}{number of trees in the forest} 11 | 12 | \item{maxDepth}{Max depth of each tree} 13 | } 14 | \value{ 15 | An object of class \code{featureEngineeringSettings} 16 | } 17 | \description{ 18 | Create the settings for random forest based feature selection 19 | } 20 | \details{ 21 | Returns an object of class \code{featureEngineeringSettings} that specifies the sampling function that will be called and the settings 22 | } 23 | \examples{ 24 | \dontshow{ # dontrun reason: requires python and scikit-learn } 25 | \dontrun{ #' featureSelector <- createRandomForestFeatureSelection(ntrees = 2000, maxDepth = 10) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /inst/shinyConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"shinyModules": [ 3 | { 4 | "id": "about", 5 | "tabName": "About", 6 | "tabText": "About", 7 | "shinyModulePackage": "OhdsiShinyModules", 8 | "uiFunction": "aboutViewer", 9 | "serverFunction": "aboutServer", 10 | "databaseConnectionKeyService": null, 11 | "databaseConnectionKeyUsername": null, 12 | "infoBoxFile": "aboutHelperFile()", 13 | "icon": "info", 14 | "keyring": true, 15 | "order": 1 16 | }, 17 | { 18 | "id": "prediction", 19 | "tabName": "Prediction", 20 | "tabText": "Prediction", 21 | "shinyModulePackage": "OhdsiShinyModules", 22 | "uiFunction": "predictionViewer", 23 | "serverFunction": "predictionServer", 24 | "databaseConnectionKeyService": "resultDatabaseDetails", 25 | "databaseConnectionKeyUsername": "prediction", 26 | "infoBoxFile": "predictionHelperFile()", 27 | "icon": "chart-line", 28 | "keyring": false, 29 | "order": 2 30 | } 31 | ] 32 | } 33 | -------------------------------------------------------------------------------- /man/sklearnToJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnToJson.R 3 | \name{sklearnToJson} 4 | \alias{sklearnToJson} 5 | \title{Saves sklearn python model object to json in path} 6 | \usage{ 7 | sklearnToJson(model, path) 8 | } 9 | \arguments{ 10 | \item{model}{a fitted sklearn python model object} 11 | 12 | \item{path}{path to the saved model file} 13 | } 14 | \value{ 15 | nothing, saves the model to the path as json 16 | } 17 | \description{ 18 | Saves sklearn python model object to json in path 19 | } 20 | \examples{ 21 | \dontshow{ # dontrun reason: requires python environment with sklearn } 22 | \dontrun{ 23 | sklearn <- reticulate::import("sklearn", convert = FALSE) 24 | model <- sklearn$tree$DecisionTreeClassifier() 25 | model$fit(sklearn$datasets$load_iris()$data, sklearn$datasets$load_iris()$target) 26 | saveLoc <- file.path(tempdir(), "model.json") 27 | sklearnToJson(model, 
saveLoc) 28 | # the model.json is saved in the tempdir 29 | dir(tempdir()) 30 | # clean up 31 | unlink(saveLoc) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /man/createSplineSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createSplineSettings} 4 | \alias{createSplineSettings} 5 | \title{Create the settings for adding a spline for continuous variables} 6 | \usage{ 7 | createSplineSettings(continousCovariateId, knots, analysisId = 683) 8 | } 9 | \arguments{ 10 | \item{continousCovariateId}{The covariateId to apply splines to} 11 | 12 | \item{knots}{Either number of knots of vector of split values} 13 | 14 | \item{analysisId}{The analysisId to use for the spline covariates} 15 | } 16 | \value{ 17 | An object of class \code{featureEngineeringSettings} 18 | } 19 | \description{ 20 | Create the settings for adding a spline for continuous variables 21 | } 22 | \details{ 23 | Returns an object of class \code{featureEngineeringSettings} that specifies the sampling function that will be called and the settings 24 | } 25 | \examples{ 26 | # create splines for age (1002) with 5 knots 27 | createSplineSettings(continousCovariateId = 1002, knots = 5, analysisId = 683) 28 | } 29 | -------------------------------------------------------------------------------- /man/loadPrediction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{loadPrediction} 4 | \alias{loadPrediction} 5 | \title{Loads the prediction dataframe to json} 6 | \usage{ 7 | loadPrediction(fileLocation) 8 | } 9 | \arguments{ 10 | \item{fileLocation}{The location with the saved prediction} 11 | } 12 | \value{ 13 | \if{html}{\out{
}}\preformatted{ The prediction data.frame 14 | }\if{html}{\out{
}} 15 | } 16 | \description{ 17 | Loads the prediction dataframe to json 18 | } 19 | \details{ 20 | Loads the prediciton json file 21 | } 22 | \examples{ 23 | \donttest{ \dontshow{ # takes too long } 24 | data("simulationProfile") 25 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 26 | saveLoc <- file.path(tempdir(), "loadPrediction") 27 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 28 | savePrediction(results$prediction, saveLoc) 29 | dir(saveLoc) 30 | loadedPrediction <- loadPrediction(file.path(saveLoc, "prediction.json")) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /man/setPythonEnvironment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/HelperFunctions.R 3 | \name{setPythonEnvironment} 4 | \alias{setPythonEnvironment} 5 | \title{Use the python environment created using configurePython()} 6 | \usage{ 7 | setPythonEnvironment(envname = "PLP", envtype = NULL) 8 | } 9 | \arguments{ 10 | \item{envname}{A string for the name of the virtual environment (default is 'PLP')} 11 | 12 | \item{envtype}{An option for specifying the environment as'conda' or 'python'. 
If NULL then the default is 'conda' for windows users and 'python' for non-windows users} 13 | } 14 | \value{ 15 | A string indicating the which python environment will be used 16 | } 17 | \description{ 18 | Use the python environment created using configurePython() 19 | } 20 | \details{ 21 | This function sets PatientLevelPrediction to use a python environment 22 | } 23 | \examples{ 24 | \dontshow{ # dontrun reason: don't modify environment in examples } 25 | \dontrun{ #' # create a conda environment named PLP 26 | configurePython(envname="PLP", envtype="conda") 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /tests/testthat/test-sklearnClassifierHelpers.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | test_that("listCartesian works", { 18 | allList <- list(a = list(1, 2), b = list(NULL, "auto"), c = list(-1)) 19 | 20 | paramLists <- listCartesian(allList) 21 | 22 | expect_equal(length(paramLists), 2 * 2 * 1) 23 | expect_equal(names(paramLists[[1]]), c("a", "b", "c")) 24 | expect_equal(length(paramLists[[1]]), 3) 25 | }) 26 | -------------------------------------------------------------------------------- /man/PatientLevelPrediction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PatientLevelPrediction.R 3 | \docType{package} 4 | \name{PatientLevelPrediction} 5 | \alias{PatientLevelPrediction-package} 6 | \alias{PatientLevelPrediction} 7 | \title{PatientLevelPrediction} 8 | \description{ 9 | A package for running predictions using data in the OMOP CDM 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://ohdsi.github.io/PatientLevelPrediction/} 15 | \item \url{https://github.com/OHDSI/PatientLevelPrediction} 16 | \item Report bugs at \url{https://github.com/OHDSI/PatientLevelPrediction/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Egill Fridgeirsson \email{e.fridgeirsson@erasmusmc.nl} 22 | 23 | Authors: 24 | \itemize{ 25 | \item Jenna Reps \email{jreps@its.jnj.com} 26 | \item Martijn Schuemie 27 | \item Marc Suchard 28 | \item Patrick Ryan 29 | \item Peter Rijnbeek 30 | } 31 | 32 | Other contributors: 33 | \itemize{ 34 | \item Observational Health Data Science and Informatics [copyright holder] 35 | } 36 | 37 | } 38 | \keyword{internal} 39 | -------------------------------------------------------------------------------- /man/modelBasedConcordance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluatePlp.R 3 | \name{modelBasedConcordance} 4 | 
\alias{modelBasedConcordance} 5 | \title{Calculate the model-based concordance, which is a calculation of the expected 6 | discrimination performance of a model under the assumption the model predicts 7 | the "TRUE" outcome as detailed in van Klaveren et al. 8 | https://pubmed.ncbi.nlm.nih.gov/27251001/} 9 | \usage{ 10 | modelBasedConcordance(prediction) 11 | } 12 | \arguments{ 13 | \item{prediction}{the prediction object found in the plpResult object} 14 | } 15 | \value{ 16 | The model-based concordance value 17 | } 18 | \description{ 19 | Calculate the model-based concordance, which is a calculation of the expected 20 | discrimination performance of a model under the assumption the model predicts 21 | the "TRUE" outcome as detailed in van Klaveren et al. 22 | https://pubmed.ncbi.nlm.nih.gov/27251001/ 23 | } 24 | \details{ 25 | Calculate the model-based concordance 26 | } 27 | \examples{ 28 | prediction <- data.frame(value = runif(100)) 29 | modelBasedConcordance(prediction) 30 | } 31 | -------------------------------------------------------------------------------- /man/loadPlpModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{loadPlpModel} 4 | \alias{loadPlpModel} 5 | \title{loads the plp model} 6 | \usage{ 7 | loadPlpModel(dirPath) 8 | } 9 | \arguments{ 10 | \item{dirPath}{The location of the model} 11 | } 12 | \value{ 13 | \if{html}{\out{
}}\preformatted{ The plpModel object 14 | }\if{html}{\out{
}} 15 | } 16 | \description{ 17 | loads the plp model 18 | } 19 | \details{ 20 | Loads a plp model that was saved using \code{savePlpModel()} 21 | } 22 | \examples{ 23 | \donttest{ \dontshow{ # takes too long } 24 | data("simulationProfile") 25 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 26 | saveLoc <- file.path(tempdir(), "loadPlpModel") 27 | plpResult <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 28 | savePlpModel(plpResult$model, file.path(saveLoc, "savedModel")) 29 | loadedModel <- loadPlpModel(file.path(saveLoc, "savedModel")) 30 | # show design of loaded model 31 | str(loadedModel$modelDesign) 32 | 33 | # clean up 34 | unlink(saveLoc, recursive = TRUE) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /man/createValidationSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExternalValidatePlp.R 3 | \name{createValidationSettings} 4 | \alias{createValidationSettings} 5 | \title{createValidationSettings define optional settings for performing external validation} 6 | \usage{ 7 | createValidationSettings(recalibrate = NULL, runCovariateSummary = TRUE) 8 | } 9 | \arguments{ 10 | \item{recalibrate}{A vector of characters specifying the recalibration method to apply} 11 | 12 | \item{runCovariateSummary}{Whether to run the covariate summary for the validation data} 13 | } 14 | \value{ 15 | A setting object of class \code{validationSettings} containing a list of settings for externalValidatePlp 16 | } 17 | \description{ 18 | This function creates the settings required by externalValidatePlp 19 | } 20 | \details{ 21 | Users need to specify whether they want to sample or recalibrate when performing external validation 22 | } 23 | \examples{ 24 | # do weak recalibration and don't run covariate summary 25 | createValidationSettings(recalibrate = 
"weakRecalibration", 26 | runCovariateSummary = FALSE) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/createUnivariateFeatureSelection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createUnivariateFeatureSelection} 4 | \alias{createUnivariateFeatureSelection} 5 | \title{Create the settings for defining any feature selection that will be done} 6 | \usage{ 7 | createUnivariateFeatureSelection(k = 100) 8 | } 9 | \arguments{ 10 | \item{k}{This function returns the K features most associated 11 | (univariately) to the outcome} 12 | } 13 | \value{ 14 | An object of class \code{featureEngineeringSettings} 15 | } 16 | \description{ 17 | Create the settings for defining any feature selection that will be done 18 | } 19 | \details{ 20 | Returns an object of class \code{featureEngineeringSettings} that specifies 21 | the function that will be called and the settings. Uses the scikit-learn 22 | SelectKBest function with chi2 for univariate feature selection. 
23 | } 24 | \examples{ 25 | \dontshow{ # dontrun reason: requires python and scikit-learn } 26 | \dontrun{ #' # create a feature selection that selects the 100 most associated features 27 | featureSelector <- createUnivariateFeatureSelection(k = 100) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /man/simulatePlpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Simulation.R 3 | \name{simulatePlpData} 4 | \alias{simulatePlpData} 5 | \title{Generate simulated data} 6 | \usage{ 7 | simulatePlpData(plpDataSimulationProfile, n = 10000, seed = NULL) 8 | } 9 | \arguments{ 10 | \item{plpDataSimulationProfile}{An object of type \code{plpDataSimulationProfile} as generated 11 | using the \cr\code{createplpDataSimulationProfile} function.} 12 | 13 | \item{n}{The size of the population to be generated.} 14 | 15 | \item{seed}{An optional seed for the random number generator. If provided, the simulated data are reproducible.} 16 | } 17 | \value{ 18 | An object of type \code{plpData}. 19 | } 20 | \description{ 21 | \code{simulatePlpData} creates a plpData object with simulated data. 22 | } 23 | \details{ 24 | This function generates simulated data that is in many ways similar to the original data on which 25 | the simulation profile is based. 
26 | } 27 | \examples{ 28 | # first load the simulation profile to use 29 | data("simulationProfile") 30 | # then generate the simulated data 31 | plpData <- simulatePlpData(simulationProfile, n = 100, seed = 42) 32 | nrow(plpData$cohorts) 33 | } 34 | -------------------------------------------------------------------------------- /man/computeGridPerformance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnClassifier.R 3 | \name{computeGridPerformance} 4 | \alias{computeGridPerformance} 5 | \title{Computes grid performance with a specified performance function} 6 | \usage{ 7 | computeGridPerformance(prediction, param, performanceFunct = "computeAuc") 8 | } 9 | \arguments{ 10 | \item{prediction}{a dataframe with predictions and outcomeCount per rowId} 11 | 12 | \item{param}{a list of hyperparameters} 13 | 14 | \item{performanceFunct}{a string specifying which performance function to use 15 | . 
Default \code{'computeAuc'}} 16 | } 17 | \value{ 18 | A list with overview of the performance 19 | } 20 | \description{ 21 | Computes grid performance with a specified performance function 22 | } 23 | \examples{ 24 | prediction <- data.frame(rowId = c(1, 2, 3, 4, 5), 25 | outcomeCount = c(0, 1, 0, 1, 0), 26 | value = c(0.1, 0.9, 0.2, 0.8, 0.3), 27 | index = c(1, 1, 1, 1, 1)) 28 | param <- list(hyperParam1 = 5, hyperParam2 = 100) 29 | computeGridPerformance(prediction, param, performanceFunct = "computeAuc") 30 | } 31 | -------------------------------------------------------------------------------- /man/savePlpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{savePlpData} 4 | \alias{savePlpData} 5 | \title{Save the plpData to folder} 6 | \usage{ 7 | savePlpData(plpData, file, envir = NULL, overwrite = FALSE) 8 | } 9 | \arguments{ 10 | \item{plpData}{An object of type \code{plpData} as generated using 11 | \code{getPlpData}.} 12 | 13 | \item{file}{The name of the folder where the data will be written. The folder should 14 | not yet exist.} 15 | 16 | \item{envir}{The environment in which to evaluate variables when saving} 17 | 18 | \item{overwrite}{Whether to force overwrite an existing file} 19 | } 20 | \value{ 21 | Called for its side effect, the data will be written to a set of files in the 22 | folder specified by the user. 23 | } 24 | \description{ 25 | \code{savePlpData} saves an object of type plpData to folder. 
26 | } 27 | \examples{ 28 | data("simulationProfile") 29 | plpData <- simulatePlpData(simulationProfile, n = 500, seed = 42) 30 | saveLoc <- file.path(tempdir(), "savePlpData") 31 | savePlpData(plpData, saveLoc) 32 | dir(saveLoc, full.names = TRUE) 33 | 34 | # clean up 35 | unlink(saveLoc, recursive = TRUE) 36 | } 37 | -------------------------------------------------------------------------------- /man/getPredictionDistribution.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictionDistribution.R 3 | \name{getPredictionDistribution} 4 | \alias{getPredictionDistribution} 5 | \title{Calculates the prediction distribution} 6 | \usage{ 7 | getPredictionDistribution( 8 | prediction, 9 | predictionType = "binary", 10 | typeColumn = "evaluation" 11 | ) 12 | } 13 | \arguments{ 14 | \item{prediction}{A prediction object} 15 | 16 | \item{predictionType}{The type of prediction (binary or survival)} 17 | 18 | \item{typeColumn}{A column that is used to stratify the results} 19 | } 20 | \value{ 21 | The 0.00, 0.1, 0.25, 0.5, 0.75, 0.9, 1.00 quantiles of the prediction, 22 | the mean and standard deviation per class 23 | } 24 | \description{ 25 | Calculates the prediction distribution 26 | } 27 | \details{ 28 | Calculates the quantiles from a prediction object 29 | } 30 | \examples{ 31 | prediction <- data.frame(rowId = 1:100, 32 | outcomeCount = stats::rbinom(1:100, 1, prob=0.5), 33 | value = runif(100), 34 | evaluation = rep("Train", 100)) 35 | getPredictionDistribution(prediction) 36 | } 37 | -------------------------------------------------------------------------------- /man/savePlpModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{savePlpModel} 4 | \alias{savePlpModel} 5 | \title{Saves the plp model} 6 | 
\usage{ 7 | savePlpModel(plpModel, dirPath) 8 | } 9 | \arguments{ 10 | \item{plpModel}{A trained classifier returned by running \code{runPlp()$model}} 11 | 12 | \item{dirPath}{A location to save the model to} 13 | } 14 | \value{ 15 | \if{html}{\out{
}}\preformatted{ The directory path where the model was saved 16 | }\if{html}{\out{
}} 17 | } 18 | \description{ 19 | Saves the plp model 20 | } 21 | \details{ 22 | Saves the plp model to a user specified folder 23 | } 24 | \examples{ 25 | \donttest{ \dontshow{ # takes too long } 26 | data("simulationProfile") 27 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 28 | saveLoc <- file.path(tempdir(), "savePlpModel") 29 | plpResult <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 30 | path <- savePlpModel(plpResult$model, file.path(saveLoc, "savedModel")) 31 | # show the saved model 32 | dir(path, full.names = TRUE) 33 | 34 | # clean up 35 | unlink(saveLoc, recursive = TRUE) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /man/getThresholdSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ThresholdSummary.R 3 | \name{getThresholdSummary} 4 | \alias{getThresholdSummary} 5 | \title{Calculate all measures for sparse ROC} 6 | \usage{ 7 | getThresholdSummary( 8 | prediction, 9 | predictionType = "binary", 10 | typeColumn = "evaluation" 11 | ) 12 | } 13 | \arguments{ 14 | \item{prediction}{A prediction object} 15 | 16 | \item{predictionType}{The type of prediction (binary or survival)} 17 | 18 | \item{typeColumn}{A column that is used to stratify the results} 19 | } 20 | \value{ 21 | A data.frame with TP, FP, TN, FN, TPR, FPR, accuracy, PPF, FOR and Fmeasure 22 | } 23 | \description{ 24 | Calculate all measures for sparse ROC 25 | } 26 | \details{ 27 | Calculates the TP, FP, TN, FN, TPR, FPR, accuracy, PPF, FOR and Fmeasure 28 | from a prediction object 29 | } 30 | \examples{ 31 | prediction <- data.frame(rowId = 1:100, 32 | outcomeCount = stats::rbinom(1:100, 1, prob=0.5), 33 | value = runif(100), 34 | evaluation = rep("Train", 100)) 35 | summary <- getThresholdSummary(prediction) 36 | str(summary) 37 | } 38 | 
-------------------------------------------------------------------------------- /man/loadPlpShareable.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{loadPlpShareable} 4 | \alias{loadPlpShareable} 5 | \title{Loads the plp result saved as json/csv files for transparent sharing} 6 | \usage{ 7 | loadPlpShareable(loadDirectory) 8 | } 9 | \arguments{ 10 | \item{loadDirectory}{The directory with the results as json/csv files} 11 | } 12 | \value{ 13 | \if{html}{\out{
}}\preformatted{ The runPlp object 14 | }\if{html}{\out{
}} 15 | } 16 | \description{ 17 | Loads the plp result saved as json/csv files for transparent sharing 18 | } 19 | \details{ 20 | Load the main results from json/csv files into a runPlp object 21 | } 22 | \examples{ 23 | \donttest{ \dontshow{ # takes too long } 24 | data("simulationProfile") 25 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 26 | saveLoc <- file.path(tempdir(), "loadPlpShareable") 27 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 28 | savePlpShareable(results, saveLoc) 29 | dir(saveLoc) 30 | loadedResults <- loadPlpShareable(saveLoc) 31 | # clean up 32 | unlink(saveLoc, recursive = TRUE) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /man/sklearnFromJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnToJson.R 3 | \name{sklearnFromJson} 4 | \alias{sklearnFromJson} 5 | \title{Loads sklearn python model from json} 6 | \usage{ 7 | sklearnFromJson(path) 8 | } 9 | \arguments{ 10 | \item{path}{path to the model json file} 11 | } 12 | \value{ 13 | a sklearn python model object 14 | } 15 | \description{ 16 | Loads sklearn python model from json 17 | } 18 | \examples{ 19 | \dontshow{ # dontrun reason: requires python environment with sklearn } 20 | \dontrun{ 21 | plpData <- getEunomiaPlpData() 22 | modelSettings <- setDecisionTree(maxDepth = list(3), minSamplesSplit = list(2), 23 | minSamplesLeaf = list(1), maxFeatures = list(100)) 24 | saveLocation <- file.path(tempdir(), "sklearnFromJson") 25 | results <- runPlp(plpData, modelSettings = modelSettings, saveDirectory = saveLocation) 26 | # view save model 27 | dir(results$model$model, full.names = TRUE) 28 | # load into a sklearn object 29 | model <- sklearnFromJson(file.path(results$model$model, "model.json")) 30 | # max depth is 3 as we set in beginning 31 | model$max_depth 32 | # 
clean up 33 | unlink(saveLocation, recursive = TRUE) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /man/createExistingSplitSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DataSplitting.R 3 | \name{createExistingSplitSettings} 4 | \alias{createExistingSplitSettings} 5 | \title{Create the settings for defining how the plpData are split into 6 | test/validation/train sets using an existing split - good to use for 7 | reproducing results from a different run} 8 | \usage{ 9 | createExistingSplitSettings(splitIds) 10 | } 11 | \arguments{ 12 | \item{splitIds}{(data.frame) A data frame with rowId and index columns of 13 | type integer/numeric. Index is -1 for test set, positive integer for train 14 | set folds} 15 | } 16 | \value{ 17 | An object of class \code{splitSettings} 18 | } 19 | \description{ 20 | Create the settings for defining how the plpData are split into 21 | test/validation/train sets using an existing split - good to use for 22 | reproducing results from a different run 23 | } 24 | \examples{ 25 | # rowId 1 is in fold 1, rowId 2 is in fold 2, rowId 3 is in the test set 26 | # rowId 4 is in fold 1, rowId 5 is in fold 2 27 | createExistingSplitSettings(splitIds = data.frame(rowId = c(1, 2, 3, 4, 5), 28 | index = c(1, 2, -1, 1, 2))) 29 | } 30 | -------------------------------------------------------------------------------- /man/loadPlpAnalysesJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunMultiplePlp.R 3 | \name{loadPlpAnalysesJson} 4 | \alias{loadPlpAnalysesJson} 5 | \title{Load the multiple prediction json settings from a file} 6 | \usage{ 7 | loadPlpAnalysesJson(jsonFileLocation) 8 | } 9 | \arguments{ 10 | \item{jsonFileLocation}{The location 
of the file 'predictionAnalysisList.json' with the modelDesignList} 11 | } 12 | \value{ 13 | A list with the modelDesignList and cohortDefinitions 14 | } 15 | \description{ 16 | Load the multiple prediction json settings from a file 17 | } 18 | \details{ 19 | This function interprets a json with the multiple prediction settings and creates a list 20 | that can be combined with connection settings to run a multiple prediction study 21 | } 22 | \examples{ 23 | modelDesign <- createModelDesign(targetId = 1, outcomeId = 2, 24 | modelSettings = setLassoLogisticRegression()) 25 | saveLoc <- file.path(tempdir(), "loadPlpAnalysesJson") 26 | savePlpAnalysesJson(modelDesignList = modelDesign, saveDirectory = saveLoc) 27 | loadPlpAnalysesJson(file.path(saveLoc, "predictionAnalysisList.json")) 28 | # clean use 29 | unlink(saveLoc, recursive = TRUE) 30 | } 31 | -------------------------------------------------------------------------------- /man/configurePython.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/HelperFunctions.R 3 | \name{configurePython} 4 | \alias{configurePython} 5 | \title{Sets up a python environment to use for PLP (can be conda or venv)} 6 | \usage{ 7 | configurePython(envname = "PLP", envtype = NULL, condaPythonVersion = "3.11") 8 | } 9 | \arguments{ 10 | \item{envname}{A string for the name of the virtual environment (default is 'PLP')} 11 | 12 | \item{envtype}{An option for specifying the environment as'conda' or 'python'. 
If NULL then the default is 'conda' for windows users and 'python' for non-windows users} 13 | 14 | \item{condaPythonVersion}{String, Python version to use when creating a conda environment} 15 | } 16 | \value{ 17 | location of the created conda or virtual python environment 18 | } 19 | \description{ 20 | Sets up a python environment to use for PLP (can be conda or venv) 21 | } 22 | \details{ 23 | This function creates a python environment that can be used by PatientLevelPrediction 24 | and installs all the required package dependancies. 25 | } 26 | \examples{ 27 | \dontshow{ # dontrun reason: don't modify environment in examples } 28 | \dontrun{ 29 | configurePython(envname="PLP", envtype="conda") 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /man/savePrediction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{savePrediction} 4 | \alias{savePrediction} 5 | \title{Saves the prediction dataframe to a json file} 6 | \usage{ 7 | savePrediction(prediction, dirPath, fileName = "prediction.json") 8 | } 9 | \arguments{ 10 | \item{prediction}{The prediciton data.frame} 11 | 12 | \item{dirPath}{The directory to save the prediction json} 13 | 14 | \item{fileName}{The name of the json file that will be saved} 15 | } 16 | \value{ 17 | \if{html}{\out{
}}\preformatted{ The file location where the prediction was saved 18 | }\if{html}{\out{
}} 19 | } 20 | \description{ 21 | Saves the prediction dataframe to a json file 22 | } 23 | \details{ 24 | Saves the prediction data frame returned by predict.R to an json file and 25 | returns the fileLocation where the prediction is saved 26 | } 27 | \examples{ 28 | prediction <- data.frame( 29 | rowIds = c(1, 2, 3), 30 | outcomeCount = c(0, 1, 0), 31 | value = c(0.1, 0.9, 0.2) 32 | ) 33 | saveLoc <- file.path(tempdir()) 34 | savePrediction(prediction, saveLoc) 35 | dir(saveLoc) 36 | 37 | # clean up 38 | unlink(file.path(saveLoc, "prediction.json")) 39 | } 40 | -------------------------------------------------------------------------------- /man/predictGlm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Glm.R 3 | \name{predictGlm} 4 | \alias{predictGlm} 5 | \title{predict using a logistic regression model} 6 | \usage{ 7 | predictGlm(plpModel, data, cohort) 8 | } 9 | \arguments{ 10 | \item{plpModel}{An object of type \code{plpModel} - a patient level 11 | prediction model} 12 | 13 | \item{data}{An object of type \code{plpData} - the patient level prediction 14 | data extracted from the CDM.} 15 | 16 | \item{cohort}{The population dataframe created using 17 | \code{createStudyPopulation} who will have their risks predicted or a cohort 18 | without the outcome known} 19 | } 20 | \value{ 21 | A dataframe containing the prediction for each person in the 22 | population 23 | } 24 | \description{ 25 | Predict risk with a given plpModel containing a generalized linear model. 
26 | } 27 | \examples{ 28 | coefficients <- data.frame( 29 | covariateId = c(1002), 30 | coefficient = c(0.05)) 31 | model <- createGlmModel(coefficients, intercept = -2.5) 32 | data("simulationProfile") 33 | plpData <- simulatePlpData(simulationProfile, n = 50, seed = 42) 34 | prediction <- predictGlm(model, plpData, plpData$cohorts) 35 | # see the predicted risk values 36 | head(prediction) 37 | } 38 | -------------------------------------------------------------------------------- /man/createStratifiedImputationSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createStratifiedImputationSettings} 4 | \alias{createStratifiedImputationSettings} 5 | \title{Create the settings for using stratified imputation.} 6 | \usage{ 7 | createStratifiedImputationSettings(covariateId, ageSplits = NULL) 8 | } 9 | \arguments{ 10 | \item{covariateId}{The covariateId that needs imputed values} 11 | 12 | \item{ageSplits}{A vector of age splits in years to create age groups} 13 | } 14 | \value{ 15 | An object of class \code{featureEngineeringSettings} 16 | } 17 | \description{ 18 | Create the settings for using stratified imputation. 19 | } 20 | \details{ 21 | Returns an object of class \code{featureEngineeringSettings} that specifies 22 | how to do stratified imputation. This function splits the covariate into 23 | age groups and fits splines to the covariate within each age group. The spline 24 | values are then used to impute missing values. 
25 | } 26 | \examples{ 27 | # create a stratified imputation settings for covariate 1050 with age splits 28 | # at 50 and 70 29 | stratifiedImputationSettings <- 30 | createStratifiedImputationSettings(covariateId = 1050, ageSplits = c(50, 70)) 31 | } 32 | -------------------------------------------------------------------------------- /man/plotPlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotPlp} 4 | \alias{plotPlp} 5 | \title{Plot all the PatientLevelPrediction plots} 6 | \usage{ 7 | plotPlp(plpResult, saveLocation = NULL, typeColumn = "evaluation") 8 | } 9 | \arguments{ 10 | \item{plpResult}{Object returned by the runPlp() function} 11 | 12 | \item{saveLocation}{Name of the directory where the plots should be saved (NULL means no saving)} 13 | 14 | \item{typeColumn}{The name of the column specifying the evaluation type 15 | (to stratify the plots)} 16 | } 17 | \value{ 18 | TRUE if it ran, plots are saved in the specified directory 19 | } 20 | \description{ 21 | Plot all the PatientLevelPrediction plots 22 | } 23 | \details{ 24 | Create a directory with all the plots 25 | } 26 | \examples{ 27 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 28 | \donttest{ \dontshow{ # takes too long } 29 | data("simulationProfile") 30 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 31 | saveLoc <- file.path(tempdir(), "plotPlp") 32 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 33 | plotPlp(results) 34 | # clean up 35 | unlink(saveLoc, recursive = TRUE) 36 | } 37 | \dontshow{\}) # examplesIf} 38 | } 39 | -------------------------------------------------------------------------------- /tests/testthat/helper-expectations.R: -------------------------------------------------------------------------------- 1 | # common tests that can be grouped together, such as 
testing the output from fitplp 2 | expect_correct_fitPlp <- function(plpModel, trainData, testLocation = TRUE) { 3 | outcomeId <- 3 4 | # predictions are same amount as labels 5 | multiplicativeFactor <- dplyr::n_distinct(plpModel$prediction %>% 6 | dplyr::pull(.data$evaluationType)) 7 | expect_equal(NROW(trainData$labels) * multiplicativeFactor, NROW(plpModel$prediction)) 8 | 9 | # predictions are all between 0 and 1 10 | expect_true(all((plpModel$prediction$value >= 0) & 11 | (plpModel$prediction$value <= 1))) 12 | 13 | # model directory exists 14 | if (testLocation) { 15 | expect_true(dir.exists(plpModel$model)) 16 | } 17 | 18 | expect_equal(plpModel$modelDesign$outcomeId, outcomeId) 19 | expect_equal(plpModel$modelDesign$targetId, 1) 20 | 21 | # structure of plpModel is correct 22 | expect_equal(names(plpModel), c( 23 | "model", "preprocessing", "prediction", 24 | "modelDesign", "trainDetails", "covariateImportance" 25 | )) 26 | } 27 | 28 | expect_correct_predictions <- function(predictions, testData) { 29 | # predictions are all between 0 and 1 30 | expect_true(all((predictions$value >= 0) & (predictions$value <= 1))) 31 | expect_equal(NROW(testData$labels), NROW(predictions)) 32 | } 33 | -------------------------------------------------------------------------------- /man/savePlpResult.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{savePlpResult} 4 | \alias{savePlpResult} 5 | \title{Saves the result from runPlp into the location directory} 6 | \usage{ 7 | savePlpResult(result, dirPath) 8 | } 9 | \arguments{ 10 | \item{result}{The result of running runPlp()} 11 | 12 | \item{dirPath}{The directory to save the csv} 13 | } 14 | \value{ 15 | \if{html}{\out{
}}\preformatted{ The directory path where the results were saved 16 | }\if{html}{\out{
}} 17 | } 18 | \description{ 19 | Saves the result from runPlp into the location directory 20 | } 21 | \details{ 22 | Saves the result from runPlp into the location directory 23 | } 24 | \examples{ 25 | \donttest{ \dontshow{ # takes too long } 26 | data("simulationProfile") 27 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 28 | saveLoc <- file.path(tempdir(), "savePlpResult") 29 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 30 | # save the results 31 | newSaveLoc <- file.path(tempdir(), "savePlpResult", "saved") 32 | savePlpResult(results, newSaveLoc) 33 | # show the saved results 34 | dir(newSaveLoc, recursive = TRUE, full.names = TRUE) 35 | 36 | # clean up 37 | unlink(saveLoc, recursive = TRUE) 38 | unlink(newSaveLoc, recursive = TRUE) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /man/MapIds.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Formatting.R 3 | \name{MapIds} 4 | \alias{MapIds} 5 | \title{Map covariate and row Ids so they start from 1} 6 | \usage{ 7 | MapIds(covariateData, cohort = NULL, mapping = NULL) 8 | } 9 | \arguments{ 10 | \item{covariateData}{a covariateData object} 11 | 12 | \item{cohort}{if specified rowIds restricted to the ones in cohort} 13 | 14 | \item{mapping}{A pre defined mapping to use} 15 | } 16 | \value{ 17 | a new \code{covariateData} object with remapped covariate and row ids 18 | } 19 | \description{ 20 | this functions takes covariate data and a cohort/population and remaps 21 | the covariate and row ids, restricts to pop and saves/creates mapping 22 | } 23 | \examples{ 24 | covariateData <- Andromeda::andromeda( 25 | covariates = data.frame(rowId = c(1, 3, 5, 7, 9), 26 | covariateId = c(10, 20, 10, 10, 20), 27 | covariateValue = c(1, 1, 1, 1, 1)), 28 | covariateRef = data.frame(covariateId = c(10, 20), 29 | 
covariateNames = c("covariateA", 30 | "covariateB"), 31 | analysisId = c(1, 1))) 32 | mappedData <- MapIds(covariateData) 33 | # columnId and rowId are now starting from 1 and are consecutive 34 | mappedData$covariates 35 | } 36 | -------------------------------------------------------------------------------- /R/SklearnClassifierHelpers.R: -------------------------------------------------------------------------------- 1 | # @file SklearnClassifierHelpers.R 2 | # 3 | # Copyright 2022 Observational Health Data Sciences and Informatics 4 | # 5 | # This file is part of PatientLevelPrediction 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | #' Cartesian product 20 | #' 21 | #' Computes the Cartesian product of all the combinations of elements in a list 22 | #' 23 | #' @param allList a list of lists 24 | #' @return A list with all possible combinations from the input list of lists 25 | #' @examples 26 | #' listCartesian(list(list(1, 2), list(3, 4))) 27 | #' @export 28 | listCartesian <- function(allList) { 29 | combinations <- expand.grid(allList, stringsAsFactors = FALSE) 30 | results <- lapply(seq_len(nrow(combinations)), 31 | function(i) lapply(combinations, function(x) x[i][[1]])) 32 | return(results) 33 | } 34 | -------------------------------------------------------------------------------- /man/predictPlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Predict.R 3 | \name{predictPlp} 4 | \alias{predictPlp} 5 | \title{predictPlp} 6 | \usage{ 7 | predictPlp(plpModel, plpData, population, timepoint) 8 | } 9 | \arguments{ 10 | \item{plpModel}{An object of type \code{plpModel} - a patient level prediction model} 11 | 12 | \item{plpData}{An object of type \code{plpData} - the patient level prediction 13 | data extracted from the CDM.} 14 | 15 | \item{population}{The population created using createStudyPopulation() who will have their risks predicted or a cohort without the outcome known} 16 | 17 | \item{timepoint}{The timepoint to predict risk (survival models only)} 18 | } 19 | \value{ 20 | A data frame containing the predicted risk values 21 | } 22 | \description{ 23 | Predict the risk of the outcome using the input plpModel for the input plpData 24 | } 25 | \details{ 26 | The function applied the trained model on the plpData to make predictions 27 | } 28 | \examples{ 29 | coefficients <- data.frame( 30 | covariateId = c(1002), 31 | coefficient = c(0.05) 32 | ) 33 | model <- createGlmModel(coefficients, intercept = -2.5) 34 | data("simulationProfile") 35 | plpData <- 
simulatePlpData(simulationProfile, n = 50, seed = 42) 36 | prediction <- predictPlp(model, plpData, plpData$cohorts) 37 | # see the predicted risk values 38 | head(prediction) 39 | } 40 | -------------------------------------------------------------------------------- /man/createNormalizer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createNormalizer} 4 | \alias{createNormalizer} 5 | \title{Create the settings for normalizing the data @param type The type of normalization to use, either "minmax" or "robust"} 6 | \usage{ 7 | createNormalizer(type = "minmax", settings = list()) 8 | } 9 | \arguments{ 10 | \item{type}{The type of normalization to use, either "minmax" or "robust"} 11 | 12 | \item{settings}{A list of settings for the normalization. 13 | For robust normalization, the settings list can contain a boolean value for 14 | clip, which clips the values to be between -3 and 3 after normalization. 
See 15 | https://arxiv.org/abs/2407.04491} 16 | } 17 | \value{ 18 | An object of class \code{featureEngineeringSettings} 19 | 20 | An object of class \code{featureEngineeringSettings}' 21 | } 22 | \description{ 23 | Create the settings for normalizing the data @param type The type of normalization to use, either "minmax" or "robust" 24 | } 25 | \examples{ 26 | # create a minmax normalizer that normalizes the data between 0 and 1 27 | normalizer <- createNormalizer(type = "minmax") 28 | # create a robust normalizer that normalizes the data by the interquartile range 29 | # and squeezes the values to be between -3 and 3 30 | normalizer <- createNormalizer(type = "robust", settings = list(clip = TRUE)) 31 | } 32 | -------------------------------------------------------------------------------- /man/createExecuteSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunPlpHelpers.R 3 | \name{createExecuteSettings} 4 | \alias{createExecuteSettings} 5 | \title{Creates list of settings specifying what parts of runPlp to execute} 6 | \usage{ 7 | createExecuteSettings( 8 | runSplitData = FALSE, 9 | runSampleData = FALSE, 10 | runFeatureEngineering = FALSE, 11 | runPreprocessData = FALSE, 12 | runModelDevelopment = FALSE, 13 | runCovariateSummary = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{runSplitData}{TRUE or FALSE whether to split data into train/test} 18 | 19 | \item{runSampleData}{TRUE or FALSE whether to over or under sample} 20 | 21 | \item{runFeatureEngineering}{TRUE or FALSE whether to do feature engineering} 22 | 23 | \item{runPreprocessData}{TRUE or FALSE whether to do preprocessing} 24 | 25 | \item{runModelDevelopment}{TRUE or FALSE whether to develop the model} 26 | 27 | \item{runCovariateSummary}{TRUE or FALSE whether to create covariate summary} 28 | } 29 | \value{ 30 | list with TRUE/FALSE for each part of runPlp 31 | } 32 | 
\description{ 33 | Creates list of settings specifying what parts of runPlp to execute 34 | } 35 | \details{ 36 | define what parts of runPlp to execute 37 | } 38 | \examples{ 39 | # create settings with only split and model development 40 | createExecuteSettings(runSplitData = TRUE, runModelDevelopment = TRUE) 41 | } 42 | -------------------------------------------------------------------------------- /.settings/org.eclipse.cdt.managedbuilder.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/CPATH/delimiter=; 3 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/CPATH/operation=remove 4 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/CPLUS_INCLUDE_PATH/delimiter=; 5 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/CPLUS_INCLUDE_PATH/operation=remove 6 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/C_INCLUDE_PATH/delimiter=; 7 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/C_INCLUDE_PATH/operation=remove 8 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/append=true 9 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/appendContributed=true 10 | environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/LIBRARY_PATH/delimiter=; 11 | environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/LIBRARY_PATH/operation=remove 12 | environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/append=true 13 | environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/appendContributed=true 
14 | -------------------------------------------------------------------------------- /man/viewPlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ViewShinyPlp.R 3 | \name{viewPlp} 4 | \alias{viewPlp} 5 | \title{viewPlp - Interactively view the performance and model settings} 6 | \usage{ 7 | viewPlp(runPlp, validatePlp = NULL, diagnosePlp = NULL) 8 | } 9 | \arguments{ 10 | \item{runPlp}{The output of runPlp() (an object of class 'runPlp')} 11 | 12 | \item{validatePlp}{The output of externalValidatePlp (on object of class 'validatePlp')} 13 | 14 | \item{diagnosePlp}{The output of diagnosePlp()} 15 | } 16 | \value{ 17 | Opens a shiny app for interactively viewing the results 18 | } 19 | \description{ 20 | This is a shiny app for viewing interactive plots of the performance and the settings 21 | } 22 | \details{ 23 | Either the result of runPlp and view the plots 24 | } 25 | \examples{ 26 | \dontshow{if (rlang::is_interactive() && rlang::is_installed("OhdsiShinyAppBuilder") ) withAutoprint(\{ # examplesIf} 27 | \donttest{ \dontshow{ # takes too long } 28 | data("simulationProfile") 29 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 30 | saveLoc <- file.path(tempdir(), "viewPlp", "development") 31 | results <- runPlp(plpData, saveDirectory = saveLoc) 32 | # view result files 33 | dir(saveLoc, recursive = TRUE) 34 | # open shiny app 35 | viewPlp(results) 36 | # clean up, shiny app can't be opened after the following has been run 37 | unlink(saveLoc, recursive = TRUE) 38 | } 39 | \dontshow{\}) # examplesIf} 40 | } 41 | -------------------------------------------------------------------------------- /man/createPreprocessSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PreprocessingData.R 3 | 
\name{createPreprocessSettings} 4 | \alias{createPreprocessSettings} 5 | \title{Create the settings for preprocessing the trainData.} 6 | \usage{ 7 | createPreprocessSettings( 8 | minFraction = 0.001, 9 | normalize = TRUE, 10 | removeRedundancy = TRUE 11 | ) 12 | } 13 | \arguments{ 14 | \item{minFraction}{The minimum fraction of target population who must have a 15 | covariate for it to be included in the model training} 16 | 17 | \item{normalize}{Whether to normalise the covariates before training 18 | (Default: TRUE)} 19 | 20 | \item{removeRedundancy}{Whether to remove redundant features (Default: TRUE) 21 | Redundant features are features that within an analysisId together cover all 22 | observations. For example with ageGroups, if you have ageGroup 0-18 and 18-100 23 | and all patients are in one of these groups, then one of these groups is redundant.} 24 | } 25 | \value{ 26 | An object of class \code{preprocessingSettings} 27 | } 28 | \description{ 29 | Create the settings for preprocessing the trainData. 
30 | } 31 | \details{ 32 | Returns an object of class \code{preprocessingSettings} that specifies how to 33 | preprocess the training data 34 | } 35 | \examples{ 36 | # Create the settings for preprocessing, remove no features, normalise the data 37 | createPreprocessSettings(minFraction = 0.0, normalize = TRUE, removeRedundancy = FALSE) 38 | } 39 | -------------------------------------------------------------------------------- /man/predictCyclops.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CyclopsModels.R 3 | \name{predictCyclops} 4 | \alias{predictCyclops} 5 | \title{Create predictive probabilities} 6 | \usage{ 7 | predictCyclops(plpModel, data, cohort) 8 | } 9 | \arguments{ 10 | \item{plpModel}{An object of type \code{predictiveModel} as generated using 11 | \code{\link{fitPlp}}.} 12 | 13 | \item{data}{The new plpData containing the covariateData for the new population} 14 | 15 | \item{cohort}{The cohort to calculate the prediction for} 16 | } 17 | \value{ 18 | The value column in the result data.frame is: logistic: probabilities of the outcome, poisson: 19 | Poisson rate (per day) of the outcome, survival: hazard rate (per day) of the outcome. 20 | } 21 | \description{ 22 | Create predictive probabilities 23 | } 24 | \details{ 25 | Generates predictions for the population specified in plpData given the model. 
26 | } 27 | \examples{ 28 | \donttest{ \dontshow{ # takes too long } 29 | data("simulationProfile") 30 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 31 | population <- createStudyPopulation(plpData, outcomeId = 3) 32 | data <- splitData(plpData, population) 33 | plpModel <- fitPlp(data$Train, modelSettings = setLassoLogisticRegression(seed = 42), 34 | analysisId = "test", analysisPath = NULL) 35 | prediction <- predictCyclops(plpModel, data$Test, data$Test$labels) 36 | # view prediction dataframe 37 | head(prediction) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /man/savePlpAnalysesJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunMultiplePlp.R 3 | \name{savePlpAnalysesJson} 4 | \alias{savePlpAnalysesJson} 5 | \title{Save the modelDesignList to a json file} 6 | \usage{ 7 | savePlpAnalysesJson( 8 | modelDesignList = list(createModelDesign(targetId = 1, outcomeId = 2, modelSettings = 9 | setLassoLogisticRegression()), createModelDesign(targetId = 1, outcomeId = 3, 10 | modelSettings = setLassoLogisticRegression())), 11 | cohortDefinitions = NULL, 12 | saveDirectory = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{modelDesignList}{A list of modelDesigns created using \code{createModelDesign()}} 17 | 18 | \item{cohortDefinitions}{A list of the cohortDefinitions (generally extracted from ATLAS)} 19 | 20 | \item{saveDirectory}{The directory to save the modelDesignList settings} 21 | } 22 | \value{ 23 | The json string of the ModelDesignList 24 | } 25 | \description{ 26 | Save the modelDesignList to a json file 27 | } 28 | \details{ 29 | This function creates a json file with the modelDesignList saved 30 | } 31 | \examples{ 32 | modelDesign <- createModelDesign(targetId = 1, 33 | outcomeId = 2, 34 | modelSettings = setLassoLogisticRegression()) 35 | saveLoc <- 
file.path(tempdir(), "loadPlpAnalysesJson") 36 | jsonFile <- savePlpAnalysesJson(modelDesignList = modelDesign, saveDirectory = saveLoc) 37 | # clean up 38 | unlink(saveLoc, recursive = TRUE) 39 | } 40 | -------------------------------------------------------------------------------- /man/setCoxModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CyclopsSettings.R 3 | \name{setCoxModel} 4 | \alias{setCoxModel} 5 | \title{Create setting for lasso Cox model} 6 | \usage{ 7 | setCoxModel( 8 | variance = 0.01, 9 | seed = NULL, 10 | includeCovariateIds = c(), 11 | noShrinkage = c(), 12 | threads = -1, 13 | upperLimit = 20, 14 | lowerLimit = 0.01, 15 | tolerance = 2e-07, 16 | maxIterations = 3000 17 | ) 18 | } 19 | \arguments{ 20 | \item{variance}{Numeric: prior distribution starting variance} 21 | 22 | \item{seed}{An option to add a seed when training the model} 23 | 24 | \item{includeCovariateIds}{a set of covariate IDs to limit the analysis to} 25 | 26 | \item{noShrinkage}{a set of covariates which are to be forced to be included in the final model. 
default is the intercept} 27 | 28 | \item{threads}{An option to set number of threads when training model} 29 | 30 | \item{upperLimit}{Numeric: Upper prior variance limit for grid-search} 31 | 32 | \item{lowerLimit}{Numeric: Lower prior variance limit for grid-search} 33 | 34 | \item{tolerance}{Numeric: maximum relative change in convergence criterion from successive iterations to achieve convergence} 35 | 36 | \item{maxIterations}{Integer: maximum iterations of Cyclops to attempt before returning a failed-to-converge error} 37 | } 38 | \value{ 39 | \code{modelSettings} object 40 | } 41 | \description{ 42 | Create setting for lasso Cox model 43 | } 44 | \examples{ 45 | coxL1 <- setCoxModel() 46 | } 47 | -------------------------------------------------------------------------------- /man/setAdaBoost.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnClassifierSettings.R 3 | \name{setAdaBoost} 4 | \alias{setAdaBoost} 5 | \title{Create setting for AdaBoost with python DecisionTreeClassifier base estimator} 6 | \usage{ 7 | setAdaBoost( 8 | nEstimators = list(10, 50, 200), 9 | learningRate = list(1, 0.5, 0.1), 10 | algorithm = list("SAMME"), 11 | seed = sample(1e+06, 1) 12 | ) 13 | } 14 | \arguments{ 15 | \item{nEstimators}{(list) The maximum number of estimators at which boosting is terminated. In case of perfect fit, the learning procedure is stopped early.} 16 | 17 | \item{learningRate}{(list) Weight applied to each classifier at each boosting iteration. A higher learning rate increases the contribution of each classifier. There is a trade-off between the learningRate and nEstimators parameters 18 | There is a trade-off between learningRate and nEstimators.} 19 | 20 | \item{algorithm}{Only ‘SAMME’ can be provided. 
The 'algorithm' argument will be deprecated in scikit-learn 1.8.} 21 | 22 | \item{seed}{A seed for the model} 23 | } 24 | \value{ 25 | a modelSettings object 26 | } 27 | \description{ 28 | Create setting for AdaBoost with python DecisionTreeClassifier base estimator 29 | } 30 | \examples{ 31 | \dontshow{ # dontrun reason: requires python's scikit-learn, checkSklearn() will error without it } 32 | \dontrun{ 33 | model <- setAdaBoost(nEstimators = list(10), 34 | learningRate = list(0.1), 35 | seed = 42) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /man/createIterativeImputer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Imputation.R 3 | \name{createIterativeImputer} 4 | \alias{createIterativeImputer} 5 | \title{Create Iterative Imputer settings} 6 | \usage{ 7 | createIterativeImputer( 8 | missingThreshold = 0.3, 9 | method = "pmm", 10 | methodSettings = list(pmm = list(k = 5, iterations = 5)) 11 | ) 12 | } 13 | \arguments{ 14 | \item{missingThreshold}{The threshold for missing values to remove a feature} 15 | 16 | \item{method}{The method to use for imputation, currently only "pmm" is supported} 17 | 18 | \item{methodSettings}{A list of settings for the imputation method to use. 
19 | Currently only "pmm" is supported with the following settings: 20 | \itemize{ 21 | \item k: The number of donors to use for matching 22 | \item iterations: The number of iterations to use for imputation 23 | }} 24 | } 25 | \value{ 26 | The settings for the iterative imputer of class \code{featureEngineeringSettings} 27 | } 28 | \description{ 29 | This function creates the settings for an iterative imputer 30 | which first removes features with more than \code{missingThreshold} missing values 31 | and then imputes the missing values iteratively using chained equations 32 | } 33 | \examples{ 34 | # create imputer to impute values with missingness less than 30\% using 35 | # predictive mean matching in 5 iterations with 5 donors 36 | createIterativeImputer(missingThreshold = 0.3, method = "pmm", 37 | methodSettings = list(pmm = list(k = 5, iterations = 5))) 38 | } 39 | -------------------------------------------------------------------------------- /man/plotVariableScatterplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotVariableScatterplot} 4 | \alias{plotVariableScatterplot} 5 | \title{Plot the variable importance scatterplot} 6 | \usage{ 7 | plotVariableScatterplot( 8 | covariateSummary, 9 | saveLocation = NULL, 10 | fileName = "VariableScatterplot.png" 11 | ) 12 | } 13 | \arguments{ 14 | \item{covariateSummary}{A prediction object as generated using the 15 | \code{\link{runPlp}} function.} 16 | 17 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 18 | 19 | \item{fileName}{Name of the file to save to plot, for example 20 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 21 | supported file formats.} 22 | } 23 | \value{ 24 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 25 | format. 
26 | } 27 | \description{ 28 | Plot the variable importance scatterplot 29 | } 30 | \details{ 31 | Create a plot showing the variable importance scatterplot 32 | #' 33 | } 34 | \examples{ 35 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 36 | \donttest{ \dontshow{ # takes too long } 37 | data("simulationProfile") 38 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 39 | saveLoc <- file.path(tempdir(), "plotVariableScatterplot") 40 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 41 | plotVariableScatterplot(results$covariateSummary) 42 | # clean up 43 | } 44 | \dontshow{\}) # examplesIf} 45 | } 46 | -------------------------------------------------------------------------------- /man/recalibratePlpRefit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Recalibration.R 3 | \name{recalibratePlpRefit} 4 | \alias{recalibratePlpRefit} 5 | \title{recalibratePlpRefit} 6 | \usage{ 7 | recalibratePlpRefit(plpModel, newPopulation, newData, returnModel = FALSE) 8 | } 9 | \arguments{ 10 | \item{plpModel}{The trained plpModel (runPlp$model)} 11 | 12 | \item{newPopulation}{The population created using createStudyPopulation() who will have their risks predicted} 13 | 14 | \item{newData}{An object of type \code{plpData} - the patient level prediction 15 | data extracted from the CDM.} 16 | 17 | \item{returnModel}{Logical: return the refitted model} 18 | } 19 | \value{ 20 | A prediction dataframe with the predictions of the recalibrated model added 21 | } 22 | \description{ 23 | Recalibrating a model by refitting it 24 | } 25 | \examples{ 26 | \donttest{ \dontshow{ # takes too long } 27 | data("simulationProfile") 28 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 29 | saveLoc <- file.path(tempdir(), "recalibratePlpRefit") 30 | plpResults <- runPlp(plpData, outcomeId = 3, 
saveDirectory = saveLoc) 31 | newData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 32 | newPopulation <- createStudyPopulation(newData, outcomeId = 3) 33 | predictions <- recalibratePlpRefit(plpModel = plpResults$model, 34 | newPopulation = newPopulation, 35 | newData = newData) 36 | # clean up 37 | unlink(saveLoc, recursive = TRUE) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /man/createRareFeatureRemover.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createRareFeatureRemover} 4 | \alias{createRareFeatureRemover} 5 | \title{Create the settings for removing rare features} 6 | \usage{ 7 | createRareFeatureRemover(threshold = 0.001) 8 | } 9 | \arguments{ 10 | \item{threshold}{The minimum fraction of the training data that must have a 11 | feature for it to be included} 12 | } 13 | \value{ 14 | An object of class \code{featureEngineeringSettings} 15 | } 16 | \description{ 17 | Create the settings for removing rare features 18 | } 19 | \examples{ 20 | \dontshow{if (rlang::is_installed("Eunomia") && rlang::is_installed("curl") && curl::has_internet()) withAutoprint(\{ # examplesIf} 21 | \donttest{ \dontshow{ # takes too long } 22 | # create a rare feature remover that removes features that are present in less 23 | # than 1\% of the population 24 | rareFeatureRemover <- createRareFeatureRemover(threshold = 0.01) 25 | plpData <- getEunomiaPlpData() 26 | analysisId <- "rareFeatureRemover" 27 | saveLocation <- file.path(tempdir(), analysisId) 28 | results <- runPlp( 29 | plpData = plpData, 30 | featureEngineeringSettings = rareFeatureRemover, 31 | outcomeId = 3, 32 | executeSettings = createExecuteSettings( 33 | runModelDevelopment = TRUE, 34 | runSplitData = TRUE, 35 | runFeatureEngineering = TRUE), 36 | saveDirectory = saveLocation, 37 | analysisId = 
analysisId) 38 | # clean up 39 | unlink(saveLocation, recursive = TRUE) 40 | } 41 | \dontshow{\}) # examplesIf} 42 | } 43 | -------------------------------------------------------------------------------- /tests/testthat/test-helperfunctions.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # how to test checkPlpInstallation? 17 | 18 | 19 | test_that("createTempModelLoc", { 20 | expect_equal(class(PatientLevelPrediction:::createTempModelLoc()), "character") 21 | }) 22 | 23 | list1 <- list(a = 1:2, b = 5:6) 24 | list2 <- list(c = 1:5) 25 | test_that("listAppend", { 26 | expect_equal(length(listAppend(list1, list2)), 3) 27 | }) 28 | 29 | # how to test configurePython? 30 | 31 | test_that("setPythonEnvironment", { 32 | skip_if_not_installed("reticulate") 33 | skip_on_cran() 34 | expect_error(setPythonEnvironment(envname = "madeup34343")) 35 | }) 36 | 37 | test_that("Borrowed cut2", { 38 | x <- c(1, rep(2, 2), rep(4, 4), rep(5, 5), rep(6, 6)) 39 | groups <- PatientLevelPrediction:::cut2(x, g = 3) 40 | expect_true( 41 | all(levels(groups) == c("[1,5)", "5", "6")) 42 | ) 43 | }) 44 | 45 | # getOs test? 
46 | -------------------------------------------------------------------------------- /man/plotSparseCalibration.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotSparseCalibration} 4 | \alias{plotSparseCalibration} 5 | \title{Plot the calibration} 6 | \usage{ 7 | plotSparseCalibration( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "roc.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 
28 | } 29 | \description{ 30 | Plot the calibration 31 | } 32 | \details{ 33 | Create a plot showing the calibration 34 | #' 35 | } 36 | \examples{ 37 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 38 | \donttest{ \dontshow{ # takes too long } 39 | data("simulationProfile") 40 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 41 | saveLoc <- file.path(tempdir(), "plotSparseCalibration") 42 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 43 | plotSparseCalibration(results) 44 | # clean up 45 | unlink(saveLoc, recursive = TRUE) 46 | } 47 | \dontshow{\}) # examplesIf} 48 | } 49 | -------------------------------------------------------------------------------- /man/preprocessData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PreprocessingData.R 3 | \name{preprocessData} 4 | \alias{preprocessData} 5 | \title{A function that wraps around FeatureExtraction::tidyCovariateData to normalise 6 | the data and remove rare or redundant features} 7 | \usage{ 8 | preprocessData(covariateData, preprocessSettings = createPreprocessSettings()) 9 | } 10 | \arguments{ 11 | \item{covariateData}{The covariate part of the training data created by \code{splitData} after being sampled and having 12 | any required feature engineering} 13 | 14 | \item{preprocessSettings}{The settings for the preprocessing created by \code{createPreprocessSettings} 15 | The data processed} 16 | } 17 | \value{ 18 | The covariateData object with the processed covariates 19 | } 20 | \description{ 21 | A function that wraps around FeatureExtraction::tidyCovariateData to normalise 22 | the data and remove rare or redundant features 23 | } 24 | \details{ 25 | Returns an object of class \code{covariateData} that has been processed. 26 | This includes normalising the data and removing rare or redundant features. 
27 | Redundant features are features that within an analysisId together cover 28 | all observations. 29 | } 30 | \examples{ 31 | library(dplyr) 32 | data("simulationProfile") 33 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 34 | preProcessedData <- preprocessData(plpData$covariateData, createPreprocessSettings()) 35 | # check age is normalized by max value 36 | preProcessedData$covariates \%>\% dplyr::filter(.data$covariateId == 1002) 37 | } 38 | -------------------------------------------------------------------------------- /man/savePlpShareable.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{savePlpShareable} 4 | \alias{savePlpShareable} 5 | \title{Save the plp result as json files and csv files for transparent sharing} 6 | \usage{ 7 | savePlpShareable(result, saveDirectory, minCellCount = 10) 8 | } 9 | \arguments{ 10 | \item{result}{An object of class runPlp with development or validation results} 11 | 12 | \item{saveDirectory}{The directory to save the results as csv files} 13 | 14 | \item{minCellCount}{Minimum cell count for the covariateSummary and certain evaluation results} 15 | } 16 | \value{ 17 | \if{html}{\out{
}}\preformatted{ The directory path where the results were saved 18 | }\if{html}{\out{
}} 19 | } 20 | \description{ 21 | Save the plp result as json files and csv files for transparent sharing 22 | } 23 | \details{ 24 | Saves the main results json/csv files (these files can be read by the shiny app) 25 | } 26 | \examples{ 27 | \donttest{ \dontshow{ # takes too long } 28 | data("simulationProfile") 29 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 30 | saveLoc <- file.path(tempdir(), "savePlpShareable") 31 | results <- runPlp(plpData, saveDirectory = saveLoc) 32 | newSaveLoc <- file.path(tempdir(), "savePlpShareable", "saved") 33 | path <- savePlpShareable(results, newSaveLoc) 34 | # show the saved result 35 | dir(newSaveLoc, full.names = TRUE, recursive = TRUE) 36 | 37 | # clean up 38 | unlink(saveLoc, recursive = TRUE) 39 | unlink(newSaveLoc, recursive = TRUE) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /man/createLogSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Logging.R 3 | \name{createLogSettings} 4 | \alias{createLogSettings} 5 | \title{Create the settings for logging the progression of the analysis} 6 | \usage{ 7 | createLogSettings( 8 | verbosity = "DEBUG", 9 | timeStamp = TRUE, 10 | logName = "runPlp Log" 11 | ) 12 | } 13 | \arguments{ 14 | \item{verbosity}{Sets the level of the verbosity. If the log level is at or higher in priority than the logger threshold, a message will print. The levels are: 15 | \itemize{ 16 | \item DEBUG Highest verbosity showing all debug statements 17 | \item TRACE Showing information about start and end of steps 18 | \item INFO Show informative information (Default) 19 | \item WARN Show warning messages 20 | \item ERROR Show error messages 21 | \item FATAL Be silent except for fatal errors 22 | }} 23 | 24 | \item{timeStamp}{If TRUE a timestamp will be added to each logging statement. 
Automatically switched on for TRACE level.} 25 | 26 | \item{logName}{A string reference for the logger} 27 | } 28 | \value{ 29 | An object of class \code{logSettings} containing the settings for the logger 30 | } 31 | \description{ 32 | Create the settings for logging the progression of the analysis 33 | } 34 | \details{ 35 | Returns an object of class \code{logSettings} that specifies the logger settings 36 | } 37 | \examples{ 38 | # create a log settings object with DEBUG verbosity, timestamp and log name 39 | # "runPlp Log". This needs to be passed to `runPlp`. 40 | createLogSettings(verbosity = "DEBUG", timeStamp = TRUE, logName = "runPlp Log") 41 | } 42 | -------------------------------------------------------------------------------- /man/plotSparseCalibration2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotSparseCalibration2} 4 | \alias{plotSparseCalibration2} 5 | \title{Plot the conventional calibration} 6 | \usage{ 7 | plotSparseCalibration2( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "roc.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format.
28 | } 29 | \description{ 30 | Plot the conventional calibration 31 | } 32 | \details{ 33 | Create a plot showing the calibration 34 | #' 35 | } 36 | \examples{ 37 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 38 | \donttest{ \dontshow{ # takes too long } 39 | data("simulationProfile") 40 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 41 | saveLoc <- file.path(tempdir(), "plotSparseCalibration2") 42 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 43 | plotSparseCalibration2(results) 44 | # clean up 45 | unlink(saveLoc, recursive = TRUE) 46 | } 47 | \dontshow{\}) # examplesIf} 48 | } 49 | -------------------------------------------------------------------------------- /man/robustNormalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{robustNormalize} 4 | \alias{robustNormalize} 5 | \title{A function that normalizes continuous values by the interquartile range and 6 | optionally forces the resulting values to be between -3 and 3 with 7 | f(x) = x / sqrt(1 + (x/3)^2) 8 | '@details uses (value - median) / iqr to normalize the data and then can 9 | apply the function f(x) = x / sqrt(1 + (x/3)^2) to the normalized values. 10 | This forces the values to be between -3 and 3 while preserving the relative 11 | ordering of the values.
12 | based on https://arxiv.org/abs/2407.04491 for more details} 13 | \usage{ 14 | robustNormalize(trainData, featureEngineeringSettings, done = FALSE) 15 | } 16 | \arguments{ 17 | \item{trainData}{The training data to be normalized} 18 | 19 | \item{featureEngineeringSettings}{The settings for the normalization} 20 | 21 | \item{done}{Whether the data has already been normalized (bool)} 22 | } 23 | \value{ 24 | The \code{trainData} object with normalized data 25 | } 26 | \description{ 27 | A function that normalizes continuous values by the interquartile range and 28 | optionally forces the resulting values to be between -3 and 3 with 29 | f(x) = x / sqrt(1 + (x/3)^2) 30 | '@details uses (value - median) / iqr to normalize the data and then can 31 | apply the function f(x) = x / sqrt(1 + (x/3)^2) to the normalized values. 32 | This forces the values to be between -3 and 3 while preserving the relative 33 | ordering of the values. 34 | based on https://arxiv.org/abs/2407.04491 for more details 35 | } 36 | \keyword{internal} 37 | -------------------------------------------------------------------------------- /man/plotSparseRoc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotSparseRoc} 4 | \alias{plotSparseRoc} 5 | \title{Plot the ROC curve using the sparse thresholdSummary data frame} 6 | \usage{ 7 | plotSparseRoc( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "roc.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'.
See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 28 | } 29 | \description{ 30 | Plot the ROC curve using the sparse thresholdSummary data frame 31 | } 32 | \details{ 33 | Create a plot showing the Receiver Operator Characteristics (ROC) curve. 34 | } 35 | \examples{ 36 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 37 | \donttest{ \dontshow{ # takes too long } 38 | data("simulationProfile") 39 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 40 | saveLoc <- file.path(tempdir(), "plotSparseRoc") 41 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 42 | plotSparseRoc(results) 43 | # clean up 44 | unlink(saveLoc, recursive = TRUE) 45 | } 46 | \dontshow{\}) # examplesIf} 47 | } 48 | -------------------------------------------------------------------------------- /man/setIterativeHardThresholding.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CyclopsSettings.R 3 | \name{setIterativeHardThresholding} 4 | \alias{setIterativeHardThresholding} 5 | \title{Create setting for Iterative Hard Thresholding model} 6 | \usage{ 7 | setIterativeHardThresholding( 8 | K = 10, 9 | penalty = "bic", 10 | seed = sample(1e+05, 1), 11 | exclude = c(), 12 | forceIntercept = FALSE, 13 | fitBestSubset = FALSE, 14 | initialRidgeVariance = 0.1, 15 | tolerance = 1e-08, 16 | maxIterations = 10000, 17 | threshold = 1e-06, 18 | delta = 0 19 | ) 20 | } 21 | \arguments{ 22 | \item{K}{The maximum number of non-zero predictors} 23 | 24 | \item{penalty}{Specifies the IHT penalty; possible values are \code{BIC} or \code{AIC} or a numeric value} 25 | 26 | \item{seed}{An option to add a seed when training the model} 27 | 28 | \item{exclude}{A 
vector of numbers or covariateId names to exclude from prior} 29 | 30 | \item{forceIntercept}{Logical: Force intercept coefficient into regularization} 31 | 32 | \item{fitBestSubset}{Logical: Fit final subset with no regularization} 33 | 34 | \item{initialRidgeVariance}{integer} 35 | 36 | \item{tolerance}{numeric} 37 | 38 | \item{maxIterations}{integer} 39 | 40 | \item{threshold}{numeric} 41 | 42 | \item{delta}{numeric} 43 | } 44 | \value{ 45 | \code{modelSettings} object 46 | } 47 | \description{ 48 | Create setting for Iterative Hard Thresholding model 49 | } 50 | \examples{ 51 | \dontshow{if (rlang::is_installed("IterativeHardThresholding")) withAutoprint(\{ # examplesIf} 52 | modelIht <- setIterativeHardThresholding(K = 5, seed = 42) 53 | \dontshow{\}) # examplesIf} 54 | } 55 | -------------------------------------------------------------------------------- /man/plotDemographicSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotDemographicSummary} 4 | \alias{plotDemographicSummary} 5 | \title{Plot the Observed vs. expected incidence, by age and gender} 6 | \usage{ 7 | plotDemographicSummary( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "roc.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 
28 | } 29 | \description{ 30 | Plot the Observed vs. expected incidence, by age and gender 31 | } 32 | \details{ 33 | Create a plot showing the Observed vs. expected incidence, by age and gender 34 | #' 35 | } 36 | \examples{ 37 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 38 | \donttest{ 39 | data("simulationProfile") 40 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 41 | saveLoc <- file.path(tempdir(), "plotDemographicSummary") 42 | plpResult <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 43 | plotDemographicSummary(plpResult) 44 | # clean up 45 | unlink(saveLoc, recursive = TRUE) 46 | } 47 | \dontshow{\}) # examplesIf} 48 | } 49 | -------------------------------------------------------------------------------- /demo/SingleModelDemo.R: -------------------------------------------------------------------------------- 1 | # This demo will run a logistic regression model on simulated data and will show the Shiny App 2 | library(PatientLevelPrediction) 3 | devAskNewPage(ask = FALSE) 4 | 5 | ### Simulated data from a database profile 6 | set.seed(1234) 7 | data(plpDataSimulationProfile) 8 | sampleSize <- 2000 9 | plpData <- simulatePlpData(plpDataSimulationProfile, n = sampleSize, seed = 42) 10 | 11 | ### Define the study population 12 | populationSettings <- createStudyPopulationSettings( 13 | binary = TRUE, 14 | firstExposureOnly = FALSE, 15 | washoutPeriod = 0, 16 | removeSubjectsWithPriorOutcome = FALSE, 17 | priorOutcomeLookback = 99999, 18 | requireTimeAtRisk = TRUE, 19 | minTimeAtRisk = 0, 20 | riskWindowStart = 0, 21 | startAnchor = 'cohort start', 22 | riskWindowEnd = 365, 23 | endAnchor = 'cohort start' 24 | ) 25 | 26 | ### Regularised logistic regression 27 | lr_model <- setLassoLogisticRegression() 28 | lr_results <- runPlp( 29 | plpData = plpData, 30 | outcomeId = 2, 31 | analysisId = 'demo', 32 | analysisName = 'run plp demo', 33 | populationSettings = populationSettings, 34 | 
splitSettings = createDefaultSplitSetting( 35 | type = "time", 36 | testFraction = 0.25, 37 | nfold = 2 38 | ), 39 | sampleSettings = createSampleSettings(), 40 | preprocessSettings = createPreprocessSettings( 41 | minFraction = 0, 42 | normalize = T 43 | ), 44 | modelSettings = lr_model, 45 | executeSettings = createDefaultExecuteSettings(), 46 | saveDirectory = "./plpdemo" 47 | ) 48 | 49 | 50 | ### Have a look at the results object. 51 | 52 | ### You can start the Shiny App by using this command now: 53 | ### viewPlp(lr_results) 54 | -------------------------------------------------------------------------------- /man/plotF1Measure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotF1Measure} 4 | \alias{plotF1Measure} 5 | \title{Plot the F1 measure efficiency frontier using the sparse thresholdSummary data frame} 6 | \usage{ 7 | plotF1Measure( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "roc.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 
28 | } 29 | \description{ 30 | Plot the F1 measure efficiency frontier using the sparse thresholdSummary data frame 31 | } 32 | \details{ 33 | Create a plot showing the F1 measure efficiency frontier using the sparse thresholdSummary data frame 34 | } 35 | \examples{ 36 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 37 | \donttest{ \dontshow{ # takes too long } 38 | data("simulationProfile") 39 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 40 | saveLoc <- file.path(tempdir(), "plotF1Measure") 41 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 42 | plotF1Measure(results) 43 | # clean up 44 | unlink(saveLoc, recursive = TRUE) 45 | } 46 | \dontshow{\}) # examplesIf} 47 | } 48 | -------------------------------------------------------------------------------- /man/plotPrecisionRecall.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotPrecisionRecall} 4 | \alias{plotPrecisionRecall} 5 | \title{Plot the precision-recall curve using the sparse thresholdSummary data frame} 6 | \usage{ 7 | plotPrecisionRecall( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "roc.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 
28 | } 29 | \description{ 30 | Plot the precision-recall curve using the sparse thresholdSummary data frame 31 | } 32 | \details{ 33 | Create a plot showing the precision-recall curve using the sparse thresholdSummary data frame 34 | } 35 | \examples{ 36 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 37 | \donttest{ \dontshow{ # takes too long } 38 | data("simulationProfile") 39 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 40 | saveLoc <- file.path(tempdir(), "plotPrecisionRecall") 41 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 42 | plotPrecisionRecall(results) 43 | # clean up 44 | unlink(saveLoc, recursive = TRUE) 45 | } 46 | \dontshow{\}) # examplesIf} 47 | } 48 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, develop] 6 | release: 7 | types: [published] 8 | workflow_dispatch: 9 | 10 | name: pkgdown 11 | 12 | jobs: 13 | pkgdown: 14 | runs-on: ubuntu-latest 15 | # Only restrict concurrency for non-PR jobs 16 | concurrency: 17 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 18 | env: 19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - uses: r-lib/actions/setup-pandoc@v2 24 | 25 | - uses: r-lib/actions/setup-r@v2 26 | with: 27 | use-public-rspm: true 28 | 29 | - uses: r-lib/actions/setup-r-dependencies@v2 30 | with: 31 | cache: always 32 | extra-packages: any::pkgdown, ohdsi/OhdsiRTools 33 | needs: website 34 | 35 | - uses: lycheeverse/lychee-action@v2 36 | with: 37 | args: --base . 
--verbose --no-progress --accept '100..=103, 200..=299, 403, 429' './**/*.md' './**/*.Rmd' 38 | 39 | - name: Build site 40 | run: Rscript -e 'pkgdown::build_site_github_pages(new_process = FALSE, install = TRUE)' 41 | 42 | - name: Fix Hades Logo 43 | run: Rscript -e 'OhdsiRTools::fixHadesLogo()' 44 | 45 | - name: Deploy to GitHub pages 🚀 46 | if: github.event_name != 'pull_request' 47 | uses: JamesIves/github-pages-deploy-action@v4 48 | with: 49 | clean: false 50 | branch: gh-pages 51 | folder: docs 52 | -------------------------------------------------------------------------------- /R/PatientLevelPrediction.R: -------------------------------------------------------------------------------- 1 | # @file PatientLevelPrediction.R 2 | # 3 | # Copyright 2025 Observational Health Data Sciences and Informatics 4 | # 5 | # This file is part of PatientLevelPrediction 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | #' PatientLevelPrediction 20 | #' 21 | #' @description A package for running predictions using data in the OMOP CDM 22 | #' 23 | #' @name PatientLevelPrediction 24 | #' @keywords internal 25 | #' @importFrom dplyr %>% 26 | #' @importFrom rlang .data 27 | "_PACKAGE" 28 | 29 | #' A simulation profile for generating synthetic patient level prediction data 30 | #' @docType data 31 | #' @keywords datasets 32 | #' @name simulationProfile 33 | #' @format A data frame containing the following elements: 34 | #' \describe{ 35 | #' \item{covariatePrevalence}{prevalence of all covariates} 36 | #' \item{outcomeModels}{regression model parameters to simulate outcomes} 37 | #' \item{metaData}{settings used to simulate the profile} 38 | #' \item{covariateRef}{covariateIds and covariateNames} 39 | #' \item{timePrevalence}{time window} 40 | #' \item{exclusionPrevalence}{prevalence of exclusion of covariates} 41 | #' } 42 | #' @usage 43 | #' data(simulationProfile) 44 | NULL 45 | -------------------------------------------------------------------------------- /man/createSampleSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Sampling.R 3 | \name{createSampleSettings} 4 | \alias{createSampleSettings} 5 | \title{Create the settings for defining how the trainData from \code{splitData} are sampled using 6 | default sample functions.} 7 | \usage{ 8 | createSampleSettings( 9 | type = "none", 10 | numberOutcomestoNonOutcomes = 1, 11 | sampleSeed = sample(10000, 1) 12 | ) 13 | } 14 | \arguments{ 15 | \item{type}{(character) Choice of: \itemize{ 16 | \item 'none' No sampling is applied - this is the default 17 | \item 'underSample' Undersample the non-outcome class to make the data more balanced 18 | \item 'overSample' Oversample the outcome class by adding in each outcome multiple times 19 | }} 20 | 21 | \item{numberOutcomestoNonOutcomes}{(numeric) A 
numeric specifying the required number of outcomes per non-outcomes} 22 | 23 | \item{sampleSeed}{(numeric) A seed to use when splitting the data for reproducibility (if not set a random number will be generated)} 24 | } 25 | \value{ 26 | An object of class \code{sampleSettings} 27 | } 28 | \description{ 29 | Create the settings for defining how the trainData from \code{splitData} are sampled using 30 | default sample functions. 31 | } 32 | \details{ 33 | Returns an object of class \code{sampleSettings} that specifies the sampling function that will be called and the settings 34 | } 35 | \examples{ 36 | \dontshow{if (rlang::is_installed("Eunomia")) withAutoprint(\{ # examplesIf} 37 | \donttest{ 38 | # sample even rate of outcomes to non-outcomes 39 | sampleSetting <- createSampleSettings( 40 | type = "underSample", 41 | numberOutcomestoNonOutcomes = 1, 42 | sampleSeed = 42 43 | ) 44 | } 45 | \dontshow{\}) # examplesIf} 46 | } 47 | -------------------------------------------------------------------------------- /man/plotPredictionDistribution.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotPredictionDistribution} 4 | \alias{plotPredictionDistribution} 5 | \title{Plot the side-by-side boxplots of prediction distribution, by class} 6 | \usage{ 7 | plotPredictionDistribution( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "PredictionDistribution.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. 
See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 28 | } 29 | \description{ 30 | Plot the side-by-side boxplots of prediction distribution, by class 31 | } 32 | \details{ 33 | Create a plot showing the side-by-side boxplots of prediction distribution, by class 34 | #' 35 | } 36 | \examples{ 37 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 38 | \donttest{ \dontshow{ # takes too long } 39 | data("simulationProfile") 40 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 41 | saveLoc <- file.path(tempdir(), "plotPredictionDistribution") 42 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 43 | plotPredictionDistribution(results) 44 | # clean up 45 | unlink(saveLoc, recursive = TRUE) 46 | } 47 | \dontshow{\}) # examplesIf} 48 | } 49 | -------------------------------------------------------------------------------- /man/plotPredictedPDF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotPredictedPDF} 4 | \alias{plotPredictedPDF} 5 | \title{Plot the Predicted probability density function, showing prediction overlap between true and false cases} 6 | \usage{ 7 | plotPredictedPDF( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "PredictedPDF.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. 
See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 28 | } 29 | \description{ 30 | Plot the Predicted probability density function, showing prediction overlap between true and false cases 31 | } 32 | \details{ 33 | Create a plot showing the predicted probability density function, showing prediction overlap between true and false cases 34 | } 35 | \examples{ 36 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 37 | \donttest{ \dontshow{ # takes too long } 38 | data("simulationProfile") 39 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 40 | saveLoc <- file.path(tempdir(), "plotPredictedPDF") 41 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 42 | plotPredictedPDF(results) 43 | # clean up 44 | unlink(saveLoc, recursive = TRUE) 45 | } 46 | \dontshow{\}) # examplesIf} 47 | } 48 | -------------------------------------------------------------------------------- /man/createValidationDesign.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExternalValidatePlp.R 3 | \name{createValidationDesign} 4 | \alias{createValidationDesign} 5 | \title{createValidationDesign - Define the validation design for external validation} 6 | \usage{ 7 | createValidationDesign( 8 | targetId, 9 | outcomeId, 10 | populationSettings = NULL, 11 | restrictPlpDataSettings = NULL, 12 | plpModelList, 13 | recalibrate = NULL, 14 | runCovariateSummary = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{targetId}{The targetId of the target cohort to validate on} 19 | 20 | \item{outcomeId}{The outcomeId of the outcome cohort to validate on} 21 | 22 | \item{populationSettings}{A list of population restriction settings created 23 | by 
\code{createPopulationSettings}. Default is NULL and then this is taken 24 | from the model} 25 | 26 | \item{restrictPlpDataSettings}{A list of plpData restriction settings 27 | created by \code{createRestrictPlpDataSettings}. Default is NULL and then 28 | this is taken from the model.} 29 | 30 | \item{plpModelList}{A list of plpModels objects created by \code{runPlp} or a path to such objects} 31 | 32 | \item{recalibrate}{A vector of characters specifying the recalibration method to apply,} 33 | 34 | \item{runCovariateSummary}{whether to run the covariate summary for the validation data} 35 | } 36 | \value{ 37 | A validation design object of class \code{validationDesign} or a list of such objects 38 | } 39 | \description{ 40 | createValidationDesign - Define the validation design for external validation 41 | } 42 | \examples{ 43 | # create a validation design for targetId 1 and outcomeId 2 one l1 model and 44 | # one gradient boosting model 45 | createValidationDesign(1, 2, plpModelList = list( 46 | "pathToL1model", "PathToGBMModel")) 47 | } 48 | -------------------------------------------------------------------------------- /.github/workflows/revdeps.yml: -------------------------------------------------------------------------------- 1 | name: Reverse dependency checks 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | repos: 7 | description: "owner/repo lines (optional). 
If empty, uses revdep/github.txt" 8 | required: false 9 | default: "" 10 | 11 | permissions: 12 | contents: read 13 | 14 | concurrency: 15 | group: revdep-${{ github.ref }} 16 | cancel-in-progress: false 17 | 18 | env: 19 | RSPM: https://packagemanager.posit.co/cran/__linux__/noble/latest 20 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 21 | R_KEEP_PKG_SOURCE: yes 22 | 23 | jobs: 24 | revdep: 25 | runs-on: ubuntu-latest 26 | 27 | steps: 28 | - name: Checkout repo 29 | uses: actions/checkout@v5 30 | 31 | - name: Setup R 32 | uses: r-lib/actions/setup-r@v2 33 | with: 34 | r-version: 'release' 35 | use-public-rspm: true 36 | 37 | - name: Setup Pandoc 38 | uses: r-lib/actions/setup-pandoc@v2 39 | 40 | - name: Setup R dependencies 41 | uses: r-lib/actions/setup-r-dependencies@v2 42 | with: 43 | extra-packages: | 44 | any::pak 45 | any::rcmdcheck 46 | any::jsonlite 47 | any::desc 48 | any::gert 49 | 50 | - name: Install this package (dev version) 51 | run: | 52 | Rscript -e 'pak::pkg_install("local::.", upgrade = FALSE)' 53 | 54 | - name: Run reverse dependency checks 55 | env: 56 | INPUT_REPOS: ${{ github.event.inputs.repos }} 57 | run: | 58 | Rscript extras/revDeps.R 59 | 60 | - name: Upload results 61 | if: always() 62 | uses: actions/upload-artifact@v5 63 | with: 64 | name: revdep-results 65 | path: revdep/results 66 | -------------------------------------------------------------------------------- /man/diagnoseMultiplePlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DiagnosePlp.R 3 | \name{diagnoseMultiplePlp} 4 | \alias{diagnoseMultiplePlp} 5 | \title{Run a list of predictions diagnoses} 6 | \usage{ 7 | diagnoseMultiplePlp( 8 | databaseDetails = createDatabaseDetails(), 9 | modelDesignList = list(createModelDesign(targetId = 1, outcomeId = 2, modelSettings = 10 | setLassoLogisticRegression()), createModelDesign(targetId = 1, outcomeId = 3, 11 | 
modelSettings = setLassoLogisticRegression())), 12 | cohortDefinitions = NULL, 13 | logSettings = createLogSettings(verbosity = "DEBUG", timeStamp = TRUE, logName = 14 | "diagnosePlp Log"), 15 | saveDirectory = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{databaseDetails}{The database settings created using \code{createDatabaseDetails()}} 20 | 21 | \item{modelDesignList}{A list of model designs created using \code{createModelDesign()}} 22 | 23 | \item{cohortDefinitions}{A list of cohort definitions for the target and outcome cohorts} 24 | 25 | \item{logSettings}{The setting specifying the logging for the analyses created using \code{createLogSettings()}} 26 | 27 | \item{saveDirectory}{Name of the folder where all the outputs will be written to.} 28 | } 29 | \value{ 30 | A data frame with the following columns: \tabular{ll}{ \verb{analysisId} \tab The unique identifier 31 | for a set of analysis choices.\cr \verb{targetId} \tab The ID of the target cohort populations.\cr 32 | \verb{outcomeId} \tab The ID of the outcomeId.\cr \verb{dataLocation} \tab The location where the plpData was saved 33 | \cr \verb{the settings ids} \tab The ids for all other settings used for model development.\cr } 34 | } 35 | \description{ 36 | Run a list of predictions diagnoses 37 | } 38 | \details{ 39 | This function will run all specified prediction design diagnoses.
40 | } 41 | -------------------------------------------------------------------------------- /man/plotPreferencePDF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotPreferencePDF} 4 | \alias{plotPreferencePDF} 5 | \title{Plot the preference score probability density function, showing prediction overlap between true and false cases 6 | #'} 7 | \usage{ 8 | plotPreferencePDF( 9 | plpResult, 10 | typeColumn = "evaluation", 11 | saveLocation = NULL, 12 | fileName = "plotPreferencePDF.png" 13 | ) 14 | } 15 | \arguments{ 16 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 17 | 18 | \item{typeColumn}{The name of the column specifying the evaluation type} 19 | 20 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 21 | 22 | \item{fileName}{Name of the file to save to plot, for example 23 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 24 | supported file formats.} 25 | } 26 | \value{ 27 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 28 | format. 
29 | } 30 | \description{ 31 | Plot the preference score probability density function, showing prediction overlap between true and false cases 32 | #' 33 | } 34 | \details{ 35 | Create a plot showing the preference score probability density function, showing prediction overlap between true and false cases 36 | #' 37 | } 38 | \examples{ 39 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 40 | \donttest{ \dontshow{ # takes too long } 41 | data("simulationProfile") 42 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 43 | saveLoc <- file.path(tempdir(), "plotPreferencePDF") 44 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 45 | plotPreferencePDF(results) 46 | # clean up 47 | unlink(saveLoc, recursive = TRUE) 48 | } 49 | \dontshow{\}) # examplesIf} 50 | } 51 | -------------------------------------------------------------------------------- /man/covariateSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CovariateSummary.R 3 | \name{covariateSummary} 4 | \alias{covariateSummary} 5 | \title{covariateSummary} 6 | \usage{ 7 | covariateSummary( 8 | covariateData, 9 | cohort, 10 | labels = NULL, 11 | strata = NULL, 12 | variableImportance = NULL, 13 | featureEngineering = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{covariateData}{The covariateData part of the plpData that is 18 | extracted using \code{getPlpData}} 19 | 20 | \item{cohort}{The patient cohort to calculate the summary} 21 | 22 | \item{labels}{A data.frame with the columns rowId and outcomeCount} 23 | 24 | \item{strata}{A data.frame containing the columns rowId, strataName} 25 | 26 | \item{variableImportance}{A data.frame with the columns covariateId and 27 | value (the variable importance value)} 28 | 29 | \item{featureEngineering}{(currently not used ) 30 | A function or list of functions specifying any feature 
engineering 31 | to create covariates before summarising} 32 | } 33 | \value{ 34 | A data.frame containing: CovariateCount, CovariateMean and CovariateStDev 35 | for any specified stratification 36 | } 37 | \description{ 38 | Summarises the covariateData to calculate the mean and standard deviation per covariate 39 | if the labels are given it also stratifies this by class label and if the trainRowIds and testRowIds 40 | specifying the patients in the train/test sets respectively are input, these values are also stratified 41 | by train and test set 42 | } 43 | \details{ 44 | The function calculates various metrics to measure the performance of the model 45 | } 46 | \examples{ 47 | data("simulationProfile") 48 | plpData <- simulatePlpData(simulationProfile, n = 100, seed = 42) 49 | covariateSummary <- covariateSummary(plpData$covariateData, plpData$cohorts) 50 | head(covariateSummary) 51 | } 52 | -------------------------------------------------------------------------------- /man/getDemographicSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DemographicSummary.R 3 | \name{getDemographicSummary} 4 | \alias{getDemographicSummary} 5 | \title{Get a demographic summary} 6 | \usage{ 7 | getDemographicSummary(prediction, predictionType, typeColumn = "evaluation") 8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction object} 11 | 12 | \item{predictionType}{The type of prediction (binary or survival)} 13 | 14 | \item{typeColumn}{A column that is used to stratify the results} 15 | } 16 | \value{ 17 | A dataframe with the demographic summary 18 | } 19 | \description{ 20 | Get a demographic summary 21 | } 22 | \details{ 23 | Generates a data.frame with a prediction summary per each 5 year age group 24 | and gender group 25 | } 26 | \examples{ 27 | \donttest{ \dontshow{ # takes too long } 28 | # simulate data 29 | data("simulationProfile") 30 | 
plpData <- simulatePlpData(simulationProfile, n = 500, seed = 42) 31 | # create study population, split into train/test and preprocess with default settings 32 | population <- createStudyPopulation(plpData, outcomeId = 3) 33 | data <- splitData(plpData, population, createDefaultSplitSetting()) 34 | data$Train$covariateData <- preprocessData(data$Train$covariateData) 35 | saveLoc <- file.path(tempdir(), "demographicSummary") 36 | # fit a lasso logistic regression model using the training data 37 | plpModel <- fitPlp(data$Train, modelSettings=setLassoLogisticRegression(seed=42), 38 | analysisId=1, analysisPath=saveLoc) 39 | demographicSummary <- getDemographicSummary(plpModel$prediction, 40 | "binary", 41 | typeColumn = "evaluationType") 42 | # show the demographic summary dataframe 43 | str(demographicSummary) 44 | # clean up 45 | unlink(saveLoc, recursive = TRUE) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /man/outcomeSurvivalPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{outcomeSurvivalPlot} 4 | \alias{outcomeSurvivalPlot} 5 | \title{Plot the outcome incidence over time} 6 | \usage{ 7 | outcomeSurvivalPlot( 8 | plpData, 9 | outcomeId, 10 | populationSettings = createStudyPopulationSettings(binary = TRUE, includeAllOutcomes = 11 | TRUE, firstExposureOnly = FALSE, washoutPeriod = 0, removeSubjectsWithPriorOutcome = 12 | TRUE, priorOutcomeLookback = 99999, requireTimeAtRisk = FALSE, riskWindowStart = 1, 13 | startAnchor = "cohort start", riskWindowEnd = 3650, endAnchor = "cohort start"), 14 | riskTable = TRUE, 15 | confInt = TRUE, 16 | yLabel = "Fraction of those who are outcome free in target population" 17 | ) 18 | } 19 | \arguments{ 20 | \item{plpData}{The plpData object returned by running getPlpData()} 21 | 22 | \item{outcomeId}{The cohort id corresponding to 
the outcome} 23 | 24 | \item{populationSettings}{The population settings created using \code{createStudyPopulationSettings}} 25 | 26 | \item{riskTable}{(binary) Whether to include a table at the bottom of the plot showing the number of people at risk over time} 27 | 28 | \item{confInt}{(binary) Whether to include a confidence interval} 29 | 30 | \item{yLabel}{(string) The label for the y-axis} 31 | } 32 | \value{ 33 | A \code{ggsurvplot} object 34 | } 35 | \description{ 36 | Plot the outcome incidence over time 37 | } 38 | \details{ 39 | This creates a survival plot that can be used to pick a suitable time-at-risk period 40 | } 41 | \examples{ 42 | \dontshow{if (rlang::is_installed("survminer")) withAutoprint(\{ # examplesIf} 43 | data("simulationProfile") 44 | plpData <- simulatePlpData(simulationProfile, n = 999, seed = 42) 45 | plotObject <- outcomeSurvivalPlot(plpData, outcomeId = 3) 46 | print(plotObject) 47 | \dontshow{\}) # examplesIf} 48 | } 49 | -------------------------------------------------------------------------------- /tests/testthat/test-PredictionDistribution.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | test_that("getPredictionDistribution binary type", { 17 | ePrediction <- data.frame( 18 | value = runif(100), 19 | outcomeCount = round(runif(100)), 20 | evaluation = rep("Test", 100) 21 | ) 22 | predSum <- getPredictionDistribution( 23 | prediction = ePrediction, 24 | predictionType = "binary", 25 | typeColumn = "evaluation" 26 | ) 27 | 28 | expect_equal(nrow(predSum), 2) 29 | expect_equal(ncol(predSum), 12) 30 | 31 | 32 | 33 | predBinary <- getPredictionDistribution_binary( 34 | prediction = ePrediction, 35 | evaluation = rep("Test", 100), 36 | evalColumn = "evaluation" 37 | ) 38 | 39 | expect_equal(predBinary, predSum) 40 | }) 41 | 42 | 43 | test_that("getPredictionDistribution survival type", { 44 | ePrediction <- data.frame( 45 | value = runif(100), 46 | outcomeCount = round(runif(100)), 47 | evaluation = rep("Test", 100) 48 | ) 49 | 50 | predSurvival <- getPredictionDistribution_survival( 51 | prediction = ePrediction, 52 | evaluation = rep("Test", 100), 53 | evalColumn = "evaluation" 54 | ) 55 | 56 | expect_true(is.null(predSurvival)) 57 | }) 58 | -------------------------------------------------------------------------------- /man/setLassoLogisticRegression.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CyclopsSettings.R 3 | \name{setLassoLogisticRegression} 4 | \alias{setLassoLogisticRegression} 5 | \title{Create modelSettings for lasso logistic regression} 6 | \usage{ 7 | setLassoLogisticRegression( 8 | variance = 0.01, 9 | seed = NULL, 10 | includeCovariateIds = c(), 11 | noShrinkage = c(0), 12 | threads = -1, 13 | forceIntercept = FALSE, 14 | upperLimit = 20, 15 | lowerLimit = 0.01, 16 | tolerance = 2e-06, 17 | maxIterations = 3000, 18 | priorCoefs = NULL 19 | ) 20 | } 21 | \arguments{ 22 | \item{variance}{Numeric: prior distribution starting variance} 23 | 24 | \item{seed}{An option to add a seed when training the model} 25 | 
26 | \item{includeCovariateIds}{a set of covariateIds to limit the analysis to} 27 | 28 | \item{noShrinkage}{a set of covariates which are to be forced to be included 29 | in the final model. Default is the intercept} 30 | 31 | \item{threads}{An option to set number of threads when training model.} 32 | 33 | \item{forceIntercept}{Logical: Force intercept coefficient into prior} 34 | 35 | \item{upperLimit}{Numeric: Upper prior variance limit for grid-search} 36 | 37 | \item{lowerLimit}{Numeric: Lower prior variance limit for grid-search} 38 | 39 | \item{tolerance}{Numeric: maximum relative change in convergence criterion from 40 | successive iterations to achieve convergence} 41 | 42 | \item{maxIterations}{Integer: maximum iterations of Cyclops to attempt 43 | before returning a failed-to-converge error} 44 | 45 | \item{priorCoefs}{Use coefficients from a previous model as starting 46 | points for model fit (transfer learning)} 47 | } 48 | \value{ 49 | \code{modelSettings} object 50 | } 51 | \description{ 52 | Create modelSettings for lasso logistic regression 53 | } 54 | \examples{ 55 | modelLasso <- setLassoLogisticRegression(seed=42) 56 | } 57 | -------------------------------------------------------------------------------- /man/plotGeneralizability.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotGeneralizability} 4 | \alias{plotGeneralizability} 5 | \title{Plot the train/test generalizability diagnostic} 6 | \usage{ 7 | plotGeneralizability( 8 | covariateSummary, 9 | saveLocation = NULL, 10 | fileName = "Generalizability.png" 11 | ) 12 | } 13 | \arguments{ 14 | \item{covariateSummary}{A prediction object as generated using the 15 | \code{\link{runPlp}} function.} 16 | 17 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 18 | 19 | \item{fileName}{Name of the file to save to plot, for 
example 20 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 21 | supported file formats.} 22 | } 23 | \value{ 24 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 25 | format. 26 | } 27 | \description{ 28 | Plot the train/test generalizability diagnostic 29 | } 30 | \details{ 31 | Create a plot showing the train/test generalizability diagnostic 32 | #' 33 | } 34 | \examples{ 35 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 36 | \donttest{ \dontshow{ # takes too long } 37 | data("simulationProfile") 38 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 39 | population <- createStudyPopulation(plpData, outcomeId = 3) 40 | data <- splitData(plpData, population = population) 41 | strata <- data.frame( 42 | rowId = c(data$Train$labels$rowId, data$Test$labels$rowId), 43 | strataName = c(rep("Train", nrow(data$Train$labels)), 44 | rep("Test", nrow(data$Test$labels)))) 45 | covariateSummary <- covariateSummary(plpData$covariateData, 46 | cohort = dplyr::select(population, "rowId"), 47 | strata = strata, labels = population) 48 | plotGeneralizability(covariateSummary) 49 | } 50 | \dontshow{\}) # examplesIf} 51 | } 52 | -------------------------------------------------------------------------------- /tests/testthat/test-fitting.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | modelSettings <- setLassoLogisticRegression() 18 | 19 | test_that("fitPlp", { 20 | skip_if_offline() 21 | plpModel <- fitPlp( 22 | trainData = trainData, 23 | modelSettings = modelSettings, 24 | search = "grid", 25 | analysisId = "fitting", 26 | analysisPath = tempdir() 27 | ) 28 | 29 | expect_s3_class(plpModel, "plpModel") 30 | }) 31 | 32 | test_that("fitPlp input errors", { 33 | skip_if_offline() 34 | expect_error( 35 | fitPlp( 36 | trainData = trainData, 37 | modelSettings = modelSettings, 38 | analysisPath = tempDir() 39 | ) 40 | ) 41 | 42 | expect_error( 43 | fitPlp( 44 | trainData = list(covariateData = NULL), 45 | modelSettings = modelSettings, 46 | analysisId = "fitting", 47 | analysisPath = tempDir() 48 | ) 49 | ) 50 | 51 | expect_error( 52 | fitPlp( 53 | trainData = trainData, 54 | modelSettings = NULL, 55 | analysisId = "fitting", 56 | analysisPath = tempDir() 57 | ) 58 | ) 59 | 60 | expect_error( 61 | fitPlp( 62 | trainData = trainData, 63 | modelSettings = modelSettings, 64 | analysisId = "fitting" 65 | ) 66 | ) 67 | }) 68 | -------------------------------------------------------------------------------- /man/evaluatePlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluatePlp.R 3 | \name{evaluatePlp} 4 | \alias{evaluatePlp} 5 | \title{evaluatePlp} 6 | \usage{ 7 | evaluatePlp(prediction, typeColumn = "evaluationType") 8 | } 9 | \arguments{ 10 | \item{prediction}{The patient 
level prediction model's prediction} 11 | 12 | \item{typeColumn}{The column name in the prediction object that is used to 13 | stratify the evaluation} 14 | } 15 | \value{ 16 | An object of class plpEvaluation containing the following components 17 | \itemize{ 18 | \item evaluationStatistics: A data frame containing the evaluation statistics' 19 | \item thresholdSummary: A data frame containing the threshold summary' 20 | \item demographicSummary: A data frame containing the demographic summary' 21 | \item calibrationSummary: A data frame containing the calibration summary' 22 | \item predictionDistribution: A data frame containing the prediction distribution' 23 | } 24 | } 25 | \description{ 26 | Evaluates the performance of the patient level prediction model 27 | } 28 | \details{ 29 | The function calculates various metrics to measure the performance of the model 30 | } 31 | \examples{ 32 | \donttest{ \dontshow{ # takes too long to run } 33 | data("simulationProfile") 34 | plpData <- simulatePlpData(simulationProfile, n = 1500, seed = 42) 35 | population <- createStudyPopulation(plpData, outcomeId = 3, 36 | populationSettings = createStudyPopulationSettings()) 37 | data <- splitData(plpData, population, splitSettings=createDefaultSplitSetting(splitSeed=42)) 38 | data$Train$covariateData <- preprocessData(data$Train$covariateData, 39 | createPreprocessSettings()) 40 | path <- file.path(tempdir(), "plp") 41 | model <- fitPlp(data$Train, modelSettings=setLassoLogisticRegression(seed=42), 42 | analysisId=1, analysisPath = path) 43 | evaluatePlp(model$prediction) # Train and CV metrics 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /man/setGradientBoostingMachine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GradientBoostingMachine.R 3 | \name{setGradientBoostingMachine} 4 | 
\alias{setGradientBoostingMachine} 5 | \title{Create setting for gradient boosting machine model using gbm_xgboost implementation} 6 | \usage{ 7 | setGradientBoostingMachine( 8 | ntrees = c(100, 300), 9 | nthread = 20, 10 | earlyStopRound = 25, 11 | maxDepth = c(4, 6, 8), 12 | minChildWeight = 1, 13 | learnRate = c(0.05, 0.1, 0.3), 14 | scalePosWeight = 1, 15 | lambda = 1, 16 | alpha = 0, 17 | seed = sample(1e+07, 1) 18 | ) 19 | } 20 | \arguments{ 21 | \item{ntrees}{The number of trees to build} 22 | 23 | \item{nthread}{The number of computer threads to use (how many cores do you have?)} 24 | 25 | \item{earlyStopRound}{If the performance does not increase over earlyStopRound number of trees then training stops (this prevents overfitting)} 26 | 27 | \item{maxDepth}{Maximum depth of each tree - a large value will lead to slow model training} 28 | 29 | \item{minChildWeight}{Minimum sum of of instance weight in a child node - larger values are more conservative} 30 | 31 | \item{learnRate}{The boosting learn rate} 32 | 33 | \item{scalePosWeight}{Controls weight of positive class in loss - useful for imbalanced classes} 34 | 35 | \item{lambda}{L2 regularization on weights - larger is more conservative} 36 | 37 | \item{alpha}{L1 regularization on weights - larger is more conservative} 38 | 39 | \item{seed}{An option to add a seed when training the final model} 40 | } 41 | \value{ 42 | A modelSettings object that can be used to fit the model 43 | } 44 | \description{ 45 | Create setting for gradient boosting machine model using gbm_xgboost implementation 46 | } 47 | \examples{ 48 | \dontshow{if (rlang::is_installed("xgboost")) withAutoprint(\{ # examplesIf} 49 | modelGbm <- setGradientBoostingMachine( 50 | ntrees = c(10, 100), nthread = 20, 51 | maxDepth = c(4, 6), learnRate = c(0.1, 0.3) 52 | ) 53 | \dontshow{\}) # examplesIf} 54 | } 55 | -------------------------------------------------------------------------------- /man/toSparseM.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Formatting.R 3 | \name{toSparseM} 4 | \alias{toSparseM} 5 | \title{Convert the plpData in COO format into a sparse R matrix} 6 | \usage{ 7 | toSparseM(plpData, cohort = NULL, map = NULL) 8 | } 9 | \arguments{ 10 | \item{plpData}{An object of type \code{plpData} with covariate in coo format - the patient level prediction 11 | data extracted from the CDM.} 12 | 13 | \item{cohort}{If specified the plpData is restricted to the rowIds in the cohort (otherwise plpData$labels is used)} 14 | 15 | \item{map}{A covariate map (telling us the column number for covariates)} 16 | } 17 | \value{ 18 | Returns a list, containing the data as a sparse matrix, the plpData covariateRef 19 | and a data.frame named map that tells us what covariate corresponds to each column 20 | This object is a list with the following components: \describe{ 21 | \item{data}{A sparse matrix with the rows corresponding to each person in the plpData and the columns corresponding to the covariates.} 22 | \item{covariateRef}{The plpData covariateRef.} 23 | \item{map}{A data.frame containing the data column ids and the corresponding covariateId from covariateRef.} 24 | } 25 | } 26 | \description{ 27 | Converts the standard plpData to a sparse matrix 28 | } 29 | \details{ 30 | This function converts the covariates \code{Andromeda} table in COO format into a sparse matrix from 31 | the package Matrix 32 | } 33 | \examples{ 34 | \donttest{ \dontshow{ # takes too long } 35 | library(dplyr) 36 | data("simulationProfile") 37 | plpData <- simulatePlpData(simulationProfile, n = 100, seed = 42) 38 | # how many covariates are there before we convert to sparse matrix 39 | plpData$covariateData$covariates \%>\% 40 | dplyr::group_by(.data$covariateId) \%>\% 41 | dplyr::summarise(n = n()) \%>\% 42 | dplyr::collect() \%>\% nrow() 43 | sparseData <- 
toSparseM(plpData, cohort=plpData$cohorts) 44 | # how many covariates are there after we convert to sparse matrix' 45 | sparseData$dataMatrix@Dim[2] 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /tests/testthat/test-featureImportance.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | 18 | test_that("pfi feature importance returns data.frame", { 19 | skip_if_offline() 20 | # limit to a sample of 2 covariates for faster test 21 | covariates <- plpResult$model$covariateImportance %>% 22 | dplyr::filter("covariateValue" != 0) %>% 23 | dplyr::select("covariateId") %>% 24 | dplyr::arrange(desc("covariateValue")) %>% 25 | dplyr::pull() 26 | 27 | # if the model had non-zero covariates 28 | if (length(covariates) > 0) { 29 | if (length(covariates) > 2) { 30 | covariates <- covariates[1:2] 31 | } 32 | pfiTest <- pfi(plpResult, population, plpData, 33 | repeats = 1, 34 | covariates = covariates, cores = 1, log = NULL, 35 | logthreshold = "INFO" 36 | ) 37 | 38 | expect_equal(class(pfiTest), "data.frame") 39 | expect_equal(sum(names(pfiTest) %in% c("covariateId", "pfi")), 2) 40 | expect_true(all(!is.nan(pfiTest$pfi))) 41 | } 42 | }) 43 | 44 | test_that("pfi feature importance works with logger or without covariates", { 45 | skip_if_offline() 46 | pfiTest <- pfi(tinyResults, population, nanoData, 47 | cores = 1, 48 | covariates = NULL, log = file.path(tempdir(), "pfiLog") 49 | ) 50 | 51 | expect_equal(class(pfiTest), "data.frame") 52 | expect_equal(sum(names(pfiTest) %in% c("covariateId", "pfi")), 2) 53 | expect_true(all(!is.nan(pfiTest$pfi))) 54 | }) 55 | -------------------------------------------------------------------------------- /man/pfi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureImportance.R 3 | \name{pfi} 4 | \alias{pfi} 5 | \title{Permutation Feature Importance} 6 | \usage{ 7 | pfi( 8 | plpResult, 9 | population, 10 | plpData, 11 | repeats = 1, 12 | covariates = NULL, 13 | cores = NULL, 14 | log = NULL, 15 | logthreshold = "INFO" 16 | ) 17 | } 18 | \arguments{ 19 | \item{plpResult}{An object of type \code{runPlp}} 20 | 21 | \item{population}{The population created using createStudyPopulation() who will 
have their risks predicted} 22 | 23 | \item{plpData}{An object of type \code{plpData} - the patient level prediction 24 | data extracted from the CDM.} 25 | 26 | \item{repeats}{The number of times to permute each covariate} 27 | 28 | \item{covariates}{A vector of covariates to calculate the pfi for. If NULL it uses all covariates included in the model.} 29 | 30 | \item{cores}{Number of cores to use when running this (it runs in parallel)} 31 | 32 | \item{log}{A location to save the log for running pfi} 33 | 34 | \item{logthreshold}{The log threshold (e.g., INFO, TRACE, ...)} 35 | } 36 | \value{ 37 | A dataframe with the covariateIds and the pfi (change in AUC caused by permuting the covariate) value 38 | } 39 | \description{ 40 | Calculate the permutation feature importance (pfi) for a PLP model. 41 | } 42 | \details{ 43 | The function permutes the each covariate/features \code{repeats} times and 44 | calculates the mean AUC change caused by the permutation. 45 | } 46 | \examples{ 47 | \donttest{ \dontshow{ # takes too long } 48 | library(dplyr) 49 | # simulate some data 50 | data("simulationProfile") 51 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 52 | # now fit a model 53 | saveLoc <- file.path(tempdir(), "pfi") 54 | plpResult <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 55 | population <- createStudyPopulation(plpData, outcomeId = 3) 56 | pfi(plpResult, population, plpData, repeats = 1, cores = 1) 57 | # compare to model coefficients 58 | plpResult$model$covariateImportance \%>\% dplyr::filter(.data$covariateValue != 0) 59 | # clean up 60 | unlink(saveLoc, recursive = TRUE) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /man/createDatabaseSchemaSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/uploadToDatabase.R 3 | 
\name{createDatabaseSchemaSettings} 4 | \alias{createDatabaseSchemaSettings} 5 | \title{Create the PatientLevelPrediction database result schema settings} 6 | \usage{ 7 | createDatabaseSchemaSettings( 8 | resultSchema = "main", 9 | tablePrefix = "", 10 | targetDialect = "sqlite", 11 | tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), 12 | cohortDefinitionSchema = resultSchema, 13 | tablePrefixCohortDefinitionTables = tablePrefix, 14 | databaseDefinitionSchema = resultSchema, 15 | tablePrefixDatabaseDefinitionTables = tablePrefix 16 | ) 17 | } 18 | \arguments{ 19 | \item{resultSchema}{(string) The name of the database schema with the result tables.} 20 | 21 | \item{tablePrefix}{(string) A string that appends to the PatientLevelPrediction result tables} 22 | 23 | \item{targetDialect}{(string) The database management system being used} 24 | 25 | \item{tempEmulationSchema}{(string) The temp schema used when the database management system is oracle} 26 | 27 | \item{cohortDefinitionSchema}{(string) The name of the database schema with the cohort definition tables (defaults to resultSchema).} 28 | 29 | \item{tablePrefixCohortDefinitionTables}{(string) A string that appends to the cohort definition tables} 30 | 31 | \item{databaseDefinitionSchema}{(string) The name of the database schema with the database definition tables (defaults to resultSchema).} 32 | 33 | \item{tablePrefixDatabaseDefinitionTables}{(string) A string that appends to the database definition tables} 34 | } 35 | \value{ 36 | Returns a list of class 'plpDatabaseResultSchema' with all the database settings 37 | } 38 | \description{ 39 | This function specifies where the results schema is and lets you pick a different schema for the cohorts and databases 40 | } 41 | \details{ 42 | This function can be used to specify the database settings used to upload PatientLevelPrediction results into a database 43 | } 44 | \examples{ 45 | createDatabaseSchemaSettings(resultSchema = "cdm", 46 | tablePrefix 
= "plp_") 47 | 48 | } 49 | -------------------------------------------------------------------------------- /man/plotLearningCurve.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/LearningCurve.R 3 | \name{plotLearningCurve} 4 | \alias{plotLearningCurve} 5 | \title{plotLearningCurve} 6 | \usage{ 7 | plotLearningCurve( 8 | learningCurve, 9 | metric = "AUROC", 10 | abscissa = "events", 11 | plotTitle = "Learning Curve", 12 | plotSubtitle = NULL, 13 | fileName = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{learningCurve}{An object returned by \code{\link{createLearningCurve}} 18 | function.} 19 | 20 | \item{metric}{Specifies the metric to be plotted: 21 | \itemize{ 22 | \item{\code{'AUROC'} - use the area under the Receiver Operating 23 | Characteristic curve} 24 | \item{\code{'AUPRC'} - use the area under the Precision-Recall curve} 25 | \item{\code{'sBrier'} - use the scaled Brier score} 26 | }} 27 | 28 | \item{abscissa}{Specify the abscissa metric to be plotted: 29 | \itemize{ 30 | \item{\code{'events'} - use number of events} 31 | \item{\code{'observations'} - use number of observations} 32 | }} 33 | 34 | \item{plotTitle}{Title of the learning curve plot.} 35 | 36 | \item{plotSubtitle}{Subtitle of the learning curve plot.} 37 | 38 | \item{fileName}{Filename of plot to be saved, for example \code{'plot.png'}. 39 | See the function \code{ggsave} in the ggplot2 package for supported file 40 | formats.} 41 | } 42 | \value{ 43 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to 44 | file in a different format. 45 | } 46 | \description{ 47 | Create a plot of the learning curve using the object returned 48 | from \code{createLearningCurve}. 
49 | } 50 | \examples{ 51 | \dontshow{if (rlang::is_installed("parallel")) withAutoprint(\{ # examplesIf} 52 | \donttest{ \dontshow{ # takes too long to run } 53 | data("simulationProfile") 54 | plpData <- simulatePlpData(simulationProfile, n = 1800, seed = 42) 55 | outcomeId <- 3 56 | modelSettings <- setLassoLogisticRegression(seed=42) 57 | learningCurve <- createLearningCurve(plpData, outcomeId, modelSettings = modelSettings, 58 | saveDirectory = file.path(tempdir(), "learningCurve"), parallel = FALSE) 59 | plotLearningCurve(learningCurve) 60 | } 61 | \dontshow{\}) # examplesIf} 62 | } 63 | -------------------------------------------------------------------------------- /man/setSVM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnClassifierSettings.R 3 | \name{setSVM} 4 | \alias{setSVM} 5 | \title{Create setting for the python sklearn SVM (SVC function)} 6 | \usage{ 7 | setSVM( 8 | C = list(1, 0.9, 2, 0.1), 9 | kernel = list("rbf"), 10 | degree = list(1, 3, 5), 11 | gamma = list("scale", 1e-04, 3e-05, 0.001, 0.01, 0.25), 12 | coef0 = list(0), 13 | shrinking = list(TRUE), 14 | tol = list(0.001), 15 | classWeight = list(NULL), 16 | cacheSize = 500, 17 | seed = sample(1e+05, 1) 18 | ) 19 | } 20 | \arguments{ 21 | \item{C}{(list) Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty.} 22 | 23 | \item{kernel}{(list) Specifies the kernel type to be used in the algorithm. one of ‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’. If none is given ‘rbf’ will be used.} 24 | 25 | \item{degree}{(list) degree of kernel function is significant only in poly, rbf, sigmoid} 26 | 27 | \item{gamma}{(list) kernel coefficient for rbf and poly, by default 1/n_features will be taken. 
‘scale’, ‘auto’ or float, default=’scale’} 28 | 29 | \item{coef0}{(list) independent term in kernel function. It is only significant in poly/sigmoid.} 30 | 31 | \item{shrinking}{(list) whether to use the shrinking heuristic.} 32 | 33 | \item{tol}{(list) Tolerance for stopping criterion.} 34 | 35 | \item{classWeight}{(list) Class weight based on imbalance either 'balanced' or NULL} 36 | 37 | \item{cacheSize}{Specify the size of the kernel cache (in MB).} 38 | 39 | \item{seed}{A seed for the model} 40 | } 41 | \value{ 42 | a modelSettings object 43 | } 44 | \description{ 45 | Create setting for the python sklearn SVM (SVC function) 46 | } 47 | \examples{ 48 | \dontshow{ # dontrun reason: requires python's scikit-learn, checkSklearn() will error without it } 49 | \dontrun{ 50 | plpData <- getEunomiaPlpData() 51 | model <- setSVM(C = list(1), gamma = list("scale"), seed = 42) 52 | saveLoc <- file.path(tempdir(), "svm") 53 | results <- runPlp(plpData, modelSettings = model, saveDirectory = saveLoc) 54 | # clean up 55 | unlink(saveLoc, recursive = TRUE) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /man/viewMultiplePlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ViewShinyPlp.R 3 | \name{viewMultiplePlp} 4 | \alias{viewMultiplePlp} 5 | \title{open a local shiny app for viewing the result of a multiple PLP analyses} 6 | \usage{ 7 | viewMultiplePlp(analysesLocation) 8 | } 9 | \arguments{ 10 | \item{analysesLocation}{The directory containing the results (with the analysis_x folders)} 11 | } 12 | \value{ 13 | Opens a shiny app for interactively viewing the results 14 | } 15 | \description{ 16 | open a local shiny app for viewing the result of a multiple PLP analyses 17 | } 18 | \details{ 19 | Opens a shiny app for viewing the results of the models from various T,O, Tar and settings 20 | settings. 
21 | } 22 | \examples{ 23 | \dontshow{if (rlang::is_interactive() && rlang::is_installed("Eunomia") && rlang::is_installed("OhdsiShinyAppBuilder") && rlang::is_installed("curl") && curl::has_internet() ) withAutoprint(\{ # examplesIf} 24 | \donttest{ \dontshow{ # takes too long } 25 | connectionDetails <- Eunomia::getEunomiaConnectionDetails() 26 | Eunomia::createCohorts(connectionDetails) 27 | databaseDetails <- createDatabaseDetails(connectionDetails = connectionDetails, 28 | cdmDatabaseSchema = "main", 29 | cdmDatabaseName = "Eunomia", 30 | cdmDatabaseId = "1", 31 | targetId = 1, 32 | outcomeIds = 3) 33 | modelDesign <- createModelDesign(targetId = 1, 34 | outcomeId = 3, 35 | modelSettings = setLassoLogisticRegression()) 36 | saveLoc <- file.path(tempdir(), "viewMultiplePlp", "development") 37 | runMultiplePlp(databaseDetails = databaseDetails, modelDesignList = list(modelDesign), 38 | saveDirectory = saveLoc) 39 | # view result files 40 | dir(saveLoc, recursive = TRUE) 41 | # open shiny app 42 | viewMultiplePlp(analysesLocation = saveLoc) 43 | # clean up, shiny app can't be opened after the following has been run 44 | unlink(saveLoc, recursive = TRUE) 45 | } 46 | \dontshow{\}) # examplesIf} 47 | } 48 | -------------------------------------------------------------------------------- /man/plotNetBenefit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotNetBenefit} 4 | \alias{plotNetBenefit} 5 | \title{Plot the net benefit} 6 | \usage{ 7 | plotNetBenefit( 8 | plpResults, 9 | modelNames = NULL, 10 | typeColumn = "evaluation", 11 | saveLocation = NULL, 12 | showPlot = TRUE, 13 | fileName = "netBenefit.png", 14 | evalType = NULL, 15 | ylim = NULL, 16 | xlim = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{plpResults}{list of (named) plpResult objects or a single plpResult as 21 | generated using the \code{\link{runPlp}} 
function.} 22 | 23 | \item{modelNames}{(optional) names of the models to be used in the plot. If NULL, the names of the plpResults are used. Must have the same length as plpResults.} 24 | 25 | \item{typeColumn}{The name of the column specifying the evaluation type} 26 | 27 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 28 | 29 | \item{showPlot}{If TRUE, the plot is shown on the screen, if FALSE the plot 30 | object is returned without plotting.} 31 | 32 | \item{fileName}{Name of the file to save to plot, for example 'plot.png'. See the function \code{ggsave} in the ggplot2 package for supported file formats.} 33 | 34 | \item{evalType}{Which evaluation type to plot for. For example \code{Test}, \code{Train}. If NULL everything is plotted} 35 | 36 | \item{ylim}{The y limits for the plot, if NULL the limits are calculated from the data} 37 | 38 | \item{xlim}{The x limits for the plot, if NULL the limits are calculated from the data} 39 | } 40 | \value{ 41 | A list of ggplot objects or a single ggplot object if only one evaluation type is plotted 42 | } 43 | \description{ 44 | Plot the net benefit 45 | } 46 | \examples{ 47 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 48 | \donttest{ \dontshow{ # takes too long } 49 | data("simulationProfile") 50 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 51 | saveLoc <- file.path(tempdir(), "plotNetBenefit") 52 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 53 | plotNetBenefit(results) 54 | # clean up 55 | unlink(saveLoc, recursive = TRUE) 56 | } 57 | \dontshow{\}) # examplesIf} 58 | } 59 | -------------------------------------------------------------------------------- /tests/testthat/test-andromedahelperfunctions.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 
5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # add limitCovariatesToPopulation(covariateData, rowIds) test 18 | test_that("batchRestrict", { 19 | skip_if_offline() 20 | metaData <- attr(plpData$covariateData, "metaData") 21 | covariateData <- 22 | PatientLevelPrediction:::batchRestrict( 23 | plpData$covariateData, 24 | population, 25 | sizeN = 1000000 26 | ) 27 | expect_s4_class(covariateData, "CovariateData") 28 | 29 | expect_equal( 30 | names(metaData), 31 | names(attr(covariateData, "metaData")) 32 | ) 33 | }) 34 | 35 | test_that("limitPop with timeRef", { 36 | covs <- Andromeda::andromeda( 37 | covariates = data.frame( 38 | covariateId = c(1, 2, 1, 2, 2), 39 | rowId = c(1, 1, 2, 2, 2), 40 | value = c(1, 1, 1, 1, 1), 41 | timeId = c(1, 1, 1, 1, 2) 42 | ), 43 | covariateRef = data.frame( 44 | covariateId = c(1, 2), 45 | covariateName = c("cov1", "cov2"), 46 | analysisId = c(1, 2) 47 | ), 48 | analysisRef = data.frame( 49 | analysisId = c(1, 2), 50 | analysisName = c("analysis1", "analysis2") 51 | ), 52 | timeRef = data.frame( 53 | timePart = "day", 54 | timeInterval = 1, 55 | sequenceStartDay = 0, 56 | sequenceEndDay = 1 57 | ) 58 | ) 59 | class(covs) <- "CovariateData" 60 | rowIds <- c(2) 61 | limitedCovs <- limitCovariatesToPopulation(covs, rowIds) 62 | expect_equal( 63 | as.data.frame(limitedCovs$timeRef), 64 | as.data.frame(covs$timeRef) 65 | ) 66 | }) 67 | 
-------------------------------------------------------------------------------- /tests/testthat/test-getCalibration.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | test_that("getCalibration binary", { 17 | pErediction <- data.frame( 18 | rowId = 1:100, 19 | evaluation = rep("Test", 100), 20 | value = runif(100), 21 | outcomeCount = round(runif(100)) 22 | ) 23 | attr(pErediction, "metaData")$predictionType <- "binary" 24 | calib <- getCalibrationSummary( 25 | prediction = pErediction, 26 | predictionType = "binary", 27 | typeColumn = "evaluation", 28 | numberOfStrata = 100, 29 | truncateFraction = 0.05 30 | ) 31 | 32 | expect_equal(nrow(calib), 100) 33 | expect_equal(ncol(calib), 12) 34 | expect_true("evaluation" %in% colnames(calib)) 35 | 36 | 37 | calibBinary <- getCalibrationSummary_binary( 38 | prediction = pErediction, 39 | evalColumn = "evaluation", 40 | numberOfStrata = 100, 41 | truncateFraction = 0.05 42 | ) 43 | 44 | expect_equal(calib, calibBinary) 45 | }) 46 | 47 | 48 | 49 | 50 | test_that("getCalibration survival", { 51 | pErediction <- data.frame( 52 | rowId = 1:100, 53 | evaluation = rep("Test", 100), 54 | value = runif(100), 55 | survivalTime = 50 + sample(2 * 365, 100), 56 | outcomeCount = 
round(runif(100)) 57 | ) 58 | 59 | calib <- getCalibrationSummary_survival( 60 | prediction = pErediction, 61 | evalColumn = "evaluation", 62 | numberOfStrata = 50, 63 | truncateFraction = 0.05, 64 | timepoint = 365 65 | ) 66 | 67 | expect_true("evaluation" %in% colnames(calib)) 68 | expect_equal(nrow(calib), 50) 69 | expect_equal(ncol(calib), 7) 70 | }) 71 | -------------------------------------------------------------------------------- /man/createDefaultSplitSetting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DataSplitting.R 3 | \name{createDefaultSplitSetting} 4 | \alias{createDefaultSplitSetting} 5 | \title{Create the settings for defining how the plpData are split into 6 | test/validation/train sets using default splitting functions 7 | (either random stratified by outcome, time or subject splitting)} 8 | \usage{ 9 | createDefaultSplitSetting( 10 | testFraction = 0.25, 11 | trainFraction = 0.75, 12 | splitSeed = sample(1e+05, 1), 13 | nfold = 3, 14 | type = "stratified" 15 | ) 16 | } 17 | \arguments{ 18 | \item{testFraction}{(numeric) A real number between 0 and 1 19 | indicating the test set fraction of the data} 20 | 21 | \item{trainFraction}{(numeric) A real number between 0 and 1 indicating the 22 | train set fraction of the data. 
If not set train is equal to 1 - test} 23 | 24 | \item{splitSeed}{(numeric) A seed to use when splitting the data for 25 | reproducibility (if not set a random number will be generated)} 26 | 27 | \item{nfold}{(numeric) An integer > 1 specifying the number of 28 | folds used in cross validation} 29 | 30 | \item{type}{(character) Choice of: \itemize{ 31 | \item'stratified' Each data point is 32 | randomly assigned into the test or a train fold set but this is done 33 | stratified such that the outcome rate is consistent in each partition 34 | \item'time' Older data are assigned 35 | into the training set and newer data are assigned into the test set 36 | \item'subject' Data are partitioned by 37 | subject, if a subject is in the data more than once, all the data points for 38 | the subject are assigned either into the test data or into the train data 39 | (not both). 40 | }} 41 | } 42 | \value{ 43 | An object of class \code{splitSettings} 44 | } 45 | \description{ 46 | Create the settings for defining how the plpData are split into 47 | test/validation/train sets using default splitting functions 48 | (either random stratified by outcome, time or subject splitting) 49 | } 50 | \details{ 51 | Returns an object of class \code{splitSettings} that specifies the 52 | splitting function that will be called and the settings 53 | } 54 | \examples{ 55 | createDefaultSplitSetting(testFraction=0.25, trainFraction=0.75, nfold=3, 56 | splitSeed=42) 57 | } 58 | -------------------------------------------------------------------------------- /man/fitPlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Fit.R 3 | \name{fitPlp} 4 | \alias{fitPlp} 5 | \title{fitPlp} 6 | \usage{ 7 | fitPlp(trainData, modelSettings, search = "grid", analysisId, analysisPath) 8 | } 9 | \arguments{ 10 | \item{trainData}{An object of type \code{trainData} created using \code{splitData} 
11 | data extracted from the CDM.} 12 | 13 | \item{modelSettings}{An object of class \code{modelSettings} created using 14 | one of the \code{createModelSettings} functions} 15 | 16 | \item{search}{The search strategy for the hyper-parameter selection (currently not used)} 17 | 18 | \item{analysisId}{The id of the analysis} 19 | 20 | \item{analysisPath}{The path of the analysis} 21 | } 22 | \value{ 23 | An object of class \code{plpModel} containing: 24 | 25 | \item{model}{The trained prediction model} 26 | \item{preprocessing}{The preprocessing required when applying the model} 27 | \item{prediction}{The cohort data.frame with the predicted risk column added} 28 | \item{modelDesign}{A list specifying the modelDesign settings used to fit the model} 29 | \item{trainDetails}{The model meta data} 30 | \item{covariateImportance}{The covariate importance for the model} 31 | } 32 | \description{ 33 | Train various models using a default parameter grid search or user specified 34 | parameters 35 | } 36 | \details{ 37 | The user can define the machine learning model to train 38 | } 39 | \examples{ 40 | \donttest{ \dontshow{ # takes too long } 41 | # simulate data 42 | data("simulationProfile") 43 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 44 | # create study population, split into train/test and preprocess with default settings 45 | population <- createStudyPopulation(plpData, outcomeId = 3) 46 | data <- splitData(plpData, population, createDefaultSplitSetting()) 47 | data$Train$covariateData <- preprocessData(data$Train$covariateData) 48 | saveLoc <- file.path(tempdir(), "fitPlp") 49 | # fit a lasso logistic regression model using the training data 50 | plpModel <- fitPlp(data$Train, modelSettings=setLassoLogisticRegression(seed=42), 51 | analysisId=1, analysisPath=saveLoc) 52 | # show evaluationSummary for model 53 | evaluatePlp(plpModel$prediction)$evaluationSummary 54 | # clean up 55 | unlink(saveLoc, recursive = TRUE) 56 | } 57 | } 58 | 
-------------------------------------------------------------------------------- /man/createStudyPopulation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PopulationSettings.R 3 | \name{createStudyPopulation} 4 | \alias{createStudyPopulation} 5 | \title{Create a study population} 6 | \usage{ 7 | createStudyPopulation( 8 | plpData, 9 | outcomeId = plpData$metaData$databaseDetails$outcomeIds[1], 10 | populationSettings = createStudyPopulationSettings(), 11 | population = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpData}{An object of type \code{plpData} as generated using 16 | \code{getplpData}.} 17 | 18 | \item{outcomeId}{The ID of the outcome.} 19 | 20 | \item{populationSettings}{An object of class populationSettings created using \code{createPopulationSettings}} 21 | 22 | \item{population}{If specified, this population will be used as the starting point instead of the 23 | cohorts in the \code{plpData} object.} 24 | } 25 | \value{ 26 | A data frame specifying the study population. This data frame will have the following columns: 27 | \describe{ 28 | \item{rowId}{A unique identifier for an exposure} 29 | \item{subjectId}{The person ID of the subject} 30 | \item{cohortStartdate}{The index date} 31 | \item{outcomeCount}{The number of outcomes observed during the risk window} 32 | \item{timeAtRisk}{The number of days in the risk window} 33 | \item{survivalTime}{The number of days until either the outcome or the end of the risk window} 34 | } 35 | } 36 | \description{ 37 | Create a study population 38 | } 39 | \details{ 40 | Create a study population by enforcing certain inclusion and exclusion criteria, defining 41 | a risk window, and determining which outcomes fall inside the risk window. 
42 | } 43 | \examples{ 44 | \donttest{ \dontshow{ # takes too long } 45 | data("simulationProfile") 46 | plpData <- simulatePlpData(simulationProfile, n = 100, seed = 42) 47 | # Create study population, require time at risk of 30 days. The risk window is 1 to 90 days. 48 | populationSettings <- createStudyPopulationSettings(requireTimeAtRisk = TRUE, 49 | minTimeAtRisk = 30, 50 | riskWindowStart = 1, 51 | riskWindowEnd = 90) 52 | population <- createStudyPopulation(plpData, outcomeId = 3, populationSettings) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /man/getCalibrationSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CalibrationSummary.R 3 | \name{getCalibrationSummary} 4 | \alias{getCalibrationSummary} 5 | \title{Get a sparse summary of the calibration} 6 | \usage{ 7 | getCalibrationSummary( 8 | prediction, 9 | predictionType, 10 | typeColumn = "evaluation", 11 | numberOfStrata = 10, 12 | truncateFraction = 0.05 13 | ) 14 | } 15 | \arguments{ 16 | \item{prediction}{A prediction object as generated using the 17 | \code{\link{predict}} functions.} 18 | 19 | \item{predictionType}{The type of prediction (binary or survival)} 20 | 21 | \item{typeColumn}{A column that is used to stratify the results} 22 | 23 | \item{numberOfStrata}{The number of strata in the plot.} 24 | 25 | \item{truncateFraction}{This fraction of probability values will be ignored when plotting, to 26 | avoid the x-axis scale being dominated by a few outliers.} 27 | } 28 | \value{ 29 | A dataframe with the calibration summary 30 | } 31 | \description{ 32 | Get a sparse summary of the calibration 33 | } 34 | \details{ 35 | Generates a sparse summary showing the predicted probabilities and the observed fractions. Predictions are 36 | stratified into equally sized bins of predicted probabilities. 
37 | } 38 | \examples{ 39 | # simulate data 40 | data("simulationProfile") 41 | plpData <- simulatePlpData(simulationProfile, n = 500, seed = 42) 42 | # create study population, split into train/test and preprocess with default settings 43 | population <- createStudyPopulation(plpData, outcomeId = 3) 44 | data <- splitData(plpData, population, createDefaultSplitSetting()) 45 | data$Train$covariateData <- preprocessData(data$Train$covariateData) 46 | saveLoc <- file.path(tempdir(), "calibrationSummary") 47 | # fit a lasso logistic regression model using the training data 48 | plpModel <- fitPlp(data$Train, modelSettings=setLassoLogisticRegression(seed=42), 49 | analysisId=1, analysisPath=saveLoc) 50 | calibrationSummary <- getCalibrationSummary(plpModel$prediction, 51 | "binary", 52 | numberOfStrata = 10, 53 | typeColumn = "evaluationType") 54 | calibrationSummary 55 | # clean up 56 | unlink(saveLoc, recursive = TRUE) 57 | } 58 | -------------------------------------------------------------------------------- /vignettes/ClinicalModels.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Clinical Models" 3 | author: "Jenna Reps, Peter R. 
Rijnbeek" 4 | date: '`r Sys.Date()`' 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\VignetteIndexEntry{Clinical Models} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ## Clinical models developed using the OHDSI PatientLevelPrediction framework 13 | 14 | | Title | Link | 15 | |----------------------|-------| 16 | | Using Machine Learning Applied to Real-World Healthcare Data for Predictive Analytics: An Applied Example in Bariatric Surgery | [Value in Health](https://doi.org/10.1016/j.jval.2019.01.011) | 17 | | Development and validation of a prognostic model predicting symptomatic hemorrhagic transformation in acute ischemic stroke at scale in the OHDSI network | [PLoS One](https://doi.org/10.1371/journal.pone.0226718) | 18 | | Wisdom of the CROUD: development and validation of a patient-level prediction model for opioid use disorder using population-level claims data | [PLoS One](https://doi.org/10.1371/journal.pone.0228632) | 19 | | Developing predictive models to determine Patients in End-of-life Care in Administrative datasets | [Drug Safety](https://doi.org/10.1007/s40264-020-00906-7) | 20 | | Predictors of diagnostic transition from major depressive disorder to bipolar disorder: a retrospective observational network study | [Translational psychiatry](https://doi.org/10.1038/s41398-021-01760-6) | 21 | | Seek COVER: using a disease proxy to rapidly develop and validate a personalized risk calculator for COVID-19 outcomes in an international network | [BMC Medical Research Methodology](https://doi.org/10.1186/s12874-022-01505-z) | 22 | | 90-Day all-cause mortality can be predicted following a total knee replacement: an international, network study to develop and validate a prediction model | [Knee Surgery, Sports Traumatology, Arthroscopy](https://doi.org/10.1007/s00167-021-06799-y) | 23 | | Machine learning and real-world data to predict lung cancer risk in routine care | [Cancer Epidemiology, Biomarkers & 
Prevention](https://doi.org/10.1158/1055-9965.EPI-22-0873) | 24 | | Development and validation of a patient-level model to predict dementia across a network of observational databases | [BMC medicine](https://doi.org/10.1186/s12916-024-03530-9) | 25 | -------------------------------------------------------------------------------- /man/extractDatabaseToCsv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{extractDatabaseToCsv} 4 | \alias{extractDatabaseToCsv} 5 | \title{Exports all the results from a database into csv files} 6 | \usage{ 7 | extractDatabaseToCsv( 8 | conn = NULL, 9 | connectionDetails, 10 | databaseSchemaSettings = createDatabaseSchemaSettings(resultSchema = "main"), 11 | csvFolder, 12 | minCellCount = 5, 13 | sensitiveColumns = getPlpSensitiveColumns(), 14 | fileAppend = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{conn}{The connection to the database with the results} 19 | 20 | \item{connectionDetails}{The connectionDetails for the result database} 21 | 22 | \item{databaseSchemaSettings}{The result database schema settings} 23 | 24 | \item{csvFolder}{Location to save the csv files} 25 | 26 | \item{minCellCount}{The min value to show in cells that are sensitive (values less than this value will be replaced with -1)} 27 | 28 | \item{sensitiveColumns}{A named list (name of table columns belong to) with a list of columns to apply the minCellCount to.} 29 | 30 | \item{fileAppend}{If set to a string this will be appended to the start of the csv file names} 31 | } 32 | \value{ 33 | The directory path where the results were saved 34 | } 35 | \description{ 36 | Exports all the results from a database into csv files 37 | } 38 | \details{ 39 | Extracts the results from a database into a set of csv files 40 | } 41 | \examples{ 42 | \donttest{ \dontshow{ # takes too long } 43 | # develop a simple model on simulated 
data 44 | data("simulationProfile") 45 | plpData <- simulatePlpData(simulationProfile, n = 500, seed = 42) 46 | saveLoc <- file.path(tempdir(), "extractDatabaseToCsv") 47 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 48 | # now upload the results to a sqlite database 49 | databasePath <- insertResultsToSqlite(saveLoc) 50 | # now extract the results to csv 51 | connectionDetails <- 52 | DatabaseConnector::createConnectionDetails(dbms = "sqlite", 53 | server = databasePath) 54 | extractDatabaseToCsv( 55 | connectionDetails = connectionDetails, 56 | csvFolder = file.path(saveLoc, "csv") 57 | ) 58 | # show csv file 59 | list.files(file.path(saveLoc, "csv")) 60 | # clean up 61 | unlink(saveLoc, recursive = TRUE) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /man/insertResultsToSqlite.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/uploadToDatabase.R 3 | \name{insertResultsToSqlite} 4 | \alias{insertResultsToSqlite} 5 | \title{Create sqlite database with the results} 6 | \usage{ 7 | insertResultsToSqlite( 8 | resultLocation, 9 | cohortDefinitions = NULL, 10 | databaseList = NULL, 11 | sqliteLocation = file.path(resultLocation, "sqlite"), 12 | skipDiagnostics = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{resultLocation}{(string) location of directory where the main package results were saved} 17 | 18 | \item{cohortDefinitions}{A set of one or more cohorts extracted using ROhdsiWebApi::exportCohortDefinitionSet()} 19 | 20 | \item{databaseList}{A list created by \code{createDatabaseList} to specify the databases} 21 | 22 | \item{sqliteLocation}{(string) location of directory where the sqlite database will be saved} 23 | 24 | \item{skipDiagnostics}{Whether to skip uploading the diagnostics} 25 | } 26 | \value{ 27 | Returns the location of the sqlite database file 28 | } 29 | 
\description{ 30 | This function creates an sqlite database with the PLP result schema and inserts all results 31 | } 32 | \details{ 33 | This function can be used to upload PatientLevelPrediction results into an sqlite database 34 | } 35 | \examples{ 36 | \dontshow{if (rlang::is_installed("RSQLite") && rlang::is_installed("Eunomia") && rlang::is_installed("curl") && curl::has_internet()) withAutoprint(\{ # examplesIf} 37 | \donttest{ \dontshow{ # takes too long } 38 | plpData <- getEunomiaPlpData() 39 | saveLoc <- file.path(tempdir(), "insertResultsToSqlite") 40 | results <- runPlp(plpData, outcomeId = 3, analysisId = 1, saveDirectory = saveLoc) 41 | databaseFile <- insertResultsToSqlite(saveLoc, cohortDefinitions = NULL, 42 | sqliteLocation = file.path(saveLoc, "sqlite")) 43 | # check there is some data in the database 44 | library(DatabaseConnector) 45 | connectionDetails <- createConnectionDetails( 46 | dbms = "sqlite", 47 | server = databaseFile) 48 | conn <- connect(connectionDetails) 49 | # All tables should be created 50 | getTableNames(conn, databaseSchema = "main") 51 | # There is data in the tables 52 | querySql(conn, "SELECT * FROM main.model_designs limit 10") 53 | # clean up 54 | unlink(saveLoc, recursive = TRUE) 55 | } 56 | \dontshow{\}) # examplesIf} 57 | } 58 | -------------------------------------------------------------------------------- /man/recalibratePlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Recalibration.R 3 | \name{recalibratePlp} 4 | \alias{recalibratePlp} 5 | \title{recalibratePlp} 6 | \usage{ 7 | recalibratePlp( 8 | prediction, 9 | analysisId, 10 | typeColumn = "evaluationType", 11 | method = c("recalibrationInTheLarge", "weakRecalibration") 12 | ) 13 | } 14 | \arguments{ 15 | \item{prediction}{A prediction dataframe} 16 | 17 | \item{analysisId}{The model analysisId} 18 | 19 | \item{typeColumn}{The column name 
where the strata types are specified} 20 | 21 | \item{method}{Method used to recalibrate ('recalibrationInTheLarge' or 'weakRecalibration' )} 22 | } 23 | \value{ 24 | A prediction dataframe with the recalibrated predictions added 25 | } 26 | \description{ 27 | Recalibrating a model using the recalibrationInTheLarge or weakRecalibration methods 28 | } 29 | \details{ 30 | 'recalibrationInTheLarge' calculates a single correction factor for the 31 | average predicted risks to match the average observed risks. 32 | 'weakRecalibration' fits a glm model to the logit of the predicted risks, 33 | also known as Platt scaling/logistic recalibration. 34 | } 35 | \examples{ 36 | prediction <- data.frame(rowId = 1:100, 37 | value = runif(100), 38 | outcomeCount = stats::rbinom(100, 1, 0.1), 39 | evaluationType = rep("validation", 100)) 40 | attr(prediction, "metaData") <- list(modelType = "binary") 41 | # since value is uniformly distributed but outcomeCount is not (prob <- 0.1) 42 | # the predictions are mis-calibrated 43 | outcomeRate <- mean(prediction$outcomeCount) 44 | observedRisk <- mean(prediction$value) 45 | message("outcome rate is: ", outcomeRate) 46 | message("observed risk is: ", observedRisk) 47 | # let's recalibrate the predictions 48 | prediction <- recalibratePlp(prediction, 49 | analysisId = "recalibration", 50 | method = "recalibrationInTheLarge") 51 | recalibratedRisk <- mean(prediction$value) 52 | message("recalibrated risk with recalibration in the large is: ", recalibratedRisk) 53 | prediction <- recalibratePlp(prediction, 54 | analysisId = "recalibration", 55 | method = "weakRecalibration") 56 | recalibratedRisk <- mean(prediction$value) 57 | message("recalibrated risk with weak recalibration is: ", recalibratedRisk) 58 | } 59 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: PatientLevelPrediction 2 | Type: Package 
3 | Title: Develop Clinical Prediction Models Using the Common Data Model 4 | Version: 6.5.1 5 | Date: 2025-10-14 6 | Authors@R: c( 7 | person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), 8 | person("Jenna", "Reps", email = "jreps@its.jnj.com", role = c("aut")), 9 | person("Martijn", "Schuemie", role = c("aut")), 10 | person("Marc", "Suchard", role = c("aut")), 11 | person("Patrick", "Ryan", role = c("aut")), 12 | person("Peter", "Rijnbeek", role = c("aut")), 13 | person("Observational Health Data Science and Informatics", role = c("cph"))) 14 | Description: A user friendly way to create patient level prediction models using 15 | the Observational Medical Outcomes Partnership Common Data Model. Given a cohort 16 | of interest and an outcome of interest, the package can use data in the Common 17 | Data Model to build a large set of features. These features can then be used to 18 | fit a predictive model with a number of machine learning algorithms. This is 19 | further described in Reps (2017) . 
20 | License: Apache License 2.0 21 | URL: https://ohdsi.github.io/PatientLevelPrediction/, https://github.com/OHDSI/PatientLevelPrediction 22 | BugReports: https://github.com/OHDSI/PatientLevelPrediction/issues 23 | VignetteBuilder: knitr 24 | Depends: 25 | R (>= 4.0.0) 26 | Imports: 27 | Andromeda, 28 | Cyclops (>= 3.0.0), 29 | DatabaseConnector (>= 6.0.0), 30 | digest, 31 | dplyr, 32 | FeatureExtraction (>= 3.0.0), 33 | Matrix, 34 | memuse, 35 | ParallelLogger (>= 2.0.0), 36 | pROC, 37 | PRROC, 38 | rlang, 39 | SqlRender (>= 1.1.3), 40 | tidyr, 41 | utils 42 | Suggests: 43 | curl, 44 | Eunomia (>= 2.0.0), 45 | glmnet, 46 | ggplot2, 47 | gridExtra, 48 | IterativeHardThresholding, 49 | knitr, 50 | lightgbm, 51 | Metrics, 52 | mgcv, 53 | OhdsiShinyAppBuilder (>= 1.0.0), 54 | parallel, 55 | pkgload, 56 | polspline, 57 | readr, 58 | ResourceSelection, 59 | ResultModelManager (>= 0.6.0), 60 | reticulate (>= 1.41), 61 | rmarkdown, 62 | RSQLite, 63 | scoring, 64 | survival, 65 | survminer, 66 | testthat, 67 | withr, 68 | xgboost (> 1.3.2.1) 69 | RoxygenNote: 7.3.3 70 | Encoding: UTF-8 71 | Config/testthat/edition: 3 72 | Roxygen: list(markdown = TRUE) 73 | -------------------------------------------------------------------------------- /man/validateExternal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExternalValidatePlp.R 3 | \name{validateExternal} 4 | \alias{validateExternal} 5 | \title{validateExternal - Validate model performance on new data} 6 | \usage{ 7 | validateExternal( 8 | validationDesignList, 9 | databaseDetails, 10 | logSettings = createLogSettings(verbosity = "INFO", logName = "validatePLP"), 11 | outputFolder, 12 | cohortDefinitions = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{validationDesignList}{A list of objects created with \code{createValidationDesign}} 17 | 18 | \item{databaseDetails}{A list of objects of class 19 | 
\code{databaseDetails} created using \code{createDatabaseDetails}} 20 | 21 | \item{logSettings}{An object of \code{logSettings} created 22 | using \code{createLogSettings}} 23 | 24 | \item{outputFolder}{The directory to save the validation results to} 25 | 26 | \item{cohortDefinitions}{A cohortDefinitionSet object created with 27 | \code{CohortGenerator} 28 | (subfolders are created per database in validationDatabaseDetails)} 29 | } 30 | \value{ 31 | A list of results 32 | } 33 | \description{ 34 | validateExternal - Validate model performance on new data 35 | } 36 | \examples{ 37 | \dontshow{if (rlang::is_installed("Eunomia") && rlang::is_installed("curl") && curl::has_internet()) withAutoprint(\{ # examplesIf} 38 | \donttest{ \dontshow{ # takes too long } 39 | data("simulationProfile") 40 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 41 | # first fit a model on some data, default is a L1 logistic regression 42 | saveLoc <- file.path(tempdir(), "development") 43 | results <- runPlp(plpData, saveDirectory = saveLoc) 44 | # then create my validation design 45 | validationDesign <- createValidationDesign(1, 3, plpModelList = list(results$model)) 46 | # I will validate on Eunomia example database 47 | connectionDetails <- Eunomia::getEunomiaConnectionDetails() 48 | Eunomia::createCohorts(connectionDetails) 49 | databaseDetails <- createDatabaseDetails(connectionDetails = connectionDetails, 50 | cdmDatabaseSchema = "main", cdmDatabaseName = "Eunomia", cdmDatabaseId = 1, 51 | targetId = 1, outcomeIds = 3) 52 | path <- file.path(tempdir(), "validation") 53 | validateExternal(validationDesign, databaseDetails, outputFolder = path) 54 | # see generated result files 55 | dir(path, recursive = TRUE) 56 | # clean up 57 | unlink(saveLoc, recursive = TRUE) 58 | unlink(path, recursive = TRUE) 59 | } 60 | \dontshow{\}) # examplesIf} 61 | } 62 | -------------------------------------------------------------------------------- 
/tests/testthat/test-demographicSummary.R: --------------------------------------------------------------------------------
1 | # Copyright 2025 Observational Health Data Sciences and Informatics
2 | #
3 | # This file is part of PatientLevelPrediction
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | test_that("getDemographicSummary", {
18 | prediction <- data.frame(
19 | rowId = 1:100,
20 | ageYear = sample(100, 100, replace = TRUE),
21 | gender = sample(c(8507, "female"), 100, replace = TRUE),
22 | value = runif(100),
23 | outcomeCount = round(runif(100)),
24 | evaluation = rep("Test", 100)
25 | )
26 | 
27 | demoSum <- getDemographicSummary(
28 | prediction = prediction,
29 | predictionType = "binary",
30 | typeColumn = "evaluation"
31 | )
32 | 
33 | expect_equal(ncol(demoSum), 12)
34 | expect_true("evaluation" %in% colnames(demoSum))
35 | 
36 | # check correct gender length
37 | expect_equal(length(unique(prediction$gender)), length(unique(demoSum$genGroup)))
38 | 
39 | 
40 | demoSumBin <- getDemographicSummary_binary(
41 | prediction = prediction,
42 | evalColumn = "evaluation"
43 | )
44 | expect_equal(demoSum, demoSumBin)
45 | })
46 | 
47 | 
48 | # renamed from duplicate "getDemographicSummary" so test failures are unambiguous
49 | test_that("getDemographicSummary survival", {
50 | prediction <- data.frame(
51 | rowId = 1:100,
52 | ageYear = sample(100, 100, replace = TRUE),
53 | gender = sample(c(8507, "female"), 100, replace = TRUE),
54 | value = runif(100),
55 | outcomeCount = round(runif(100)),
56 | evaluation =
rep("Test", 100),
56 | survivalTime = 50 + sample(730, 100, replace = TRUE)
57 | )
58 | 
59 | demoSumSurv <- getDemographicSummary_survival(
60 | prediction = prediction,
61 | evalColumn = "evaluation",
62 | timepoint = 365
63 | )
64 | 
65 | expect_s3_class(demoSumSurv, "data.frame")
66 | expect_equal(ncol(demoSumSurv), 8)
67 | expect_true("evaluation" %in% colnames(demoSumSurv))
68 | 
69 | # check correct gender length
70 | expect_equal(length(unique(prediction$gender)), length(unique(demoSumSurv$genGroup)))
71 | })
72 | 
--------------------------------------------------------------------------------
/demo/LearningCurveDemo.R:
--------------------------------------------------------------------------------
1 | library(PatientLevelPrediction)
2 | 
3 | # This demo will generate a learning curve using 8 training set sizes
4 | # Dependent on your system it can take some time to run
5 | # If you have multiple cores we suggest to use them
6 | 
7 | selection <- readline(prompt="Would you like to demo the parallel version (y/n):")
8 | 
9 | # Generate simulated plpData from the simulation profile shipped in data/simulationProfile.rda
10 | data("simulationProfile")
11 | set.seed(1234)
12 | sampleSize <- 12000
13 | plpData <- simulatePlpData(
14 | simulationProfile,
15 | n = sampleSize
16 | )
17 | 
18 | # Create the study population
19 | populationSettings <- createStudyPopulationSettings(
20 | binary = TRUE,
21 | firstExposureOnly = FALSE,
22 | washoutPeriod = 0,
23 | removeSubjectsWithPriorOutcome = FALSE,
24 | priorOutcomeLookback = 99999,
25 | requireTimeAtRisk = TRUE,
26 | minTimeAtRisk = 0,
27 | riskWindowStart = 0,
28 | startAnchor = 'cohort start',
29 | riskWindowEnd = 365,
30 | endAnchor = 'cohort start'
31 | )
32 | 
33 | # Specify the prediction algorithm to be used
34 | modelSettings <- setLassoLogisticRegression()
35 | 
36 | # Specify a test fraction and a sequence of training event counts
37 | splitSettings <- createDefaultSplitSetting(
38 | testFraction = 0.2,
39 | type = 'stratified'
40 | )
41 | trainEvents <- seq(100, 800, 100)
42 | 
43 | 
44 | # Create the learning curve object
45 | if (selection != "y" &&
46 | selection != "Y") {
47 | learningCurve <- createLearningCurve(
48 | plpData = plpData,
49 | outcomeId = 2,
50 | analysisId = 'learningCurveDemo',
51 | parallel = FALSE,
52 | cores = 4,
53 | modelSettings = modelSettings,
54 | populationSettings = populationSettings,
55 | splitSettings = splitSettings,
56 | trainEvents = trainEvents,
57 | saveDirectory = './learningCurve'
58 | )
59 | 
60 | } else {
61 | # create a learning curve object in parallel
62 | learningCurve <- createLearningCurve(
63 | plpData = plpData,
64 | outcomeId = 2,
65 | analysisId = 'learningCurveDemo',
66 | parallel = TRUE,
67 | cores = 4,
68 | modelSettings = modelSettings,
69 | populationSettings = populationSettings,
70 | splitSettings = splitSettings,
71 | trainEvents = trainEvents,
72 | saveDirectory = './learningCurve'
73 | )
74 | 
75 | }
76 | 
77 | # plot the learning curve (title no longer claims "Parallel" since either branch may run)
78 | plotLearningCurve(
79 | learningCurve,
80 | metric = "AUROC",
81 | abscissa = "events",
82 | plotTitle = "Learning Curve",
83 | plotSubtitle = "AUROC performance"
84 | )
85 | 
--------------------------------------------------------------------------------
/man/getEunomiaPlpData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/ExtractData.R
3 | \name{getEunomiaPlpData}
4 | \alias{getEunomiaPlpData}
5 | \title{Create a plpData object from the Eunomia database}
6 | \usage{
7 | getEunomiaPlpData(covariateSettings = NULL)
8 | }
9 | \arguments{
10 | \item{covariateSettings}{A list of covariateSettings objects created using the
11 | \code{createCovariateSettings} function in the \code{FeatureExtraction} package.
12 | If nothing is specified covariates with age, gender, conditions and drug era are used.} 13 | } 14 | \value{ 15 | An object of type \code{plpData}, containing information on the cohorts, their 16 | outcomes, and baseline covariates. Information about multiple outcomes can be 17 | captured at once for efficiency reasons. This object is a list with the 18 | following components: \describe{ \item{outcomes}{A data frame listing the 19 | outcomes per person, including the time to event, and the outcome id} 20 | \item{cohorts}{A data frame listing the persons in each cohort, listing their 21 | exposure status as well as the time to the end of the observation period and 22 | time to the end of the cohort} \item{covariateData}{An Andromeda object created 23 | with the \code{FeatureExtraction} package. This object contains the following items: 24 | \describe{ \item{covariates}{An Andromeda table listing the covariates per 25 | person in the two cohorts. This is done using a sparse representation: 26 | covariates with a value of 0 are omitted to save space. Usually has three 27 | columns, rowId, covariateId and covariateValue'.} \item{covariateRef}{An 28 | Andromeda table describing the covariates that have been extracted.} 29 | \item{AnalysisRef}{An Andromeda table with information about which analysisIds 30 | from 'FeatureExtraction' were used.} }}} 31 | } 32 | \description{ 33 | This function creates a plpData object from the Eunomia database. It gets 34 | the connection details, creates the cohorts, and extracts the data. The cohort 35 | is predicting GIbleed in new users of celecoxib. 
36 | }
37 | \examples{
38 | \dontshow{if (rlang::is_installed("Eunomia") && rlang::is_installed("curl") && curl::has_internet()) withAutoprint(\{ # examplesIf}
39 | \donttest{ \dontshow{ # takes too long }
40 | covariateSettings <- FeatureExtraction::createCovariateSettings(
41 | useDemographicsAge = TRUE,
42 | useDemographicsGender = TRUE,
43 | useConditionOccurrenceAnyTimePrior = TRUE
44 | )
45 | plpData <- getEunomiaPlpData(covariateSettings = covariateSettings)
46 | }
47 | \dontshow{\}) # examplesIf}
48 | }
49 | 
--------------------------------------------------------------------------------
/demo/EnsembleModelDemo.R:
--------------------------------------------------------------------------------
1 | library(PatientLevelPrediction)
2 | 
3 | # This demo will generate a stacked ensemble consisting
4 | # of a Logistic Regression and Random Forest model.
5 | # Dependent on your system it can take some time to run
6 | 
7 | # We first simulate some data
8 | cat("Press a key to continue")
9 | invisible(readline())
10 | 
11 | # Simulate plpData from the simulation profile shipped in data/simulationProfile.rda
12 | data("simulationProfile")
13 | set.seed(1234)
14 | sampleSize <- 2000
15 | plpData <- simulatePlpData(
16 | simulationProfile,
17 | n = sampleSize,
18 | seed = 42
19 | )
20 | 
21 | # Generate the study population
22 | populationSettings <- createStudyPopulationSettings(
23 | binary = TRUE,
24 | firstExposureOnly = FALSE,
25 | washoutPeriod = 0,
26 | removeSubjectsWithPriorOutcome = FALSE,
27 | priorOutcomeLookback = 99999,
28 | requireTimeAtRisk = TRUE,
29 | minTimeAtRisk = 0,
30 | riskWindowStart = 0,
31 | startAnchor = 'cohort start',
32 | riskWindowEnd = 365,
33 | endAnchor = 'cohort start'
34 | )
35 | 
36 | # Let's set the models and model building parameters
37 | cat("Press a key to continue")
38 | invisible(readline())
39 | 
40 | # Use LASSO logistic regression and Random Forest as base predictors
41 | model1 <- setLassoLogisticRegression()
42 | model2 <- setRandomForest()
43 | 
44 | # Specify the split settings
45 | splitSettings <- createDefaultSplitSetting(
46 | testFraction = 0.2,
47 | nfold = 4,
48 | splitSeed = 100 # this makes sure same split is done
49 | )
50 | 
51 | # Specify the ensemble strategy
52 | ensembleStrategy <- 'stacked'
53 | 
54 | # Now we build the stacked ensemble
55 | # NOTE(review): runEnsembleModel must be provided by the installed PatientLevelPrediction version — confirm
56 | cat("Press a key to continue")
57 | invisible(readline())
58 | ensembleResults <- runEnsembleModel(
59 | ensembleStrategy = ensembleStrategy,
60 | parallel = TRUE,
61 | maxCores = 2,
62 | dataList = list(
63 | plpData,
64 | plpData
65 | ),
66 | outcomeIds = list(2,2),
67 | populationSettings = list(
68 | populationSettings,
69 | populationSettings
70 | ),
71 | sampleSettings = list(
72 | createSampleSettings(),
73 | createSampleSettings()
74 | ),
75 | featureEngineeringSettings = list(
76 | createFeatureEngineeringSettings(),
77 | createFeatureEngineeringSettings()
78 | ),
79 | preprocessSettings = list(
80 | createPreprocessSettings(),
81 | createPreprocessSettings()
82 | ),
83 | modelList = list(
84 | model1,
85 | model2
86 | ),
87 | splitSettings = splitSettings
88 | )
89 | 
90 | # You could now save the model and apply it on other data as described in more detail
91 | # in the vignette.
91 | -------------------------------------------------------------------------------- /man/createCohortCovariateSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AdditionalCovariates.R 3 | \name{createCohortCovariateSettings} 4 | \alias{createCohortCovariateSettings} 5 | \title{Extracts covariates based on cohorts} 6 | \usage{ 7 | createCohortCovariateSettings( 8 | cohortName, 9 | settingId, 10 | cohortDatabaseSchema = NULL, 11 | cohortTable = NULL, 12 | cohortId, 13 | startDay = -30, 14 | endDay = 0, 15 | count = FALSE, 16 | ageInteraction = FALSE, 17 | lnAgeInteraction = FALSE, 18 | analysisId = 456 19 | ) 20 | } 21 | \arguments{ 22 | \item{cohortName}{Name for the cohort} 23 | 24 | \item{settingId}{A unique id for the covariate time and} 25 | 26 | \item{cohortDatabaseSchema}{The schema of the database with the cohort. If 27 | nothing is specified then the cohortDatabaseSchema from databaseDetails at runtime is used.} 28 | 29 | \item{cohortTable}{the table name that contains the covariate cohort. 
If
30 | nothing is specified then the cohortTable from databaseDetails at runtime is used.}
31 | 
32 | \item{cohortId}{cohort id for the covariate cohort}
33 | 
34 | \item{startDay}{The number of days prior to index to start observing the cohort}
35 | 
36 | \item{endDay}{The number of days prior to index to stop observing the cohort}
37 | 
38 | \item{count}{If FALSE the covariate value is binary (1 means cohort occurred between index+startDay and index+endDay, 0 means it did not)
39 | If TRUE then the covariate value is the number of unique cohort_start_dates between index+startDay and index+endDay}
40 | 
41 | \item{ageInteraction}{If TRUE multiply the covariate value by the patient's age in years}
42 | 
43 | \item{lnAgeInteraction}{If TRUE multiply the covariate value by the log of the patient's age in years}
44 | 
45 | \item{analysisId}{The analysisId for the covariate}
46 | }
47 | \value{
48 | An object of class \code{covariateSettings} specifying how to create the cohort covariate with the covariateId
49 | cohortId x 100000 + settingId x 1000 + analysisId
50 | }
51 | \description{
52 | Extracts covariates based on cohorts
53 | }
54 | \details{
55 | The user specifies a cohort and time period and then a covariate is constructed whether they are in the
56 | cohort during the time periods relative to target population cohort index
57 | }
58 | \examples{
59 | createCohortCovariateSettings(cohortName="testCohort",
60 | settingId=1,
61 | cohortId=1,
62 | cohortDatabaseSchema="cohorts",
63 | cohortTable="cohort_table")
64 | }
65 | 
--------------------------------------------------------------------------------
/man/splitData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/DataSplitting.R
3 | \name{splitData}
4 | \alias{splitData}
5 | \title{Split the plpData into test/train sets using a splitting settings of class
6 | \code{splitSettings}}
7 | \usage{
8 | 
splitData( 9 | plpData = plpData, 10 | population = population, 11 | splitSettings = createDefaultSplitSetting(splitSeed = 42) 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpData}{An object of type \code{plpData} - the patient level 16 | prediction data extracted from the CDM.} 17 | 18 | \item{population}{The population created using \code{createStudyPopulation} 19 | that define who will be used to develop the model} 20 | 21 | \item{splitSettings}{An object of type \code{splitSettings} specifying the 22 | split - the default can be created using \code{createDefaultSplitSetting}} 23 | } 24 | \value{ 25 | Returns a list containing the training data (Train) and optionally the test 26 | data (Test). Train is an Andromeda object containing 27 | \itemize{\item covariates: a table (rowId, covariateId, covariateValue) 28 | containing the covariates for each data point in the train data 29 | \item covariateRef: a table with the covariate information 30 | \item labels: a table (rowId, outcomeCount, ...) for each data point 31 | in the train data (outcomeCount is the class label) 32 | \item folds: a table (rowId, index) specifying which training 33 | fold each data point is in. 34 | } 35 | Test is an Andromeda object containing 36 | \itemize{\item covariates: a table (rowId, covariateId, covariateValue) 37 | containing the covariates for each data point in the test data 38 | \item covariateRef: a table with the covariate information 39 | \item labels: a table (rowId, outcomeCount, ...) 
for each data 40 | point in the test data (outcomeCount is the class label) 41 | } 42 | } 43 | \description{ 44 | Split the plpData into test/train sets using a splitting settings of class 45 | \code{splitSettings} 46 | } 47 | \examples{ 48 | data("simulationProfile") 49 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 50 | population <- createStudyPopulation(plpData) 51 | splitSettings <- createDefaultSplitSetting(testFraction = 0.50, 52 | trainFraction = 0.50, nfold = 5) 53 | data = splitData(plpData, population, splitSettings) 54 | # test data should be ~500 rows (changes because of study population) 55 | nrow(data$Test$labels) 56 | # train data should be ~500 rows 57 | nrow(data$Train$labels) 58 | # should be five fold in the train data 59 | length(unique(data$Train$folds$index)) 60 | } 61 | --------------------------------------------------------------------------------