├── vignettes ├── .gitignore ├── images │ ├── gis1.avif │ ├── gis2.avif │ ├── Figure1.avif │ ├── popdef1.avif │ ├── popdef2.avif │ ├── popdef3.avif │ ├── popdef4.avif │ ├── popdef5.avif │ ├── popdef6.avif │ ├── atlasplp1.avif │ ├── atlasplp2.avif │ ├── atlasplp3.avif │ ├── atlasplp4.avif │ ├── problems.avif │ ├── shinyroc.avif │ ├── sparseRoc.avif │ ├── shinysummary.avif │ ├── studydesign.avif │ ├── atlasdownload1.avif │ ├── atlasdownload2.avif │ ├── learningCurve.avif │ ├── precisionRecall.avif │ ├── preferencePDF.avif │ ├── generalizability.avif │ ├── learningCurveBias.avif │ ├── learningCurvePlot.avif │ ├── smoothCalibration.avif │ ├── sparseCalibration.avif │ ├── demographicSummary.avif │ ├── variableScatterplot.avif │ ├── learningCurveVariance.avif │ └── predictionDistribution.avif ├── example1 │ ├── ATLAS_O.avif │ ├── ATLAS_O.webp │ ├── ATLAS_T.avif │ └── ATLAS_T.webp ├── example2 │ ├── angioedema.avif │ ├── angioedema.webp │ ├── aceinhibitors.avif │ └── aceinhibitors.webp ├── PatientLevelPredictionFigures.pptx └── ClinicalModels.Rmd ├── data └── simulationProfile.rda ├── inst ├── sql │ ├── sql_server │ │ ├── RemoveCohortTempTables.sql │ │ ├── UpdateVersionNumber.sql │ │ ├── GetCohorts.sql │ │ └── migrations │ │ │ └── Migration_1-store_version.sql │ ├── sqlite │ │ └── migrations │ │ │ └── Migration_1-store_version.sql │ └── postgresql │ │ └── migrations │ │ └── Migration_1-store_version.sql ├── CITATION ├── shinyConfigUpdate.json └── shinyConfig.json ├── tests ├── testthat.R └── testthat │ ├── test-sklearnClassifierHelpers.R │ ├── helper-expectations.R │ ├── test-helperfunctions.R │ ├── test-PredictionDistribution.R │ ├── test-fitting.R │ ├── test-featureImportance.R │ ├── test-andromedahelperfunctions.R │ ├── test-getCalibration.R │ └── test-demographicSummary.R ├── extras └── PatientLevelPrediction.pdf ├── demo ├── 00Index ├── SingleModelDemo.R ├── LearningCurveDemo.R └── EnsembleModelDemo.R ├── .Rbuildignore ├── .github ├── workflows │ ├── 
nightly_cleanup_Hades.yml │ ├── pkgdown.yaml │ └── revdeps.yml └── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── man ├── calibrationInLarge.Rd ├── listAppend.Rd ├── createTempModelLoc.Rd ├── listCartesian.Rd ├── print.plpData.Rd ├── pmmFit.Rd ├── simpleImpute.Rd ├── brierScore.Rd ├── iterativeImpute.Rd ├── summary.plpData.Rd ├── createDefaultExecuteSettings.Rd ├── print.summary.plpData.Rd ├── averagePrecision.Rd ├── calibrationLine.Rd ├── minMaxNormalize.Rd ├── getPredictionDistribution_binary.Rd ├── removeRareFeatures.Rd ├── setNaiveBayes.Rd ├── simulationProfile.Rd ├── createFeatureEngineeringSettings.Rd ├── createSimpleImputer.Rd ├── computeAuc.Rd ├── loadPlpData.Rd ├── migrateDataModel.Rd ├── ici.Rd ├── loadPlpResult.Rd ├── createRandomForestFeatureSelection.Rd ├── sklearnToJson.Rd ├── createSplineSettings.Rd ├── loadPrediction.Rd ├── setPythonEnvironment.Rd ├── PatientLevelPrediction.Rd ├── modelBasedConcordance.Rd ├── loadPlpModel.Rd ├── createValidationSettings.Rd ├── createUnivariateFeatureSelection.Rd ├── simulatePlpData.Rd ├── computeGridPerformance.Rd ├── savePlpData.Rd ├── getPredictionDistribution.Rd ├── savePlpModel.Rd ├── getThresholdSummary.Rd ├── loadPlpShareable.Rd ├── sklearnFromJson.Rd ├── createExistingSplitSettings.Rd ├── loadPlpAnalysesJson.Rd ├── configurePython.Rd ├── savePrediction.Rd ├── predictGlm.Rd ├── createStratifiedImputationSettings.Rd ├── plotPlp.Rd ├── savePlpResult.Rd ├── MapIds.Rd ├── predictPlp.Rd ├── createNormalizer.Rd ├── createExecuteSettings.Rd ├── viewPlp.Rd ├── createPreprocessSettings.Rd ├── predictCyclops.Rd ├── savePlpAnalysesJson.Rd ├── setCoxModel.Rd ├── setAdaBoost.Rd ├── createIterativeImputer.Rd ├── plotVariableScatterplot.Rd ├── recalibratePlpRefit.Rd ├── createRareFeatureRemover.Rd ├── plotSparseCalibration.Rd ├── preprocessData.Rd ├── savePlpShareable.Rd ├── createLogSettings.Rd ├── plotSparseCalibration2.Rd ├── robustNormalize.Rd ├── plotSparseRoc.Rd ├── setIterativeHardThresholding.Rd ├── 
plotDemographicSummary.Rd ├── plotF1Measure.Rd ├── plotPrecisionRecall.Rd ├── createSampleSettings.Rd ├── plotPredictionDistribution.Rd ├── plotPredictedPDF.Rd ├── createValidationDesign.Rd ├── diagnoseMultiplePlp.Rd ├── plotPreferencePDF.Rd ├── covariateSummary.Rd ├── getDemographicSummary.Rd ├── outcomeSurvivalPlot.Rd ├── setLassoLogisticRegression.Rd ├── plotGeneralizability.Rd ├── evaluatePlp.Rd ├── setGradientBoostingMachine.Rd ├── toSparseM.Rd ├── pfi.Rd ├── createDatabaseSchemaSettings.Rd ├── plotLearningCurve.Rd ├── setSVM.Rd ├── viewMultiplePlp.Rd ├── plotNetBenefit.Rd ├── createDefaultSplitSetting.Rd ├── fitPlp.Rd ├── createStudyPopulation.Rd ├── getCalibrationSummary.Rd ├── extractDatabaseToCsv.Rd ├── insertResultsToSqlite.Rd ├── recalibratePlp.Rd ├── validateExternal.Rd ├── getEunomiaPlpData.Rd ├── createCohortCovariateSettings.Rd └── splitData.Rd ├── PatientLevelPrediction.Rproj ├── .gitattributes ├── compare_versions ├── .gitignore ├── deploy.sh ├── R ├── SklearnClassifierHelpers.R └── PatientLevelPrediction.R ├── .settings └── org.eclipse.cdt.managedbuilder.core.prefs └── DESCRIPTION /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | -------------------------------------------------------------------------------- /data/simulationProfile.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/data/simulationProfile.rda -------------------------------------------------------------------------------- /inst/sql/sql_server/RemoveCohortTempTables.sql: -------------------------------------------------------------------------------- 1 | TRUNCATE TABLE #cohort_person; 2 | 3 | DROP TABLE #cohort_person; 4 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | 
library(testthat) 2 | library(PatientLevelPrediction) 3 | test_check("PatientLevelPrediction") 4 | -------------------------------------------------------------------------------- /vignettes/images/gis1.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/gis1.avif -------------------------------------------------------------------------------- /vignettes/images/gis2.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/gis2.avif -------------------------------------------------------------------------------- /vignettes/images/Figure1.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/Figure1.avif -------------------------------------------------------------------------------- /vignettes/images/popdef1.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/popdef1.avif -------------------------------------------------------------------------------- /vignettes/images/popdef2.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/popdef2.avif -------------------------------------------------------------------------------- /vignettes/images/popdef3.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/popdef3.avif -------------------------------------------------------------------------------- /vignettes/images/popdef4.avif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/popdef4.avif -------------------------------------------------------------------------------- /vignettes/images/popdef5.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/popdef5.avif -------------------------------------------------------------------------------- /vignettes/images/popdef6.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/popdef6.avif -------------------------------------------------------------------------------- /vignettes/example1/ATLAS_O.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example1/ATLAS_O.avif -------------------------------------------------------------------------------- /vignettes/example1/ATLAS_O.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example1/ATLAS_O.webp -------------------------------------------------------------------------------- /vignettes/example1/ATLAS_T.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example1/ATLAS_T.avif -------------------------------------------------------------------------------- /vignettes/example1/ATLAS_T.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example1/ATLAS_T.webp 
-------------------------------------------------------------------------------- /vignettes/images/atlasplp1.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/atlasplp1.avif -------------------------------------------------------------------------------- /vignettes/images/atlasplp2.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/atlasplp2.avif -------------------------------------------------------------------------------- /vignettes/images/atlasplp3.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/atlasplp3.avif -------------------------------------------------------------------------------- /vignettes/images/atlasplp4.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/atlasplp4.avif -------------------------------------------------------------------------------- /vignettes/images/problems.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/problems.avif -------------------------------------------------------------------------------- /vignettes/images/shinyroc.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/shinyroc.avif -------------------------------------------------------------------------------- /vignettes/images/sparseRoc.avif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/sparseRoc.avif -------------------------------------------------------------------------------- /extras/PatientLevelPrediction.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/extras/PatientLevelPrediction.pdf -------------------------------------------------------------------------------- /vignettes/example2/angioedema.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example2/angioedema.avif -------------------------------------------------------------------------------- /vignettes/example2/angioedema.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example2/angioedema.webp -------------------------------------------------------------------------------- /vignettes/images/shinysummary.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/shinysummary.avif -------------------------------------------------------------------------------- /vignettes/images/studydesign.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/studydesign.avif -------------------------------------------------------------------------------- /vignettes/example2/aceinhibitors.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example2/aceinhibitors.avif -------------------------------------------------------------------------------- 
/vignettes/example2/aceinhibitors.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/example2/aceinhibitors.webp -------------------------------------------------------------------------------- /vignettes/images/atlasdownload1.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/atlasdownload1.avif -------------------------------------------------------------------------------- /vignettes/images/atlasdownload2.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/atlasdownload2.avif -------------------------------------------------------------------------------- /vignettes/images/learningCurve.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/learningCurve.avif -------------------------------------------------------------------------------- /vignettes/images/precisionRecall.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/precisionRecall.avif -------------------------------------------------------------------------------- /vignettes/images/preferencePDF.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/preferencePDF.avif -------------------------------------------------------------------------------- /vignettes/images/generalizability.avif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/generalizability.avif -------------------------------------------------------------------------------- /vignettes/images/learningCurveBias.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/learningCurveBias.avif -------------------------------------------------------------------------------- /vignettes/images/learningCurvePlot.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/learningCurvePlot.avif -------------------------------------------------------------------------------- /vignettes/images/smoothCalibration.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/smoothCalibration.avif -------------------------------------------------------------------------------- /vignettes/images/sparseCalibration.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/sparseCalibration.avif -------------------------------------------------------------------------------- /vignettes/images/demographicSummary.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/demographicSummary.avif -------------------------------------------------------------------------------- /vignettes/images/variableScatterplot.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/variableScatterplot.avif 
-------------------------------------------------------------------------------- /vignettes/PatientLevelPredictionFigures.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/PatientLevelPredictionFigures.pptx -------------------------------------------------------------------------------- /vignettes/images/learningCurveVariance.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/learningCurveVariance.avif -------------------------------------------------------------------------------- /vignettes/images/predictionDistribution.avif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OHDSI/PatientLevelPrediction/HEAD/vignettes/images/predictionDistribution.avif -------------------------------------------------------------------------------- /demo/00Index: -------------------------------------------------------------------------------- 1 | SingleModelDemo Develop a single model on simulated data 2 | LearningCurveDemo Generate a learning curve on simulated data 3 | EnsembleModelDemo Generate an ensemble model on simulated data 4 | -------------------------------------------------------------------------------- /inst/sql/sql_server/UpdateVersionNumber.sql: -------------------------------------------------------------------------------- 1 | {DEFAULT @package_version = package_version} 2 | {DEFAULT @version_number = '6.0.10'} 3 | 4 | DELETE FROM @database_schema.@table_prefix@package_version; 5 | INSERT INTO @database_schema.@table_prefix@package_version (version_number) VALUES ('@version_number'); 6 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | 
^renv$ 2 | ^renv\.lock$ 3 | ^.*\.Rproj$ 4 | ^\.Rproj\.user$ 5 | ^.vscode$ 6 | standalone 7 | deploy.sh 8 | extras/* 9 | man-roxygen 10 | compare_versions 11 | .github 12 | docs/* 13 | ^pyproject.toml$ 14 | ^.python-version$ 15 | ^uv.lock$ 16 | _pkgdown.yml 17 | ^vignettes/articles$ 18 | ^vignettes/Videos.Rmd 19 | ^doc$ 20 | ^Meta$ 21 | ^CRAN-RELEASE$ 22 | ^cran-comments\.md$ 23 | ^Dockerfile$ 24 | ^.venv$ 25 | ^.notVenv$ 26 | -------------------------------------------------------------------------------- /inst/sql/sql_server/GetCohorts.sql: -------------------------------------------------------------------------------- 1 | {DEFAULT @cdm_version = '5'} 2 | 3 | SELECT cast(row_id as int) row_id, 4 | subject_id, 5 | {@cdm_version == "4"} ? { 6 | cohort_concept_id AS target_id, 7 | } : { 8 | cohort_definition_id AS target_id, 9 | } 10 | cohort_start_date, 11 | days_from_obs_start, 12 | days_to_cohort_end, 13 | days_to_obs_end, 14 | age_year, 15 | gender 16 | FROM #cohort_person cohort 17 | ORDER BY subject_id 18 | -------------------------------------------------------------------------------- /.github/workflows/nightly_cleanup_Hades.yml: -------------------------------------------------------------------------------- 1 | name: 'nightly artifacts cleanup' 2 | on: 3 | schedule: 4 | - cron: '0 1 * * *' # every night at 1 am UTC 5 | 6 | jobs: 7 | remove-old-artifacts: 8 | runs-on: ubuntu-latest 9 | timeout-minutes: 10 10 | 11 | steps: 12 | - name: Remove old artifacts 13 | uses: c-hive/gha-remove-artifacts@v1 14 | with: 15 | age: '7 days' 16 | # Optional inputs 17 | # skip-tags: true 18 | skip-recent: 1 -------------------------------------------------------------------------------- /man/calibrationInLarge.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluationSummary.R 3 | \name{calibrationInLarge} 4 | \alias{calibrationInLarge} 5 | 
\title{Calculate the calibration in large} 6 | \usage{ 7 | calibrationInLarge(prediction) 8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction dataframe} 11 | } 12 | \value{ 13 | data.frame with meanPredictionRisk, observedRisk, and N 14 | } 15 | \description{ 16 | Calculate the calibration in large 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /PatientLevelPrediction.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: 6acb9f49-7428-4e24-8a2a-6b10f35b95e2 3 | 4 | RestoreWorkspace: No 5 | SaveWorkspace: No 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: knitr 14 | LaTeX: pdfLaTeX 15 | 16 | BuildType: Package 17 | PackageUseDevtools: Yes 18 | PackageInstallArgs: --no-multiarch --with-keep.source 19 | PackageBuildArgs: --compact-vignettes=both 20 | PackageCheckArgs: --as-cran 21 | PackageRoxygenize: rd,namespace 22 | -------------------------------------------------------------------------------- /man/listAppend.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/HelperFunctions.R 3 | \name{listAppend} 4 | \alias{listAppend} 5 | \title{join two lists} 6 | \usage{ 7 | listAppend(a, b) 8 | } 9 | \arguments{ 10 | \item{a}{A list} 11 | 12 | \item{b}{Another list} 13 | } 14 | \value{ 15 | the joined list 16 | } 17 | \description{ 18 | join two lists 19 | } 20 | \details{ 21 | This function joins two lists 22 | } 23 | \examples{ 24 | a <- list(a = 1, b = 2) 25 | b <- list(c = 3, d = 4) 26 | listAppend(a, b) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/createTempModelLoc.Rd: -------------------------------------------------------------------------------- 1 
| % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/HelperFunctions.R 3 | \name{createTempModelLoc} 4 | \alias{createTempModelLoc} 5 | \title{Create a temporary model location} 6 | \usage{ 7 | createTempModelLoc() 8 | } 9 | \value{ 10 | A string for the location of the temporary model location 11 | } 12 | \description{ 13 | Create a temporary model location 14 | } 15 | \examples{ 16 | modelLoc <- createTempModelLoc() 17 | dir.exists(modelLoc) 18 | # clean up 19 | unlink(modelLoc, recursive = TRUE) 20 | } 21 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Set the default behavior, in case people don't have core.autocrlf set. 2 | * text=auto 3 | 4 | # Explicitly declare text files you want to always be normalized and converted 5 | # to native line endings on checkout. 6 | DESCRIPTION text 7 | NAMESPACE text 8 | *.R text 9 | *.Rd text 10 | .cproject text 11 | .project text 12 | .settings/* text 13 | 14 | # Declare files that will always have CRLF line endings on checkout. 15 | *.sln text eol=crlf 16 | 17 | # Denote all files that are truly binary and should not be modified. 
18 | *.png binary 19 | *.jpg binary 20 | -------------------------------------------------------------------------------- /man/listCartesian.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnClassifierHelpers.R 3 | \name{listCartesian} 4 | \alias{listCartesian} 5 | \title{Cartesian product} 6 | \usage{ 7 | listCartesian(allList) 8 | } 9 | \arguments{ 10 | \item{allList}{a list of lists} 11 | } 12 | \value{ 13 | A list with all possible combinations from the input list of lists 14 | } 15 | \description{ 16 | Computes the Cartesian product of all the combinations of elements in a list 17 | } 18 | \examples{ 19 | listCartesian(list(list(1, 2), list(3, 4))) 20 | } 21 | -------------------------------------------------------------------------------- /man/print.plpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExtractData.R 3 | \name{print.plpData} 4 | \alias{print.plpData} 5 | \title{Print a plpData object} 6 | \usage{ 7 | \method{print}{plpData}(x, ...) 
8 | } 9 | \arguments{ 10 | \item{x}{The plpData object to print} 11 | 12 | \item{...}{Additional arguments} 13 | } 14 | \value{ 15 | A message describing the object 16 | } 17 | \description{ 18 | Print a plpData object 19 | } 20 | \examples{ 21 | 22 | data("simulationProfile") 23 | plpData <- simulatePlpData(simulationProfile, n = 10, seed = 42) 24 | print(plpData) 25 | } 26 | -------------------------------------------------------------------------------- /man/pmmFit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Imputation.R 3 | \name{pmmFit} 4 | \alias{pmmFit} 5 | \title{Predictive mean matching using lasso} 6 | \usage{ 7 | pmmFit(data, k = 5) 8 | } 9 | \arguments{ 10 | \item{data}{An andromeda object with the following fields: 11 | xObs: covariates table for observed data 12 | xMiss: covariates table for missing data 13 | yObs: outcome variable that we want to impute} 14 | 15 | \item{k}{The number of donors to use for matching (default 5)} 16 | } 17 | \description{ 18 | Predictive mean matching using lasso 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/simpleImpute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Imputation.R 3 | \name{simpleImpute} 4 | \alias{simpleImpute} 5 | \title{Simple Imputation} 6 | \usage{ 7 | simpleImpute(trainData, featureEngineeringSettings, done = FALSE) 8 | } 9 | \arguments{ 10 | \item{trainData}{The data to be imputed} 11 | 12 | \item{featureEngineeringSettings}{The settings for the imputation} 13 | 14 | \item{done}{Whether the imputation has already been done (bool)} 15 | } 16 | \value{ 17 | The imputed data 18 | } 19 | \description{ 20 | This function does single imputation with the mean or median 21 | } 22 | 
\keyword{internal} 23 | -------------------------------------------------------------------------------- /compare_versions: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | open(R_VERSION, "grep 'Version' DESCRIPTION |"); 4 | $version = ; 5 | close(R_VERSION); 6 | 7 | $version =~ /(\d+)\.(\d+)\.(\d+)/; 8 | $r_major = $1; 9 | $r_minor = $2; 10 | $r_mod = $3; 11 | 12 | open(GIT_VERSION, "git describe --tags |"); 13 | $git = ; 14 | close(GIT_VERSION); 15 | 16 | $git =~ /v(\d+)\.(\d+)\.(\d+)/; 17 | $git_major = $1; 18 | $git_minor = $2; 19 | $git_mod = $3; 20 | 21 | if ($r_major > $git_major || $r_minor > $git_minor || $r_mod > $git_mod) { 22 | $new_version = "v$r_major.$r_minor.$r_mod"; 23 | } else { 24 | $new_version = ""; 25 | } 26 | 27 | print($new_version); 28 | -------------------------------------------------------------------------------- /man/brierScore.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluationSummary.R 3 | \name{brierScore} 4 | \alias{brierScore} 5 | \title{brierScore} 6 | \usage{ 7 | brierScore(prediction) 8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction dataframe} 11 | } 12 | \value{ 13 | A list containing the brier score and the scaled brier score 14 | } 15 | \description{ 16 | brierScore 17 | } 18 | \details{ 19 | Calculates the brierScore from prediction object 20 | } 21 | \examples{ 22 | prediction <- data.frame( 23 | value = c(0.1, 0.2, 0.3, 0.4, 0.5), 24 | outcomeCount = c(0, 1, 0, 1, 1)) 25 | brierScore(prediction) 26 | } 27 | -------------------------------------------------------------------------------- /man/iterativeImpute.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Imputation.R 3 | \name{iterativeImpute} 4 | 
\alias{iterativeImpute} 5 | \title{Imputation} 6 | \usage{ 7 | iterativeImpute(trainData, featureEngineeringSettings, done = FALSE) 8 | } 9 | \arguments{ 10 | \item{trainData}{The data to be imputed} 11 | 12 | \item{featureEngineeringSettings}{The settings for the imputation} 13 | 14 | \item{done}{Whether the imputation has already been done (bool)} 15 | } 16 | \value{ 17 | The imputed data 18 | } 19 | \description{ 20 | This function does single imputation with predictive mean matchin 21 | } 22 | \keyword{internal} 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | # Example code in package build process 4 | *-Ex.R 5 | # R data files from past sessions 6 | .Rdata 7 | # R environ 8 | .Renviron 9 | # RStudio files 10 | .Rproj.user/ 11 | .Rproj.user 12 | # SqlRender 13 | statement_*.sql 14 | errorReport.txt 15 | #C++ objects 16 | src/*.o 17 | src/*.so 18 | src/*.dll 19 | /Debug 20 | standalone/build/* 21 | #models 22 | /plpmodels/* 23 | /python_models/* 24 | /mycache/* 25 | /inst/shiny/DiagnosticsExplorer/rsconnect/* 26 | /doc/ 27 | /Meta/ 28 | /extras/ 29 | /results/ 30 | /.vscode/ 31 | /sql/ 32 | .project 33 | .cproject 34 | /docs/ 35 | .python-version 36 | pyproject.toml 37 | uv.lock 38 | -------------------------------------------------------------------------------- /inst/sql/sqlite/migrations/Migration_1-store_version.sql: -------------------------------------------------------------------------------- 1 | -- Database migrations for verion 6.0.10 2 | -- This migration updates the schema: 3 | -- 1. to store the patient level prediction version 4 | -- 2. 
Add a migrations table for supporting database migrations 5 | 6 | {DEFAULT @package_version = package_version} 7 | {DEFAULT @migration = migration} 8 | {DEFAULT @table_prefix = ''} 9 | 10 | -- Create table indicating version number of ddl 11 | DROP TABLE IF EXISTS @database_schema.@table_prefix@package_version; 12 | 13 | --HINT DISTRIBUTE ON RANDOM 14 | CREATE TABLE @database_schema.@table_prefix@package_version ( 15 | version_number VARCHAR(50) PRIMARY KEY 16 | ); -------------------------------------------------------------------------------- /inst/sql/postgresql/migrations/Migration_1-store_version.sql: -------------------------------------------------------------------------------- 1 | -- Database migrations for verion 6.0.10 2 | -- This migration updates the schema: 3 | -- 1. to store the patient level prediction version 4 | -- 2. Add a migrations table for supporting database migrations 5 | 6 | {DEFAULT @package_version = package_version} 7 | {DEFAULT @migration = migration} 8 | {DEFAULT @table_prefix = ''} 9 | 10 | -- Create table indicating version number of ddl 11 | DROP TABLE IF EXISTS @database_schema.@table_prefix@package_version; 12 | 13 | --HINT DISTRIBUTE ON RANDOM 14 | CREATE TABLE @database_schema.@table_prefix@package_version ( 15 | version_number VARCHAR(50) PRIMARY KEY 16 | ); -------------------------------------------------------------------------------- /inst/sql/sql_server/migrations/Migration_1-store_version.sql: -------------------------------------------------------------------------------- 1 | -- Database migrations for verion 6.0.10 2 | -- This migration updates the schema: 3 | -- 1. to store the patient level prediction version 4 | -- 2. 
Add a migrations table for supporting database migrations 5 | 6 | {DEFAULT @package_version = package_version} 7 | {DEFAULT @migration = migration} 8 | {DEFAULT @table_prefix = ''} 9 | 10 | -- Create table indicating version number of ddl 11 | DROP TABLE IF EXISTS @database_schema.@table_prefix@package_version; 12 | 13 | --HINT DISTRIBUTE ON RANDOM 14 | CREATE TABLE @database_schema.@table_prefix@package_version ( 15 | version_number VARCHAR(50) PRIMARY KEY 16 | ); -------------------------------------------------------------------------------- /man/summary.plpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExtractData.R 3 | \name{summary.plpData} 4 | \alias{summary.plpData} 5 | \title{Summarize a plpData object} 6 | \usage{ 7 | \method{summary}{plpData}(object, ...) 8 | } 9 | \arguments{ 10 | \item{object}{The plpData object to summarize} 11 | 12 | \item{...}{Additional arguments} 13 | } 14 | \value{ 15 | A summary of the object containing the number of people, outcomes and covariates 16 | } 17 | \description{ 18 | Summarize a plpData object 19 | } 20 | \examples{ 21 | data("simulationProfile") 22 | plpData <- simulatePlpData(simulationProfile, n = 10, seed = 42) 23 | summary(plpData) 24 | } 25 | -------------------------------------------------------------------------------- /man/createDefaultExecuteSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunPlpHelpers.R 3 | \name{createDefaultExecuteSettings} 4 | \alias{createDefaultExecuteSettings} 5 | \title{Creates default list of settings specifying what parts of runPlp to execute} 6 | \usage{ 7 | createDefaultExecuteSettings() 8 | } 9 | \value{ 10 | list with TRUE for split, preprocess, model development and covariate summary 11 | } 12 | \description{ 13 | 
Creates default list of settings specifying what parts of runPlp to execute 14 | } 15 | \details{ 16 | runs split, preprocess, model development and covariate summary 17 | } 18 | \examples{ 19 | createDefaultExecuteSettings() 20 | } 21 | -------------------------------------------------------------------------------- /man/print.summary.plpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExtractData.R 3 | \name{print.summary.plpData} 4 | \alias{print.summary.plpData} 5 | \title{Print a summary.plpData object} 6 | \usage{ 7 | \method{print}{summary.plpData}(x, ...) 8 | } 9 | \arguments{ 10 | \item{x}{The summary.plpData object to print} 11 | 12 | \item{...}{Additional arguments} 13 | } 14 | \value{ 15 | A message describing the object 16 | } 17 | \description{ 18 | Print a summary.plpData object 19 | } 20 | \examples{ 21 | data("simulationProfile") 22 | plpData <- simulatePlpData(simulationProfile, n = 10, seed = 42) 23 | summary <- summary(plpData) 24 | print(summary) 25 | } 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **Set up (please run in R "sessionInfo()" and copy the output here):** 14 | copy the system set up details including the R version and operating system details... 
15 | 16 | **To Reproduce** 17 | Enter the code you tried to run here (with sensitive information such as passwords removed) 18 | 19 | **PLP Log File** 20 | Enter the log file contents here (plplog.txt) 21 | 22 | **Additional context** 23 | Add any other context about the problem here. 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /man/averagePrecision.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluationSummary.R 3 | \name{averagePrecision} 4 | \alias{averagePrecision} 5 | \title{Calculate the average precision} 6 | \usage{ 7 | averagePrecision(prediction) 8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction object} 11 | } 12 | \value{ 13 | The average precision value 14 | } 15 | \description{ 16 | Calculate the average precision 17 | } 18 | \details{ 19 | Calculates the average precision from a predition object 20 | } 21 | \examples{ 22 | prediction <- data.frame( 23 | value = c(0.1, 0.2, 0.3, 0.4, 0.5), 24 | outcomeCount = c(0, 1, 0, 1, 1) 25 | ) 26 | averagePrecision(prediction) 27 | } 28 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite PatientLevelPrediction in publications use:") 2 | 3 | bibentry(bibtype = "Article", 4 | author = c(as.person("J. M. Reps"), 5 | as.person("M. J. Schuemie"), 6 | as.person("M. A. Suchard"), 7 | as.person("P. B. Ryan"), 8 | as.person("P. 
Rijnbeek")), 9 | title = "Design and implementation of a standardized framework to generate and evaluate patient-level prediction models using observational healthcare data", 10 | journal = "Journal of the American Medical Informatics Association", 11 | volume = "25", 12 | number = "8", 13 | pages = "969-975", 14 | year = "2018", 15 | url = "https://doi.org/10.1093/jamia/ocy032" 16 | ) 17 | -------------------------------------------------------------------------------- /inst/shinyConfigUpdate.json: -------------------------------------------------------------------------------- 1 | { 2 | "shinyModules": [ 3 | { 4 | "id": "about", 5 | "tabName": "About", 6 | "tabText": "About", 7 | "shinyModulePackage": "OhdsiShinyModules", 8 | "uiFunction": "aboutViewer", 9 | "serverFunction": "aboutServer", 10 | "infoBoxFile": "aboutHelperFile()", 11 | "icon": "info", 12 | "order": 1 13 | }, 14 | { 15 | "id": "prediction", 16 | "tabName": "Prediction", 17 | "tabText": "Prediction", 18 | "shinyModulePackage": "OhdsiShinyModules", 19 | "uiFunction": "patientLevelPredictionViewer", 20 | "serverFunction": "patientLevelPredictionServer", 21 | "infoBoxFile": "patientLevelPredictionHelperFile()", 22 | "icon": "chart-line", 23 | "order": 2 24 | } 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /man/calibrationLine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluationSummary.R 3 | \name{calibrationLine} 4 | \alias{calibrationLine} 5 | \title{calibrationLine} 6 | \usage{ 7 | calibrationLine(prediction, numberOfStrata = 10) 8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction object} 11 | 12 | \item{numberOfStrata}{The number of groups to split the prediction into} 13 | } 14 | \value{ 15 | A list containing the calibrationLine coefficients, the aggregate data used 16 | to fit the line and the Hosmer-Lemeshow 
goodness of fit test 17 | } 18 | \description{ 19 | calibrationLine 20 | } 21 | \examples{ 22 | prediction <- data.frame( 23 | value = c(0.1, 0.2, 0.3, 0.4, 0.5), 24 | outcomeCount = c(0, 1, 0, 1, 1)) 25 | calibrationLine(prediction, numberOfStrata = 1) 26 | } 27 | -------------------------------------------------------------------------------- /man/minMaxNormalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{minMaxNormalize} 4 | \alias{minMaxNormalize} 5 | \title{A function that normalizes continous features to have values between 0 and 1} 6 | \usage{ 7 | minMaxNormalize(trainData, featureEngineeringSettings, done = FALSE) 8 | } 9 | \arguments{ 10 | \item{trainData}{The training data to be normalized} 11 | 12 | \item{featureEngineeringSettings}{The settings for the normalization} 13 | 14 | \item{done}{Whether the data has already been normalized (bool)} 15 | } 16 | \value{ 17 | The normalized data 18 | } 19 | \description{ 20 | A function that normalizes continous features to have values between 0 and 1 21 | } 22 | \details{ 23 | uses value - min / (max - min) to normalize the data 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /man/getPredictionDistribution_binary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictionDistribution.R 3 | \name{getPredictionDistribution_binary} 4 | \alias{getPredictionDistribution_binary} 5 | \title{Calculates the prediction distribution} 6 | \usage{ 7 | getPredictionDistribution_binary(prediction, evalColumn, ...) 
8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction object} 11 | 12 | \item{evalColumn}{A column that is used to stratify the results} 13 | 14 | \item{...}{Other inputs} 15 | } 16 | \value{ 17 | The 0.00, 0.1, 0.25, 0.5, 0.75, 0.9, 1.00 quantile pf the prediction, 18 | the mean and standard deviation per class 19 | } 20 | \description{ 21 | Calculates the prediction distribution 22 | } 23 | \details{ 24 | Calculates the quantiles from a predition object 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/removeRareFeatures.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{removeRareFeatures} 4 | \alias{removeRareFeatures} 5 | \title{A function that removes rare features from the data} 6 | \usage{ 7 | removeRareFeatures(trainData, featureEngineeringSettings, done = FALSE) 8 | } 9 | \arguments{ 10 | \item{trainData}{The data to be normalized} 11 | 12 | \item{featureEngineeringSettings}{The settings for the normalization} 13 | 14 | \item{done}{Whether to find and remove rare features or remove them only (bool)} 15 | } 16 | \value{ 17 | The data with rare features removed 18 | } 19 | \description{ 20 | A function that removes rare features from the data 21 | } 22 | \details{ 23 | removes features that are present in less than a certain fraction of the population 24 | } 25 | \keyword{internal} 26 | -------------------------------------------------------------------------------- /deploy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -o errexit -o nounset 3 | addToDrat(){ 4 | PKG_REPO=$PWD 5 | 6 | ## Build package tar ball 7 | export PKG_TARBALL=$(ls *.tar.gz) 8 | 9 | cd ..; mkdir drat; cd drat 10 | 11 | ## Set up Repo parameters 12 | git init 13 | git config user.name "Martijn Schuemie" 
14 | git config user.email "schuemie@ohdsi.org" 15 | git config --global push.default simple 16 | 17 | ## Get drat repo 18 | git remote add upstream "https://$GH_TOKEN@github.com/OHDSI/drat.git" 19 | git fetch upstream 2>err.txt 20 | git checkout gh-pages 21 | 22 | ## Link to local R packages 23 | echo 'R_LIBS=~/Rlib' > .Renviron 24 | 25 | Rscript -e "drat::insertPackage('$PKG_REPO/$PKG_TARBALL', \ 26 | repodir = '.', \ 27 | commit='GitHub Actions release: $PKG_TARBALL run $GITHUB_RUN_ID')" 28 | git push 29 | 30 | } 31 | addToDrat 32 | -------------------------------------------------------------------------------- /man/setNaiveBayes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnClassifierSettings.R 3 | \name{setNaiveBayes} 4 | \alias{setNaiveBayes} 5 | \title{Create setting for naive bayes model with python} 6 | \usage{ 7 | setNaiveBayes() 8 | } 9 | \value{ 10 | a modelSettings object 11 | } 12 | \description{ 13 | Create setting for naive bayes model with python 14 | } 15 | \examples{ 16 | \dontshow{ # dontrun reason: requires python's scikit-learn, checkSklearn() will error without it } 17 | \dontrun{ 18 | plpData <- getEunomiaPlpData() 19 | model <- setNaiveBayes() 20 | analysisId <- "naiveBayes" 21 | saveLocation <- file.path(tempdir(), analysisId) 22 | results <- runPlp(plpData, modelSettings = model, 23 | saveDirectory = saveLocation, 24 | analysisId = analysisId) 25 | # clean up 26 | unlink(saveLocation, recursive = TRUE) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /man/simulationProfile.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PatientLevelPrediction.R 3 | \docType{data} 4 | \name{simulationProfile} 5 | \alias{simulationProfile} 6 | \title{A 
simulation profile for generating synthetic patient level prediction data} 7 | \format{ 8 | A data frame containing the following elements: 9 | \describe{ 10 | \item{covariatePrevalence}{prevalence of all covariates} 11 | \item{outcomeModels}{regression model parameters to simulate outcomes} 12 | \item{metaData}{settings used to simulate the profile} 13 | \item{covariateRef}{covariateIds and covariateNames} 14 | \item{timePrevalence}{time window} 15 | \item{exclusionPrevalence}{prevalence of exclusion of covariates} 16 | } 17 | } 18 | \usage{ 19 | data(simulationProfile) 20 | } 21 | \description{ 22 | A simulation profile for generating synthetic patient level prediction data 23 | } 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /man/createFeatureEngineeringSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createFeatureEngineeringSettings} 4 | \alias{createFeatureEngineeringSettings} 5 | \title{Create the settings for defining any feature engineering that will be done} 6 | \usage{ 7 | createFeatureEngineeringSettings(type = "none") 8 | } 9 | \arguments{ 10 | \item{type}{(character) Choice of: \itemize{ 11 | \item'none' No feature engineering - this is the default 12 | }} 13 | } 14 | \value{ 15 | An object of class \code{featureEngineeringSettings} 16 | } 17 | \description{ 18 | Create the settings for defining any feature engineering that will be done 19 | } 20 | \details{ 21 | Returns an object of class \code{featureEngineeringSettings} that specifies the sampling function that will be called and the settings 22 | } 23 | \examples{ 24 | createFeatureEngineeringSettings(type = "none") 25 | } 26 | -------------------------------------------------------------------------------- /man/createSimpleImputer.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Imputation.R 3 | \name{createSimpleImputer} 4 | \alias{createSimpleImputer} 5 | \title{Create Simple Imputer settings} 6 | \usage{ 7 | createSimpleImputer(method = "mean", missingThreshold = 0.3) 8 | } 9 | \arguments{ 10 | \item{method}{The method to use for imputation, either "mean" or "median"} 11 | 12 | \item{missingThreshold}{The threshold for missing values to be imputed vs removed} 13 | } 14 | \value{ 15 | The settings for the single imputer of class \code{featureEngineeringSettings} 16 | } 17 | \description{ 18 | This function creates the settings for a simple imputer 19 | which imputes missing values with the mean or median 20 | } 21 | \examples{ 22 | # create imputer to impute values with missingness less than 10\% using the median 23 | # of observed values 24 | createSimpleImputer(method = "median", missingThreshold = 0.10) 25 | } 26 | -------------------------------------------------------------------------------- /man/computeAuc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluationSummary.R 3 | \name{computeAuc} 4 | \alias{computeAuc} 5 | \title{Compute the area under the ROC curve} 6 | \usage{ 7 | computeAuc(prediction, confidenceInterval = FALSE) 8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction object as generated using the 11 | \code{\link{predict}} functions.} 12 | 13 | \item{confidenceInterval}{Should 95 percebt confidence intervals be computed?} 14 | } 15 | \value{ 16 | A data.frame containing the AUC and optionally the 95\% confidence interval 17 | } 18 | \description{ 19 | Compute the area under the ROC curve 20 | } 21 | \details{ 22 | Computes the area under the ROC curve for the predicted probabilities, given the true observed 23 | outcomes. 
24 | } 25 | \examples{ 26 | prediction <- data.frame( 27 | value = c(0.1, 0.2, 0.3, 0.4, 0.5), 28 | outcomeCount = c(0, 1, 0, 1, 1)) 29 | computeAuc(prediction) 30 | } 31 | -------------------------------------------------------------------------------- /man/loadPlpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{loadPlpData} 4 | \alias{loadPlpData} 5 | \title{Load the plpData from a folder} 6 | \usage{ 7 | loadPlpData(file, readOnly = TRUE) 8 | } 9 | \arguments{ 10 | \item{file}{The name of the folder containing the data.} 11 | 12 | \item{readOnly}{If true, the data is opened read only.} 13 | } 14 | \value{ 15 | An object of class plpData. 16 | } 17 | \description{ 18 | \code{loadPlpData} loads an object of type plpData from a folder in the file 19 | system. 20 | } 21 | \details{ 22 | The data will be written to a set of files in the folder specified by the user. 
23 | } 24 | \examples{ 25 | data("simulationProfile") 26 | plpData <- simulatePlpData(simulationProfile, n = 500, seed = 42) 27 | saveLoc <- file.path(tempdir(), "loadPlpData") 28 | savePlpData(plpData, saveLoc) 29 | dir(saveLoc) 30 | # clean up 31 | unlink(saveLoc, recursive = TRUE) 32 | } 33 | -------------------------------------------------------------------------------- /man/migrateDataModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DatabaseMigration.R 3 | \name{migrateDataModel} 4 | \alias{migrateDataModel} 5 | \title{Migrate Data model} 6 | \usage{ 7 | migrateDataModel(connectionDetails, databaseSchema, tablePrefix = "") 8 | } 9 | \arguments{ 10 | \item{connectionDetails}{DatabaseConnector connection details object} 11 | 12 | \item{databaseSchema}{String schema where database schema lives} 13 | 14 | \item{tablePrefix}{(Optional) Use if a table prefix is used before table names (e.g. "cd_")} 15 | } 16 | \value{ 17 | Nothing. Is called for side effects of migrating data model in the 18 | database 19 | } 20 | \description{ 21 | Migrate data from current state to next state 22 | 23 | It is strongly advised that you have a backup of all data (either sqlite files, a backup database (in the case you 24 | are using a postgres backend) or have kept the csv/zip files from your data generation. 
25 | } 26 | -------------------------------------------------------------------------------- /man/ici.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluationSummary.R 3 | \name{ici} 4 | \alias{ici} 5 | \title{Calculate the Integrated Calibration Index from Austin and Steyerberg 6 | https://onlinelibrary.wiley.com/doi/full/10.1002/sim.8281} 7 | \usage{ 8 | ici(prediction) 9 | } 10 | \arguments{ 11 | \item{prediction}{the prediction object found in the plpResult object} 12 | } 13 | \value{ 14 | Integrated Calibration Index value or NULL if the calculation fails 15 | } 16 | \description{ 17 | Calculate the Integrated Calibration Index from Austin and Steyerberg 18 | https://onlinelibrary.wiley.com/doi/full/10.1002/sim.8281 19 | } 20 | \details{ 21 | Calculate the Integrated Calibration Index 22 | } 23 | \examples{ 24 | prediction <- data.frame(rowId = 1:100, 25 | outcomeCount = stats::rbinom(1:100, 1, prob=0.5), 26 | value = runif(100), 27 | evaluation = rep("Train", 100)) 28 | ici(prediction) 29 | } 30 | -------------------------------------------------------------------------------- /man/loadPlpResult.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{loadPlpResult} 4 | \alias{loadPlpResult} 5 | \title{Loads the evalaution dataframe} 6 | \usage{ 7 | loadPlpResult(dirPath) 8 | } 9 | \arguments{ 10 | \item{dirPath}{The directory where the evaluation was saved} 11 | } 12 | \value{ 13 | \if{html}{\out{
}}\preformatted{ The runPlp object 14 | }\if{html}{\out{
}} 15 | } 16 | \description{ 17 | Loads the evaluation dataframe 18 | } 19 | \details{ 20 | Loads the evaluation 21 | } 22 | \examples{ 23 | \donttest{ \dontshow{ # takes too long } 24 | data("simulationProfile") 25 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 26 | saveLoc <- file.path(tempdir(), "loadPlpResult") 27 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 28 | savePlpResult(results, saveLoc) 29 | loadedResults <- loadPlpResult(saveLoc) 30 | # clean up 31 | unlink(saveLoc, recursive = TRUE) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /man/createRandomForestFeatureSelection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createRandomForestFeatureSelection} 4 | \alias{createRandomForestFeatureSelection} 5 | \title{Create the settings for random forest based feature selection} 6 | \usage{ 7 | createRandomForestFeatureSelection(ntrees = 2000, maxDepth = 17) 8 | } 9 | \arguments{ 10 | \item{ntrees}{number of trees in the forest} 11 | 12 | \item{maxDepth}{Max depth of each tree} 13 | } 14 | \value{ 15 | An object of class \code{featureEngineeringSettings} 16 | } 17 | \description{ 18 | Create the settings for random forest based feature selection 19 | } 20 | \details{ 21 | Returns an object of class \code{featureEngineeringSettings} that specifies the sampling function that will be called and the settings 22 | } 23 | \examples{ 24 | \dontshow{ # dontrun reason: requires python and scikit-learn } 25 | \dontrun{ #' featureSelector <- createRandomForestFeatureSelection(ntrees = 2000, maxDepth = 10) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /inst/shinyConfig.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"shinyModules": [ 3 | { 4 | "id": "about", 5 | "tabName": "About", 6 | "tabText": "About", 7 | "shinyModulePackage": "OhdsiShinyModules", 8 | "uiFunction": "aboutViewer", 9 | "serverFunction": "aboutServer", 10 | "databaseConnectionKeyService": null, 11 | "databaseConnectionKeyUsername": null, 12 | "infoBoxFile": "aboutHelperFile()", 13 | "icon": "info", 14 | "keyring": true, 15 | "order": 1 16 | }, 17 | { 18 | "id": "prediction", 19 | "tabName": "Prediction", 20 | "tabText": "Prediction", 21 | "shinyModulePackage": "OhdsiShinyModules", 22 | "uiFunction": "predictionViewer", 23 | "serverFunction": "predictionServer", 24 | "databaseConnectionKeyService": "resultDatabaseDetails", 25 | "databaseConnectionKeyUsername": "prediction", 26 | "infoBoxFile": "predictionHelperFile()", 27 | "icon": "chart-line", 28 | "keyring": false, 29 | "order": 2 30 | } 31 | ] 32 | } 33 | -------------------------------------------------------------------------------- /man/sklearnToJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnToJson.R 3 | \name{sklearnToJson} 4 | \alias{sklearnToJson} 5 | \title{Saves sklearn python model object to json in path} 6 | \usage{ 7 | sklearnToJson(model, path) 8 | } 9 | \arguments{ 10 | \item{model}{a fitted sklearn python model object} 11 | 12 | \item{path}{path to the saved model file} 13 | } 14 | \value{ 15 | nothing, saves the model to the path as json 16 | } 17 | \description{ 18 | Saves sklearn python model object to json in path 19 | } 20 | \examples{ 21 | \dontshow{ # dontrun reason: requires python environment with sklearn } 22 | \dontrun{ 23 | sklearn <- reticulate::import("sklearn", convert = FALSE) 24 | model <- sklearn$tree$DecisionTreeClassifier() 25 | model$fit(sklearn$datasets$load_iris()$data, sklearn$datasets$load_iris()$target) 26 | saveLoc <- file.path(tempdir(), "model.json") 27 | sklearnToJson(model, 
saveLoc) 28 | # the model.json is saved in the tempdir 29 | dir(tempdir()) 30 | # clean up 31 | unlink(saveLoc) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /man/createSplineSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createSplineSettings} 4 | \alias{createSplineSettings} 5 | \title{Create the settings for adding a spline for continuous variables} 6 | \usage{ 7 | createSplineSettings(continousCovariateId, knots, analysisId = 683) 8 | } 9 | \arguments{ 10 | \item{continousCovariateId}{The covariateId to apply splines to} 11 | 12 | \item{knots}{Either number of knots of vector of split values} 13 | 14 | \item{analysisId}{The analysisId to use for the spline covariates} 15 | } 16 | \value{ 17 | An object of class \code{featureEngineeringSettings} 18 | } 19 | \description{ 20 | Create the settings for adding a spline for continuous variables 21 | } 22 | \details{ 23 | Returns an object of class \code{featureEngineeringSettings} that specifies the sampling function that will be called and the settings 24 | } 25 | \examples{ 26 | # create splines for age (1002) with 5 knots 27 | createSplineSettings(continousCovariateId = 1002, knots = 5, analysisId = 683) 28 | } 29 | -------------------------------------------------------------------------------- /man/loadPrediction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{loadPrediction} 4 | \alias{loadPrediction} 5 | \title{Loads the prediction dataframe to json} 6 | \usage{ 7 | loadPrediction(fileLocation) 8 | } 9 | \arguments{ 10 | \item{fileLocation}{The location with the saved prediction} 11 | } 12 | \value{ 13 | \if{html}{\out{
}}\preformatted{ The prediction data.frame 14 | }\if{html}{\out{
}} 15 | } 16 | \description{ 17 | Loads the prediction dataframe to json 18 | } 19 | \details{ 20 | Loads the prediciton json file 21 | } 22 | \examples{ 23 | \donttest{ \dontshow{ # takes too long } 24 | data("simulationProfile") 25 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 26 | saveLoc <- file.path(tempdir(), "loadPrediction") 27 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 28 | savePrediction(results$prediction, saveLoc) 29 | dir(saveLoc) 30 | loadedPrediction <- loadPrediction(file.path(saveLoc, "prediction.json")) 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /man/setPythonEnvironment.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/HelperFunctions.R 3 | \name{setPythonEnvironment} 4 | \alias{setPythonEnvironment} 5 | \title{Use the python environment created using configurePython()} 6 | \usage{ 7 | setPythonEnvironment(envname = "PLP", envtype = NULL) 8 | } 9 | \arguments{ 10 | \item{envname}{A string for the name of the virtual environment (default is 'PLP')} 11 | 12 | \item{envtype}{An option for specifying the environment as'conda' or 'python'. 
If NULL then the default is 'conda' for windows users and 'python' for non-windows users} 13 | } 14 | \value{ 15 | A string indicating the which python environment will be used 16 | } 17 | \description{ 18 | Use the python environment created using configurePython() 19 | } 20 | \details{ 21 | This function sets PatientLevelPrediction to use a python environment 22 | } 23 | \examples{ 24 | \dontshow{ # dontrun reason: don't modify environment in examples } 25 | \dontrun{ #' # create a conda environment named PLP 26 | configurePython(envname="PLP", envtype="conda") 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /tests/testthat/test-sklearnClassifierHelpers.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | test_that("listCartesian works", { 18 | allList <- list(a = list(1, 2), b = list(NULL, "auto"), c = list(-1)) 19 | 20 | paramLists <- listCartesian(allList) 21 | 22 | expect_equal(length(paramLists), 2 * 2 * 1) 23 | expect_equal(names(paramLists[[1]]), c("a", "b", "c")) 24 | expect_equal(length(paramLists[[1]]), 3) 25 | }) 26 | -------------------------------------------------------------------------------- /man/PatientLevelPrediction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PatientLevelPrediction.R 3 | \docType{package} 4 | \name{PatientLevelPrediction} 5 | \alias{PatientLevelPrediction-package} 6 | \alias{PatientLevelPrediction} 7 | \title{PatientLevelPrediction} 8 | \description{ 9 | A package for running predictions using data in the OMOP CDM 10 | } 11 | \seealso{ 12 | Useful links: 13 | \itemize{ 14 | \item \url{https://ohdsi.github.io/PatientLevelPrediction/} 15 | \item \url{https://github.com/OHDSI/PatientLevelPrediction} 16 | \item Report bugs at \url{https://github.com/OHDSI/PatientLevelPrediction/issues} 17 | } 18 | 19 | } 20 | \author{ 21 | \strong{Maintainer}: Egill Fridgeirsson \email{e.fridgeirsson@erasmusmc.nl} 22 | 23 | Authors: 24 | \itemize{ 25 | \item Jenna Reps \email{jreps@its.jnj.com} 26 | \item Martijn Schuemie 27 | \item Marc Suchard 28 | \item Patrick Ryan 29 | \item Peter Rijnbeek 30 | } 31 | 32 | Other contributors: 33 | \itemize{ 34 | \item Observational Health Data Science and Informatics [copyright holder] 35 | } 36 | 37 | } 38 | \keyword{internal} 39 | -------------------------------------------------------------------------------- /man/modelBasedConcordance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluatePlp.R 3 | \name{modelBasedConcordance} 4 | 
\alias{modelBasedConcordance} 5 | \title{Calculate the model-based concordance, which is a calculation of the expected 6 | discrimination performance of a model under the assumption the model predicts 7 | the "TRUE" outcome as detailed in van Klaveren et al. 8 | https://pubmed.ncbi.nlm.nih.gov/27251001/} 9 | \usage{ 10 | modelBasedConcordance(prediction) 11 | } 12 | \arguments{ 13 | \item{prediction}{the prediction object found in the plpResult object} 14 | } 15 | \value{ 16 | The model-based concordance value 17 | } 18 | \description{ 19 | Calculate the model-based concordance, which is a calculation of the expected 20 | discrimination performance of a model under the assumption the model predicts 21 | the "TRUE" outcome as detailed in van Klaveren et al. 22 | https://pubmed.ncbi.nlm.nih.gov/27251001/ 23 | } 24 | \details{ 25 | Calculate the model-based concordance 26 | } 27 | \examples{ 28 | prediction <- data.frame(value = runif(100)) 29 | modelBasedConcordance(prediction) 30 | } 31 | -------------------------------------------------------------------------------- /man/loadPlpModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{loadPlpModel} 4 | \alias{loadPlpModel} 5 | \title{loads the plp model} 6 | \usage{ 7 | loadPlpModel(dirPath) 8 | } 9 | \arguments{ 10 | \item{dirPath}{The location of the model} 11 | } 12 | \value{ 13 | \if{html}{\out{
}}\preformatted{ The plpModel object 14 | }\if{html}{\out{
}} 15 | } 16 | \description{ 17 | loads the plp model 18 | } 19 | \details{ 20 | Loads a plp model that was saved using \code{savePlpModel()} 21 | } 22 | \examples{ 23 | \donttest{ \dontshow{ # takes too long } 24 | data("simulationProfile") 25 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 26 | saveLoc <- file.path(tempdir(), "loadPlpModel") 27 | plpResult <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 28 | savePlpModel(plpResult$model, file.path(saveLoc, "savedModel")) 29 | loadedModel <- loadPlpModel(file.path(saveLoc, "savedModel")) 30 | # show design of loaded model 31 | str(loadedModel$modelDesign) 32 | 33 | # clean up 34 | unlink(saveLoc, recursive = TRUE) 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /man/createValidationSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExternalValidatePlp.R 3 | \name{createValidationSettings} 4 | \alias{createValidationSettings} 5 | \title{createValidationSettings define optional settings for performing external validation} 6 | \usage{ 7 | createValidationSettings(recalibrate = NULL, runCovariateSummary = TRUE) 8 | } 9 | \arguments{ 10 | \item{recalibrate}{A vector of characters specifying the recalibration method to apply} 11 | 12 | \item{runCovariateSummary}{Whether to run the covariate summary for the validation data} 13 | } 14 | \value{ 15 | A setting object of class \code{validationSettings} containing a list of settings for externalValidatePlp 16 | } 17 | \description{ 18 | This function creates the settings required by externalValidatePlp 19 | } 20 | \details{ 21 | Users need to specify whether they want to sample or recalibrate when performing external validation 22 | } 23 | \examples{ 24 | # do weak recalibration and don't run covariate summary 25 | createValidationSettings(recalibrate = 
"weakRecalibration", 26 | runCovariateSummary = FALSE) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/createUnivariateFeatureSelection.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createUnivariateFeatureSelection} 4 | \alias{createUnivariateFeatureSelection} 5 | \title{Create the settings for defining any feature selection that will be done} 6 | \usage{ 7 | createUnivariateFeatureSelection(k = 100) 8 | } 9 | \arguments{ 10 | \item{k}{This function returns the K features most associated 11 | (univariately) to the outcome} 12 | } 13 | \value{ 14 | An object of class \code{featureEngineeringSettings} 15 | } 16 | \description{ 17 | Create the settings for defining any feature selection that will be done 18 | } 19 | \details{ 20 | Returns an object of class \code{featureEngineeringSettings} that specifies 21 | the function that will be called and the settings. Uses the scikit-learn 22 | SelectKBest function with chi2 for univariate feature selection. 
23 | } 24 | \examples{ 25 | \dontshow{ # dontrun reason: requires python and scikit-learn } 26 | \dontrun{ #' # create a feature selection that selects the 100 most associated features 27 | featureSelector <- createUnivariateFeatureSelection(k = 100) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /man/simulatePlpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Simulation.R 3 | \name{simulatePlpData} 4 | \alias{simulatePlpData} 5 | \title{Generate simulated data} 6 | \usage{ 7 | simulatePlpData(plpDataSimulationProfile, n = 10000, seed = NULL) 8 | } 9 | \arguments{ 10 | \item{plpDataSimulationProfile}{An object of type \code{plpDataSimulationProfile} as generated 11 | using the \cr\code{createplpDataSimulationProfile} function.} 12 | 13 | \item{n}{The size of the population to be generated.} 14 | 15 | \item{seed}{An optional seed for the random number generator. If provided, the simulated data are reproducible.} 16 | } 17 | \value{ 18 | An object of type \code{plpData}. 19 | } 20 | \description{ 21 | \code{simulatePlpData} creates a plpData object with simulated data. 22 | } 23 | \details{ 24 | This function generates simulated data that is in many ways similar to the original data on which 25 | the simulation profile is based. 
26 | } 27 | \examples{ 28 | # first load the simulation profile to use 29 | data("simulationProfile") 30 | # then generate the simulated data 31 | plpData <- simulatePlpData(simulationProfile, n = 100, seed = 42) 32 | nrow(plpData$cohorts) 33 | } 34 | -------------------------------------------------------------------------------- /man/computeGridPerformance.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnClassifier.R 3 | \name{computeGridPerformance} 4 | \alias{computeGridPerformance} 5 | \title{Computes grid performance with a specified performance function} 6 | \usage{ 7 | computeGridPerformance(prediction, param, performanceFunct = "computeAuc") 8 | } 9 | \arguments{ 10 | \item{prediction}{a dataframe with predictions and outcomeCount per rowId} 11 | 12 | \item{param}{a list of hyperparameters} 13 | 14 | \item{performanceFunct}{a string specifying which performance function to use 15 | . 
Default \code{'computeAuc'}} 16 | } 17 | \value{ 18 | A list with overview of the performance 19 | } 20 | \description{ 21 | Computes grid performance with a specified performance function 22 | } 23 | \examples{ 24 | prediction <- data.frame(rowId = c(1, 2, 3, 4, 5), 25 | outcomeCount = c(0, 1, 0, 1, 0), 26 | value = c(0.1, 0.9, 0.2, 0.8, 0.3), 27 | index = c(1, 1, 1, 1, 1)) 28 | param <- list(hyperParam1 = 5, hyperParam2 = 100) 29 | computeGridPerformance(prediction, param, performanceFunct = "computeAuc") 30 | } 31 | -------------------------------------------------------------------------------- /man/savePlpData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{savePlpData} 4 | \alias{savePlpData} 5 | \title{Save the plpData to folder} 6 | \usage{ 7 | savePlpData(plpData, file, envir = NULL, overwrite = FALSE) 8 | } 9 | \arguments{ 10 | \item{plpData}{An object of type \code{plpData} as generated using 11 | \code{getPlpData}.} 12 | 13 | \item{file}{The name of the folder where the data will be written. The folder should 14 | not yet exist.} 15 | 16 | \item{envir}{The environment in which to evaluate variables when saving} 17 | 18 | \item{overwrite}{Whether to force overwrite an existing file} 19 | } 20 | \value{ 21 | Called for its side effect, the data will be written to a set of files in the 22 | folder specified by the user. 23 | } 24 | \description{ 25 | \code{savePlpData} saves an object of type plpData to folder. 
26 | } 27 | \examples{ 28 | data("simulationProfile") 29 | plpData <- simulatePlpData(simulationProfile, n = 500, seed = 42) 30 | saveLoc <- file.path(tempdir(), "savePlpData") 31 | savePlpData(plpData, saveLoc) 32 | dir(saveLoc, full.names = TRUE) 33 | 34 | # clean up 35 | unlink(saveLoc, recursive = TRUE) 36 | } 37 | -------------------------------------------------------------------------------- /man/getPredictionDistribution.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PredictionDistribution.R 3 | \name{getPredictionDistribution} 4 | \alias{getPredictionDistribution} 5 | \title{Calculates the prediction distribution} 6 | \usage{ 7 | getPredictionDistribution( 8 | prediction, 9 | predictionType = "binary", 10 | typeColumn = "evaluation" 11 | ) 12 | } 13 | \arguments{ 14 | \item{prediction}{A prediction object} 15 | 16 | \item{predictionType}{The type of prediction (binary or survival)} 17 | 18 | \item{typeColumn}{A column that is used to stratify the results} 19 | } 20 | \value{ 21 | The 0.00, 0.1, 0.25, 0.5, 0.75, 0.9, 1.00 quantiles of the prediction, 22 | the mean and standard deviation per class 23 | } 24 | \description{ 25 | Calculates the prediction distribution 26 | } 27 | \details{ 28 | Calculates the quantiles from a prediction object 29 | } 30 | \examples{ 31 | prediction <- data.frame(rowId = 1:100, 32 | outcomeCount = stats::rbinom(1:100, 1, prob=0.5), 33 | value = runif(100), 34 | evaluation = rep("Train", 100)) 35 | getPredictionDistribution(prediction) 36 | } 37 | -------------------------------------------------------------------------------- /man/savePlpModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{savePlpModel} 4 | \alias{savePlpModel} 5 | \title{Saves the plp model} 6 | 
\usage{ 7 | savePlpModel(plpModel, dirPath) 8 | } 9 | \arguments{ 10 | \item{plpModel}{A trained classifier returned by running \code{runPlp()$model}} 11 | 12 | \item{dirPath}{A location to save the model to} 13 | } 14 | \value{ 15 | \if{html}{\out{
}}\preformatted{ The directory path where the model was saved 16 | }\if{html}{\out{
}} 17 | } 18 | \description{ 19 | Saves the plp model 20 | } 21 | \details{ 22 | Saves the plp model to a user specified folder 23 | } 24 | \examples{ 25 | \donttest{ \dontshow{ # takes too long } 26 | data("simulationProfile") 27 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 28 | saveLoc <- file.path(tempdir(), "savePlpModel") 29 | plpResult <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 30 | path <- savePlpModel(plpResult$model, file.path(saveLoc, "savedModel")) 31 | # show the saved model 32 | dir(path, full.names = TRUE) 33 | 34 | # clean up 35 | unlink(saveLoc, recursive = TRUE) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /man/getThresholdSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ThresholdSummary.R 3 | \name{getThresholdSummary} 4 | \alias{getThresholdSummary} 5 | \title{Calculate all measures for sparse ROC} 6 | \usage{ 7 | getThresholdSummary( 8 | prediction, 9 | predictionType = "binary", 10 | typeColumn = "evaluation" 11 | ) 12 | } 13 | \arguments{ 14 | \item{prediction}{A prediction object} 15 | 16 | \item{predictionType}{The type of prediction (binary or survival)} 17 | 18 | \item{typeColumn}{A column that is used to stratify the results} 19 | } 20 | \value{ 21 | A data.frame with TP, FP, TN, FN, TPR, FPR, accuracy, PPF, FOR and Fmeasure 22 | } 23 | \description{ 24 | Calculate all measures for sparse ROC 25 | } 26 | \details{ 27 | Calculates the TP, FP, TN, FN, TPR, FPR, accuracy, PPF, FOR and Fmeasure 28 | from a prediction object 29 | } 30 | \examples{ 31 | prediction <- data.frame(rowId = 1:100, 32 | outcomeCount = stats::rbinom(1:100, 1, prob=0.5), 33 | value = runif(100), 34 | evaluation = rep("Train", 100)) 35 | summary <- getThresholdSummary(prediction) 36 | str(summary) 37 | } 38 | 
-------------------------------------------------------------------------------- /man/loadPlpShareable.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{loadPlpShareable} 4 | \alias{loadPlpShareable} 5 | \title{Loads the plp result saved as json/csv files for transparent sharing} 6 | \usage{ 7 | loadPlpShareable(loadDirectory) 8 | } 9 | \arguments{ 10 | \item{loadDirectory}{The directory with the results as json/csv files} 11 | } 12 | \value{ 13 | \if{html}{\out{
}}\preformatted{ The runPlp object 14 | }\if{html}{\out{
}} 15 | } 16 | \description{ 17 | Loads the plp result saved as json/csv files for transparent sharing 18 | } 19 | \details{ 20 | Load the main results from json/csv files into a runPlp object 21 | } 22 | \examples{ 23 | \donttest{ \dontshow{ # takes too long } 24 | data("simulationProfile") 25 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 26 | saveLoc <- file.path(tempdir(), "loadPlpShareable") 27 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 28 | savePlpShareable(results, saveLoc) 29 | dir(saveLoc) 30 | loadedResults <- loadPlpShareable(saveLoc) 31 | # clean up 32 | unlink(saveLoc, recursive = TRUE) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /man/sklearnFromJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnToJson.R 3 | \name{sklearnFromJson} 4 | \alias{sklearnFromJson} 5 | \title{Loads sklearn python model from json} 6 | \usage{ 7 | sklearnFromJson(path) 8 | } 9 | \arguments{ 10 | \item{path}{path to the model json file} 11 | } 12 | \value{ 13 | a sklearn python model object 14 | } 15 | \description{ 16 | Loads sklearn python model from json 17 | } 18 | \examples{ 19 | \dontshow{ # dontrun reason: requires python environment with sklearn } 20 | \dontrun{ 21 | plpData <- getEunomiaPlpData() 22 | modelSettings <- setDecisionTree(maxDepth = list(3), minSamplesSplit = list(2), 23 | minSamplesLeaf = list(1), maxFeatures = list(100)) 24 | saveLocation <- file.path(tempdir(), "sklearnFromJson") 25 | results <- runPlp(plpData, modelSettings = modelSettings, saveDirectory = saveLocation) 26 | # view save model 27 | dir(results$model$model, full.names = TRUE) 28 | # load into a sklearn object 29 | model <- sklearnFromJson(file.path(results$model$model, "model.json")) 30 | # max depth is 3 as we set in beginning 31 | model$max_depth 32 | # 
clean up 33 | unlink(saveLocation, recursive = TRUE) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /man/createExistingSplitSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DataSplitting.R 3 | \name{createExistingSplitSettings} 4 | \alias{createExistingSplitSettings} 5 | \title{Create the settings for defining how the plpData are split into 6 | test/validation/train sets using an existing split - good to use for 7 | reproducing results from a different run} 8 | \usage{ 9 | createExistingSplitSettings(splitIds) 10 | } 11 | \arguments{ 12 | \item{splitIds}{(data.frame) A data frame with rowId and index columns of 13 | type integer/numeric. Index is -1 for test set, positive integer for train 14 | set folds} 15 | } 16 | \value{ 17 | An object of class \code{splitSettings} 18 | } 19 | \description{ 20 | Create the settings for defining how the plpData are split into 21 | test/validation/train sets using an existing split - good to use for 22 | reproducing results from a different run 23 | } 24 | \examples{ 25 | # rowId 1 is in fold 1, rowId 2 is in fold 2, rowId 3 is in the test set 26 | # rowId 4 is in fold 1, rowId 5 is in fold 2 27 | createExistingSplitSettings(splitIds = data.frame(rowId = c(1, 2, 3, 4, 5), 28 | index = c(1, 2, -1, 1, 2))) 29 | } 30 | -------------------------------------------------------------------------------- /man/loadPlpAnalysesJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunMultiplePlp.R 3 | \name{loadPlpAnalysesJson} 4 | \alias{loadPlpAnalysesJson} 5 | \title{Load the multiple prediction json settings from a file} 6 | \usage{ 7 | loadPlpAnalysesJson(jsonFileLocation) 8 | } 9 | \arguments{ 10 | \item{jsonFileLocation}{The location 
of the file 'predictionAnalysisList.json' with the modelDesignList} 11 | } 12 | \value{ 13 | A list with the modelDesignList and cohortDefinitions 14 | } 15 | \description{ 16 | Load the multiple prediction json settings from a file 17 | } 18 | \details{ 19 | This function interprets a json with the multiple prediction settings and creates a list 20 | that can be combined with connection settings to run a multiple prediction study 21 | } 22 | \examples{ 23 | modelDesign <- createModelDesign(targetId = 1, outcomeId = 2, 24 | modelSettings = setLassoLogisticRegression()) 25 | saveLoc <- file.path(tempdir(), "loadPlpAnalysesJson") 26 | savePlpAnalysesJson(modelDesignList = modelDesign, saveDirectory = saveLoc) 27 | loadPlpAnalysesJson(file.path(saveLoc, "predictionAnalysisList.json")) 28 | # clean use 29 | unlink(saveLoc, recursive = TRUE) 30 | } 31 | -------------------------------------------------------------------------------- /man/configurePython.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/HelperFunctions.R 3 | \name{configurePython} 4 | \alias{configurePython} 5 | \title{Sets up a python environment to use for PLP (can be conda or venv)} 6 | \usage{ 7 | configurePython(envname = "PLP", envtype = NULL, condaPythonVersion = "3.11") 8 | } 9 | \arguments{ 10 | \item{envname}{A string for the name of the virtual environment (default is 'PLP')} 11 | 12 | \item{envtype}{An option for specifying the environment as'conda' or 'python'. 
If NULL then the default is 'conda' for windows users and 'python' for non-windows users} 13 | 14 | \item{condaPythonVersion}{String, Python version to use when creating a conda environment} 15 | } 16 | \value{ 17 | location of the created conda or virtual python environment 18 | } 19 | \description{ 20 | Sets up a python environment to use for PLP (can be conda or venv) 21 | } 22 | \details{ 23 | This function creates a python environment that can be used by PatientLevelPrediction 24 | and installs all the required package dependancies. 25 | } 26 | \examples{ 27 | \dontshow{ # dontrun reason: don't modify environment in examples } 28 | \dontrun{ 29 | configurePython(envname="PLP", envtype="conda") 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /man/savePrediction.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{savePrediction} 4 | \alias{savePrediction} 5 | \title{Saves the prediction dataframe to a json file} 6 | \usage{ 7 | savePrediction(prediction, dirPath, fileName = "prediction.json") 8 | } 9 | \arguments{ 10 | \item{prediction}{The prediciton data.frame} 11 | 12 | \item{dirPath}{The directory to save the prediction json} 13 | 14 | \item{fileName}{The name of the json file that will be saved} 15 | } 16 | \value{ 17 | \if{html}{\out{
}}\preformatted{ The file location where the prediction was saved 18 | }\if{html}{\out{
}} 19 | } 20 | \description{ 21 | Saves the prediction dataframe to a json file 22 | } 23 | \details{ 24 | Saves the prediction data frame returned by predict.R to an json file and 25 | returns the fileLocation where the prediction is saved 26 | } 27 | \examples{ 28 | prediction <- data.frame( 29 | rowIds = c(1, 2, 3), 30 | outcomeCount = c(0, 1, 0), 31 | value = c(0.1, 0.9, 0.2) 32 | ) 33 | saveLoc <- file.path(tempdir()) 34 | savePrediction(prediction, saveLoc) 35 | dir(saveLoc) 36 | 37 | # clean up 38 | unlink(file.path(saveLoc, "prediction.json")) 39 | } 40 | -------------------------------------------------------------------------------- /man/predictGlm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Glm.R 3 | \name{predictGlm} 4 | \alias{predictGlm} 5 | \title{predict using a logistic regression model} 6 | \usage{ 7 | predictGlm(plpModel, data, cohort) 8 | } 9 | \arguments{ 10 | \item{plpModel}{An object of type \code{plpModel} - a patient level 11 | prediction model} 12 | 13 | \item{data}{An object of type \code{plpData} - the patient level prediction 14 | data extracted from the CDM.} 15 | 16 | \item{cohort}{The population dataframe created using 17 | \code{createStudyPopulation} who will have their risks predicted or a cohort 18 | without the outcome known} 19 | } 20 | \value{ 21 | A dataframe containing the prediction for each person in the 22 | population 23 | } 24 | \description{ 25 | Predict risk with a given plpModel containing a generalized linear model. 
26 | } 27 | \examples{ 28 | coefficients <- data.frame( 29 | covariateId = c(1002), 30 | coefficient = c(0.05)) 31 | model <- createGlmModel(coefficients, intercept = -2.5) 32 | data("simulationProfile") 33 | plpData <- simulatePlpData(simulationProfile, n = 50, seed = 42) 34 | prediction <- predictGlm(model, plpData, plpData$cohorts) 35 | # see the predicted risk values 36 | head(prediction) 37 | } 38 | -------------------------------------------------------------------------------- /man/createStratifiedImputationSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createStratifiedImputationSettings} 4 | \alias{createStratifiedImputationSettings} 5 | \title{Create the settings for using stratified imputation.} 6 | \usage{ 7 | createStratifiedImputationSettings(covariateId, ageSplits = NULL) 8 | } 9 | \arguments{ 10 | \item{covariateId}{The covariateId that needs imputed values} 11 | 12 | \item{ageSplits}{A vector of age splits in years to create age groups} 13 | } 14 | \value{ 15 | An object of class \code{featureEngineeringSettings} 16 | } 17 | \description{ 18 | Create the settings for using stratified imputation. 19 | } 20 | \details{ 21 | Returns an object of class \code{featureEngineeringSettings} that specifies 22 | how to do stratified imputation. This function splits the covariate into 23 | age groups and fits splines to the covariate within each age group. The spline 24 | values are then used to impute missing values. 
25 | } 26 | \examples{ 27 | # create a stratified imputation settings for covariate 1050 with age splits 28 | # at 50 and 70 29 | stratifiedImputationSettings <- 30 | createStratifiedImputationSettings(covariateId = 1050, ageSplits = c(50, 70)) 31 | } 32 | -------------------------------------------------------------------------------- /man/plotPlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotPlp} 4 | \alias{plotPlp} 5 | \title{Plot all the PatientLevelPrediction plots} 6 | \usage{ 7 | plotPlp(plpResult, saveLocation = NULL, typeColumn = "evaluation") 8 | } 9 | \arguments{ 10 | \item{plpResult}{Object returned by the runPlp() function} 11 | 12 | \item{saveLocation}{Name of the directory where the plots should be saved (NULL means no saving)} 13 | 14 | \item{typeColumn}{The name of the column specifying the evaluation type 15 | (to stratify the plots)} 16 | } 17 | \value{ 18 | TRUE if it ran, plots are saved in the specified directory 19 | } 20 | \description{ 21 | Plot all the PatientLevelPrediction plots 22 | } 23 | \details{ 24 | Create a directory with all the plots 25 | } 26 | \examples{ 27 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 28 | \donttest{ \dontshow{ # takes too long } 29 | data("simulationProfile") 30 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 31 | saveLoc <- file.path(tempdir(), "plotPlp") 32 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 33 | plotPlp(results) 34 | # clean up 35 | unlink(saveLoc, recursive = TRUE) 36 | } 37 | \dontshow{\}) # examplesIf} 38 | } 39 | -------------------------------------------------------------------------------- /tests/testthat/helper-expectations.R: -------------------------------------------------------------------------------- 1 | # common tests that can be grouped together, such as 
testing the output from fitplp 2 | expect_correct_fitPlp <- function(plpModel, trainData, testLocation = TRUE) { 3 | outcomeId <- 3 4 | # predictions are same amount as labels 5 | multiplicativeFactor <- dplyr::n_distinct(plpModel$prediction %>% 6 | dplyr::pull(.data$evaluationType)) 7 | expect_equal(NROW(trainData$labels) * multiplicativeFactor, NROW(plpModel$prediction)) 8 | 9 | # predictions are all between 0 and 1 10 | expect_true(all((plpModel$prediction$value >= 0) & 11 | (plpModel$prediction$value <= 1))) 12 | 13 | # model directory exists 14 | if (testLocation) { 15 | expect_true(dir.exists(plpModel$model)) 16 | } 17 | 18 | expect_equal(plpModel$modelDesign$outcomeId, outcomeId) 19 | expect_equal(plpModel$modelDesign$targetId, 1) 20 | 21 | # structure of plpModel is correct 22 | expect_equal(names(plpModel), c( 23 | "model", "preprocessing", "prediction", 24 | "modelDesign", "trainDetails", "covariateImportance" 25 | )) 26 | } 27 | 28 | expect_correct_predictions <- function(predictions, testData) { 29 | # predictions are all between 0 and 1 30 | expect_true(all((predictions$value >= 0) & (predictions$value <= 1))) 31 | expect_equal(NROW(testData$labels), NROW(predictions)) 32 | } 33 | -------------------------------------------------------------------------------- /man/savePlpResult.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{savePlpResult} 4 | \alias{savePlpResult} 5 | \title{Saves the result from runPlp into the location directory} 6 | \usage{ 7 | savePlpResult(result, dirPath) 8 | } 9 | \arguments{ 10 | \item{result}{The result of running runPlp()} 11 | 12 | \item{dirPath}{The directory to save the csv} 13 | } 14 | \value{ 15 | \if{html}{\out{
}}\preformatted{ The directory path where the results were saved 16 | }\if{html}{\out{
}} 17 | } 18 | \description{ 19 | Saves the result from runPlp into the location directory 20 | } 21 | \details{ 22 | Saves the result from runPlp into the location directory 23 | } 24 | \examples{ 25 | \donttest{ \dontshow{ # takes too long } 26 | data("simulationProfile") 27 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 28 | saveLoc <- file.path(tempdir(), "savePlpResult") 29 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 30 | # save the results 31 | newSaveLoc <- file.path(tempdir(), "savePlpResult", "saved") 32 | savePlpResult(results, newSaveLoc) 33 | # show the saved results 34 | dir(newSaveLoc, recursive = TRUE, full.names = TRUE) 35 | 36 | # clean up 37 | unlink(saveLoc, recursive = TRUE) 38 | unlink(newSaveLoc, recursive = TRUE) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /man/MapIds.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Formatting.R 3 | \name{MapIds} 4 | \alias{MapIds} 5 | \title{Map covariate and row Ids so they start from 1} 6 | \usage{ 7 | MapIds(covariateData, cohort = NULL, mapping = NULL) 8 | } 9 | \arguments{ 10 | \item{covariateData}{a covariateData object} 11 | 12 | \item{cohort}{if specified rowIds restricted to the ones in cohort} 13 | 14 | \item{mapping}{A pre defined mapping to use} 15 | } 16 | \value{ 17 | a new \code{covariateData} object with remapped covariate and row ids 18 | } 19 | \description{ 20 | this functions takes covariate data and a cohort/population and remaps 21 | the covariate and row ids, restricts to pop and saves/creates mapping 22 | } 23 | \examples{ 24 | covariateData <- Andromeda::andromeda( 25 | covariates = data.frame(rowId = c(1, 3, 5, 7, 9), 26 | covariateId = c(10, 20, 10, 10, 20), 27 | covariateValue = c(1, 1, 1, 1, 1)), 28 | covariateRef = data.frame(covariateId = c(10, 20), 29 | 
covariateNames = c("covariateA", 30 | "covariateB"), 31 | analysisId = c(1, 1))) 32 | mappedData <- MapIds(covariateData) 33 | # columnId and rowId are now starting from 1 and are consecutive 34 | mappedData$covariates 35 | } 36 | -------------------------------------------------------------------------------- /R/SklearnClassifierHelpers.R: -------------------------------------------------------------------------------- 1 | # @file SklearnClassifierHelpers.R 2 | # 3 | # Copyright 2022 Observational Health Data Sciences and Informatics 4 | # 5 | # This file is part of PatientLevelPrediction 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | #' Cartesian product 20 | #' 21 | #' Computes the Cartesian product of all the combinations of elements in a list 22 | #' 23 | #' @param allList a list of lists 24 | #' @return A list with all possible combinations from the input list of lists 25 | #' @examples 26 | #' listCartesian(list(list(1, 2), list(3, 4))) 27 | #' @export 28 | listCartesian <- function(allList) { 29 | combinations <- expand.grid(allList, stringsAsFactors = FALSE) 30 | results <- lapply(seq_len(nrow(combinations)), 31 | function(i) lapply(combinations, function(x) x[i][[1]])) 32 | return(results) 33 | } 34 | -------------------------------------------------------------------------------- /man/predictPlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Predict.R 3 | \name{predictPlp} 4 | \alias{predictPlp} 5 | \title{predictPlp} 6 | \usage{ 7 | predictPlp(plpModel, plpData, population, timepoint) 8 | } 9 | \arguments{ 10 | \item{plpModel}{An object of type \code{plpModel} - a patient level prediction model} 11 | 12 | \item{plpData}{An object of type \code{plpData} - the patient level prediction 13 | data extracted from the CDM.} 14 | 15 | \item{population}{The population created using createStudyPopulation() who will have their risks predicted or a cohort without the outcome known} 16 | 17 | \item{timepoint}{The timepoint to predict risk (survival models only)} 18 | } 19 | \value{ 20 | A data frame containing the predicted risk values 21 | } 22 | \description{ 23 | Predict the risk of the outcome using the input plpModel for the input plpData 24 | } 25 | \details{ 26 | The function applied the trained model on the plpData to make predictions 27 | } 28 | \examples{ 29 | coefficients <- data.frame( 30 | covariateId = c(1002), 31 | coefficient = c(0.05) 32 | ) 33 | model <- createGlmModel(coefficients, intercept = -2.5) 34 | data("simulationProfile") 35 | plpData <- 
simulatePlpData(simulationProfile, n = 50, seed = 42) 36 | prediction <- predictPlp(model, plpData, plpData$cohorts) 37 | # see the predicted risk values 38 | head(prediction) 39 | } 40 | -------------------------------------------------------------------------------- /man/createNormalizer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createNormalizer} 4 | \alias{createNormalizer} 5 | \title{Create the settings for normalizing the data @param type The type of normalization to use, either "minmax" or "robust"} 6 | \usage{ 7 | createNormalizer(type = "minmax", settings = list()) 8 | } 9 | \arguments{ 10 | \item{type}{The type of normalization to use, either "minmax" or "robust"} 11 | 12 | \item{settings}{A list of settings for the normalization. 13 | For robust normalization, the settings list can contain a boolean value for 14 | clip, which clips the values to be between -3 and 3 after normalization. 
See 15 | https://arxiv.org/abs/2407.04491} 16 | } 17 | \value{ 18 | An object of class \code{featureEngineeringSettings} 19 | 20 | An object of class \code{featureEngineeringSettings}' 21 | } 22 | \description{ 23 | Create the settings for normalizing the data @param type The type of normalization to use, either "minmax" or "robust" 24 | } 25 | \examples{ 26 | # create a minmax normalizer that normalizes the data between 0 and 1 27 | normalizer <- createNormalizer(type = "minmax") 28 | # create a robust normalizer that normalizes the data by the interquartile range 29 | # and squeezes the values to be between -3 and 3 30 | normalizer <- createNormalizer(type = "robust", settings = list(clip = TRUE)) 31 | } 32 | -------------------------------------------------------------------------------- /man/createExecuteSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunPlpHelpers.R 3 | \name{createExecuteSettings} 4 | \alias{createExecuteSettings} 5 | \title{Creates list of settings specifying what parts of runPlp to execute} 6 | \usage{ 7 | createExecuteSettings( 8 | runSplitData = FALSE, 9 | runSampleData = FALSE, 10 | runFeatureEngineering = FALSE, 11 | runPreprocessData = FALSE, 12 | runModelDevelopment = FALSE, 13 | runCovariateSummary = FALSE 14 | ) 15 | } 16 | \arguments{ 17 | \item{runSplitData}{TRUE or FALSE whether to split data into train/test} 18 | 19 | \item{runSampleData}{TRUE or FALSE whether to over or under sample} 20 | 21 | \item{runFeatureEngineering}{TRUE or FALSE whether to do feature engineering} 22 | 23 | \item{runPreprocessData}{TRUE or FALSE whether to do preprocessing} 24 | 25 | \item{runModelDevelopment}{TRUE or FALSE whether to develop the model} 26 | 27 | \item{runCovariateSummary}{TRUE or FALSE whether to create covariate summary} 28 | } 29 | \value{ 30 | list with TRUE/FALSE for each part of runPlp 31 | } 32 | 
\description{ 33 | Creates list of settings specifying what parts of runPlp to execute 34 | } 35 | \details{ 36 | define what parts of runPlp to execute 37 | } 38 | \examples{ 39 | # create settings with only split and model development 40 | createExecuteSettings(runSplitData = TRUE, runModelDevelopment = TRUE) 41 | } 42 | -------------------------------------------------------------------------------- /.settings/org.eclipse.cdt.managedbuilder.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/CPATH/delimiter=; 3 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/CPATH/operation=remove 4 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/CPLUS_INCLUDE_PATH/delimiter=; 5 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/CPLUS_INCLUDE_PATH/operation=remove 6 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/C_INCLUDE_PATH/delimiter=; 7 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/C_INCLUDE_PATH/operation=remove 8 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/append=true 9 | environment/buildEnvironmentInclude/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/appendContributed=true 10 | environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/LIBRARY_PATH/delimiter=; 11 | environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/LIBRARY_PATH/operation=remove 12 | environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/append=true 13 | environment/buildEnvironmentLibrary/cdt.managedbuild.config.gnu.mingw.exe.debug.350870109/appendContributed=true 
14 | -------------------------------------------------------------------------------- /man/viewPlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ViewShinyPlp.R 3 | \name{viewPlp} 4 | \alias{viewPlp} 5 | \title{viewPlp - Interactively view the performance and model settings} 6 | \usage{ 7 | viewPlp(runPlp, validatePlp = NULL, diagnosePlp = NULL) 8 | } 9 | \arguments{ 10 | \item{runPlp}{The output of runPlp() (an object of class 'runPlp')} 11 | 12 | \item{validatePlp}{The output of externalValidatePlp (on object of class 'validatePlp')} 13 | 14 | \item{diagnosePlp}{The output of diagnosePlp()} 15 | } 16 | \value{ 17 | Opens a shiny app for interactively viewing the results 18 | } 19 | \description{ 20 | This is a shiny app for viewing interactive plots of the performance and the settings 21 | } 22 | \details{ 23 | Either the result of runPlp and view the plots 24 | } 25 | \examples{ 26 | \dontshow{if (rlang::is_interactive() && rlang::is_installed("OhdsiShinyAppBuilder") ) withAutoprint(\{ # examplesIf} 27 | \donttest{ \dontshow{ # takes too long } 28 | data("simulationProfile") 29 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 30 | saveLoc <- file.path(tempdir(), "viewPlp", "development") 31 | results <- runPlp(plpData, saveDirectory = saveLoc) 32 | # view result files 33 | dir(saveLoc, recursive = TRUE) 34 | # open shiny app 35 | viewPlp(results) 36 | # clean up, shiny app can't be opened after the following has been run 37 | unlink(saveLoc, recursive = TRUE) 38 | } 39 | \dontshow{\}) # examplesIf} 40 | } 41 | -------------------------------------------------------------------------------- /man/createPreprocessSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PreprocessingData.R 3 | 
\name{createPreprocessSettings} 4 | \alias{createPreprocessSettings} 5 | \title{Create the settings for preprocessing the trainData.} 6 | \usage{ 7 | createPreprocessSettings( 8 | minFraction = 0.001, 9 | normalize = TRUE, 10 | removeRedundancy = TRUE 11 | ) 12 | } 13 | \arguments{ 14 | \item{minFraction}{The minimum fraction of target population who must have a 15 | covariate for it to be included in the model training} 16 | 17 | \item{normalize}{Whether to normalise the covariates before training 18 | (Default: TRUE)} 19 | 20 | \item{removeRedundancy}{Whether to remove redundant features (Default: TRUE) 21 | Redundant features are features that within an analysisId together cover all 22 | observations. For example with ageGroups, if you have ageGroup 0-18 and 18-100 23 | and all patients are in one of these groups, then one of these groups is redundant.} 24 | } 25 | \value{ 26 | An object of class \code{preprocessingSettings} 27 | } 28 | \description{ 29 | Create the settings for preprocessing the trainData. 
30 | } 31 | \details{ 32 | Returns an object of class \code{preprocessingSettings} that specifies how to 33 | preprocess the training data 34 | } 35 | \examples{ 36 | # Create the settings for preprocessing, remove no features, normalise the data 37 | createPreprocessSettings(minFraction = 0.0, normalize = TRUE, removeRedundancy = FALSE) 38 | } 39 | -------------------------------------------------------------------------------- /man/predictCyclops.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CyclopsModels.R 3 | \name{predictCyclops} 4 | \alias{predictCyclops} 5 | \title{Create predictive probabilities} 6 | \usage{ 7 | predictCyclops(plpModel, data, cohort) 8 | } 9 | \arguments{ 10 | \item{plpModel}{An object of type \code{predictiveModel} as generated using 11 | \code{\link{fitPlp}}.} 12 | 13 | \item{data}{The new plpData containing the covariateData for the new population} 14 | 15 | \item{cohort}{The cohort to calculate the prediction for} 16 | } 17 | \value{ 18 | The value column in the result data.frame is: logistic: probabilities of the outcome, poisson: 19 | Poisson rate (per day) of the outcome, survival: hazard rate (per day) of the outcome. 20 | } 21 | \description{ 22 | Create predictive probabilities 23 | } 24 | \details{ 25 | Generates predictions for the population specified in plpData given the model. 
26 | } 27 | \examples{ 28 | \donttest{ \dontshow{ # takes too long } 29 | data("simulationProfile") 30 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 31 | population <- createStudyPopulation(plpData, outcomeId = 3) 32 | data <- splitData(plpData, population) 33 | plpModel <- fitPlp(data$Train, modelSettings = setLassoLogisticRegression(seed = 42), 34 | analysisId = "test", analysisPath = NULL) 35 | prediction <- predictCyclops(plpModel, data$Test, data$Test$labels) 36 | # view prediction dataframe 37 | head(prediction) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /man/savePlpAnalysesJson.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RunMultiplePlp.R 3 | \name{savePlpAnalysesJson} 4 | \alias{savePlpAnalysesJson} 5 | \title{Save the modelDesignList to a json file} 6 | \usage{ 7 | savePlpAnalysesJson( 8 | modelDesignList = list(createModelDesign(targetId = 1, outcomeId = 2, modelSettings = 9 | setLassoLogisticRegression()), createModelDesign(targetId = 1, outcomeId = 3, 10 | modelSettings = setLassoLogisticRegression())), 11 | cohortDefinitions = NULL, 12 | saveDirectory = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{modelDesignList}{A list of modelDesigns created using \code{createModelDesign()}} 17 | 18 | \item{cohortDefinitions}{A list of the cohortDefinitions (generally extracted from ATLAS)} 19 | 20 | \item{saveDirectory}{The directory to save the modelDesignList settings} 21 | } 22 | \value{ 23 | The json string of the ModelDesignList 24 | } 25 | \description{ 26 | Save the modelDesignList to a json file 27 | } 28 | \details{ 29 | This function creates a json file with the modelDesignList saved 30 | } 31 | \examples{ 32 | modelDesign <- createModelDesign(targetId = 1, 33 | outcomeId = 2, 34 | modelSettings = setLassoLogisticRegression()) 35 | saveLoc <- 
file.path(tempdir(), "loadPlpAnalysesJson") 36 | jsonFile <- savePlpAnalysesJson(modelDesignList = modelDesign, saveDirectory = saveLoc) 37 | # clean up 38 | unlink(saveLoc, recursive = TRUE) 39 | } 40 | -------------------------------------------------------------------------------- /man/setCoxModel.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CyclopsSettings.R 3 | \name{setCoxModel} 4 | \alias{setCoxModel} 5 | \title{Create setting for lasso Cox model} 6 | \usage{ 7 | setCoxModel( 8 | variance = 0.01, 9 | seed = NULL, 10 | includeCovariateIds = c(), 11 | noShrinkage = c(), 12 | threads = -1, 13 | upperLimit = 20, 14 | lowerLimit = 0.01, 15 | tolerance = 2e-07, 16 | maxIterations = 3000 17 | ) 18 | } 19 | \arguments{ 20 | \item{variance}{Numeric: prior distribution starting variance} 21 | 22 | \item{seed}{An option to add a seed when training the model} 23 | 24 | \item{includeCovariateIds}{a set of covariate IDs to limit the analysis to} 25 | 26 | \item{noShrinkage}{a set of covariates which are to be forced to be included in the final model. 
default is the intercept} 27 | 28 | \item{threads}{An option to set number of threads when training model} 29 | 30 | \item{upperLimit}{Numeric: Upper prior variance limit for grid-search} 31 | 32 | \item{lowerLimit}{Numeric: Lower prior variance limit for grid-search} 33 | 34 | \item{tolerance}{Numeric: maximum relative change in convergence criterion from successive iterations to achieve convergence} 35 | 36 | \item{maxIterations}{Integer: maximum iterations of Cyclops to attempt before returning a failed-to-converge error} 37 | } 38 | \value{ 39 | \code{modelSettings} object 40 | } 41 | \description{ 42 | Create setting for lasso Cox model 43 | } 44 | \examples{ 45 | coxL1 <- setCoxModel() 46 | } 47 | -------------------------------------------------------------------------------- /man/setAdaBoost.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnClassifierSettings.R 3 | \name{setAdaBoost} 4 | \alias{setAdaBoost} 5 | \title{Create setting for AdaBoost with python DecisionTreeClassifier base estimator} 6 | \usage{ 7 | setAdaBoost( 8 | nEstimators = list(10, 50, 200), 9 | learningRate = list(1, 0.5, 0.1), 10 | algorithm = list("SAMME"), 11 | seed = sample(1e+06, 1) 12 | ) 13 | } 14 | \arguments{ 15 | \item{nEstimators}{(list) The maximum number of estimators at which boosting is terminated. In case of perfect fit, the learning procedure is stopped early.} 16 | 17 | \item{learningRate}{(list) Weight applied to each classifier at each boosting iteration. A higher learning rate increases the contribution of each classifier. There is a trade-off between the learningRate and nEstimators parameters 18 | There is a trade-off between learningRate and nEstimators.} 19 | 20 | \item{algorithm}{Only ‘SAMME’ can be provided. 
The 'algorithm' argument will be deprecated in scikit-learn 1.8.} 21 | 22 | \item{seed}{A seed for the model} 23 | } 24 | \value{ 25 | a modelSettings object 26 | } 27 | \description{ 28 | Create setting for AdaBoost with python DecisionTreeClassifier base estimator 29 | } 30 | \examples{ 31 | \dontshow{ # dontrun reason: requires python's scikit-learn, checkSklearn() will error without it } 32 | \dontrun{ 33 | model <- setAdaBoost(nEstimators = list(10), 34 | learningRate = list(0.1), 35 | seed = 42) 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /man/createIterativeImputer.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Imputation.R 3 | \name{createIterativeImputer} 4 | \alias{createIterativeImputer} 5 | \title{Create Iterative Imputer settings} 6 | \usage{ 7 | createIterativeImputer( 8 | missingThreshold = 0.3, 9 | method = "pmm", 10 | methodSettings = list(pmm = list(k = 5, iterations = 5)) 11 | ) 12 | } 13 | \arguments{ 14 | \item{missingThreshold}{The threshold for missing values to remove a feature} 15 | 16 | \item{method}{The method to use for imputation, currently only "pmm" is supported} 17 | 18 | \item{methodSettings}{A list of settings for the imputation method to use. 
19 | Currently only "pmm" is supported with the following settings: 20 | \itemize{ 21 | \item k: The number of donors to use for matching 22 | \item iterations: The number of iterations to use for imputation 23 | }} 24 | } 25 | \value{ 26 | The settings for the iterative imputer of class \code{featureEngineeringSettings} 27 | } 28 | \description{ 29 | This function creates the settings for an iterative imputer 30 | which first removes features with more than \code{missingThreshold} missing values 31 | and then imputes the missing values iteratively using chained equations 32 | } 33 | \examples{ 34 | # create imputer to impute values with missingness less than 30\% using 35 | # predictive mean matching in 5 iterations with 5 donors 36 | createIterativeImputer(missingThreshold = 0.3, method = "pmm", 37 | methodSettings = list(pmm = list(k = 5, iterations = 5))) 38 | } 39 | -------------------------------------------------------------------------------- /man/plotVariableScatterplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotVariableScatterplot} 4 | \alias{plotVariableScatterplot} 5 | \title{Plot the variable importance scatterplot} 6 | \usage{ 7 | plotVariableScatterplot( 8 | covariateSummary, 9 | saveLocation = NULL, 10 | fileName = "VariableScatterplot.png" 11 | ) 12 | } 13 | \arguments{ 14 | \item{covariateSummary}{A prediction object as generated using the 15 | \code{\link{runPlp}} function.} 16 | 17 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 18 | 19 | \item{fileName}{Name of the file to save to plot, for example 20 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 21 | supported file formats.} 22 | } 23 | \value{ 24 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 25 | format. 
26 | } 27 | \description{ 28 | Plot the variable importance scatterplot 29 | } 30 | \details{ 31 | Create a plot showing the variable importance scatterplot 32 | #' 33 | } 34 | \examples{ 35 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 36 | \donttest{ \dontshow{ # takes too long } 37 | data("simulationProfile") 38 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 39 | saveLoc <- file.path(tempdir(), "plotVariableScatterplot") 40 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 41 | plotVariableScatterplot(results$covariateSummary) 42 | # clean up 43 | } 44 | \dontshow{\}) # examplesIf} 45 | } 46 | -------------------------------------------------------------------------------- /man/recalibratePlpRefit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Recalibration.R 3 | \name{recalibratePlpRefit} 4 | \alias{recalibratePlpRefit} 5 | \title{recalibratePlpRefit} 6 | \usage{ 7 | recalibratePlpRefit(plpModel, newPopulation, newData, returnModel = FALSE) 8 | } 9 | \arguments{ 10 | \item{plpModel}{The trained plpModel (runPlp$model)} 11 | 12 | \item{newPopulation}{The population created using createStudyPopulation() who will have their risks predicted} 13 | 14 | \item{newData}{An object of type \code{plpData} - the patient level prediction 15 | data extracted from the CDM.} 16 | 17 | \item{returnModel}{Logical: return the refitted model} 18 | } 19 | \value{ 20 | A prediction dataframe with the predictions of the recalibrated model added 21 | } 22 | \description{ 23 | Recalibrating a model by refitting it 24 | } 25 | \examples{ 26 | \donttest{ \dontshow{ # takes too long } 27 | data("simulationProfile") 28 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 29 | saveLoc <- file.path(tempdir(), "recalibratePlpRefit") 30 | plpResults <- runPlp(plpData, outcomeId = 3, 
saveDirectory = saveLoc) 31 | newData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 32 | newPopulation <- createStudyPopulation(newData, outcomeId = 3) 33 | predictions <- recalibratePlpRefit(plpModel = plpResults$model, 34 | newPopulation = newPopulation, 35 | newData = newData) 36 | # clean up 37 | unlink(saveLoc, recursive = TRUE) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /man/createRareFeatureRemover.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{createRareFeatureRemover} 4 | \alias{createRareFeatureRemover} 5 | \title{Create the settings for removing rare features} 6 | \usage{ 7 | createRareFeatureRemover(threshold = 0.001) 8 | } 9 | \arguments{ 10 | \item{threshold}{The minimum fraction of the training data that must have a 11 | feature for it to be included} 12 | } 13 | \value{ 14 | An object of class \code{featureEngineeringSettings} 15 | } 16 | \description{ 17 | Create the settings for removing rare features 18 | } 19 | \examples{ 20 | \dontshow{if (rlang::is_installed("Eunomia") && rlang::is_installed("curl") && curl::has_internet()) withAutoprint(\{ # examplesIf} 21 | \donttest{ \dontshow{ # takes too long } 22 | # create a rare feature remover that removes features that are present in less 23 | # than 1\% of the population 24 | rareFeatureRemover <- createRareFeatureRemover(threshold = 0.01) 25 | plpData <- getEunomiaPlpData() 26 | analysisId <- "rareFeatureRemover" 27 | saveLocation <- file.path(tempdir(), analysisId) 28 | results <- runPlp( 29 | plpData = plpData, 30 | featureEngineeringSettings = rareFeatureRemover, 31 | outcomeId = 3, 32 | executeSettings = createExecuteSettings( 33 | runModelDevelopment = TRUE, 34 | runSplitData = TRUE, 35 | runFeatureEngineering = TRUE), 36 | saveDirectory = saveLocation, 37 | analysisId = 
analysisId) 38 | # clean up 39 | unlink(saveLocation, recursive = TRUE) 40 | } 41 | \dontshow{\}) # examplesIf} 42 | } 43 | -------------------------------------------------------------------------------- /tests/testthat/test-helperfunctions.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # how to test checkPlpInstallation? 17 | 18 | 19 | test_that("createTempModelLoc", { 20 | expect_equal(class(PatientLevelPrediction:::createTempModelLoc()), "character") 21 | }) 22 | 23 | list1 <- list(a = 1:2, b = 5:6) 24 | list2 <- list(c = 1:5) 25 | test_that("listAppend", { 26 | expect_equal(length(listAppend(list1, list2)), 3) 27 | }) 28 | 29 | # how to test configurePython? 30 | 31 | test_that("setPythonEnvironment", { 32 | skip_if_not_installed("reticulate") 33 | skip_on_cran() 34 | expect_error(setPythonEnvironment(envname = "madeup34343")) 35 | }) 36 | 37 | test_that("Borrowed cut2", { 38 | x <- c(1, rep(2, 2), rep(4, 4), rep(5, 5), rep(6, 6)) 39 | groups <- PatientLevelPrediction:::cut2(x, g = 3) 40 | expect_true( 41 | all(levels(groups) == c("[1,5)", "5", "6")) 42 | ) 43 | }) 44 | 45 | # getOs test? 
46 | -------------------------------------------------------------------------------- /man/plotSparseCalibration.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotSparseCalibration} 4 | \alias{plotSparseCalibration} 5 | \title{Plot the calibration} 6 | \usage{ 7 | plotSparseCalibration( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "roc.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 
28 | } 29 | \description{ 30 | Plot the calibration 31 | } 32 | \details{ 33 | Create a plot showing the calibration 34 | #' 35 | } 36 | \examples{ 37 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 38 | \donttest{ \dontshow{ # takes too long } 39 | data("simulationProfile") 40 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 41 | saveLoc <- file.path(tempdir(), "plotSparseCalibration") 42 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 43 | plotSparseCalibration(results) 44 | # clean up 45 | unlink(saveLoc, recursive = TRUE) 46 | } 47 | \dontshow{\}) # examplesIf} 48 | } 49 | -------------------------------------------------------------------------------- /man/preprocessData.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PreprocessingData.R 3 | \name{preprocessData} 4 | \alias{preprocessData} 5 | \title{A function that wraps around FeatureExtraction::tidyCovariateData to normalise 6 | the data and remove rare or redundant features} 7 | \usage{ 8 | preprocessData(covariateData, preprocessSettings = createPreprocessSettings()) 9 | } 10 | \arguments{ 11 | \item{covariateData}{The covariate part of the training data created by \code{splitData} after being sampled and having 12 | any required feature engineering} 13 | 14 | \item{preprocessSettings}{The settings for the preprocessing created by \code{createPreprocessSettings} 15 | The data processed} 16 | } 17 | \value{ 18 | The covariateData object with the processed covariates 19 | } 20 | \description{ 21 | A function that wraps around FeatureExtraction::tidyCovariateData to normalise 22 | the data and remove rare or redundant features 23 | } 24 | \details{ 25 | Returns an object of class \code{covariateData} that has been processed. 26 | This includes normalising the data and removing rare or redundant features. 
27 | Redundant features are features that within an analysisId together cover 28 | all observations. 29 | } 30 | \examples{ 31 | library(dplyr) 32 | data("simulationProfile") 33 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 34 | preProcessedData <- preprocessData(plpData$covariateData, createPreprocessSettings()) 35 | # check age is normalized by max value 36 | preProcessedData$covariates \%>\% dplyr::filter(.data$covariateId == 1002) 37 | } 38 | -------------------------------------------------------------------------------- /man/savePlpShareable.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{savePlpShareable} 4 | \alias{savePlpShareable} 5 | \title{Save the plp result as json files and csv files for transparent sharing} 6 | \usage{ 7 | savePlpShareable(result, saveDirectory, minCellCount = 10) 8 | } 9 | \arguments{ 10 | \item{result}{An object of class runPlp with development or validation results} 11 | 12 | \item{saveDirectory}{The directory to save the results as csv files} 13 | 14 | \item{minCellCount}{Minimum cell count for the covariateSummary and certain evaluation results} 15 | } 16 | \value{ 17 | \if{html}{\out{
}}\preformatted{ The directory path where the results were saved 18 | }\if{html}{\out{
}} 19 | } 20 | \description{ 21 | Save the plp result as json files and csv files for transparent sharing 22 | } 23 | \details{ 24 | Saves the main results json/csv files (these files can be read by the shiny app) 25 | } 26 | \examples{ 27 | \donttest{ \dontshow{ # takes too long } 28 | data("simulationProfile") 29 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 30 | saveLoc <- file.path(tempdir(), "savePlpShareable") 31 | results <- runPlp(plpData, saveDirectory = saveLoc) 32 | newSaveLoc <- file.path(tempdir(), "savePlpShareable", "saved") 33 | path <- savePlpShareable(results, newSaveLoc) 34 | # show the saved result 35 | dir(newSaveLoc, full.names = TRUE, recursive = TRUE) 36 | 37 | # clean up 38 | unlink(saveLoc, recursive = TRUE) 39 | unlink(newSaveLoc, recursive = TRUE) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /man/createLogSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Logging.R 3 | \name{createLogSettings} 4 | \alias{createLogSettings} 5 | \title{Create the settings for logging the progression of the analysis} 6 | \usage{ 7 | createLogSettings( 8 | verbosity = "DEBUG", 9 | timeStamp = TRUE, 10 | logName = "runPlp Log" 11 | ) 12 | } 13 | \arguments{ 14 | \item{verbosity}{Sets the level of the verbosity. If the log level is at or higher in priority than the logger threshold, a message will print. The levels are: 15 | \itemize{ 16 | \item DEBUG Highest verbosity showing all debug statements 17 | \item TRACE Showing information about start and end of steps 18 | \item INFO Show informative information (Default) 19 | \item WARN Show warning messages 20 | \item ERROR Show error messages 21 | \item FATAL Be silent except for fatal errors 22 | }} 23 | 24 | \item{timeStamp}{If TRUE a timestamp will be added to each logging statement. 
Automatically switched on for TRACE level.} 25 | 26 | \item{logName}{A string reference for the logger} 27 | } 28 | \value{ 29 | An object of class \code{logSettings} containing the settings for the logger 30 | } 31 | \description{ 32 | Create the settings for logging the progression of the analysis 33 | } 34 | \details{ 35 | Returns an object of class \code{logSettings} that specifies the logger settings 36 | } 37 | \examples{ 38 | # create a log settings object with DEBUG verbosity, timestamp and log name 39 | # "runPlp Log". This needs to be passed to `runPlp`. 40 | createLogSettings(verbosity = "DEBUG", timeStamp = TRUE, logName = "runPlp Log") 41 | } 42 | -------------------------------------------------------------------------------- /man/plotSparseCalibration2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotSparseCalibration2} 4 | \alias{plotSparseCalibration2} 5 | \title{Plot the conventional calibration} 6 | \usage{ 7 | plotSparseCalibration2( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "roc.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format.
28 | } 29 | \description{ 30 | Plot the conventional calibration 31 | } 32 | \details{ 33 | Create a plot showing the calibration 34 | #' 35 | } 36 | \examples{ 37 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 38 | \donttest{ \dontshow{ # takes too long } 39 | data("simulationProfile") 40 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 41 | saveLoc <- file.path(tempdir(), "plotSparseCalibration2") 42 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 43 | plotSparseCalibration2(results) 44 | # clean up 45 | unlink(saveLoc, recursive = TRUE) 46 | } 47 | \dontshow{\}) # examplesIf} 48 | } 49 | -------------------------------------------------------------------------------- /man/robustNormalize.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureEngineering.R 3 | \name{robustNormalize} 4 | \alias{robustNormalize} 5 | \title{A function that normalizes continuous values by the interquartile range and 6 | optionally forces the resulting values to be between -3 and 3 with 7 | f(x) = x / sqrt(1 + (x/3)^2) 8 | '@details uses (value - median) / iqr to normalize the data and then can 9 | apply the function f(x) = x / sqrt(1 + (x/3)^2) to the normalized values. 10 | This forces the values to be between -3 and 3 while preserving the relative 11 | ordering of the values.
12 | based on https://arxiv.org/abs/2407.04491 for more details} 13 | \usage{ 14 | robustNormalize(trainData, featureEngineeringSettings, done = FALSE) 15 | } 16 | \arguments{ 17 | \item{trainData}{The training data to be normalized} 18 | 19 | \item{featureEngineeringSettings}{The settings for the normalization} 20 | 21 | \item{done}{Whether the data has already been normalized (bool)} 22 | } 23 | \value{ 24 | The \code{trainData} object with normalized data 25 | } 26 | \description{ 27 | A function that normalizes continuous values by the interquartile range and 28 | optionally forces the resulting values to be between -3 and 3 with 29 | f(x) = x / sqrt(1 + (x/3)^2) 30 | '@details uses (value - median) / iqr to normalize the data and then can 31 | apply the function f(x) = x / sqrt(1 + (x/3)^2) to the normalized values. 32 | This forces the values to be between -3 and 3 while preserving the relative 33 | ordering of the values. 34 | based on https://arxiv.org/abs/2407.04491 for more details 35 | } 36 | \keyword{internal} 37 | -------------------------------------------------------------------------------- /man/plotSparseRoc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotSparseRoc} 4 | \alias{plotSparseRoc} 5 | \title{Plot the ROC curve using the sparse thresholdSummary data frame} 6 | \usage{ 7 | plotSparseRoc( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "roc.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'.
See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 28 | } 29 | \description{ 30 | Plot the ROC curve using the sparse thresholdSummary data frame 31 | } 32 | \details{ 33 | Create a plot showing the Receiver Operator Characteristics (ROC) curve. 34 | } 35 | \examples{ 36 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 37 | \donttest{ \dontshow{ # takes too long } 38 | data("simulationProfile") 39 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 40 | saveLoc <- file.path(tempdir(), "plotSparseRoc") 41 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 42 | plotSparseRoc(results) 43 | # clean up 44 | unlink(saveLoc, recursive = TRUE) 45 | } 46 | \dontshow{\}) # examplesIf} 47 | } 48 | -------------------------------------------------------------------------------- /man/setIterativeHardThresholding.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CyclopsSettings.R 3 | \name{setIterativeHardThresholding} 4 | \alias{setIterativeHardThresholding} 5 | \title{Create setting for Iterative Hard Thresholding model} 6 | \usage{ 7 | setIterativeHardThresholding( 8 | K = 10, 9 | penalty = "bic", 10 | seed = sample(1e+05, 1), 11 | exclude = c(), 12 | forceIntercept = FALSE, 13 | fitBestSubset = FALSE, 14 | initialRidgeVariance = 0.1, 15 | tolerance = 1e-08, 16 | maxIterations = 10000, 17 | threshold = 1e-06, 18 | delta = 0 19 | ) 20 | } 21 | \arguments{ 22 | \item{K}{The maximum number of non-zero predictors} 23 | 24 | \item{penalty}{Specifies the IHT penalty; possible values are \code{BIC} or \code{AIC} or a numeric value} 25 | 26 | \item{seed}{An option to add a seed when training the model} 27 | 28 | \item{exclude}{A 
vector of numbers or covariateId names to exclude from prior} 29 | 30 | \item{forceIntercept}{Logical: Force intercept coefficient into regularization} 31 | 32 | \item{fitBestSubset}{Logical: Fit final subset with no regularization} 33 | 34 | \item{initialRidgeVariance}{integer} 35 | 36 | \item{tolerance}{numeric} 37 | 38 | \item{maxIterations}{integer} 39 | 40 | \item{threshold}{numeric} 41 | 42 | \item{delta}{numeric} 43 | } 44 | \value{ 45 | \code{modelSettings} object 46 | } 47 | \description{ 48 | Create setting for Iterative Hard Thresholding model 49 | } 50 | \examples{ 51 | \dontshow{if (rlang::is_installed("IterativeHardThresholding")) withAutoprint(\{ # examplesIf} 52 | modelIht <- setIterativeHardThresholding(K = 5, seed = 42) 53 | \dontshow{\}) # examplesIf} 54 | } 55 | -------------------------------------------------------------------------------- /man/plotDemographicSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotDemographicSummary} 4 | \alias{plotDemographicSummary} 5 | \title{Plot the Observed vs. expected incidence, by age and gender} 6 | \usage{ 7 | plotDemographicSummary( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "roc.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 
28 | } 29 | \description{ 30 | Plot the Observed vs. expected incidence, by age and gender 31 | } 32 | \details{ 33 | Create a plot showing the Observed vs. expected incidence, by age and gender 34 | #' 35 | } 36 | \examples{ 37 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 38 | \donttest{ 39 | data("simulationProfile") 40 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 41 | saveLoc <- file.path(tempdir(), "plotDemographicSummary") 42 | plpResult <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 43 | plotDemographicSummary(plpResult) 44 | # clean up 45 | unlink(saveLoc, recursive = TRUE) 46 | } 47 | \dontshow{\}) # examplesIf} 48 | } 49 | -------------------------------------------------------------------------------- /demo/SingleModelDemo.R: -------------------------------------------------------------------------------- 1 | # This demo will run a logistic regression model on simulated data and will show the Shiny App 2 | library(PatientLevelPrediction) 3 | devAskNewPage(ask = FALSE) 4 | 5 | ### Simulated data from a database profile 6 | set.seed(1234) 7 | data(plpDataSimulationProfile) 8 | sampleSize <- 2000 9 | plpData <- simulatePlpData(plpDataSimulationProfile, n = sampleSize, seed = 42) 10 | 11 | ### Define the study population 12 | populationSettings <- createStudyPopulationSettings( 13 | binary = TRUE, 14 | firstExposureOnly = FALSE, 15 | washoutPeriod = 0, 16 | removeSubjectsWithPriorOutcome = FALSE, 17 | priorOutcomeLookback = 99999, 18 | requireTimeAtRisk = TRUE, 19 | minTimeAtRisk = 0, 20 | riskWindowStart = 0, 21 | startAnchor = 'cohort start', 22 | riskWindowEnd = 365, 23 | endAnchor = 'cohort start' 24 | ) 25 | 26 | ### Regularised logistic regression 27 | lr_model <- setLassoLogisticRegression() 28 | lr_results <- runPlp( 29 | plpData = plpData, 30 | outcomeId = 2, 31 | analysisId = 'demo', 32 | analysisName = 'run plp demo', 33 | populationSettings = populationSettings, 34 | 
splitSettings = createDefaultSplitSetting( 35 | type = "time", 36 | testFraction = 0.25, 37 | nfold = 2 38 | ), 39 | sampleSettings = createSampleSettings(), 40 | preprocessSettings = createPreprocessSettings( 41 | minFraction = 0, 42 | normalize = T 43 | ), 44 | modelSettings = lr_model, 45 | executeSettings = createDefaultExecuteSettings(), 46 | saveDirectory = "./plpdemo" 47 | ) 48 | 49 | 50 | ### Have a look at the results object. 51 | 52 | ### You can start the Shiny App by using this command now: 53 | ### viewPlp(lr_results) 54 | -------------------------------------------------------------------------------- /man/plotF1Measure.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotF1Measure} 4 | \alias{plotF1Measure} 5 | \title{Plot the F1 measure efficiency frontier using the sparse thresholdSummary data frame} 6 | \usage{ 7 | plotF1Measure( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "roc.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 
28 | } 29 | \description{ 30 | Plot the F1 measure efficiency frontier using the sparse thresholdSummary data frame 31 | } 32 | \details{ 33 | Create a plot showing the F1 measure efficiency frontier using the sparse thresholdSummary data frame 34 | } 35 | \examples{ 36 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 37 | \donttest{ \dontshow{ # takes too long } 38 | data("simulationProfile") 39 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 40 | saveLoc <- file.path(tempdir(), "plotF1Measure") 41 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 42 | plotF1Measure(results) 43 | # clean up 44 | unlink(saveLoc, recursive = TRUE) 45 | } 46 | \dontshow{\}) # examplesIf} 47 | } 48 | -------------------------------------------------------------------------------- /man/plotPrecisionRecall.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotPrecisionRecall} 4 | \alias{plotPrecisionRecall} 5 | \title{Plot the precision-recall curve using the sparse thresholdSummary data frame} 6 | \usage{ 7 | plotPrecisionRecall( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "roc.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 
28 | } 29 | \description{ 30 | Plot the precision-recall curve using the sparse thresholdSummary data frame 31 | } 32 | \details{ 33 | Create a plot showing the precision-recall curve using the sparse thresholdSummary data frame 34 | } 35 | \examples{ 36 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 37 | \donttest{ \dontshow{ # takes too long } 38 | data("simulationProfile") 39 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 40 | saveLoc <- file.path(tempdir(), "plotPrecisionRecall") 41 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 42 | plotPrecisionRecall(results) 43 | # clean up 44 | unlink(saveLoc, recursive = TRUE) 45 | } 46 | \dontshow{\}) # examplesIf} 47 | } 48 | -------------------------------------------------------------------------------- /.github/workflows/pkgdown.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, develop] 6 | release: 7 | types: [published] 8 | workflow_dispatch: 9 | 10 | name: pkgdown 11 | 12 | jobs: 13 | pkgdown: 14 | runs-on: ubuntu-latest 15 | # Only restrict concurrency for non-PR jobs 16 | concurrency: 17 | group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} 18 | env: 19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 20 | steps: 21 | - uses: actions/checkout@v4 22 | 23 | - uses: r-lib/actions/setup-pandoc@v2 24 | 25 | - uses: r-lib/actions/setup-r@v2 26 | with: 27 | use-public-rspm: true 28 | 29 | - uses: r-lib/actions/setup-r-dependencies@v2 30 | with: 31 | cache: always 32 | extra-packages: any::pkgdown, ohdsi/OhdsiRTools 33 | needs: website 34 | 35 | - uses: lycheeverse/lychee-action@v2 36 | with: 37 | args: --base . 
--verbose --no-progress --accept '100..=103, 200..=299, 403, 429' './**/*.md' './**/*.Rmd' 38 | 39 | - name: Build site 40 | run: Rscript -e 'pkgdown::build_site_github_pages(new_process = FALSE, install = TRUE)' 41 | 42 | - name: Fix Hades Logo 43 | run: Rscript -e 'OhdsiRTools::fixHadesLogo()' 44 | 45 | - name: Deploy to GitHub pages 🚀 46 | if: github.event_name != 'pull_request' 47 | uses: JamesIves/github-pages-deploy-action@v4 48 | with: 49 | clean: false 50 | branch: gh-pages 51 | folder: docs 52 | -------------------------------------------------------------------------------- /R/PatientLevelPrediction.R: -------------------------------------------------------------------------------- 1 | # @file PatientLevelPrediction.R 2 | # 3 | # Copyright 2025 Observational Health Data Sciences and Informatics 4 | # 5 | # This file is part of PatientLevelPrediction 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | 19 | #' PatientLevelPrediction 20 | #' 21 | #' @description A package for running predictions using data in the OMOP CDM 22 | #' 23 | #' @name PatientLevelPrediction 24 | #' @keywords internal 25 | #' @importFrom dplyr %>% 26 | #' @importFrom rlang .data 27 | "_PACKAGE" 28 | 29 | #' A simulation profile for generating synthetic patient level prediction data 30 | #' @docType data 31 | #' @keywords datasets 32 | #' @name simulationProfile 33 | #' @format A data frame containing the following elements: 34 | #' \describe{ 35 | #' \item{covariatePrevalence}{prevalence of all covariates} 36 | #' \item{outcomeModels}{regression model parameters to simulate outcomes} 37 | #' \item{metaData}{settings used to simulate the profile} 38 | #' \item{covariateRef}{covariateIds and covariateNames} 39 | #' \item{timePrevalence}{time window} 40 | #' \item{exclusionPrevalence}{prevalence of exclusion of covariates} 41 | #' } 42 | #' @usage 43 | #' data(simulationProfile) 44 | NULL 45 | -------------------------------------------------------------------------------- /man/createSampleSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Sampling.R 3 | \name{createSampleSettings} 4 | \alias{createSampleSettings} 5 | \title{Create the settings for defining how the trainData from \code{splitData} are sampled using 6 | default sample functions.} 7 | \usage{ 8 | createSampleSettings( 9 | type = "none", 10 | numberOutcomestoNonOutcomes = 1, 11 | sampleSeed = sample(10000, 1) 12 | ) 13 | } 14 | \arguments{ 15 | \item{type}{(character) Choice of: \itemize{ 16 | \item 'none' No sampling is applied - this is the default 17 | \item 'underSample' Undersample the non-outcome class to make the data more balanced 18 | \item 'overSample' Oversample the outcome class by adding in each outcome multiple times 19 | }} 20 | 21 | \item{numberOutcomestoNonOutcomes}{(numeric) A 
numeric specifying the required number of outcomes per non-outcomes} 22 | 23 | \item{sampleSeed}{(numeric) A seed to use when splitting the data for reproducibility (if not set a random number will be generated)} 24 | } 25 | \value{ 26 | An object of class \code{sampleSettings} 27 | } 28 | \description{ 29 | Create the settings for defining how the trainData from \code{splitData} are sampled using 30 | default sample functions. 31 | } 32 | \details{ 33 | Returns an object of class \code{sampleSettings} that specifies the sampling function that will be called and the settings 34 | } 35 | \examples{ 36 | \dontshow{if (rlang::is_installed("Eunomia")) withAutoprint(\{ # examplesIf} 37 | \donttest{ 38 | # sample even rate of outcomes to non-outcomes 39 | sampleSetting <- createSampleSettings( 40 | type = "underSample", 41 | numberOutcomestoNonOutcomes = 1, 42 | sampleSeed = 42 43 | ) 44 | } 45 | \dontshow{\}) # examplesIf} 46 | } 47 | -------------------------------------------------------------------------------- /man/plotPredictionDistribution.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotPredictionDistribution} 4 | \alias{plotPredictionDistribution} 5 | \title{Plot the side-by-side boxplots of prediction distribution, by class} 6 | \usage{ 7 | plotPredictionDistribution( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "PredictionDistribution.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. 
See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 28 | } 29 | \description{ 30 | Plot the side-by-side boxplots of prediction distribution, by class 31 | } 32 | \details{ 33 | Create a plot showing the side-by-side boxplots of prediction distribution, by class 34 | #' 35 | } 36 | \examples{ 37 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 38 | \donttest{ \dontshow{ # takes too long } 39 | data("simulationProfile") 40 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 41 | saveLoc <- file.path(tempdir(), "plotPredictionDistribution") 42 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 43 | plotPredictionDistribution(results) 44 | # clean up 45 | unlink(saveLoc, recursive = TRUE) 46 | } 47 | \dontshow{\}) # examplesIf} 48 | } 49 | -------------------------------------------------------------------------------- /man/plotPredictedPDF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotPredictedPDF} 4 | \alias{plotPredictedPDF} 5 | \title{Plot the Predicted probability density function, showing prediction overlap between true and false cases} 6 | \usage{ 7 | plotPredictedPDF( 8 | plpResult, 9 | typeColumn = "evaluation", 10 | saveLocation = NULL, 11 | fileName = "PredictedPDF.png" 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 16 | 17 | \item{typeColumn}{The name of the column specifying the evaluation type} 18 | 19 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 20 | 21 | \item{fileName}{Name of the file to save to plot, for example 22 | 'plot.png'. 
See the function \code{ggsave} in the ggplot2 package for 23 | supported file formats.} 24 | } 25 | \value{ 26 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 27 | format. 28 | } 29 | \description{ 30 | Plot the Predicted probability density function, showing prediction overlap between true and false cases 31 | } 32 | \details{ 33 | Create a plot showing the predicted probability density function, showing prediction overlap between true and false cases 34 | } 35 | \examples{ 36 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 37 | \donttest{ \dontshow{ # takes too long } 38 | data("simulationProfile") 39 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 40 | saveLoc <- file.path(tempdir(), "plotPredictedPDF") 41 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 42 | plotPredictedPDF(results) 43 | # clean up 44 | unlink(saveLoc, recursive = TRUE) 45 | } 46 | \dontshow{\}) # examplesIf} 47 | } 48 | -------------------------------------------------------------------------------- /man/createValidationDesign.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExternalValidatePlp.R 3 | \name{createValidationDesign} 4 | \alias{createValidationDesign} 5 | \title{createValidationDesign - Define the validation design for external validation} 6 | \usage{ 7 | createValidationDesign( 8 | targetId, 9 | outcomeId, 10 | populationSettings = NULL, 11 | restrictPlpDataSettings = NULL, 12 | plpModelList, 13 | recalibrate = NULL, 14 | runCovariateSummary = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{targetId}{The targetId of the target cohort to validate on} 19 | 20 | \item{outcomeId}{The outcomeId of the outcome cohort to validate on} 21 | 22 | \item{populationSettings}{A list of population restriction settings created 23 | by 
\code{createPopulationSettings}. Default is NULL and then this is taken 24 | from the model} 25 | 26 | \item{restrictPlpDataSettings}{A list of plpData restriction settings 27 | created by \code{createRestrictPlpDataSettings}. Default is NULL and then 28 | this is taken from the model.} 29 | 30 | \item{plpModelList}{A list of plpModels objects created by \code{runPlp} or a path to such objects} 31 | 32 | \item{recalibrate}{A vector of characters specifying the recalibration method to apply,} 33 | 34 | \item{runCovariateSummary}{whether to run the covariate summary for the validation data} 35 | } 36 | \value{ 37 | A validation design object of class \code{validationDesign} or a list of such objects 38 | } 39 | \description{ 40 | createValidationDesign - Define the validation design for external validation 41 | } 42 | \examples{ 43 | # create a validation design for targetId 1 and outcomeId 2 one l1 model and 44 | # one gradient boosting model 45 | createValidationDesign(1, 2, plpModelList = list( 46 | "pathToL1model", "PathToGBMModel")) 47 | } 48 | -------------------------------------------------------------------------------- /.github/workflows/revdeps.yml: -------------------------------------------------------------------------------- 1 | name: Reverse dependency checks 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | repos: 7 | description: "owner/repo lines (optional). 
If empty, uses revdep/github.txt" 8 | required: false 9 | default: "" 10 | 11 | permissions: 12 | contents: read 13 | 14 | concurrency: 15 | group: revdep-${{ github.ref }} 16 | cancel-in-progress: false 17 | 18 | env: 19 | RSPM: https://packagemanager.posit.co/cran/__linux__/noble/latest 20 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 21 | R_KEEP_PKG_SOURCE: yes 22 | 23 | jobs: 24 | revdep: 25 | runs-on: ubuntu-latest 26 | 27 | steps: 28 | - name: Checkout repo 29 | uses: actions/checkout@v5 30 | 31 | - name: Setup R 32 | uses: r-lib/actions/setup-r@v2 33 | with: 34 | r-version: 'release' 35 | use-public-rspm: true 36 | 37 | - name: Setup Pandoc 38 | uses: r-lib/actions/setup-pandoc@v2 39 | 40 | - name: Setup R dependencies 41 | uses: r-lib/actions/setup-r-dependencies@v2 42 | with: 43 | extra-packages: | 44 | any::pak 45 | any::rcmdcheck 46 | any::jsonlite 47 | any::desc 48 | any::gert 49 | 50 | - name: Install this package (dev version) 51 | run: | 52 | Rscript -e 'pak::pkg_install("local::.", upgrade = FALSE)' 53 | 54 | - name: Run reverse dependency checks 55 | env: 56 | INPUT_REPOS: ${{ github.event.inputs.repos }} 57 | run: | 58 | Rscript extras/revDeps.R 59 | 60 | - name: Upload results 61 | if: always() 62 | uses: actions/upload-artifact@v5 63 | with: 64 | name: revdep-results 65 | path: revdep/results 66 | -------------------------------------------------------------------------------- /man/diagnoseMultiplePlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DiagnosePlp.R 3 | \name{diagnoseMultiplePlp} 4 | \alias{diagnoseMultiplePlp} 5 | \title{Run a list of predictions diagnoses} 6 | \usage{ 7 | diagnoseMultiplePlp( 8 | databaseDetails = createDatabaseDetails(), 9 | modelDesignList = list(createModelDesign(targetId = 1, outcomeId = 2, modelSettings = 10 | setLassoLogisticRegression()), createModelDesign(targetId = 1, outcomeId = 3, 11 | 
modelSettings = setLassoLogisticRegression())), 12 | cohortDefinitions = NULL, 13 | logSettings = createLogSettings(verbosity = "DEBUG", timeStamp = TRUE, logName = 14 | "diagnosePlp Log"), 15 | saveDirectory = NULL 16 | ) 17 | } 18 | \arguments{ 19 | \item{databaseDetails}{The database settings created using \code{createDatabaseDetails()}} 20 | 21 | \item{modelDesignList}{A list of model designs created using \code{createModelDesign()}} 22 | 23 | \item{cohortDefinitions}{A list of cohort definitions for the target and outcome cohorts} 24 | 25 | \item{logSettings}{The setting specifying the logging for the analyses created using \code{createLogSettings()}} 26 | 27 | \item{saveDirectory}{Name of the folder where all the outputs will be written to.} 28 | } 29 | \value{ 30 | A data frame with the following columns: \tabular{ll}{ \verb{analysisId} \tab The unique identifier 31 | for a set of analysis choices.\cr \verb{targetId} \tab The ID of the target cohort populations.\cr 32 | \verb{outcomeId} \tab The ID of the outcomeId.\cr \verb{dataLocation} \tab The location where the plpData was saved 33 | \cr \verb{the settings ids} \tab The ids for all other settings used for model development.\cr } 34 | } 35 | \description{ 36 | Run a list of predictions diagnoses 37 | } 38 | \details{ 39 | This function will run all specified prediction design diagnoses.
40 | } 41 | -------------------------------------------------------------------------------- /man/plotPreferencePDF.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotPreferencePDF} 4 | \alias{plotPreferencePDF} 5 | \title{Plot the preference score probability density function, showing prediction overlap between true and false cases 6 | #'} 7 | \usage{ 8 | plotPreferencePDF( 9 | plpResult, 10 | typeColumn = "evaluation", 11 | saveLocation = NULL, 12 | fileName = "plotPreferencePDF.png" 13 | ) 14 | } 15 | \arguments{ 16 | \item{plpResult}{A plp result object as generated using the \code{\link{runPlp}} function.} 17 | 18 | \item{typeColumn}{The name of the column specifying the evaluation type} 19 | 20 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 21 | 22 | \item{fileName}{Name of the file to save to plot, for example 23 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 24 | supported file formats.} 25 | } 26 | \value{ 27 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 28 | format. 
29 | } 30 | \description{ 31 | Plot the preference score probability density function, showing prediction overlap between true and false cases 32 | #' 33 | } 34 | \details{ 35 | Create a plot showing the preference score probability density function, showing prediction overlap between true and false cases 36 | #' 37 | } 38 | \examples{ 39 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 40 | \donttest{ \dontshow{ # takes too long } 41 | data("simulationProfile") 42 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 43 | saveLoc <- file.path(tempdir(), "plotPreferencePDF") 44 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 45 | plotPreferencePDF(results) 46 | # clean up 47 | unlink(saveLoc, recursive = TRUE) 48 | } 49 | \dontshow{\}) # examplesIf} 50 | } 51 | -------------------------------------------------------------------------------- /man/covariateSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CovariateSummary.R 3 | \name{covariateSummary} 4 | \alias{covariateSummary} 5 | \title{covariateSummary} 6 | \usage{ 7 | covariateSummary( 8 | covariateData, 9 | cohort, 10 | labels = NULL, 11 | strata = NULL, 12 | variableImportance = NULL, 13 | featureEngineering = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{covariateData}{The covariateData part of the plpData that is 18 | extracted using \code{getPlpData}} 19 | 20 | \item{cohort}{The patient cohort to calculate the summary} 21 | 22 | \item{labels}{A data.frame with the columns rowId and outcomeCount} 23 | 24 | \item{strata}{A data.frame containing the columns rowId, strataName} 25 | 26 | \item{variableImportance}{A data.frame with the columns covariateId and 27 | value (the variable importance value)} 28 | 29 | \item{featureEngineering}{(currently not used ) 30 | A function or list of functions specifying any feature 
engineering 31 | to create covariates before summarising} 32 | } 33 | \value{ 34 | A data.frame containing: CovariateCount, CovariateMean and CovariateStDev 35 | for any specified stratification 36 | } 37 | \description{ 38 | Summarises the covariateData to calculate the mean and standard deviation per covariate 39 | if the labels are given it also stratifies this by class label and if the trainRowIds and testRowIds 40 | specifying the patients in the train/test sets respectively are input, these values are also stratified 41 | by train and test set 42 | } 43 | \details{ 44 | The function calculates various metrics to measure the performance of the model 45 | } 46 | \examples{ 47 | data("simulationProfile") 48 | plpData <- simulatePlpData(simulationProfile, n = 100, seed = 42) 49 | covariateSummary <- covariateSummary(plpData$covariateData, plpData$cohorts) 50 | head(covariateSummary) 51 | } 52 | -------------------------------------------------------------------------------- /man/getDemographicSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DemographicSummary.R 3 | \name{getDemographicSummary} 4 | \alias{getDemographicSummary} 5 | \title{Get a demographic summary} 6 | \usage{ 7 | getDemographicSummary(prediction, predictionType, typeColumn = "evaluation") 8 | } 9 | \arguments{ 10 | \item{prediction}{A prediction object} 11 | 12 | \item{predictionType}{The type of prediction (binary or survival)} 13 | 14 | \item{typeColumn}{A column that is used to stratify the results} 15 | } 16 | \value{ 17 | A dataframe with the demographic summary 18 | } 19 | \description{ 20 | Get a demographic summary 21 | } 22 | \details{ 23 | Generates a data.frame with a prediction summary per each 5 year age group 24 | and gender group 25 | } 26 | \examples{ 27 | \donttest{ \dontshow{ # takes too long } 28 | # simulate data 29 | data("simulationProfile") 30 | 
plpData <- simulatePlpData(simulationProfile, n = 500, seed = 42) 31 | # create study population, split into train/test and preprocess with default settings 32 | population <- createStudyPopulation(plpData, outcomeId = 3) 33 | data <- splitData(plpData, population, createDefaultSplitSetting()) 34 | data$Train$covariateData <- preprocessData(data$Train$covariateData) 35 | saveLoc <- file.path(tempdir(), "demographicSummary") 36 | # fit a lasso logistic regression model using the training data 37 | plpModel <- fitPlp(data$Train, modelSettings=setLassoLogisticRegression(seed=42), 38 | analysisId=1, analysisPath=saveLoc) 39 | demographicSummary <- getDemographicSummary(plpModel$prediction, 40 | "binary", 41 | typeColumn = "evaluationType") 42 | # show the demographic summary dataframe 43 | str(demographicSummary) 44 | # clean up 45 | unlink(saveLoc, recursive = TRUE) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /man/outcomeSurvivalPlot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{outcomeSurvivalPlot} 4 | \alias{outcomeSurvivalPlot} 5 | \title{Plot the outcome incidence over time} 6 | \usage{ 7 | outcomeSurvivalPlot( 8 | plpData, 9 | outcomeId, 10 | populationSettings = createStudyPopulationSettings(binary = TRUE, includeAllOutcomes = 11 | TRUE, firstExposureOnly = FALSE, washoutPeriod = 0, removeSubjectsWithPriorOutcome = 12 | TRUE, priorOutcomeLookback = 99999, requireTimeAtRisk = FALSE, riskWindowStart = 1, 13 | startAnchor = "cohort start", riskWindowEnd = 3650, endAnchor = "cohort start"), 14 | riskTable = TRUE, 15 | confInt = TRUE, 16 | yLabel = "Fraction of those who are outcome free in target population" 17 | ) 18 | } 19 | \arguments{ 20 | \item{plpData}{The plpData object returned by running getPlpData()} 21 | 22 | \item{outcomeId}{The cohort id corresponding to 
the outcome} 23 | 24 | \item{populationSettings}{The population settings created using \code{createStudyPopulationSettings}} 25 | 26 | \item{riskTable}{(binary) Whether to include a table at the bottom of the plot showing the number of people at risk over time} 27 | 28 | \item{confInt}{(binary) Whether to include a confidence interval} 29 | 30 | \item{yLabel}{(string) The label for the y-axis} 31 | } 32 | \value{ 33 | A \code{ggsurvplot} object 34 | } 35 | \description{ 36 | Plot the outcome incidence over time 37 | } 38 | \details{ 39 | This creates a survival plot that can be used to pick a suitable time-at-risk period 40 | } 41 | \examples{ 42 | \dontshow{if (rlang::is_installed("survminer")) withAutoprint(\{ # examplesIf} 43 | data("simulationProfile") 44 | plpData <- simulatePlpData(simulationProfile, n = 999, seed = 42) 45 | plotObject <- outcomeSurvivalPlot(plpData, outcomeId = 3) 46 | print(plotObject) 47 | \dontshow{\}) # examplesIf} 48 | } 49 | -------------------------------------------------------------------------------- /tests/testthat/test-PredictionDistribution.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | test_that("getPredictionDistribution binary type", { 17 | ePrediction <- data.frame( 18 | value = runif(100), 19 | outcomeCount = round(runif(100)), 20 | evaluation = rep("Test", 100) 21 | ) 22 | predSum <- getPredictionDistribution( 23 | prediction = ePrediction, 24 | predictionType = "binary", 25 | typeColumn = "evaluation" 26 | ) 27 | 28 | expect_equal(nrow(predSum), 2) 29 | expect_equal(ncol(predSum), 12) 30 | 31 | 32 | 33 | predBinary <- getPredictionDistribution_binary( 34 | prediction = ePrediction, 35 | evaluation = rep("Test", 100), 36 | evalColumn = "evaluation" 37 | ) 38 | 39 | expect_equal(predBinary, predSum) 40 | }) 41 | 42 | 43 | test_that("getPredictionDistribution survival type", { 44 | ePrediction <- data.frame( 45 | value = runif(100), 46 | outcomeCount = round(runif(100)), 47 | evaluation = rep("Test", 100) 48 | ) 49 | 50 | predSurvival <- getPredictionDistribution_survival( 51 | prediction = ePrediction, 52 | evaluation = rep("Test", 100), 53 | evalColumn = "evaluation" 54 | ) 55 | 56 | expect_true(is.null(predSurvival)) 57 | }) 58 | -------------------------------------------------------------------------------- /man/setLassoLogisticRegression.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CyclopsSettings.R 3 | \name{setLassoLogisticRegression} 4 | \alias{setLassoLogisticRegression} 5 | \title{Create modelSettings for lasso logistic regression} 6 | \usage{ 7 | setLassoLogisticRegression( 8 | variance = 0.01, 9 | seed = NULL, 10 | includeCovariateIds = c(), 11 | noShrinkage = c(0), 12 | threads = -1, 13 | forceIntercept = FALSE, 14 | upperLimit = 20, 15 | lowerLimit = 0.01, 16 | tolerance = 2e-06, 17 | maxIterations = 3000, 18 | priorCoefs = NULL 19 | ) 20 | } 21 | \arguments{ 22 | \item{variance}{Numeric: prior distribution starting variance} 23 | 24 | \item{seed}{An option to add a seed when training the model} 25 | 
26 | \item{includeCovariateIds}{a set of covariateIds to limit the analysis to} 27 | 28 | \item{noShrinkage}{a set of covariates which are to be forced to be included 29 | in the final model. Default is the intercept} 30 | 31 | \item{threads}{An option to set number of threads when training model.} 32 | 33 | \item{forceIntercept}{Logical: Force intercept coefficient into prior} 34 | 35 | \item{upperLimit}{Numeric: Upper prior variance limit for grid-search} 36 | 37 | \item{lowerLimit}{Numeric: Lower prior variance limit for grid-search} 38 | 39 | \item{tolerance}{Numeric: maximum relative change in convergence criterion from 40 | successive iterations to achieve convergence} 41 | 42 | \item{maxIterations}{Integer: maximum iterations of Cyclops to attempt 43 | before returning a failed-to-converge error} 44 | 45 | \item{priorCoefs}{Use coefficients from a previous model as starting 46 | points for model fit (transfer learning)} 47 | } 48 | \value{ 49 | \code{modelSettings} object 50 | } 51 | \description{ 52 | Create modelSettings for lasso logistic regression 53 | } 54 | \examples{ 55 | modelLasso <- setLassoLogisticRegression(seed=42) 56 | } 57 | -------------------------------------------------------------------------------- /man/plotGeneralizability.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotGeneralizability} 4 | \alias{plotGeneralizability} 5 | \title{Plot the train/test generalizability diagnostic} 6 | \usage{ 7 | plotGeneralizability( 8 | covariateSummary, 9 | saveLocation = NULL, 10 | fileName = "Generalizability.png" 11 | ) 12 | } 13 | \arguments{ 14 | \item{covariateSummary}{A prediction object as generated using the 15 | \code{\link{runPlp}} function.} 16 | 17 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 18 | 19 | \item{fileName}{Name of the file to save to plot, for 
example 20 | 'plot.png'. See the function \code{ggsave} in the ggplot2 package for 21 | supported file formats.} 22 | } 23 | \value{ 24 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to file in a different 25 | format. 26 | } 27 | \description{ 28 | Plot the train/test generalizability diagnostic 29 | } 30 | \details{ 31 | Create a plot showing the train/test generalizability diagnostic 32 | #' 33 | } 34 | \examples{ 35 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 36 | \donttest{ \dontshow{ # takes too long } 37 | data("simulationProfile") 38 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 39 | population <- createStudyPopulation(plpData, outcomeId = 3) 40 | data <- splitData(plpData, population = population) 41 | strata <- data.frame( 42 | rowId = c(data$Train$labels$rowId, data$Test$labels$rowId), 43 | strataName = c(rep("Train", nrow(data$Train$labels)), 44 | rep("Test", nrow(data$Test$labels)))) 45 | covariateSummary <- covariateSummary(plpData$covariateData, 46 | cohort = dplyr::select(population, "rowId"), 47 | strata = strata, labels = population) 48 | plotGeneralizability(covariateSummary) 49 | } 50 | \dontshow{\}) # examplesIf} 51 | } 52 | -------------------------------------------------------------------------------- /tests/testthat/test-fitting.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | modelSettings <- setLassoLogisticRegression() 18 | 19 | test_that("fitPlp", { 20 | skip_if_offline() 21 | plpModel <- fitPlp( 22 | trainData = trainData, 23 | modelSettings = modelSettings, 24 | search = "grid", 25 | analysisId = "fitting", 26 | analysisPath = tempdir() 27 | ) 28 | 29 | expect_s3_class(plpModel, "plpModel") 30 | }) 31 | 32 | test_that("fitPlp input errors", { 33 | skip_if_offline() 34 | expect_error( 35 | fitPlp( 36 | trainData = trainData, 37 | modelSettings = modelSettings, 38 | analysisPath = tempDir() 39 | ) 40 | ) 41 | 42 | expect_error( 43 | fitPlp( 44 | trainData = list(covariateData = NULL), 45 | modelSettings = modelSettings, 46 | analysisId = "fitting", 47 | analysisPath = tempDir() 48 | ) 49 | ) 50 | 51 | expect_error( 52 | fitPlp( 53 | trainData = trainData, 54 | modelSettings = NULL, 55 | analysisId = "fitting", 56 | analysisPath = tempDir() 57 | ) 58 | ) 59 | 60 | expect_error( 61 | fitPlp( 62 | trainData = trainData, 63 | modelSettings = modelSettings, 64 | analysisId = "fitting" 65 | ) 66 | ) 67 | }) 68 | -------------------------------------------------------------------------------- /man/evaluatePlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/EvaluatePlp.R 3 | \name{evaluatePlp} 4 | \alias{evaluatePlp} 5 | \title{evaluatePlp} 6 | \usage{ 7 | evaluatePlp(prediction, typeColumn = "evaluationType") 8 | } 9 | \arguments{ 10 | \item{prediction}{The patient 
level prediction model's prediction} 11 | 12 | \item{typeColumn}{The column name in the prediction object that is used to 13 | stratify the evaluation} 14 | } 15 | \value{ 16 | An object of class plpEvaluation containing the following components 17 | \itemize{ 18 | \item evaluationStatistics: A data frame containing the evaluation statistics' 19 | \item thresholdSummary: A data frame containing the threshold summary' 20 | \item demographicSummary: A data frame containing the demographic summary' 21 | \item calibrationSummary: A data frame containing the calibration summary' 22 | \item predictionDistribution: A data frame containing the prediction distribution' 23 | } 24 | } 25 | \description{ 26 | Evaluates the performance of the patient level prediction model 27 | } 28 | \details{ 29 | The function calculates various metrics to measure the performance of the model 30 | } 31 | \examples{ 32 | \donttest{ \dontshow{ # takes too long to run } 33 | data("simulationProfile") 34 | plpData <- simulatePlpData(simulationProfile, n = 1500, seed = 42) 35 | population <- createStudyPopulation(plpData, outcomeId = 3, 36 | populationSettings = createStudyPopulationSettings()) 37 | data <- splitData(plpData, population, splitSettings=createDefaultSplitSetting(splitSeed=42)) 38 | data$Train$covariateData <- preprocessData(data$Train$covariateData, 39 | createPreprocessSettings()) 40 | path <- file.path(tempdir(), "plp") 41 | model <- fitPlp(data$Train, modelSettings=setLassoLogisticRegression(seed=42), 42 | analysisId=1, analysisPath = path) 43 | evaluatePlp(model$prediction) # Train and CV metrics 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /man/setGradientBoostingMachine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/GradientBoostingMachine.R 3 | \name{setGradientBoostingMachine} 4 | 
\alias{setGradientBoostingMachine} 5 | \title{Create setting for gradient boosting machine model using gbm_xgboost implementation} 6 | \usage{ 7 | setGradientBoostingMachine( 8 | ntrees = c(100, 300), 9 | nthread = 20, 10 | earlyStopRound = 25, 11 | maxDepth = c(4, 6, 8), 12 | minChildWeight = 1, 13 | learnRate = c(0.05, 0.1, 0.3), 14 | scalePosWeight = 1, 15 | lambda = 1, 16 | alpha = 0, 17 | seed = sample(1e+07, 1) 18 | ) 19 | } 20 | \arguments{ 21 | \item{ntrees}{The number of trees to build} 22 | 23 | \item{nthread}{The number of computer threads to use (how many cores do you have?)} 24 | 25 | \item{earlyStopRound}{If the performance does not increase over earlyStopRound number of trees then training stops (this prevents overfitting)} 26 | 27 | \item{maxDepth}{Maximum depth of each tree - a large value will lead to slow model training} 28 | 29 | \item{minChildWeight}{Minimum sum of of instance weight in a child node - larger values are more conservative} 30 | 31 | \item{learnRate}{The boosting learn rate} 32 | 33 | \item{scalePosWeight}{Controls weight of positive class in loss - useful for imbalanced classes} 34 | 35 | \item{lambda}{L2 regularization on weights - larger is more conservative} 36 | 37 | \item{alpha}{L1 regularization on weights - larger is more conservative} 38 | 39 | \item{seed}{An option to add a seed when training the final model} 40 | } 41 | \value{ 42 | A modelSettings object that can be used to fit the model 43 | } 44 | \description{ 45 | Create setting for gradient boosting machine model using gbm_xgboost implementation 46 | } 47 | \examples{ 48 | \dontshow{if (rlang::is_installed("xgboost")) withAutoprint(\{ # examplesIf} 49 | modelGbm <- setGradientBoostingMachine( 50 | ntrees = c(10, 100), nthread = 20, 51 | maxDepth = c(4, 6), learnRate = c(0.1, 0.3) 52 | ) 53 | \dontshow{\}) # examplesIf} 54 | } 55 | -------------------------------------------------------------------------------- /man/toSparseM.Rd: 
-------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Formatting.R 3 | \name{toSparseM} 4 | \alias{toSparseM} 5 | \title{Convert the plpData in COO format into a sparse R matrix} 6 | \usage{ 7 | toSparseM(plpData, cohort = NULL, map = NULL) 8 | } 9 | \arguments{ 10 | \item{plpData}{An object of type \code{plpData} with covariate in coo format - the patient level prediction 11 | data extracted from the CDM.} 12 | 13 | \item{cohort}{If specified the plpData is restricted to the rowIds in the cohort (otherwise plpData$labels is used)} 14 | 15 | \item{map}{A covariate map (telling us the column number for covariates)} 16 | } 17 | \value{ 18 | Returns a list, containing the data as a sparse matrix, the plpData covariateRef 19 | and a data.frame named map that tells us what covariate corresponds to each column 20 | This object is a list with the following components: \describe{ 21 | \item{data}{A sparse matrix with the rows corresponding to each person in the plpData and the columns corresponding to the covariates.} 22 | \item{covariateRef}{The plpData covariateRef.} 23 | \item{map}{A data.frame containing the data column ids and the corresponding covariateId from covariateRef.} 24 | } 25 | } 26 | \description{ 27 | Converts the standard plpData to a sparse matrix 28 | } 29 | \details{ 30 | This function converts the covariates \code{Andromeda} table in COO format into a sparse matrix from 31 | the package Matrix 32 | } 33 | \examples{ 34 | \donttest{ \dontshow{ # takes too long } 35 | library(dplyr) 36 | data("simulationProfile") 37 | plpData <- simulatePlpData(simulationProfile, n = 100, seed = 42) 38 | # how many covariates are there before we convert to sparse matrix 39 | plpData$covariateData$covariates \%>\% 40 | dplyr::group_by(.data$covariateId) \%>\% 41 | dplyr::summarise(n = n()) \%>\% 42 | dplyr::collect() \%>\% nrow() 43 | sparseData <- 
toSparseM(plpData, cohort=plpData$cohorts) 44 | # how many covariates are there after we convert to sparse matrix' 45 | sparseData$dataMatrix@Dim[2] 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /tests/testthat/test-featureImportance.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | 18 | test_that("pfi feature importance returns data.frame", { 19 | skip_if_offline() 20 | # limit to a sample of 2 covariates for faster test 21 | covariates <- plpResult$model$covariateImportance %>% 22 | dplyr::filter("covariateValue" != 0) %>% 23 | dplyr::select("covariateId") %>% 24 | dplyr::arrange(desc("covariateValue")) %>% 25 | dplyr::pull() 26 | 27 | # if the model had non-zero covariates 28 | if (length(covariates) > 0) { 29 | if (length(covariates) > 2) { 30 | covariates <- covariates[1:2] 31 | } 32 | pfiTest <- pfi(plpResult, population, plpData, 33 | repeats = 1, 34 | covariates = covariates, cores = 1, log = NULL, 35 | logthreshold = "INFO" 36 | ) 37 | 38 | expect_equal(class(pfiTest), "data.frame") 39 | expect_equal(sum(names(pfiTest) %in% c("covariateId", "pfi")), 2) 40 | expect_true(all(!is.nan(pfiTest$pfi))) 41 | } 42 | }) 43 | 44 | test_that("pfi feature importance works with logger or without covariates", { 45 | skip_if_offline() 46 | pfiTest <- pfi(tinyResults, population, nanoData, 47 | cores = 1, 48 | covariates = NULL, log = file.path(tempdir(), "pfiLog") 49 | ) 50 | 51 | expect_equal(class(pfiTest), "data.frame") 52 | expect_equal(sum(names(pfiTest) %in% c("covariateId", "pfi")), 2) 53 | expect_true(all(!is.nan(pfiTest$pfi))) 54 | }) 55 | -------------------------------------------------------------------------------- /man/pfi.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/FeatureImportance.R 3 | \name{pfi} 4 | \alias{pfi} 5 | \title{Permutation Feature Importance} 6 | \usage{ 7 | pfi( 8 | plpResult, 9 | population, 10 | plpData, 11 | repeats = 1, 12 | covariates = NULL, 13 | cores = NULL, 14 | log = NULL, 15 | logthreshold = "INFO" 16 | ) 17 | } 18 | \arguments{ 19 | \item{plpResult}{An object of type \code{runPlp}} 20 | 21 | \item{population}{The population created using createStudyPopulation() who will 
have their risks predicted} 22 | 23 | \item{plpData}{An object of type \code{plpData} - the patient level prediction 24 | data extracted from the CDM.} 25 | 26 | \item{repeats}{The number of times to permute each covariate} 27 | 28 | \item{covariates}{A vector of covariates to calculate the pfi for. If NULL it uses all covariates included in the model.} 29 | 30 | \item{cores}{Number of cores to use when running this (it runs in parallel)} 31 | 32 | \item{log}{A location to save the log for running pfi} 33 | 34 | \item{logthreshold}{The log threshold (e.g., INFO, TRACE, ...)} 35 | } 36 | \value{ 37 | A dataframe with the covariateIds and the pfi (change in AUC caused by permuting the covariate) value 38 | } 39 | \description{ 40 | Calculate the permutation feature importance (pfi) for a PLP model. 41 | } 42 | \details{ 43 | The function permutes the each covariate/features \code{repeats} times and 44 | calculates the mean AUC change caused by the permutation. 45 | } 46 | \examples{ 47 | \donttest{ \dontshow{ # takes too long } 48 | library(dplyr) 49 | # simulate some data 50 | data("simulationProfile") 51 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 52 | # now fit a model 53 | saveLoc <- file.path(tempdir(), "pfi") 54 | plpResult <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 55 | population <- createStudyPopulation(plpData, outcomeId = 3) 56 | pfi(plpResult, population, plpData, repeats = 1, cores = 1) 57 | # compare to model coefficients 58 | plpResult$model$covariateImportance \%>\% dplyr::filter(.data$covariateValue != 0) 59 | # clean up 60 | unlink(saveLoc, recursive = TRUE) 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /man/createDatabaseSchemaSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/uploadToDatabase.R 3 | 
\name{createDatabaseSchemaSettings} 4 | \alias{createDatabaseSchemaSettings} 5 | \title{Create the PatientLevelPrediction database result schema settings} 6 | \usage{ 7 | createDatabaseSchemaSettings( 8 | resultSchema = "main", 9 | tablePrefix = "", 10 | targetDialect = "sqlite", 11 | tempEmulationSchema = getOption("sqlRenderTempEmulationSchema"), 12 | cohortDefinitionSchema = resultSchema, 13 | tablePrefixCohortDefinitionTables = tablePrefix, 14 | databaseDefinitionSchema = resultSchema, 15 | tablePrefixDatabaseDefinitionTables = tablePrefix 16 | ) 17 | } 18 | \arguments{ 19 | \item{resultSchema}{(string) The name of the database schema with the result tables.} 20 | 21 | \item{tablePrefix}{(string) A string that appends to the PatientLevelPrediction result tables} 22 | 23 | \item{targetDialect}{(string) The database management system being used} 24 | 25 | \item{tempEmulationSchema}{(string) The temp schema used when the database management system is oracle} 26 | 27 | \item{cohortDefinitionSchema}{(string) The name of the database schema with the cohort definition tables (defaults to resultSchema).} 28 | 29 | \item{tablePrefixCohortDefinitionTables}{(string) A string that appends to the cohort definition tables} 30 | 31 | \item{databaseDefinitionSchema}{(string) The name of the database schema with the database definition tables (defaults to resultSchema).} 32 | 33 | \item{tablePrefixDatabaseDefinitionTables}{(string) A string that appends to the database definition tables} 34 | } 35 | \value{ 36 | Returns a list of class 'plpDatabaseResultSchema' with all the database settings 37 | } 38 | \description{ 39 | This function specifies where the results schema is and lets you pick a different schema for the cohorts and databases 40 | } 41 | \details{ 42 | This function can be used to specify the database settings used to upload PatientLevelPrediction results into a database 43 | } 44 | \examples{ 45 | createDatabaseSchemaSettings(resultSchema = "cdm", 46 | tablePrefix 
= "plp_") 47 | 48 | } 49 | -------------------------------------------------------------------------------- /man/plotLearningCurve.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/LearningCurve.R 3 | \name{plotLearningCurve} 4 | \alias{plotLearningCurve} 5 | \title{plotLearningCurve} 6 | \usage{ 7 | plotLearningCurve( 8 | learningCurve, 9 | metric = "AUROC", 10 | abscissa = "events", 11 | plotTitle = "Learning Curve", 12 | plotSubtitle = NULL, 13 | fileName = NULL 14 | ) 15 | } 16 | \arguments{ 17 | \item{learningCurve}{An object returned by \code{\link{createLearningCurve}} 18 | function.} 19 | 20 | \item{metric}{Specifies the metric to be plotted: 21 | \itemize{ 22 | \item{\code{'AUROC'} - use the area under the Receiver Operating 23 | Characteristic curve} 24 | \item{\code{'AUPRC'} - use the area under the Precision-Recall curve} 25 | \item{\code{'sBrier'} - use the scaled Brier score} 26 | }} 27 | 28 | \item{abscissa}{Specify the abscissa metric to be plotted: 29 | \itemize{ 30 | \item{\code{'events'} - use number of events} 31 | \item{\code{'observations'} - use number of observations} 32 | }} 33 | 34 | \item{plotTitle}{Title of the learning curve plot.} 35 | 36 | \item{plotSubtitle}{Subtitle of the learning curve plot.} 37 | 38 | \item{fileName}{Filename of plot to be saved, for example \code{'plot.png'}. 39 | See the function \code{ggsave} in the ggplot2 package for supported file 40 | formats.} 41 | } 42 | \value{ 43 | A ggplot object. Use the \code{\link[ggplot2]{ggsave}} function to save to 44 | file in a different format. 45 | } 46 | \description{ 47 | Create a plot of the learning curve using the object returned 48 | from \code{createLearningCurve}. 
49 | } 50 | \examples{ 51 | \dontshow{if (rlang::is_installed("parallel")) withAutoprint(\{ # examplesIf} 52 | \donttest{ \dontshow{ # takes too long to run } 53 | data("simulationProfile") 54 | plpData <- simulatePlpData(simulationProfile, n = 1800, seed = 42) 55 | outcomeId <- 3 56 | modelSettings <- setLassoLogisticRegression(seed=42) 57 | learningCurve <- createLearningCurve(plpData, outcomeId, modelSettings = modelSettings, 58 | saveDirectory = file.path(tempdir(), "learningCurve"), parallel = FALSE) 59 | plotLearningCurve(learningCurve) 60 | } 61 | \dontshow{\}) # examplesIf} 62 | } 63 | -------------------------------------------------------------------------------- /man/setSVM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SklearnClassifierSettings.R 3 | \name{setSVM} 4 | \alias{setSVM} 5 | \title{Create setting for the python sklearn SVM (SVC function)} 6 | \usage{ 7 | setSVM( 8 | C = list(1, 0.9, 2, 0.1), 9 | kernel = list("rbf"), 10 | degree = list(1, 3, 5), 11 | gamma = list("scale", 1e-04, 3e-05, 0.001, 0.01, 0.25), 12 | coef0 = list(0), 13 | shrinking = list(TRUE), 14 | tol = list(0.001), 15 | classWeight = list(NULL), 16 | cacheSize = 500, 17 | seed = sample(1e+05, 1) 18 | ) 19 | } 20 | \arguments{ 21 | \item{C}{(list) Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive. The penalty is a squared l2 penalty.} 22 | 23 | \item{kernel}{(list) Specifies the kernel type to be used in the algorithm. one of ‘linear’, ‘poly’, ‘rbf’, ‘sigmoid’, ‘precomputed’. If none is given ‘rbf’ will be used.} 24 | 25 | \item{degree}{(list) degree of kernel function is significant only in poly, rbf, sigmoid} 26 | 27 | \item{gamma}{(list) kernel coefficient for rbf and poly, by default 1/n_features will be taken. 
‘scale’, ‘auto’ or float, default=’scale’} 28 | 29 | \item{coef0}{(list) independent term in kernel function. It is only significant in poly/sigmoid.} 30 | 31 | \item{shrinking}{(list) whether to use the shrinking heuristic.} 32 | 33 | \item{tol}{(list) Tolerance for stopping criterion.} 34 | 35 | \item{classWeight}{(list) Class weight based on imbalance either 'balanced' or NULL} 36 | 37 | \item{cacheSize}{Specify the size of the kernel cache (in MB).} 38 | 39 | \item{seed}{A seed for the model} 40 | } 41 | \value{ 42 | a modelSettings object 43 | } 44 | \description{ 45 | Create setting for the python sklearn SVM (SVC function) 46 | } 47 | \examples{ 48 | \dontshow{ # dontrun reason: requires python's scikit-learn, checkSklearn() will error without it } 49 | \dontrun{ 50 | plpData <- getEunomiaPlpData() 51 | model <- setSVM(C = list(1), gamma = list("scale"), seed = 42) 52 | saveLoc <- file.path(tempdir(), "svm") 53 | results <- runPlp(plpData, modelSettings = model, saveDirectory = saveLoc) 54 | # clean up 55 | unlink(saveLoc, recursive = TRUE) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /man/viewMultiplePlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ViewShinyPlp.R 3 | \name{viewMultiplePlp} 4 | \alias{viewMultiplePlp} 5 | \title{open a local shiny app for viewing the result of a multiple PLP analyses} 6 | \usage{ 7 | viewMultiplePlp(analysesLocation) 8 | } 9 | \arguments{ 10 | \item{analysesLocation}{The directory containing the results (with the analysis_x folders)} 11 | } 12 | \value{ 13 | Opens a shiny app for interactively viewing the results 14 | } 15 | \description{ 16 | open a local shiny app for viewing the result of a multiple PLP analyses 17 | } 18 | \details{ 19 | Opens a shiny app for viewing the results of the models from various T,O, Tar and settings 20 | settings. 
21 | } 22 | \examples{ 23 | \dontshow{if (rlang::is_interactive() && rlang::is_installed("Eunomia") && rlang::is_installed("OhdsiShinyAppBuilder") && rlang::is_installed("curl") && curl::has_internet() ) withAutoprint(\{ # examplesIf} 24 | \donttest{ \dontshow{ # takes too long } 25 | connectionDetails <- Eunomia::getEunomiaConnectionDetails() 26 | Eunomia::createCohorts(connectionDetails) 27 | databaseDetails <- createDatabaseDetails(connectionDetails = connectionDetails, 28 | cdmDatabaseSchema = "main", 29 | cdmDatabaseName = "Eunomia", 30 | cdmDatabaseId = "1", 31 | targetId = 1, 32 | outcomeIds = 3) 33 | modelDesign <- createModelDesign(targetId = 1, 34 | outcomeId = 3, 35 | modelSettings = setLassoLogisticRegression()) 36 | saveLoc <- file.path(tempdir(), "viewMultiplePlp", "development") 37 | runMultiplePlp(databaseDetails = databaseDetails, modelDesignList = list(modelDesign), 38 | saveDirectory = saveLoc) 39 | # view result files 40 | dir(saveLoc, recursive = TRUE) 41 | # open shiny app 42 | viewMultiplePlp(analysesLocation = saveLoc) 43 | # clean up, shiny app can't be opened after the following has been run 44 | unlink(saveLoc, recursive = TRUE) 45 | } 46 | \dontshow{\}) # examplesIf} 47 | } 48 | -------------------------------------------------------------------------------- /man/plotNetBenefit.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Plotting.R 3 | \name{plotNetBenefit} 4 | \alias{plotNetBenefit} 5 | \title{Plot the net benefit} 6 | \usage{ 7 | plotNetBenefit( 8 | plpResults, 9 | modelNames = NULL, 10 | typeColumn = "evaluation", 11 | saveLocation = NULL, 12 | showPlot = TRUE, 13 | fileName = "netBenefit.png", 14 | evalType = NULL, 15 | ylim = NULL, 16 | xlim = NULL 17 | ) 18 | } 19 | \arguments{ 20 | \item{plpResults}{list of (named) plpResult objects or a single plpResult as 21 | generated using the \code{\link{runPlp}} 
function.} 22 | 23 | \item{modelNames}{(optional) names of the models to be used in the plot. If NULL, the names of the plpResults are used. Must have the same length as plpResults.} 24 | 25 | \item{typeColumn}{The name of the column specifying the evaluation type} 26 | 27 | \item{saveLocation}{Directory to save plot (if NULL plot is not saved)} 28 | 29 | \item{showPlot}{If TRUE, the plot is shown on the screen, if FALSE the plot 30 | object is returned without plotting.} 31 | 32 | \item{fileName}{Name of the file to save to plot, for example 'plot.png'. See the function \code{ggsave} in the ggplot2 package for supported file formats.} 33 | 34 | \item{evalType}{Which evaluation type to plot for. For example \code{Test}, \code{Train}. If NULL everything is plotted} 35 | 36 | \item{ylim}{The y limits for the plot, if NULL the limits are calculated from the data} 37 | 38 | \item{xlim}{The x limits for the plot, if NULL the limits are calculated from the data} 39 | } 40 | \value{ 41 | A list of ggplot objects or a single ggplot object if only one evaluation type is plotted 42 | } 43 | \description{ 44 | Plot the net benefit 45 | } 46 | \examples{ 47 | \dontshow{if (rlang::is_installed("ggplot2")) withAutoprint(\{ # examplesIf} 48 | \donttest{ \dontshow{ # takes too long } 49 | data("simulationProfile") 50 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 51 | saveLoc <- file.path(tempdir(), "plotNetBenefit") 52 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 53 | plotNetBenefit(results) 54 | # clean up 55 | unlink(saveLoc, recursive = TRUE) 56 | } 57 | \dontshow{\}) # examplesIf} 58 | } 59 | -------------------------------------------------------------------------------- /tests/testthat/test-andromedahelperfunctions.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 
5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # add limitCovariatesToPopulation(covariateData, rowIds) test 18 | test_that("batchRestrict", { 19 | skip_if_offline() 20 | metaData <- attr(plpData$covariateData, "metaData") 21 | covariateData <- 22 | PatientLevelPrediction:::batchRestrict( 23 | plpData$covariateData, 24 | population, 25 | sizeN = 1000000 26 | ) 27 | expect_s4_class(covariateData, "CovariateData") 28 | 29 | expect_equal( 30 | names(metaData), 31 | names(attr(covariateData, "metaData")) 32 | ) 33 | }) 34 | 35 | test_that("limitPop with timeRef", { 36 | covs <- Andromeda::andromeda( 37 | covariates = data.frame( 38 | covariateId = c(1, 2, 1, 2, 2), 39 | rowId = c(1, 1, 2, 2, 2), 40 | value = c(1, 1, 1, 1, 1), 41 | timeId = c(1, 1, 1, 1, 2) 42 | ), 43 | covariateRef = data.frame( 44 | covariateId = c(1, 2), 45 | covariateName = c("cov1", "cov2"), 46 | analysisId = c(1, 2) 47 | ), 48 | analysisRef = data.frame( 49 | analysisId = c(1, 2), 50 | analysisName = c("analysis1", "analysis2") 51 | ), 52 | timeRef = data.frame( 53 | timePart = "day", 54 | timeInterval = 1, 55 | sequenceStartDay = 0, 56 | sequenceEndDay = 1 57 | ) 58 | ) 59 | class(covs) <- "CovariateData" 60 | rowIds <- c(2) 61 | limitedCovs <- limitCovariatesToPopulation(covs, rowIds) 62 | expect_equal( 63 | as.data.frame(limitedCovs$timeRef), 64 | as.data.frame(covs$timeRef) 65 | ) 66 | }) 67 | 
-------------------------------------------------------------------------------- /tests/testthat/test-getCalibration.R: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Observational Health Data Sciences and Informatics 2 | # 3 | # This file is part of PatientLevelPrediction 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | test_that("getCalibration binary", { 17 | pErediction <- data.frame( 18 | rowId = 1:100, 19 | evaluation = rep("Test", 100), 20 | value = runif(100), 21 | outcomeCount = round(runif(100)) 22 | ) 23 | attr(pErediction, "metaData")$predictionType <- "binary" 24 | calib <- getCalibrationSummary( 25 | prediction = pErediction, 26 | predictionType = "binary", 27 | typeColumn = "evaluation", 28 | numberOfStrata = 100, 29 | truncateFraction = 0.05 30 | ) 31 | 32 | expect_equal(nrow(calib), 100) 33 | expect_equal(ncol(calib), 12) 34 | expect_true("evaluation" %in% colnames(calib)) 35 | 36 | 37 | calibBinary <- getCalibrationSummary_binary( 38 | prediction = pErediction, 39 | evalColumn = "evaluation", 40 | numberOfStrata = 100, 41 | truncateFraction = 0.05 42 | ) 43 | 44 | expect_equal(calib, calibBinary) 45 | }) 46 | 47 | 48 | 49 | 50 | test_that("getCalibration survival", { 51 | pErediction <- data.frame( 52 | rowId = 1:100, 53 | evaluation = rep("Test", 100), 54 | value = runif(100), 55 | survivalTime = 50 + sample(2 * 365, 100), 56 | outcomeCount = 
round(runif(100)) 57 | ) 58 | 59 | calib <- getCalibrationSummary_survival( 60 | prediction = pErediction, 61 | evalColumn = "evaluation", 62 | numberOfStrata = 50, 63 | truncateFraction = 0.05, 64 | timepoint = 365 65 | ) 66 | 67 | expect_true("evaluation" %in% colnames(calib)) 68 | expect_equal(nrow(calib), 50) 69 | expect_equal(ncol(calib), 7) 70 | }) 71 | -------------------------------------------------------------------------------- /man/createDefaultSplitSetting.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DataSplitting.R 3 | \name{createDefaultSplitSetting} 4 | \alias{createDefaultSplitSetting} 5 | \title{Create the settings for defining how the plpData are split into 6 | test/validation/train sets using default splitting functions 7 | (either random stratified by outcome, time or subject splitting)} 8 | \usage{ 9 | createDefaultSplitSetting( 10 | testFraction = 0.25, 11 | trainFraction = 0.75, 12 | splitSeed = sample(1e+05, 1), 13 | nfold = 3, 14 | type = "stratified" 15 | ) 16 | } 17 | \arguments{ 18 | \item{testFraction}{(numeric) A real number between 0 and 1 19 | indicating the test set fraction of the data} 20 | 21 | \item{trainFraction}{(numeric) A real number between 0 and 1 indicating the 22 | train set fraction of the data. 
If not set train is equal to 1 - test} 23 | 24 | \item{splitSeed}{(numeric) A seed to use when splitting the data for 25 | reproducibility (if not set a random number will be generated)} 26 | 27 | \item{nfold}{(numeric) An integer > 1 specifying the number of 28 | folds used in cross validation} 29 | 30 | \item{type}{(character) Choice of: \itemize{ 31 | \item'stratified' Each data point is 32 | randomly assigned into the test or a train fold set but this is done 33 | stratified such that the outcome rate is consistent in each partition 34 | \item'time' Older data are assigned 35 | into the training set and newer data are assigned into the test set 36 | \item'subject' Data are partitioned by 37 | subject, if a subject is in the data more than once, all the data points for 38 | the subject are assigned either into the test data or into the train data 39 | (not both). 40 | }} 41 | } 42 | \value{ 43 | An object of class \code{splitSettings} 44 | } 45 | \description{ 46 | Create the settings for defining how the plpData are split into 47 | test/validation/train sets using default splitting functions 48 | (either random stratified by outcome, time or subject splitting) 49 | } 50 | \details{ 51 | Returns an object of class \code{splitSettings} that specifies the 52 | splitting function that will be called and the settings 53 | } 54 | \examples{ 55 | createDefaultSplitSetting(testFraction=0.25, trainFraction=0.75, nfold=3, 56 | splitSeed=42) 57 | } 58 | -------------------------------------------------------------------------------- /man/fitPlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Fit.R 3 | \name{fitPlp} 4 | \alias{fitPlp} 5 | \title{fitPlp} 6 | \usage{ 7 | fitPlp(trainData, modelSettings, search = "grid", analysisId, analysisPath) 8 | } 9 | \arguments{ 10 | \item{trainData}{An object of type \code{trainData} created using \code{splitData} 
11 | data extracted from the CDM.} 12 | 13 | \item{modelSettings}{An object of class \code{modelSettings} created using 14 | one of the \code{createModelSettings} functions} 15 | 16 | \item{search}{The search strategy for the hyper-parameter selection (currently not used)} 17 | 18 | \item{analysisId}{The id of the analysis} 19 | 20 | \item{analysisPath}{The path of the analysis} 21 | } 22 | \value{ 23 | An object of class \code{plpModel} containing: 24 | 25 | \item{model}{The trained prediction model} 26 | \item{preprocessing}{The preprocessing required when applying the model} 27 | \item{prediction}{The cohort data.frame with the predicted risk column added} 28 | \item{modelDesign}{A list specifying the modelDesign settings used to fit the model} 29 | \item{trainDetails}{The model meta data} 30 | \item{covariateImportance}{The covariate importance for the model} 31 | } 32 | \description{ 33 | Train various models using a default parameter grid search or user specified 34 | parameters 35 | } 36 | \details{ 37 | The user can define the machine learning model to train 38 | } 39 | \examples{ 40 | \donttest{ \dontshow{ # takes too long } 41 | # simulate data 42 | data("simulationProfile") 43 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 44 | # create study population, split into train/test and preprocess with default settings 45 | population <- createStudyPopulation(plpData, outcomeId = 3) 46 | data <- splitData(plpData, population, createDefaultSplitSetting()) 47 | data$Train$covariateData <- preprocessData(data$Train$covariateData) 48 | saveLoc <- file.path(tempdir(), "fitPlp") 49 | # fit a lasso logistic regression model using the training data 50 | plpModel <- fitPlp(data$Train, modelSettings=setLassoLogisticRegression(seed=42), 51 | analysisId=1, analysisPath=saveLoc) 52 | # show evaluationSummary for model 53 | evaluatePlp(plpModel$prediction)$evaluationSummary 54 | # clean up 55 | unlink(saveLoc, recursive = TRUE) 56 | } 57 | } 58 | 
-------------------------------------------------------------------------------- /man/createStudyPopulation.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/PopulationSettings.R 3 | \name{createStudyPopulation} 4 | \alias{createStudyPopulation} 5 | \title{Create a study population} 6 | \usage{ 7 | createStudyPopulation( 8 | plpData, 9 | outcomeId = plpData$metaData$databaseDetails$outcomeIds[1], 10 | populationSettings = createStudyPopulationSettings(), 11 | population = NULL 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpData}{An object of type \code{plpData} as generated using 16 | \code{getplpData}.} 17 | 18 | \item{outcomeId}{The ID of the outcome.} 19 | 20 | \item{populationSettings}{An object of class populationSettings created using \code{createPopulationSettings}} 21 | 22 | \item{population}{If specified, this population will be used as the starting point instead of the 23 | cohorts in the \code{plpData} object.} 24 | } 25 | \value{ 26 | A data frame specifying the study population. This data frame will have the following columns: 27 | \describe{ 28 | \item{rowId}{A unique identifier for an exposure} 29 | \item{subjectId}{The person ID of the subject} 30 | \item{cohortStartdate}{The index date} 31 | \item{outcomeCount}{The number of outcomes observed during the risk window} 32 | \item{timeAtRisk}{The number of days in the risk window} 33 | \item{survivalTime}{The number of days until either the outcome or the end of the risk window} 34 | } 35 | } 36 | \description{ 37 | Create a study population 38 | } 39 | \details{ 40 | Create a study population by enforcing certain inclusion and exclusion criteria, defining 41 | a risk window, and determining which outcomes fall inside the risk window. 
42 | } 43 | \examples{ 44 | \donttest{ \dontshow{ # takes too long } 45 | data("simulationProfile") 46 | plpData <- simulatePlpData(simulationProfile, n = 100, seed = 42) 47 | # Create study population, require time at risk of 30 days. The risk window is 1 to 90 days. 48 | populationSettings <- createStudyPopulationSettings(requireTimeAtRisk = TRUE, 49 | minTimeAtRisk = 30, 50 | riskWindowStart = 1, 51 | riskWindowEnd = 90) 52 | population <- createStudyPopulation(plpData, outcomeId = 3, populationSettings) 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /man/getCalibrationSummary.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/CalibrationSummary.R 3 | \name{getCalibrationSummary} 4 | \alias{getCalibrationSummary} 5 | \title{Get a sparse summary of the calibration} 6 | \usage{ 7 | getCalibrationSummary( 8 | prediction, 9 | predictionType, 10 | typeColumn = "evaluation", 11 | numberOfStrata = 10, 12 | truncateFraction = 0.05 13 | ) 14 | } 15 | \arguments{ 16 | \item{prediction}{A prediction object as generated using the 17 | \code{\link{predict}} functions.} 18 | 19 | \item{predictionType}{The type of prediction (binary or survival)} 20 | 21 | \item{typeColumn}{A column that is used to stratify the results} 22 | 23 | \item{numberOfStrata}{The number of strata in the plot.} 24 | 25 | \item{truncateFraction}{This fraction of probability values will be ignored when plotting, to 26 | avoid the x-axis scale being dominated by a few outliers.} 27 | } 28 | \value{ 29 | A dataframe with the calibration summary 30 | } 31 | \description{ 32 | Get a sparse summary of the calibration 33 | } 34 | \details{ 35 | Generates a sparse summary showing the predicted probabilities and the observed fractions. Predictions are 36 | stratified into equally sized bins of predicted probabilities. 
37 | } 38 | \examples{ 39 | # simulate data 40 | data("simulationProfile") 41 | plpData <- simulatePlpData(simulationProfile, n = 500, seed = 42) 42 | # create study population, split into train/test and preprocess with default settings 43 | population <- createStudyPopulation(plpData, outcomeId = 3) 44 | data <- splitData(plpData, population, createDefaultSplitSetting()) 45 | data$Train$covariateData <- preprocessData(data$Train$covariateData) 46 | saveLoc <- file.path(tempdir(), "calibrationSummary") 47 | # fit a lasso logistic regression model using the training data 48 | plpModel <- fitPlp(data$Train, modelSettings=setLassoLogisticRegression(seed=42), 49 | analysisId=1, analysisPath=saveLoc) 50 | calibrationSummary <- getCalibrationSummary(plpModel$prediction, 51 | "binary", 52 | numberOfStrata = 10, 53 | typeColumn = "evaluationType") 54 | calibrationSummary 55 | # clean up 56 | unlink(saveLoc, recursive = TRUE) 57 | } 58 | -------------------------------------------------------------------------------- /vignettes/ClinicalModels.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Clinical Models" 3 | author: "Jenna Reps, Peter R. 
Rijnbeek" 4 | date: '`r Sys.Date()`' 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteEngine{knitr::rmarkdown} 8 | %\VignetteIndexEntry{Clinical Models} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ## Clinical models developed using the OHDSI PatientLevelPrediction framework 13 | 14 | | Title | Link | 15 | |----------------------|-------| 16 | | Using Machine Learning Applied to Real-World Healthcare Data for Predictive Analytics: An Applied Example in Bariatric Surgery | [Value in Health](https://doi.org/10.1016/j.jval.2019.01.011) | 17 | | Development and validation of a prognostic model predicting symptomatic hemorrhagic transformation in acute ischemic stroke at scale in the OHDSI network | [PLoS One](https://doi.org/10.1371/journal.pone.0226718) | 18 | | Wisdom of the CROUD: development and validation of a patient-level prediction model for opioid use disorder using population-level claims data | [PLoS One](https://doi.org/10.1371/journal.pone.0228632) | 19 | | Developing predictive models to determine Patients in End-of-life Care in Administrative datasets | [Drug Safety](https://doi.org/10.1007/s40264-020-00906-7) | 20 | | Predictors of diagnostic transition from major depressive disorder to bipolar disorder: a retrospective observational network study | [Translational psychiatry](https://doi.org/10.1038/s41398-021-01760-6) | 21 | | Seek COVER: using a disease proxy to rapidly develop and validate a personalized risk calculator for COVID-19 outcomes in an international network | [BMC Medical Research Methodology](https://doi.org/10.1186/s12874-022-01505-z) | 22 | | 90-Day all-cause mortality can be predicted following a total knee replacement: an international, network study to develop and validate a prediction model | [Knee Surgery, Sports Traumatology, Arthroscopy](https://doi.org/10.1007/s00167-021-06799-y) | 23 | | Machine learning and real-world data to predict lung cancer risk in routine care | [Cancer Epidemiology, Biomarkers & 
Prevention](https://doi.org/10.1158/1055-9965.EPI-22-0873) | 24 | | Development and validation of a patient-level model to predict dementia across a network of observational databases | [BMC medicine](https://doi.org/10.1186/s12916-024-03530-9) | 25 | -------------------------------------------------------------------------------- /man/extractDatabaseToCsv.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/SaveLoadPlp.R 3 | \name{extractDatabaseToCsv} 4 | \alias{extractDatabaseToCsv} 5 | \title{Exports all the results from a database into csv files} 6 | \usage{ 7 | extractDatabaseToCsv( 8 | conn = NULL, 9 | connectionDetails, 10 | databaseSchemaSettings = createDatabaseSchemaSettings(resultSchema = "main"), 11 | csvFolder, 12 | minCellCount = 5, 13 | sensitiveColumns = getPlpSensitiveColumns(), 14 | fileAppend = NULL 15 | ) 16 | } 17 | \arguments{ 18 | \item{conn}{The connection to the database with the results} 19 | 20 | \item{connectionDetails}{The connectionDetails for the result database} 21 | 22 | \item{databaseSchemaSettings}{The result database schema settings} 23 | 24 | \item{csvFolder}{Location to save the csv files} 25 | 26 | \item{minCellCount}{The min value to show in cells that are sensitive (values less than this value will be replaced with -1)} 27 | 28 | \item{sensitiveColumns}{A named list (name of table columns belong to) with a list of columns to apply the minCellCount to.} 29 | 30 | \item{fileAppend}{If set to a string this will be appended to the start of the csv file names} 31 | } 32 | \value{ 33 | The directory path where the results were saved 34 | } 35 | \description{ 36 | Exports all the results from a database into csv files 37 | } 38 | \details{ 39 | Extracts the results from a database into a set of csv files 40 | } 41 | \examples{ 42 | \donttest{ \dontshow{ # takes too long } 43 | # develop a simple model on simulated 
data 44 | data("simulationProfile") 45 | plpData <- simulatePlpData(simulationProfile, n = 500, seed = 42) 46 | saveLoc <- file.path(tempdir(), "extractDatabaseToCsv") 47 | results <- runPlp(plpData, outcomeId = 3, saveDirectory = saveLoc) 48 | # now upload the results to a sqlite database 49 | databasePath <- insertResultsToSqlite(saveLoc) 50 | # now extract the results to csv 51 | connectionDetails <- 52 | DatabaseConnector::createConnectionDetails(dbms = "sqlite", 53 | server = databasePath) 54 | extractDatabaseToCsv( 55 | connectionDetails = connectionDetails, 56 | csvFolder = file.path(saveLoc, "csv") 57 | ) 58 | # show csv file 59 | list.files(file.path(saveLoc, "csv")) 60 | # clean up 61 | unlink(saveLoc, recursive = TRUE) 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /man/insertResultsToSqlite.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/uploadToDatabase.R 3 | \name{insertResultsToSqlite} 4 | \alias{insertResultsToSqlite} 5 | \title{Create sqlite database with the results} 6 | \usage{ 7 | insertResultsToSqlite( 8 | resultLocation, 9 | cohortDefinitions = NULL, 10 | databaseList = NULL, 11 | sqliteLocation = file.path(resultLocation, "sqlite"), 12 | skipDiagnostics = FALSE 13 | ) 14 | } 15 | \arguments{ 16 | \item{resultLocation}{(string) location of directory where the main package results were saved} 17 | 18 | \item{cohortDefinitions}{A set of one or more cohorts extracted using ROhdsiWebApi::exportCohortDefinitionSet()} 19 | 20 | \item{databaseList}{A list created by \code{createDatabaseList} to specify the databases} 21 | 22 | \item{sqliteLocation}{(string) location of directory where the sqlite database will be saved} 23 | 24 | \item{skipDiagnostics}{Whether to skip uploading the diagnostics} 25 | } 26 | \value{ 27 | Returns the location of the sqlite database file 28 | } 29 | 
\description{ 30 | This function creates an sqlite database with the PLP result schema and inserts all results 31 | } 32 | \details{ 33 | This function can be used to upload PatientLevelPrediction results into an sqlite database 34 | } 35 | \examples{ 36 | \dontshow{if (rlang::is_installed("RSQLite") && rlang::is_installed("Eunomia") && rlang::is_installed("curl") && curl::has_internet()) withAutoprint(\{ # examplesIf} 37 | \donttest{ \dontshow{ # takes too long } 38 | plpData <- getEunomiaPlpData() 39 | saveLoc <- file.path(tempdir(), "insertResultsToSqlite") 40 | results <- runPlp(plpData, outcomeId = 3, analysisId = 1, saveDirectory = saveLoc) 41 | databaseFile <- insertResultsToSqlite(saveLoc, cohortDefinitions = NULL, 42 | sqliteLocation = file.path(saveLoc, "sqlite")) 43 | # check there is some data in the database 44 | library(DatabaseConnector) 45 | connectionDetails <- createConnectionDetails( 46 | dbms = "sqlite", 47 | server = databaseFile) 48 | conn <- connect(connectionDetails) 49 | # All tables should be created 50 | getTableNames(conn, databaseSchema = "main") 51 | # There is data in the tables 52 | querySql(conn, "SELECT * FROM main.model_designs limit 10") 53 | # clean up 54 | unlink(saveLoc, recursive = TRUE) 55 | } 56 | \dontshow{\}) # examplesIf} 57 | } 58 | -------------------------------------------------------------------------------- /man/recalibratePlp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Recalibration.R 3 | \name{recalibratePlp} 4 | \alias{recalibratePlp} 5 | \title{recalibratePlp} 6 | \usage{ 7 | recalibratePlp( 8 | prediction, 9 | analysisId, 10 | typeColumn = "evaluationType", 11 | method = c("recalibrationInTheLarge", "weakRecalibration") 12 | ) 13 | } 14 | \arguments{ 15 | \item{prediction}{A prediction dataframe} 16 | 17 | \item{analysisId}{The model analysisId} 18 | 19 | \item{typeColumn}{The column name 
where the strata types are specified} 20 | 21 | \item{method}{Method used to recalibrate ('recalibrationInTheLarge' or 'weakRecalibration' )} 22 | } 23 | \value{ 24 | A prediction dataframe with the recalibrated predictions added 25 | } 26 | \description{ 27 | Recalibrating a model using the recalibrationInTheLarge or weakRecalibration methods 28 | } 29 | \details{ 30 | 'recalibrationInTheLarge' calculates a single correction factor for the 31 | average predicted risks to match the average observed risks. 32 | 'weakRecalibration' fits a glm model to the logit of the predicted risks, 33 | also known as Platt scaling/logistic recalibration. 34 | } 35 | \examples{ 36 | prediction <- data.frame(rowId = 1:100, 37 | value = runif(100), 38 | outcomeCount = stats::rbinom(100, 1, 0.1), 39 | evaluationType = rep("validation", 100)) 40 | attr(prediction, "metaData") <- list(modelType = "binary") 41 | # since value is uniformly distributed but outcomeCount is not (prob <- 0.1) 42 | # the predictions are mis-calibrated 43 | outcomeRate <- mean(prediction$outcomeCount) 44 | observedRisk <- mean(prediction$value) 45 | message("outcome rate is: ", outcomeRate) 46 | message("observed risk is: ", observedRisk) 47 | # let's recalibrate the predictions 48 | prediction <- recalibratePlp(prediction, 49 | analysisId = "recalibration", 50 | method = "recalibrationInTheLarge") 51 | recalibratedRisk <- mean(prediction$value) 52 | message("recalibrated risk with recalibration in the large is: ", recalibratedRisk) 53 | prediction <- recalibratePlp(prediction, 54 | analysisId = "recalibration", 55 | method = "weakRecalibration") 56 | recalibratedRisk <- mean(prediction$value) 57 | message("recalibrated risk with weak recalibration is: ", recalibratedRisk) 58 | } 59 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: PatientLevelPrediction 2 | Type: Package 
3 | Title: Develop Clinical Prediction Models Using the Common Data Model 4 | Version: 6.5.1 5 | Date: 2025-10-14 6 | Authors@R: c( 7 | person("Egill", "Fridgeirsson", email = "e.fridgeirsson@erasmusmc.nl", role = c("aut", "cre")), 8 | person("Jenna", "Reps", email = "jreps@its.jnj.com", role = c("aut")), 9 | person("Martijn", "Schuemie", role = c("aut")), 10 | person("Marc", "Suchard", role = c("aut")), 11 | person("Patrick", "Ryan", role = c("aut")), 12 | person("Peter", "Rijnbeek", role = c("aut")), 13 | person("Observational Health Data Science and Informatics", role = c("cph"))) 14 | Description: A user friendly way to create patient level prediction models using 15 | the Observational Medical Outcomes Partnership Common Data Model. Given a cohort 16 | of interest and an outcome of interest, the package can use data in the Common 17 | Data Model to build a large set of features. These features can then be used to 18 | fit a predictive model with a number of machine learning algorithms. This is 19 | further described in Reps (2017) . 
20 | License: Apache License 2.0 21 | URL: https://ohdsi.github.io/PatientLevelPrediction/, https://github.com/OHDSI/PatientLevelPrediction 22 | BugReports: https://github.com/OHDSI/PatientLevelPrediction/issues 23 | VignetteBuilder: knitr 24 | Depends: 25 | R (>= 4.0.0) 26 | Imports: 27 | Andromeda, 28 | Cyclops (>= 3.0.0), 29 | DatabaseConnector (>= 6.0.0), 30 | digest, 31 | dplyr, 32 | FeatureExtraction (>= 3.0.0), 33 | Matrix, 34 | memuse, 35 | ParallelLogger (>= 2.0.0), 36 | pROC, 37 | PRROC, 38 | rlang, 39 | SqlRender (>= 1.1.3), 40 | tidyr, 41 | utils 42 | Suggests: 43 | curl, 44 | Eunomia (>= 2.0.0), 45 | glmnet, 46 | ggplot2, 47 | gridExtra, 48 | IterativeHardThresholding, 49 | knitr, 50 | lightgbm, 51 | Metrics, 52 | mgcv, 53 | OhdsiShinyAppBuilder (>= 1.0.0), 54 | parallel, 55 | pkgload, 56 | polspline, 57 | readr, 58 | ResourceSelection, 59 | ResultModelManager (>= 0.6.0), 60 | reticulate (>= 1.41), 61 | rmarkdown, 62 | RSQLite, 63 | scoring, 64 | survival, 65 | survminer, 66 | testthat, 67 | withr, 68 | xgboost (> 1.3.2.1) 69 | RoxygenNote: 7.3.3 70 | Encoding: UTF-8 71 | Config/testthat/edition: 3 72 | Roxygen: list(markdown = TRUE) 73 | -------------------------------------------------------------------------------- /man/validateExternal.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ExternalValidatePlp.R 3 | \name{validateExternal} 4 | \alias{validateExternal} 5 | \title{validateExternal - Validate model performance on new data} 6 | \usage{ 7 | validateExternal( 8 | validationDesignList, 9 | databaseDetails, 10 | logSettings = createLogSettings(verbosity = "INFO", logName = "validatePLP"), 11 | outputFolder, 12 | cohortDefinitions = NULL 13 | ) 14 | } 15 | \arguments{ 16 | \item{validationDesignList}{A list of objects created with \code{createValidationDesign}} 17 | 18 | \item{databaseDetails}{A list of objects of class 19 | 
\code{databaseDetails} created using \code{createDatabaseDetails}} 20 | 21 | \item{logSettings}{An object of \code{logSettings} created 22 | using \code{createLogSettings}} 23 | 24 | \item{outputFolder}{The directory to save the validation results to} 25 | 26 | \item{cohortDefinitions}{A cohortDefinitionSet object created with 27 | \code{CohortGenerator} 28 | (subfolders are created per database in validationDatabaseDetails)} 29 | } 30 | \value{ 31 | A list of results 32 | } 33 | \description{ 34 | validateExternal - Validate model performance on new data 35 | } 36 | \examples{ 37 | \dontshow{if (rlang::is_installed("Eunomia") && rlang::is_installed("curl") && curl::has_internet()) withAutoprint(\{ # examplesIf} 38 | \donttest{ \dontshow{ # takes too long } 39 | data("simulationProfile") 40 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 41 | # first fit a model on some data, default is a L1 logistic regression 42 | saveLoc <- file.path(tempdir(), "development") 43 | results <- runPlp(plpData, saveDirectory = saveLoc) 44 | # then create my validation design 45 | validationDesign <- createValidationDesign(1, 3, plpModelList = list(results$model)) 46 | # I will validate on Eunomia example database 47 | connectionDetails <- Eunomia::getEunomiaConnectionDetails() 48 | Eunomia::createCohorts(connectionDetails) 49 | databaseDetails <- createDatabaseDetails(connectionDetails = connectionDetails, 50 | cdmDatabaseSchema = "main", cdmDatabaseName = "Eunomia", cdmDatabaseId = 1, 51 | targetId = 1, outcomeIds = 3) 52 | path <- file.path(tempdir(), "validation") 53 | validateExternal(validationDesign, databaseDetails, outputFolder = path) 54 | # see generated result files 55 | dir(path, recursive = TRUE) 56 | # clean up 57 | unlink(saveLoc, recursive = TRUE) 58 | unlink(path, recursive = TRUE) 59 | } 60 | \dontshow{\}) # examplesIf} 61 | } 62 | -------------------------------------------------------------------------------- 
/tests/testthat/test-demographicSummary.R: --------------------------------------------------------------------------------
1 | # Copyright 2025 Observational Health Data Sciences and Informatics
2 | #
3 | # This file is part of PatientLevelPrediction
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | test_that("getDemographicSummary", {
18 | prediction <- data.frame(
19 | rowId = 1:100,
20 | ageYear = sample(100, 100, replace = TRUE),
21 | gender = sample(c(8507, "female"), 100, replace = TRUE),
22 | value = runif(100),
23 | outcomeCount = round(runif(100)),
24 | evaluation = rep("Test", 100)
25 | )
26 | 
27 | demoSum <- getDemographicSummary(
28 | prediction = prediction,
29 | predictionType = "binary",
30 | typeColumn = "evaluation"
31 | )
32 | 
33 | expect_equal(ncol(demoSum), 12)
34 | expect_true("evaluation" %in% colnames(demoSum))
35 | 
36 | # check correct gender length
37 | expect_equal(length(unique(prediction$gender)), length(unique(demoSum$genGroup)))
38 | 
39 | 
40 | demoSumBin <- getDemographicSummary_binary(
41 | prediction = prediction,
42 | evalColumn = "evaluation"
43 | )
44 | expect_equal(demoSum, demoSumBin)
45 | })
46 | 
47 | 
48 | # renamed from duplicate "getDemographicSummary" so test failures are unambiguous
49 | test_that("getDemographicSummary survival", {
50 | prediction <- data.frame(
51 | rowId = 1:100,
52 | ageYear = sample(100, 100, replace = TRUE),
53 | gender = sample(c(8507, "female"), 100, replace = TRUE),
54 | value = runif(100),
55 | outcomeCount = round(runif(100)),
56 | evaluation =
rep("Test", 100),
56 | survivalTime = 50 + sample(730, 100, replace = TRUE)
57 | )
58 | 
59 | demoSumSurv <- getDemographicSummary_survival(
60 | prediction = prediction,
61 | evalColumn = "evaluation",
62 | timepoint = 365
63 | )
64 | 
65 | expect_s3_class(demoSumSurv, "data.frame")
66 | expect_equal(ncol(demoSumSurv), 8)
67 | expect_true("evaluation" %in% colnames(demoSumSurv))
68 | 
69 | # check correct gender length
70 | expect_equal(length(unique(prediction$gender)), length(unique(demoSumSurv$genGroup)))
71 | })
72 | 
--------------------------------------------------------------------------------
/demo/LearningCurveDemo.R:
--------------------------------------------------------------------------------
1 | library(PatientLevelPrediction)
2 | 
3 | # This demo will generate a learning curve using 8 training set sizes
4 | # Dependent on your system it can take some time to run
5 | # If you have multiple cores we suggest to use them
6 | 
7 | selection <- readline(prompt="Would you like to demo the parallel version (y/n):")
8 | 
9 | # Generate simulated plpData from the simulation profile shipped in data/simulationProfile.rda
10 | data("simulationProfile")
11 | set.seed(1234)
12 | sampleSize <- 12000
13 | plpData <- simulatePlpData(
14 | simulationProfile,
15 | n = sampleSize
16 | )
17 | 
18 | # Create the study population
19 | populationSettings <- createStudyPopulationSettings(
20 | binary = TRUE,
21 | firstExposureOnly = FALSE,
22 | washoutPeriod = 0,
23 | removeSubjectsWithPriorOutcome = FALSE,
24 | priorOutcomeLookback = 99999,
25 | requireTimeAtRisk = TRUE,
26 | minTimeAtRisk = 0,
27 | riskWindowStart = 0,
28 | startAnchor = 'cohort start',
29 | riskWindowEnd = 365,
30 | endAnchor = 'cohort start'
31 | )
32 | 
33 | # Specify the prediction algorithm to be used
34 | modelSettings <- setLassoLogisticRegression()
35 | 
36 | # Specify a test fraction and a sequence of training event counts
37 | splitSettings <- createDefaultSplitSetting(
38 | testFraction = 0.2,
39 | type = 'stratified'
40 | )
41 | trainEvents <- seq(100, 800, 100)
42 | 
43 | 
44 | # Create the learning curve object
45 | if (selection != "y" &&
46 | selection != "Y") {
47 | learningCurve <- createLearningCurve(
48 | plpData = plpData,
49 | outcomeId = 2,
50 | analysisId = 'learningCurveDemo',
51 | parallel = FALSE,
52 | cores = 4,
53 | modelSettings = modelSettings,
54 | populationSettings = populationSettings,
55 | splitSettings = splitSettings,
56 | trainEvents = trainEvents,
57 | saveDirectory = './learningCurve'
58 | )
59 | 
60 | } else {
61 | # create a learning curve object in parallel
62 | learningCurve <- createLearningCurve(
63 | plpData = plpData,
64 | outcomeId = 2,
65 | analysisId = 'learningCurveDemo',
66 | parallel = TRUE,
67 | cores = 4,
68 | modelSettings = modelSettings,
69 | populationSettings = populationSettings,
70 | splitSettings = splitSettings,
71 | trainEvents = trainEvents,
72 | saveDirectory = './learningCurve'
73 | )
74 | 
75 | }
76 | 
77 | # plot the learning curve (title no longer claims "Parallel" since either branch may run)
78 | plotLearningCurve(
79 | learningCurve,
80 | metric = "AUROC",
81 | abscissa = "events",
82 | plotTitle = "Learning Curve",
83 | plotSubtitle = "AUROC performance"
84 | )
85 | 
--------------------------------------------------------------------------------
/man/getEunomiaPlpData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/ExtractData.R
3 | \name{getEunomiaPlpData}
4 | \alias{getEunomiaPlpData}
5 | \title{Create a plpData object from the Eunomia database}
6 | \usage{
7 | getEunomiaPlpData(covariateSettings = NULL)
8 | }
9 | \arguments{
10 | \item{covariateSettings}{A list of covariateSettings objects created using the
11 | \code{createCovariateSettings} function in the \code{FeatureExtraction} package.
12 | If nothing is specified covariates with age, gender, conditions and drug era are used.} 13 | } 14 | \value{ 15 | An object of type \code{plpData}, containing information on the cohorts, their 16 | outcomes, and baseline covariates. Information about multiple outcomes can be 17 | captured at once for efficiency reasons. This object is a list with the 18 | following components: \describe{ \item{outcomes}{A data frame listing the 19 | outcomes per person, including the time to event, and the outcome id} 20 | \item{cohorts}{A data frame listing the persons in each cohort, listing their 21 | exposure status as well as the time to the end of the observation period and 22 | time to the end of the cohort} \item{covariateData}{An Andromeda object created 23 | with the \code{FeatureExtraction} package. This object contains the following items: 24 | \describe{ \item{covariates}{An Andromeda table listing the covariates per 25 | person in the two cohorts. This is done using a sparse representation: 26 | covariates with a value of 0 are omitted to save space. Usually has three 27 | columns, rowId, covariateId and covariateValue'.} \item{covariateRef}{An 28 | Andromeda table describing the covariates that have been extracted.} 29 | \item{AnalysisRef}{An Andromeda table with information about which analysisIds 30 | from 'FeatureExtraction' were used.} }}} 31 | } 32 | \description{ 33 | This function creates a plpData object from the Eunomia database. It gets 34 | the connection details, creates the cohorts, and extracts the data. The cohort 35 | is predicting GIbleed in new users of celecoxib. 
36 | }
37 | \examples{
38 | \dontshow{if (rlang::is_installed("Eunomia") && rlang::is_installed("curl") && curl::has_internet()) withAutoprint(\{ # examplesIf}
39 | \donttest{ \dontshow{ # takes too long }
40 | covariateSettings <- FeatureExtraction::createCovariateSettings(
41 | useDemographicsAge = TRUE,
42 | useDemographicsGender = TRUE,
43 | useConditionOccurrenceAnyTimePrior = TRUE
44 | )
45 | plpData <- getEunomiaPlpData(covariateSettings = covariateSettings)
46 | }
47 | \dontshow{\}) # examplesIf}
48 | }
49 | 
--------------------------------------------------------------------------------
/demo/EnsembleModelDemo.R:
--------------------------------------------------------------------------------
1 | library(PatientLevelPrediction)
2 | 
3 | # This demo will generate a stacked ensemble consisting
4 | # of a Logistic Regression and Random Forest model.
5 | # Dependent on your system it can take some time to run
6 | 
7 | # We first simulate some data
8 | cat("Press a key to continue")
9 | invisible(readline())
10 | 
11 | # Simulate plpData from the simulation profile shipped in data/simulationProfile.rda
12 | data("simulationProfile")
13 | set.seed(1234)
14 | sampleSize <- 2000
15 | plpData <- simulatePlpData(
16 | simulationProfile,
17 | n = sampleSize,
18 | seed = 42
19 | )
20 | 
21 | # Generate the study population
22 | populationSettings <- createStudyPopulationSettings(
23 | binary = TRUE,
24 | firstExposureOnly = FALSE,
25 | washoutPeriod = 0,
26 | removeSubjectsWithPriorOutcome = FALSE,
27 | priorOutcomeLookback = 99999,
28 | requireTimeAtRisk = TRUE,
29 | minTimeAtRisk = 0,
30 | riskWindowStart = 0,
31 | startAnchor = 'cohort start',
32 | riskWindowEnd = 365,
33 | endAnchor = 'cohort start'
34 | )
35 | 
36 | # Let's set the models and model building parameters
37 | cat("Press a key to continue")
38 | invisible(readline())
39 | 
40 | # Use LASSO logistic regression and Random Forest as base predictors
41 | model1 <- setLassoLogisticRegression()
42 | model2 <- setRandomForest()
43 | 
44 | # Specify the split settings
45 | splitSettings <- createDefaultSplitSetting(
46 | testFraction = 0.2,
47 | nfold = 4,
48 | splitSeed = 100 # this makes sure same split is done
49 | )
50 | 
51 | # Specify the ensemble strategy
52 | ensembleStrategy <- 'stacked'
53 | 
54 | # Now we build the stacked ensemble
55 | # NOTE(review): runEnsembleModel must be provided by the installed PatientLevelPrediction version — confirm
56 | cat("Press a key to continue")
57 | invisible(readline())
58 | ensembleResults <- runEnsembleModel(
59 | ensembleStrategy = ensembleStrategy,
60 | parallel = TRUE,
61 | maxCores = 2,
62 | dataList = list(
63 | plpData,
64 | plpData
65 | ),
66 | outcomeIds = list(2,2),
67 | populationSettings = list(
68 | populationSettings,
69 | populationSettings
70 | ),
71 | sampleSettings = list(
72 | createSampleSettings(),
73 | createSampleSettings()
74 | ),
75 | featureEngineeringSettings = list(
76 | createFeatureEngineeringSettings(),
77 | createFeatureEngineeringSettings()
78 | ),
79 | preprocessSettings = list(
80 | createPreprocessSettings(),
81 | createPreprocessSettings()
82 | ),
83 | modelList = list(
84 | model1,
85 | model2
86 | ),
87 | splitSettings = splitSettings
88 | )
89 | 
90 | # You could now save the model and apply it on other data as described in more detail
91 | # in the vignette.
91 | -------------------------------------------------------------------------------- /man/createCohortCovariateSettings.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/AdditionalCovariates.R 3 | \name{createCohortCovariateSettings} 4 | \alias{createCohortCovariateSettings} 5 | \title{Extracts covariates based on cohorts} 6 | \usage{ 7 | createCohortCovariateSettings( 8 | cohortName, 9 | settingId, 10 | cohortDatabaseSchema = NULL, 11 | cohortTable = NULL, 12 | cohortId, 13 | startDay = -30, 14 | endDay = 0, 15 | count = FALSE, 16 | ageInteraction = FALSE, 17 | lnAgeInteraction = FALSE, 18 | analysisId = 456 19 | ) 20 | } 21 | \arguments{ 22 | \item{cohortName}{Name for the cohort} 23 | 24 | \item{settingId}{A unique id for the covariate time and} 25 | 26 | \item{cohortDatabaseSchema}{The schema of the database with the cohort. If 27 | nothing is specified then the cohortDatabaseSchema from databaseDetails at runtime is used.} 28 | 29 | \item{cohortTable}{the table name that contains the covariate cohort. 
If
30 | nothing is specified then the cohortTable from databaseDetails at runtime is used.}
31 | 
32 | \item{cohortId}{cohort id for the covariate cohort}
33 | 
34 | \item{startDay}{The number of days prior to index to start observing the cohort}
35 | 
36 | \item{endDay}{The number of days prior to index to stop observing the cohort}
37 | 
38 | \item{count}{If FALSE the covariate value is binary (1 means cohort occurred between index+startDay and index+endDay, 0 means it did not)
39 | If TRUE then the covariate value is the number of unique cohort_start_dates between index+startDay and index+endDay}
40 | 
41 | \item{ageInteraction}{If TRUE multiply the covariate value by the patient's age in years}
42 | 
43 | \item{lnAgeInteraction}{If TRUE multiply the covariate value by the log of the patient's age in years}
44 | 
45 | \item{analysisId}{The analysisId for the covariate}
46 | }
47 | \value{
48 | An object of class \code{covariateSettings} specifying how to create the cohort covariate with the covariateId
49 | cohortId x 100000 + settingId x 1000 + analysisId
50 | }
51 | \description{
52 | Extracts covariates based on cohorts
53 | }
54 | \details{
55 | The user specifies a cohort and time period and then a covariate is constructed whether they are in the
56 | cohort during the time periods relative to target population cohort index
57 | }
58 | \examples{
59 | createCohortCovariateSettings(cohortName="testCohort",
60 | settingId=1,
61 | cohortId=1,
62 | cohortDatabaseSchema="cohorts",
63 | cohortTable="cohort_table")
64 | }
65 | 
--------------------------------------------------------------------------------
/man/splitData.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/DataSplitting.R
3 | \name{splitData}
4 | \alias{splitData}
5 | \title{Split the plpData into test/train sets using a splitting settings of class
6 | \code{splitSettings}}
7 | \usage{
8 | 
splitData( 9 | plpData = plpData, 10 | population = population, 11 | splitSettings = createDefaultSplitSetting(splitSeed = 42) 12 | ) 13 | } 14 | \arguments{ 15 | \item{plpData}{An object of type \code{plpData} - the patient level 16 | prediction data extracted from the CDM.} 17 | 18 | \item{population}{The population created using \code{createStudyPopulation} 19 | that define who will be used to develop the model} 20 | 21 | \item{splitSettings}{An object of type \code{splitSettings} specifying the 22 | split - the default can be created using \code{createDefaultSplitSetting}} 23 | } 24 | \value{ 25 | Returns a list containing the training data (Train) and optionally the test 26 | data (Test). Train is an Andromeda object containing 27 | \itemize{\item covariates: a table (rowId, covariateId, covariateValue) 28 | containing the covariates for each data point in the train data 29 | \item covariateRef: a table with the covariate information 30 | \item labels: a table (rowId, outcomeCount, ...) for each data point 31 | in the train data (outcomeCount is the class label) 32 | \item folds: a table (rowId, index) specifying which training 33 | fold each data point is in. 34 | } 35 | Test is an Andromeda object containing 36 | \itemize{\item covariates: a table (rowId, covariateId, covariateValue) 37 | containing the covariates for each data point in the test data 38 | \item covariateRef: a table with the covariate information 39 | \item labels: a table (rowId, outcomeCount, ...) 
for each data 40 | point in the test data (outcomeCount is the class label) 41 | } 42 | } 43 | \description{ 44 | Split the plpData into test/train sets using a splitting settings of class 45 | \code{splitSettings} 46 | } 47 | \examples{ 48 | data("simulationProfile") 49 | plpData <- simulatePlpData(simulationProfile, n = 1000, seed = 42) 50 | population <- createStudyPopulation(plpData) 51 | splitSettings <- createDefaultSplitSetting(testFraction = 0.50, 52 | trainFraction = 0.50, nfold = 5) 53 | data = splitData(plpData, population, splitSettings) 54 | # test data should be ~500 rows (changes because of study population) 55 | nrow(data$Test$labels) 56 | # train data should be ~500 rows 57 | nrow(data$Train$labels) 58 | # should be five fold in the train data 59 | length(unique(data$Train$folds$index)) 60 | } 61 | --------------------------------------------------------------------------------