├── cleanup ├── src ├── Makevars.win ├── Makevars.in ├── nativeInfo.h ├── rfsrc.h ├── error.h ├── sexpOutgoing.h ├── entryGeneric.h ├── stackParallel.h ├── splitRegr.h ├── treeJIT.h ├── factor.h ├── snpAuxiliaryInfo.h ├── splitMahalanobis.h ├── processEnsemble.h ├── splitClas.h ├── distance.h ├── marginal.h ├── cindex.h ├── polarity.h ├── factorOps.h ├── bootstrap.h ├── error.c ├── sortedLink.h ├── diagnostic.h ├── splitQuantile.h ├── svdUtil.h ├── importanceAnti.h ├── importanceRand.h ├── nodeOps.h ├── nativeUtil.h ├── splitCustomDriver.h ├── splitSurv.h ├── splitInfo.h ├── tree.h ├── treeUtil.h ├── partial.h ├── sampling.h ├── terminal.h ├── importancePerm.h ├── quantile.h ├── survival.h ├── stackOutputQQ.h ├── random.h ├── leafLink.h ├── sources.list ├── importance.h ├── node.h ├── stackPreDefined.h ├── trace.h ├── R_init_randomForestSRC.c ├── polarity.c ├── stackOutput.h ├── split.h ├── regression.h ├── splitGreedy.h ├── splitMult.h ├── survivalE.h ├── trace.c ├── termOps.h ├── stack.h ├── leafLink.c ├── classification.h ├── sexpOutgoing.c ├── sortedLink.c ├── rfsrcUtil.h ├── entry.h ├── impute.h ├── distance.c ├── cindex.c ├── nrutil.h ├── splitUtil.h ├── splitCustom.h └── random.c ├── data ├── hd.rda ├── pbc.rda ├── vdv.rda ├── wihs.rda ├── wine.rda ├── breast.rda ├── follic.rda ├── housing.rda ├── peakVO2.rda ├── veteran.rda └── nutrigenomic.rda ├── R ├── rfsrc.news.R ├── rfsrc.cart.R ├── utilities_internal.R ├── zzz.R ├── utilities_impute.R ├── predict.rfsrc.R ├── plot.competing.risk.rfsrc.R ├── rfsrc.anonymous.R ├── utilities_varselect.R ├── generic.impute.rfsrc.R ├── vimp.rfsrc.R ├── distance.R ├── rfsrc.fast.R ├── utilities_quantreg.R ├── plot.quantreg.rfsrc.R ├── imbalanced.rfsrc.R └── utilities_saveload.R ├── man ├── rfsrc.news.Rd ├── veteran.Rd ├── vdv.Rd ├── wine.Rd ├── pbc.Rd ├── follic.Rd ├── hd.Rd ├── breast.Rd ├── print.rfsrc.Rd ├── housing.Rd ├── utilities_internal.Rd ├── plot.quantreg.rfsrc.Rd ├── wihs.Rd ├── peakVO2.Rd ├── plot.competing.risk.rfsrc.Rd ├── plot.rfsrc.Rd ├── plot.subsample.rfsrc.Rd ├── nutrigenomic.Rd ├── plot.survival.rfsrc.Rd ├── rfsrc.fast.Rd └── rfsrc.anonymous.Rd ├── DESCRIPTION ├── inst └── CITATION ├── configure.ac └── NAMESPACE /cleanup: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm -rf config.* src/Makevars 4 | -------------------------------------------------------------------------------- /src/Makevars.win: -------------------------------------------------------------------------------- 1 | PKG_CFLAGS = -fopenmp 2 | PKG_LIBS = -fopenmp 3 | -------------------------------------------------------------------------------- /data/hd.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kogalur/randomForestSRC/HEAD/data/hd.rda -------------------------------------------------------------------------------- /data/pbc.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kogalur/randomForestSRC/HEAD/data/pbc.rda -------------------------------------------------------------------------------- /data/vdv.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kogalur/randomForestSRC/HEAD/data/vdv.rda -------------------------------------------------------------------------------- /src/Makevars.in: -------------------------------------------------------------------------------- 1 | PKG_CFLAGS = @OPENMP_CFLAGS@ 2 | PKG_LIBS = @OPENMP_CFLAGS@ 3 | -------------------------------------------------------------------------------- /data/wihs.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kogalur/randomForestSRC/HEAD/data/wihs.rda -------------------------------------------------------------------------------- /data/wine.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kogalur/randomForestSRC/HEAD/data/wine.rda -------------------------------------------------------------------------------- /src/nativeInfo.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_NATIVE_INFO_H 2 | #define RF_NATIVE_INFO_H 3 | #endif 4 | -------------------------------------------------------------------------------- /data/breast.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kogalur/randomForestSRC/HEAD/data/breast.rda -------------------------------------------------------------------------------- /data/follic.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kogalur/randomForestSRC/HEAD/data/follic.rda -------------------------------------------------------------------------------- /data/housing.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kogalur/randomForestSRC/HEAD/data/housing.rda -------------------------------------------------------------------------------- /data/peakVO2.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kogalur/randomForestSRC/HEAD/data/peakVO2.rda -------------------------------------------------------------------------------- /data/veteran.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kogalur/randomForestSRC/HEAD/data/veteran.rda -------------------------------------------------------------------------------- /data/nutrigenomic.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kogalur/randomForestSRC/HEAD/data/nutrigenomic.rda -------------------------------------------------------------------------------- /src/rfsrc.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_RFSRC_H 2 | #define RF_RFSRC_H 3 | void rfsrc(char mode, int seedValue); 4 | #endif 5 | -------------------------------------------------------------------------------- /src/error.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_ERROR_H 2 | #define RF_ERROR_H 3 | void exit2R(void); 4 | void printR(char *format, ...); 5 | #endif 6 | -------------------------------------------------------------------------------- /src/sexpOutgoing.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SEXP_OUTGOING_H 2 | #define RF_SEXP_OUTGOING_H 3 | extern char *RF_sexpString[]; 4 | #endif 5 | -------------------------------------------------------------------------------- /R/rfsrc.news.R: -------------------------------------------------------------------------------- 1 | rfsrc.news <- function(...) { 2 | newsfile <- file.path(system.file(package="randomForestSRC"), "NEWS") 3 | file.show(newsfile) 4 | } 5 | -------------------------------------------------------------------------------- /src/entryGeneric.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_ENTRY_GENERIC_H 2 | #define RF_ENTRY_GENERIC_H 3 | void processDefaultGrow(void); 4 | void processDefaultPredict(void); 5 | #endif 6 | -------------------------------------------------------------------------------- /src/stackParallel.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_STACK_PARALLEL_H 2 | #define RF_STACK_PARALLEL_H 3 | void stackLocksOpenMP(char mode); 4 | void unstackLocksOpenMP(char mode); 5 | #endif 6 | -------------------------------------------------------------------------------- /R/rfsrc.cart.R: -------------------------------------------------------------------------------- 1 | rfsrc.cart <- function(formula, data, ntree = 1, mtry = ncol(data), bootstrap = "none", nsplit = 0, ...) 2 | { 3 | rfsrc(formula, data, ntree = ntree, mtry = mtry, bootstrap = bootstrap, nsplit = 0, ...) 4 | } 5 | -------------------------------------------------------------------------------- /src/splitRegr.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SPLIT_REGR_H 2 | #define RF_SPLIT_REGR_H 3 | #include "node.h" 4 | #include "splitInfo.h" 5 | char regressionXwghtSplitCur (uint treeID, Node *parent, SplitInfoMax *splitInfoMax, GreedyObj *greedyMembr, char multImpFlag); 6 | #endif 7 | -------------------------------------------------------------------------------- /src/treeJIT.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_TREE_JIT_H 2 | #define RF_TREE_JIT_H 3 | #include "node.h" 4 | #include "terminal.h" 5 | void acquireTreeJIT(char mode, uint r, uint treeID); 6 | void restoreTerminalNodeJIT(uint treeID, Node *root, uint indv, double **xArray, Terminal **termMembership); 7 | #endif 8 | -------------------------------------------------------------------------------- /R/utilities_internal.R: -------------------------------------------------------------------------------- 1 | # internal-utils.R -- Dummy file for documentation linkage 2 | # This file does not define actual functions. It exists to tie together 3 | # the manual documentation (man/internal-utils.Rd) with the exported names, 4 | # preventing \"Undocumented object\" warnings during R CMD check. 5 | NULL 6 | -------------------------------------------------------------------------------- /src/factor.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_FACTOR_H 2 | #define RF_FACTOR_H 3 | typedef struct factor Factor; 4 | struct factor { 5 | unsigned int r; 6 | unsigned int cardinalGroupCount; 7 | void *complementaryPairCount; 8 | void *cardinalGroupSize; 9 | unsigned int **cardinalGroupBinary; 10 | unsigned int mwcpSize; 11 | }; 12 | #endif 13 | -------------------------------------------------------------------------------- /src/snpAuxiliaryInfo.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SNP_AUXILIARY_INFO_H 2 | #define RF_SNP_AUXILIARY_INFO_H 3 | typedef struct snpAuxiliaryInfo SNPAuxiliaryInfo; 4 | struct snpAuxiliaryInfo { 5 | char type; 6 | char *identity; 7 | uint slot; 8 | ulong linearSize; 9 | void *snpPtr; 10 | void *auxiliaryArrayPtr; 11 | uint dimSize; 12 | int *dim; 13 | }; 14 | #endif 15 | -------------------------------------------------------------------------------- /src/splitMahalanobis.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SPLIT_MAHALANOBIS_H 2 | #define RF_SPLIT_MAHALANOBIS_H 3 | #include "node.h" 4 | #include "splitInfo.h" 5 | char mahalanobis (uint treeID, 6 | Node *parent, 7 | SplitInfoMax *splitInfoMax, 8 | GreedyObj *greedyMembr, 9 | char multImpFlag); 10 | #endif 11 | -------------------------------------------------------------------------------- /src/processEnsemble.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_PROCESS_ENSEMBLE_H 2 | #define RF_PROCESS_ENSEMBLE_H 3 | void stackFactorInSitu(uint treeID); 4 | void unstackFactorInSitu(uint treeID); 5 | void processEnsembleInSitu(char mode, char multImpFlag, uint b); 6 | void processEnsemblePost(char mode); 7 | void processEnsembleHoldout(uint xVarIdx, uint b); 8 | void processEnsembleHoldoutPost(uint bb); 9 | #endif 10 | -------------------------------------------------------------------------------- /man/rfsrc.news.Rd: -------------------------------------------------------------------------------- 1 | \name{rfsrc.news} 2 | \alias{rfsrc.news} 3 | \title{Show the NEWS file} 4 | \description{ 5 | Show the NEWS file of the \pkg{randomForestSRC} package. 6 | } 7 | \usage{ 8 | rfsrc.news(...) 9 | } 10 | \arguments{ 11 | \item{...}{Further arguments passed to or from other methods.} 12 | } 13 | \value{ 14 | None. 15 | } 16 | \author{ 17 | Hemant Ishwaran and Udaya B. Kogalur 18 | } 19 | \keyword{documentation} 20 | -------------------------------------------------------------------------------- /src/splitClas.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SPLIT_CLAS_H 2 | #define RF_SPLIT_CLAS_H 3 | char classificationXwghtSplitCur (uint treeID, Node *parent, SplitInfoMax *splitInfoMax, GreedyObj *greedyMembr, char multImpFlag); 4 | char classificationAreaUnderROCSplit (uint treeID, Node *parent, SplitInfoMax *splitInfoMax, GreedyObj *greedyMembr, char multImpFlag); 5 | char classificationEntropySplit (uint treeID, Node *parent, SplitInfoMax *splitInfoMax, GreedyObj *greedyMembr, char multImpFlag); 6 | #endif 7 | -------------------------------------------------------------------------------- /src/distance.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_DISTANCE_H 2 | #define RF_DISTANCE_H 3 | SEXP rfsrcDistance(SEXP sexp_metric, 4 | SEXP sexp_n, 5 | SEXP sexp_p, 6 | SEXP sexp_x, 7 | SEXP sexp_sizeIJ, 8 | SEXP sexp_rowI, 9 | SEXP sexp_rowJ, 10 | SEXP sexp_numThreads, 11 | SEXP sexp_traceFlag); 12 | double euclidean(uint n, uint p, uint i, uint j, double **x); 13 | #endif 14 | -------------------------------------------------------------------------------- /src/marginal.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_MARGINAL_H 2 | #define RF_MARGINAL_H 3 | #include "node.h" 4 | void getMarginalMembership(char mode, uint treeID); 5 | void releaseMarginalMembership(char mode, uint treeID); 6 | void marginalMembership(uint treeID, 7 | Node *parent, 8 | uint *gAllMembrIndx, 9 | uint gAllMembrSize, 10 | uint obsSize, 11 | double **xArray); 12 | #endif 13 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onAttach <- function(libname, pkgname) { 2 | rfsrc.version <- read.dcf(file=system.file("DESCRIPTION", package=pkgname), 3 | fields="Version") 4 | packageStartupMessage(paste("\n", 5 | pkgname, 6 | rfsrc.version, 7 | "\n", 8 | "\n", 9 | "Type rfsrc.news() to see new features, changes, and bug fixes.", 10 | "\n", 11 | "\n")) 12 | } 13 | -------------------------------------------------------------------------------- /man/veteran.Rd: -------------------------------------------------------------------------------- 1 | \name{veteran} 2 | \docType{data} 3 | \alias{veteran} 4 | \title{Veteran's Administration Lung Cancer Trial} 5 | \description{ 6 | Randomized trial of two treatment regimens for lung cancer. 7 | This is a standard survival analysis data set. 8 | } 9 | \source{Kalbfleisch and Prentice, \emph{The Statistical 10 | Analysis of Failure Time Data.}} 11 | \references{ 12 | Kalbfleisch J. and Prentice R, (1980) \emph{The Statistical 13 | Analysis of Failure Time Data.} New York: Wiley. 14 | } 15 | \examples{data(veteran, package = "randomForestSRC")} 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /src/cindex.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_CINDEX_H 2 | #define RF_CINDEX_H 3 | SEXP rfsrcCIndex(SEXP sexp_traceFlag, 4 | SEXP sexp_size, 5 | SEXP sexp_time, 6 | SEXP sexp_censoring, 7 | SEXP sexp_predicted, 8 | SEXP sexp_denom); 9 | SEXP rfsrcCIndexNew(SEXP sexp_traceFlag, 10 | SEXP sexp_size, 11 | SEXP sexp_time, 12 | SEXP sexp_censoring, 13 | SEXP sexp_predicted, 14 | SEXP sexp_denom); 15 | SEXP rfsrcTestSEXP(SEXP sexp_size); 16 | #endif 17 | -------------------------------------------------------------------------------- /man/vdv.Rd: -------------------------------------------------------------------------------- 1 | \name{vdv} 2 | \docType{data} 3 | \alias{vdv} 4 | \title{van de Vijver Microarray Breast Cancer} 5 | \description{ 6 | Gene expression profiling for predicting clinical outcome of breast 7 | cancer (van't Veer et al., 2002). Microarray breast cancer data set 8 | of 4707 expression values on 78 patients with survival information. 9 | } 10 | \references{ 11 | van't Veer L.J. et al. (2002). Gene expression profiling predicts 12 | clinical outcome of breast cancer. \emph{Nature}, \bold{12}, 13 | 530--536. 14 | } 15 | \examples{data(vdv, package = "randomForestSRC")} 16 | \keyword{datasets} 17 | -------------------------------------------------------------------------------- /src/polarity.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_POLARITY_H 2 | #define RF_POLARITY_H 3 | #include "splitInfo.h" 4 | char getDaughterPolaritySimpleFactor (uint treeID, SplitInfo *info, uint index, void *value, ...); 5 | char getDaughterPolaritySimpleNonFactor(uint treeID, SplitInfo *info, uint index, void *value, ...); 6 | char getDaughterPolarity (uint treeID, SplitInfo *info, uint index, void *value, ...); 7 | char getDaughterPolaritySimpleFactorSingle(uint treeID, SplitInfo *info, uint index, void *value, ...); 8 | char getDaughterPolaritySimpleNonFactorSingle(uint treeID, SplitInfo *info, uint index, void *value, ...); 9 | #endif 10 | -------------------------------------------------------------------------------- /src/factorOps.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_FACTOR_OPS_H 2 | #define RF_FACTOR_OPS_H 3 | #include "factor.h" 4 | Factor *makeFactor(uint r, char bookFlag); 5 | void freeFactor(Factor *f); 6 | char bookFactor(Factor *f); 7 | char unbookFactor(Factor *f); 8 | void bookPair (uint levelCount, 9 | uint groupIndex, 10 | uint levelIndex, 11 | uint *row, 12 | uint *level, 13 | Factor *f); 14 | void nChooseK (uint n, uint r, char type, void *result); 15 | char reduceFraction(uint *numerator, uint *denominator); 16 | char splitOnFactor(uint level, uint *mwcp); 17 | #endif 18 | -------------------------------------------------------------------------------- /src/bootstrap.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_BOOTSTRAP_H 2 | #define RF_BOOTSTRAP_H 3 | #include "node.h" 4 | char bootstrap (char mode, 5 | uint treeID, 6 | Node *nodePtr, 7 | uint *subIndex, 8 | uint subsetSize, 9 | uint *index, 10 | uint indexSize); 11 | char getNodeSign (char mode, uint treeID, Node *nodePtr, uint *bmIndex, uint repMembrSize); 12 | char bootstrapSubject (char mode, 13 | uint treeID, 14 | Node *nodePtr, 15 | uint **index, 16 | uint *indexSize); 17 | #endif 18 | -------------------------------------------------------------------------------- /src/error.c: -------------------------------------------------------------------------------- 1 | 2 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 3 | #include "global.h" 4 | #include "external.h" 5 | 6 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 7 | 8 | 9 | 10 | 11 | #include "error.h" 12 | const char vomit[] = "\nRF-SRC: The application will now exit.\n"; 13 | void exit2R(void) { 14 | Rprintf("%s", vomit); 15 | error(NULL); 16 | } 17 | void printR(char *format, ...) { 18 | char *buffer; 19 | va_list aptr; 20 | buffer = (char *) malloc(sizeof(char) * 1023); 21 | va_start(aptr, format); 22 | vsnprintf(buffer, sizeof(char) * 1023, format, aptr); 23 | va_end(aptr); 24 | Rprintf("%s", buffer); 25 | free((char *) buffer); 26 | } 27 | -------------------------------------------------------------------------------- /src/sortedLink.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SORTED_LINK_H 2 | #define RF_SORTED_LINK_H 3 | typedef struct sortedLinkedObj SortedLinkedObj; 4 | struct sortedLinkedObj { 5 | struct sortedLinkedObj *fwdLink; 6 | struct sortedLinkedObj *bakLink; 7 | uint rank; 8 | uint indx; 9 | }; 10 | SortedLinkedObj *makeSortedLinkedObj(void); 11 | void makeAndSpliceSortedLinkedObj(uint treeID, 12 | SortedLinkedObj **headPtr, 13 | SortedLinkedObj **tailPtr, 14 | uint *listLength, 15 | uint rank, uint indx); 16 | void freeSortedLinkedObjList(SortedLinkedObj *obj); 17 | void freeSortedLinkedObj(SortedLinkedObj *obj); 18 | #endif 19 | -------------------------------------------------------------------------------- /src/diagnostic.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_DIAGNOSTIC_H 2 | #define RF_DIAGNOSTIC_H 3 | #include "splitInfo.h" 4 | #include "node.h" 5 | #include "terminal.h" 6 | void getSplitObjectInfo(SplitInfo *info); 7 | void getNodeInfo(Node *leaf); 8 | void getTerminalInfo(Terminal *termPtr); 9 | Node *getTerminalNode(uint treeID, uint leaf); 10 | void getRawNodeSize(uint type, 11 | uint treeID, 12 | Node *parent, 13 | uint *repMembrIndx, 14 | uint *repMembrSize, 15 | uint *allMembrIndx, 16 | uint *allMembrSize); 17 | void printTreeInfo(uint treeID, Node *parent); 18 | void initTimer(void); 19 | void printTimer(void); 20 | void printParameters(char mode); 21 | #endif 22 | -------------------------------------------------------------------------------- /src/splitQuantile.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SPLIT_QUANTILE_H 2 | #define RF_SPLIT_QUANTILE_H 3 | #include "node.h" 4 | #include "splitInfo.h" 5 | char locallyAdaptiveQuantileRegrSplit (uint treeID, 6 | Node *parent, 7 | SplitInfoMax *splitInfoMax, 8 | GreedyObj *greedyMembr, 9 | char multImpFlag); 10 | char quantileRegrSplit (uint treeID, 11 | Node *parent, 12 | SplitInfoMax *splitInfoMax, 13 | GreedyObj *greedyMembr, 14 | char multImpFlag); 15 | double quantile7 (double *r, uint s, double p); 16 | #endif 17 | -------------------------------------------------------------------------------- /src/svdUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SVD_UTIL_H 2 | #define RF_SVD_UTIL_H 3 | void svdcmp(double **a, int m, int n, double ***uptr, double **wptr, double ***vptr); 4 | char svdchk(double **a, uint m, uint n, double **u, double *w, double **v); 5 | double **svdinv(double **u, double *w, double **v, uint m, uint n, uint singularity); 6 | void free_svdcmp(double **a, int m, int n, double **u, double *w, double **v); 7 | void svbksb(double **u, double *w, double **v, uint m, uint n, double *b, double *x); 8 | double **matrixCopy(double **a, uint m, uint n); 9 | double **matrixTrans(double **a, uint m, uint n); 10 | double **matrixMult(double **a, double **b, uint m, uint n, uint p); 11 | void matrixPrint(double **x, uint m, uint n); 12 | double pythag(double a, double b); 13 | void harness(void); 14 | #endif 15 | -------------------------------------------------------------------------------- /src/importanceAnti.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_IMPORTANCE_ANTI_H 2 | #define RF_IMPORTANCE_ANTI_H 3 | #include "terminal.h" 4 | #include "node.h" 5 | void getAntiMembership(char mode, 6 | uint treeID, 7 | Terminal **vimpMembership, 8 | uint p); 9 | Node *antiMembershipGeneric(uint treeID, 10 | Node *parent, 11 | uint individual, 12 | uint vimpX, 13 | double **xArray); 14 | Node *antiMembershipJIT(uint treeID, 15 | Node *parent, 16 | uint individual, 17 | uint vimpX, 18 | double **xArray); 19 | #endif 20 | -------------------------------------------------------------------------------- /man/wine.Rd: -------------------------------------------------------------------------------- 1 | \name{wine} 2 | \docType{data} 3 | \alias{wine} 4 | \title{White Wine Quality Data} 5 | \description{ 6 | The inputs include objective tests (e.g. PH values) and the output is 7 | based on sensory data (median of at least 3 evaluations made by wine 8 | experts) of white wine. Each expert graded the wine quality between 0 9 | (very bad) and 10 (very excellent). 10 | } 11 | \references{ 12 | Cortez, P., Cerdeira, A., Almeida, F., Matos T. and Reis, J. (2009). 13 | Modeling wine preferences by data mining from physicochemical properties. 14 | In \emph{Decision Support Systems}, Elsevier, 47(4):547-553. 15 | } 16 | \examples{ 17 | ## load wine and convert to a multiclass problem 18 | data(wine, package = "randomForestSRC") 19 | wine$quality <- factor(wine$quality) 20 | } 21 | \keyword{datasets} 22 | -------------------------------------------------------------------------------- /R/utilities_impute.R: -------------------------------------------------------------------------------- 1 | assign.impute.mean <- function(data, impute.mean) { 2 | d <- data.frame(lapply(colnames(data), function(xnms) { 3 | x <- data[, xnms] 4 | is.na.x <- is.na(x) 5 | if (any(is.na.x)) { 6 | x[is.na.x] <- impute.mean[[xnms]] 7 | } 8 | x 9 | }), stringsAsFactors = TRUE) 10 | colnames(d) <- colnames(data) 11 | d 12 | } 13 | get.impute.mean <- function(data) { 14 | imean <- mclapply(data, function(x) { 15 | if (all(is.na(x))) { 16 | NA 17 | } 18 | else { 19 | if (is.factor(x)) { 20 | x.table <- table(x) 21 | names(x.table)[which.max(x.table)] 22 | } 23 | else { 24 | mean(x, na.rm = TRUE) 25 | } 26 | } 27 | }) 28 | names(imean) <- colnames(data) 29 | imean 30 | } 31 | get.na.roughfix <- function(data) { 32 | assign.impute.mean(data, get.impute.mean(data)) 33 | } 34 | -------------------------------------------------------------------------------- /src/importanceRand.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_IMPORTANCE_RAND_H 2 | #define RF_IMPORTANCE_RAND_H 3 | #include "importanceRand.h" 4 | #include "terminal.h" 5 | #include "node.h" 6 | void getRandomMembership (char mode, 7 | uint treeID, 8 | Terminal **vimpMembership, 9 | uint p); 10 | Node *randomMembershipGeneric(uint treeID, 11 | Node *parent, 12 | uint individual, 13 | uint vimpX, 14 | double **xArray); 15 | Node *randomMembershipJIT(uint treeID, 16 | Node *parent, 17 | uint individual, 18 | uint vimpX, 19 | double **xArray); 20 | #endif 21 | -------------------------------------------------------------------------------- /src/nodeOps.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_NODE_OPS_H 2 | #define RF_NODE_OPS_H 3 | Node *makeNode(unsigned int xSize); 4 | void freeNodeGeneric(Node *parent); 5 | void freeNodeNew(Node *parent); 6 | void setParent( 7 | Node *daughter, 8 | Node *parent 9 | ); 10 | void setLeftDaughter( 11 | Node *daughter, 12 | Node *parent 13 | ); 14 | void setRightDaughter( 15 | Node *daughter, 16 | Node *parent 17 | ); 18 | void stackMPSign(Node *node, unsigned int mpIndexSize); 19 | void unstackMPSign(Node *node); 20 | void stackFMPSign(Node *node, unsigned int fmpIndexSize); 21 | void unstackFMPSign(Node *node); 22 | void stackNodeLMPIndex(Node *node, unsigned int size); 23 | void unstackNodeLMPIndex(Node *node); 24 | void stackNodeFLMPIndex(Node *node, unsigned int size); 25 | void unstackNodeFLMPIndex(Node *node); 26 | void stackSplitDepth(Node *tNode, unsigned int depth); 27 | void unstackSplitDepth(Node *tNode); 28 | #endif 29 | -------------------------------------------------------------------------------- /man/pbc.Rd: -------------------------------------------------------------------------------- 1 | \name{pbc} 2 | \docType{data} 3 | \alias{pbc} 4 | \title{Primary Biliary Cirrhosis (PBC) Data} 5 | \description{ 6 | Data from the Mayo Clinic trial in primary biliary cirrhosis (PBC) of 7 | the liver conducted between 1974 and 1984. A total of 424 PBC 8 | patients, referred to Mayo Clinic during that ten-year interval, met 9 | eligibility criteria for the randomized placebo controlled trial of 10 | the drug D-penicillamine. The first 312 cases in the data set 11 | participated in the randomized trial and contain largely complete 12 | data. 13 | } 14 | \source{Flemming and Harrington, 1991, Appendix D.1.} 15 | \references{ 16 | Flemming T.R and Harrington D.P., (1991) \emph{Counting Processes 17 | and Survival Analysis.} New York: Wiley. 18 | } 19 | \examples{ 20 | \donttest{ 21 | data(pbc, package = "randomForestSRC") 22 | pbc.obj <- rfsrc(Surv(days, status) ~ ., pbc, nsplit = 3) 23 | } 24 | } 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /src/nativeUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_NATIVE_UTIL_H 2 | #define RF_NATIVE_UTIL_H 3 | void setNativeGlobalEnv(uint *nativeIndex, uint *stackCount); 4 | void *copy1DObject(SEXP arr, char type, uint size, char actual); 5 | void *copy2DObject(SEXP arr, char type, char flag, uint row, uint col); 6 | void free_1DObject(void *arr, char type, uint size); 7 | void free_2DObject(void *arr, char type, char flag, uint row, uint col); 8 | void initProtect(uint stackCount); 9 | void *stackAndProtect(char mode, 10 | uint *sexpIndex, 11 | char sexpType, 12 | uint sexpIdentity, 13 | ulong size, 14 | double value, 15 | char *sexpString, 16 | void *auxiliaryPtr, 17 | uint auxiliaryDimSize, 18 | ...); 19 | void setUserTraceFlag (uint traceFlag); 20 | uint getUserTraceFlag (void); 21 | #endif 22 | -------------------------------------------------------------------------------- /man/follic.Rd: -------------------------------------------------------------------------------- 1 | \name{follic} 2 | \docType{data} 3 | \alias{follic} 4 | \title{Follicular Cell Lymphoma} 5 | \description{ 6 | Competing risk data set involving follicular cell lymphoma. 7 | } 8 | \format{A data frame containing: 9 | \tabular{ll}{ 10 | age \tab age\cr 11 | hgb \tab hemoglobin (g/l)\cr 12 | clinstg \tab clinical stage: 1=stage I, 2=stage II\cr 13 | ch \tab chemotherapy\cr 14 | rt \tab radiotherapy\cr 15 | time \tab first failure time\cr 16 | status \tab censoring status: 0=censored, 1=relapse, 2=death 17 | } 18 | } 19 | \source{Table 1.4b, \emph{Competing Risks: A Practical Perspective}.} 20 | \references{ 21 | Pintilie M., (2006) \emph{Competing Risks: A 22 | Practical Perspective.} West Sussex: John Wiley and Sons. 23 | } 24 | \examples{ 25 | \donttest{ 26 | data(follic, package = "randomForestSRC") 27 | follic.obj <- rfsrc(Surv(time, status) ~ ., follic, nsplit = 3, ntree = 100) 28 | } 29 | } 30 | \keyword{datasets} 31 | -------------------------------------------------------------------------------- /src/splitCustomDriver.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SPLIT_CUSTOM_DRIVER_H 2 | #define RF_SPLIT_CUSTOM_DRIVER_H 3 | #include "splitInfo.h" 4 | #include "node.h" 5 | char customMultivariateSplit (uint treeID, 6 | Node *parent, 7 | SplitInfoMax *splitInfoMax, 8 | GreedyObj *greedyMembr, 9 | char multImpFlag); 10 | char customSurvivalSplit (uint treeID, 11 | Node *parent, 12 | SplitInfoMax *splitInfoMax, 13 | GreedyObj *greedyMembr, 14 | char multImpFlag); 15 | char customCompetingRiskSplit (uint treeID, 16 | Node *parent, 17 | SplitInfoMax *splitInfoMax, 18 | GreedyObj *greedyMembr, 19 | char multImpFlag); 20 | #endif 21 | -------------------------------------------------------------------------------- /man/hd.Rd: -------------------------------------------------------------------------------- 1 | \name{hd} 2 | \docType{data} 3 | \alias{hd} 4 | \title{Hodgkin's Disease} 5 | \description{ 6 | Competing risk data set involving Hodgkin's disease. 7 | } 8 | \format{A data frame containing: 9 | \tabular{ll}{ 10 | age \tab age\cr 11 | sex \tab gender\cr 12 | trtgiven \tab treatment: RT=radition, CMT=Chemotherapy and radiation\cr 13 | medwidsi \tab mediastinum involvement: N=no, S=small, L=Large\cr 14 | extranod \tab extranodal disease: Y=extranodal disease, N=nodal disease\cr 15 | clinstg \tab clinical stage: 1=stage I, 2=stage II\cr 16 | time \tab first failure time\cr 17 | status \tab censoring status: 0=censored, 1=relapse, 2=death 18 | } 19 | } 20 | \source{ 21 | Table 1.6b, \emph{Competing Risks: A Practical Perspective}. 22 | } 23 | \references{ 24 | Pintilie M., (2006) \emph{Competing Risks: A 25 | Practical Perspective.} West Sussex: John Wiley and Sons. 26 | } 27 | \examples{data(hd, package = "randomForestSRC")} 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/breast.Rd: -------------------------------------------------------------------------------- 1 | \name{breast} 2 | \docType{data} 3 | \alias{breast} 4 | \title{Wisconsin Prognostic Breast Cancer Data} 5 | \description{ 6 | Recurrence of breast cancer from 198 breast cancer patients, all of 7 | which exhibited no evidence of distant metastases at the time of 8 | diagnosis. The first 30 features of the data describe characteristics 9 | of the cell nuclei present in the digitized image of a fine needle 10 | aspirate (FNA) of the breast mass. 11 | } 12 | \source{ 13 | The data were obtained from the UCI machine learning repository, see 14 | \url{http://archive.ics.uci.edu/ml/datasets/Breast+Cancer+Wisconsin+(Prognostic)}. 15 | } 16 | \examples{ 17 | \donttest{ 18 | ## ------------------------------------------------------------ 19 | ## Standard analysis 20 | ## ------------------------------------------------------------ 21 | 22 | data(breast, package = "randomForestSRC") 23 | breast <- na.omit(breast) 24 | o <- rfsrc(status ~ ., data = breast, nsplit = 10) 25 | print(o) 26 | 27 | }} 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /src/splitSurv.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SPLIT_SURV_H 2 | #define RF_SPLIT_SURV_H 3 | #include "splitInfo.h" 4 | #include "node.h" 5 | char logRankNCR(uint treeID, 6 | Node *parent, 7 | SplitInfoMax *splitInfoMax, 8 | GreedyObj *greedyMembr, 9 | char multImpFlag); 10 | char logRankCR(uint treeID, 11 | Node *parent, 12 | SplitInfoMax *splitInfoMax, 13 | GreedyObj *greedyMembr, 14 | char multImpFlag); 15 | char wiBrierScore (uint treeID, 16 | Node *parent, 17 | SplitInfoMax *splitInfoMax, 18 | GreedyObj *greedyMembr, 19 | char multImpFlag); 20 | char brierScoreGradient1 (uint treeID, 21 | Node *parent, 22 | SplitInfoMax *splitInfoMax, 23 | GreedyObj *greedyMembr, 24 | char multImpFlag); 25 | #endif 26 | -------------------------------------------------------------------------------- /src/splitInfo.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SPLIT_INFO_H 2 | #define RF_SPLIT_INFO_H 3 | typedef struct node Node; 4 | typedef struct splitInfo SplitInfo; 5 | struct splitInfo { 6 | uint size; 7 | char *indicator; 8 | int *randomVar; 9 | uint *mwcpSizeAbs; 10 | void **randomPts; 11 | }; 12 | typedef struct greedyObj GreedyObj; 13 | struct greedyObj { 14 | Node *parent; 15 | GreedyObj *fwdLink; 16 | GreedyObj *bakLink; 17 | GreedyObj *head; 18 | SplitInfo *splitInfo; 19 | uint inbagProxy; 20 | uint nodeID; 21 | uint depth; 22 | char leafFlag; 23 | double *standardResponse; 24 | uint *membershipComplement; 25 | double G_nR_h_l; 26 | double G_nR_h_r; 27 | double sgStat; 28 | double eRisk; 29 | double oobEmprRisk; 30 | }; 31 | typedef struct splitInfoMax SplitInfoMax; 32 | struct splitInfoMax { 33 | uint size; 34 | char *indicator; 35 | double deltaMax; 36 | int splitParameterMax; 37 | double splitValueMaxCont; 38 | uint splitValueMaxFactSize; 39 | uint *splitValueMaxFactPtr; 40 | double splitStatistic; 41 | }; 42 | #endif 43 | -------------------------------------------------------------------------------- /man/print.rfsrc.Rd: -------------------------------------------------------------------------------- 1 | \name{print.rfsrc} 2 | \alias{print.rfsrc} 3 | \title{Print Summary Output of a RF-SRC Analysis} 4 | \description{ 5 | Print summary output from a RF-SRC analysis. This is the default 6 | print method for the package. 7 | } 8 | \usage{\method{print}{rfsrc}(x, outcome.target = NULL, ...)} 9 | \arguments{ 10 | \item{x}{An object of class \code{(rfsrc, grow)}, 11 | or \code{(rfsrc, predict)}.} 12 | \item{outcome.target}{Character value for multivariate families 13 | specifying the target outcome to be used. The default is to use the 14 | first coordinate from the continuous outcomes (otherwise if none, 15 | the first coordinate from the categorical outcomes).} 16 | \item{...}{Further arguments passed to or from other methods.} 17 | } 18 | \author{ 19 | Hemant Ishwaran and Udaya B. Kogalur 20 | } 21 | \references{ 22 | Ishwaran H. and Kogalur U.B. (2007). Random survival forests for R, 23 | \emph{Rnews}, 7/2:25-31. 24 | } 25 | \examples{ 26 | iris.obj <- rfsrc(Species ~., data = iris, ntree=10) 27 | print(iris.obj) 28 | } 29 | \keyword{print} 30 | -------------------------------------------------------------------------------- /src/tree.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_TREE_H 2 | #define RF_TREE_H 3 | #include "node.h" 4 | void acquireTreeGeneric(char mode, uint r, uint b); 5 | void updateWeight(char mode, uint b); 6 | void finalizeWeight(char mode); 7 | void updateDistance(char mode, uint b); 8 | void finalizeDistance(char mode); 9 | void updateProximity(char mode, uint b); 10 | void finalizeProximity(char mode); 11 | void updateSplitDepth(uint treeID, Node *rootPtr, uint maxDepth); 12 | char pruneBranch(uint obsSize, uint treeID, Node **nodesAtDepth, uint nadCount, uint ptnTarget, uint ptnCurrent); 13 | uint pruneTree(uint obsSize, uint treeID, uint ptnCount); 14 | void stackAuxiliary(char mode, uint b); 15 | void unstackAuxiliary(char mode, uint b); 16 | void printPseudoTNInfo(char mode, uint b); 17 | void getPTNodeList(Node *parent, 18 | Node **list, 19 | uint *offset); 20 | void getSplitPath(uint treeID, Node *parent); 21 | void freeSplitPath(uint treeID); 22 | uint getMaximumDepth(Node *parent); 23 | void getNodesAtDepth(Node *parent, uint tagDepth, Node **nodesAtDepth, uint *nadCount); 24 | #endif 25 | -------------------------------------------------------------------------------- /src/treeUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_TREE_UTIL_H 2 | #define RF_TREE_UTIL_H 3 | #include "node.h" 4 | #include "terminal.h" 5 | char growTreeRecursive(uint r, 6 | char rootFlag, 7 | char multImpFlag, 8 | uint b, 9 | Node *parent, 10 | uint *bootMembrIndxIter, 11 | uint *rmbrIterator, 12 | uint *ambrIterator); 13 | void freeTree(uint treeID, Node *parent); 14 | void saveStatistics(char mode, 15 | uint b, 16 | Node *parent, 17 | uint *offset, 18 | double *spltST, 19 | uint *dpthST); 20 | void initTerminalNodeMembership(uint treeID, 21 | Terminal *parent, 22 | uint *allMembrIndx, 23 | uint allMembrSize); 24 | void updatePruning(char mode, uint treeID); 25 | void updateCaseDepth(char mode, uint treeID); 26 | #endif 27 | -------------------------------------------------------------------------------- /src/partial.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_PARTIAL_H 2 | #define RF_PARTIAL_H 3 | #include "node.h" 4 | #include "terminal.h" 5 | void getAndUpdatePartialMembership(uint treeID, Node *root); 6 | void partialMembershipGeneric(uint treeID, 7 | Node *parent, 8 | uint partialIndex, 9 | uint *allMembrIndx, 10 | uint allMembrSize, 11 | double **xArray, 12 | Terminal **membership); 13 | void partialMembershipJIT(uint treeID, 14 | Node *root, 15 | uint partialIndex, 16 | uint *nullMembrIndx, 17 | uint individual, 18 | double **xArray, 19 | Terminal **membership); 20 | void updatePartialCalculations (uint treeID, 21 | uint pVarIdx, 22 | Terminal **partialMembership); 23 | void summarizePartialCalculations(uint treeID, 24 | uint pVarIdx); 25 | #endif 26 | -------------------------------------------------------------------------------- /src/sampling.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SAMPLING_H 2 | #define RF_SAMPLING_H 3 | typedef struct distributionObj DistributionObj; 4 | struct distributionObj { 5 | uint *permissibleIndex; 6 | char *permissible; 7 | uint permissibleSize; 8 | uint *augmentationSize; 9 | uint weightType; 10 | double *weight; 11 | uint *weightSorted; 12 | uint densityAllocSize; 13 | double *cdf; 14 | uint cdfSize; 15 | uint *cdfSort; 16 | uint *density; 17 | uint densitySize; 18 | uint **densitySwap; 19 | uint *index; 20 | uint indexSize; 21 | uint uIndexAllocSize; 22 | uint slot; 23 | }; 24 | DistributionObj *makeDistributionObjRaw(void); 25 | DistributionObj *makeDistributionObjFull(void); 26 | void freeDistributionObjRaw(DistributionObj *obj); 27 | void initializeCDFNew(uint treeID, DistributionObj *obj); 28 | uint sampleFromCDFNew (float (*genericGenerator) (uint), uint treeID, DistributionObj *obj); 29 | void updateCDFNew(uint treeID, DistributionObj *obj); 30 | void discardCDFNew(uint treeID, DistributionObj *obj); 31 | uint sampleUniformlyFromVector (uint treeID, 32 | uint *index, 33 | uint size, 34 | uint *sampleSlot); 35 | #endif 36 | -------------------------------------------------------------------------------- /src/terminal.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_TERMINAL_H 2 | #define RF_TERMINAL_H 3 | typedef struct terminal Terminal; 4 | struct terminal { 5 | unsigned int nodeID; 6 | struct node *mate; 7 | unsigned int *lmiIndex; 8 | unsigned int lmiAllocSize, lmiSize; 9 | double *lmiValue; 10 | unsigned int eTypeSize; 11 | unsigned int mTimeSize; 12 | unsigned int eTimeSize; 13 | unsigned int sTimeSize; 14 | unsigned int *atRiskCount; 15 | unsigned int **eventCount; 16 | unsigned int *eventTimeIndex; 17 | double **localRatio; 18 | double **localCSH; 19 | double **localCIF; 20 | double *localSurvival; 21 | double *localNelsonAalen; 22 | double **CSH; 23 | double **CIF; 24 | double *survival; 25 | double *nelsonAalen; 26 | double *mortality; 27 | unsigned int rnfCount; 28 | double *meanResponse; 29 | unsigned int rfCount; 30 | unsigned int *rfSize; 31 | unsigned int **multiClassProb; 32 | double *maxClass; 33 | double weight; 34 | unsigned int membrCount; 35 | unsigned int *membrStream; 36 | unsigned int inbagProxy; 37 | uint repMembrSizeAlloc, oobMembrSizeAlloc, ibgMembrSizeAlloc; 38 | uint repMembrSize, oobMembrSize, ibgMembrSize; 39 | uint *repMembrIndx, *oobMembrIndx, *ibgMembrIndx; 40 | }; 41 | #endif 42 | -------------------------------------------------------------------------------- /man/housing.Rd: -------------------------------------------------------------------------------- 1 | \name{housing} 2 | \docType{data} 3 | \alias{housing} 4 | \title{Ames Iowa Housing Data} 5 | 6 | \description{ 7 | Data from the Ames Assessor's Office used in assessing values of 8 | individual residential properties sold in Ames, Iowa from 2006 to 9 | 2010. This is a regression problem and the goal is to predict 10 | "SalePrice" which records the price of a home in thousands of dollars. 11 | } 12 | \references{ 13 | De Cock, D., (2011). Ames, Iowa: Alternative to the Boston housing 14 | data as an end of semester regression project. \emph{Journal of Statistics 15 | Education}, 19(3), 1--14. 16 | } 17 | \examples{ 18 | \donttest{ 19 | ## load the data 20 | data(housing, package = "randomForestSRC") 21 | 22 | ## the original data contains lots of missing data, so impute it 23 | ## use missForest, can be slow so grow trees with small training sizes 24 | housing2 <- impute(data = housing, mf.q = 1, sampsize = function(x){x * .1}) 25 | 26 | ## same idea ... but directly use rfsrc.fast and multivariate missForest 27 | housing3 <- impute(data = housing, mf.q = .5, fast = TRUE) 28 | 29 | ## even faster, but potentially less acurate 30 | housing4 <- impute(SalePrice~., housing, splitrule = "random", nimpute = 1) 31 | 32 | 33 | }} 34 | \keyword{datasets} 35 | -------------------------------------------------------------------------------- /man/utilities_internal.Rd: -------------------------------------------------------------------------------- 1 | \name{internal.utils} 2 | \alias{extract.subsample} 3 | \alias{extract.bootsample} 4 | \alias{print.bootsample} 5 | \alias{print.bootsample.rfsrc} 6 | \alias{print.imbalanced.performance} 7 | \alias{print.subsample} 8 | \alias{print.subsample.rfsrc} 9 | \alias{fast.save} 10 | \alias{fast.load} 11 | \alias{fast.save.list} 12 | \alias{fast.load.list} 13 | \alias{lsos} 14 | \alias{get.imbalanced.performance} 15 | \alias{get.imbalanced.optimize} 16 | \alias{get.pr.auc} 17 | \alias{get.pr.curve} 18 | \alias{get.rfq.threshold} 19 | \alias{get.auc} 20 | \alias{get.bayes.rule} 21 | \alias{get.brier.error} 22 | \alias{get.cindex} 23 | \alias{get.confusion} 24 | \alias{get.logloss} 25 | \alias{get.misclass.error} 26 | \alias{get.mv.cserror} 27 | \alias{get.mv.csvimp} 28 | \alias{get.mv.error} 29 | \alias{get.mv.error.block} 30 | \alias{get.mv.formula} 31 | \alias{get.mv.predicted} 32 | \alias{get.mv.vimp} 33 | \alias{extract.quantile} 34 | \alias{get.quantile} 35 | \alias{get.quantile.crps} 36 | \alias{get.quantile.stat} 37 | \alias{sid.perf.metric} 38 | \title{Internal Utility Functions} 39 | 40 | \description{ 41 | These are internal utility functions exported for advanced usage. This 42 | Rd file is used solely to register aliases. 43 | } 44 | 45 | \keyword{internal} 46 | -------------------------------------------------------------------------------- /src/importancePerm.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_IMPORTANCE_PERM_H 2 | #define RF_IMPORTANCE_PERM_H 3 | #include "terminal.h" 4 | #include "node.h" 5 | void getPermuteMembership(char mode, 6 | uint treeID, 7 | Terminal **vimpMembership, 8 | uint p); 9 | Node *permuteMembershipGeneric(uint treeID, 10 | Node *parent, 11 | uint individual, 12 | uint vimpX, 13 | double **xArray); 14 | Node *permuteMembershipJIT(uint treeID, 15 | Node *parent, 16 | uint individual, 17 | uint vimpX, 18 | double **xArray); 19 | Node *getMembershipGeneric(uint treeID, 20 | Node *parent, 21 | uint individual, 22 | double **xArray); 23 | Node *getMembershipJIT(uint treeID, 24 | Node *parent, 25 | uint individual, 26 | double **xArray); 27 | void permute(uint ranGenID, uint parallelID, uint n, uint *indx); 28 | #endif 29 | -------------------------------------------------------------------------------- /src/quantile.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_QUANTILE_H 2 | #define RF_QUANTILE_H 3 | typedef struct quantileObj QuantileObj; 4 | struct quantileObj { 5 | double v; 6 | uint g; 7 | uint dlt; 8 | QuantileObj *fwdLink; 9 | QuantileObj *bakLink; 10 | }; 11 | typedef struct lookUpInfo LookUpInfo; 12 | struct lookUpInfo { 13 | QuantileObj *qPtr; 14 | LookUpInfo *rootPtr; 15 | LookUpInfo *leftPtr; 16 | LookUpInfo *rghtPtr; 17 | }; 18 | QuantileObj *makeQuantileObj(double value); 19 | void freeQuantileObj(QuantileObj *obj); 20 | void freeQuantileObjList(QuantileObj *obj); 21 | QuantileObj *insertQuantileObj(uint *qStreamSize, QuantileObj **head, QuantileObj **tail, uint *quantileLinkLength, double value, LookUpInfo **tree); 22 | QuantileObj *findInsertionPoint(QuantileObj *head, double value, LookUpInfo *tree); 23 | double getApproxQuantile(QuantileObj *head, double phi, uint streamSize); 24 | void populateBand(uint p, uint *band); 25 | void makeLookUpTree(LookUpInfo *infoObj, QuantileObj *qObj, uint size, uint depth); 26 | void findApproximateInsertionPoint(QuantileObj *head, LookUpInfo *tree, double value, QuantileObj **insertPtr); 27 | LookUpInfo *makeLookUpInfo(void); 28 | void freeLookUpInfo(LookUpInfo *obj); 29 | void freeLookUpTree(LookUpInfo *obj); 30 | void testQuantile(uint treeID); 31 | #endif 32 | -------------------------------------------------------------------------------- /src/survival.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SURVIVAL_H 2 | #define RF_SURVIVAL_H 3 | #include "terminal.h" 4 | void getAtRiskAndEventCount (uint treeID, 5 | Terminal *parent, 6 | uint *repMembrIndx, 7 | uint repMembrSize, 8 | uint *allMembrIndx, 9 | uint allMembrSize, 10 | uint *rmbrIterator); 11 | void getLocalRatio (uint treeID, Terminal *parent); 12 | void getRevLocalRatio(uint treeID, Terminal *parent); 13 | void getLocalCSH (uint treeID, Terminal *parent); 14 | void getLocalCIF (uint treeID, Terminal *parent); 15 | void mapLocalToTimeInterest(uint treeID, 16 | Terminal *parent, 17 | void *genericLocal, 18 | void *genericGlobal); 19 | void getLocalSurvival (uint treeID, Terminal *parent); 20 | void getLocalNelsonAalen (uint treeID, Terminal *parent); 21 | void getSurvival (uint treeID, Terminal *parent); 22 | void getMortality (uint treeID, Terminal *parent); 23 | void getNelsonAalen (uint treeID, Terminal *parent); 24 | void getCSH (uint treeID, Terminal *parent); 25 | void getCIF (uint treeID, Terminal *parent); 26 | #endif 27 | -------------------------------------------------------------------------------- /src/stackOutputQQ.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_STACK_OUTPUT_QQ_H 2 | #define RF_STACK_OUTPUT_QQ_H 3 | void stackTNQualitativeObjectsKnown(char mode, 4 | uint **pRF_RMBR_ID_, 5 | uint **pRF_AMBR_ID_, 6 | uint **pRF_TN_RCNT_, 7 | uint **pRF_TN_ACNT_, 8 | uint **pRF_OOB_SZ_, 9 | uint **pRF_IBG_SZ_); 10 | void stackTNQualitativeObjectsUnknown(char mode, 11 | uint **pRF_TN_RCNT_, 12 | uint **pRF_TN_ACNT_, 13 | uint **pRF_TN_OCNT_, 14 | uint **pRF_TN_ICNT_); 15 | void stackTNQuantitativeForestObjectsPtrOnly(char mode); 16 | void unstackTNQuantitativeForestObjectsPtrOnly(char mode); 17 | void stackTNQuantitativeTreeObjectsPtrOnly(uint treeID); 18 | void unstackTNQuantitativeTreeObjectsPtrOnly(uint treeID); 19 | void saveTNQuantitativeTreeObjects(uint treeID); 20 | void stackTNQuantitativeForestObjectsOutput(char mode); 21 | void writeTNQuantitativeForestObjectsOutput(char mode); 22 | void stackTNQualitativeObjectsUnknownMembership(char mode, uint **pRF_OMBR_ID_, uint **pRF_IMBR_ID_); 23 | #endif 24 | -------------------------------------------------------------------------------- /src/random.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_RANDOM_H 2 | #define RF_RANDOM_H 3 | void randomStack(uint bSize, uint bnpSize); 4 | void randomUnstack(uint bSize, uint bnpSize); 5 | void randomSetChainParallel(uint b, int value); 6 | void randomSetUChainParallel(uint b, int value); 7 | void randomSetUChainParallelVimp(uint b, int value); 8 | void randomSetChainParallelVimp(uint p, int value); 9 | void randomSetChainSerial(uint b, int value); 10 | void randomSetUChainSerial(uint b, int value); 11 | void randomSetUChainSerialVimp(uint b, int value); 12 | void randomSetChainSerialVimp(uint p, int value); 13 | int randomGetChainParallel(uint b); 14 | int randomGetUChainParallel(uint b); 15 | int randomGetUChainParallelVimp(uint b); 16 | int randomGetChainParallelVimp(uint p); 17 | int randomGetChainSerial(uint b); 18 | int randomGetUChainSerial(uint b); 19 | int randomGetUChainSerialVimp(uint b); 20 | int randomGetChainSerialVimp(uint p); 21 | float randomChainParallel(uint b); 22 | float randomUChainParallel(uint b); 23 | float randomUChainParallelVimp(uint b); 24 | float randomChainParallelVimp(uint p); 25 | float randomChainSerial(uint b); 26 | float randomUChainSerial(uint b); 27 | float randomUChainSerialVimp(uint b); 28 | float randomChainSerialVimp(uint p); 29 | float ran1_generic(int *iy, int *iv, int *idum); 30 | void lcgenerator(unsigned int *seed, unsigned char reset); 31 | float ran1_original(int *idum); 32 | #endif 33 | -------------------------------------------------------------------------------- /src/leafLink.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_LEAF_LINK_H 2 | #define RF_LEAF_LINK_H 3 | #include "terminal.h" 4 | #include "node.h" 5 | typedef struct leafLinkedObj LeafLinkedObj; 6 | struct leafLinkedObj { 7 | struct leafLinkedObj *fwdLink; 8 | struct leafLinkedObj *bakLink; 9 | struct node *nodePtr; 10 | struct terminal *termPtr; 11 | struct terminal *termPtrAux; 12 | uint nodeID; 13 | uint ibgMembrCount; 14 | uint allMembrCount; 15 | uint oobMembrCount; 16 | }; 17 | typedef struct leafLinkedObjSimple LeafLinkedObjSimple; 18 | struct leafLinkedObjSimple { 19 | struct leafLinkedObjSimple *fwdLink; 20 | struct leafLinkedObjSimple *bakLink; 21 | struct node *nodePtr; 22 | }; 23 | LeafLinkedObj *makeLeafLinkedObj(void); 24 | LeafLinkedObjSimple *makeLeafLinkedObjSimple(void); 25 | LeafLinkedObj *makeAndSpliceLeafLinkedObj(LeafLinkedObj *tail, 26 | Node *nodePtr, 27 | uint ibgCount, 28 | uint allCount); 29 | LeafLinkedObjSimple *makeAndSpliceLeafLinkedObjSimple(LeafLinkedObjSimple *tail, 30 | Node *nodePtr); 31 | void freeLeafLinkedObj(LeafLinkedObj *obj); 32 | void freeLeafLinkedObjSimple(LeafLinkedObjSimple *obj); 33 | void freeLeafLinkedObjList(LeafLinkedObj *obj); 34 | void freeLeafLinkedObjListRev(LeafLinkedObj *obj); 35 | #endif 36 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: randomForestSRC 2 | Version: 3.4.5 3 | Date: 2025-11-26 4 | Title: Fast Unified Random Forests for Survival, Regression, and Classification (RF-SRC) 5 | Authors@R: c(person("Hemant", "Ishwaran", email = "hemant.ishwaran@gmail.com", role = "aut"), 6 | person("Udaya B.", "Kogalur", email = "ubk@kogalur.com", role = c("aut", "cre"))) 7 | Author: Hemant Ishwaran [aut], Udaya B. Kogalur [aut, cre] 8 | Maintainer: Udaya B. Kogalur 9 | BugReports: https://github.com/kogalur/randomForestSRC/issues/ 10 | Depends: R (>= 4.3.0), 11 | Imports: parallel, data.tree, DiagrammeR 12 | Suggests: survival, pec, prodlim, mlbench, interp, caret, imbalance, cluster, fst, data.table 13 | Description: Fast OpenMP parallel computing of Breiman's random forests for univariate, multivariate, unsupervised, survival, competing risks, class imbalanced classification and quantile regression. New Mahalanobis splitting for correlated outcomes. Extreme random forests and randomized splitting. Suite of imputation methods for missing data. Fast random forests using subsampling. Confidence regions and standard errors for variable importance. New improved holdout importance. Case-specific importance. Minimal depth variable importance. Visualize trees on your Safari or Google Chrome browser. Anonymous random forests for data privacy. 14 | License: GPL (>=3) 15 | URL: https://www.randomforestsrc.org/ https://ishwaran.org/ -------------------------------------------------------------------------------- /src/sources.list: -------------------------------------------------------------------------------- 1 | augmentation.h augmentationOps.c augmentationOps.h bootstrap.c bootstrap.h classification.c classification.h diagnostic.c diagnostic.h entryGeneric.c entryGeneric.h external.h factor.h factorOps.c factorOps.h global.h importance.c importance.h importanceAnti.c importanceAnti.h importancePerm.c importancePerm.h importanceRand.c importanceRand.h impute.c impute.h internal.c leafLink.c leafLink.h marginal.c marginal.h node.h nodeOps.c nodeOps.h nrutil.c nrutil.h parallel.c parallel.h partial.c partial.h polarity.c polarity.h processEnsemble.c processEnsemble.h quantile.c quantile.h random.c random.h regression.c regression.h rfsrc.c rfsrc.h rfsrcUtil.c rfsrcUtil.h sampling.c sampling.h snpAuxiliaryInfo.h sortedLink.c sortedLink.h split.c split.h splitClas.c splitClas.h splitCustom.c splitCustom.h splitCustomDriver.c splitCustomDriver.h splitGreedy.c splitGreedy.h splitInfo.h splitMahalanobis.c splitMahalanobis.h splitMult.c splitMult.h splitQuantile.c splitQuantile.h splitRegr.c splitRegr.h splitSurv.c splitSurv.h splitUtil.c splitUtil.h splitUtilSimple.c splitUtilSimple.h splitUtilSurv.c splitUtilSurv.h stack.c stack.h stackOutput.c stackOutput.h stackOutputQQ.c stackOutputQQ.h stackParallel.c stackParallel.h stackPreDefined.c stackPreDefined.h survival.c survival.h survivalE.c survivalE.h svdUtil.c svdUtil.h terminal.h termOps.c termOps.h trace.c trace.h tree.c tree.h treeJIT.c treeJIT.h treeUtil.c treeUtil.h cindex.c cindex.h distance.c distance.h entry.c entry.h error.c error.h nativeInfo.h nativeUtil.c nativeUtil.h R_init_randomForestSRC.c -------------------------------------------------------------------------------- /man/plot.quantreg.rfsrc.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.quantreg.rfsrc} 2 | \alias{plot.quantreg.rfsrc} 3 | \alias{plot.quantreg} 4 | \title{Plot Quantiles from Quantile Regression Forests} 5 | \description{ 6 | Plots quantiles obtained from a quantile regression forest. 7 | Additionally insets the continuous rank probability score (crps), a 8 | useful diagnostic of accuracy. 9 | } 10 | \usage{\method{plot.quantreg}{rfsrc}(x, prbL = .25, prbU = .75, 11 | m.target = NULL, crps = TRUE, subset = NULL, xlab = NULL, ylab = NULL, ...) 12 | } 13 | \arguments{ 14 | 15 | \item{x}{A quantile regression object returned by a call to \command{quantreg}.} 16 | 17 | \item{prbL}{Lower quantile level, typically less than \code{0.5}.} 18 | 19 | \item{prbU}{Upper quantile level, typically greater than \code{0.5}.} 20 | 21 | \item{m.target}{Character string specifying the target outcome for multivariate families. 22 | If not provided, a default target is selected automatically.} 23 | 24 | \item{crps}{Logical. If \code{TRUE}, calculates the continuous ranked probability score (CRPS) and adds it to the plot.} 25 | 26 | \item{subset}{Optional vector specifying a subset of the data to be plotted. 27 | Defaults to plotting all data points.} 28 | 29 | \item{xlab}{Label for the x-axis.} 30 | 31 | \item{ylab}{Label for the y-axis.} 32 | 33 | \item{...}{Additional arguments passed to or from other methods.} 34 | } 35 | \author{ 36 | Hemant Ishwaran and Udaya B. Kogalur 37 | } 38 | \seealso{ 39 | \command{\link{quantreg.rfsrc}} 40 | } 41 | \keyword{plot} 42 | -------------------------------------------------------------------------------- /src/importance.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_IMPORTANCE_H 2 | #define RF_IMPORTANCE_H 3 | #include "terminal.h" 4 | Node *identifyExtrapolatedMembership (Node *parent, 5 | double **yShadow, 6 | double **xShadow); 7 | void getVimpMembership(char mode, 8 | uint treeID, 9 | Terminal **vimpMembership, 10 | uint p); 11 | void updateEnsembleVimp (char mode, 12 | uint treeID, 13 | Terminal **vimpMembership, 14 | uint xVarIdx); 15 | void summarizePerturbedPerformance(char mode, 16 | uint treeID, 17 | uint bb, 18 | uint p, 19 | double **responsePtr); 20 | void finalizeVimpPerformance(char mode); 21 | void stackVimpMembership(char mode, Terminal ***membership); 22 | void unstackVimpMembership(char mode, Terminal **membership); 23 | void normalizeBlockedEnsembleEstimates(char mode, 24 | double **blkEnsembleMRTnum, 25 | double ***blkEnsembleCLSnum, 26 | double **blkEnsembleRGRnum, 27 | double *blkEnsembleDen); 28 | void resetBlockedEnsembleEstimates(char mode); 29 | void rfsrc_omp_atomic_update(double *addr, double incr); 30 | uint getVimpRecoverySeedDimension(char mode, uint opt); 31 | #endif 32 | -------------------------------------------------------------------------------- /man/wihs.Rd: -------------------------------------------------------------------------------- 1 | \name{wihs} 2 | \docType{data} 3 | \alias{wihs} 4 | \title{Women's Interagency HIV Study (WIHS)} 5 | \description{ 6 | Competing risk data set involving AIDS in women. 7 | } 8 | \format{A data frame containing: 9 | \tabular{ll}{ 10 | time \tab time to event\cr 11 | status \tab censoring status: 0=censoring, 1=HAART initiation, 2=AIDS/Death before HAART\cr 12 | ageatfda \tab age in years at time of FDA approval of first protease inhibitor\cr 13 | idu \tab history of IDU: 0=no history, 1=history\cr 14 | black \tab race: 0=not African-American; 1=African-American\cr 15 | cd4nadir \tab CD4 count (per 100 cells/ul) 16 | } 17 | } 18 | \source{ 19 | Study included 1164 women enrolled in WIHS, who were alive, infected 20 | with HIV, and free of clinical AIDS on December, 1995, when the first 21 | protease inhibitor (saquinavir mesylate) was approved by the Federal 22 | Drug Administration. Women were followed until the first of the 23 | following occurred: treatment initiation, AIDS diagnosis, death, or 24 | administrative censoring (September, 2006). Variables included history 25 | of injection drug use at WIHS enrollment, whether an individual was 26 | African American, age, and CD4 nadir prior to baseline.} 27 | \references{ 28 | Bacon M.C, von Wyl V., Alden C., et al. (2005). The Women's Interagency 29 | HIV Study: an observational cohort brings clinical sciences to the 30 | bench, \emph{Clin Diagn Lab Immunol}, 12(9):1013-1019. 31 | } 32 | \examples{ 33 | \donttest{ 34 | data(wihs, package = "randomForestSRC") 35 | wihs.obj <- rfsrc(Surv(time, status) ~ ., wihs, nsplit = 3, ntree = 100) 36 | } 37 | } 38 | \keyword{datasets} 39 | -------------------------------------------------------------------------------- /src/node.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_NODE_H 2 | #define RF_NODE_H 3 | typedef struct splitInfo SplitInfo; 4 | typedef struct node Node; 5 | struct node { 6 | unsigned int nodeID; 7 | unsigned int bnodeID; 8 | unsigned int blnodeID; 9 | unsigned int brnodeID; 10 | unsigned int fsrecID; 11 | struct node *parent; 12 | struct node *left; 13 | struct node *right; 14 | struct terminal *mate; 15 | unsigned int xSize; 16 | char *permissible; 17 | uint *permissibleIndx; 18 | uint permissibleIndxSize; 19 | char permissibleReIndxFlag; 20 | char permissibleOwnershipFlag; 21 | char splitFlag; 22 | double splitStatistic; 23 | double mean; 24 | double variance; 25 | unsigned int depth; 26 | unsigned int *splitDepth; 27 | char pseudoTerminal; 28 | unsigned int mpIndexSize; 29 | unsigned int fmpIndexSize; 30 | int *mpSign; 31 | int *fmpSign; 32 | char imputed; 33 | unsigned int *lmpIndex; 34 | unsigned int lmpIndexAllocSize, lmpIndexActualSize; 35 | double *lmpValue; 36 | unsigned int *flmpIndex; 37 | unsigned int flmpIndexAllocSize, flmpIndexActualSize; 38 | double *flmpValue; 39 | struct splitInfo *splitInfo; 40 | unsigned int *repMembrIndx; 41 | unsigned int *allMembrIndx; 42 | unsigned int repMembrSizeAlloc; 43 | unsigned int allMembrSizeAlloc; 44 | unsigned int repMembrSize; 45 | unsigned int allMembrSize; 46 | unsigned int oobMembrSizeAlloc; 47 | unsigned int oobMembrSize; 48 | unsigned int *oobMembrIndx; 49 | unsigned int *nonMissMembrIndxStatic; 50 | unsigned int nonMissMembrSizeStatic; 51 | unsigned int *nonMissMembrIndx; 52 | unsigned int nonMissMembrSize; 53 | double sumRght; 54 | }; 55 | #endif 56 | -------------------------------------------------------------------------------- /src/stackPreDefined.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_STACK_PRE_DEFINED_H 2 | #define RF_STACK_PRE_DEFINED_H 3 | #include "node.h" 4 | #include "terminal.h" 5 | void stackIncomingResponseArrays(char mode); 6 | void unstackIncomingResponseArrays(char mode); 7 | void unstackIncomingCovariateArrays(char mode); 8 | void unstackIncomingCovariateArrays(char mode); 9 | void stackIncomingArrays(char mode); 10 | void unstackIncomingArrays(char mode); 11 | void checkInteraction(void); 12 | void stackPreDefinedCommonArrays(char mode, 13 | Node ****nodeMembership, 14 | Terminal ****tTermMembership, 15 | Terminal ****tTermList, 16 | Node ***root); 17 | void unstackPreDefinedCommonArrays(char mode, 18 | Node ***nodeMembership, 19 | Terminal ***tTermMembership, 20 | Terminal ***tTermList, 21 | Node **root); 22 | void stackPreDefinedGrowthArrays(void); 23 | void unstackPreDefinedGrowthArrays(void); 24 | void stackPreDefinedRestoreArrays(void); 25 | void unstackPreDefinedRestoreArrays(void); 26 | void stackPreDefinedPredictArrays(void); 27 | void unstackPreDefinedPredictArrays(void); 28 | void stackWeights(double *weight, 29 | uint size, 30 | uint *weightType, 31 | uint **weightSorted, 32 | uint *weightDensitySize); 33 | void unstackWeights(uint weightType, 34 | uint size, 35 | uint *weightSorted); 36 | #endif 37 | -------------------------------------------------------------------------------- /R/predict.rfsrc.R: -------------------------------------------------------------------------------- 1 | predict.rfsrc <- function(object, 2 | newdata, 3 | importance = c(FALSE, TRUE, "none", "anti", "permute", "random"), 4 | get.tree = NULL, 5 | block.size = if (any(is.element(as.character(importance), c("none", "FALSE")))) NULL else 10, 6 | na.action = c("na.omit", "na.impute", "na.random"), 7 | outcome = c("train", "test"), 8 | perf.type = NULL, 9 | proximity = FALSE, 10 | forest.wt = FALSE, 11 | ptn.count = 0, 12 | distance = FALSE, 13 | var.used = c(FALSE, "all.trees", "by.tree"), 14 | split.depth = c(FALSE, "all.trees", "by.tree"), 15 | case.depth = FALSE, 16 | seed = NULL, 17 | do.trace = FALSE, 18 | membership = FALSE, 19 | marginal.xvar = NULL, 20 | ...) 21 | { 22 | dots <- list(...) 23 | m.target <- dots$m.target 24 | dots$m.target <- NULL 25 | importance.xvar <- dots$importance.xvar 26 | dots$importance.xvar <- NULL 27 | args <- c(list( 28 | object = object, 29 | m.target = m.target, 30 | importance = importance, 31 | get.tree = get.tree, 32 | block.size = block.size, 33 | importance.xvar = importance.xvar, 34 | na.action = na.action, 35 | outcome = outcome, 36 | perf.type = perf.type, 37 | proximity = proximity, 38 | forest.wt = forest.wt, 39 | ptn.count = ptn.count, 40 | distance = distance, 41 | var.used = var.used, 42 | split.depth = split.depth, 43 | case.depth = case.depth, 44 | seed = seed, 45 | do.trace = do.trace, 46 | membership = membership, 47 | marginal.xvar = marginal.xvar 48 | ), dots) 49 | ## if newdata is not missing 50 | if (!missing(newdata)) { 51 | args$newdata <- newdata 52 | } 53 | return(do.call("generic.predict.rfsrc", args)) 54 | } 55 | -------------------------------------------------------------------------------- /src/trace.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_TRACE_H 2 | #define RF_TRACE_H 3 | #define SUMM_DEF_TRACE 0x00000001 4 | #define SUMM_LOW_TRACE 0x00000002 5 | #define SUMM_MED_TRACE 0x00000004 6 | #define SUMM_HGH_TRACE 0x00000008 7 | #define SPLT_DEF_TRACE 0x00000010 8 | #define SPLT_LOW_TRACE 0x00000020 9 | #define SPLT_MED_TRACE 0x00000040 10 | #define SPLT_HGH_TRACE 0x00000080 11 | #define FORK_DEF_TRACE 0x00000100 12 | #define MISS_LOW_TRACE 0x00000200 13 | #define MISS_MED_TRACE 0x00000400 14 | #define MISS_HGH_TRACE 0x00000800 15 | #define OUTP_DEF_TRACE 0x00001000 16 | #define NUMR_DEF_TRACE 0x00002000 17 | #define FACT_LOW_TRACE 0x00004000 18 | #define FACT_HGH_TRACE 0x00008000 19 | #define ENSB_LOW_TRACE 0x00010000 20 | #define ENSB_HGH_TRACE 0x00020000 21 | #define BOOT_MED_TRACE 0x00040000 22 | #define VIMP_LOW_TRACE 0x00080000 23 | #define NODE_DEF_TRACE 0x00100000 24 | #define TIME_DEF_TRACE 0x00200000 25 | #define RAND_DEF_TRACE 0x00400000 26 | #define QUAN_DEF_TRACE 0x00800000 27 | #define SAMP_DEF_TRACE 0x01000000 28 | #define SAMP_LOW_TRACE 0x02000000 29 | #define TURN_OFF_TRACE 0x00000000 30 | void setTraceFlag(unsigned int traceFlag, unsigned int tree); 31 | unsigned int getTraceFlag(unsigned int tree); 32 | unsigned int updateTimeStamp(unsigned int before); 33 | void setMaxMemoryAllocation(size_t value); 34 | void setMinMemoryAllocation(size_t value); 35 | void setProbeMemoryAllocation(size_t value); 36 | size_t getMaxMemoryAllocation(void); 37 | size_t getMinMemoryAllocation(void); 38 | size_t getProbeMemoryAllocation(void); 39 | void increaseMemoryAllocation(size_t amount); 40 | void increaseProbeMemoryAllocation(size_t amount); 41 | void decreaseMemoryAllocation(size_t amount); 42 | void decreaseProbeMemoryAllocation(size_t amount); 43 | void memoryCheck(void); 44 | #endif 45 | -------------------------------------------------------------------------------- /R/plot.competing.risk.rfsrc.R: -------------------------------------------------------------------------------- 1 | plot.competing.risk.rfsrc <- function (x, plots.one.page = FALSE, ...) { 2 | ## Incoming parameter checks. All are fatal. 3 | if (is.null(x)) { 4 | stop("object x is empty!") 5 | } 6 | if (sum(inherits(x, c("rfsrc", "grow"), TRUE) == c(1, 2)) != 2 & 7 | sum(inherits(x, c("rfsrc", "predict"), TRUE) == c(1, 2)) != 2) { 8 | stop("This function only works for objects of class `(rfsrc, grow)' or '(rfsrc, predict)'.") 9 | } 10 | if (x$family != "surv-CR") { 11 | stop("this function only supports competing risk settings") 12 | } 13 | ## work-horse plotting function 14 | comprplot <- function(matx, ylab = "", legend = "", pos = 2) { 15 | m <- dim(cbind(matx))[2] 16 | if (m > 1) legend <- paste(legend, 1:m, " ") 17 | matplot(x$time.interest, matx, xlab = "Time", ylab = ylab, type = "l", 18 | col = (1:m), lty = 1, lwd = 3) 19 | legend(c("bottomleft", "topleft", "topright", "bottomright")[pos], 20 | legend = legend, 21 | col = (1:m), 22 | lty = 1, 23 | lwd = 3) 24 | } 25 | ## save par settings 26 | opar <- par(no.readonly = TRUE) 27 | on.exit(par(opar)) 28 | if (plots.one.page) par(mfrow = c(1,1)) else par(mfrow = c(2,2)) 29 | ## acquire the estimators - use OOB whenever possible 30 | if (!is.null(x$chf.oob)) { 31 | cschf <- apply(x$chf.oob, c(2, 3), mean, na.rm = TRUE) 32 | cif <- apply(x$cif.oob, c(2, 3), mean, na.rm = TRUE) 33 | } 34 | else { 35 | cschf <- apply(x$chf, c(2, 3), mean, na.rm = TRUE) 36 | cif <- apply(x$cif, c(2, 3), mean, na.rm = TRUE) 37 | } 38 | cpc <- do.call(cbind, lapply(1:ncol(cif), function(j) { 39 | cif[, j] / (1 - rowSums(cif[, -j, drop = FALSE])) 40 | })) 41 | ## plot the results 42 | comprplot(cschf, "Cause-Specific CHF", "CSCHF", 2) 43 | comprplot(100 * cif, "Probability (%)", "CIF", 2) 44 | comprplot(100 * cpc, "Probability (%)", "CPC", 2) 45 | } 46 | plot.competing.risk <- plot.competing.risk.rfsrc 47 | -------------------------------------------------------------------------------- /src/R_init_randomForestSRC.c: -------------------------------------------------------------------------------- 1 | 2 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 3 | #include "global.h" 4 | #include "external.h" 5 | 6 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 7 | 8 | 9 | 10 | 11 | #include 12 | #include 13 | #include 14 | #include 15 | extern SEXP rfsrcCIndex(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 16 | extern SEXP rfsrcCIndexNew(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 17 | extern SEXP rfsrcDistance(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 18 | extern SEXP rfsrcGrow(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, 19 | SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, 20 | SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, 21 | SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 22 | extern SEXP rfsrcPredict(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, 23 | SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, 24 | SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, 25 | SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, 26 | SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, 27 | SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, 28 | SEXP, SEXP); 29 | static const R_CallMethodDef CallEntries[] = { 30 | {"rfsrcCIndex", (DL_FUNC) &rfsrcCIndex, 6}, 31 | {"rfsrcCIndexNew",(DL_FUNC) &rfsrcCIndex, 6}, 32 | {"rfsrcDistance", (DL_FUNC) &rfsrcDistance, 9}, 33 | {"rfsrcGrow", (DL_FUNC) &rfsrcGrow, 38}, 34 | {"rfsrcPredict", (DL_FUNC) &rfsrcPredict, 62}, 35 | {NULL, NULL, 0} 36 | }; 37 | void R_init_randomForestSRC(DllInfo *dll) 38 | { 39 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 40 | R_useDynamicSymbols(dll, FALSE); 41 | } 42 | -------------------------------------------------------------------------------- /src/polarity.c: -------------------------------------------------------------------------------- 1 | 2 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 3 | #include "global.h" 4 | #include "external.h" 5 | 6 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 7 | 8 | 9 | 10 | 11 | #include "polarity.h" 12 | #include "factorOps.h" 13 | #include "nrutil.h" 14 | char getDaughterPolarity(uint treeID, SplitInfo *info, uint indv, void *value, ...) { 15 | char (*getDaughterPolarityGeneric) (uint treeID, 16 | SplitInfo *info, 17 | uint indv, 18 | void *value, 19 | ...); 20 | void *obsLocal; 21 | char daughterFlag; 22 | obsLocal = ((double **) value)[info -> randomVar[1]]; 23 | if (info -> mwcpSizeAbs[1] > 0) { 24 | getDaughterPolarityGeneric = &getDaughterPolaritySimpleFactor; 25 | } 26 | else { 27 | getDaughterPolarityGeneric = &getDaughterPolaritySimpleNonFactor; 28 | } 29 | daughterFlag = getDaughterPolarityGeneric(0, info, indv, obsLocal); 30 | return daughterFlag; 31 | } 32 | char getDaughterPolaritySimpleFactor(uint treeID, SplitInfo *info, uint indv, void *value, ...) { 33 | char daughterFlag; 34 | daughterFlag = splitOnFactor((uint) ((double *) value)[indv], (uint*) info -> randomPts[1]); 35 | return daughterFlag; 36 | } 37 | char getDaughterPolaritySimpleNonFactor(uint treeID, SplitInfo *info, uint indv, void *value, ...) { 38 | char daughterFlag; 39 | daughterFlag = (( ((double*) info -> randomPts[1])[1] - ((double *) value)[indv]) >= 0.0) ? LEFT : RIGHT; 40 | return daughterFlag; 41 | } 42 | char getDaughterPolaritySimpleFactorSingle(uint treeID, SplitInfo *info, uint indv, void *value, ...) { 43 | char daughterFlag; 44 | daughterFlag = splitOnFactor(*((uint *) ((double *) value)), (uint*) info -> randomPts[1]); 45 | return daughterFlag; 46 | } 47 | char getDaughterPolaritySimpleNonFactorSingle(uint treeID, SplitInfo *info, uint indv, void *value, ...) { 48 | char daughterFlag; 49 | daughterFlag = (( ((double*) info -> randomPts[1])[1] - (*((double *) value))) >= 0.0) ? LEFT : RIGHT; 50 | return daughterFlag; 51 | } 52 | -------------------------------------------------------------------------------- /src/stackOutput.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_STACK_OUTPUT_H 2 | #define RF_STACK_OUTPUT_H 3 | #include "node.h" 4 | #include "snpAuxiliaryInfo.h" 5 | void stackDefinedOutputObjects(char mode, 6 | char **sexpString, 7 | Node ***pRF_root, 8 | uint **pRF_tLeafCount_, 9 | double **pRF_proximity_, 10 | double **pRF_distance_, 11 | double **pRF_weight_, 12 | double **p_imputation_, 13 | double ***pRF_sImputeResponsePtr, 14 | double ***pRF_sImputePredictorPtr, 15 | uint **pRF_varUsed_, 16 | uint ***pRF_varUsedPtr, 17 | double **p_splitDepth_); 18 | void unstackDefinedOutputObjects(char mode); 19 | void stackForestObjectsPtrOnly(char mode); 20 | void stackTreeObjectsPtrOnly(char mode, uint treeID); 21 | void stackForestObjectsOutput(char mode); 22 | void writeForestObjectsOutput(char mode); 23 | void unstackForestObjectsPtrOnly(char mode); 24 | void unstackTreeObjectsPtrOnly(uint treeID); 25 | void stackForestObjectsAuxOnly(char mode); 26 | void unstackForestObjectsAuxOnly(char mode); 27 | void unstackAuxStatisticalStructures(char mode); 28 | void restackTermListAndQualitativeObjectsUnknown(uint treeID, uint length); 29 | void verifyAndRegisterCustomSplitRules(void); 30 | extern void registerCustomFunctions(void); 31 | void stackAuxiliaryInfoList(SNPAuxiliaryInfo ***list, uint count); 32 | void allocateAuxiliaryInfo(char targetFlag, 33 | char type, 34 | char *stringIdentifier, 35 | SNPAuxiliaryInfo **list, 36 | uint slot, 37 | void *snpPtr, 38 | void *auxiliaryArrayPtr, 39 | uint dimSize, 40 | int *dim); 41 | uint getAuxDim(char flag, int *dim, uint preIndex, uint postIndex); 42 | void unstackAuxiliaryInfoAndList(char targetFlag, SNPAuxiliaryInfo **list, uint count); 43 | #endif 44 | -------------------------------------------------------------------------------- /man/peakVO2.Rd: -------------------------------------------------------------------------------- 1 | \name{peakVO2} 2 | \docType{data} 3 | \alias{peakVO2} 4 | \title{Systolic Heart Failure Data} 5 | 6 | \description{ 7 | 8 | The data involve 2231 patients with systolic heart failure who 9 | underwent cardiopulmonary stress testing at the Cleveland Clinic. The 10 | primary end point was all-cause death. In total, 39 variables were 11 | measured for each patient, including baseline clinical values and 12 | exercise stress test results. A key variable of interest is 13 | peak VO2 (mL/kg per min), the peak respiratory exchange ratio. 14 | More details regarding the data can be found in Hsich et al. (2011). 15 | 16 | } 17 | \references{ 18 | 19 | Hsich E., Gorodeski E.Z.,Blackstone E.H., Ishwaran H. and Lauer 20 | M.S. (2011). Identifying important risk factors for survival in 21 | systolic heart failure patients using random survival 22 | forests. Circulation: Cardio. Qual. Outcomes, 4(1), 39-45. 23 | 24 | } 25 | \examples{ 26 | \donttest{ 27 | ## load the data 28 | data(peakVO2, package = "randomForestSRC") 29 | 30 | ## random survival forest analysis 31 | o <- rfsrc(Surv(ttodead, died)~., peakVO2) 32 | print(o) 33 | 34 | ## partial effect of peak V02 on mortality 35 | partial.o <- partial(o, 36 | partial.type = "mort", 37 | partial.xvar = "peak.vo2", 38 | partial.values = o$xvar$peak.vo2, 39 | partial.time = o$time.interest) 40 | pdta.m <- get.partial.plot.data(partial.o) 41 | 42 | 43 | ## partial effect of peak V02 on survival 44 | pvo2 <- quantile(o$xvar$peak.vo2) 45 | partial.o <- partial(o, 46 | partial.type = "surv", 47 | partial.xvar = "peak.vo2", 48 | partial.values = pvo2, 49 | partial.time = o$time.interest) 50 | pdta.s <- get.partial.plot.data(partial.o) 51 | 52 | 53 | ## compare the two plots 54 | par(mfrow=c(1,2)) 55 | 56 | plot(lowess(pdta.m$x, pdta.m$yhat, f = 2/3), 57 | type = "l", xlab = "peak VO2", ylab = "adjusted mortality") 58 | rug(o$xvar$peak.vo2) 59 | 60 | matplot(pdta.s$partial.time, t(pdta.s$yhat), type = "l", lty = 1, 61 | xlab = "years", ylab = "peak VO2 adjusted survival") 62 | legend("bottomleft", legend = paste0("peak VO2 = ", pvo2), 63 | bty = "n", cex = .75, fill = 1:5) 64 | 65 | 66 | }} 67 | \keyword{datasets} 68 | -------------------------------------------------------------------------------- /src/split.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SPLIT_H 2 | #define RF_SPLIT_H 3 | #include "splitInfo.h" 4 | typedef struct splitRuleObj SplitRuleObj; 5 | struct splitRuleObj { 6 | char (*function) (uint, 7 | Node*, 8 | SplitInfoMax*, 9 | GreedyObj*, 10 | char); 11 | }; 12 | SplitRuleObj *makeSplitRuleObj(uint rule); 13 | void freeSplitRuleObj(SplitRuleObj *obj); 14 | char getBestSplit(uint treeID, 15 | Node *parent, 16 | uint splitRule, 17 | SplitInfoMax *splitInfoMax, 18 | char multImpFlag); 19 | char randomSplitGeneric(uint treeID, 20 | Node *parent, 21 | SplitInfoMax *splitInfoMax, 22 | GreedyObj *greedyMembr, 23 | char multImpFlag); 24 | char randomSplitSimple(uint treeID, 25 | Node *parent, 26 | SplitInfoMax *splitInfoMax, 27 | GreedyObj *greedyMembr, 28 | char multImpFlag); 29 | typedef double (*customFunction) (unsigned int n, 30 | char *membership, 31 | double *time, 32 | double *event, 33 | unsigned int eventTypeSize, 34 | unsigned int eventTimeSize, 35 | double *eventTime, 36 | double *response, 37 | double mean, 38 | double variance, 39 | unsigned int maxLevel, 40 | double **feature, 41 | unsigned int featureCount); 42 | void regCustomFunctionClassification (customFunction func, uint i); 43 | void regCustomFunctionRegression (customFunction func, uint i); 44 | void regCustomFunctionSurvival (customFunction func, uint i); 45 | void regCustomFunctionCompetingRisk (customFunction func, uint i); 46 | void registerThis (customFunction func, unsigned int family, unsigned int slot); 47 | #endif 48 | -------------------------------------------------------------------------------- /src/regression.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_REGRESSION_H 2 | #define RF_REGRESSION_H 3 | #include "terminal.h" 4 | void getMeanResponse(uint treeID, 5 | Terminal *parent, 6 | uint *repMembrIndx, 7 | uint repMembrSize, 8 | uint *allMembrIndx, 9 | uint allMembrSize, 10 | uint *rmbrIterator); 11 | void updateEnsembleMean(char mode, 12 | uint treeID, 13 | char perfFlag, 14 | char omitDenominator); 15 | double getMeanSquareError(uint size, 16 | double *responsePtr, 17 | double *predictedOutcome, 18 | double *denomCount); 19 | char getVarianceClassic(uint repMembrSize, 20 | uint *repMembrIndx, 21 | uint nonMissMembrSize, 22 | uint *nonMissMembrIndx, 23 | double *targetResponse, 24 | double *mean, 25 | double *variance); 26 | char getVarianceClassicNoMiss(uint repMembrSize, 27 | uint *repMembrIndx, 28 | uint nonMissMembrSize, 29 | uint *nonMissMembrIndx, 30 | double *targetResponse, 31 | double *mean, 32 | double *variance); 33 | char getVarianceSinglePass(uint repMembrSize, 34 | uint *repMembrIndx, 35 | uint nonMissMembrSize, 36 | uint *nonMissMembrIndx, 37 | double *targetResponse, 38 | double *mean, 39 | double *variance); 40 | char getVarianceDoublePass(uint repMembrSize, 41 | uint *repMembrIndx, 42 | uint nonMissMembrSize, 43 | uint *nonMissMembrIndx, 44 | double *targetResponse, 45 | double *mean, 46 | double *variance); 47 | void updateQuantileStream(char mode, 48 | uint treeID); 49 | #endif 50 | -------------------------------------------------------------------------------- /src/splitGreedy.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SPLIT_GREEDY_H 2 | #define RF_SPLIT_GREEDY_H 3 | char summarizeSplitResultGreedy(SplitInfo *info); 4 | SplitInfo *makeSplitInfo(uint indicatorSize); 5 | void freeSplitInfo(SplitInfo *info); 6 | SplitInfoMax *makeSplitInfoMax(uint size); 7 | void freeSplitInfoMax(SplitInfoMax *info); 8 | char forkAndUpdateGeneric(uint treeID, 9 | Node *parent, 10 | uint *repMembrIndx, 11 | uint repMembrSize, 12 | uint *allMembrIndx, 13 | uint allMembrSize, 14 | char multImpFlag, 15 | SplitInfo *info, 16 | uint *leafCount, 17 | Node **nodeMembership); 18 | char forkNode(Node *parent, 19 | SplitInfo *info); 20 | void saveTree(uint b, Node *parent, uint *offset); 21 | void restoreTree(char mode, uint b, Node *parent); 22 | void integerToHexString(uint n, char *s); 23 | uint numHexDigits(unsigned n); 24 | double standardVector(uint treeID, 25 | char standardFlag, 26 | GreedyObj *greedyMembr, 27 | double *rawVector, 28 | uint *repMembrIndx, 29 | uint repMembrSize); 30 | double getL2Loss(uint treeID, 31 | double *response, 32 | uint *repMembrIndx, 33 | uint repMembrSize, 34 | uint *allMembrIndx, 35 | uint allMembrSize, 36 | char *membershipFlag, 37 | char selectFlag); 38 | double getNegLogLikelihood(uint treeID, 39 | uint maxLevel, 40 | double *response, 41 | uint *repMembrIndx, 42 | uint repMembrSize, 43 | uint *allMembrIndx, 44 | uint allMembrSize, 45 | char *membershipFlag, 46 | char selectFlag); 47 | GreedyObj *makeGreedyObj(Node *parent, GreedyObj *head); 48 | void freeGreedyObj(GreedyObj *gObj); 49 | void freeGreedyObjList(GreedyObj *gObj); 50 | GreedyObj *findGreedyObj(GreedyObj *head, Node *parent); 51 | #endif 52 | -------------------------------------------------------------------------------- /src/splitMult.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SPLIT_MULT_H 2 | #define RF_SPLIT_MULT_H 3 | #include "node.h" 4 | #include "splitInfo.h" 5 | char unsupervisedSplitMiss(uint treeID, 6 | Node *parent, 7 | SplitInfoMax *splitInfoMax, 8 | GreedyObj *greedyMembr, 9 | char multImpFlag); 10 | char unsupervisedSplitNew(uint treeID, 11 | Node *parent, 12 | SplitInfoMax *splitInfoMax, 13 | GreedyObj *greedyMembr, 14 | char multImpFlag); 15 | char multivariateSplitOld (uint treeID, 16 | Node *parent, 17 | SplitInfoMax *splitInfoMax, 18 | GreedyObj *greedyMembr, 19 | char multImpFlag); 20 | char multivariateSplitNew (uint treeID, 21 | Node *parent, 22 | SplitInfoMax *splitInfoMax, 23 | GreedyObj *greedyMembr, 24 | char multImpFlag); 25 | char multivariateSplitNew3 (uint treeID, 26 | Node *parent, 27 | SplitInfoMax *splitInfoMax, 28 | GreedyObj *greedyMembr, 29 | char multImpFlag); 30 | DistributionObj *stackRandomResponsesSimple(uint treeID, Node *parent); 31 | void unstackRandomResponsesSimple(uint treeID, DistributionObj *obj); 32 | char selectRandomResponsesSimpleVector(uint treeID, 33 | Node *parent, 34 | DistributionObj *distributionObj, 35 | uint *response, 36 | uint *responseCount); 37 | DistributionObj *stackRandomResponsesGeneric(uint treeID, Node *parent); 38 | void unstackRandomResponsesGeneric(uint treeID, DistributionObj *obj); 39 | char selectRandomResponsesGenericVector(uint treeID, 40 | Node *parent, 41 | DistributionObj *distributionObj, 42 | uint *covariate, 43 | uint *covariateCount); 44 | #endif 45 | -------------------------------------------------------------------------------- /R/rfsrc.anonymous.R: -------------------------------------------------------------------------------- 1 | rfsrc.anonymous <- function(formula, data, forest = TRUE, ...) 2 | { 3 | ## -------------------------------------------------------------- 4 | ## 5 | ## preliminary processing 6 | ## 7 | ## -------------------------------------------------------------- 8 | #if (any(is.na(data))) { 9 | #stop("missing values not allowed in anonymous mode") 10 | #} 11 | ##-------------------------------------------------------------- 12 | ## 13 | ## extract additional options specified by user 14 | ## we lock this down to allowed types 15 | ## 16 | ##-------------------------------------------------------------- 17 | ## list of forest parameters 18 | rfnames <- get.rfnames(hidden = TRUE) 19 | ## restrict to allowed values 20 | rfnames <- rfnames[rfnames != "data" & rfnames != "forest"] 21 | ## get the permissible hidden options 22 | ## add formula if present 23 | dots <- list(...) 24 | dots <- dots[names(dots) %in% rfnames] 25 | if (!missing(formula)) { 26 | dots$formula <- formula 27 | } 28 | ## manually over-ride key hidden options 29 | dots$terminal.qualts <- TRUE 30 | dots$terminal.quants <- TRUE 31 | ##-------------------------------------------------------------- 32 | ## 33 | ## make the grow call 34 | ## 35 | ##-------------------------------------------------------------- 36 | retO <- do.call("rfsrc", c(list(data = data, forest = forest, na.action = "na.omit"), dots)) 37 | ##-------------------------------------------------------------- 38 | ## 39 | ## save impute mean 40 | ## 41 | ##-------------------------------------------------------------- 42 | retO$forest$impute.mean <- get.impute.mean(data) 43 | ##-------------------------------------------------------------- 44 | ## 45 | ## strip out the training data 46 | ## 47 | ##-------------------------------------------------------------- 48 | retO$xvar <- retO$forest$xvar <- NULL 49 | ##-------------------------------------------------------------- 50 | ## 51 | ## add special class distinction --> for both the grow object AND forest 52 | ## 53 | ##-------------------------------------------------------------- 54 | class(retO) <- c(class(retO), "anonymous") 55 | class(retO$forest) <- c(class(retO$forest), "anonymous") 56 | ##-------------------------------------------------------------- 57 | ## 58 | ## return the anonymized object 59 | ## 60 | ##-------------------------------------------------------------- 61 | retO 62 | } 63 | -------------------------------------------------------------------------------- /src/survivalE.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SURVIVAL_E_H 2 | #define RF_SURVIVAL_E_H 3 | void updateEnsembleSurvival(char mode, 4 | uint treeID, 5 | char perfFlag); 6 | void getEnsembleMortality(char mode, 7 | uint treeID, 8 | uint obsSize, 9 | double **ensembleMRTptr, 10 | double *ensembleDen, 11 | double *mortality); 12 | void getEnsembleMortalityCR(char mode, 13 | uint treeID, 14 | uint obsSize, 15 | double **ensembleMRTptr, 16 | double *ensembleDen, 17 | double **cMortality); 18 | void getConditionalConcordanceArrays(uint j, 19 | double *timePtr, 20 | double *statusPtr, 21 | double *mortalityPtr, 22 | double *genericEnsembleDenPtr, 23 | uint *meIndividualSize, 24 | uint **eIndividual, 25 | double *subsettedTime, 26 | double *subsettedStatus, 27 | double *subsettedMortality, 28 | double *subsettedEnsembleDen); 29 | double getConcordanceIndex(int polarity, 30 | uint size, 31 | double *timePtr, 32 | double *statusPtr, 33 | double *predictedOutcome, 34 | double *oobCount); 35 | double getConcordanceIndexNew(int polarity, 36 | uint size, 37 | double *timePtr, 38 | double *statusPtr, 39 | double *predicted, 40 | double *oobCount); 41 | void getCRPerformance (char mode, 42 | uint obsSize, 43 | double **responsePtr, 44 | double **yearsLost, 45 | double *denom, 46 | double *performanceVector); 47 | uint getTimeInterestIndex(double *array, uint length, double value); 48 | #endif 49 | -------------------------------------------------------------------------------- /src/trace.c: -------------------------------------------------------------------------------- 1 | 2 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 3 | #include "global.h" 4 | #include "external.h" 5 | 6 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 7 | 8 | 9 | 10 | 11 | #include "trace.h" 12 | #include "error.h" 13 | unsigned int RF_traceFlagDiagLevel; 14 | unsigned int RF_traceFlagIterValue; 15 | size_t RF_memor_maxMemoryAllocation; 16 | size_t RF_memor_minMemoryAllocation; 17 | size_t RF_memor_probeMemoryAllocation; 18 | void setTraceFlag(unsigned int traceFlag, unsigned int tree) { 19 | RF_traceFlagDiagLevel = traceFlag; 20 | RF_traceFlagIterValue = tree; 21 | } 22 | unsigned int getTraceFlag(unsigned int tree) { 23 | unsigned int result; 24 | result = FALSE; 25 | if (RF_traceFlagIterValue == tree) { 26 | result = RF_traceFlagDiagLevel; 27 | } 28 | else { 29 | if (RF_traceFlagIterValue == 0) { 30 | result = RF_traceFlagDiagLevel; 31 | } 32 | } 33 | return result; 34 | } 35 | unsigned int updateTimeStamp(unsigned int before) { 36 | unsigned int stamp; 37 | double cpuTimeUsed; 38 | stamp = clock(); 39 | cpuTimeUsed = ((double) (stamp - before)) / CLOCKS_PER_SEC; 40 | RF_nativePrint("\nRF-SRC: CPU process time: %20.3f \n", cpuTimeUsed); 41 | return stamp; 42 | } 43 | void memoryCheck(void) { 44 | } 45 | void setMaxMemoryAllocation(size_t value) { 46 | RF_memor_maxMemoryAllocation = value; 47 | } 48 | void setMinMemoryAllocation(size_t value) { 49 | RF_memor_minMemoryAllocation = value; 50 | } 51 | void setProbeMemoryAllocation(size_t value) { 52 | RF_memor_probeMemoryAllocation = value; 53 | } 54 | size_t getMaxMemoryAllocation(void) { 55 | return (RF_memor_maxMemoryAllocation); 56 | } 57 | size_t getMinMemoryAllocation(void) { 58 | return (RF_memor_minMemoryAllocation); 59 | } 60 | size_t getProbeMemoryAllocation(void) { 61 | return (RF_memor_probeMemoryAllocation); 62 | } 63 | void increaseMemoryAllocation(size_t amount) { 64 | RF_memor_minMemoryAllocation += amount; 65 | if (RF_memor_minMemoryAllocation > RF_memor_maxMemoryAllocation) { 66 | RF_memor_maxMemoryAllocation = RF_memor_minMemoryAllocation; 67 | } 68 | } 69 | void increaseProbeMemoryAllocation(size_t amount) { 70 | RF_memor_probeMemoryAllocation += amount; 71 | } 72 | void decreaseMemoryAllocation(size_t amount) { 73 | RF_memor_minMemoryAllocation -= amount; 74 | } 75 | void decreaseProbeMemoryAllocation(size_t amount) { 76 | RF_memor_probeMemoryAllocation -= amount; 77 | } 78 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite randomForestSRC in publications use:") 2 | 3 | titl <- meta$Title 4 | year <- sub(".*(2[[:digit:]]{3})-.*", "\\1", meta$Date) 5 | vers <- paste("R package version", meta$Version) 6 | 7 | bibentry(bibtype="Manual", 8 | title = titl, 9 | author = c(person(family="Ishwaran", given="H."), 10 | person(family="Kogalur", given="U.B.")), 11 | publisher = "manual", 12 | year = year, 13 | note = vers, 14 | url = "https://cran.r-project.org/package=randomForestSRC", 15 | pdf = "https://cran.r-project.org/web/packages/randomForestSRC/randomForestSRC.pdf", 16 | textVersion = 17 | paste("Ishwaran H. and Kogalur U.B. (",year ,"). ", 18 | titl, ", ", vers, ".", sep="")) 19 | 20 | bibentry(bibtype="Article", 21 | title = "Random survival forests for R", 22 | author = c(person(family="Ishwaran", given="H."), 23 | person(family="Kogalur", given="U.B.")), 24 | journal = "R News", 25 | year = "2007", 26 | volume = "7", 27 | number = "2", 28 | pages = "25--31", 29 | month = "October", 30 | url = "https://CRAN.R-project.org/doc/Rnews/", 31 | pdf = "https://CRAN.R-project.org/doc/Rnews/Rnews_2007-2.pdf", 32 | 33 | textVersion = 34 | paste("Ishwaran H. and Kogalur U.B. (2007). ", 35 | "Random survival forests for R. ", 36 | "R News 7(2), 25--31.", 37 | sep="")) 38 | 39 | bibentry(bibtype="Article", 40 | title = "Random survival forests", 41 | author = c(person(family="Ishwaran", given="H."), 42 | person(family="Kogalur", given="U.B."), 43 | person(family="Blackstone", given="E.H."), 44 | person(family="Lauer", given="M.S.")), 45 | journal = "Ann. Appl. Statist.", 46 | year = "2008", 47 | volume = "2", 48 | number = "3", 49 | pages = "841--860", 50 | url = "https://arXiv.org/abs/0811.1645v1", 51 | pdf = "http://arxiv.org/pdf/0811.1645", 52 | 53 | textVersion = 54 | paste("Ishwaran H., Kogalur U.B., Blackstone E.H. and Lauer M.S. (2008). ", 55 | "Random survival forests. ", 56 | "Ann. Appl. Statist. 2(3), 841--860.", 57 | sep="")) 58 | 59 | -------------------------------------------------------------------------------- /src/termOps.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_TERM_OPS_H 2 | #define RF_TERM_OPS_H 3 | #include "terminal.h" 4 | Terminal *makeTerminal(void); 5 | void freeTerminal(Terminal *parent); 6 | void stackTermLMIIndex(Terminal *tTerm, unsigned int size); 7 | void unstackTermLMIIndex(Terminal *tTerm); 8 | void freeTerminalNodeLocalSurvivalStructures(Terminal *tTerm); 9 | void freeTerminalNodeSurvivalStructuresIntermediate(Terminal *tTerm); 10 | void freeTerminalNodeSurvivalStructuresFinal(Terminal *tTerm); 11 | void freeTerminalNodeNonSurvivalStructures(Terminal *tTerm); 12 | void stackAtRiskAndEventCount(Terminal *tTerm, unsigned int eTypeSize, unsigned int mTimeSize); 13 | void unstackAtRiskAndEventCount(Terminal *tTerm); 14 | void stackEventTimeIndex(Terminal *tTerm, unsigned int mTimeSize); 15 | void unstackEventTimeIndex(Terminal *tTerm); 16 | void stackLocalRatio(Terminal *tTerm, unsigned int eTypeSize, unsigned int eTimeSize); 17 | void unstackLocalRatio(Terminal *tTerm); 18 | void stackLocalRatioHazard(Terminal *tTerm, unsigned int eTypeSize, unsigned int eTimeSize); 19 | void unstackLocalRatioHazard(Terminal *tTerm); 20 | void stackLocalSurvival(Terminal *tTerm, unsigned int eTimeSize); 21 | void unstackLocalSurvival(Terminal *tTerm); 22 | void stackLocalNelsonAalen(Terminal *tTerm, unsigned int eTimeSize); 23 | void unstackLocalNelsonAalen(Terminal *tTerm); 24 | void stackLocalCSH(Terminal *tTerm, unsigned int eTypeSize, unsigned int eTimeSize); 25 | void unstackLocalCSH(Terminal *tTerm); 26 | void stackLocalCIF(Terminal *tTerm, unsigned int eTypeSize, unsigned int eTimeSize); 27 | void unstackLocalCIF(Terminal *tTerm); 28 | void stackNelsonAalen(Terminal *tTerm, unsigned int sTimeSize); 29 | void unstackNelsonAalen(Terminal *tTerm); 30 | void stackSurvival(Terminal *tTerm, unsigned int sTimeSize); 31 | void unstackSurvival(Terminal *tTerm); 32 | void stackCSH(Terminal *tTerm, unsigned int eTypeSize, unsigned int sTimeSize); 33 | void unstackCSH(Terminal *tTerm); 34 | void stackCIF(Terminal *tTerm, unsigned int eTypeSize, unsigned int sTimeSize); 35 | void unstackCIF(Terminal *tTerm); 36 | void stackMortality(Terminal *tTerm, unsigned int eTypeSize); 37 | void unstackMortality(Terminal *tTerm); 38 | void stackMultiClassProb(Terminal *tTerm, unsigned int rfCount, unsigned int *rfSize); 39 | void stackMultiClassProbPartial(Terminal *tTerm, unsigned int rfCount); 40 | void unstackMultiClassProb(Terminal *tTerm); 41 | void stackMeanResponse(Terminal *tTerm, unsigned int rnfCount); 42 | void unstackMeanResponse(Terminal *tTerm); 43 | void stackMemberStream(Terminal *tTerm, unsigned int membrSize); 44 | void unstackMemberStream(Terminal *tTerm); 45 | #endif 46 | -------------------------------------------------------------------------------- /R/utilities_varselect.R: -------------------------------------------------------------------------------- 1 | get.varselect.imp <- function(f.o, target.dim) { 2 | if (!is.null(f.o$importance)) { 3 | c(cbind(f.o$importance)[, target.dim]) 4 | } 5 | else { 6 | rep(NA, length(f.o$xvar.names)) 7 | } 8 | } 9 | get.varselect.imp.all <- function(f.o) { 10 | if (!is.null(f.o$importance)) { 11 | imp.all <- cbind(f.o$importance) 12 | if (ncol(imp.all) == 1) { 13 | colnames(imp.all) <- "vimp" 14 | } 15 | else { 16 | colnames(imp.all) <- paste("vimp.", colnames(imp.all), sep = "") 17 | } 18 | imp.all 19 | } 20 | else { 21 | rep(NA, length(f.o$xvar.names)) 22 | } 23 | } 24 | get.varselect.err <- function(f.o) { 25 | if (!is.null(f.o$err.rate)) { 26 | if (grepl("surv", f.o$family)) { 27 | err <- 100 * cbind(f.o$err.rate)[f.o$ntree, ] 28 | } 29 | else { 30 | err <- cbind(f.o$err.rate)[f.o$ntree, ] 31 | } 32 | } 33 | else { 34 | err = NA 35 | } 36 | err 37 | } 38 | get.varselect.length <- function(x, y) { 39 | (length(x) > 0 & length(y) > 0) 40 | } 41 | get.varselect.mtry <- function(x, y) { 42 | mtry <- round((length(x) - length(y))/3) 43 | if (mtry == 0) { 44 | round(length(x)/3) 45 | } 46 | else { 47 | mtry 48 | } 49 | } 50 | get.varselect.sd <- function(x) { 51 | if (all(is.na(x))) { 52 | NA 53 | } 54 | else { 55 | sd(x, na.rm = TRUE) 56 | } 57 | } 58 | permute.rows <-function(x) { 59 | n <- nrow(x) 60 | p <- ncol(x) 61 | mm <- runif(length(x)) + rep(seq(n) * 10, rep(p, n)) 62 | matrix(t(x)[order(mm)], n, p, byrow = TRUE) 63 | } 64 | balanced.folds <- function(y, nfolds = min(min(table(y)), 10)) { 65 | y[is.na(y)] <- resample(y[!is.na(y)], size = sum(is.na(y)), replace = TRUE) 66 | totals <- table(y) 67 | if (length(totals) < 2) { 68 | return(cv.folds(length(y), nfolds)) 69 | } 70 | else { 71 | fmax <- max(totals) 72 | nfolds <- min(nfolds, fmax) 73 | nfolds <- max(nfolds, 2) 74 | folds <- as.list(seq(nfolds)) 75 | yids <- split(seq(y), y) 76 | bigmat <- matrix(NA, ceiling(fmax/nfolds) * nfolds, length(totals)) 77 | for(i in seq(totals)) { 78 | if(length(yids[[i]])>1){bigmat[seq(totals[i]), i] <- sample(yids[[i]])} 79 | if(length(yids[[i]])==1){bigmat[seq(totals[i]), i] <- yids[[i]]} 80 | } 81 | smallmat <- matrix(bigmat, nrow = nfolds) 82 | smallmat <- permute.rows(t(smallmat)) 83 | res <- vector("list", nfolds) 84 | for(j in 1:nfolds) { 85 | jj <- !is.na(smallmat[, j]) 86 | res[[j]] <- smallmat[jj, j] 87 | } 88 | return(res) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/stack.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_STACK_H 2 | #define RF_STACK_H 3 | void stackAndInitializeTimeAndSubjectArrays(char mode); 4 | void unstackTimeAndSubjectArrays(char mode); 5 | void stackFactorArrays(char mode); 6 | void stackFactorGeneric(char respFlag, 7 | uint size, 8 | char *type, 9 | uint **p_factorMap, 10 | uint *factorCount, 11 | uint **p_factorIndex, 12 | uint **p_factorSize, 13 | uint **p_nonfactorMap, 14 | uint *nonfactorCount, 15 | uint **p_nonfactorIndex); 16 | void unstackFactorArrays(char mode); 17 | void initializeFactorArrays(char mode); 18 | char stackMissingArraysPhase1(char mode); 19 | char stackMissingArraysPhase2(char mode); 20 | void unstackMissingArrays(char mode); 21 | void stackMissingSignatures(uint obsSize, 22 | uint rspSize, 23 | double **responsePtr, 24 | double **predictorPtr, 25 | uint *recordMap, 26 | uint recordSize, 27 | uint **p_recordIndex, 28 | uint *p_vSize, 29 | int ***p_vSign, 30 | int **p_vIndex, 31 | uint *pRF_mrFactorSize, 32 | uint **pRF_mrFactorIndex, 33 | uint *pRF_mxFactorSize, 34 | uint **pRF_mxFactorIndex, 35 | char *pRF_mTimeFlag, 36 | char *pRF_mStatusFlag, 37 | char *pRF_mResponseFlag, 38 | char *pRF_mPredictorFlag); 39 | void unstackMissingSignatures(uint rspSize, 40 | uint recordSize, 41 | uint *recordIndex, 42 | uint vSize, 43 | int **vSign, 44 | int *vIndex, 45 | uint mrFactorSize, 46 | uint *mrFactorIndex, 47 | uint mxFactorSize, 48 | uint *mxFactorIndex); 49 | char stackCompetingArrays(char mode); 50 | void unstackCompetingArrays(char mode); 51 | char stackClassificationArrays(char mode); 52 | void unstackClassificationArrays(char mode); 53 | void getEventInfo(char mode); 54 | #endif 55 | -------------------------------------------------------------------------------- /R/generic.impute.rfsrc.R: -------------------------------------------------------------------------------- 1 | generic.impute.rfsrc <- function(data, 2 | ntree = 250, 3 | nodesize = NULL, 4 | nsplit = 1, 5 | nimpute = 1, 6 | fast = FALSE, 7 | ...) 8 | { 9 | ## save the row and column names: later we will check if any rows or columns 10 | ## were deleted as part of the missing data preprocessing 11 | c.names <- colnames(data) 12 | r.names <- rownames(data) 13 | ## acquire the permissible hidden options 14 | dots <- list(...) 15 | dots$na.action <- dots$impute.only <- dots$forest <- NULL 16 | ## rfsrc grow call 17 | if (!fast) { 18 | object <- do.call("rfsrc", 19 | c(list(data = data, 20 | ntree = ntree, 21 | nodesize = nodesize, 22 | nsplit = nsplit, 23 | nimpute = nimpute, 24 | na.action = "na.impute", 25 | impute.only = TRUE), dots)) 26 | } 27 | else {## user has requested the fast forest interface 28 | object <- do.call("rfsrc.fast", 29 | c(list(data = data, 30 | ntree = ntree, 31 | nodesize = nodesize, 32 | nsplit = nsplit, 33 | nimpute = nimpute, 34 | na.action = "na.impute", 35 | impute.only = TRUE), dots)) 36 | } 37 | ## confirm that no error has occured 38 | if (is.null(object)) { 39 | return(NULL) 40 | } 41 | ## the data is no longer needed 42 | rm(data) 43 | ## if the return object is a data frame then imputation was not 44 | ## performed: for example, there was no missing data either before 45 | ## or after processing 46 | if (is.data.frame(object)) { 47 | return(invisible(list(data = object, missing = row.col.deleted(object, r.names, c.names)))) 48 | } 49 | ## preliminary results of imputation 50 | if(is.null(object$yvar.names)) { 51 | imputed.result <- object$xvar 52 | } 53 | else { 54 | imputed.result <- cbind(object$yvar, object$xvar) 55 | } 56 | colnames(imputed.result) <- c(object$yvar.names, object$xvar.names) 57 | ## overlay the data (only necessary when nimpute = 1) 58 | if (nimpute == 1) { 59 | imputed.result[object$imputed.indv, ] <- object$imputed.data 60 | } 61 | ## the object is no longer required 62 | rm(object) 63 | ## return the goodies 64 | invisible(list(data = imputed.result, missing = row.col.deleted(imputed.result, r.names, c.names))) 65 | } 66 | -------------------------------------------------------------------------------- /R/vimp.rfsrc.R: -------------------------------------------------------------------------------- 1 | vimp.rfsrc <- function(object, 2 | xvar.names, 3 | importance = c("anti", "permute", "random"), 4 | block.size = 10, 5 | joint = FALSE, 6 | seed = NULL, 7 | do.trace = FALSE, 8 | ...) 9 | { 10 | ## incoming parameter checks - all are fatal 11 | if (missing(object)) { 12 | stop("object is missing") 13 | } 14 | if (object$family == "unsupv") { 15 | stop("vimp does not apply to unsupervised forests: consider using max.subtree or varpro") 16 | } 17 | if (sum(inherits(object, c("rfsrc", "grow"), TRUE) == c(1, 2)) != 2 & 18 | sum(inherits(object, c("rfsrc", "forest"), TRUE) == c(1, 2)) != 2) { 19 | stop("This function only works for objects of class `(rfsrc, grow)' or '(rfsrc, forest)'") 20 | } 21 | ## process the importance specification 22 | if (!is.logical(joint)) { 23 | stop("joint must be a logical value") 24 | } 25 | importance <- importance[1] 26 | if (joint & importance != "none") { 27 | i.str <- unlist(strsplit(importance, "\\.")) 28 | if (length(i.str) == 1) { 29 | importance <- paste(i.str[1], ".joint", sep = "") 30 | } 31 | else if (length(i.str) == 2) { 32 | importance <- paste(i.str[1], ".joint.", i.str[2], sep = "") 33 | } 34 | } 35 | importance <- match.arg(as.character(importance), 36 | c(TRUE, "anti", "permute", "random", "anti.joint", "permute.joint", "random.joint")) 37 | ## grow objects under non-standard bootstrapping are devoid of performance values 38 | if (sum(inherits(object, c("rfsrc", "grow"), TRUE) == c(1, 2)) == 2) { 39 | if (is.null(object$forest)) { 40 | stop("The forest is empty. Re-run rfsrc (grow) call with forest=TRUE") 41 | } 42 | else { 43 | bootstrap <- object$forest$bootstrap 44 | } 45 | } 46 | else { 47 | bootstrap <- object$bootstrap 48 | } 49 | if (bootstrap == "none" || bootstrap == "by.node") { 50 | stop("grow objects under non-standard bootstrapping are devoid of performance values") 51 | } 52 | ## legacy m.target 53 | dots <- list(...) 54 | m.target <- dots$m.target 55 | dots$m.target <- NULL 56 | ## generic predict call 57 | args <- c(list( 58 | object = object, 59 | m.target = m.target, 60 | importance = importance, 61 | block.size = block.size, 62 | seed = seed, 63 | do.trace = do.trace, 64 | membership = FALSE 65 | ), dots) 66 | ## if xvar.names is not missing 67 | if (!missing(xvar.names)) { 68 | args$importance.xvar <- xvar.names 69 | } 70 | return(do.call("generic.predict.rfsrc", args)) 71 | } 72 | vimp <- vimp.rfsrc 73 | -------------------------------------------------------------------------------- /src/leafLink.c: -------------------------------------------------------------------------------- 1 | 2 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 3 | #include "global.h" 4 | #include "external.h" 5 | 6 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 7 | 8 | 9 | 10 | 11 | #include "leafLink.h" 12 | #include "termOps.h" 13 | #include "nrutil.h" 14 | LeafLinkedObj *makeLeafLinkedObj(void) { 15 | LeafLinkedObj *obj = (LeafLinkedObj*) gblock((size_t) sizeof(LeafLinkedObj)); 16 | obj -> fwdLink = NULL; 17 | obj -> bakLink = NULL; 18 | obj -> nodePtr = NULL; 19 | obj -> termPtr = NULL; 20 | obj -> termPtrAux = NULL; 21 | obj -> nodeID = 0; 22 | obj -> ibgMembrCount = 0; 23 | obj -> allMembrCount = 0; 24 | obj -> oobMembrCount = 0; 25 | return obj; 26 | } 27 | LeafLinkedObjSimple *makeLeafLinkedObjSimple(void) { 28 | LeafLinkedObjSimple *obj = (LeafLinkedObjSimple*) gblock((size_t) sizeof(LeafLinkedObjSimple)); 29 | obj -> fwdLink = NULL; 30 | obj -> bakLink = NULL; 31 | obj -> nodePtr = NULL; 32 | return obj; 33 | } 34 | LeafLinkedObj *makeAndSpliceLeafLinkedObj(LeafLinkedObj *tail, 35 | Node *nodePtr, 36 | uint ibgCount, 37 | uint allCount) { 38 | LeafLinkedObj *obj = makeLeafLinkedObj(); 39 | tail -> fwdLink = obj; 40 | obj -> bakLink = tail; 41 | obj -> nodePtr = nodePtr; 42 | obj -> termPtr = makeTerminal(); 43 | (obj -> termPtr) -> mate = obj -> nodePtr; 44 | (obj -> nodePtr) -> mate = obj -> termPtr; 45 | (obj -> termPtr) -> nodeID = obj -> nodeID = nodePtr -> nodeID; 46 | obj -> ibgMembrCount = ibgCount; 47 | obj -> allMembrCount = allCount; 48 | return obj; 49 | } 50 | LeafLinkedObjSimple *makeAndSpliceLeafLinkedObjSimple(LeafLinkedObjSimple *tail, 51 | Node *nodePtr) { 52 | LeafLinkedObjSimple *obj = makeLeafLinkedObjSimple(); 53 | tail -> fwdLink = obj; 54 | obj -> bakLink = tail; 55 | obj -> nodePtr = nodePtr; 56 | return obj; 57 | } 58 | void freeLeafLinkedObj(LeafLinkedObj *obj) { 59 | if (obj -> termPtr != NULL) { 60 | freeTerminal(obj -> termPtr); 61 | obj -> termPtr = NULL; 62 | } 63 | free_gblock(obj, (size_t) sizeof(LeafLinkedObj)); 64 | } 65 | void freeLeafLinkedObjSimple(LeafLinkedObjSimple *obj) { 66 | free_gblock(obj, (size_t) sizeof(LeafLinkedObjSimple)); 67 | } 68 | void freeLeafLinkedObjList(LeafLinkedObj *obj) { 69 | if (obj -> fwdLink != NULL) { 70 | freeLeafLinkedObjList(obj -> fwdLink); 71 | } 72 | freeLeafLinkedObj(obj); 73 | } 74 | void freeLeafLinkedObjListRev(LeafLinkedObj *obj) { 75 | if (obj -> bakLink != NULL) { 76 | freeLeafLinkedObjListRev(obj -> bakLink); 77 | } 78 | freeLeafLinkedObj(obj); 79 | } 80 | -------------------------------------------------------------------------------- /R/distance.R: -------------------------------------------------------------------------------- 1 | distance <- function (x, 2 | method = "euclidean", 3 | rowI = NULL, 4 | rowJ = NULL, 5 | do.trace = FALSE) 6 | { 7 | n = dim(x)[1] 8 | p = dim(x)[2] 9 | if (n < 2) { 10 | stop("matrix must have more than one (1) row") 11 | } 12 | if (length(rowI) > 0 ) { 13 | if (length(rowI) != length(rowJ)) { 14 | stop("rowI and rowJ identifiers must have the same length") 15 | } 16 | } 17 | ## Legal methods. Not all are implemented. They are placeholders. 18 | method.names <- c("euclidean", ## 1 19 | "canberra", ## 2 20 | "maximum") ## 3 21 | ## Default method is euclidean. 22 | if(is.null(method)) { 23 | method.idx <- which(method.names == "euclidean") 24 | } 25 | else { 26 | method.idx <- which(method.names == method) 27 | } 28 | ## Check for coherent distance method. 29 | if (length(method.idx) != 1) { 30 | stop("distance metric invalid") 31 | } 32 | ## Jump to native code. Note that if rowI and rowJ are NULL, we return the distance matrix 33 | ## in it's entirity. Only the lower-diagonal entries are returned. If rowI and rowJ are non-NULL 34 | ## we return the distance for only the cell pairs (rowI[], rowJ[]). This allows for master/slave hybrid 35 | ## OpenMP/MPI cluster processing. 36 | nativeOutput <- .Call("rfsrcDistance", 37 | as.integer(method.idx), 38 | as.integer(n), 39 | as.integer(p), 40 | as.double(x), 41 | as.integer(length(rowI)), 42 | as.integer(rowI), 43 | as.integer(rowJ), 44 | as.integer(get.rf.cores()), 45 | as.integer(do.trace)) 46 | ## check for error return condition in the native code 47 | if (is.null(nativeOutput)) { 48 | stop("An error has occurred in rfsrcDistance. Please turn trace on for further analysis.") 49 | } 50 | if (length(rowI) > 0) { 51 | ## Return only the cell pairs (rowI[], rowJ[]) for processing in the master/slave scripts. 52 | result <- list(rowI = rowI, rowJ = rowJ, distance = nativeOutput$distance) 53 | } 54 | else { 55 | ## Return the matrix in its entirety. 56 | result <- matrix(0, n, n) 57 | count <- 0 58 | for (k in 2:n) { 59 | result[k,1:(k-1)] <- nativeOutput$distance[(count + 1):(count + k - 1)] 60 | result[1:(k-1),k] <- result[k,1:(k-1)] 61 | count <- count + (k-1) 62 | } 63 | } 64 | return (result) 65 | } 66 | -------------------------------------------------------------------------------- /src/classification.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_CLASSIFICATION_H 2 | #define RF_CLASSIFICATION_H 3 | #include "terminal.h" 4 | void getMultiClassProb (uint treeID, 5 | Terminal *parent, 6 | uint *repMembrIndx, 7 | uint repMembrSize, 8 | uint *allMembrIndx, 9 | uint allMembrSize, 10 | uint *rmbrIterator); 11 | void updateEnsembleMultiClass(char mode, 12 | uint treeID, 13 | char perfFlag, 14 | char omitDenominator); 15 | double getBrierScore(uint obsSize, 16 | uint rTarget, 17 | double *responsePtr, 18 | double **outcomeCLS, 19 | double *denomCount, 20 | double *cpv); 21 | void getConditionalClassificationIndexGrow(uint size, 22 | uint rTarget, 23 | double *responsePtr, 24 | double **outcomeCLS, 25 | double *maxVote, 26 | double *denomCount, 27 | double *cpv); 28 | void getConditionalClassificationIndexPred(uint size, 29 | uint rTarget, 30 | double *responsePtr, 31 | double **outcomeCLS, 32 | double *maxVote, 33 | double *denomCount, 34 | double *cpv); 35 | double getClassificationIndex(uint size, 36 | uint rTarget, 37 | double *responsePtr, 38 | double *denomCount, 39 | double *maxVote); 40 | double getGMeanIndexGrow(uint size, 41 | uint rTarget, 42 | double *responsePtr, 43 | double *denomCount, 44 | double *maxVote); 45 | double getGMeanIndexPred(uint size, 46 | uint rTarget, 47 | double *responsePtr, 48 | double *denomCount, 49 | double *maxVote); 50 | void getMaxVote(uint size, 51 | uint rTarget, 52 | double **outcomeCLS, 53 | double *denomCount, 54 | double *maxVote); 55 | #endif 56 | -------------------------------------------------------------------------------- /man/plot.competing.risk.rfsrc.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.competing.risk.rfsrc} 2 | \alias{plot.competing.risk.rfsrc} 3 | \alias{plot.competing.risk} 4 | \title{Plots for Competing Risks} 5 | \description{ 6 | Plot useful summary curves from a random survival forest competing risk 7 | analysis. 8 | } 9 | \usage{\method{plot.competing.risk}{rfsrc}(x, plots.one.page = FALSE, ...)} 10 | \arguments{ 11 | \item{x}{An object of class \code{(rfsrc, grow)} or 12 | \code{(rfsrc, predict)}.} 13 | \item{plots.one.page}{Should plots be placed on one page?} 14 | \item{...}{Further arguments passed to or from other methods.} 15 | } 16 | \details{ 17 | 18 | Given a random survival forest object from a competing risk analysis 19 | (Ishwaran et al. 2014), plots from top to bottom, left to right: (1) 20 | cause-specific cumulative hazard function (CSCHF) for each event, (2) 21 | cumulative incidence function (CIF) for each event, and (3) continuous 22 | probability curves (CPC) for each event (Pepe and Mori, 1993). 23 | 24 | Does not apply to right-censored data. Whenever possible, out-of-bag 25 | (OOB) values are displayed. 26 | } 27 | \author{ 28 | Hemant Ishwaran and Udaya B. Kogalur 29 | } 30 | \references{ 31 | 32 | Ishwaran H., Gerds T.A., Kogalur U.B., Moore R.D., Gange S.J. and Lau 33 | B.M. (2014). Random survival forests for competing risks. 34 | \emph{Biostatistics}, 15(4):757-773. 35 | 36 | Pepe, M.S. and Mori, M., (1993). Kaplan-Meier, marginal or conditional 37 | probability curves in summarizing competing risks failure time 38 | data? \emph{Statistics in Medicine}, 12(8):737-751. 39 | 40 | 41 | } 42 | \seealso{ 43 | \command{\link{follic}}, 44 | \command{\link{hd}}, 45 | \command{\link{rfsrc}}, 46 | \command{\link{wihs}} 47 | } 48 | \examples{ 49 | \donttest{ 50 | ## ------------------------------------------------------------ 51 | ## follicular cell lymphoma 52 | ## ------------------------------------------------------------ 53 | 54 | data(follic, package = "randomForestSRC") 55 | follic.obj <- rfsrc(Surv(time, status) ~ ., follic, nsplit = 3, ntree = 100) 56 | print(follic.obj) 57 | plot.competing.risk(follic.obj) 58 | 59 | ## ------------------------------------------------------------ 60 | ## Hodgkin's Disease 61 | ## ------------------------------------------------------------ 62 | 63 | data(hd, package = "randomForestSRC") 64 | hd.obj <- rfsrc(Surv(time, status) ~ ., hd, nsplit = 3, ntree = 100) 65 | print(hd.obj) 66 | plot.competing.risk(hd.obj) 67 | 68 | ## ------------------------------------------------------------ 69 | ## competing risk analysis of pbc data from the survival package 70 | ## events are transplant (1) and death (2) 71 | ## ------------------------------------------------------------ 72 | 73 | if (library("survival", logical.return = TRUE)) { 74 | data(pbc, package = "survival") 75 | pbc$id <- NULL 76 | plot.competing.risk(rfsrc(Surv(time, status) ~ ., pbc)) 77 | } 78 | } 79 | } 80 | \keyword{plot} 81 | -------------------------------------------------------------------------------- /R/rfsrc.fast.R: -------------------------------------------------------------------------------- 1 | rfsrc.fast <- function(formula, data, 2 | ntree = 500, 3 | nsplit = 10, 4 | bootstrap = "by.root", 5 | sampsize = function(x){min(x * .632, max(150, x ^ (3/4)))}, 6 | samptype = "swor", 7 | samp = NULL, 8 | ntime = 50, 9 | forest = FALSE, 10 | save.memory = TRUE, 11 | ...) 12 | { 13 | ## -------------------------------------------------------------- 14 | ## 15 | ## preliminary processing 16 | ## 17 | ## -------------------------------------------------------------- 18 | ## verify key options 19 | if (!is.function(sampsize) && !is.numeric(sampsize)) { 20 | stop("sampsize must be a function or number specifying size of subsampled data") 21 | } 22 | ##-------------------------------------------------------------- 23 | ## 24 | ## extract additional options specified by user 25 | ## we lock this down to allowed types 26 | ## 27 | ##-------------------------------------------------------------- 28 | ## list of forest parameters 29 | rfnames <- get.rfnames(hidden = TRUE) 30 | ## restrict to allowed values 31 | rfnames <- rfnames[rfnames != "data" & 32 | rfnames != "ntree" & 33 | rfnames != "nsplit" & 34 | rfnames != "bootstrap" & 35 | rfnames != "sampsize" & 36 | rfnames != "samptype" & 37 | rfnames != "ntime" & 38 | rfnames != "forest" & 39 | rfnames != "save.memory" ] 40 | ## get the permissible hidden options 41 | ## add formula if present 42 | dots <- list(...) 43 | dots <- dots[names(dots) %in% rfnames] 44 | if (!missing(formula)) { 45 | dots$formula <- formula 46 | } 47 | ## set bootstrap accordingly if the user has provided their own sampling 48 | ## ntree and sampsize are handled in rfsrc 49 | if (!is.null(samp)) { 50 | bootstrap <- "by.user" 51 | } 52 | ## manually set key hidden options if forest is not requested 53 | if (!forest) { 54 | dots$terminal.qualts <- FALSE 55 | dots$terminal.quants <- FALSE 56 | dots$save.memory <- TRUE 57 | } 58 | ##-------------------------------------------------------------- 59 | ## 60 | ## make the grow call and return the object 61 | ## 62 | ##-------------------------------------------------------------- 63 | return(do.call("rfsrc", 64 | c(list(data = data, 65 | ntree = ntree, 66 | nsplit = nsplit, 67 | bootstrap = bootstrap, 68 | sampsize = sampsize, 69 | samptype = samptype, 70 | samp = samp, 71 | ntime = ntime, 72 | forest = forest), dots))) 73 | } 74 | -------------------------------------------------------------------------------- /configure.ac: -------------------------------------------------------------------------------- 1 | AC_INIT([randomForestSRC], [3.4.5], [ubk@kogalur.com], [randomForestSRC], [https://github.com/kogalur/randomForestSGT]) 2 | 3 | AC_PREREQ([2.62]) 4 | 5 | # Find R home and set CC/CFLAGS 6 | : ${R_HOME=`R RHOME`} 7 | if test -z "${R_HOME}"; then 8 | AC_MSG_ERROR([could not determine R_HOME]) 9 | fi 10 | RBIN="${R_HOME}/bin/R" 11 | LIBS="${PKG_LIBS}" 12 | 13 | # Checking for C 14 | # Based on: https://unconj.ca/blog/an-autoconf-primer-for-r-package-authors.html 15 | CC=`"${RBIN}" CMD config CC`; 16 | CFLAGS=`"${RBIN}" CMD config CFLAGS` 17 | 18 | AC_LANG(C) 19 | # This the meat of R's m4/openmp.m4 20 | # Based on https://svn.r-project.org/R/trunk/m4/openmp.m4 21 | OPENMP_[]_AC_LANG_PREFIX[]FLAGS= 22 | AC_ARG_ENABLE([openmp], 23 | [AS_HELP_STRING([--disable-openmp], [do not use OpenMP])]) 24 | if test "$enable_openmp" != no; then 25 | AC_CACHE_CHECK([for $[]_AC_CC[] option to support OpenMP], 26 | [ac_cv_prog_[]_AC_LANG_ABBREV[]_openmp], 27 | [AC_LINK_IFELSE([_AC_LANG_OPENMP], 28 | [ac_cv_prog_[]_AC_LANG_ABBREV[]_openmp='none needed'], 29 | [ac_cv_prog_[]_AC_LANG_ABBREV[]_openmp='unsupported' 30 | for ac_option in -fopenmp -xopenmp -qopenmp \ 31 | -openmp -mp -omp -qsmp=omp -homp \ 32 | -fopenmp=libomp \ 33 | -Popenmp --openmp; do 34 | ac_save_[]_AC_LANG_PREFIX[]FLAGS=$[]_AC_LANG_PREFIX[]FLAGS 35 | _AC_LANG_PREFIX[]FLAGS="$[]_AC_LANG_PREFIX[]FLAGS $ac_option" 36 | AC_LINK_IFELSE([_AC_LANG_OPENMP], 37 | [ac_cv_prog_[]_AC_LANG_ABBREV[]_openmp=$ac_option]) 38 | _AC_LANG_PREFIX[]FLAGS=$ac_save_[]_AC_LANG_PREFIX[]FLAGS 39 | if test "$ac_cv_prog_[]_AC_LANG_ABBREV[]_openmp" != unsupported; then 40 | break 41 | fi 42 | done])]) 43 | case $ac_cv_prog_[]_AC_LANG_ABBREV[]_openmp in #( 44 | "none needed" | unsupported) 45 | ;; #( 46 | *) 47 | OPENMP_[]_AC_LANG_PREFIX[]FLAGS=$ac_cv_prog_[]_AC_LANG_ABBREV[]_openmp ;; 48 | esac 49 | fi 50 | 51 | # Check for typedefs, structures, and compiler characteristics 52 | AC_CHECK_HEADERS([sys/time.h]) 53 | 54 | AC_SUBST(PKG_CFLAGS) 55 | AC_SUBST(PKG_LIBS) 56 | AC_SUBST(OPENMP_CFLAGS) 57 | 58 | # substitute externalized source list 59 | PKG_SOURCES=$(cat src/sources.list) 60 | AC_SUBST(PKG_SOURCES) 61 | 62 | AC_CONFIG_FILES([src/Makevars.tmp:src/Makevars.in], [ 63 | if test -f src/Makevars && cmp -s src/Makevars.tmp src/Makevars; then 64 | AC_MSG_NOTICE([creating src/Makevars]) 65 | AC_MSG_NOTICE([src/Makevars is unchanged]) 66 | rm src/Makevars.tmp 67 | else 68 | AC_MSG_NOTICE([creating src/Makevars]) 69 | mv src/Makevars.tmp src/Makevars 70 | fi 71 | ] 72 | ) 73 | 74 | AC_OUTPUT 75 | 76 | echo " 77 | -------------------------------------------------- 78 | Configuration for ${PACKAGE_NAME} 79 | ================ 80 | 81 | cflags: ${CFLAGS} 82 | libs: ${PKG_LIBS} 83 | 84 | -------------------------------------------------- 85 | " 86 | 87 | -------------------------------------------------------------------------------- /man/plot.rfsrc.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.rfsrc} 2 | \alias{plot.rfsrc} 3 | \title{Plot Error Rate and Variable Importance from a RF-SRC analysis} 4 | \description{ 5 | Plot out-of-bag (OOB) error rates and variable importance (VIMP) 6 | from a RF-SRC analysis. This is the default plot method for the package. 7 | } 8 | \usage{\method{plot}{rfsrc}(x, m.target = NULL, 9 | plots.one.page = TRUE, sorted = TRUE, verbose = TRUE, ...)} 10 | \arguments{ 11 | \item{x}{An object of class \code{(rfsrc, grow)}, 12 | or \code{(rfsrc, predict)}.} 13 | \item{m.target}{Character value for multivariate families 14 | specifying the target outcome to be used. If left unspecified, the 15 | algorithm will choose a default target.} 16 | \item{plots.one.page}{Should plots be placed on one page?} 17 | \item{sorted}{Should variables be sorted by importance values?} 18 | \item{verbose}{Should VIMP be printed?} 19 | \item{...}{Further arguments passed to or from other methods.} 20 | } 21 | \details{ 22 | Plot cumulative OOB error rates as a function of number of trees and 23 | variable importance (VIMP) if available. Note that the default 24 | settings are now such that the error rate is no longer calculated on 25 | every tree and VIMP is only calculated if requested. To get OOB error 26 | rates for ever tree, use the option \code{block.size = 1} when 27 | growing or restoring the forest. Likewise, to view VIMP, use the option 28 | \code{importance} when growing or restoring the forest. 29 | } 30 | \author{ 31 | Hemant Ishwaran and Udaya B. Kogalur 32 | } 33 | \references{ 34 | Breiman L. (2001). Random forests, \emph{Machine Learning}, 45:5-32. 35 | 36 | Ishwaran H. and Kogalur U.B. (2007). Random survival forests for R, 37 | \emph{Rnews}, 7(2):25-31. 38 | } 39 | \examples{ 40 | \donttest{ 41 | ## ------------------------------------------------------------ 42 | ## classification example 43 | ## ------------------------------------------------------------ 44 | 45 | iris.obj <- rfsrc(Species ~ ., data = iris, 46 | block.size = 1, importance = TRUE) 47 | plot(iris.obj) 48 | 49 | ## ------------------------------------------------------------ 50 | ## competing risk example 51 | ## ------------------------------------------------------------ 52 | 53 | ## use the pbc data from the survival package 54 | ## events are transplant (1) and death (2) 55 | if (library("survival", logical.return = TRUE)) { 56 | data(pbc, package = "survival") 57 | pbc$id <- NULL 58 | plot(rfsrc(Surv(time, status) ~ ., pbc, block.size = 1)) 59 | } 60 | 61 | ## ------------------------------------------------------------ 62 | ## multivariate mixed forests 63 | ## ------------------------------------------------------------ 64 | 65 | mtcars.new <- mtcars 66 | mtcars.new$cyl <- factor(mtcars.new$cyl) 67 | mtcars.new$carb <- factor(mtcars.new$carb, ordered = TRUE) 68 | mv.obj <- rfsrc(cbind(carb, mpg, cyl) ~., data = mtcars.new, block.size = 1) 69 | plot(mv.obj, m.target = "carb") 70 | plot(mv.obj, m.target = "mpg") 71 | plot(mv.obj, m.target = "cyl") 72 | 73 | } 74 | } 75 | \keyword{plot} 76 | -------------------------------------------------------------------------------- /src/sexpOutgoing.c: -------------------------------------------------------------------------------- 1 | 2 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 3 | #include "global.h" 4 | #include "external.h" 5 | 6 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 7 | 8 | 9 | 10 | 11 | #include "sexpOutgoing.h" 12 | char *RF_sexpString[RF_SEXP_CNT] = { 13 | "", 14 | "", 15 | "allEnsbCHF", 16 | "oobEnsbCHF", 17 | "allEnsbCIF", 18 | "oobEnsbCIF", 19 | "allEnsbSRV", 20 | "oobEnsbSRV", 21 | "allEnsbMRT", 22 | "oobEnsbMRT", 23 | "allEnsbCLS", 24 | "oobEnsbCLS", 25 | "allEnsbRGR", 26 | "oobEnsbRGR", 27 | "perfSurv", 28 | "perfClas", 29 | "perfRegr", 30 | "proximity", 31 | "leafCount", 32 | "seed", 33 | "seedVimp", 34 | "allEnsbQNT", 35 | "oobEnsbQNT", 36 | "blockSurv", 37 | "blockClas", 38 | "blockRegr", 39 | "vimpSurv", 40 | "vimpClas", 41 | "vimpRegr", 42 | "imputation", 43 | "", 44 | "varUsed", 45 | "splitDepth", 46 | "nodeMembership", 47 | "pstnMembership", 48 | "bootMembership", 49 | "rmbrMembership", 50 | "ambrMembership", 51 | "tnRCNT", 52 | "tnACNT", 53 | "spltST", 54 | "dpthST", 55 | "weight", 56 | "tnSURV", 57 | "tnMORT", 58 | "tnNLSN", 59 | "tnCSHZ", 60 | "tnCIFN", 61 | "tnREGR", 62 | "tnCLAS", 63 | " ", 64 | "cseRegr", 65 | "csvRegr", 66 | "cseClas", 67 | "csvClas", 68 | "", 69 | "partialSurv", 70 | "partialClas", 71 | "partialRegr", 72 | "distance", 73 | "treeID", 74 | "nodeID", 75 | "parmID", 76 | "contPT", 77 | "mwcpSZ", 78 | "mwcpPT", 79 | "mwcpCT", 80 | "hcDim", 81 | "contPTR", 82 | " ", 83 | " ", 84 | " ", 85 | " ", 86 | "emprRisk", 87 | "oobEmprRisk", 88 | "splitStatLOT", 89 | "holdoutBlk", 90 | "holdoutSurv", 91 | "holdoutClas", 92 | "holdoutRegr", 93 | " ", 94 | " ", 95 | " ", 96 | " ", 97 | " ", 98 | " ", 99 | " ", 100 | " ", 101 | " ", 102 | " ", 103 | " ", 104 | " ", 105 | " ", 106 | " ", 107 | "cseDen", 108 | "csvDen", 109 | "blnodeID", 110 | "brnodeID", 111 | "fsrecID", 112 | "nodeSZ", 113 | "caseDepth", 114 | "optLoGrow", 115 | "optHiGrow", 116 | "cTimeInternal", 117 | "tnOCNT", 118 | "tnICNT", 119 | "ombrMembership", 120 | "imbrMembership", 121 | "oobSZ", 122 | "ibgSZ" 123 | }; 124 | -------------------------------------------------------------------------------- /man/plot.subsample.rfsrc.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.subsample.rfsrc} 2 | \alias{plot.subsample.rfsrc} 3 | \alias{plot.subsample} 4 | \title{Plot Subsampled VIMP Confidence Intervals} 5 | \description{ 6 | Plots VIMP (variable importance) confidence regions obtained from 7 | subsampling a forest. 8 | } 9 | \usage{\method{plot.subsample}{rfsrc}(x, alpha = .01, xvar.names, 10 | standardize = TRUE, normal = TRUE, jknife = FALSE, target, m.target = NULL, 11 | pmax = 75, main = "", sorted = TRUE, show.plots = TRUE, ...)} 12 | \arguments{ 13 | 14 | \item{x}{An object obtained from calling \command{subample}.} 15 | 16 | \item{alpha}{Desired level of significance.} 17 | 18 | \item{xvar.names}{Names of the x-variables to be used. If not 19 | specified all variables used.} 20 | 21 | \item{standardize}{Standardize VIMP? For regression families, VIMP is 22 | standardized by dividing by the variance. For all other families, 23 | VIMP is unaltered.} 24 | 25 | \item{normal}{Use parametric normal confidence regions or 26 | nonparametric regions? Generally, parametric regions perform better.} 27 | 28 | \item{jknife}{Use the delete-d jackknife variance estimator?} 29 | 30 | \item{target}{For classification families, an integer or character 31 | value specifying the class VIMP will be conditioned on (default is 32 | to use unconditional VIMP). For competing risk families, an integer 33 | value between 1 and \code{J} indicating the event VIMP is requested, 34 | where \code{J} is the number of event types. The default is to use 35 | the first event.} 36 | 37 | \item{m.target}{Character value for multivariate families 38 | specifying the target outcome to be used. If left unspecified, the 39 | algorithm will choose a default target.} 40 | 41 | \item{pmax}{Trims the data to this number of variables (sorted by VIMP).} 42 | 43 | \item{main}{Title used for plot.} 44 | 45 | \item{sorted}{Should variables be sorted by importance values?} 46 | 47 | \item{show.plots}{Should plots be displayed? Allows users to produce 48 | their own custom plots.} 49 | 50 | \item{...}{Further arguments that can be passed to \command{bxp}.} 51 | 52 | } 53 | \details{ 54 | Most of the options used by the R function bxp will work here and can 55 | be used for customization of plots. Currently the following 56 | parameters will work: 57 | 58 | "xaxt", "yaxt", "las", "cex.axis", 59 | "col.axis", "cex.main", 60 | "col.main", "sub", "cex.sub", "col.sub", 61 | "ylab", "cex.lab", "col.lab" 62 | 63 | } 64 | \value{ 65 | Invisibly, returns the boxplot data that is plotted. 66 | } 67 | 68 | \author{ 69 | Hemant Ishwaran and Udaya B. Kogalur 70 | } 71 | \references{ 72 | Ishwaran H. and Lu M. (2017). Standard errors and confidence 73 | intervals for variable importance in random forest regression, 74 | classification, and survival. 75 | 76 | Politis, D.N. and Romano, J.P. (1994). Large sample confidence 77 | regions based on subsamples under minimal assumptions. \emph{The 78 | Annals of Statistics}, 22(4):2031-2050. 79 | 80 | Shao, J. and Wu, C.J. (1989). A general theory for jackknife variance 81 | estimation. \emph{The Annals of Statistics}, 17(3):1176-1197. 82 | } 83 | \seealso{ 84 | \command{\link{subsample.rfsrc}} 85 | } 86 | \examples{ 87 | \donttest{ 88 | o <- rfsrc(Ozone ~ ., airquality) 89 | oo <- subsample(o) 90 | plot.subsample(oo) 91 | plot.subsample(oo, xvar.names = o$xvar.names[1:3]) 92 | plot.subsample(oo, jknife = FALSE) 93 | plot.subsample(oo, alpha = .01) 94 | plot(oo,cex.axis=.5) 95 | } 96 | } 97 | \keyword{plot} 98 | -------------------------------------------------------------------------------- /man/nutrigenomic.Rd: -------------------------------------------------------------------------------- 1 | \name{nutrigenomic} 2 | \docType{data} 3 | \alias{nutrigenomic} 4 | \title{Nutrigenomic Study} 5 | \description{ 6 | Investigates the effects of five dietary treatments on 21 liver lipids 7 | and 120 hepatic gene expressions in wild-type and PPAR-alpha deficient 8 | mice. A multivariate mixed random forest analysis is performed by 9 | regressing gene expression, diet, and genotype (x-variables) on lipid 10 | expression profiles (multivariate y-responses). 11 | } 12 | \references{ 13 | Martin P.G. et al. (2007). Novel aspects of PPAR-alpha-mediated 14 | regulation of lipid and xenobiotic metabolism revealed through a 15 | nutrigenomic study. \emph{Hepatology}, 45(3), 767--777. 16 | } 17 | \examples{ 18 | \donttest{ 19 | ## ------------------------------------------------------------ 20 | ## multivariate regression forests using Mahalanobis splitting 21 | ## lipids (all real values) used as the multivariate y 22 | ## ------------------------------------------------------------ 23 | 24 | ## load the data 25 | data(nutrigenomic, package = "randomForestSRC") 26 | 27 | ## parse into y and x data 28 | ydta <- nutrigenomic$lipids 29 | xdta <- data.frame(nutrigenomic$genes, 30 | diet = nutrigenomic$diet, 31 | genotype = nutrigenomic$genotype) 32 | 33 | ## multivariate mixed forest call 34 | obj <- rfsrc(get.mv.formula(colnames(ydta)), 35 | data.frame(ydta, xdta), 36 | importance=TRUE, nsplit = 10, 37 | splitrule = "mahalanobis") 38 | print(obj) 39 | 40 | ## ------------------------------------------------------------ 41 | ## plot the standarized performance and VIMP values 42 | ## ------------------------------------------------------------ 43 | 44 | ## acquire the error rate for each of the 21-coordinates 45 | ## standardize to allow for comparison across coordinates 46 | serr <- get.mv.error(obj, standardize = TRUE) 47 | 48 | ## acquire standardized VIMP 49 | svimp <- get.mv.vimp(obj, standardize = TRUE) 50 | 51 | par(mfrow = c(1,2)) 52 | plot(serr, xlab = "Lipids", ylab = "Standardized Performance") 53 | matplot(svimp, xlab = "Genes/Diet/Genotype", ylab = "Standardized VIMP") 54 | 55 | 56 | ## ------------------------------------------------------------ 57 | ## plot some trees 58 | ## ------------------------------------------------------------ 59 | 60 | plot(get.tree(obj, 1)) 61 | plot(get.tree(obj, 2)) 62 | plot(get.tree(obj, 3)) 63 | 64 | 65 | ## ------------------------------------------------------------ 66 | ## 67 | ## Compare above to (1) user specified covariance matrix 68 | ## (2) default composite (independent) splitting 69 | ## 70 | ## ------------------------------------------------------------ 71 | 72 | ## user specified sigma matrix 73 | obj2 <- rfsrc(get.mv.formula(colnames(ydta)), 74 | data.frame(ydta, xdta), 75 | importance = TRUE, nsplit = 10, 76 | splitrule = "mahalanobis", 77 | sigma = cov(ydta)) 78 | print(obj2) 79 | 80 | ## default independence split rule 81 | obj3 <- rfsrc(get.mv.formula(colnames(ydta)), 82 | data.frame(ydta, xdta), 83 | importance=TRUE, nsplit = 10) 84 | print(obj3) 85 | 86 | ## compare vimp 87 | imp <- data.frame(mahalanobis = rowMeans(get.mv.vimp(obj, standardize = TRUE)), 88 | mahalanobis2 = rowMeans(get.mv.vimp(obj2, standardize = TRUE)), 89 | default = rowMeans(get.mv.vimp(obj3, standardize = TRUE))) 90 | 91 | print(head(100 * imp[order(imp$mahalanobis, decreasing = TRUE), ], 15)) 92 | 93 | }} 94 | \keyword{datasets} 95 | -------------------------------------------------------------------------------- /src/sortedLink.c: -------------------------------------------------------------------------------- 1 | 2 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 3 | #include "global.h" 4 | #include "external.h" 5 | 6 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 7 | 8 | 9 | 10 | 11 | #include "sortedLink.h" 12 | #include "nrutil.h" 13 | SortedLinkedObj *makeSortedLinkedObj(void) { 14 | SortedLinkedObj *obj = (SortedLinkedObj*) gblock((size_t) sizeof(SortedLinkedObj)); 15 | obj -> fwdLink = NULL; 16 | obj -> bakLink = NULL; 17 | obj -> rank = 0; 18 | obj -> indx = 0; 19 | return obj; 20 | } 21 | void makeAndSpliceSortedLinkedObj(uint treeID, 22 | SortedLinkedObj **headPtr, 23 | SortedLinkedObj **tailPtr, 24 | uint *listLength, 25 | uint rank, uint indx) { 26 | char flag; 27 | uint lowIndx, highIndx, halfIndx; 28 | SortedLinkedObj *objIterator; 29 | uint i; 30 | SortedLinkedObj *head = headPtr[treeID]; 31 | SortedLinkedObj *tail = tailPtr[treeID]; 32 | SortedLinkedObj *obj = makeSortedLinkedObj(); 33 | obj -> rank = rank; 34 | obj -> indx = indx; 35 | obj -> fwdLink = obj -> bakLink = NULL; 36 | flag = TRUE; 37 | if (*listLength == 0) { 38 | head = tail = obj; 39 | flag = FALSE; 40 | } 41 | else if (rank >= tail -> rank) { 42 | tail -> fwdLink = obj; 43 | obj -> bakLink = tail; 44 | tail = obj; 45 | flag = FALSE; 46 | } 47 | else if (rank <= head -> rank) { 48 | head -> bakLink = obj; 49 | obj -> fwdLink = head; 50 | head = obj; 51 | flag = FALSE; 52 | } 53 | else { 54 | lowIndx = 1; 55 | highIndx = *listLength; 56 | while (flag) { 57 | halfIndx = (uint) ((double) (highIndx + lowIndx) / 2.0); 58 | objIterator = head; 59 | for (i = lowIndx; i < halfIndx; i++) { 60 | objIterator = objIterator -> fwdLink; 61 | } 62 | if (rank == head -> rank) { 63 | obj -> fwdLink = head; 64 | obj -> bakLink = (head -> bakLink); 65 | (head -> bakLink) -> fwdLink = obj; 66 | head -> bakLink = obj; 67 | flag = FALSE; 68 | } 69 | else if (rank == tail -> rank) { 70 | obj -> fwdLink = tail; 71 | obj -> bakLink = (tail -> bakLink); 72 | (tail -> bakLink) -> fwdLink = obj; 73 | tail -> bakLink = obj; 74 | flag = FALSE; 75 | } 76 | else if (rank == objIterator -> rank) { 77 | obj -> fwdLink = objIterator; 78 | obj -> bakLink = (objIterator -> bakLink); 79 | (objIterator -> bakLink) -> fwdLink = obj; 80 | objIterator -> bakLink = obj; 81 | flag = FALSE; 82 | } 83 | else if (halfIndx == lowIndx) { 84 | obj -> fwdLink = tail; 85 | obj -> bakLink = (tail -> bakLink); 86 | (tail -> bakLink) -> fwdLink = obj; 87 | tail -> bakLink = obj; 88 | flag = FALSE; 89 | } 90 | else if (rank < objIterator -> rank) { 91 | tail = objIterator; 92 | highIndx = halfIndx; 93 | } 94 | else { 95 | head = objIterator; 96 | lowIndx = halfIndx; 97 | } 98 | } 99 | } 100 | (*listLength) ++; 101 | } 102 | void freeSortedLinkedObjList(SortedLinkedObj *obj) { 103 | if (obj -> fwdLink != NULL) { 104 | freeSortedLinkedObjList(obj -> fwdLink); 105 | } 106 | freeSortedLinkedObj(obj); 107 | obj = NULL; 108 | } 109 | void freeSortedLinkedObj(SortedLinkedObj *obj) { 110 | free_gblock(obj, (size_t) sizeof(SortedLinkedObj)); 111 | } 112 | -------------------------------------------------------------------------------- /src/rfsrcUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_RFSRC_UTIL_H 2 | #define RF_RFSRC_UTIL_H 3 | #include "terminal.h" 4 | void updateTerminalNodeOutcomes(char mode, 5 | uint treeID, 6 | Terminal *parent, 7 | uint *repMembrIndx, 8 | uint repMembrSize, 9 | uint *allMembrIndx, 10 | uint allMembrSize, 11 | uint *rbmrIterator, 12 | uint *ambrIterator); 13 | void getMembrCountOnly (uint treeID, 14 | Terminal *parent, 15 | uint *repMembrIndx, 16 | uint repMembrSize, 17 | uint *allMembrIndx, 18 | uint allMembrSize); 19 | void updateEnsemble (char mode, uint b); 20 | void summarizeFaithfulBlockPerformance (char mode, 21 | uint b, 22 | uint blockID, 23 | double **blkEnsembleMRTnum, 24 | double ***blkEnsembleCLSnum, 25 | double **blkEnsembleRGRnum, 26 | double *blkEnsembleDen, 27 | double **responsePtr, 28 | double **perfMRTblk, 29 | double ***perfCLSblk, 30 | double **perfRGRblk); 31 | void summarizeHoldoutBlockPerformance (char mode, 32 | uint b, 33 | uint xVarIdx, 34 | uint blockID, 35 | double **responsePtr, 36 | double **holdMRTstd, 37 | double ***holdCLSstd, 38 | double **holdRGRstd, 39 | double *holdEnsembleDen, 40 | double *holdMRTptr, 41 | double **holdCLSptr, 42 | double *holdRGRptr); 43 | char stackAndImputePerfResponse(char mode, 44 | char multImpFlag, 45 | uint b, 46 | uint loSerialTreeID, 47 | uint hiSerialTreeID, 48 | uint *serialTreePtr, 49 | double ***responsePtr); 50 | void unstackPerfResponse(char mode, char flag, double **mResponsePtr); 51 | void getPerformance(uint serialTreeID, 52 | char mode, 53 | uint obsSize, 54 | double **responsePtr, 55 | double *denomPtr, 56 | double **outcomeMRT, 57 | double ***outcomeCLS, 58 | double **outcomeRGR, 59 | double *perfMRTptr, 60 | double **perfCLSptr, 61 | double *perfRGRptr); 62 | void normalizeEnsembleEstimates(char mode, char final); 63 | char getPerfFlag (char mode, uint serialTreeID); 64 | void getVariablesUsed(uint treeID, Node *rootPtr, uint *varUsedVector); 65 | #endif 66 | -------------------------------------------------------------------------------- /src/entry.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_RFSRC_GROW_H 2 | #define RF_RFSRC_GROW_H 3 | SEXP rfsrcGrow(SEXP traceFlag, 4 | SEXP seedPtr, 5 | SEXP optLow, 6 | SEXP optHigh, 7 | SEXP optSup, 8 | SEXP splitRule, 9 | SEXP nsplit, 10 | SEXP mtry, 11 | SEXP lot, 12 | SEXP baseLearn, 13 | SEXP vtry, 14 | SEXP vtryArray, 15 | SEXP vtryExperimental, 16 | SEXP ytry, 17 | SEXP nodeSize, 18 | SEXP nodeDepth, 19 | SEXP crWeightSize, 20 | SEXP crWeight, 21 | SEXP vimpThreshold, 22 | SEXP ntree, 23 | SEXP observationSize, 24 | SEXP yInfo, 25 | SEXP yLevels, 26 | SEXP yData, 27 | SEXP xInfo, 28 | SEXP xLevels, 29 | SEXP xData, 30 | SEXP sampleInfo, 31 | SEXP xWeightStat, 32 | SEXP yWeight, 33 | SEXP xWeight, 34 | SEXP timeInterest, 35 | SEXP nImpute, 36 | SEXP perfBlock, 37 | SEXP quantile, 38 | SEXP qStarPlus, 39 | SEXP xPreSort, 40 | SEXP numThreads); 41 | SEXP rfsrcPredict(SEXP traceFlag, 42 | SEXP seedPtr, 43 | SEXP optLow, 44 | SEXP optHigh, 45 | SEXP vimpThreshold, 46 | SEXP ntree, 47 | SEXP observationSize, 48 | SEXP yInfo, 49 | SEXP yLevels, 50 | SEXP yData, 51 | SEXP xInfo, 52 | SEXP xLevels, 53 | SEXP xData, 54 | SEXP sampleInfo, 55 | SEXP timeInterestInfo, 56 | SEXP totalNodeCount, 57 | SEXP tLeafCount, 58 | SEXP seedInfo, 59 | SEXP hdim, 60 | SEXP baseLearn, 61 | SEXP treeID, 62 | SEXP nodeID, 63 | SEXP nodeSZ, 64 | SEXP brnodeID, 65 | SEXP hc_zero, 66 | SEXP hc_oneAugIntr, 67 | SEXP hc_oneAugSyth, 68 | SEXP hc_one, 69 | SEXP hc_parmID, 70 | SEXP hc_contPT, 71 | SEXP hc_contPTR, 72 | SEXP hc_mwcpSZ, 73 | SEXP hc_fsrecID, 74 | SEXP hc_mwcpPT, 75 | SEXP hc_augmXone, 76 | SEXP hc_augmXtwo, 77 | SEXP hc_augmXS, 78 | SEXP hc_augmSythTop, 79 | SEXP tnRMBR, 80 | SEXP tnAMBR, 81 | SEXP tnRCNT, 82 | SEXP tnACNT, 83 | SEXP tnSURV, 84 | SEXP tnMORT, 85 | SEXP tnNLSN, 86 | SEXP tnCSHZ, 87 | SEXP tnCIFN, 88 | SEXP tnREGR, 89 | SEXP tnCLAS, 90 | SEXP rTargetInfo, 91 | SEXP ptnCount, 92 | SEXP xMarginalInfo, 93 | SEXP intrPredictorInfo, 94 | SEXP partial, 95 | SEXP fobservationSize, 96 | SEXP frSize, 97 | SEXP frData, 98 | SEXP fxData, 99 | SEXP perfBlock, 100 | SEXP quantileInfo, 101 | SEXP getTree, 102 | SEXP numThreads); 103 | #endif 104 | -------------------------------------------------------------------------------- /src/impute.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_IMPUTE_H 2 | #define RF_IMPUTE_H 3 | #include "node.h" 4 | #include "terminal.h" 5 | char imputeNode (char type, 6 | char termFlag, 7 | char chainFlag, 8 | uint treeID, 9 | Node *nodePtr, 10 | uint *repAbsIdx, 11 | uint repNodeSize, 12 | uint *iAbsIdx, 13 | uint iNodeSize); 14 | char restoreNodeMembership(char mode, 15 | char rootFlag, 16 | uint treeID, 17 | Node *parent, 18 | uint *repMembrIndx, 19 | uint repMembrSize, 20 | uint *allMembrIndx, 21 | uint allMembrSize, 22 | uint *ngAllMembrIndx, 23 | uint ngAllMembrSize, 24 | uint *bootMembrIndxIter, 25 | uint *rmbrIterator, 26 | uint *ambrIterator); 27 | void imputeUpdateShadow (char mode, 28 | double **shadowResponse, 29 | double **shadowPredictor); 30 | void imputeNodeAndSummarize(uint r, 31 | char mode, 32 | uint treeID, 33 | Node *parent, 34 | uint *repMembrIndx, 35 | uint repMembrSize, 36 | uint *allMembrIndx, 37 | uint allMembrSize, 38 | uint *ngAllMembrIndx, 39 | uint ngAllMembrSize); 40 | void imputeSummary(char mode, 41 | char selectionFlag); 42 | void imputeResponse(char mode, 43 | uint loSerialTreeID, 44 | uint hiSerialTreeID, 45 | uint *serialTreePtr, 46 | double **tempResponse); 47 | void imputeCommon(char mode, 48 | uint loSerialTreeID, 49 | uint hiSerialTreeID, 50 | uint *serialTreePtr, 51 | char selectionFlag, 52 | char predictorFlag); 53 | void imputeMultipleTime (char selectionFlag); 54 | double getNearestMasterTime (double meanvalue, 55 | char chainFlag, 56 | uint treeID); 57 | double getMaximalValue(double *value, uint size, char chainFlag, uint treeID); 58 | double getMedianValue(double *value, uint size); 59 | double getMeanValue(double *value, uint size); 60 | double getSampleValue(double *value, uint size, char chainFlag, uint treeID); 61 | uint getRecordMap(uint *map, 62 | uint size, 63 | double **resp, 64 | double **data); 65 | void updateTimeIndexArray(uint treeID, 66 | uint *allMemberIndx, 67 | uint allMembrSize, 68 | double *time, 69 | char naflag, 70 | char idFlag, 71 | uint *masterTimeIndex); 72 | void updateEventTypeSubsets(double *summaryStatus, 73 | uint mRecordSize, 74 | int **mpSign, 75 | uint *mRecordIndex, 76 | uint *meIndividualSize, 77 | uint **eIndividual); 78 | void stackShadow (char mode, uint treeID); 79 | void unstackShadow (char mode, uint treeID); 80 | char xferMissingness(char type, Node *source, Terminal *destination); 81 | #endif 82 | -------------------------------------------------------------------------------- /R/utilities_quantreg.R: -------------------------------------------------------------------------------- 1 | ## pulls the quantile object and converts to a list 2 | extract.quantile <- function(o) { 3 | ## confirm this is a quantile regression object 4 | if (sum(grepl("quantreg", class(o))) == 0) { 5 | stop("object must be a quantreg object") 6 | } 7 | ## used to determine univariate vs multivariate objects 8 | mv.y.names <- intersect(names(o$quantreg), o$yvar.names) 9 | ## we have a univariate quantile regression object 10 | if (length(mv.y.names) == 0) { 11 | q <- list(o$quantreg) 12 | names(q) <- o$yvar.names 13 | } 14 | ## the quantile regression object is multivariate 15 | else { 16 | q <- lapply(mv.y.names, function(m.target) { 17 | o$quantreg[[m.target]] 18 | }) 19 | names(q) <- mv.y.names 20 | } 21 | ## return the processed list 22 | q 23 | } 24 | ## extract target quantiles 25 | get.quantile <- function(o, target.prob = NULL, pretty = TRUE) { 26 | ## extract the quantile object 27 | qo <- extract.quantile(o) 28 | ## process the target probabilities 29 | if (!is.null(target.prob)) { 30 | target.prob <- sort(unique(target.prob)) 31 | } 32 | else {## default is to use existing values 33 | target.prob <- qo[[1]]$prob 34 | } 35 | ## pull the target quantiles 36 | rO <- lapply(qo, function(q) { 37 | q.dat <- do.call(cbind, lapply(target.prob, function(pr) { 38 | q$quant[, which.min(abs(pr - q$prob))] 39 | })) 40 | colnames(q.dat) <- paste("q.", 100 * target.prob, sep = "") 41 | q.dat 42 | }) 43 | if (pretty && length(rO) == 1) { 44 | rO <- rO[[1]] 45 | } 46 | rO 47 | } 48 | ## extract crps 49 | get.quantile.crps <- function(o, pretty = TRUE, subset = NULL, standardize = TRUE) { 50 | ## extract the quantile object 51 | qO <- extract.quantile(o) 52 | ## does not apply to predict objects without y 53 | if (sum(grepl("predict", class(o))) > 0 && is.null(o$yvar)) { 54 | stop("no yvar present in quantreg predict object") 55 | } 56 | ## subset assignment 57 | if (is.null(subset)) { 58 | subset <- 1:o$n 59 | } 60 | else { 61 | if (is.logical(subset)) { 62 | subset <- which(subset) 63 | } 64 | subset <- subset[subset >=1 & subset <= o$n] 65 | } 66 | if (length(subset) == 0) { 67 | stop("requested subset is empty") 68 | } 69 | ## pull the target stats 70 | rO <- lapply(1:length(qO), function(j) { 71 | q <- qO[[j]] 72 | if (is.vector(o$yvar)) { 73 | y <- cbind(o$yvar) 74 | } 75 | else { 76 | y <- o$yvar[, names(qO)[j]] 77 | } 78 | n <- length(y) 79 | ## brier score 80 | brS <- colMeans(do.call(rbind, mclapply(subset, function(i) { 81 | (1 * (y[i] <= q$yunq) - q$cdf[i, ]) ^ 2 82 | })), na.rm = TRUE) 83 | ## crps 84 | crps <- unlist(lapply(1:length(q$yunq), function(j) { 85 | if (standardize) { 86 | trapz(q$yunq[1:j], brS[1:j]) / diff(range(q$yunq[1:j])) 87 | } 88 | else { 89 | trapz(q$yunq[1:j], brS[1:j]) 90 | } 91 | })) 92 | data.frame(y = q$yunq, crps = crps) 93 | }) 94 | names(rO) <- names(qO) 95 | if (pretty && length(rO) == 1) { 96 | rO <- rO[[1]] 97 | } 98 | rO 99 | } 100 | ## extract target stats 101 | get.quantile.stat <- function(o, pretty = TRUE) { 102 | ## extract the quantile object 103 | qO <- extract.quantile(o) 104 | ## conditional median 105 | mdn <- get.quantile(o, .5, FALSE) 106 | ## pull the target stats 107 | rO <- lapply(1:length(qO), function(j) { 108 | q <- qO[[j]] 109 | ## conditional mean 110 | mn <- q$density %*% q$yunq 111 | ## conditional standard deviation 112 | std <- sqrt(q$density %*% q$yunq^2 - mn ^ 2) 113 | data.frame(mean = mn, median = c(mdn[[j]]), std = std) 114 | }) 115 | names(rO) <- names(qO) 116 | if (pretty && length(rO) == 1) { 117 | rO <- rO[[1]] 118 | } 119 | rO 120 | } 121 | -------------------------------------------------------------------------------- /src/distance.c: -------------------------------------------------------------------------------- 1 | 2 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 3 | #include "global.h" 4 | #include "external.h" 5 | 6 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 7 | 8 | 9 | 10 | 11 | #include "distance.h" 12 | #include "stackOutput.h" 13 | #include "nativeUtil.h" 14 | #include "nrutil.h" 15 | #include "error.h" 16 | SEXP rfsrcDistance(SEXP sexp_metricType, 17 | SEXP sexp_n, 18 | SEXP sexp_p, 19 | SEXP sexp_x, 20 | SEXP sexp_sizeIJ, 21 | SEXP sexp_rowI, 22 | SEXP sexp_rowJ, 23 | SEXP sexp_numThreads, 24 | SEXP sexp_traceFlag) { 25 | uint traceFlag = INTEGER(sexp_traceFlag)[0]; 26 | setUserTraceFlag(traceFlag); 27 | setNativeGlobalEnv(&RF_nativeIndex, &RF_stackCount); 28 | uint metricType = INTEGER(sexp_metricType)[0]; 29 | uint n = INTEGER(sexp_n)[0]; 30 | uint p = INTEGER(sexp_p)[0]; 31 | double *x = REAL(sexp_x); 32 | uint sizeIJ = INTEGER(sexp_sizeIJ)[0]; 33 | RF_numThreads = INTEGER(sexp_numThreads)[0]; 34 | uint *rowI; 35 | uint *rowJ; 36 | double **xMatrix; 37 | double *dist; 38 | uint size; 39 | uint i, j, k; 40 | char *sexpString[3] = { 41 | "", 42 | "", 43 | "distance" 44 | }; 45 | if (metricType != RF_DISTANCE_EUCLIDEAN) { 46 | RF_nativeError("\nRF-SRC: *** ERROR *** "); 47 | RF_nativeError("\nRF-SRC: Parameter verification failed."); 48 | RF_nativeError("\nRF-SRC: Distance metric is invalid: %10d \n", metricType); 49 | RF_nativeExit(); 50 | } 51 | if (n < 2) { 52 | RF_nativeError("\nRF-SRC: *** ERROR *** "); 53 | RF_nativeError("\nRF-SRC: Parameter verification failed."); 54 | RF_nativeError("\nRF-SRC: Matrix must have more than one (1) row: %10d \n", n); 55 | RF_nativeExit(); 56 | } 57 | #ifdef _OPENMP 58 | if (RF_numThreads < 0) { 59 | RF_numThreads = omp_get_max_threads(); 60 | } 61 | else { 62 | RF_numThreads = (RF_numThreads < omp_get_max_threads()) ? (RF_numThreads) : (omp_get_max_threads()); 63 | } 64 | #endif 65 | if (sizeIJ > 0) { 66 | rowI = (uint*) INTEGER(sexp_rowI); rowI--; 67 | rowJ = (uint*) INTEGER(sexp_rowJ); rowJ--; 68 | size = sizeIJ; 69 | } 70 | else { 71 | size = (n * (n-1)) >> 1; 72 | rowI = uivector(1, size); 73 | rowJ = uivector(1, size); 74 | k = 0; 75 | for (i = 1; i <= n; i++) { 76 | for (j = 1; j < i; j++) { 77 | k++; 78 | rowI[k] = i; 79 | rowJ[k] = j; 80 | } 81 | } 82 | } 83 | RF_stackCount = 1; 84 | initProtect(RF_stackCount); 85 | stackAuxiliaryInfoList(&RF_snpAuxiliaryInfoList, RF_stackCount); 86 | dist = (double*) stackAndProtect(RF_GROW, &RF_nativeIndex, NATIVE_TYPE_NUMERIC, 2, size, 0, sexpString[2], NULL, 1, size); 87 | dist --; 88 | xMatrix = (double **) new_vvector(1, p, NRUTIL_DPTR); 89 | for (i = 1; i <= p; i++) { 90 | xMatrix[i] = (x + ((i-1) * n) - 1); 91 | } 92 | #ifdef _OPENMP 93 | #pragma omp parallel for num_threads(RF_numThreads) 94 | #endif 95 | for (k = 1; k <= size; k++) { 96 | dist[k] = euclidean(n, p, rowI[k], rowJ[k], xMatrix); 97 | } 98 | free_new_vvector(xMatrix, 1, p, NRUTIL_DPTR); 99 | if (sizeIJ > 0) { 100 | } 101 | else { 102 | free_uivector(rowI, 1, size); 103 | free_uivector(rowJ, 1, size); 104 | } 105 | unstackAuxiliaryInfoAndList(FALSE, RF_snpAuxiliaryInfoList, RF_stackCount); 106 | R_ReleaseObject(RF_sexpVector[RF_OUTP_ID]); 107 | R_ReleaseObject(RF_sexpVector[RF_STRG_ID]); 108 | return RF_sexpVector[RF_OUTP_ID]; 109 | } 110 | double euclidean(uint n, uint p, uint i, uint j, double **x) { 111 | double result; 112 | double difference; 113 | uint k; 114 | result = 0.0; 115 | for (k = 1; k <= p; k++) { 116 | difference = x[k][i] - x[k][j]; 117 | result += (difference * difference); 118 | } 119 | result = sqrt(result); 120 | return result; 121 | } 122 | -------------------------------------------------------------------------------- /src/cindex.c: -------------------------------------------------------------------------------- 1 | 2 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 3 | #include "global.h" 4 | #include "external.h" 5 | 6 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 7 | 8 | 9 | 10 | 11 | #include "cindex.h" 12 | #include "stackOutput.h" 13 | #include "nativeUtil.h" 14 | #include "survivalE.h" 15 | SEXP rfsrcCIndex(SEXP sexp_traceFlag, 16 | SEXP sexp_size, 17 | SEXP sexp_time, 18 | SEXP sexp_censoring, 19 | SEXP sexp_predicted, 20 | SEXP sexp_denom) { 21 | uint traceFlag = INTEGER(sexp_traceFlag)[0]; 22 | setUserTraceFlag(traceFlag); 23 | setNativeGlobalEnv(&RF_nativeIndex, &RF_stackCount); 24 | uint size = (uint) INTEGER(sexp_size)[0]; 25 | double *time = REAL(sexp_time); time--; 26 | double *censoring = REAL(sexp_censoring); censoring--; 27 | double *predicted = REAL(sexp_predicted); predicted--; 28 | double *denom = REAL(sexp_denom); denom--; 29 | double *v; 30 | char *sexpString[3] = { 31 | "", 32 | "", 33 | "err" 34 | }; 35 | RF_stackCount = 1; 36 | initProtect(RF_stackCount); 37 | stackAuxiliaryInfoList(&RF_snpAuxiliaryInfoList, RF_stackCount); 38 | v = (double*) stackAndProtect(RF_GROW, &RF_nativeIndex, NATIVE_TYPE_NUMERIC, 2, 1, 0, sexpString[2], NULL, 1, 1); 39 | *v = getConcordanceIndex( 1, 40 | size, 41 | time, 42 | censoring, 43 | predicted, 44 | denom); 45 | unstackAuxiliaryInfoAndList(FALSE, RF_snpAuxiliaryInfoList, RF_stackCount); 46 | R_ReleaseObject(RF_sexpVector[RF_OUTP_ID]); 47 | R_ReleaseObject(RF_sexpVector[RF_STRG_ID]); 48 | return RF_sexpVector[RF_OUTP_ID]; 49 | } 50 | SEXP rfsrcCIndexNew(SEXP sexp_traceFlag, 51 | SEXP sexp_size, 52 | SEXP sexp_time, 53 | SEXP sexp_censoring, 54 | SEXP sexp_predicted, 55 | SEXP sexp_denom) { 56 | uint traceFlag = INTEGER(sexp_traceFlag)[0]; 57 | setUserTraceFlag(traceFlag); 58 | setNativeGlobalEnv(&RF_nativeIndex, &RF_stackCount); 59 | uint size = (uint) INTEGER(sexp_size)[0]; 60 | double *time = REAL(sexp_time); time--; 61 | double *censoring = REAL(sexp_censoring); censoring--; 62 | double *predicted = REAL(sexp_predicted); predicted--; 63 | double *denom = REAL(sexp_denom); denom--; 64 | double *v; 65 | char *sexpString[3] = { 66 | "", 67 | "", 68 | "err" 69 | }; 70 | stackAuxiliaryInfoList(&RF_snpAuxiliaryInfoList, RF_stackCount); 71 | v = (double*) stackAndProtect(RF_GROW, &RF_nativeIndex, NATIVE_TYPE_NUMERIC, 2, 1, 0, sexpString[2], NULL, 1, 1); 72 | *v = getConcordanceIndexNew( 1, 73 | size, 74 | time, 75 | censoring, 76 | predicted, 77 | denom); 78 | unstackAuxiliaryInfoAndList(FALSE, RF_snpAuxiliaryInfoList, RF_stackCount); 79 | R_ReleaseObject(RF_sexpVector[RF_OUTP_ID]); 80 | R_ReleaseObject(RF_sexpVector[RF_STRG_ID]); 81 | return RF_sexpVector[RF_OUTP_ID]; 82 | } 83 | SEXP rfsrcTestSEXP(SEXP sexp_size) { 84 | setNativeGlobalEnv(&RF_nativeIndex, &RF_stackCount); 85 | ulong size = (ulong) REAL(sexp_size)[0]; 86 | char *v; 87 | char *sexpString[3] = { 88 | "", 89 | "", 90 | "dummy" 91 | }; 92 | RF_stackCount = 1; 93 | initProtect(RF_stackCount); 94 | stackAuxiliaryInfoList(&RF_snpAuxiliaryInfoList, RF_stackCount); 95 | v = (char*) stackAndProtect(RF_GROW, &RF_nativeIndex, NATIVE_TYPE_CHARACTER, 2, size, 0, sexpString[2], NULL, 1, size); 96 | v --; 97 | unstackAuxiliaryInfoAndList(FALSE, RF_snpAuxiliaryInfoList, RF_stackCount); 98 | R_ReleaseObject(RF_sexpVector[RF_OUTP_ID]); 99 | R_ReleaseObject(RF_sexpVector[RF_STRG_ID]); 100 | return RF_sexpVector[RF_OUTP_ID]; 101 | } 102 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | useDynLib(randomForestSRC, .registration = TRUE) 2 | 3 | importFrom("data.tree", "FromDataFrameNetwork", "SetGraphStyle", 4 | "SetEdgeStyle", "SetNodeStyle", "Do", "GetDefaultTooltip") 5 | importFrom("DiagrammeR", "render_graph", "create_graph", 6 | "create_node_df", "create_edge_df", "add_global_graph_attrs") 7 | importFrom("parallel", mclapply) 8 | importFrom("graphics", "abline", "axis", "box", "boxplot", "bxp", 9 | "legend", "lines", "matlines", "matplot", "mtext", "par", "plot", 10 | "plot.new", "plot.window", "points", "rug", "segments", 11 | "strwidth", "text", "title", "grconvertX", "grconvertY", "plot.default") 12 | importFrom("stats", "as.dist", "as.formula", "cutree", "dlnorm", "formula", 13 | "hclust", "lowess", "median", "model.matrix", "na.omit", 14 | "optim", "pgamma", "plnorm", "pnorm", "predict", 15 | "quantile", "qnorm", "runif", "sd", "supsmu", "var", "wilcox.test", "rnorm", 16 | "lm", "coef", "binom.test") 17 | importFrom("utils", "installed.packages", "txtProgressBar", "setTxtProgressBar", 18 | "write.table", "tail", "object.size", "combn", "flush.console") 19 | importFrom("grDevices", "gray") 20 | 21 | export(extract.bootsample, 22 | extract.quantile, 23 | extract.subsample, 24 | get.auc, 25 | get.bayes.rule, 26 | get.brier.error, 27 | get.brier.survival, 28 | get.cindex, 29 | get.confusion, 30 | get.imbalanced.performance, 31 | get.imbalanced.optimize, 32 | get.logloss, 33 | get.misclass.error, 34 | get.mv.cserror, 35 | get.mv.csvimp, 36 | get.mv.error, 37 | get.mv.error.block, 38 | get.mv.formula, 39 | get.mv.predicted, 40 | get.mv.vimp, 41 | get.partial.plot.data, 42 | get.pr.auc, 43 | get.pr.curve, 44 | get.quantile, 45 | get.quantile.crps, 46 | get.quantile.stat, 47 | get.rfq.threshold, 48 | get.tree, 49 | get.tree.rfsrc, 50 | holdout.vimp, 51 | holdout.vimp.rfsrc, 52 | imbalanced, 53 | imbalanced.rfsrc, 54 | impute, 55 | impute.rfsrc, 56 | max.subtree, 57 | max.subtree.rfsrc, 58 | partial, 59 | partial.rfsrc, 60 | plot.competing.risk, 61 | plot.competing.risk.rfsrc, 62 | plot.quantreg, 63 | plot.quantreg.rfsrc, 64 | plot.rfsrc, 65 | plot.subsample, 66 | plot.subsample.rfsrc, 67 | plot.survival, 68 | plot.survival.rfsrc, 69 | plot.variable, 70 | plot.variable.rfsrc, 71 | predict.rfsrc, 72 | print.rfsrc, 73 | print.bootsample, 74 | print.bootsample.rfsrc, 75 | print.imbalanced.performance, 76 | print.subsample, 77 | print.subsample.rfsrc, 78 | quantreg, 79 | quantreg.rfsrc, 80 | rfsrc, 81 | rfsrc.anonymous, 82 | rfsrc.cart, 83 | rfsrc.fast, 84 | rfsrc.news, 85 | sid.perf.metric, 86 | sidClustering, 87 | sidClustering.rfsrc, 88 | subsample, 89 | subsample.rfsrc, 90 | tune, 91 | tune.rfsrc, 92 | tune.nodesize, 93 | tune.nodesize.rfsrc, 94 | vimp, 95 | vimp.rfsrc) 96 | 97 | 98 | 99 | 100 | export(fast.save) 101 | export(fast.load) 102 | export(fast.save.list) 103 | export(fast.load.list) 104 | export(lsos) 105 | 106 | 107 | 108 | S3method(max, subtree) 109 | S3method(max, subtree.rfsrc) 110 | 111 | S3method(plot, competing.risk) 112 | S3method(plot, competing.risk.rfsrc) 113 | S3method(plot, quantreg) 114 | S3method(plot, quantreg.rfsrc) 115 | S3method(plot, rfsrc) 116 | S3method(plot, subsample) 117 | S3method(plot, subsample.rfsrc) 118 | S3method(plot, survival) 119 | S3method(plot, survival.rfsrc) 120 | S3method(plot, variable) 121 | S3method(plot, variable.rfsrc) 122 | 123 | S3method(predict, rfsrc) 124 | 125 | S3method(print, rfsrc) 126 | S3method(print, bootsample) 127 | S3method(print, bootsample.rfsrc) 128 | S3method(print, imbalanced.performance) 129 | S3method(print, subsample) 130 | S3method(print, subsample.rfsrc) 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /R/plot.quantreg.rfsrc.R: -------------------------------------------------------------------------------- 1 | plot.quantreg.rfsrc <- function(x, prbL = .25, prbU = .75, 2 | m.target = NULL, crps = TRUE, subset = NULL, 3 | xlab = NULL, ylab = NULL, ...) { 4 | ##-------------------------------------------------------------- 5 | ## 6 | ## prelimary checks 7 | ## 8 | ##-------------------------------------------------------------- 9 | ## does not apply to predict objects without y 10 | if (sum(grepl("predict", class(x))) > 0 && is.null(x$yvar)) { 11 | stop("no yvar present in quantreg predict object") 12 | } 13 | ## check probs 14 | if (prbL < 0 || prbU > 1) { 15 | stop("requested probabilities must lie in (0, 1)") 16 | } 17 | if (prbL >= prbU) { 18 | stop("prbL must be less than prbU") 19 | } 20 | prbM <- max(prbL, .5) 21 | if (prbM == prbL) { 22 | prbM <- (prbL + prbU) / 2 23 | } 24 | ##-------------------------------------------------------------- 25 | ## 26 | ## subset assignment 27 | ## 28 | ##-------------------------------------------------------------- 29 | if (is.null(subset)) { 30 | subset <- 1:x$n 31 | } 32 | else { 33 | if (is.logical(subset)) { 34 | subset <- which(subset) 35 | } 36 | subset <- subset[subset >=1 & subset <= x$n] 37 | } 38 | if (length(subset) == 0) { 39 | stop("requested subset analysis has subset that is empty") 40 | } 41 | ##-------------------------------------------------------------- 42 | ## 43 | ## assemble the quantile data for plotting 44 | ## 45 | ##-------------------------------------------------------------- 46 | quant.dat <- get.quantile(x, c(prbL, prbM, prbU), FALSE) 47 | ## we have a univariate quantile regression object 48 | if (length(quant.dat) == 1) { 49 | y.names <- names(quant.dat)[1] 50 | quant.dat <- quant.dat[[1]][subset,, drop = FALSE] 51 | y <- x$yvar[subset] 52 | if (crps) crps.dat <- get.quantile.crps(x, subset = subset) 53 | } 54 | ## the quantile regression object is multivariate 55 | else { 56 | if (is.null(m.target) || length(intersect(m.target, names(quant.dat))) == 0) { 57 | y.names <- names(quant.dat)[1] 58 | quant.dat <- quant.dat[[1]][subset,, drop = FALSE] 59 | y <- x$yvar[, y.names][subset] 60 | if (crps) crps.dat <- get.quantile.crps(x, subset = subset)[[1]] 61 | } 62 | else { 63 | y.names <- m.target 64 | quant.dat <- quant.dat[[y.names]][subset,, drop = FALSE] 65 | y <- x$yvar[, y.names][subset] 66 | if (crps) crps.dat <- get.quantile.crps(x, subset = subset)[[y.names]] 67 | } 68 | } 69 | ##-------------------------------------------------------------- 70 | ## 71 | ## quantile regression plot 72 | ## 73 | ##-------------------------------------------------------------- 74 | if (is.null(xlab)) { 75 | xlab <- y.names 76 | } 77 | if (is.null(ylab)) { 78 | ylab <- "Target Quantiles" 79 | } 80 | jitter.y <- jitter(y, 10) 81 | rng <- range(c(y, quant.dat, jitter.y)) 82 | plot(rng, rng, xlab = xlab, ylab = ylab, type = "n") 83 | points(jitter.y, quant.dat[, 2], pch = 15, col = 4, cex = 0.75) 84 | segments(jitter.y, quant.dat[, 2], jitter.y, quant.dat[, 1], col = "grey") 85 | segments(jitter.y, quant.dat[, 2], jitter.y, quant.dat[, 3], col = "grey") 86 | points(jitter.y, quant.dat[, 1], pch = "-", cex = 1) 87 | points(jitter.y, quant.dat[, 3], pch = "-", cex = 1) 88 | abline(0, 1, lty = 2, col = 2) 89 | ##-------------------------------------------------------------- 90 | ## 91 | ## inset the CRPS 92 | ## 93 | ##-------------------------------------------------------------- 94 | if (crps) { 95 | old.par <- par(no.readonly = TRUE) 96 | on.exit(par(old.par)) 97 | u <- par("usr") 98 | v <- c( 99 | grconvertX(u[1:2], "user", "ndc"), 100 | grconvertY(u[3:4], "user", "ndc") 101 | ) 102 | v <- c(v[1], (v[1]+v[2]) / 3, (v[3]+v[4]) / (1.5), v[4]) 103 | figO <- tryCatch({par(fig = v, new = TRUE, mar = c(0,0,0,0), mgp = c(0,0,0))}, error=function(ex){NULL}) 104 | if (!is.null(figO)) { 105 | plot(crps.dat, yaxt = "n", col = 2, type = "l", lwd = 2, tck = .05, cex.axis = .75, ...) 106 | axis(4, cex.axis = .75, tck = .05) 107 | box() 108 | } 109 | } 110 | } 111 | plot.quantreg <- plot.quantreg.rfsrc 112 | -------------------------------------------------------------------------------- /R/imbalanced.rfsrc.R: -------------------------------------------------------------------------------- 1 | imbalanced.rfsrc <- function(formula, data, ntree = 3000, 2 | method = c("rfq", "brf", "standard"), splitrule = "auc", 3 | perf.type = NULL, block.size = NULL, fast = FALSE, 4 | ratio = NULL, ...) 5 | { 6 | ## preliminary checks: all are fatal 7 | ## parse the formula to ensure this is a two-class problem 8 | formulaPrelim <- parseFormula(formula, data) 9 | if (formulaPrelim$family != "class") { 10 | stop("this function only applies to classification problems") 11 | } 12 | yvar <- data[, formulaPrelim$yvar.names] 13 | if (length(levels(yvar)) != 2) { 14 | stop("this function only applies to two-class problems") 15 | } 16 | ## check that method is set correctly 17 | method <- match.arg(method, c("rfq", "brf", "standard")) 18 | rfq.flag <- NULL 19 | if (method == "rfq") { 20 | rfq.flag <- TRUE 21 | } 22 | ## set default performance 23 | if (is.null(perf.type)) { 24 | if (method == "brf" || method == "rfq") { 25 | perf.type <- "gmean" 26 | } 27 | else { 28 | perf.type <- "default"##equivalent to misclass 29 | } 30 | } 31 | ## check performance type is properly set 32 | perf.type <- match.arg(perf.type, 33 | c("none", "default", "standard", "misclass", "brier", "gmean", "g.mean")) 34 | if (perf.type == "g.mean") {##legacy 35 | perf.type <- "gmean" 36 | } 37 | ##----------------------------------------- 38 | ## 39 | ## rfsrc call - depends on what's requested 40 | ## 41 | ##----------------------------------------- 42 | ##---------------------------------------- 43 | ## brf method 44 | ##---------------------------------------- 45 | if (method == "brf") { 46 | ## TBD2 currently cannot handle missing values 47 | data <- na.omit(data) 48 | yvar <- data[, formulaPrelim$yvar.names] 49 | ## for legacy reasons we maintain swr here 50 | ## TBD2 consider swor 51 | o <- rfsrc(formula, data, ntree = ntree, 52 | splitrule = splitrule, 53 | perf.type = perf.type, 54 | block.size = block.size, 55 | case.wt = make.wt(yvar), 56 | sampsize = make.size(yvar), 57 | samptype = "swr", ...) 58 | } 59 | ##---------------------------------------- 60 | ## standard and rfq method 61 | ##---------------------------------------- 62 | else { 63 | ##------------------------------------------------- 64 | ## determine the grow interface - rfsrc or rfsrc.fast? 65 | ##------------------------------------------------- 66 | if (!fast) { 67 | rfsrc.grow <- "rfsrc" 68 | } 69 | else { 70 | rfsrc.grow <- "rfsrc.fast" 71 | } 72 | ##------------------------------------------------- 73 | ## acquire the user specified additional options 74 | ##------------------------------------------------- 75 | dots <- list(...) 76 | ##------------------------------------------------- 77 | ## undersampling of the majority class if requested 78 | ##------------------------------------------------- 79 | if (!is.null(ratio)) { 80 | ## TBD2 currently cannot handle missing values 81 | data <- na.omit(data) 82 | yvar <- data[, formulaPrelim$yvar.names] 83 | samp <- make.imbalanced.sample(ntree = ntree, ratio = ratio, y = yvar) 84 | dots$bootstrap <- dots$samp <- NULL 85 | o <- do.call(rfsrc.grow, c(list(formula = formula, data = data, ntree = ntree, 86 | rfq = rfq.flag, splitrule = splitrule, 87 | perf.type = perf.type, block.size = block.size, 88 | samp = samp, bootstrap = "by.user"), dots)) 89 | } 90 | ##----------------------------------------------------------- 91 | ## proceed to standard and rfq analysis without undersampling 92 | ## this is the default scenario 93 | ## allow fast random forests if requested 94 | ##----------------------------------------------------------- 95 | else { 96 | o <- do.call(rfsrc.grow, c(list(formula = formula, data = data, ntree = ntree, 97 | rfq = rfq.flag, splitrule = splitrule, 98 | perf.type = perf.type, block.size = block.size), dots)) 99 | } 100 | } 101 | ## return the object 102 | o 103 | } 104 | imbalanced <- imbalanced.rfsrc 105 | -------------------------------------------------------------------------------- /src/nrutil.h: -------------------------------------------------------------------------------- 1 | #ifndef NRUTIL_H 2 | #define NRUTIL_H 3 | #define FREE_ARG char* 4 | #define NR_END 2 5 | enum alloc_type{ 6 | NRUTIL_DPTR, 7 | NRUTIL_UPTR, 8 | NRUTIL_IPTR, 9 | NRUTIL_CPTR, 10 | NRUTIL_NPTR, 11 | NRUTIL_TPTR, 12 | NRUTIL_FPTR, 13 | NRUTIL_LPTR, 14 | NRUTIL_DPTR2, 15 | NRUTIL_UPTR2, 16 | NRUTIL_IPTR2, 17 | NRUTIL_CPTR2, 18 | NRUTIL_NPTR2, 19 | NRUTIL_TPTR2, 20 | NRUTIL_FPTR2, 21 | NRUTIL_DPTR3, 22 | NRUTIL_UPTR3, 23 | NRUTIL_NPTR3, 24 | NRUTIL_DPTR4, 25 | NRUTIL_UPTR4, 26 | NRUTIL_XPTR, 27 | NRUTIL_QPTR, 28 | NRUTIL_QPTR2, 29 | NRUTIL_SPTR, 30 | NRUTIL_SPTR2, 31 | NRUTIL_VPTR, 32 | NRUTIL_OMPLPTR, 33 | NRUTIL_OMPLPTR2, 34 | NRUTIL_LEAFPTR, 35 | NRUTIL_LEAFPTR2, 36 | NRUTIL_SRTLNKPTR, 37 | NRUTIL_TARPTR, 38 | }; 39 | unsigned int upower (unsigned int x, unsigned int n); 40 | unsigned int upower2 (unsigned int n); 41 | unsigned int ulog2 (unsigned int n); 42 | void hpsort(double *ra, unsigned int n); 43 | void hpsortui(unsigned int *ra, unsigned int n); 44 | void hpsorti(int *ra, unsigned int n); 45 | void qksort(double *arr, unsigned int n); 46 | void indexx(unsigned int n, double *arr, unsigned int *indx); 47 | void nrerror(char error_text[]); 48 | void *gblock(size_t size); 49 | void free_gblock(void *v, size_t size); 50 | void *gvector(unsigned long long nl, unsigned long long nh, size_t size); 51 | void free_gvector(void *v, unsigned long long nl, unsigned long long nh, size_t size); 52 | char *cvector(unsigned long long nl, unsigned long long nh); 53 | void free_cvector(char *v, unsigned long long nl, unsigned long long nh); 54 | char **cmatrix(unsigned long long nrl, unsigned long long nrh, unsigned long long ncl, unsigned long long nch); 55 | void free_cmatrix(char **v, unsigned long long nrl, unsigned long long nrh, unsigned long long ncl, unsigned long long nch); 56 | int *ivector(unsigned long long nl, unsigned long long nh); 57 | void free_ivector(int *v, unsigned long long nl, unsigned long long nh); 58 | int **imatrix(unsigned long long nrl, unsigned long long nrh, unsigned long long ncl, unsigned long long nch); 59 | void free_imatrix(int **v, unsigned long long nrl, unsigned long long nrh, unsigned long long ncl, unsigned long long nch); 60 | unsigned int *uivector(unsigned long long nl, unsigned long long nh); 61 | void free_uivector(unsigned int *v, unsigned long long nl, unsigned long long nh); 62 | unsigned int **uimatrix(unsigned long long nrl, unsigned long long nrh, unsigned long long ncl, unsigned long long nch); 63 | void free_uimatrix(unsigned int **v, unsigned long long nrl, unsigned long long nrh, unsigned long long ncl, unsigned long long nch); 64 | unsigned long *ulvector(unsigned long long nl, unsigned long long nh); 65 | void free_ulvector(unsigned long *v, unsigned long long nl, unsigned long long nh); 66 | double *dvector(unsigned long long nl, unsigned long long nh); 67 | void free_dvector(double *v, unsigned long long nl, unsigned long long nh); 68 | double **dmatrix(unsigned long long nrl, unsigned long long nrh, unsigned long long ncl, unsigned long long nch); 69 | void free_dmatrix(double **v, unsigned long long nrl, unsigned long long nrh, unsigned long long ncl, unsigned long long nch); 70 | double ***dmatrix3(unsigned long long n3l, unsigned long long n3h, unsigned long long nrl, unsigned long long nrh, unsigned long long ncl, unsigned long long nch); 71 | void free_dmatrix3(double ***v, unsigned long long n3l, unsigned long long n3h, unsigned long long nrl, unsigned long long nrh, unsigned long long ncl, unsigned long long nch); 72 | double ****dmatrix4(unsigned long long n4l, unsigned long long n4h, unsigned long long n3l, unsigned long long n3h, unsigned long long nrl, unsigned long long nrh, unsigned long long ncl, unsigned long long nch); 73 | void free_dmatrix4(double ****v, unsigned long long n4l, unsigned long long n4h, unsigned long long n3l, unsigned long long n3h, unsigned long long nrl, unsigned long long nrh, unsigned long long ncl, unsigned long long nch); 74 | #ifdef _OPENMP 75 | omp_lock_t *ompvector(unsigned long long nl, unsigned long long nh); 76 | void free_ompvector(omp_lock_t *v, unsigned long long nl, unsigned long long nh); 77 | #endif 78 | void *new_vvector(unsigned long long nl, unsigned long long nh, enum alloc_type type); 79 | void free_new_vvector(void *v, unsigned long long nl, unsigned long long nh, enum alloc_type type); 80 | void nrCopyMatrix( 81 | unsigned int **new, 82 | unsigned int **old, 83 | unsigned int nrow, 84 | unsigned int ncol 85 | ); 86 | void nrCopyVector( 87 | char *new, 88 | char *old, 89 | unsigned int ncol 90 | ); 91 | void testEndianness(void); 92 | #endif 93 | -------------------------------------------------------------------------------- /man/plot.survival.rfsrc.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.survival.rfsrc} 2 | \alias{plot.survival.rfsrc} 3 | \alias{plot.survival} 4 | \alias{get.brier.survival} 5 | \title{Plot of Survival Estimates} 6 | \description{ 7 | Plot various survival estimates. 8 | } 9 | \usage{\method{plot.survival}{rfsrc}(x, show.plots = TRUE, subset, 10 | collapse = FALSE, cens.model = c("km", "rfsrc"), ...) 11 | } 12 | \arguments{ 13 | \item{x}{An object of class \code{(rfsrc, grow)} or 14 | \code{(rfsrc, predict)}.} 15 | 16 | \item{show.plots}{Should plots be displayed?} 17 | 18 | \item{subset}{Vector indicating which cases from \code{x} we want 19 | estimates for. All cases used if not specified.} 20 | 21 | \item{collapse}{Collapse the survival function?} 22 | 23 | \item{cens.model}{Using the training data, specifies method for 24 | estimating the censoring distribution used in the inverse 25 | probability of censoring weights (IPCW) for calculating the Brier 26 | score: 27 | 28 | \describe{ 29 | \item{\code{km}:}{Uses the Kaplan-Meier estimator.} 30 | \item{\code{rfscr}:}{Uses a censoring random survival forest estimator.} 31 | } 32 | } 33 | 34 | \item{...}{Further arguments passed to or from other methods.} 35 | 36 | } 37 | \details{ 38 | Produces the following plots (going from top to bottom, left to right): 39 | 40 | \enumerate{ 41 | 42 | \item Forest estimated survival function for each individual (thick 43 | red line is overall ensemble survival, thick green line is 44 | Nelson-Aalen estimator). 45 | 46 | \item Brier score (0=perfect, 1=poor, and 0.25=guessing) stratified 47 | by ensemble mortality. Based on the IPCW method described in Gerds 48 | et al. (2006). Stratification is into 4 groups corresponding to the 49 | 0-25, 25-50, 50-75 and 75-100 percentile values of mortality. Red 50 | line is overall (non-stratified) Brier score. 51 | 52 | \item Continuous rank probability score (CRPS) equal to the 53 | integrated Brier score divided by time. 54 | 55 | \item Plot of mortality of each individual versus observed time. 56 | Points in blue correspond to events, black points are censored 57 | observations. Not given for prediction settings lacking 58 | survival response information. 59 | 60 | } 61 | 62 | Whenever possible, out-of-bag (OOB) values are used. 63 | 64 | Only applies to survival families. In particular, fails for competing 65 | risk analyses. Use \command{plot.competing.risk} in such cases. 66 | 67 | Mortality (Ishwaran et al., 2008) represents estimated risk for an 68 | individual calibrated to the scale of number of events (as a specific 69 | example, if \emph{i} has a mortality value of 100, then if all 70 | individuals had the same x-values as \emph{i}, we would expect an 71 | average of 100 events). 72 | 73 | The utility function \code{get.brier.survival} can be used to extract 74 | the Brier score among other useful quantities. 75 | 76 | } 77 | \value{ 78 | Invisibly, the conditional and unconditional Brier scores, and the 79 | integrated Brier score. 80 | } 81 | \author{ 82 | Hemant Ishwaran and Udaya B. Kogalur 83 | } 84 | \references{ 85 | Gerds T.A and Schumacher M. (2006). Consistent estimation of the 86 | expected Brier score in general survival models with right-censored 87 | event times, \emph{Biometrical J.}, 6:1029-1040. 88 | 89 | Graf E., Schmoor C., Sauerbrei W. and Schumacher M. (1999). 90 | Assessment and comparison of prognostic classification 91 | schemes for survival data, \emph{Statist. in Medicine}, 92 | 18:2529-2545. 93 | 94 | Ishwaran H. and Kogalur U.B. (2007). Random survival forests for R, 95 | \emph{Rnews}, 7(2):25-31. 96 | 97 | Ishwaran H., Kogalur U.B., Blackstone E.H. and Lauer M.S. 98 | (2008). Random survival forests, \emph{Ann. App. Statist.}, 2:841-860. 99 | 100 | } 101 | \seealso{ 102 | \command{\link{plot.competing.risk.rfsrc}}, 103 | \command{\link{predict.rfsrc}}, 104 | \command{\link{rfsrc}} 105 | } 106 | \examples{ 107 | \donttest{ 108 | ## veteran data 109 | data(veteran, package = "randomForestSRC") 110 | plot.survival(rfsrc(Surv(time, status)~ ., veteran), cens.model = "rfsrc") 111 | 112 | ## pbc data 113 | data(pbc, package = "randomForestSRC") 114 | pbc.obj <- rfsrc(Surv(days, status) ~ ., pbc) 115 | 116 | ## use subset to focus on specific individuals 117 | plot.survival(pbc.obj, subset = 3) 118 | plot.survival(pbc.obj, subset = c(3, 10)) 119 | plot.survival(pbc.obj, subset = c(3, 10), collapse = TRUE) 120 | 121 | ## get.brier.survival function does many nice things! 122 | plot(get.brier.survival(pbc.obj, cens.model="km")$brier.score,type="s", col=2) 123 | lines(get.brier.survival(pbc.obj, cens.model="rfsrc")$brier.score, type="s", col=4) 124 | legend("bottomright", legend=c("cens.model = km", "cens.model = rfsrc"), fill=c(2,4)) 125 | 126 | } 127 | } 128 | \keyword{plot} 129 | -------------------------------------------------------------------------------- /man/rfsrc.fast.Rd: -------------------------------------------------------------------------------- 1 | \name{rfsrc.fast} 2 | \alias{rfsrc.fast} 3 | \title{Fast Random Forests} 4 | \description{ 5 | Fast approximate random forests using subsampling with forest options 6 | set to encourage computational speed. Applies to all families. 7 | } 8 | \usage{rfsrc.fast(formula, data, 9 | ntree = 500, 10 | nsplit = 10, 11 | bootstrap = "by.root", 12 | sampsize = function(x){min(x * .632, max(150, x ^ (3/4)))}, 13 | samptype = "swor", 14 | samp = NULL, 15 | ntime = 50, 16 | forest = FALSE, 17 | save.memory = TRUE, 18 | ...) 19 | } 20 | \arguments{ 21 | 22 | \item{formula}{Model to be fit. If missing, unsupervised splitting is 23 | implemented.} 24 | 25 | \item{data}{Data frame containing the y-outcome and x-variables.} 26 | 27 | \item{ntree}{Number of trees.} 28 | 29 | \item{nsplit}{Non-negative integer value specifying number of 30 | random split points used to split a node (deterministic splitting 31 | corresponds to the value zero and can be slower).} 32 | 33 | \item{bootstrap}{Bootstrap protocol used in growing a tree.} 34 | 35 | \item{sampsize}{Function specifying size of subsampled data. Can also be a number.} 36 | 37 | \item{samptype}{Type of bootstrap used.} 38 | 39 | \item{samp}{Bootstrap specification when \code{"by.user"} is used.} 40 | 41 | \item{ntime}{Integer value used for survival to 42 | constrain ensemble calculations to a grid of \code{ntime} time points.} 43 | 44 | \item{forest}{Save key forest values? Turn this on if you want prediction on test data.} 45 | 46 | \item{save.memory}{Save memory? Setting this to \code{FALSE} stores 47 | terminal node quantities used for prediction on test data. This 48 | yields rapid prediction but can be memory intensive for big data, 49 | especially competing risks and survival models.} 50 | 51 | \item{...}{Further arguments to be passed to \code{\link{rfsrc}}.} 52 | 53 | } 54 | \details{ 55 | Calls \code{\link{rfsrc}} by choosing options (like subsampling) to 56 | encourage computational speeds. This will provide a good 57 | approximation but will not be as good as default settings of 58 | \code{\link{rfsrc}}. 59 | } 60 | \value{ 61 | An object of class \code{(rfsrc, grow)}. 62 | } 63 | \author{ 64 | Hemant Ishwaran and Udaya B. Kogalur 65 | } 66 | \seealso{ 67 | \command{\link{rfsrc}} 68 | } 69 | \examples{ 70 | \donttest{ 71 | ## ------------------------------------------------------------ 72 | ## regression 73 | ## ------------------------------------------------------------ 74 | 75 | ## load the Iowa housing data 76 | data(housing, package = "randomForestSRC") 77 | 78 | ## do quick and *dirty* imputation 79 | housing <- impute(SalePrice ~ ., housing, 80 | ntree = 50, nimpute = 1, splitrule = "random") 81 | 82 | ## grow a fast forest 83 | o1 <- rfsrc.fast(SalePrice ~ ., housing) 84 | o2 <- rfsrc.fast(SalePrice ~ ., housing, nodesize = 1) 85 | print(o1) 86 | print(o2) 87 | 88 | ## grow a fast bivariate forest 89 | o3 <- rfsrc.fast(cbind(SalePrice,Overall.Qual) ~ ., housing) 90 | print(o3) 91 | 92 | ## ------------------------------------------------------------ 93 | ## classification 94 | ## ------------------------------------------------------------ 95 | 96 | data(wine, package = "randomForestSRC") 97 | wine$quality <- factor(wine$quality) 98 | o <- rfsrc.fast(quality ~ ., wine) 99 | print(o) 100 | 101 | ## ------------------------------------------------------------ 102 | ## grow fast random survival forests without C-calculation 103 | ## use brier score to assess model performance 104 | ## compare pure random splitting to logrank splitting 105 | ## ------------------------------------------------------------ 106 | 107 | data(peakVO2, package = "randomForestSRC") 108 | f <- as.formula(Surv(ttodead, died)~.) 109 | o1 <- rfsrc.fast(f, peakVO2, perf.type = "none") 110 | o2 <- rfsrc.fast(f, peakVO2, perf.type = "none", splitrule = "random") 111 | bs1 <- get.brier.survival(o1, cens.model = "km") 112 | bs2 <- get.brier.survival(o2, cens.model = "km") 113 | plot(bs2$brier.score, type = "s", col = 2) 114 | lines(bs1$brier.score, type = "s", col = 4) 115 | legend("bottomright", legend = c("random", "logrank"), fill = c(2,4)) 116 | 117 | ## ------------------------------------------------------------ 118 | ## competing risks 119 | ## ------------------------------------------------------------ 120 | 121 | data(wihs, package = "randomForestSRC") 122 | o <- rfsrc.fast(Surv(time, status) ~ ., wihs) 123 | print(o) 124 | 125 | ## ------------------------------------------------------------ 126 | ## class imbalanced data using gmean performance 127 | ## ------------------------------------------------------------ 128 | 129 | data(breast, package = "randomForestSRC") 130 | breast <- na.omit(breast) 131 | f <- as.formula(status ~ .) 132 | o <- rfsrc.fast(f, breast, perf.type = "gmean") 133 | print(o) 134 | 135 | ## ------------------------------------------------------------ 136 | ## class imbalanced data using random forests quantile-classifer (RFQ) 137 | ## fast=TRUE => rfsrc.fast 138 | ## see imbalanced function for further details 139 | ## ------------------------------------------------------------ 140 | 141 | data(breast, package = "randomForestSRC") 142 | breast <- na.omit(breast) 143 | f <- as.formula(status ~ .) 144 | o <- imbalanced(f, breast, fast = TRUE) 145 | print(o) 146 | 147 | }} 148 | \keyword{forest} 149 | \keyword{fast} -------------------------------------------------------------------------------- /src/splitUtil.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SPLIT_UTIL_H 2 | #define RF_SPLIT_UTIL_H 3 | #include "node.h" 4 | #include "splitInfo.h" 5 | #include "sampling.h" 6 | char getPreSplitResultGeneric (uint treeID, 7 | Node *parent, 8 | char multImpFlag, 9 | char multVarFlag); 10 | char getPreSplitResultNoMiss (uint treeID, 11 | Node *parent, 12 | char multImpFlag, 13 | char multVarFlag); 14 | void unstackPreSplit (char preliminaryResult, 15 | Node *parent, 16 | char multImpFlag, 17 | char multVarFlag); 18 | void stackSplitPreliminary(uint nodeSize, 19 | char **localSplitIndicator, 20 | double **splitVector); 21 | void unstackSplitPreliminary(uint nodeSize, 22 | char *localSplitIndicator, 23 | double *splitVector); 24 | DistributionObj *stackRandomCovariatesGeneric(uint treeID, Node *parent); 25 | void unstackRandomCovariatesGeneric(uint treeID, DistributionObj *obj); 26 | char selectRandomCovariatesGeneric(uint treeID, 27 | Node *parent, 28 | DistributionObj *distributionObj, 29 | char *factorFlag, 30 | uint *covariate, 31 | uint *covariateCount); 32 | uint stackAndConstructSplitVectorGenericPhase1 (uint treeID, 33 | Node *parent, 34 | uint covariate, 35 | ...); 36 | uint stackAndConstructSplitVectorGenericPhase2 (uint treeID, 37 | Node *parent, 38 | uint covariate, 39 | double *splitVector, 40 | uint vectorSize, 41 | char *factorFlag, 42 | char *deterministicSplitFlag, 43 | uint *mwcpSizeAbsolute, 44 | void **splitVectorPtr); 45 | void unstackSplitVectorGeneric(uint treeID, 46 | Node *parent, 47 | uint splitLength, 48 | char factorFlag, 49 | uint splitVectorSize, 50 | uint mwcpSizeAbsolute, 51 | char deterministicSplitFlag, 52 | void *splitVectorPtr, 53 | char multImpFlag, 54 | uint *indxx); 55 | uint virtuallySplitNodeGeneric(uint treeID, 56 | Node *parent, 57 | char factorFlag, 58 | uint mwcpSizeAbsolute, 59 | double *observation, 60 | uint *indxx, 61 | void *splitVectorPtr, 62 | uint offset, 63 | char *localSplitIndicator, 64 | uint *leftSize, 65 | uint priorMembrIter, 66 | uint *currentMembrIter); 67 | char summarizeSplitResult(SplitInfoMax *splitInfoMax); 68 | char updateMaximumSplitGeneric(uint treeID, 69 | Node *parent, 70 | double delta, 71 | uint covariate, 72 | uint index, 73 | char factorFlag, 74 | uint mwcpSizeAbsolute, 75 | uint repMembrSize, 76 | char **polarity, 77 | void *splitVectorPtr, 78 | SplitInfoMax *splitInfoMax); 79 | void getReweightedRandomPair(uint treeID, 80 | uint relativefactorSize, 81 | uint absoluteFactorSize, 82 | double *absoluteLevel, 83 | uint *result); 84 | void getRandomPair(uint treeID, uint relativeFactorSize, uint absoluteFactorSize, double *absoluteLevel, uint *result); 85 | void createRandomBinaryPair(uint treeID, 86 | uint relativeFactorSize, 87 | uint absoluteFactorSize, 88 | uint groupSize, 89 | double *absolutelevel, 90 | uint *pair); 91 | void convertRelToAbsBinaryPair(uint treeID, 92 | uint relativeFactorSize, 93 | uint absoluteFactorSize, 94 | uint relativePair, 95 | double *absoluteLevel, 96 | uint *pair); 97 | #endif 98 | -------------------------------------------------------------------------------- /src/splitCustom.h: -------------------------------------------------------------------------------- 1 | #ifndef RF_SPLIT_CUSTOM_H 2 | #define RF_SPLIT_CUSTOM_H 3 | #define LEFT 0x01 4 | #define RIGHT 0x02 5 | #define CLAS_FAM 0 6 | #define REGR_FAM 1 7 | #define SURV_FAM 2 8 | #define CRSK_FAM 3 9 | void registerCustomFunctions(void); 10 | extern void registerThis (double (*func) (unsigned int n, 11 | char *membership, 12 | double *time, 13 | double *event, 14 | unsigned int eventTypeSize, 15 | unsigned int eventTimeSize, 16 | double *eventTime, 17 | double *response, 18 | double mean, 19 | double variance, 20 | unsigned int maxLevel, 21 | double **feature, 22 | unsigned int featureCount), 23 | unsigned int family, 24 | unsigned int slot); 25 | double getCustomSplitStatisticMultivariateRegression (unsigned int n, 26 | char *membership, 27 | double *time, 28 | double *event, 29 | unsigned int eventTypeSize, 30 | unsigned int eventTimeSize, 31 | double *eventTime, 32 | double *response, 33 | double mean, 34 | double variance, 35 | unsigned int maxLevel, 36 | double **feature, 37 | unsigned int featureCount); 38 | double getCustomSplitStatisticMultivariateClassification (unsigned int n, 39 | char *membership, 40 | double *time, 41 | double *event, 42 | unsigned int eventTypeSize, 43 | unsigned int eventTimeSize, 44 | double *eventTime, 45 | double *response, 46 | double mean, 47 | double variance, 48 | unsigned int maxLevel, 49 | double **feature, 50 | unsigned int featureCount); 51 | double getCustomSplitStatisticSurvival (unsigned int n, 52 | char *membership, 53 | double *time, 54 | double *event, 55 | unsigned int eventTypeSize, 56 | unsigned int eventTimeSize, 57 | double *eventTime, 58 | double *response, 59 | double mean, 60 | double variance, 61 | unsigned int maxLevel, 62 | double **feature, 63 | unsigned int featureCount); 64 | double getCustomSplitStatisticCompetingRisk (unsigned int n, 65 | char *membership, 66 | double *time, 67 | double *event, 68 | unsigned int eventTypeSize, 69 | unsigned int eventTimeSize, 70 | double *eventTime, 71 | double *response, 72 | double mean, 73 | double variance, 74 | unsigned int maxLevel, 75 | double **feature, 76 | unsigned int featureCount); 77 | unsigned int *alloc_uivector(unsigned int nh); 78 | void dealloc_uivector(unsigned int *v, unsigned int nh); 79 | double *alloc_dvector(double *v, unsigned int nh); 80 | void dealloc_dvector(double *v, unsigned int nh); 81 | unsigned int **alloc_uimatrix(unsigned int n2h, unsigned int nh); 82 | void dealloc_uimatrix(unsigned int **v, unsigned int n2h, unsigned int nh); 83 | #endif 84 | -------------------------------------------------------------------------------- /src/random.c: -------------------------------------------------------------------------------- 1 | 2 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 3 | #include "global.h" 4 | #include "external.h" 5 | 6 | // *** THIS HEADER IS AUTO GENERATED. DO NOT EDIT IT *** 7 | 8 | 9 | 10 | 11 | #include "random.h" 12 | #include "nrutil.h" 13 | #define IA 16807 14 | #define IM 2147483647 15 | #define AM (1.0/IM) 16 | #define IQ 127773 17 | #define IR 2836 18 | #define NTAB 32 19 | #define NDIV (1+(IM-1)/NTAB) 20 | #define EPS 1.2e-7 21 | #define RNMX (1.0-EPS) 22 | #define LCG_IM 714025 23 | #define LCG_IA 1366 24 | #define LCG_IC 150889 25 | int *ran1A_iy; 26 | int **ran1A_iv; 27 | int *ran1B_iy; 28 | int **ran1B_iv; 29 | int *ran1C_iy; 30 | int **ran1C_iv; 31 | int *ran1D_iy; 32 | int **ran1D_iv; 33 | int *seed1AValue; 34 | int *seed1BValue; 35 | int *seed1CValue; 36 | int *seed1DValue; 37 | void randomStack(uint bSize, uint bnpSize) { 38 | uint b, p; 39 | ran1A_iy = ivector(1, bSize); 40 | ran1A_iv = imatrix(1, bSize, 1, NTAB); 41 | ran1B_iy = ivector(1, bSize); 42 | ran1B_iv = imatrix(1, bSize, 1, NTAB); 43 | for (b = 1; b <= bSize; b++) { 44 | ran1A_iy[b] = 0; 45 | ran1B_iy[b] = 0; 46 | } 47 | seed1AValue = ivector(1, bSize); 48 | seed1BValue = ivector(1, bSize); 49 | if (bnpSize > 0) { 50 | ran1D_iy = ivector(1, bnpSize); 51 | ran1D_iv = imatrix(1, bnpSize, 1, NTAB); 52 | for (p = 1; p <= bnpSize; p++) { 53 | ran1D_iy[p] = 0; 54 | } 55 | seed1DValue = ivector(1, bnpSize); 56 | } 57 | } 58 | void randomUnstack(uint bSize, uint bnpSize) { 59 | free_ivector(ran1A_iy, 1, bSize); 60 | free_imatrix(ran1A_iv, 1, bSize, 1, NTAB); 61 | free_ivector(ran1B_iy, 1, bSize); 62 | free_imatrix(ran1B_iv, 1, bSize, 1, NTAB); 63 | free_ivector(seed1AValue, 1, bSize); 64 | free_ivector(seed1BValue, 1, bSize); 65 | if (bnpSize > 0) { 66 | free_ivector(ran1D_iy, 1, bnpSize); 67 | free_imatrix(ran1D_iv, 1, bnpSize, 1, NTAB); 68 | free_ivector(seed1DValue, 1, bnpSize); 69 | } 70 | } 71 | void randomSetChainParallel(uint b, int value) { 72 | seed1AValue[b] = value; 73 | } 74 | void randomSetUChainParallel(uint b, int value) { 75 | seed1BValue[b] = value; 76 | } 77 | void randomSetUChainParallelVimp(uint b, int value) { 78 | seed1CValue[b] = value; 79 | } 80 | void randomSetChainParallelVimp(uint p, int value) { 81 | seed1DValue[p] = value; 82 | } 83 | void randomSetChainSerial(uint b, int value) { 84 | seed1AValue[1] = value; 85 | } 86 | void randomSetUChainSerial(uint b, int value) { 87 | seed1BValue[1] = value; 88 | } 89 | void randomSetUChainSerialVimp(uint b, int value) { 90 | seed1CValue[1] = value; 91 | } 92 | void randomSetChainSerialVimp(uint p, int value) { 93 | seed1DValue[1] = value; 94 | } 95 | int randomGetChainParallel(uint b) { 96 | return seed1AValue[b]; 97 | } 98 | int randomGetUChainParallel(uint b) { 99 | return seed1BValue[b]; 100 | } 101 | int randomGetUChainParallelVimp(uint b) { 102 | return seed1CValue[b]; 103 | } 104 | int randomGetChainParallelVimp(uint p) { 105 | return seed1DValue[p]; 106 | } 107 | int randomGetChainSerial(uint b) { 108 | return seed1AValue[1]; 109 | } 110 | int randomGetUChainSerial(uint b) { 111 | return seed1BValue[1]; 112 | } 113 | int randomGetUChainSerialVimp(uint b) { 114 | return seed1CValue[1]; 115 | } 116 | int randomGetChainSerialVimp(uint p) { 117 | return seed1DValue[1]; 118 | } 119 | float randomChainParallel(uint b) { 120 | return ran1_generic(& ran1A_iy[b], ran1A_iv[b], & seed1AValue[b]); 121 | } 122 | float randomUChainParallel(uint b) { 123 | return ran1_generic(& ran1B_iy[b], ran1B_iv[b], & seed1BValue[b]); 124 | } 125 | float randomUChainParallelVimp(uint b) { 126 | return ran1_generic(& ran1C_iy[b], ran1C_iv[b], & seed1CValue[b]); 127 | } 128 | float randomChainParallelVimp(uint p) { 129 | return ran1_generic(& ran1D_iy[p], ran1D_iv[p], & seed1DValue[p]); 130 | } 131 | float randomChainSerial(uint b) { 132 | return ran1_generic(& ran1A_iy[1], ran1A_iv[1], & seed1AValue[1]); 133 | } 134 | float randomUChainSerial(uint b) { 135 | return ran1_generic(& ran1B_iy[1], ran1B_iv[1], & seed1BValue[1]); 136 | } 137 | float randomUChainSerialVimp(uint b) { 138 | return ran1_generic(& ran1C_iy[1], ran1C_iv[1], & seed1CValue[1]); 139 | } 140 | float randomChainSerialVimp(uint p) { 141 | return ran1_generic(& ran1D_iy[1], ran1D_iv[1], & seed1DValue[1]); 142 | } 143 | float ran1_generic(int *iy, int *iv, int *idum) { 144 | int j, k; 145 | float temp; 146 | if (*idum <= 0 || !(*iy)) { 147 | if (-(*idum) < 1) { 148 | *idum = 1; 149 | } 150 | else { 151 | *idum = -(*idum); 152 | } 153 | for (j = NTAB+7; j >= 0; j--) { 154 | k = (*idum) / IQ; 155 | *idum = IA * (*idum - k * IQ) - IR * k; 156 | if (*idum < 0) *idum += IM; 157 | if (j < NTAB) iv[j] = *idum; 158 | } 159 | (*iy) = iv[1]; 160 | } 161 | k = (*idum) / IQ; 162 | *idum = IA * (*idum - k * IQ) - IR * k; 163 | if (*idum < 0) *idum += IM; 164 | j = (*iy) / NDIV; 165 | (*iy) = iv[j]; 166 | iv[j] = *idum; 167 | if ((temp = AM * (*iy)) > RNMX) { 168 | return RNMX; 169 | } 170 | else { 171 | return temp; 172 | } 173 | } 174 | void lcgenerator(unsigned int *seed, unsigned char reset) { 175 | if (reset) { 176 | if (*seed >= LCG_IM) (*seed) %= LCG_IM; 177 | } 178 | else { 179 | *seed = (LCG_IA * (*seed) + LCG_IC) % LCG_IM; 180 | } 181 | } 182 | float ran1_original(int *idum) { 183 | int j; 184 | int k; 185 | static int iy = 0; 186 | static int iv[NTAB]; 187 | float temp; 188 | if (*idum <= 0 || !iy) { 189 | if (-(*idum) < 1) { 190 | *idum = 1; 191 | } 192 | else { 193 | *idum = -(*idum); 194 | } 195 | for (j = NTAB+7; j >= 0; j--) { 196 | k = (*idum) / IQ; 197 | *idum = IA * (*idum - k * IQ) - IR * k; 198 | if (*idum < 0) *idum += IM; 199 | if (j < NTAB) iv[j] = *idum; 200 | } 201 | iy = iv[0]; 202 | } 203 | k = (*idum) / IQ; 204 | *idum = IA * (*idum - k * IQ) - IR * k; 205 | if (*idum < 0) *idum += IM; 206 | j = iy / NDIV; 207 | iy = iv[j]; 208 | iv[j] = *idum; 209 | if ((temp = AM * iy) > RNMX) { 210 | return RNMX; 211 | } 212 | else { 213 | return temp; 214 | } 215 | } 216 | #undef IA 217 | #undef IM 218 | #undef AM 219 | #undef IQ 220 | #undef IR 221 | #undef NTAB 222 | #undef NDIV 223 | #undef EPS 224 | #undef RNMX 225 | -------------------------------------------------------------------------------- /man/rfsrc.anonymous.Rd: -------------------------------------------------------------------------------- 1 | \name{rfsrc.anonymous} 2 | \alias{rfsrc.anonymous} 3 | \title{Anonymous Random Forests} 4 | 5 | \description{ 6 | 7 | Anonymous random forests is carefully modified to ensure that the 8 | original training data is not retained. This enables users to share 9 | the trained forest with others without disclosing the underlying data. 10 | 11 | } 12 | 13 | \usage{rfsrc.anonymous(formula, data, forest = TRUE, ...)} 14 | 15 | \arguments{ 16 | \item{formula}{A symbolic description of the model to be fit. If missing, unsupervised splitting is performed.} 17 | 18 | \item{data}{A data frame containing the y-outcome and x-variables.} 19 | 20 | \item{forest}{Logical. Should the forest object be returned? Required for prediction on new data and by many other package functions.} 21 | 22 | \item{...}{Additional arguments passed to \code{\link{rfsrc}}. See the \code{rfsrc} help file for full details.} 23 | } 24 | 25 | 26 | \details{ 27 | 28 | This function calls \code{\link{rfsrc}} and returns a forest object with the original training data removed. This enables users to share their forest while preserving the privacy of their data. 29 | 30 | To enable prediction on new (test) data, certain minimal information from the training data must still be retained. This includes: 31 | \itemize{ 32 | \item Names of the original variables. 33 | \item For factor variables, the levels of each factor. 34 | \item Summary statistics used for imputation: the mean for continuous variables and the most frequent class for factors. 35 | \item Tree topology, including split points used to grow the trees. 36 | } 37 | 38 | For maximal privacy, users are strongly encouraged to replace variable names with non-identifiable labels and convert all variables to continuous format when possible. If factor variables are used, their levels should also be anonymized. However, the user is solely responsible for de-identifying the data and verifying that privacy is maintained. \strong{We provide NO GUARANTEES regarding data confidentiality.} 39 | 40 | \emph{Missing data handling:} Anonymous forests do not support imputation of training data. The option \code{na.action = "na.impute"} is automatically downgraded to \code{"na.omit"}. If training data contain missing values, we recommend pre-imputing them using \code{\link{impute}}. 41 | 42 | Test data, however, \emph{can} be imputed at prediction time: 43 | \itemize{ 44 | \item \code{na.action = "na.impute"} performs a fast imputation by replacing missing values with the training mean (for numeric variables) or most frequent class (for factors). 45 | \item \code{na.action = "na.random"} uses a fast random draw from training distributions for imputation. 46 | } 47 | 48 | Although anonymous forests are compatible with many package functions, they are only guaranteed to work with functions that do not explicitly require access to the original training data. 49 | 50 | } 51 | 52 | \value{ 53 | An object of class \code{(rfsrc, grow, anonymous)}. 54 | } 55 | \author{ 56 | Hemant Ishwaran and Udaya B. Kogalur 57 | } 58 | \seealso{ 59 | \command{\link{rfsrc}} 60 | } 61 | \examples{ 62 | \donttest{ 63 | 64 | ## ------------------------------------------------------------ 65 | ## regression 66 | ## ------------------------------------------------------------ 67 | print(rfsrc.anonymous(mpg ~ ., mtcars)) 68 | 69 | ## ------------------------------------------------------------ 70 | ## plot anonymous regression tree (using get.tree) 71 | ## TBD CURRENTLY NOT IMPLEMENTED 72 | ## ------------------------------------------------------------ 73 | ## plot(get.tree(rfsrc.anonymous(mpg ~ ., mtcars), 10)) 74 | 75 | ## ------------------------------------------------------------ 76 | ## classification 77 | ## ------------------------------------------------------------ 78 | print(rfsrc.anonymous(Species ~ ., iris)) 79 | 80 | ## ------------------------------------------------------------ 81 | ## survival 82 | ## ------------------------------------------------------------ 83 | data(veteran, package = "randomForestSRC") 84 | print(rfsrc.anonymous(Surv(time, status) ~ ., data = veteran)) 85 | 86 | ## ------------------------------------------------------------ 87 | ## competing risks 88 | ## ------------------------------------------------------------ 89 | data(wihs, package = "randomForestSRC") 90 | print(rfsrc.anonymous(Surv(time, status) ~ ., wihs, ntree = 100)) 91 | 92 | ## ------------------------------------------------------------ 93 | ## unsupervised forests 94 | ## ------------------------------------------------------------ 95 | print(rfsrc.anonymous(data = iris)) 96 | 97 | ## ------------------------------------------------------------ 98 | ## multivariate regression 99 | ## ------------------------------------------------------------ 100 | print(rfsrc.anonymous(Multivar(mpg, cyl) ~., data = mtcars)) 101 | 102 | ## ------------------------------------------------------------ 103 | ## prediction on test data with missing values using pbc data 104 | ## cases 1 to 312 have no missing values 105 | ## cases 313 to 418 having missing values 106 | ## ------------------------------------------------------------ 107 | data(pbc, package = "randomForestSRC") 108 | pbc.obj <- rfsrc.anonymous(Surv(days, status) ~ ., pbc) 109 | print(pbc.obj) 110 | 111 | ## mean value imputation 112 | print(predict(pbc.obj, pbc[-(1:312),], na.action = "na.impute")) 113 | 114 | ## random imputation 115 | print(predict(pbc.obj, pbc[-(1:312),], na.action = "na.random")) 116 | 117 | ## ------------------------------------------------------------ 118 | ## train/test setting but tricky because factor labels differ over 119 | ## training and test data 120 | ## ------------------------------------------------------------ 121 | 122 | # first we convert all x-variables to factors 123 | data(veteran, package = "randomForestSRC") 124 | veteran.factor <- data.frame(lapply(veteran, factor)) 125 | veteran.factor$time <- veteran$time 126 | veteran.factor$status <- veteran$status 127 | 128 | # split the data into train/test data (25/75) 129 | # the train/test data have the same levels, but different labels 130 | train <- sample(1:nrow(veteran), round(nrow(veteran) * .5)) 131 | summary(veteran.factor[train, ]) 132 | summary(veteran.factor[-train, ]) 133 | 134 | # grow the forest on the training data and predict on the test data 135 | v.grow <- rfsrc.anonymous(Surv(time, status) ~ ., veteran.factor[train, ]) 136 | v.pred <- predict(v.grow, veteran.factor[-train, ]) 137 | print(v.grow) 138 | print(v.pred) 139 | 140 | 141 | 142 | }} 143 | \keyword{forest} 144 | \keyword{anonymous} 145 | -------------------------------------------------------------------------------- /R/utilities_saveload.R: -------------------------------------------------------------------------------- 1 | ## o must be a list of forests! 2 | fast.save.list <- function(o, path=NULL, testing=FALSE, units="Mb") { 3 | if (is.null(path)) { 4 | path <- paste0(getwd(), "/forest") 5 | } 6 | unlink(path, recursive=TRUE) 7 | dir.create(path, showWarnings=FALSE, recursive=TRUE) 8 | lapply(1:length(o), function(j) { 9 | fast.save(o[[j]], paste0(path, "/forest", j), testing=testing, units=units) 10 | }) 11 | } 12 | ## o must be a list of forests! 13 | fast.load.list <- function(directory, path = NULL, testing = FALSE, units="Mb") { 14 | if (is.null(path)) { 15 | path <- paste0(getwd(), "/", directory) 16 | } 17 | else { 18 | path <- paste0(path, "/", directory) 19 | } 20 | files <- list.files(path) 21 | lapply(1:length(files), function(j) { 22 | fast.load(files[j], path, testing=testing, units=units) 23 | }) 24 | } 25 | fast.save <- function(o, path=NULL, testing=TRUE, units="Mb") { 26 | if (is.null(path)) { 27 | path <- paste0(getwd(), "/forest") 28 | } 29 | unlink(path, recursive=TRUE) 30 | dir.create(path, showWarnings=FALSE, recursive=TRUE) 31 | ## coherence check 32 | if (sum(inherits(o, c("rfsrc", "grow"), TRUE) == c(1, 2)) != 2) 33 | stop("this function only works for objects of class `(rfsrc, grow)'") 34 | ## extract the forest - hereafter this is what we work with 35 | o <- o$forest 36 | ## nativeArrayTNDS 37 | check <- !sapply(o$nativeArrayTNDS, is.null) 38 | if (sum(check) > 0) { 39 | lapply(names(o$nativeArrayTNDS)[check], function(nm) { 40 | fst::write_fst(data.table::data.table(o$nativeArrayTNDS[[nm]]), paste0(path, "/nativeArrayTDNS_", nm, ".rda")) 41 | }) 42 | o$nativeArrayTNDS[names(o$nativeArrayTNDS)[check]] <- NULL 43 | } 44 | ## nativeArray 45 | fst::write_fst(data.table::data.table(o$nativeArray), paste0(path, "/nativeArray.rda")) 46 | o$nativeArray <- NULL 47 | ## x and y 48 | if (!is.null(o$xvar)) { 49 | fst::write_fst(data.table::data.table(o$xvar), paste0(path, "/xvar.rda")) 50 | o$xvar <- NULL 51 | } 52 | if (!is.null(o$yvar)) { 53 | fst::write_fst(data.frame(o$yvar), paste0(path, "/yvar.rda")) 54 | o$yvar <- NULL 55 | } 56 | ## misc. 57 | fst::write_fst(data.frame(o$case.wt), paste0(path, "/case.wt.rda")) 58 | fst::write_fst(data.frame(o$leafCount), paste0(path, "/leafCount.rda")) 59 | fst::write_fst(data.frame(o$seed), paste0(path, "/seed.rda")) 60 | o$case.wt <- o$yvar <- o$leafCount <- o$seed <- NULL 61 | ## convert sample size to a number 62 | o$sampfrac <- o$sampsize(1) 63 | o$sampsize <- NULL 64 | ## survival 65 | if (o$family == "surv" || o$family == "surv-CR") { 66 | check <- !sapply(o$event.info, is.null) 67 | lapply(names(o$event.info)[check], function(nm) { 68 | fst::write_fst(data.table::data.table(o$event.info[[nm]]), paste0(path, "/event.info_", nm, ".rda")) 69 | }) 70 | o$event.info[names(o$event.info[check])] <- NULL 71 | } 72 | ## output size (used for testing) 73 | if (testing) { 74 | print(lsos(o, units=units, n=length(o))) 75 | } 76 | ## forest 77 | saveRDS(o, file=paste0(path, "/forest.rda"), compress=FALSE) 78 | gc(FALSE) 79 | #cat("finished\n") 80 | } 81 | fast.load <- function(directory, path = NULL, testing = FALSE, units="Mb") { 82 | if (is.null(path)) { 83 | path <- paste0(getwd(), "/", directory) 84 | } 85 | else { 86 | path <- paste0(path, "/", directory) 87 | } 88 | files <- list.files(path) 89 | ## forest 90 | o <- readRDS(paste0(path, "/forest.rda")) 91 | gc() 92 | ## nativeArrayTNDS 93 | if (any(grepl("nativeArrayTDNS", files))) { 94 | target <- files[grepl("nativeArrayTDNS", files)] 95 | target <- gsub(".rda", "", target) 96 | target <- gsub("nativeArrayTDNS_", "", target) 97 | lapply(target, function(nm) { 98 | o$nativeArrayTNDS[[nm]] <<- fst::read_fst(paste0(path, "/nativeArrayTDNS_", nm, ".rda"))[[1]] 99 | }) 100 | } 101 | ## nativeArrayTNDS 102 | o$nativeArray <- fst::read_fst(paste0(path, "/nativeArray.rda")) 103 | ## x and y 104 | if (any(grepl("xvar.rda", files))) { 105 | o$xvar <- fst::read_fst(paste0(path, "/xvar.rda")) 106 | } 107 | if (any(grepl("yvar.rda", files))) { 108 | o$yvar <- fst::read_fst(paste0(path, "/yvar.rda")) 109 | if (!is.data.frame(o$yvar)) { 110 | o$yvar <- o$yvar[[1]] 111 | } 112 | } 113 | ## misc. 114 | o$case.wt <- fst::read_fst(paste0(path, "/case.wt.rda"))[[1]] 115 | o$leafCount <- fst::read_fst(paste0(path, "/leafCount.rda"))[[1]] 116 | o$seed <- fst::read_fst(paste0(path, "/seed.rda"))[[1]] 117 | ## sample size --- hard coded 118 | o$sampsize <- function(x){x * o$sampfrac} 119 | ## survival 120 | if (any(grepl("event.info", files))) { 121 | target <- files[grepl("event.info", files)] 122 | target <- gsub(".rda", "", target) 123 | target <- gsub("event.info_", "", target) 124 | lapply(target, function(nm) { 125 | o$event.info[[nm]] <<- fst::read_fst(paste0(path, "/event.info_", nm, ".rda"))[[1]] 126 | }) 127 | } 128 | ## output size (used for testing) 129 | if (testing) { 130 | print(lsos(o, units=units, n=length(o))) 131 | } 132 | ##return the object 133 | o 134 | } 135 | ## improved list of objects 136 | .ls.objects <- function (pos = 1, pattern, order.by, 137 | decreasing=FALSE, head=FALSE, n=5, 138 | units="Gb") { 139 | napply <- function(names, fn) sapply(names, function(x) 140 | fn(get(x, pos = pos))) 141 | names <- ls(pos = pos, pattern = pattern) 142 | obj.class <- napply(names, function(x) as.character(class(x))[1]) 143 | obj.mode <- napply(names, mode) 144 | obj.type <- ifelse(is.na(obj.class), obj.mode, obj.class) 145 | obj.prettysize <- napply(names, function(x) { 146 | format(object.size(x), units = units)}) 147 | obj.size <- napply(names, object.size) 148 | obj.dim <- t(napply(names, function(x) 149 | as.numeric(dim(x))[1:2])) 150 | vec <- is.na(obj.dim)[, 1] & (obj.type != "function") 151 | obj.dim[vec, 1] <- napply(names, length)[vec] 152 | out <- data.frame(obj.type, obj.size, obj.prettysize, obj.dim) 153 | names(out) <- c("Type", "Size", "PrettySize", "Rows", "Columns") 154 | if (!missing(order.by)) 155 | out <- out[order(out[[order.by]], decreasing=decreasing), ] 156 | if (head) 157 | out <- head(out, n) 158 | out 159 | } 160 | ## shorthand for improved list of objects 161 | lsos <- function(..., n=10) { 162 | .ls.objects(..., order.by="Size", decreasing=TRUE, head=TRUE, n=n) 163 | } 164 | --------------------------------------------------------------------------------