├── data ├── ex1.rda ├── ex2.rda ├── actcal.RData ├── biofam.RData ├── ex1.txt.gz ├── famform.rda ├── mvad.RData ├── bfspell.RData └── actcal.tse.RData ├── inst └── doc │ └── TraMineR-state-sequence.pdf ├── demo ├── 00Index ├── Seqdist.R └── Events.R ├── R ├── TraMineR-length.R ├── debuglevel.R ├── seqHtplot.R ├── entropy.R ├── seqe2tse.R ├── seqfplot.R ├── TraMineR-mscore.R ├── cpal.R ├── seqmsplot.R ├── seqrfplot.R ├── seqrplot.R ├── stlab.R ├── seqmtplot.R ├── seqcomp.R ├── seqedist.R ├── seqshift.R ├── seqdplot.R ├── seqiplot.R ├── stslist.meant-methods.R ├── dist2matrix.R ├── TraMineR-extractVer.R ├── seqdim.R ├── seqgbar.R ├── cpal-set.R ├── seqgen.R ├── stlab-set.R ├── seqpm.R ├── read.tda.mdist.R ├── seqxtract.R ├── seqecontain.R ├── checktriangleineq.R ├── seqfcheck.R ├── stslist.freq-methods.R ├── seqsep.R ├── seqfpos.R ├── seqfposend.R ├── seqlength.R ├── alphabet.R ├── seqhasmiss.R ├── zzz.R ├── seqibad.R ├── seqstatf.R ├── seqfind.R ├── TraMineRInternal.R ├── seqasnum.R ├── seqici.R ├── seqmpos.R ├── seqeid.R ├── seqLLCS.R ├── seqLLCP.R ├── seqformat-STS_to_SRS.R ├── seqlength-align.R ├── seqient.R ├── alphabet-set.R ├── stslist.modst-methods.R ├── seqientdiff.R ├── seqmaintokens.R ├── stslist.statd-methods.R ├── seqistatd.R ├── dissvar.R ├── TraMineR-check_helpers.R ├── seqformat-STS_to_DSS.R ├── seqelength.R ├── seqformat-SPS_to_STS.R ├── seqivolatility.R ├── seqstatl.R ├── seqconc.R ├── seqdecomp.R ├── seqeweight.R ├── seqnum.R ├── diss.rep-methods.R ├── seqtransn.R ├── TraMineR-legend.R ├── stslist.rep-methods.R ├── TraMineR-group.R ├── seqformat-STS_to_SPS.R ├── seqdur.R ├── TraMineR-trunc.R ├── seqmeant.R ├── seqprep.R ├── implicativestat.R ├── seqetm.R ├── subseqelist.R ├── seqintegr.R ├── seqtree.R ├── seqsubsn.R ├── seqlegend.R ├── seqstatd.R ├── seqdss.R ├── seqmodst.R ├── seqformat-STS_to_TSE.R ├── seqrf.R ├── plot.stslist.meant.R └── seqtab.R ├── src ├── distancecalculator.cpp ├── traminerdebug.cpp ├── TWEDdistance.h ├── OMdistance.h ├── DHDdistance.cpp ├── LCPdistance.h ├── LCPdistance.cpp ├── NMSMSTdistance.h ├── NMSMSTSoftdistanceII.h ├── DHDdistance.h ├── NMSMSTSoftdistance.h ├── NMSDURSoftdistance.h ├── treeeventmap.h ├── constraint.h ├── OMvdistance.h ├── dist2matrix.c ├── eventdictionary.h ├── prefixtree.h ├── distanceobject.cpp ├── checktriangleineq.c ├── OMPerdistance.h ├── constraint.cpp ├── eventdictionary.cpp ├── OMPerdistanceII.h ├── NMSdistance.h ├── ffunctions.c ├── chisq.cpp ├── treeeventnode.h └── prefixtree.cpp └── man ├── read.tda.mdist.Rd ├── seqdim.Rd ├── seqsep.Rd ├── ex1.Rd ├── seqeid.Rd ├── seqhasmiss.Rd ├── seqLLCS.Rd ├── seqfind.Rd ├── seqfpos.Rd ├── seqcomp.Rd ├── seqLLCP.Rd ├── seqgen.Rd ├── seqmpos.Rd ├── famform.Rd ├── seqlength.Rd ├── seqlength-align.Rd ├── is.stslist.Rd ├── seqfposend.Rd ├── bfspell.Rd ├── seqdecomp.Rd ├── disstree-get-rules.Rd ├── seqpm.Rd ├── disstree-assign.Rd ├── print.stslist.Rd ├── plot.subseqelist.Rd ├── str.eseq.Rd ├── TraMineRInternal.Rd ├── actcal.tse.Rd ├── seqstatl.Rd ├── ex2.Rd ├── seqe.Rd ├── seqnum.Rd ├── disstreeleaf.Rd ├── seqdur.Rd ├── seqistatd.Rd ├── seqconc.Rd ├── seqelength.Rd ├── seqeweight.Rd ├── TraMineR.check.depr.args.Rd ├── stlab.Rd ├── actcal.Rd ├── cpal.Rd ├── seqfcheck.Rd ├── seqecontain.Rd ├── seqstatf.Rd ├── seqsubsn.Rd ├── seqdss.Rd ├── plot.subseqelistchisq.Rd ├── seqibad.Rd ├── seqmaintokens.Rd └── seqtransn.Rd /data/ex1.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/ex1.rda -------------------------------------------------------------------------------- /data/ex2.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/ex2.rda -------------------------------------------------------------------------------- /data/actcal.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/actcal.RData -------------------------------------------------------------------------------- /data/biofam.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/biofam.RData -------------------------------------------------------------------------------- /data/ex1.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/ex1.txt.gz -------------------------------------------------------------------------------- /data/famform.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/famform.rda -------------------------------------------------------------------------------- /data/mvad.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/mvad.RData -------------------------------------------------------------------------------- /data/bfspell.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/bfspell.RData -------------------------------------------------------------------------------- /data/actcal.tse.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/actcal.tse.RData -------------------------------------------------------------------------------- /inst/doc/TraMineR-state-sequence.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cran/TraMineR/HEAD/inst/doc/TraMineR-state-sequence.pdf -------------------------------------------------------------------------------- /demo/00Index: -------------------------------------------------------------------------------- 1 | Rendering Examples for desribing and visualazing sequences 2 | Seqdist Examples for computing distances between sequences 3 | Events Examples for analysing sequences of events 4 | 5 | -------------------------------------------------------------------------------- /R/TraMineR-length.R: -------------------------------------------------------------------------------- 1 | 2 | TraMineR.length <- function(seq, void) { 3 | 4 | totlength <- length(seq) 5 | nvoid <- sum(seq==void, na.rm=TRUE) 6 | l <- totlength - nvoid 7 | 8 | return(l) 9 | 10 | } 11 | -------------------------------------------------------------------------------- /R/debuglevel.R: -------------------------------------------------------------------------------- 1 | 2 | debuglevel <- function(level=NULL) { 3 | if(is.null(level)){ 4 | return(.Call(C_getTraMineRDebugLevel)) 5 | } 6 | .Call(C_setTraMineRDebugLevel,as.integer(level)) 7 | return(level) 8 | 9 | } 10 | -------------------------------------------------------------------------------- /R/seqHtplot.R: -------------------------------------------------------------------------------- 1 | ## ====================== 2 | ## Plotting entropy index 3 | ## ====================== 4 | 5 | seqHtplot <- function(seqdata, group = NULL, main = "auto", ...) { 6 | seqplot(seqdata, group=group, type="Ht", main=main, ...) 7 | } 8 | -------------------------------------------------------------------------------- /R/entropy.R: -------------------------------------------------------------------------------- 1 | ## Compute the entropy of a distribution 2 | 3 | entropy <- function(distrib, base=exp(1)) 4 | { 5 | distrib <- distrib[distrib!=0] 6 | p <- distrib/sum(distrib) 7 | e <- -sum(p*log(p, base=base)) 8 | return(e) 9 | } 10 | -------------------------------------------------------------------------------- /R/seqe2tse.R: -------------------------------------------------------------------------------- 1 | seqe2TSE <- function(eseq){ 2 | tse <- .Call(C_tmrseqetotse, unlist(list(eseq))) 3 | ll <- levels(eseq) 4 | tse <- data.frame(id=tse[[1]], timestamp=tse[[2]], event=factor(tse[[3]], levels=1:length(ll), labels=ll)) 5 | return(tse) 6 | } 7 | -------------------------------------------------------------------------------- /src/distancecalculator.cpp: -------------------------------------------------------------------------------- 1 | #include "distancecalculator.h" 2 | 3 | void DistanceCalculator::finalizeDistanceCalculator(SEXP ptr){ 4 | DistanceCalculator * sdo; 5 | sdo= static_cast(R_ExternalPtrAddr(ptr)); 6 | delete sdo; 7 | } 8 | -------------------------------------------------------------------------------- /R/seqfplot.R: -------------------------------------------------------------------------------- 1 | ## ================================ 2 | ## PLot of the sequences frequency 3 | ## ================================ 4 | 5 | seqfplot <- function(seqdata, group = NULL, main = "auto", ...) { 6 | seqplot(seqdata, group=group, type="f", main=main, ...) 7 | } 8 | -------------------------------------------------------------------------------- /R/TraMineR-mscore.R: -------------------------------------------------------------------------------- 1 | 2 | TraMineR.mscore <- function(seq, slength, statelist, freq) { 3 | mscore <- 0 4 | 5 | for (i in 1:slength) { 6 | si <- which(seq[i]==statelist) 7 | if (length(si)==1) 8 | mscore <- mscore+freq[si, i] 9 | } 10 | return(mscore) 11 | } 12 | 13 | -------------------------------------------------------------------------------- /R/cpal.R: -------------------------------------------------------------------------------- 1 | ## ================================================ 2 | ## Retrieve the color palette from a sequence object 3 | ## ================================================ 4 | 5 | cpal <- function(seqdata) { 6 | palette <- attr(seqdata,"cpal") 7 | return(palette) 8 | } 9 | 10 | -------------------------------------------------------------------------------- /R/seqmsplot.R: -------------------------------------------------------------------------------- 1 | ## ==================================== 2 | ## PLOT OF THE SEQUENCE OF MODAL STATES 3 | ## ==================================== 4 | 5 | seqmsplot <- function(seqdata, group = NULL, main = "auto", ...) { 6 | seqplot(seqdata, group=group, type="ms", main=main, ...) 7 | } 8 | -------------------------------------------------------------------------------- /R/seqrfplot.R: -------------------------------------------------------------------------------- 1 | ## ============================= 2 | ## Plotting medoids of relative frequeny group of sequences 3 | ## ============================= 4 | 5 | seqrfplot <- function(seqdata, group = NULL, main = "auto", ...) { 6 | seqplot(seqdata, group=group, type="rf", main=main, ...) 7 | } 8 | -------------------------------------------------------------------------------- /R/seqrplot.R: -------------------------------------------------------------------------------- 1 | ## ============================================ 2 | ## PLOT A REPRESENTATIVE SEQUENCE 3 | ## ============================================ 4 | 5 | seqrplot <- function(seqdata, group = NULL, main = "auto", ...) { 6 | seqplot(seqdata, group=group, type="r", main=main, ...) 7 | } 8 | -------------------------------------------------------------------------------- /R/stlab.R: -------------------------------------------------------------------------------- 1 | ## ================================================ 2 | ## Retrieve the state labels from a sequence object 3 | ## ================================================ 4 | 5 | stlab <- function(seqdata) { 6 | statelab <- attr(seqdata,"labels") 7 | return(statelab) 8 | } 9 | 10 | -------------------------------------------------------------------------------- /R/seqmtplot.R: -------------------------------------------------------------------------------- 1 | ## ============================================ 2 | ## PLOT OF THE STATES DISTRIBUTION BY TIME UNIT 3 | ## ============================================ 4 | 5 | seqmtplot <- function(seqdata, group = NULL, main = "auto", ...) { 6 | seqplot(seqdata, group=group, type="mt", main=main, ...) 7 | 8 | } 9 | -------------------------------------------------------------------------------- /R/seqcomp.R: -------------------------------------------------------------------------------- 1 | ## ======================= 2 | ## Comparing two sequences 3 | ## ======================= 4 | 5 | seqcomp <- function(x, y) { 6 | 7 | lx <- seqlength(x) 8 | ly <- seqlength(y) 9 | 10 | if (lx!=ly) 11 | return(FALSE) 12 | else if (sum(x[1:lx]==y[1:ly])==lx) 13 | return(TRUE) 14 | else 15 | return(FALSE) 16 | } 17 | -------------------------------------------------------------------------------- /R/seqedist.R: -------------------------------------------------------------------------------- 1 | seqedist <- function(eseq, idcost, vparam, interval=TRUE, norm=TRUE){ 2 | norm <- as.integer(norm) 3 | interval <- as.integer(interval) 4 | return(.Call(C_tmrseqedist, eseq, idcost, vparam, norm, interval)); 5 | } 6 | 7 | seqeage <- function(eseq, event.list) { 8 | return(.Call(C_tmreventinseq, eseq, as.integer(event.list))) 9 | } 10 | -------------------------------------------------------------------------------- /src/traminerdebug.cpp: -------------------------------------------------------------------------------- 1 | #include "TraMineR.h" 2 | int TRAMINER_DEBUG_LEVEL=TRAMINER_DEBUG_LEVEL_DEFAULT; 3 | 4 | extern "C"{ 5 | SEXP setTraMineRDebugLevel(SEXP level){ 6 | TRAMINER_DEBUG_LEVEL= INTEGER(level)[0]; 7 | return R_NilValue; 8 | } 9 | 10 | SEXP getTraMineRDebugLevel(void){ 11 | return Rf_ScalarReal(TRAMINER_DEBUG_LEVEL); 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /R/seqshift.R: -------------------------------------------------------------------------------- 1 | ## ========================================== 2 | ## shifting sequence 3 | ## function used by seqformat, option to='SRS' 4 | ## =========================================== 5 | 6 | seqshift <- function (seq, nbshift) { 7 | seql <- length(seq) 8 | 9 | sseq <- c(rep(NA,(seql-nbshift)), seq[1:nbshift]) 10 | 11 | return(sseq) 12 | } 13 | 14 | 15 | -------------------------------------------------------------------------------- /R/seqdplot.R: -------------------------------------------------------------------------------- 1 | ## ======================== 2 | ## State distribution plot 3 | ## ======================== 4 | 5 | seqdplot <- function(seqdata, group=NULL, main="auto", ...) { 6 | seqplot(seqdata, group=group, type="d", main=main, ...) 7 | } 8 | 9 | seqdHplot <- function(seqdata, group=NULL, main="auto", ...) { 10 | seqplot(seqdata, group=group, type="dH", main=main, ...) 11 | } 12 | -------------------------------------------------------------------------------- /R/seqiplot.R: -------------------------------------------------------------------------------- 1 | ## ============================= 2 | ## Plotting individual sequences 3 | ## ============================= 4 | 5 | seqiplot <- function(seqdata, group = NULL, main = "auto", ...) { 6 | seqplot(seqdata, group=group, type="i", main=main, ...) 7 | } 8 | seqIplot <- function(seqdata, group = NULL, main = "auto", ...) { 9 | seqplot(seqdata, group=group, type="I", main=main, ...) 10 | } 11 | -------------------------------------------------------------------------------- /R/stslist.meant-methods.R: -------------------------------------------------------------------------------- 1 | ## ============================ 2 | ## Methods for stsmeant objects 3 | ## ============================ 4 | 5 | print.stslist.meant <- function(x, digits=2, ...) { 6 | 7 | cn <- colnames(x) 8 | ## Conversion for printing without attributes 9 | x <- as.matrix(x[1:nrow(x),1:ncol(x)]) 10 | colnames(x) <- cn 11 | 12 | NextMethod("print", digits=digits,...) 13 | } 14 | -------------------------------------------------------------------------------- /R/dist2matrix.R: -------------------------------------------------------------------------------- 1 | ########################### 2 | ## Transform dist object to a matrix using fast and efficiant C code 3 | ########################### 4 | 5 | dist2matrix <- function(dist) { 6 | if (inherits(dist, "dist")) { 7 | return(.Call(C_dist2matrix, as.double(dist), attr(dist, "Size"))) 8 | } 9 | else if (is.matrix(dist)) { 10 | return(dist) 11 | } 12 | stop("dist should be a matrix or a \"dist\" object") 13 | } -------------------------------------------------------------------------------- /R/TraMineR-extractVer.R: -------------------------------------------------------------------------------- 1 | extract.ver <- function(x) { 2 | 3 | ver.list <- strsplit(x, "\\.") 4 | ver.unit <- ver.list[[1]][1] 5 | if(length(grep("-", ver.list[[1]][2]))>0) { 6 | ver.dec.list <- strsplit(ver.list[[1]][2], "-") 7 | ver.dec <- ver.dec.list[[1]][1] 8 | ver.bug <- ver.dec.list[[1]][2] 9 | return(c(ver.unit, ver.dec, ver.bug)) 10 | } 11 | 12 | ver.dec <- ver.list[[1]][2] 13 | return(c(ver.unit, ver.dec)) 14 | } 15 | -------------------------------------------------------------------------------- /R/seqdim.R: -------------------------------------------------------------------------------- 1 | ## RETURNS THE DIMENSION OF ONE OR MORE SEQUENCES 2 | 3 | seqdim <- function(seqdata) { 4 | ## search for compressed sequences 5 | format <- seqfcheck(seqdata) 6 | 7 | if (format %in% c(":","-")) seqdata <- seqdecomp(seqdata,sep=format) 8 | 9 | if (is.vector(seqdata)) sdim <- c(1,length(seqdata)) 10 | else sdim <- c(nrow(seqdata),ncol(seqdata)) 11 | 12 | names(sdim) <- c("Rows","Columns") 13 | 14 | return(sdim) 15 | 16 | } 17 | -------------------------------------------------------------------------------- /R/seqgbar.R: -------------------------------------------------------------------------------- 1 | ## ======================================================== 2 | ## Creates a vector of 0 and 1 used for plotting a sequence 3 | ## ======================================================== 4 | 5 | seqgbar <- function(seq, statl, seql) { 6 | 7 | nbstat <- length(statl) 8 | 9 | gbar <- vector("integer", seql*nbstat) 10 | 11 | for (j in 1:seql) 12 | gbar[((j-1)*nbstat)+which(statl==seq[j])] <- 1 13 | 14 | return(gbar) 15 | } 16 | 17 | -------------------------------------------------------------------------------- /R/cpal-set.R: -------------------------------------------------------------------------------- 1 | ## ==================================== 2 | ## Function to set the color palette 3 | ## of a sequence object 4 | ## ==================================== 5 | 6 | "cpal<-" <- function(seqdata, value) { 7 | 8 | if (!inherits(seqdata,"stslist")) 9 | stop("data is not a sequence object, see seqdef function to create one") 10 | 11 | nbstate <- length(alphabet(seqdata)) 12 | 13 | if (length(value)!=nbstate) 14 | stop("number of colors must be",nbstate) 15 | else 16 | attr(seqdata,"cpal") <- value 17 | 18 | seqdata 19 | } 20 | -------------------------------------------------------------------------------- /R/seqgen.R: -------------------------------------------------------------------------------- 1 | ## ============================ 2 | ## Random sequences generation 3 | ## ============================ 4 | 5 | seqgen <- function(n, length, alphabet, p=NULL) { 6 | 7 | ## FROM http://www.cs.chalmers.se/~dubhashi/Courses/BioAlg/Markov%20chains%20and%20DNA%20sequence%20analysis%20in%20R.htm 8 | 9 | m <- matrix(nrow=n, ncol=length) 10 | 11 | for (i in 1:n) 12 | m[i,] <- sample(alphabet, length, replace=TRUE, p) 13 | 14 | colnames(m) <- paste("[",1:length,"]", sep="") 15 | rownames(m) <- 1:n 16 | 17 | return(m) 18 | } 19 | 20 | -------------------------------------------------------------------------------- /R/stlab-set.R: -------------------------------------------------------------------------------- 1 | ## ==================================== 2 | ## Function to set the state labels 3 | ## of a sequence object 4 | ## ==================================== 5 | 6 | "stlab<-" <- function(seqdata, value) { 7 | 8 | if (!inherits(seqdata,"stslist")) 9 | stop("data is not a sequence object, see seqdef function to create one") 10 | 11 | nbstate <- length(alphabet(seqdata)) 12 | 13 | if (length(value)!=nbstate) 14 | stop("number of labels must be",nbstate) 15 | else 16 | attr(seqdata,"labels") <- value 17 | 18 | seqdata 19 | } 20 | -------------------------------------------------------------------------------- /man/read.tda.mdist.Rd: -------------------------------------------------------------------------------- 1 | \name{read.tda.mdist} 2 | \alias{read.tda.mdist} 3 | \title{Read a distance matrix produced by TDA.} 4 | \description{This function reads a distance matrix produced by TDA into an R object. When computing OM distances in TDA, the output is a 'half' matrix stored in a text file as a vector.} 5 | \usage{ 6 | read.tda.mdist(file) 7 | } 8 | \arguments{ 9 | \item{file}{the path to the file containing TDA output.} 10 | } 11 | \value{a R matrix containing the distances.} 12 | %\seealso{\code{\link{}} 13 | %\examples{} 14 | \keyword{Data handling} 15 | -------------------------------------------------------------------------------- /R/seqpm.R: -------------------------------------------------------------------------------- 1 | ## RECHERCHE DE SOUS-SEQUENCES 2 | 3 | seqpm <- function(seqdata, pattern, sep="") { 4 | 5 | if (!inherits(seqdata,"stslist")) { 6 | stop("data is not a sequence object, use 'seqdef' function to create one") 7 | } 8 | 9 | seqdata <- seqconc(seqdata,sep=sep) 10 | 11 | pm <- grep(pattern,seqdata) 12 | nbocc <- length(pm) 13 | 14 | message(" [>] pattern ",pattern," has been found in ", nbocc," sequences") 15 | 16 | res <- list(data.frame(pattern,nbocc),pm) 17 | names(res) <- c("MTab","MIndex") 18 | 19 | 20 | return(res) 21 | } 22 | 23 | 24 | -------------------------------------------------------------------------------- /R/read.tda.mdist.R: -------------------------------------------------------------------------------- 1 | ## ======================================== 2 | ## Read a TDA output into a distance matrix 3 | ## ======================================== 4 | 5 | read.tda.mdist <- function(file) { 6 | tmp <- read.table(file=file,header=FALSE) 7 | Y <- nrow(tmp) 8 | d <- 1+8*Y 9 | n <- (1+sqrt(d))/2 10 | 11 | cat(" [>] reading a",n,"x",n,"matrix\n") 12 | m <- matrix(nrow=n,ncol=n) 13 | diag(m) <- 0 14 | 15 | for (i in 1:Y) { 16 | m[tmp[i,"V1"],tmp[i,"V2"]] <- tmp[i,"V5"] 17 | } 18 | 19 | m[upper.tri(m)] <- t(m)[upper.tri(m)] 20 | 21 | return(m) 22 | } 23 | 24 | 25 | -------------------------------------------------------------------------------- /R/seqxtract.R: -------------------------------------------------------------------------------- 1 | ## ================================= 2 | ## EXTRACT SEQUENCES FROM A DATA SET 3 | ## ================================= 4 | 5 | seqxtract <- function(data, var, data.frame=FALSE) { 6 | 7 | ## tibble transformed into data frame 8 | if (inherits(data, "tbl_df")) data <- as.data.frame(data) 9 | ## Extracting the sequences from the data set 10 | if (missing(var) || is.null(var) || is.na(var[1])) 11 | seqdata <- data 12 | else 13 | seqdata <- subset(data,,var) 14 | 15 | if (data.frame==FALSE) 16 | seqdata <- as.matrix(seqdata) 17 | 18 | return(seqdata) 19 | 20 | } 21 | -------------------------------------------------------------------------------- /R/seqecontain.R: -------------------------------------------------------------------------------- 1 | seqecontain <- function(eseq, event.list, unknown.exclude = FALSE, seq, 2 | eventList, exclude) { 3 | 4 | TraMineR.check.depr.args(alist(eseq = seq, event.list = eventList, unknown.exclude = exclude)) 5 | 6 | if(is.subseqelist(eseq))eseq <- eseq$subseq 7 | if(!is.seqelist(eseq))stop("eseq should be a seqelist. See help on seqecreate.") 8 | dict<-levels.seqelist(eseq) 9 | 10 | elist<-factor(event.list,levels=dict) 11 | if(unknown.exclude)excl=as.integer(c(1)) 12 | else excl=as.integer(c(0)) 13 | return(.Call(C_tmrsequencecontainevent, eseq, as.integer(elist), excl)) 14 | } 15 | -------------------------------------------------------------------------------- /R/checktriangleineq.R: -------------------------------------------------------------------------------- 1 | ## Check all possible 3 distance and check for triangle inequality consistency 2 | 3 | checktriangleineq <- function(mat, warn=TRUE, indices = FALSE, tol = 1e-7) { 4 | ## Take care to get a matrix 5 | mat <- dist2matrix(mat) 6 | ind <- .Call(C_checktriangleineq, mat, as.integer(nrow(mat)), as.double(tol)) 7 | if(is.null(ind)){ 8 | return(TRUE) 9 | } 10 | if (warn) { 11 | warning("At least the indices [", ind[1], ", ", ind[2], "] does not respect the triangle inequality when going through ", ind[3]) 12 | } 13 | if (indices) { 14 | return(ind) 15 | } 16 | return(FALSE) 17 | 18 | } -------------------------------------------------------------------------------- /R/seqfcheck.R: -------------------------------------------------------------------------------- 1 | ## CHECK IF THE SEQUENCES ARE IN THE COMPRESSED FORMAT 2 | 3 | seqfcheck <- function(seqdata) { 4 | seqdata <- as.matrix(seqdata) 5 | 6 | if (is.numeric(seqdata)) { 7 | if (any(seqdata<0,na.rm=TRUE)) format <- "-X" 8 | else format <- "X" 9 | } 10 | else { 11 | if (ncol(seqdata)==1) { 12 | if (length(grep("-",seqdata))>0) format <- "-" 13 | else if (length(grep(":",seqdata))>0) format <- ":" 14 | else format <- "?" 15 | } 16 | else { 17 | if (length(grep("-",seqdata))>0) format <- "-X" 18 | else format <- "X" 19 | } 20 | } 21 | 22 | return(format) 23 | } 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/TWEDdistance.h: -------------------------------------------------------------------------------- 1 | #ifndef TWEDDISTANCECALCULATOR_H 2 | #define TWEDDISTANCECALCULATOR_H 3 | #include "distancecalculator.h" 4 | #include "OMdistance.h" 5 | 6 | class TWEDdistance: public OMdistance{ 7 | protected: 8 | double nu; 9 | double lambda; 10 | 11 | public: 12 | TWEDdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS); 13 | TWEDdistance(TWEDdistance *dc); 14 | virtual DistanceCalculator * copy(){return new TWEDdistance(this);} 15 | virtual void setParameters(SEXP params); 16 | virtual ~TWEDdistance(); 17 | virtual double distance(const int&is, const int& js); 18 | }; 19 | 20 | 21 | #endif 22 | -------------------------------------------------------------------------------- /R/stslist.freq-methods.R: -------------------------------------------------------------------------------- 1 | ## =========================== 2 | ## Methods for stsstatd objects 3 | ## =========================== 4 | 5 | print.stslist.freq <- function(x, digits=2, ...) { 6 | table <- attr(x,"freq") 7 | print(table, digits=digits, ...) 8 | } 9 | 10 | "[.stslist.freq" <- function(x,i,j,drop=FALSE) { 11 | ## Column subscript are not allowed 12 | if (!missing(j)) 13 | stop(" [!] Column subscripts are not allowed", call.=FALSE) 14 | 15 | x <- NextMethod("[") 16 | 17 | if (!missing(i)) { 18 | attr(x,"weights") <- attr(x,"weights")[i] 19 | attr(x,"freq") <- attr(x,"freq")[i,] 20 | } 21 | 22 | return(x) 23 | } 24 | -------------------------------------------------------------------------------- /R/seqsep.R: -------------------------------------------------------------------------------- 1 | 2 | seqsep <- function (seqdata,sl=1, sep="-") { 3 | for (i in 1:length(seqdata)) { 4 | oseq <- seqdata[i] 5 | seql <- nchar(oseq) 6 | 7 | if ((seql %% sl)==0) { 8 | nbs <- seql/sl 9 | if (nbs>0) subseq <- substr(oseq,1,sl) 10 | if (nbs>1) { 11 | for (j in 2:nbs) { 12 | start <- ((j-1)*sl)+1 13 | stop <- start+(sl-1) 14 | subseq <- paste(subseq,sep,substr(oseq,start,stop),sep="") 15 | } 16 | } 17 | seqdata[i] <- subseq 18 | } 19 | else 20 | stop("Number of characters does not match number of states*states length in sequence",i) 21 | } 22 | return(seqdata) 23 | } 24 | -------------------------------------------------------------------------------- /R/seqfpos.R: -------------------------------------------------------------------------------- 1 | ## =============================================================== 2 | ## Search for the first occurence of a given element in a sequence 3 | ## =============================================================== 4 | 5 | statefpos <- function (seqdata, state) { 6 | pos <- which(seqdata==state) 7 | if (length(pos)==0) fpos <- NA 8 | else fpos <- min(pos) 9 | return(fpos) 10 | } 11 | 12 | 13 | seqfpos <- function (seqdata, state) { 14 | 15 | if (!inherits(seqdata,"stslist")) { 16 | stop("data is not a sequence object, use 'seqdef' function to create one") 17 | } 18 | 19 | fpos <- apply(seqdata,1,statefpos,state) 20 | 21 | return(fpos) 22 | } 23 | -------------------------------------------------------------------------------- /R/seqfposend.R: -------------------------------------------------------------------------------- 1 | seqfposend <- function(seqdata,state, with.missing=FALSE, lead=0, from.seq.start=TRUE){ 2 | if (!inherits(seqdata,"stslist")) { 3 | stop("data is not a sequence object, use 'seqdef' function to create one") 4 | } 5 | s.dss <- seqdss(seqdata, with.missing=with.missing) 6 | pos <- seqfpos(s.dss,state) 7 | s.dur <- seqdur(seqdata, with.missing) 8 | if (from.seq.start) { 9 | ## cumulated duration 10 | s.dur <- t(apply(s.dur,1,cumsum)) 11 | } 12 | tl <- vector(length=nrow(s.dur)) 13 | for (i in 1:nrow(s.dur)){ 14 | tl[i] <- ifelse(is.na(pos[i]),0,s.dur[i,pos[i]] + lead) 15 | } 16 | return(tl) 17 | } 18 | -------------------------------------------------------------------------------- /R/seqlength.R: -------------------------------------------------------------------------------- 1 | ## ================================== 2 | ## Returns a vector with the lengths 3 | ## of the sequences in seqdata 4 | ## ================================== 5 | 6 | seqlength <- function(seqdata, with.missing=TRUE) { 7 | 8 | if (!inherits(seqdata,"stslist")) 9 | stop("data is not a sequence object, use 'seqdef' function to create one") 10 | 11 | sl <- ncol(seqdata)-rowSums( seqdata==attr(seqdata,"void"), na.rm=TRUE ) 12 | 13 | if (!with.missing) 14 | sl <- sl-rowSums( seqdata==attr(seqdata,"nr"), na.rm=TRUE ) 15 | 16 | sl <- as.matrix(sl) 17 | colnames(sl) <- "Length" 18 | rownames(sl) <- rownames(seqdata) 19 | 20 | return(sl) 21 | } 22 | -------------------------------------------------------------------------------- /R/alphabet.R: -------------------------------------------------------------------------------- 1 | ## ============================================ 2 | ## Retrieve the alphabet from a sequence object 3 | ## ============================================ 4 | 5 | alphabet <- function(seqdata, with.missing=FALSE) { 6 | 7 | if (inherits(seqdata,c("stslist","PSTf"))){ 8 | statl <- attr(seqdata,"alphabet") 9 | if (isTRUE(with.missing)) statl <- c(statl, attr(seqdata,"nr")) 10 | } 11 | else if (inherits(seqdata,"seqelist")){ 12 | statl <- levels(seqdata) 13 | } 14 | else { 15 | stop("seqdata should be a state sequence object, an event sequence object, or a suffix tree. Use seqdef or seqecreate.") 16 | } 17 | 18 | return(statl) 19 | } 20 | 21 | -------------------------------------------------------------------------------- /man/seqdim.Rd: -------------------------------------------------------------------------------- 1 | \name{seqdim} 2 | \alias{seqdim} 3 | \title{Dimension of a set of sequences} 4 | \description{ 5 | Returns the number of sequences (rows) and the maximum length of a set of sequences. 6 | } 7 | \details{ 8 | The function will first search for separators '-' or ':' in the sequences in order to detect wether they are in the compressed or extended format. 9 | } 10 | \usage{ 11 | seqdim(seqdata) 12 | } 13 | \arguments{ 14 | \item{seqdata}{a set of sequences.} 15 | } 16 | \value{a vector with the number of sequences and the maximum sequence length.} 17 | %\seealso{} 18 | %\examples{} 19 | 20 | \author{Alexis Gabadinho} 21 | 22 | \keyword{Sequence-object attributes} 23 | -------------------------------------------------------------------------------- /man/seqsep.Rd: -------------------------------------------------------------------------------- 1 | \name{seqsep} 2 | \alias{seqsep} 3 | \title{Adds separators to sequences stored as character string} 4 | \description{ 5 | Adds separators to sequences stored as character string. 6 | } 7 | \usage{ 8 | seqsep(seqdata, sl=1, sep="-") 9 | } 10 | \arguments{ 11 | \item{seqdata}{a dataframe or matrix containing sequence data, as vectors of states or events.} 12 | \item{sl}{the length of the states (the number of characters used to represent them). Default is 1.} 13 | \item{sep}{the character used as separator. Set by default as \code{"-"}.} 14 | } 15 | \seealso{ 16 | \code{\link{seqdecomp}}. 17 | } 18 | \examples{ 19 | seqsep("ABAAAAAAD")} 20 | \keyword{Data handling} 21 | -------------------------------------------------------------------------------- /src/OMdistance.h: -------------------------------------------------------------------------------- 1 | #ifndef OMDISTANCECALCULATOR_H 2 | #define OMDISTANCECALCULATOR_H 3 | #include "distancecalculator.h" 4 | 5 | class OMdistance: public DistanceCalculator{ 6 | protected: 7 | double * fmat; 8 | double * scost; 9 | int alphasize; 10 | double indel; 11 | int fmatsize; 12 | double maxscost; 13 | 14 | public: 15 | OMdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS); 16 | OMdistance(OMdistance *dc); 17 | virtual DistanceCalculator* copy(){return new OMdistance(this);} 18 | virtual void setParameters(SEXP params); 19 | virtual ~OMdistance(); 20 | virtual double distance(const int&is, const int& js); 21 | }; 22 | 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /R/seqhasmiss.R: -------------------------------------------------------------------------------- 1 | seqhasmiss <- function(seqdata){ 2 | if (!is.stslist(seqdata)){ 3 | msg.stop("seqdata must be a stslist object!" ) 4 | } 5 | misscode <- c(attr(seqdata,"nr"),attr(seqdata,"void")) 6 | nrcode <- attr(seqdata,"nr") 7 | voidcode <- attr(seqdata,"void") 8 | has.miss <- apply(seqdata,1,function(x) any(x %in% misscode)) 9 | has.nr <- apply(seqdata,1,function(x) any(x %in% nrcode)) 10 | has.void <- apply(seqdata,1,function(x) any(x %in% voidcode)) 11 | cat("There are ",sum(has.miss)," sequences with missings\n", sum(has.nr),"have nr's,", sum(has.void), " have voids") 12 | return(invisible(list(has.miss=has.miss,has.nr=has.nr,has.void=has.void))) 13 | } 14 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | ## .First.lib <- function(lib, pkg) {library.dynam("TraMineR", pkg, lib)} 2 | 3 | .onAttach <- function(libname, pkgname){ 4 | suppressWarnings(descr <- utils::packageDescription("TraMineR")) 5 | Tver <- extract.ver(descr$Version) 6 | if(as.numeric(Tver[2])%%2==0) { 7 | state <- "stable" 8 | } 9 | else { 10 | state <- "development" 11 | } 12 | builtDate <- strsplit(strsplit(descr$Built, ";")[[1]][3], " ")[[1]][2] 13 | packageStartupMessage("\n",descr$Package," ", state, " version ", descr$Version, " (Built: ", builtDate, ")") 14 | packageStartupMessage("Website: ", descr$URL) 15 | packageStartupMessage("Please type 'citation(\"TraMineR\")' for citation information.\n") 16 | } 17 | -------------------------------------------------------------------------------- /R/seqibad.R: -------------------------------------------------------------------------------- 1 | seqibad <- function(seqdata, pow=1, with.missing=FALSE, ...){ 2 | if (!inherits(seqdata, "stslist")) { 3 | stop("[!] seqdata is not a sequence object, see seqdef function to create one") 4 | } 5 | alph <- alphabet(seqdata, with.missing=with.missing) 6 | lalph <- length(alph) 7 | 8 | stprec <- suppressMessages(seqprecstart(seqdata, with.missing=with.missing, ...)) 9 | integr <- suppressMessages(seqintegr(seqdata, with.missing=with.missing, pow=pow)) 10 | 11 | bad <- stprec[1] * integr[,1] 12 | 13 | if (lalph > 1) { 14 | for (i in 2:lalph){ 15 | bad <- bad + stprec[i] * integr[,i] 16 | } 17 | } 18 | 19 | return(bad) 20 | 21 | } 22 | -------------------------------------------------------------------------------- /src/DHDdistance.cpp: -------------------------------------------------------------------------------- 1 | #include "DHDdistance.h" 2 | 3 | double DHDdistance::distance(const int&is, const int& js){ 4 | 5 | 6 | int m=slen[is]; 7 | int n=slen[js]; 8 | // Computing min length 9 | int minimum = m; 10 | if (nsign){} 11 | virtual DistanceCalculator* copy(){return new LCPdistance(this);} 12 | virtual void setParameters(SEXP params){ 13 | sign = INTEGER(getListElement(params, "sign"))[0]; 14 | } 15 | virtual ~LCPdistance(){} 16 | virtual double distance(const int&is, const int& js); 17 | }; 18 | 19 | #endif 20 | -------------------------------------------------------------------------------- /R/seqfind.R: -------------------------------------------------------------------------------- 1 | ## ================================= 2 | ## Find the ocurrences of a sequence 3 | ## ================================= 4 | 5 | seqfind <- function(x,y) { 6 | 7 | if (!inherits(x,"stslist")) 8 | stop("x is not a sequence object, use 'seqdef' function to create one") 9 | 10 | if (!inherits(y,"stslist")) 11 | stop("y is not a sequence object, use 'seqdef' function to create one") 12 | 13 | x.void <- attr(x,"void") 14 | y.void <- attr(y,"void") 15 | 16 | x.conc <- as.vector(seqconc(x, void=x.void)) 17 | y.conc <- as.vector(seqconc(y, void=y.void)) 18 | 19 | occ <- NULL 20 | 21 | for (i in 1:length(x.conc)) 22 | occ <- c(occ,which(y.conc==x.conc[i])) 23 | 24 | return(occ) 25 | } 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /R/TraMineRInternal.R: -------------------------------------------------------------------------------- 1 | TraMineRInternalNodeInit <- function(...){ 2 | return(DTNInit(...)) 3 | } 4 | 5 | TraMineRInternalSplitInit <- function(...){ 6 | return(DTNsplit(...)) 7 | } 8 | 9 | TraMineRInternalLayout <- function(...){ 10 | return(TraMineR.setlayout(...)) 11 | } 12 | 13 | TraMineRInternalSeqeage <- function(...){ 14 | return(seqeage(...)) 15 | } 16 | 17 | TraMineRInternalLegend <- function(...){ 18 | return(TraMineR.legend(...)) 19 | } 20 | 21 | TraMineRInternalSeqgbar <- function(...){ 22 | return(seqgbar(...)) 23 | } 24 | 25 | TraMineRInternalWeightedInertiaDist <- function(diss, diss.size, is.dist, individuals, sweights, var) { 26 | return(.Call(C_tmrWeightedInertiaDist, diss, diss.size, is.dist, individuals, sweights, var)) 27 | } 28 | -------------------------------------------------------------------------------- /R/seqasnum.R: -------------------------------------------------------------------------------- 1 | ## TRANSFORMATION DE SEQUENCES DE CARACTERES EN SEQUENCES NUMERIQUES 2 | ## Returns a matrix with integer coding of the sequences 3 | 4 | ## ToDo: Check if different from seqnum 5 | ## seqnum returns an stslist object with a numeric alphabet 6 | ## seqasnum returns a numeric matrix 7 | 8 | seqasnum <- function(seqdata, with.missing=FALSE) { 9 | 10 | mnum <- matrix(NA,nrow=seqdim(seqdata)[1],ncol=seqdim(seqdata)[2]) 11 | 12 | rownames(mnum) <- rownames(seqdata) 13 | colnames(mnum) <- colnames(seqdata) 14 | 15 | statl <- attr(seqdata,"alphabet") 16 | 17 | if (with.missing) 18 | statl <- c(statl, attr(seqdata, "nr")) 19 | 20 | for (i in 1:length(statl)) { 21 | mnum[seqdata==statl[i]] <- i-1 22 | } 23 | 24 | return(mnum) 25 | 26 | } 27 | -------------------------------------------------------------------------------- /R/seqici.R: -------------------------------------------------------------------------------- 1 | ## ================ 2 | ## Complexity index 3 | ## ================ 4 | 5 | seqici <- function(seqdata, with.missing=FALSE, silent=TRUE) { 6 | 7 | if (!inherits(seqdata,"stslist")) 8 | stop("data is NOT a sequence object, see seqdef function to create one") 9 | 10 | if(!silent) message(" [>] computing complexity index for ",nrow(seqdata)," sequences ...") 11 | 12 | ## Number of transitions 13 | trans <- seqtransn(seqdata, with.missing=with.missing, norm=TRUE) 14 | 15 | ## Longitudinal Entropy 16 | ient <- suppressMessages( 17 | seqient(seqdata, with.missing=with.missing, norm=TRUE) 18 | ) 19 | 20 | ## Complexity index 21 | comp.index <- sqrt(trans * ient) 22 | 23 | colnames(comp.index) <- "C" 24 | 25 | return(comp.index) 26 | } 27 | -------------------------------------------------------------------------------- /R/seqmpos.R: -------------------------------------------------------------------------------- 1 | ## ============================ 2 | ## Number of matching positions 3 | ## ============================ 4 | 5 | seqmpos <- function(seq1, seq2, with.missing=FALSE) { 6 | 7 | if (!inherits(seq1,"stslist") | !inherits(seq2,"stslist")) 8 | stop("sequences must be sequence objects") 9 | 10 | ## Defining the positions to compare with logical values 11 | ## void positions are set to FALSE and hence will not be 12 | ## counted in the sum of matching positions 13 | comp1 <- seq1!=attr(seq1,"void") 14 | comp2 <- seq2!=attr(seq2,"void") 15 | 16 | if (!with.missing) { 17 | comp1 <- comp1 & seq1!=attr(seq1,"nr") 18 | comp2 <- comp2 & seq2!=attr(seq2,"nr") 19 | } 20 | 21 | mpos <- sum(seq1==seq2 & comp1 & comp2) 22 | 23 | return(mpos) 24 | } 25 | -------------------------------------------------------------------------------- /src/LCPdistance.cpp: -------------------------------------------------------------------------------- 1 | #include "LCPdistance.h" 2 | 3 | double LCPdistance::distance(const int&is, const int& js){ 4 | 5 | int m=slen[is]; 6 | int n=slen[js]; 7 | // Computing min length 8 | int minimum = m; 9 | if (n0) { 12 | i=0; 13 | while (sequences[MINDICE(is,i,nseq)]==sequences[MINDICE(js,i,nseq)] && i lgthmin[j] ) 21 | seq.list[[i]][j,(lgthmin[j]+1):lgth[j,i]] <- attr(seq.list[[i]],"void") 22 | } 23 | } 24 | 25 | 26 | return(seq.list) 27 | } 28 | 29 | -------------------------------------------------------------------------------- /R/seqient.R: -------------------------------------------------------------------------------- 1 | ## ======================= 2 | ## Within Sequence Entropy 3 | ## ======================= 4 | 5 | seqient <- function(seqdata, norm=TRUE, base=exp(1), with.missing=FALSE, silent=TRUE) { 6 | 7 | if (!inherits(seqdata,"stslist")) 8 | stop("data is NOT a sequence object, see seqdef function to create one") 9 | 10 | statl <- attr(seqdata,"alphabet") 11 | 12 | if (with.missing) { 13 | statl <- c(statl, attr(seqdata,"nr")) 14 | } 15 | 16 | if (!silent) message(" [>] computing entropy for ",nrow(seqdata)," sequences ...") 17 | 18 | iseqtab <- suppressMessages(seqistatd(seqdata, with.missing=with.missing)) 19 | 20 | ient <- apply(iseqtab,1,entropy, base=base) 21 | ient <- as.matrix(ient) 22 | if (norm==TRUE) { 23 | emax <- log(length(statl)) 24 | ient <- ient/emax 25 | } 26 | 27 | colnames(ient) <- "Entropy" 28 | rownames(ient) <- rownames(seqdata) 29 | 30 | return(ient) 31 | 32 | } 33 | -------------------------------------------------------------------------------- /R/alphabet-set.R: -------------------------------------------------------------------------------- 1 | ## ==================================== 2 | ## Function to set the alphabet 3 | ## of a sequence object 4 | ## ==================================== 5 | 6 | "alphabet<-" <- function(seqdata, value) { 7 | 8 | if (any(duplicated(value))) 9 | stop("[!] duplicated element(s) in value assigned to alphabet: ",value[duplicated(value)]) 10 | 11 | if (!inherits(seqdata,"stslist")) 12 | stop("[!] data is not a sequence object, see seqdef function to create one") 13 | 14 | nbstate <- length(alphabet(seqdata)) 15 | 16 | if (length(value)!=nbstate) 17 | stop("[!] number of states must be ",nbstate) 18 | else { 19 | for (i in 1:dim(seqdata)[2]) 20 | levels(seqdata[,i])[1:nbstate] <- value 21 | 22 | if (isTRUE(all.equal(attr(seqdata,"alphabet"),attr(seqdata,"labels")))) 23 | attr(seqdata,"labels") <- value 24 | 25 | attr(seqdata,"alphabet") <- value 26 | } 27 | 28 | seqdata 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/NMSMSTdistance.h: -------------------------------------------------------------------------------- 1 | #ifndef NMSMSTDISTANCECALCULATOR_H 2 | #define NMSMSTDISTANCECALCULATOR_H 3 | #include "NMSdistance.h" 4 | 5 | class NMSMSTdistance: public SUBSEQdistance{ 6 | protected: 7 | double *e; 8 | double *e1; 9 | double *t; 10 | double *t1; 11 | int rowsize; 12 | double * seqdur; 13 | 14 | public: 15 | NMSMSTdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){} 16 | NMSMSTdistance(NMSMSTdistance* dc);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){} 17 | virtual DistanceCalculator* copy(){return new NMSMSTdistance(this);} 18 | //NMSMSTdistance(const int& pnorm, int * psequences, const int & pnseq, int * pslen, const int &pmaxlen, double *pseqdur); 19 | virtual ~NMSMSTdistance(); 20 | virtual void setParameters(SEXP params); 21 | void computeattr(const int&is, const int& js); 22 | }; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /R/stslist.modst-methods.R: -------------------------------------------------------------------------------- 1 | ## =========================== 2 | ## Methods for stsstatd objects 3 | ## =========================== 4 | 5 | print.stslist.modst <- function(x, digits=2, ...) { 6 | 7 | cat(" [Modal state sequence]\n") 8 | ## x <- NextMethod("print",...) 9 | print.stslist(x,...) 10 | 11 | cat("\n [State frequencies]\n") 12 | print(attr(x,"Frequencies"), digits=digits) 13 | } 14 | 15 | "[.stslist.modst" <- function(x,i,j,drop=FALSE) { 16 | ## Specialized only for column subscript 17 | ## If one column we keep the original data.frame method 18 | ## Otherwise we copy attributes and update "start" value 19 | if (!missing(i)) 20 | stop("row subscripts not allowed!", call.=FALSE) 21 | 22 | if (!missing(j)) { 23 | freq <- attr(x,"Frequencies") 24 | 25 | ## Applying method 26 | x <- NextMethod("[") 27 | 28 | ## Adapting frequencies 29 | attr(x,"Frequencies") <- freq[j] 30 | } 31 | 32 | return(x) 33 | } 34 | -------------------------------------------------------------------------------- /R/seqientdiff.R: -------------------------------------------------------------------------------- 1 | ## ======================= 2 | ## Within Sequence Entropy 3 | ## ======================= 4 | 5 | seqientdiff <- function(seqdata, norm=TRUE) { 6 | 7 | if (!inherits(seqdata, "stslist")) 8 | stop("data is NOT a sequence object, see seqdef function to create one") 9 | 10 | entropydiff <- function(dur, norm) { 11 | len <- sum(dur) 12 | ## If we don't check, the funcion returns NA (division by 0) 13 | ent <- entropy(dur) 14 | if(ent>0 && norm) { 15 | ## The maximum entropy is when length of the DSS=length of the sequence 16 | p <- 1/len 17 | entmax <- (-len)*(p*log(p)) 18 | ent <- ent/entmax 19 | } 20 | return(ent) 21 | } 22 | iseqtab <- seqdur(seqdata) 23 | iseqtab[is.na(iseqtab)] <- 0 24 | ient <- apply(iseqtab,1,entropydiff, norm=norm) 25 | 26 | ient <- as.matrix(ient) 27 | colnames(ient) <- "Hdss" 28 | rownames(ient) <- paste("[",seq(1:length(ient)),"]",sep="") 29 | return(ient) 30 | } 31 | -------------------------------------------------------------------------------- /R/seqmaintokens.R: -------------------------------------------------------------------------------- 1 | seqmaintokens <- function(seqdata, k=8L, mint=NULL, ...) { 2 | 3 | if (!inherits(seqdata,"stslist")){ 4 | stop("data is NOT a state sequence object, see seqdef function to create one", 5 | call. = FALSE) 6 | } 7 | if (!k>=1){ 8 | stop("k must be a strictly positive integer!", 9 | call. = FALSE) 10 | } 11 | 12 | meant <- seqmeant(seqdata, ...) 13 | if (!is.null(mint)){ 14 | main.tokens <- which(meant >= mint) 15 | if (length(main.tokens) > k){ 16 | meant.o <- order(meant[main.tokens], decreasing=TRUE) 17 | main.tokens <- sort(meant.o[1:k]) 18 | } 19 | else if (length(main.tokens) < 1) 20 | message(" !!! No token occurs more than mint times on average!") 21 | } else { 22 | meant.o <- order(meant, decreasing=TRUE) 23 | main.tokens <- sort(meant.o[1:k]) 24 | } 25 | return(main.tokens) 26 | } 27 | -------------------------------------------------------------------------------- /src/NMSMSTSoftdistanceII.h: -------------------------------------------------------------------------------- 1 | #ifndef NMSMSTSoftdistanceIICALCULATOR_H 2 | #define NMSMSTSoftdistanceIICALCULATOR_H 3 | #include "NMSdistance.h" 4 | 5 | class NMSMSTSoftdistanceII: public SUBSEQdistance{ 6 | protected: 7 | double *e; 8 | double *e1; 9 | int rowsize; 10 | double * softmatch; 11 | int alphasize; 12 | 13 | public: 14 | NMSMSTSoftdistanceII(NMSMSTSoftdistanceII *dc);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){} 15 | NMSMSTSoftdistanceII(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){} 16 | virtual DistanceCalculator* copy(){return new NMSMSTSoftdistanceII(this);} 17 | //NMSMSTdistance(const int& pnorm, int * psequences, const int & pnseq, int * pslen, const int &pmaxlen, double *pseqdur); 18 | virtual ~NMSMSTSoftdistanceII(); 19 | virtual void setParameters(SEXP params); 20 | void computeattr(const int&is, const int& js); 21 | }; 22 | 23 | #endif 24 | -------------------------------------------------------------------------------- /R/stslist.statd-methods.R: -------------------------------------------------------------------------------- 1 | ## =========================== 2 | ## Methods for stsstatd objects 3 | ## =========================== 4 | 5 | print.stslist.statd <- function(x, digits=2, ...) { 6 | 7 | ## computing max length of state label 8 | ## to align valid states and entropy tables 9 | rl <- max(nchar(rownames(x$Frequencies))) 10 | ## width <- max(nchar(colnames(x$Frequencies)), 4) 11 | 12 | ident1 <- rep(" ",rl) 13 | ident2 <- paste(rep(" ",rl-1), collapse="") 14 | 15 | cat(ident1,"[State frequencies]\n") 16 | print(x$Frequencies, digits=digits) 17 | 18 | VS <- t(as.matrix(x$ValidStates)) 19 | rownames(VS) <- paste("N",ident2,sep="") 20 | cat("\n", ident1,"[Valid states]\n") 21 | print(VS, digits=digits) 22 | 23 | H <- t(as.matrix(x$Entropy)) 24 | rownames(H) <- paste("H",ident2,sep="") 25 | cat("\n", ident1,"[Entropy index]\n") 26 | print(H, digits=digits) 27 | } 28 | 29 | "[.stslist.statd" <- function(...) { 30 | stop(" [!] Operation not allowed", call.=FALSE) 31 | } 32 | -------------------------------------------------------------------------------- /R/seqistatd.R: -------------------------------------------------------------------------------- 1 | ## ====================================== 2 | ## State distribution for each individual 3 | ## ====================================== 4 | 5 | seqistatd <- function(seqdata, with.missing=FALSE, prop=FALSE) { 6 | 7 | if (!inherits(seqdata,"stslist")) { 8 | stop("data is not a sequence object, see seqdef function to create one") 9 | return() 10 | } 11 | 12 | statl <- alphabet(seqdata) 13 | if (with.missing) { 14 | statl <- c(statl, attr(seqdata,"nr")) 15 | } 16 | 17 | nbstat <- length(statl) 18 | nbseq <- nrow(seqdata) 19 | 20 | iseqtab <- matrix(nrow=nbseq, ncol=nbstat) 21 | 22 | colnames(iseqtab) <- statl 23 | rownames(iseqtab) <- rownames(seqdata) 24 | 25 | message(" [>] computing state distribution for ", nbseq," sequences ...") 26 | 27 | for (i in 1:nbstat) { 28 | iseqtab[,i] <- apply(seqdata,1,function(x) sum(x==statl[i],na.rm=TRUE)) 29 | } 30 | 31 | if (prop) { 32 | iseqtab <- iseqtab/rowSums(iseqtab) 33 | } 34 | 35 | return(iseqtab) 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/DHDdistance.h: -------------------------------------------------------------------------------- 1 | #ifndef DHDDISTANCECALCULATOR_H 2 | #define DHDDISTANCECALCULATOR_H 3 | #include "distancecalculator.h" 4 | 5 | class DHDdistance: public DistanceCalculator{ 6 | protected: 7 | double * scost; 8 | int alphasize; 9 | double maxdist; 10 | 11 | public: 12 | DHDdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS) 13 | :DistanceCalculator(normS, Ssequences, seqdim, lenS), scost(NULL), alphasize(0), maxdist(0){} 14 | DHDdistance(DHDdistance *dc): DistanceCalculator(dc), scost(dc->scost), alphasize(dc->alphasize), maxdist(dc->maxdist){} 15 | virtual DistanceCalculator* copy(){return new DHDdistance(this);} 16 | virtual void setParameters(SEXP params){ 17 | scost = REAL(getListElement(params, "scost")); 18 | alphasize = INTEGER(getListElement(params, "alphasize"))[0]; 19 | maxdist = REAL(getListElement(params, "maxdist"))[0]; 20 | } 21 | virtual ~DHDdistance(){} 22 | virtual double distance(const int&is, const int& js); 23 | }; 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /src/NMSMSTSoftdistance.h: -------------------------------------------------------------------------------- 1 | #ifndef NMSMSTSOFTDISTANCECALCULATOR_H 2 | #define NMSMSTSOFTDISTANCECALCULATOR_H 3 | #include "NMSdistance.h" 4 | 5 | class NMSMSTSoftdistance: public SUBSEQdistance{ 6 | protected: 7 | double *e; 8 | double *e1; 9 | double *t; 10 | double *t1; 11 | int rowsize; 12 | double * seqdur; 13 | double * softmatch; 14 | int alphasize; 15 | 16 | public: 17 | NMSMSTSoftdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){} 18 | NMSMSTSoftdistance(NMSMSTSoftdistance *dc);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){} 19 | virtual DistanceCalculator* copy(){return new NMSMSTSoftdistance(this);} 20 | //NMSMSTdistance(const int& pnorm, int * psequences, const int & pnseq, int * pslen, const int &pmaxlen, double *pseqdur); 21 | virtual ~NMSMSTSoftdistance(); 22 | virtual void setParameters(SEXP params); 23 | void computeattr(const int&is, const int& js); 24 | }; 25 | 26 | #endif 27 | -------------------------------------------------------------------------------- /R/dissvar.R: -------------------------------------------------------------------------------- 1 | ########################### 2 | ## Compute discrepancy 3 | ########################### 4 | 5 | dissvar <- function(diss, weights=NULL, squared=FALSE) { 6 | if (squared) { 7 | diss <- diss^2 8 | } 9 | if (is.null(weights)) { 10 | if (inherits(diss, "dist")) { 11 | return(sum(diss)/(attr(diss, "Size")^2)) 12 | } else if (is.matrix(diss)) { 13 | return(sum(diss)/(2*(nrow(diss)^2))) 14 | } else { 15 | stop("diss argument should be a dist object or a dissimilarity matrix") 16 | } 17 | } 18 | else { 19 | isdist <- inherits(diss, "dist") 20 | if (isdist) { 21 | n <- attr(diss, "Size") 22 | } else if (is.matrix(diss)) { 23 | n <- nrow(diss) 24 | } else { 25 | stop("diss argument should be a dist object or a dissimilarity matrix") 26 | } 27 | if(is.null(weights)) { 28 | weights <- rep(1, n) 29 | } 30 | dvar <- .Call(C_tmrWeightedInertiaDist, diss, as.integer(n), as.integer(isdist), as.integer(1:n), as.double(weights), as.integer(TRUE)) 31 | return(dvar) 32 | } 33 | } -------------------------------------------------------------------------------- /man/seqLLCS.Rd: -------------------------------------------------------------------------------- 1 | \name{seqLLCS} 2 | \alias{seqLLCS} 3 | \title{Compute the length of the longest common subsequence of two sequences} 4 | \description{ 5 | Returns the length of the longest common subsequence of two sequences. This attribute is described in \cite{Elzinga (2008)}. 6 | } 7 | \usage{ 8 | seqLLCS(seq1, seq2) 9 | } 10 | \arguments{ 11 | \item{seq1}{a sequence from a sequence object} 12 | \item{seq2}{a sequence from a sequence object} 13 | } 14 | 15 | \value{an integer being the length of the longest common subsequence of the two sequences.} 16 | 17 | \references{ 18 | Elzinga, Cees H. (2008). Sequence analysis: Metric representations of categorical time 19 | series. \emph{Technical Report}, Department of Social Science Research Methods, Vrije Universiteit, Amsterdam. 20 | } 21 | 22 | \seealso{\code{\link{seqdist}}} 23 | 24 | \examples{ 25 | LCS.ex <- c("S-U-S-M-S-U", "U-S-SC-MC", "S-U-M-S-SC-UC-MC") 26 | LCS.ex <- seqdef(LCS.ex) 27 | seqLLCS(LCS.ex[1,],LCS.ex[3,]) 28 | } 29 | \keyword{Dissimilarity measures} -------------------------------------------------------------------------------- /man/seqfind.Rd: -------------------------------------------------------------------------------- 1 | \name{seqfind} 2 | \alias{seqfind} 3 | \title{Indexes of state sequence(s) x in state sequence object y} 4 | \description{ 5 | Finds the row indexes of state sequence(s) x in the state sequence object y. 6 | } 7 | \usage{ 8 | seqfind(x, y) 9 | } 10 | \arguments{ 11 | \item{x}{a state sequence object containing one or more sequences (\code{\link{seqdef}}).} 12 | \item{y}{a state sequence object.} 13 | } 14 | \seealso{ 15 | \code{}. 16 | } 17 | \value{row index(es) of sequence(s) x in the set of sequences y.} 18 | 19 | \examples{ 20 | data(mvad) 21 | mvad.shortlab <- c("EM", "FE", "HE", "JL", "SC", "TR") 22 | mvad.seq <- seqdef(mvad, states=mvad.shortlab, 15:86) 23 | 24 | ## Finding occurrences of sequence 176 in mvad.seq 25 | seqfind(mvad.seq[176,],mvad.seq) 26 | 27 | ## Finding occurrences of sequence 1 to 8 in mvad.seq 28 | seqfind(mvad.seq[1:8,],mvad.seq) 29 | } 30 | 31 | \author{Alexis Gabadinho (with Gilbert Ritschard for the help page)} 32 | 33 | \keyword{Data handling} 34 | \keyword{State sequences} 35 | -------------------------------------------------------------------------------- /R/TraMineR-check_helpers.R: -------------------------------------------------------------------------------- 1 | # Check if an object is a valid data frame or matrix index. 2 | # 3 | # Otherwise, an error is raised and an information message is displayed. 4 | # 5 | # @param x Object to test. 6 | # name Name of the object to test. 7 | # 8 | # @return Nothing. 9 | # 10 | # @author Pierre-Alexandre Fonta (2016-2017) 11 | 12 | checkindex <- function(x, name) { 13 | if (!is.index(x)) 14 | msg.stop(aprint(name), "must be a positive integer or a string") 15 | } 16 | 17 | # Check if an object is a vector of valid data frame or matrix indexes. 18 | # 19 | # Otherwise, an error is raised and an information message is displayed. 20 | # 21 | # @param x Object to test. 22 | # name Name of the object to test. 23 | # 24 | # @return Nothing. 25 | # 26 | # @author Pierre-Alexandre Fonta (2016-2017) 27 | 28 | checkindexes <- function(x, name) { 29 | if (!is.vector(x) || !xor(is.positive.integers(x), is.strings(x))) 30 | msg.stop(aprint(name), "must be a vector of positive integers or strings") 31 | } 32 | -------------------------------------------------------------------------------- /R/seqformat-STS_to_DSS.R: -------------------------------------------------------------------------------- 1 | # Should only be used through seqformat() 2 | 3 | ## ============================== 4 | ## Convert from STS to DSS format 5 | ## ============================== 6 | 7 | STS_to_DSS <- function(seqdata, 8 | left=NA, right="DEL", gaps=NA, missing=NA, void="%", nr="*") { 9 | 10 | nbseq <- seqdim(seqdata)[1] 11 | maxsl <- seqdim(seqdata)[2] 12 | 13 | out <- matrix(NA, nrow=nbseq, ncol=maxsl) 14 | 15 | rownames(out) <- paste("[",seq(1:nbseq),"]",sep="") 16 | 17 | ## PREPARING THE DATA 18 | seqdata <- as.matrix(seqdata) 19 | seqdata <- seqprep(seqdata, missing=missing, left=left, gaps=gaps, right=right, void=void, nr=nr) 20 | 21 | for (i in 1:nbseq) { 22 | idx <- 1 23 | j <- 1 24 | tmpseq <- seqdata[i,] 25 | sl <- TraMineR.length(tmpseq, void) 26 | 27 | while (j <= sl) { 28 | iseq <- tmpseq[j] 29 | 30 | out[i,idx] <- iseq 31 | 32 | while (j < sl & tmpseq[j+1]==iseq) { 33 | j <- j+1 34 | } 35 | 36 | j <- j+1 37 | idx <- idx+1 38 | } 39 | } 40 | return(out) 41 | } 42 | 43 | -------------------------------------------------------------------------------- /R/seqelength.R: -------------------------------------------------------------------------------- 1 | ## ======================================== 2 | ## Get and set length of eseq 3 | ## ======================================== 4 | 5 | seqelength <- function(eseq, s) { 6 | 7 | TraMineR.check.depr.args(alist(eseq = s)) 8 | 9 | seqelength.internal<-function(eseq){ 10 | if(is.eseq(eseq)){ 11 | return(.Call(C_tmrsequencegetlength, eseq)) 12 | } 13 | return(-1) 14 | } 15 | if(is.seqelist(eseq)){ 16 | as.numeric(sapply(unlist(eseq),seqelength.internal)) 17 | }else if(is.eseq(eseq)){ 18 | as.numeric(seqelength.internal(eseq)) 19 | }else{ 20 | stop("eseq should be a seqelist. See help on seqecreate.") 21 | } 22 | } 23 | 24 | "seqelength<-" <- function(eseq, s, value){ 25 | 26 | TraMineR.check.depr.args(alist(eseq = s)) 27 | 28 | if(!is.seqelist(eseq)) { 29 | stop("eseq should be a seqelist. See help on seqecreate.") 30 | } 31 | if(length(eseq)!=length(value)) { 32 | stop("eseq and len should be of the same size.") 33 | } 34 | .Call(C_tmrsequencesetlength, eseq, as.double(value)) 35 | return(eseq) 36 | } 37 | -------------------------------------------------------------------------------- /R/seqformat-SPS_to_STS.R: -------------------------------------------------------------------------------- 1 | # Should only be used through seqformat() 2 | 3 | SPS_to_STS <- function(seqdata, spsformat, missing = "*") { 4 | nseq <- nrow(seqdata) 5 | trans <- matrix("", nrow = nseq, ncol = 1) 6 | xfix <- spsformat$xfix 7 | if (!identical(xfix, "")) 8 | xfix <- paste0("[", xfix, "]") 9 | sdsep <- spsformat$sdsep 10 | for (i in 1:nseq) { 11 | tmpseq <- na.omit(seqdata[i, ]) 12 | if (length(tmpseq)>0) { 13 | for (s in 1:length(tmpseq)) { 14 | sps <- strsplit(gsub(xfix, "", tmpseq[s]), split = sdsep)[[1]] 15 | seq <- sps[1] 16 | if (seq %in% missing) 17 | seq <- NA 18 | dur <- as.integer(sps[2]) 19 | trans[i] <- 20 | if (s == 1) 21 | paste(trans[i], seq, sep = "") 22 | else 23 | paste(trans[i], seq, sep = "-") 24 | if (dur > 1) 25 | for (r in 2:dur) 26 | trans[i] <- paste(trans[i], "-", seq, sep = "") 27 | } 28 | } 29 | } 30 | sts <- seqdecomp(trans) 31 | return(sts) 32 | } 33 | -------------------------------------------------------------------------------- /man/seqfpos.Rd: -------------------------------------------------------------------------------- 1 | \name{seqfpos} 2 | \alias{seqfpos} 3 | \title{Search for the first occurrence of a given element in a sequence} 4 | \description{ 5 | Returns a vector containing the position of the first occurrence of the given element in each of the sequences in the data set. 6 | } 7 | \details{the state to search for has to be passed as a character string, and must be one of the state returned by the \code{\link{alphabet}} function. If the state is not contained in a sequence, NA is returned for this sequence.} 8 | \usage{ 9 | seqfpos(seqdata, state) 10 | } 11 | \arguments{ 12 | \item{seqdata}{a sequence object (see \code{\link{seqdef}} function).} 13 | \item{state}{the state element to search in the sequences} 14 | } 15 | 16 | \examples{ 17 | data(biofam) 18 | biofam.seq <- seqdef(biofam,10:25) 19 | 20 | ## Searching for the first occurrence of state 1 21 | ## in each of the 5 first sequence of the biofam data set. 22 | seqfpos(biofam.seq[1:5,],"1") 23 | } 24 | 25 | \author{Alexis Gabadinho} 26 | 27 | \keyword{Longitudinal characteristics} 28 | -------------------------------------------------------------------------------- /man/seqcomp.Rd: -------------------------------------------------------------------------------- 1 | \name{seqcomp} 2 | \alias{seqcomp} 3 | \title{Compare two state sequences} 4 | \description{ 5 | Check whether two state sequences are identical. 6 | } 7 | \usage{ 8 | seqcomp(x, y) 9 | } 10 | \arguments{ 11 | \item{x}{a state sequence object containing a single sequence (typically the row of a main sequence 12 | object, see \code{\link{seqdef}})} 13 | \item{y}{a state sequence object containing a single sequence (typically the row of a main sequence 14 | object, see \code{\link{seqdef}})} 15 | } 16 | 17 | \value{\code{TRUE} if sequences are identical, \code{FALSE} otherwise} 18 | 19 | \seealso{ 20 | \code{\link{seqfind}, \link{seqfpos}, \link{seqpm}} 21 | } 22 | \examples{ 23 | data(mvad) 24 | mvad.shortlab <- c("EM", "FE", "HE", "JL", "SC", "TR") 25 | mvad.seq <- seqdef(mvad, states=mvad.shortlab, 15:86) 26 | 27 | ## Comparing sequences 1 and 2 in mvad.seq 28 | seqcomp(mvad.seq[1,],mvad.seq[2,]) 29 | 30 | ## Comparing sequences 176 and 211 in mvad.seq 31 | seqcomp(mvad.seq[176,],mvad.seq[211,]) 32 | } 33 | \keyword{Data handling} 34 | -------------------------------------------------------------------------------- /man/seqLLCP.Rd: -------------------------------------------------------------------------------- 1 | \name{seqLLCP} 2 | \alias{seqLLCP} 3 | \title{Compute the length of the longest common prefix of two sequences} 4 | \description{ 5 | Returns the length of the longest common prefix of two sequences. This attribute is described in \cite{Elzinga (2008)}. 6 | } 7 | \usage{ 8 | seqLLCP(seq1, seq2) 9 | } 10 | \arguments{ 11 | \item{seq1}{a sequence from a sequence object.} 12 | \item{seq2}{a sequence from a sequence object.} 13 | } 14 | 15 | \value{an integer being the length of the longest common prefix of the two sequences.} 16 | \seealso{\code{\link{seqdist}}} 17 | 18 | \references{ 19 | Elzinga, Cees H. (2008). Sequence analysis: Metric representations of categorical time 20 | series. \emph{Technical Report}, Department of Social Science Research Methods, Vrije Universiteit, Amsterdam. 21 | } 22 | 23 | \examples{ 24 | data(famform) 25 | famform.seq <- seqdef(famform) 26 | 27 | ## The LCP's length between sequences 1 and 2 28 | ## in the famform sequence object is 2 29 | seqLLCP(famform.seq[1,],famform.seq[2,]) 30 | } 31 | \keyword{Dissimilarity measures} 32 | -------------------------------------------------------------------------------- /src/NMSDURSoftdistance.h: -------------------------------------------------------------------------------- 1 | #ifndef NMSDURSOFTDISTANCECALCULATOR_H 2 | #define NMSDURSOFTDISTANCECALCULATOR_H 3 | #include "NMSdistance.h" 4 | 5 | class NMSDURSoftdistance: public SUBSEQdistance{ 6 | protected: 7 | double *e; 8 | double *e1; 9 | double *t_i; 10 | double *t1_i; 11 | double *t1_j; 12 | double *t_j; 13 | double *t_ij; 14 | int rowsize; 15 | double * seqdur; 16 | double * softmatch; 17 | int alphasize; 18 | 19 | public: 20 | NMSDURSoftdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){} 21 | NMSDURSoftdistance(NMSDURSoftdistance * dc);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){} 22 | virtual DistanceCalculator* copy(){return new NMSDURSoftdistance(this);} 23 | //NMSDURdistance(const int& pnorm, int * psequences, const int & pnseq, int * pslen, const int &pmaxlen, double *pseqdur); 24 | virtual ~NMSDURSoftdistance(); 25 | virtual void setParameters(SEXP params); 26 | void computeattr(const int&is, const int& js); 27 | }; 28 | 29 | #endif 30 | -------------------------------------------------------------------------------- /R/seqivolatility.R: -------------------------------------------------------------------------------- 1 | seqivolatility <- function(seqdata, w=.5, with.missing=FALSE, adjust=TRUE){ 2 | 3 | if (!inherits(seqdata,"stslist")) 4 | stop(" [!] data is NOT a sequence object, see seqdef function to create one") 5 | if (!is.logical(adjust)) 6 | stop(" [!] adjust should be logical") 7 | if (w>1 | w<0) 8 | stop(" [!] w should be in the range [0, 1]!") 9 | 10 | 11 | alph <- alphabet(seqdata) 12 | ##void <- attr(seqdata,"void") 13 | nr <- attr(seqdata,"nr") 14 | if (with.missing) 15 | alph <- c(alph,nr) 16 | alph.size <- length(alph) 17 | 18 | 19 | transp <- suppressMessages( 20 | seqtransn(seqdata, with.missing=with.missing, norm=TRUE)) 21 | sdist <- suppressMessages( 22 | seqistatd(seqdata, with.missing=with.missing)) 23 | nvisit <- rowSums(sdist>0) 24 | if (adjust) { 25 | ret <- ifelse(nvisit - 1 <= 0, 0, (nvisit - 1)/(alph.size -1)) 26 | ret <- w * ret + (1-w) * transp 27 | } else { 28 | ret <- w * nvisit/alph.size + (1-w) * transp 29 | } 30 | 31 | return(as.vector(ret)) 32 | } 33 | -------------------------------------------------------------------------------- /src/treeeventmap.h: -------------------------------------------------------------------------------- 1 | #ifndef TREEEVENTMAP_H 2 | #define TREEEVENTMAP_H 3 | //#include "treeeventnode.h" 4 | //#include 5 | #include "eventseq.h" 6 | extern "C"{ 7 | #include 8 | #include 9 | } 10 | 11 | //using namespace std; 12 | 13 | //Forward declaration 14 | class TreeEventNode; 15 | //Iterateur pour la map 16 | typedef std::map::iterator TreeEventMapIterator; 17 | //Définition de type pour aleger le code 18 | //Une map sur le type d'événements, et la classe événement 19 | class TreeEventMap: public std::map { 20 | 21 | public: 22 | TreeEventMap():std::map() {} 23 | ~TreeEventMap() {} 24 | void simplifyTreeMap(const double &minSup); 25 | void print(const int & prof, const bool& isbrother); 26 | int countSubsequence(double minSup); 27 | void getSubsequences(SEXP result,double * support, Sequence *s2, int *index,const double &step, SEXP classname,EventDictionary * ed); 28 | void clearAllPointers(); 29 | void clearSupport(); 30 | 31 | }; 32 | 33 | #endif // TREEEVENTMAP_H 34 | -------------------------------------------------------------------------------- /src/constraint.h: -------------------------------------------------------------------------------- 1 | #ifndef _CONSTRAINT_INCLUDED_ 2 | #define _CONSTRAINT_INCLUDED_ 3 | #include 4 | #include 5 | 6 | class Constraint { 7 | 8 | protected: 9 | double maxGap; 10 | double windowSize; 11 | double ageMinBegin; 12 | double ageMaxBegin; 13 | double ageMaxEnd; 14 | int countMethod; 15 | public: 16 | Constraint(const double &mg, 17 | const double &ws, 18 | const double &aminb, 19 | const double &amaxb, 20 | const double &amaxe, 21 | const int &cmethod); 22 | virtual ~Constraint() 23 | { 24 | 25 | } 26 | inline double getmaxGap() 27 | { 28 | return(this->maxGap); 29 | } 30 | inline double getwindowSize() 31 | { 32 | return(this->windowSize); 33 | } 34 | inline double getageMinBegin() 35 | { 36 | return(this->ageMinBegin); 37 | } 38 | inline double getageMaxBegin() 39 | { 40 | return(this->ageMaxBegin); 41 | } 42 | inline double getageMaxEnd() 43 | { 44 | return(this->ageMaxEnd); 45 | } 46 | inline int getcountMethod() 47 | { 48 | return(this->countMethod); 49 | } 50 | }; 51 | #endif 52 | -------------------------------------------------------------------------------- /src/OMvdistance.h: -------------------------------------------------------------------------------- 1 | #ifndef OMvdistanceCALCULATOR_H 2 | #define OMvdistanceCALCULATOR_H 3 | #include "OMdistance.h" 4 | 5 | class OMvdistance: public OMdistance{ 6 | double * seqdur; 7 | double * indellist; 8 | int sublink; 9 | public: 10 | OMvdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS); 11 | OMvdistance(OMvdistance *dc); 12 | virtual DistanceCalculator* copy(){return new OMvdistance(this);} 13 | virtual void setParameters(SEXP params); 14 | virtual ~OMvdistance(); 15 | virtual double distance(const int&is, const int& js); 16 | inline double getIndel(const int& indice, const int& state){ 17 | return this->indellist[state]*seqdur[indice]; 18 | } 19 | inline double getSubCost(const int& i_state, const int& j_state, const int& i_state_indice, const int& j_state_indice){ 20 | if (sublink==1) { 21 | return(scost[MINDICE(i_state, j_state, alphasize)] * (seqdur[i_state_indice]+seqdur[j_state_indice])); 22 | } 23 | return(scost[MINDICE(i_state, j_state, alphasize)] * sqrt(seqdur[i_state_indice]*seqdur[j_state_indice])); 24 | } 25 | }; 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /src/dist2matrix.c: -------------------------------------------------------------------------------- 1 | #include "TraMineR.h" 2 | 3 | SEXP dist2matrix(SEXP diss, SEXP diss_size) { 4 | int n=INTEGER(diss_size)[0]; 5 | SEXP ans; 6 | PROTECT(ans = allocMatrix(REALSXP, n, n)); 7 | double *matrix=REAL(ans); 8 | double *dmat=REAL(diss); 9 | // Rprintf("mlen = %i \n", mlen); 10 | // Rprintf("ilen = %i \n", ilen); 11 | int i, j, i_indiv, base_indice; 12 | double r; 13 | for (i=0;i 4 | #include 5 | #include 6 | #include 7 | #include "TraMineR.h" 8 | 9 | 10 | 11 | class EventDictionary: public std::map { 12 | int numseq; 13 | public: 14 | EventDictionary():numseq(0) {} 15 | EventDictionary(SEXP flist); 16 | virtual ~EventDictionary(); 17 | bool codeExists(const int &code) const; 18 | //int sprint(char * buffer, const char* start, const int&code)const; 19 | void addSequence() { 20 | this->numseq++; 21 | } 22 | void removeSequence() { 23 | this->numseq--; 24 | } 25 | bool shouldDelete() { 26 | return this->numseq<1; 27 | } 28 | SEXP getDictionary()const; 29 | 30 | protected: 31 | 32 | private: 33 | 34 | }; 35 | typedef EventDictionary::iterator EventDictionaryIterator; 36 | 37 | class EventSet: public std::set{ 38 | 39 | public: 40 | EventSet():std::set(){} 41 | inline bool contain(const int& code) const{ 42 | return this->find(code)!=this->end(); 43 | } 44 | void add(SEXP elist); 45 | 46 | }; 47 | #endif // EVENTDICTIONARY_H 48 | -------------------------------------------------------------------------------- /R/seqstatl.R: -------------------------------------------------------------------------------- 1 | ## ============================================ 2 | ## Returns the alphabet for a sequence data set 3 | ## ============================================ 4 | 5 | seqstatl <- function(data, var=NULL, format='STS') { 6 | 7 | listform <- c("STS","SPS","SPELL","DSS") 8 | if (!format %in% listform) 9 | stop("Input format must be one of: ", listform) 10 | 11 | ## Extracting the sequences from the data set 12 | seqdata <- seqxtract(data, var) 13 | 14 | ## 15 | if (format=='SPS') seqdata <- seqformat(seqdata, from = "SPS", to = "STS") 16 | 17 | ## Convert into the extended format to list states/events 18 | if (seqfcheck(seqdata) %in% c(":","-")) 19 | seqdata <- seqdecomp(seqdata, sep=seqfcheck(seqdata)) 20 | 21 | statl <- sort(unique(as.vector(seqdata))) 22 | 23 | ## IF states are numeric values, sort them as integer 24 | ## (if sorted as characters, 10 will be after 1, 25 | ## not after 9) 26 | #statnum <- suppressWarnings(as.integer(statl)) 27 | ## using as.numeric to avoid issue with non-integer state value 28 | statnum <- suppressWarnings(as.numeric(statl)) 29 | 30 | if (all(is.na(statnum)==FALSE)) statl <- sort(statnum) 31 | 32 | return(statl) 33 | } 34 | -------------------------------------------------------------------------------- /man/seqmpos.Rd: -------------------------------------------------------------------------------- 1 | \name{seqmpos} 2 | \alias{seqmpos} 3 | \title{Number of matching positions between two sequences.} 4 | \description{ 5 | Returns the number of common elements, i.e., same states appearing at the same position in the two sequences. 6 | } 7 | \usage{ 8 | seqmpos(seq1, seq2, with.missing=FALSE) 9 | } 10 | \arguments{ 11 | \item{seq1}{a sequence from a sequence object.} 12 | \item{seq2}{a sequence from a sequence object.} 13 | \item{with.missing}{if \code{TRUE}, gaps appearing at the same position in both sequences are also considered as common elements}. 14 | } 15 | \seealso{ 16 | \code{\link{seqLLCP}}, \code{\link{seqLLCS} }. 17 | } 18 | \examples{ 19 | data(famform) 20 | famform.seq <- seqdef(famform) 21 | 22 | seqmpos(famform.seq[1,],famform.seq[2,]) 23 | seqmpos(famform.seq[2,],famform.seq[4,]) 24 | 25 | ## Example with gaps in sequences 26 | a <- c(NA,"A",NA,"B","C") 27 | b <- c(NA,"C",NA,"B","C") 28 | 29 | ex1.seq <- seqdef(rbind(a,b)) 30 | 31 | seqmpos(ex1.seq[1,], ex1.seq[2,]) 32 | seqmpos(ex1.seq[1,], ex1.seq[2,], with.missing=TRUE) 33 | 34 | } 35 | 36 | \author{Alexis Gabadinho (with Gilbert Ritschard for help page)} 37 | 38 | \keyword{Dissimilarity measures} 39 | -------------------------------------------------------------------------------- /src/prefixtree.h: -------------------------------------------------------------------------------- 1 | #ifndef PREFIXTREE_H 2 | #define PREFIXTREE_H 3 | #include "eventseq.h" 4 | #include "treeeventnode.h" 5 | #include "treeeventmap.h" 6 | #include 7 | #include "eventdictionary.h" 8 | #include "constraint.h" 9 | 10 | class TreeEventNode; 11 | 12 | 13 | /** 14 | Prefix Tree base node 15 | */ 16 | 17 | 18 | class PrefixTree { 19 | //Start of the tree 20 | TreeEventMap child; 21 | public: 22 | //Ctor 23 | PrefixTree(); 24 | //Dtor 25 | virtual ~PrefixTree(); 26 | // void addSequence(Sequence *s,const double &maxGap,const double &windowSize, const double & ageMin, const double & ageMax,const double & ageMaxEnd, const int& k); 27 | void addSequence(Sequence *s, Constraint *cst, const int& k); 28 | void simplifyTree(double minSup); 29 | int countSubsequence(double minSup); 30 | //Give an overview of this tree (paramètre prof==profondeur, interne) 31 | void print(); 32 | //Type of this event 33 | void getSubsequences(SEXP result,double * support, int *index, SEXP classname, EventDictionary * ed); 34 | void clearSupport() { 35 | this->child.clearSupport(); 36 | } 37 | }; 38 | 39 | #endif // PREFIXTREE_H 40 | -------------------------------------------------------------------------------- /man/famform.Rd: -------------------------------------------------------------------------------- 1 | \name{famform} 2 | \docType{data} 3 | \alias{famform} 4 | \title{Example data set: sequences of family formation} 5 | \description{ 6 | This data set contains 5 sequences of family formation histories, used by Elzinga (2008) to introduce several metrics for computing distances between sequences. These sequences don't contain information about the duration spent in each state, they contain only distinct successive states. 7 | } 8 | \details{ 9 | The sequences are in `STS' format and stored in character strings with states separated with `-'. The alphabet is made of the five tokens M, MC, S, SC, and U. 10 | 11 | This data set is used in TraMineR's manual to crosscheck some results with those presented by Elzinga. 12 | } 13 | \usage{data(famform)} 14 | \format{A data frame with 5 rows and 1 variable.} 15 | \source{Elzinga (2008)} 16 | \references{ 17 | Elzinga, Cees H. (2008). Sequence analysis: Metric representations of categorical time 18 | series. Non published manuscript. VU University, Amsterdam. 19 | } 20 | 21 | \examples{ 22 | data(famform) 23 | ff.seq <- seqdef(famform) 24 | seqiplot(ff.seq) 25 | } 26 | 27 | \author{Gilbert Ritschard and Alexis Gabadinho} 28 | \keyword{Datasets} 29 | -------------------------------------------------------------------------------- /src/distanceobject.cpp: -------------------------------------------------------------------------------- 1 | /* #include "distanceobject.h" 2 | 3 | DistanceObject::DistanceObject(SEXP magicIndexS, SEXP magicSeqS){ 4 | this->magicIndex=INTEGER(magicIndexS); 5 | this->magicSeq=INTEGER(magicSeqS); 6 | this->finalnseq=Rf_length(magicSeqS); 7 | PROTECT(ans = Rf_allocVector(REALSXP, (finalnseq*(finalnseq-1)/2))); 8 | result=REAL(ans); 9 | } 10 | DistanceObject::~DistanceObject(){ 11 | UNPROTECT(1); 12 | } 13 | 14 | void DistanceObject::setDistance(const int &is,const int &js, const double& cmpres){ 15 | int j_start=magicIndex[js]; 16 | int j_end=magicIndex[js+1]; 17 | int i_start=magicIndex[is]; 18 | int i_end=magicIndex[is+1]; 19 | int i_index, j_index, i, j, base_index; 20 | for(i=i_start;i (%d,%d)(%d) => %f \n",is,js,i_index,j_index,(base_index),cmpres); 28 | result[base_index]=cmpres; 29 | } 30 | } 31 | } 32 | } 33 | */ 34 | 35 | -------------------------------------------------------------------------------- /man/seqlength.Rd: -------------------------------------------------------------------------------- 1 | \name{seqlength} 2 | \alias{seqlength} 3 | \title{Sequence length} 4 | \description{ 5 | Returns the length of sequences. 6 | } 7 | \details{ 8 | The length of a sequence is computed by counting its number of non void elements, i.e. including non-void missing values. The \code{seqlength} function returns a vector containing the length of each sequence in the provided sequence object. 9 | } 10 | \usage{ 11 | seqlength(seqdata, with.missing=TRUE) 12 | } 13 | \arguments{ 14 | \item{seqdata}{a sequence object created with the \code{\link{seqdef}} function.} 15 | 16 | \item{with.missing}{logical: should non-void missing values be treated as a regular state? 17 | Default is \code{TRUE}. 18 | If \code{FALSE} missing values are considered as void.} 19 | } 20 | \seealso{\code{\link{seqlength.align}} 21 | } 22 | \examples{ 23 | ## Loading the 'famform' example data set 24 | data(famform) 25 | 26 | ## Defining a sequence object with the 'famform' data set 27 | ff.seq <- seqdef(famform) 28 | 29 | ## Retrieving the length of the sequences 30 | ## in the ff.seq sequence object 31 | seqlength(ff.seq) 32 | } 33 | 34 | \author{Alexis Gabadinho and Gilbert Ritschard} 35 | \keyword{Longitudinal characteristics} 36 | -------------------------------------------------------------------------------- /R/seqconc.R: -------------------------------------------------------------------------------- 1 | ## ======================================== 2 | ## Concatenates vectors of states or events 3 | ## into character strings 4 | ## ======================================== 5 | 6 | sconc <- function(seqdata, sep, void) { 7 | 8 | if (is.na(void)) 9 | vi <- !is.na(seqdata) 10 | else if (!is.null(void)) 11 | vi <- seqdata!=void 12 | else vi <- 1:length(seqdata) 13 | 14 | return(paste(seqdata[vi], collapse=sep)) 15 | } 16 | 17 | seqconc <- function (data, var=NULL, sep="-", vname="Sequence", void=NA) { 18 | 19 | if (inherits(data,"stslist")) { 20 | void <- attr(data,"void") 21 | cseq <- apply(data, 1, sconc, sep, void) 22 | cseq <- as.matrix(cseq) 23 | rownames(cseq) <- rownames(data) 24 | } 25 | else { 26 | seqdata <- seqxtract(data, var) 27 | 28 | if (seqdim(seqdata)[1]==1) 29 | cseq <- sconc(seqdata,sep, void) 30 | else 31 | cseq <- apply(seqdata, 1, sconc, sep, void) 32 | 33 | cseq <- as.matrix(cseq) 34 | 35 | ## Rows and column names for the output 36 | if (is.null(rownames(data))){ 37 | rownames(cseq) <- paste("[",seq(1:length(cseq)),"]",sep="") 38 | } else { 39 | rownames(cseq) <- rownames(data) 40 | } 41 | } 42 | 43 | colnames(cseq) <- vname 44 | 45 | return(cseq) 46 | } 47 | -------------------------------------------------------------------------------- /man/seqlength-align.Rd: -------------------------------------------------------------------------------- 1 | \name{seqlength.align} 2 | \alias{seqlength.align} 3 | \title{Align sequence length across domains} 4 | \description{ 5 | Sets lengths of sequences of multiple domains as the shortest lengths across domains. 6 | } 7 | \details{ 8 | Sequences in the sequence objects are assumed to be ordered conformably. The length of the \eqn{i}-th sequence in each domain is set as the length of the shortest \eqn{i}-th sequence of the domains. The reduction of length is done by filling end positions with voids. 9 | } 10 | \usage{ 11 | seqlength.align(seq.list) 12 | } 13 | \arguments{ 14 | \item{seq.list}{list of sequence objects (of class \code{stslist}) created with the \code{\link{seqdef}} function. The sequence objects must all have the same number of sequences.} 15 | } 16 | \seealso{\code{\link{seqlength}}} 17 | \examples{ 18 | ## Using the ex1 data set with sequences of different length 19 | data(ex1) 20 | s1 <- seqdef(ex1[,1:13]) 21 | seqlength(s1) 22 | 23 | ## sequence object s2 with a shorter 1st sequence 24 | 25 | s2 <- s1 26 | s2[1,8:13] <- attr(s2,"void") 27 | seqlength(s2) 28 | 29 | ## aligning sequence lengths 30 | seqlength.align(list(s1,s2)) 31 | 32 | } 33 | 34 | \author{Gilbert Ritschard} 35 | \keyword{Longitudinal characteristics} 36 | -------------------------------------------------------------------------------- /src/checktriangleineq.c: -------------------------------------------------------------------------------- 1 | #include "TraMineR.h" 2 | 3 | SEXP checktriangleineq(SEXP mat, SEXP matsize, SEXP tolS) { 4 | int n=INTEGER(matsize)[0]; 5 | double tol = REAL(tolS)[0]; 6 | SEXP ans; 7 | 8 | double *matrix=REAL(mat); 9 | // Rprintf("mlen = %i \n", mlen); 10 | // Rprintf("ilen = %i \n", ilen); 11 | int i, j, z, i_indiv, j_indiv; 12 | double d; 13 | for (i=0;i= tol) { 21 | PROTECT(ans = Rf_allocVector(INTSXP, 3)); 22 | INTEGER(ans)[0] =i+1; 23 | INTEGER(ans)[1] =j+1; 24 | INTEGER(ans)[2] =z+1; 25 | UNPROTECT(1); 26 | return ans; 27 | } 28 | 29 | } 30 | // Rprintf("index coord(%i,%i)=%f\n",indiv[i],indiv[j], distmatrix[TMRMATRIXINDEX(indiv[i],indiv[j],mlen)]); 31 | // Rprintf("cindex coord(%i,%i)=%f, (%i)\n",i,j, distmatrix[TMRMATRIXINDEX(indiv[i],indiv[j],mlen)],TMRMATRIXINDEX(indiv[i],indiv[j],mlen)); 32 | } 33 | } 34 | // Rprintf("Sum = %f\n",(*result)); 35 | return R_NilValue; 36 | // Rprintf("Inertia = %f\n",(*result)); 37 | } 38 | -------------------------------------------------------------------------------- /R/seqdecomp.R: -------------------------------------------------------------------------------- 1 | ## ===================================================== 2 | ## Translate sequences as character strings into vectors 3 | ## (one column (variable) for each state/event) 4 | ## ===================================================== 5 | 6 | seqdecomp <- function(data, var=NULL, sep="-", miss="NA", vnames=NULL) { 7 | 8 | rownames <- rownames(data) 9 | ## Extracting the sequences from the data set 10 | seqdata <- seqxtract(data, var) 11 | 12 | seqdata <- as.vector(seqdata) 13 | nbseq <- length(seqdata) 14 | 15 | ## Splitting the character strings 16 | tmp <- strsplit(seqdata, split=sep) 17 | 18 | ## We first look for the max sequence length 19 | sl <- sapply(tmp,length) 20 | lmax <- max(sl) 21 | 22 | sdecomp <- matrix(nrow=nbseq, ncol=lmax) 23 | if (is.null(rownames)){ 24 | rownames(sdecomp) <- paste("[",seq(1:nbseq),"]",sep="") 25 | } else { 26 | rownames(sdecomp) <- rownames 27 | } 28 | if (is.null(vnames)) 29 | colnames(sdecomp) <- paste("[",seq(1:lmax),"]",sep="") 30 | else 31 | colnames(sdecomp) <- vnames 32 | 33 | for (i in 1:nbseq) { 34 | seq <- tmp[[i]] 35 | seq[seq %in% miss] <- NA 36 | if (sl[i] < lmax) seq <- c(seq,rep(NA,lmax-sl[i])) 37 | sdecomp[i,] <- seq 38 | } 39 | 40 | return(sdecomp) 41 | 42 | } 43 | 44 | 45 | -------------------------------------------------------------------------------- /man/is.stslist.Rd: -------------------------------------------------------------------------------- 1 | \name{is.stslist} 2 | \alias{is.stslist} 3 | \title{Test if is a proper state sequence (stslist) object} 4 | \description{The function tests whether \code{x} is of class \code{stslist} and if its \code{weights} attribute has the expected length and names. 5 | } 6 | 7 | \usage{ 8 | is.stslist(x) 9 | } 10 | \arguments{ 11 | \item{x}{object to be tested.} 12 | } 13 | \value{ 14 | Logical: result of the test. 15 | } 16 | \seealso{ 17 | \code{\link{seqdef}} 18 | } 19 | \examples{ 20 | ## Creating a sequence object with the columns 13 to 24 21 | ## in the 'actcal' example data set 22 | data(biofam) 23 | biofam <- biofam[sample(nrow(biofam),300),] 24 | biofam.lab <- c("Parent", "Left", "Married", "Left+Marr", 25 | "Child", "Left+Child", "Left+Marr+Child", "Divorced") 26 | biofam.seq <- seqdef(biofam[,10:25], weights=biofam$wp00tbgs) 27 | 28 | is.stslist(biofam.seq) #TRUE 29 | 30 | attr(biofam.seq,"weights") <- NULL 31 | is.stslist(biofam.seq) #TRUE 32 | 33 | attr(biofam.seq,"weights") <- rep(1, nrow(biofam.seq)) 34 | is.stslist(biofam.seq) #FALSE 35 | 36 | w <- rep(1, nrow(biofam.seq)) 37 | names(w) <- rownames(biofam.seq) 38 | attr(biofam.seq,"weights") <- w 39 | is.stslist(biofam.seq) #TRUE 40 | 41 | } 42 | 43 | \author{Gilbert Ritschard} 44 | 45 | \keyword{State sequences} 46 | -------------------------------------------------------------------------------- /man/seqfposend.Rd: -------------------------------------------------------------------------------- 1 | \name{seqfposend} 2 | \alias{seqfposend} 3 | \title{End of first spell in given state} 4 | \description{Returns the position in the sequences of end of first spell in a given state 5 | } 6 | 7 | \usage{ 8 | seqfposend(seqdata, state, with.missing=FALSE, lead=0, from.seq.start=TRUE) 9 | } 10 | 11 | \arguments{ 12 | \item{seqdata}{State sequence object of class \code{stslist} as produced by \code{\link{seqdef}}.} 13 | \item{state}{Element of the alphabet of \code{seqdata}.} 14 | \item{with.missing}{Logical. Should non-void missing values be considered as regular states? See \code{\link{seqdss}} and \code{\link{seqdur}}.} 15 | \item{lead}{Integer. Value to be added to the end position.} 16 | \item{from.seq.start}{Logical. Should position be computed from the start of the sequence? Default is \code{TRUE}. If \code{FALSE}, position is computed from the start of the spell.} 17 | } 18 | 19 | \value{ 20 | Vector of integers giving position of end of spell in the sequences. 21 | } 22 | 23 | \seealso{ 24 | \code{\link{seqfpos}} 25 | } 26 | 27 | \author{Gilbert Ritschard} 28 | 29 | \examples{ 30 | ## End of spell in further education (FE) in first 10 mvad sequences 31 | 32 | data(mvad) 33 | m.seq <- seqdef(mvad[1:10,17:86]) 34 | seqfposend(m.seq, state="FE") 35 | 36 | } 37 | 38 | -------------------------------------------------------------------------------- /man/bfspell.Rd: -------------------------------------------------------------------------------- 1 | \name{bfspell} 2 | \docType{data} 3 | \alias{bfspell} 4 | \alias{bfspell20} 5 | \alias{bfpdata20} 6 | 7 | \title{Example data set: First 20 biofam sequences in SPELL form} 8 | 9 | \description{First 20 sequences of the \code{\link{biofam}} data set in SPELL form. The data serve to illustrate the use of \code{\link{seqformat}} for converting SPELL data into STS (horizontal) form. 10 | } 11 | 12 | 13 | \details{ 14 | The states are coded with the following short labels\cr 15 | \cr 16 | P = "Parent" \cr 17 | L = "Left" \cr 18 | M = "Married" \cr 19 | LM = "Left+Marr" \cr 20 | C = "Child" \cr 21 | LC = "Left+Child" \cr 22 | LMC = "Left+Marr+Child" \cr 23 | D = "Divorced" \cr 24 | \cr 25 | The data is a SPELL representation of \code{biofam[1:20,10:25]}, corresponding to 20 family life sequences between ages 15 and 30. 26 | } 27 | 28 | \usage{ 29 | data(bfspell) 30 | } 31 | 32 | \format{A data set with two data frames: \code{bfspell20} with one row per spell and \code{bfpdata20} with one row per id. The \code{bfspell20} data frame contains the spell data themselves (4 variables \code{id}, \code{begin}, \code{end}, \code{states}) and \code{bfpdata20} the year when aged 15 (2 variables \code{id}, \code{when15}). 33 | } 34 | \seealso{ 35 | \code{\link{biofam}} 36 | } 37 | \author{Gilbert Ritschard and Alexis Gabadinho} 38 | 39 | \keyword{Datasets} 40 | -------------------------------------------------------------------------------- /R/seqeweight.R: -------------------------------------------------------------------------------- 1 | ## ======================================== 2 | ## Get and set weight of eseq 3 | ## ======================================== 4 | 5 | seqeweight <- function(eseq, s) { 6 | 7 | TraMineR.check.depr.args(alist(eseq = s)) 8 | 9 | seqeweight.internal<-function(eseq){ 10 | if(is.eseq(eseq)) { 11 | return(.Call(C_tmrsequencegetweight, eseq)) 12 | } 13 | return(-1) 14 | } 15 | 16 | if (is.seqelist(eseq)) { 17 | as.numeric(sapply(unlist(eseq),seqeweight.internal)) 18 | }else if(is.eseq(eseq)) { 19 | as.numeric(seqeweight.internal(eseq)) 20 | } else { 21 | stop(" [!] eseq should be a seqelist. See help on seqecreate.") 22 | } 23 | } 24 | 25 | "seqeweight<-" <- function(eseq, s, value) { 26 | 27 | TraMineR.check.depr.args(alist(eseq = s)) 28 | 29 | if(!is.seqelist(eseq)) { 30 | stop(" [!] eseq should be a seqelist. See help on seqecreate.") 31 | } 32 | if(length(eseq)!=length(value)) { 33 | stop(" [!] eseq and weights should be of the same size.") 34 | } 35 | .Call(C_tmrsequencesetweight, eseq, as.double(value)) 36 | return(eseq) 37 | } 38 | 39 | seqeisweighted <- function(eseq, s) { 40 | 41 | TraMineR.check.depr.args(alist(eseq = s)) 42 | 43 | if(!is.seqelist(eseq)) { 44 | stop(" [!] eseq should be a seqelist. See help on seqecreate.") 45 | } 46 | weights <- seqeweight(eseq) 47 | return(any(weights!=1)) 48 | } 49 | -------------------------------------------------------------------------------- /R/seqnum.R: -------------------------------------------------------------------------------- 1 | ## ======================================== 2 | ## Change the alphabet of a sequence object 3 | ## ======================================== 4 | 5 | seqnum <- function(seqdata, with.missing=FALSE) { 6 | if (!inherits(seqdata,"stslist")) 7 | stop("data is not a sequence object, see seqdef function to create one") 8 | 9 | alphabet.orig <- attr(seqdata,"alphabet") 10 | levels.orig <- levels(seqdata[[1]]) 11 | 12 | nbstat <- length(alphabet.orig) 13 | 14 | alphabet.new <- 0:(nbstat-1) 15 | levels.new <- alphabet.new 16 | 17 | if (with.missing) { 18 | ## Changing missing code into numerical value 19 | nr.new <- which(levels.orig==attr(seqdata,"nr"))-1 20 | levels.new <- c(levels.new, nr.new) 21 | attr(seqdata,"nr") <- nr.new 22 | } 23 | else 24 | levels.new <- c(levels.new, attr(seqdata,"nr")) 25 | 26 | levels.new <- c(levels.new,attr(seqdata,"void")) 27 | 28 | if (length(alphabet.new)!=nbstat) 29 | stop("lengths of old and new alphabet are different") 30 | 31 | #for (i in 1:seqdim(seqdata)[2]) 32 | # #seqdata[,i] <- factor(seqdata[,i], levels=levels.orig, labels=levels.new) 33 | # seqdata[[i]] <- factor(seqdata[[i]], levels=levels.orig, labels=levels.new) 34 | 35 | seqdata[] <- lapply(seqdata, factor, levels=levels.orig, labels=levels.new) 36 | 37 | attr(seqdata,"alphabet") <- alphabet.new 38 | 39 | return(seqdata) 40 | 41 | } 42 | -------------------------------------------------------------------------------- /R/diss.rep-methods.R: -------------------------------------------------------------------------------- 1 | ## =========================== 2 | ## Methods for stsstatd objects 3 | ## =========================== 4 | 5 | print.diss.rep <- function(x, ...) { 6 | criterion <- attr(x,"criterion") 7 | n <- attr(x,"n") 8 | quality <- attr(x,"Quality") 9 | 10 | cat(" [>] criterion:",criterion,"\n") 11 | cat(" [>]", n,"objects in the original data set\n") 12 | cat(" [>]", length(x),"representative(s)\n") 13 | cat(" [>] overall quality:", round(quality*100,2),"\n") 14 | cat(" [>] representative(s) index(es):", x[1:length(x)],"\n") 15 | } 16 | 17 | summary.diss.rep <- function(object, ...) { 18 | criterion <- attr(object,"criterion") 19 | n <- attr(object,"n") 20 | 21 | cat(" [>] criterion:",criterion,"\n") 22 | cat(" [>]", n,"objects in the original data set\n") 23 | cat(" [>]", nrow(object),"representative object(s)\n") 24 | cat(" [>] statistics for the representative set:\n\n") 25 | print(attr(object,"Statistics"), digits=3, ...) 26 | cat("\n na: number of assigned objects\n") 27 | cat(" nb: number of objects in the neighborhood\n") 28 | cat(" SD: sum of the na distances to the representative\n") 29 | cat(" MD: mean of the na distances to the representative\n") 30 | cat(" DC: sum of the na distances to the center of the complete set\n") 31 | cat(" V: discrepancy of the subset\n") 32 | cat(" Q: quality of the representative\n") 33 | } 34 | -------------------------------------------------------------------------------- /man/seqdecomp.Rd: -------------------------------------------------------------------------------- 1 | \name{seqdecomp} 2 | \alias{seqdecomp} 3 | \title{Convert a character string into a vector of states or events} 4 | \description{ 5 | States can be represented by any substring that does not include the \code{sep} value. An empty separator \code{sep = ""} can only be used when each state is coded with a single character. 6 | } 7 | \usage{ 8 | seqdecomp(data, var=NULL, sep='-', miss="NA", vnames=NULL) 9 | } 10 | \arguments{ 11 | \item{data}{a dataframe, matrix, or character string vector containing sequence data (tibble will be converted with \code{as.data.frame}).} 12 | \item{var}{the list of columns containing the sequences. Default is \code{NULL}, ie all the columns. Whether the sequences are in the compressed (character strings) or extended format is automatically detected by counting the number of columns.} 13 | \item{sep}{the between states/events separator used in the input data set. Default is '\code{-}'.} 14 | \item{miss}{the symbol for missing values (if any) used in the input data set. Default is \code{NA}.} 15 | \item{vnames}{optional names for the column/variables of the output data set. Default is \code{NULL}.} 16 | } 17 | \seealso{ 18 | \code{\link{seqconc}}. 19 | } 20 | \examples{ 21 | # 1 sequence of length 4 22 | seqdecomp("A-BB-C-DD") 23 | 24 | # 2 sequences of length 6 25 | seqdecomp(c("ABBCDD","BBCCAD"),sep="") 26 | } 27 | \keyword{Data handling} 28 | -------------------------------------------------------------------------------- /R/seqtransn.R: -------------------------------------------------------------------------------- 1 | ## ===================================== 2 | ## Number of transitions in the sequence 3 | ## ===================================== 4 | 5 | seqtransn <- function(seqdata, with.missing=FALSE, norm=FALSE, pweight=FALSE) { 6 | 7 | if (!inherits(seqdata,"stslist")) 8 | stop("data is NOT a sequence object, see seqdef function to create one") 9 | 10 | ## Number of transitions 11 | dss <- seqdss(seqdata, with.missing=with.missing) 12 | dssl <- seqlength(dss) 13 | nbseq <- nrow(dss) 14 | 15 | if (pweight) { 16 | tr <- seqtrate(seqdata) 17 | dss.num <- seqasnum(dss)+1 18 | trans <- matrix(0, nrow=nbseq, ncol=1) 19 | rownames(trans) <- rownames(seqdata) 20 | 21 | for (i in 1:nbseq) { 22 | if (dssl[i]>1) { 23 | for (j in 2:dssl[i]) { 24 | trans[i] <- trans[i] + (1-tr[dss.num[i,j-1], dss.num[i,j]]) 25 | } 26 | } 27 | } 28 | } 29 | else { 30 | trans <- dssl-1 31 | if (any(dssl==0)) { 32 | trans[dssl==0] <- 0 33 | } 34 | } 35 | 36 | if (norm) { 37 | seql <- seqlength(seqdata) 38 | trans <- trans/(seql-1) 39 | if (any(seql==1)) { 40 | trans[seql==1] <- 0 41 | } 42 | } 43 | 44 | colnames(trans) <- "Trans." 45 | 46 | return(trans) 47 | } 48 | 49 | trans.pweight <- function(seqdata, tr) { 50 | res <- 0 51 | 52 | for (i in 2:seqlength(seqdata)) { 53 | res <- res + (1-tr[seqdata[i-1], seqdata[i]]) 54 | } 55 | 56 | return(res) 57 | } 58 | -------------------------------------------------------------------------------- /man/disstree-get-rules.Rd: -------------------------------------------------------------------------------- 1 | \name{disstree.get.rules} 2 | \alias{disstree.get.rules} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{Tree classification rules} 5 | \description{ 6 | Tree classification rules. 7 | } 8 | \usage{ 9 | disstree.get.rules(tree, collapse="; ") 10 | } 11 | %- maybe also 'usage' for other objects documented here. 12 | \arguments{ 13 | \item{tree}{A tree (\code{disstree} or \code{DissTreeNode} object).} 14 | \item{collapse}{Character string. Separator between categories in class of categorical values.} 15 | } 16 | \details{ 17 | \code{disstree.get.rules} extracts the classification rules defined by a tree grown from a dissimilarity matrix and returns them as a vector of character strings. The rules are expressed as R commands and the i-th rule, for example, can be applied using \code{\link{eval}(parse(text=rule[i]))}. Rules are built through a call to \code{\link{disstreeleaf}}. 18 | } 19 | 20 | \value{Character vector with the rules as R commands and an attribute \code{covariates} providing the names of the variables involved in the rules. 21 | } 22 | 23 | 24 | \seealso{\code{\link{disstree}} and examples therein, \code{\link{disstreeleaf}}, \code{\link{disstree.assign}}} 25 | \author{Gilbert Ritschard)} 26 | % Add one or more standard keywords, see file 'KEYWORDS' in the 27 | % R documentation directory. 28 | \keyword{Dissimilarity-based analysis} 29 | -------------------------------------------------------------------------------- /src/OMPerdistance.h: -------------------------------------------------------------------------------- 1 | #ifndef OMPERDISTANCECALCULATOR_H 2 | #define OMPERDISTANCECALCULATOR_H 3 | #include "OMdistance.h" 4 | 5 | class OMPerdistance: public OMdistance{ 6 | double timecost; 7 | double * seqdur; 8 | double * indellist; 9 | int * seqlen; 10 | public: 11 | OMPerdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS); 12 | OMPerdistance(OMPerdistance *dc); 13 | virtual void setParameters(SEXP params); 14 | virtual ~OMPerdistance(); 15 | virtual double distance(const int&is, const int& js); 16 | inline double getIndel(const int& indice, const int& state){ 17 | return this->indellist[state]+timecost*(seqdur[indice]); 18 | } 19 | virtual DistanceCalculator* copy(){return new OMPerdistance(this);} 20 | inline double getSubCost(const int& i_state, const int& j_state, const int& i_state_indice, const int& j_state_indice){ 21 | 22 | if(i_state==j_state){ 23 | double diffdur= seqdur[i_state_indice]-seqdur[j_state_indice]; 24 | if(diffdur<0) { 25 | return -1.0*(timecost*diffdur); 26 | }else{ 27 | return(timecost*diffdur); 28 | } 29 | }else{ 30 | //double commondur =fmin2(seqdur[i_state_indice], seqdur[j_state_indice]); 31 | //return(commondur*scost[MINDICE(i_state, j_state, alphasize)] + diffdur*timecost); 32 | return(scost[MINDICE(i_state, j_state, alphasize)] + (seqdur[i_state_indice]+seqdur[j_state_indice])*timecost); 33 | } 34 | } 35 | }; 36 | 37 | #endif 38 | -------------------------------------------------------------------------------- /man/seqpm.Rd: -------------------------------------------------------------------------------- 1 | \name{seqpm} 2 | \alias{seqpm} 3 | \title{Find substring patterns in sequences} 4 | \description{ 5 | Search for a pattern (substring) into sequences. 6 | } 7 | \usage{ 8 | seqpm(seqdata, pattern, sep="") 9 | } 10 | \arguments{ 11 | \item{seqdata}{a sequence object as defined by the \code{\link{seqdef}} function.} 12 | \item{pattern}{a character string representing the pattern (substring) to search for.} 13 | \item{sep}{state separator used in the pattern definition.} 14 | } 15 | \details{ 16 | This function searches a pattern (a character string) into a set of sequences and returns the results as a list with two elements: '\code{Nbmatch}' the number of occurrences of the pattern and '\code{MatchesIndex}' the vector of indexes (row numbers) of the sequences that match the pattern (see examples below). 17 | } 18 | 19 | \value{a list with two elements (see details).} 20 | 21 | %\seealso{} 22 | \examples{ 23 | data(actcal) 24 | actcal.seq <- seqdef(actcal,13:24) 25 | 26 | ## search for pattern "DAAD" 27 | ## (no work-full time work-full time work-no work) 28 | ## results are stored in the 'daad' object 29 | daad <- seqpm(actcal.seq,"DAAD") 30 | 31 | ## Looking at the sequences 32 | ## containing the pattern 33 | actcal.seq[daad$MIndex,] 34 | 35 | ## search for pattern "AD" 36 | ## (full time work-no work) 37 | seqpm(actcal.seq,"AD") 38 | } 39 | 40 | \author{Alexis Gabadinho} 41 | \keyword{State sequences} 42 | -------------------------------------------------------------------------------- /R/TraMineR-legend.R: -------------------------------------------------------------------------------- 1 | ## ==================== 2 | ## Plotting the legend 3 | ## ==================== 4 | 5 | TraMineR.legend <- function(pos, text, colors, cex=1, leg.ncol = NULL, ... ) { 6 | 7 | nbstat <- length(text) 8 | 9 | ## Computing some parameters for the legend's plotting ' 10 | 11 | if (is.null(leg.ncol)) { 12 | if (pos=="bottom") { 13 | if (nbstat > 6) 14 | nbcol <- 3 15 | else 16 | nbcol <- 2 17 | 18 | leg.ncol <- ceiling(nbstat/nbcol) 19 | } 20 | else 21 | leg.ncol <- 1 22 | } 23 | 24 | ## leg.inset <- -0.2 + ((2-leg.ncol)*0.025) 25 | 26 | ## Setting graphical parameters while saving them in savepar 27 | savepar <- par(mar = c(1, 1, 0.5, 1) + 0.1, xpd=FALSE) 28 | 29 | ## Restoring graphical parameters 30 | on.exit(par(savepar)) 31 | 32 | plot(0, type = "n", axes = FALSE, xlab = "", ylab = "") 33 | ## legend(position, fill = cpal, legend = ltext, cex = fontsize) 34 | 35 | oolist <- list(...) 36 | oolist <- oolist[!names(oolist) %in% c("x","y","legend","ncol","fill","border", "lty", "lwd")] 37 | 38 | legargs <- list(x=pos, legend=text, fill=colors, ncol=leg.ncol, cex=cex, border="black") 39 | legargs <- c(legargs,oolist) 40 | 41 | do.call(legend, legargs) 42 | 43 | 44 | ## legend(pos, 45 | ## ## inset=c(0,leg.inset), 46 | ## legend=text, 47 | ## fill=colors, 48 | ## ncol=leg.ncol, 49 | ## ##bty="o", 50 | ## cex=cex, 51 | ## ...) 52 | 53 | } 54 | -------------------------------------------------------------------------------- /R/stslist.rep-methods.R: -------------------------------------------------------------------------------- 1 | ## =========================== 2 | ## Methods for stsstatd objects 3 | ## =========================== 4 | 5 | print.stslist.rep <- function(x, ...) { 6 | criterion <- attr(x,"criterion") 7 | nbseq <- attr(x,"nbseq") 8 | quality <- attr(x,"Quality") 9 | 10 | cat("\n [>] criterion:",criterion,"\n") 11 | cat(" [>]", nbseq,"sequence(s) in the original data set\n") 12 | cat(" [>]", nrow(x),"representative sequence(s)\n") 13 | cat(" [>] overall quality:", round(quality*100,2),"\n\n") 14 | NextMethod(x,...) 15 | } 16 | 17 | summary.stslist.rep <- function(object, ...) { 18 | criterion <- attr(object,"criterion") 19 | nbseq <- attr(object,"nbseq") 20 | quality <- attr(object,"Quality") 21 | 22 | cat("\n [>] criterion:",criterion,"\n") 23 | cat(" [>]", nbseq,"sequence(s) in the original data set\n") 24 | cat(" [>]", nrow(object),"representative sequences\n") 25 | cat(" [>] overall quality:", quality,"\n") 26 | cat(" [>] statistics for the representative set:\n\n") 27 | print(attr(object,"Statistics"), digits=3, ...) 28 | cat("\n na: number of assigned objects\n") 29 | cat(" nb: number of objects in the neighborhood\n") 30 | cat(" SD: sum of the na distances to the representative\n") 31 | cat(" MD: mean of the na distances to the representative\n") 32 | cat(" DC: sum of the na distances to the center of the complete set\n") 33 | cat(" V: discrepancy of the subset\n") 34 | cat(" Q: quality of the representative\n") 35 | } 36 | -------------------------------------------------------------------------------- /R/TraMineR-group.R: -------------------------------------------------------------------------------- 1 | ## Returns a factor from one variable or a list of variable 2 | ## some code taken from the tapply function 3 | 4 | group <- function (INDEX, factor=TRUE) { 5 | 6 | if (!is.factor(INDEX)) { 7 | if (!is.list(INDEX)) { INDEX <- list(INDEX) } 8 | 9 | il <- sapply(INDEX, length) 10 | 11 | if (min(il)!=max(il)) { 12 | stop(" [!] all factors must have the same length") 13 | } 14 | 15 | if (factor) { 16 | group <- rep("", length(INDEX[[1]])) 17 | for (i in seq_along(INDEX)) { 18 | index <- as.factor(INDEX[[i]]) 19 | group <- paste(group, index) 20 | group[is.na(index)] <- NA 21 | } 22 | group <- as.factor(group) 23 | } 24 | else { 25 | ## fromt apply 26 | nI <- length(INDEX) 27 | 28 | namelist <- vector("list", nI) 29 | names(namelist) <- names(INDEX) 30 | extent <- integer(nI) 31 | one <- 1L 32 | 33 | group <- rep.int(one, length(INDEX[[1]])) 34 | ngroup <- one 35 | 36 | for (i in seq_along(INDEX)) { 37 | index <- as.factor(INDEX[[i]]) 38 | namelist[[i]] <- levels(index) 39 | extent[i] <- nlevels(index) 40 | group <- group + ngroup * (as.integer(index) - one) 41 | ngroup <- ngroup * nlevels(index) 42 | } 43 | } 44 | } else { 45 | ## Eliminate the unused levels 46 | uf <- unique(INDEX) 47 | fl <- levels(INDEX)[levels(INDEX) %in% uf] 48 | group <- factor(INDEX, levels=fl) 49 | } 50 | 51 | return(group) 52 | } 53 | 54 | -------------------------------------------------------------------------------- /src/constraint.cpp: -------------------------------------------------------------------------------- 1 | #include "constraint.h" 2 | /* 3 | * constraint.cpp 4 | * 5 | * Created on: Jun 6, 2011 6 | * Author: nmuller modified by buergin 7 | */ 8 | 9 | Constraint::Constraint(const double &mg, 10 | const double &ws, 11 | const double &aminb, 12 | const double &amaxb, 13 | const double &amaxe, 14 | const int &cmethod) 15 | { 16 | // if (wSize==-1)wSize=DBL_MAX; 17 | // if (mGap==-1)mGap=DBL_MAX; 18 | // if (aMax==-1)aMax=DBL_MAX; 19 | // if (aMaxEnd==-1)aMaxEnd=DBL_MAX; 20 | if (mg==-1) 21 | { 22 | this->maxGap=DBL_MAX; 23 | } else { 24 | this->maxGap=mg; 25 | } 26 | if (ws==-1) 27 | { 28 | this->windowSize=DBL_MAX; 29 | } else { 30 | this->windowSize=ws; 31 | } 32 | 33 | if (aminb==-1) 34 | { 35 | this->ageMinBegin=-DBL_MAX; 36 | } else { 37 | this->ageMinBegin=aminb; 38 | } 39 | 40 | if (amaxb==-1) 41 | { 42 | this->ageMaxBegin=DBL_MAX; 43 | } else { 44 | this->ageMaxBegin=amaxb; 45 | } 46 | if (amaxe==-1) 47 | { 48 | this->ageMaxEnd=DBL_MAX; 49 | } else { 50 | this->ageMaxEnd=amaxe; 51 | } 52 | if (cmethod==-1) 53 | { 54 | this->countMethod=1; 55 | } else { 56 | this->countMethod=cmethod; 57 | } 58 | // this->maxGap = mg; 59 | // this->windowSize = ws; 60 | // this->ageMinBegin = aminb; 61 | // this->ageMaxBegin = amaxb; 62 | // this->ageMaxEnd = amaxe; 63 | } 64 | 65 | 66 | -------------------------------------------------------------------------------- /R/seqformat-STS_to_SPS.R: -------------------------------------------------------------------------------- 1 | # Should only be used through seqformat() 2 | 3 | ## ============================== 4 | ## Convert from STS to SPS format 5 | ## ============================== 6 | 7 | STS_to_SPS <- function(seqdata, spsformat, 8 | left=NA, right="DEL", gaps=NA, missing=NA, void="%", nr="*") { 9 | 10 | nbseq <- seqdim(seqdata)[1] 11 | maxsl <- seqdim(seqdata)[2] 12 | 13 | out <- matrix(NA, nrow=nbseq, ncol=maxsl) 14 | 15 | if (is.null(rownames(seqdata))) { 16 | rownames(out) <- paste("[",seq(1:nbseq),"]",sep="") 17 | } else { 18 | rownames(out) <- rownames(seqdata) 19 | } 20 | colnames(out) <- paste("[",seq(1:maxsl),"]",sep="") 21 | 22 | ## Defining the format options 23 | prefix <- substring(spsformat$xfix,1,1) 24 | suffix <- substring(spsformat$xfix,2,2) 25 | stdursep <- spsformat$sdsep 26 | 27 | ## PREPARING THE DATA 28 | seqdata <- as.matrix(seqdata) 29 | seqdata <- seqprep(seqdata, missing=missing, left=left, gaps=gaps, right=right, void=void, nr=nr) 30 | 31 | for (i in 1:nbseq) { 32 | idx <- 1 33 | j <- 1 34 | 35 | tmpseq <- seqdata[i,] 36 | sl <- TraMineR.length(tmpseq, void) 37 | 38 | while (j <= sl) { 39 | iseq <- tmpseq[j] 40 | 41 | dur <- 1 42 | while (j < sl & tmpseq[j+1]==iseq) { 43 | dur <- dur+1 44 | j <- j+1 45 | } 46 | 47 | ## adding suffix 48 | sps <- paste(prefix, iseq, stdursep, dur, suffix, sep="") 49 | 50 | out[i,idx] <- sps 51 | 52 | j <- j+1 53 | idx <- idx+1 54 | } 55 | } 56 | 57 | return(out) 58 | } 59 | -------------------------------------------------------------------------------- /src/eventdictionary.cpp: -------------------------------------------------------------------------------- 1 | #include "eventdictionary.h" 2 | 3 | EventDictionary::EventDictionary(SEXP flist) :numseq(0){ 4 | for (int i = 0; i < Rf_length(flist); i++) { 5 | this->insert(std::make_pair(i+1,std::string(CHAR(STRING_ELT(flist, i)))));// add to respect R indice system 6 | } 7 | //ctor 8 | } 9 | 10 | EventDictionary::~EventDictionary() { 11 | //dtor 12 | } 13 | 14 | bool EventDictionary::codeExists(const int &code) const { 15 | return this->find(code)==this->end(); 16 | } 17 | /*int EventDictionary::sprint(char * buffer, const char* start, const int&code) const { 18 | const_iterator it=this->find(code); 19 | if (it!=this->end()) { 20 | return sprintf(buffer,"%s%s",start,it->second.c_str()); 21 | } 22 | return sprintf(buffer,"%s%i",start,code); 23 | }*/ 24 | 25 | SEXP EventDictionary::getDictionary()const{ 26 | SEXP ret; 27 | int s=this->size(); 28 | //REprintf((char*)"size %i\n",s); 29 | PROTECT(ret = Rf_allocVector(STRSXP, s)); 30 | for(const_iterator it=this->begin();it!=this->end();it++){ 31 | //REprintf((char*)"Code %i=%s\n",it->first,it->second.c_str()); 32 | if(it->first<=s){ 33 | SET_STRING_ELT(ret, it->first-1, Rf_mkChar(it->second.c_str())); 34 | //REprintf((char*)"Code %i=%s\n",it->first,it->second.c_str()); 35 | } 36 | } 37 | UNPROTECT(1); 38 | return ret; 39 | 40 | } 41 | 42 | void EventSet::add(SEXP elist){ 43 | int * code=INTEGER(elist); 44 | for (int i = 0; i < Rf_length(elist); i++) { 45 | this->insert(code[i]);// add to respect R indice system 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/OMPerdistanceII.h: -------------------------------------------------------------------------------- 1 | #ifndef OMPERDISTANCEIICALCULATOR_H 2 | #define OMPERDISTANCEIICALCULATOR_H 3 | #include "OMdistance.h" 4 | 5 | class OMPerdistanceII: public OMdistance{ 6 | double timecost; 7 | double * seqdur; 8 | double * indellist; 9 | double * tokdeplist; 10 | int * seqlen; 11 | public: 12 | OMPerdistanceII(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS); 13 | OMPerdistanceII(OMPerdistanceII *dc); 14 | virtual void setParameters(SEXP params); 15 | virtual ~OMPerdistanceII(); 16 | virtual double distance(const int&is, const int& js); 17 | inline double getIndel(const int& indice, const int& state){ 18 | return this->indellist[state]+timecost*tokdeplist[state]*(seqdur[indice]); 19 | } 20 | virtual DistanceCalculator* copy(){return new OMPerdistanceII(this);} 21 | inline double getSubCost(const int& i_state, const int& j_state, const int& i_state_indice, const int& j_state_indice){ 22 | 23 | if(i_state==j_state){ 24 | double diffdur= seqdur[i_state_indice]-seqdur[j_state_indice]; 25 | if(diffdur<0) { 26 | return -1.0*(timecost*diffdur*tokdeplist[i_state]); 27 | }else{ 28 | return(timecost*diffdur*tokdeplist[i_state]); 29 | } 30 | }else{ 31 | //double commondur =fmin2(seqdur[i_state_indice], seqdur[j_state_indice]); 32 | //return(commondur*scost[MINDICE(i_state, j_state, alphasize)] + diffdur*timecost); 33 | return(scost[MINDICE(i_state, j_state, alphasize)] + (tokdeplist[i_state]*seqdur[i_state_indice]+tokdeplist[j_state]*seqdur[j_state_indice])*timecost); 34 | } 35 | } 36 | }; 37 | 38 | #endif 39 | -------------------------------------------------------------------------------- /man/disstree-assign.Rd: -------------------------------------------------------------------------------- 1 | \name{disstree.assign} 2 | \alias{disstree.assign} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{Assign rules to profiles provided} 5 | \description{ 6 | Find the tree classification rule that applies to provided cases. 7 | } 8 | \usage{ 9 | disstree.assign(rules, profile, covar=attr(rules,"covariates")) 10 | } 11 | %- maybe also 'usage' for other objects documented here. 12 | \arguments{ 13 | \item{rules}{Character vector. List of classification rules such as those returned by \code{\link{disstree.get.rules}}.} 14 | \item{profile}{Data frame. Profiles of cases to be classified with the rules.} 15 | \item{covar}{Character vector. List of names of covariates used by the rules.} 16 | } 17 | \details{ 18 | \code{rules} must be given as strings of R commands. Use \code{\link{disstree.get.rules}} to get the classification rules of a tree of class \code{disstree}. 19 | 20 | Rules are expected to create a full partition of the space of possible values of the covariates, i.e., any profile must satisfy one and only one of the rules. 21 | 22 | } 23 | 24 | \value{A vector of length equal to the number of rows of \code{profile} with for each case the index of the rule that applies. 25 | } 26 | 27 | 28 | \seealso{\code{\link{disstree}} and examples therein, \code{\link{disstreeleaf}}, \code{\link{disstree.get.rules}}} 29 | \author{Gilbert Ritschard)} 30 | % Add one or more standard keywords, see file 'KEYWORDS' in the 31 | % R documentation directory. 32 | \keyword{Dissimilarity-based analysis} 33 | -------------------------------------------------------------------------------- /man/print.stslist.Rd: -------------------------------------------------------------------------------- 1 | \name{print.stslist} 2 | \alias{print.stslist} 3 | \title{Print method for state sequence objects} 4 | \description{ 5 | This is the print method for state sequence objects of class \code{stslist} created by the \code{seqdef} function. 6 | } 7 | \usage{ 8 | \method{print}{stslist}(x, format='STS', extended=FALSE, ...) 9 | } 10 | \arguments{ 11 | \item{x}{A state sequence (\code{stslist}) object.} 12 | 13 | \item{format}{String: print format. One of \code{"STS"} (default) or \code{"SPS"}.} 14 | 15 | \item{extended}{Logical: should the output be printed in extended matrix form?} 16 | 17 | \item{...}{Additional print arguments.} 18 | } 19 | 20 | \seealso{\code{\link{seqdef}}, \code{\link{plot.stslist}} 21 | } 22 | 23 | \examples{ 24 | ## Defining a sequence object with the data in columns 10 to 25 25 | ## (family status from age 15 to 30) in the biofam data set 26 | data(biofam) 27 | biofam <- biofam[500:600,] ## using a subsample only 28 | biofam.lab <- c("Parent", "Left", "Married", "Left+Marr", 29 | "Child", "Left+Child", "Left+Marr+Child", "Divorced") 30 | biofam.shortlab <- c("P","L","M","LM","C","LC","LMC","D") 31 | biofam.seq <- seqdef(biofam, 10:25, states=biofam.shortlab, 32 | labels=biofam.lab) 33 | 34 | ## Print of first 5 sequences 35 | print(biofam.seq[1:5,]) 36 | print(biofam.seq[1:5,], extended=TRUE) 37 | print(biofam.seq[1:5,], format="SPS") 38 | print(biofam.seq[1:5,], format="SPS", SPS.out = list(xfix = "", sdsep = "/")) 39 | 40 | } 41 | 42 | \author{Gilbert Ritschard} 43 | \keyword{Method} 44 | \keyword{State sequences} 45 | -------------------------------------------------------------------------------- /R/seqdur.R: -------------------------------------------------------------------------------- 1 | ## ======================================== 2 | ## Extracts states durations from sequences 3 | ## ======================================== 4 | 5 | seqdur <- function(seqdata, with.missing=FALSE) { 6 | 7 | if (!inherits(seqdata,"stslist")) 8 | stop("data is not a sequence object, see seqdef function to create one") 9 | 10 | nbseq <- nrow(seqdata) 11 | sl <- seqlength(seqdata, with.missing=TRUE) 12 | 13 | maxsl <- max(sl) 14 | trans <- matrix(nrow=nbseq, ncol=maxsl) 15 | rownames(trans) <- rownames(seqdata) 16 | colnames(trans) <- paste("DUR",1:maxsl, sep="") 17 | 18 | seqdatanum <- seqasnum(seqdata, with.missing=with.missing) 19 | #if (!with.missing) 20 | seqdatanum[is.na(seqdatanum)] <- -99 21 | 22 | maxcol <- 0 23 | for (i in 1:nbseq) { 24 | idx <- 1 25 | j <- 1 26 | 27 | tmpseq <- seqdatanum[i,] 28 | 29 | while (idx <= sl[i] && tmpseq[idx]==-99) idx <- idx + 1 ## Skipping initial -99 values 30 | 31 | while (idx <= sl[i]) { 32 | iseq <- tmpseq[idx] 33 | dur <- 1 34 | 35 | while (idx < sl[i] && (tmpseq[idx+1]==iseq || tmpseq[idx+1]==-99)) { 36 | if (tmpseq[idx+1]!=-99) dur <- dur+1 37 | idx <- idx+1 38 | } 39 | 40 | ## The range of the numeric alphabet 41 | ## obtained with seqasnum is 0..n 42 | if (iseq!=-99) { 43 | trans[i,j] <- dur 44 | j <- j+1 45 | } 46 | 47 | idx <- idx+1 48 | } 49 | if (j>maxcol) {maxcol <- j} 50 | } 51 | ## drop=FALSE ensures that the result is a matrix even if trans has only one row 52 | trans <- trans[,1:(maxcol-1), drop=FALSE] 53 | 54 | return(trans) 55 | } 56 | -------------------------------------------------------------------------------- /man/plot.subseqelist.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.subseqelist} 2 | \alias{plot.subseqelist} 3 | \title{Plot frequencies of subsequences} 4 | \description{ 5 | Plot frequencies of subsequences. 6 | } 7 | \usage{ 8 | \method{plot}{subseqelist}(x, freq=NULL,cex=1,...) 9 | } 10 | %- maybe also 'usage' for other objects documented here. 11 | \arguments{ 12 | \item{x}{The subsequences to plot (a \code{subseqelist} object} 13 | \item{freq}{The frequencies to plot, support if \code{NULL}} 14 | \item{cex}{Plotting text and symbols magnification. See \code{\link{par}}.} 15 | % \item{ylim}{A 2 length vector indicating minimum and maximum y values (by default to c(0,1))} 16 | \item{\dots}{arguments passed to \code{\link{barplot}}} 17 | } 18 | %\details{} 19 | %\references{ ~put references to the literature/web site here ~ } 20 | %\author{ ~~who you are~~ } 21 | %\note{ ~~further notes~~ 22 | 23 | % ~Make other sections like Warning with \section{Warning }{....} ~ 24 | %} 25 | \seealso{ 26 | \code{\link{seqefsub}} 27 | } 28 | \examples{ 29 | ## loading data 30 | data(actcal.tse) 31 | 32 | ## creating sequences 33 | actcal.eseq <- seqecreate(actcal.tse) 34 | 35 | ## Looking for frequent subsequences 36 | fsubseq <- seqefsub(actcal.eseq,pmin.support=0.01) 37 | 38 | ## Frequence of first ten subsequences 39 | plot(fsubseq[1:10], cex=2) 40 | plot(fsubseq[1:10]) 41 | 42 | } 43 | \author{Matthias Studer (with Gilbert Ritschard for the help page)} 44 | % Add one or more standard keywords, see file 'KEYWORDS' in the 45 | % R documentation directory. 46 | \keyword{Event sequences} 47 | \keyword{Method} 48 | -------------------------------------------------------------------------------- /man/str.eseq.Rd: -------------------------------------------------------------------------------- 1 | \name{str.eseq} 2 | \alias{str.eseq} 3 | \alias{str.seqelist} 4 | \alias{as.character.eseq} 5 | \alias{as.character.seqelist} 6 | 7 | \title{String representation of an event sequence object} 8 | \description{ 9 | Methods to get a string representation of an event sequence or event subsequence object. 10 | These are the generic \code{str} and \code{as.character} methods for objects of class \code{eseq} (single event sequence) and \code{seqelist} (list of event sequences). 11 | } 12 | \details{ 13 | String representations have the following form: 14 | \code{time-(e1,e2,...)-elapsedtime-(e3,...)...} 15 | where \code{time} is the time elapsed from start to the first set of simultaneous events \code{(e1,e2,...)} and \code{elapsedtime} the time between two consecutive sets of simultaneous events. Time is only displayed when events are time stamped. 16 | Use \code{str} (\code{str.seqelist}) to get a single string for a whole list, and \code{as.charcater} (\code{as.character.seqelist}) to get a vector of strings. 17 | \code{str.eseq} and \code{as.character.eseq} have mainly an internal purpose (invoked by print methods for example). 18 | } 19 | 20 | \seealso{\code{\link{seqecreate}} for a full example } 21 | \examples{ 22 | data(actcal.tse) 23 | actcal.eseq <- seqecreate(actcal.tse) 24 | head(as.character(actcal.eseq)) 25 | } 26 | 27 | \author{Matthias Studer (with Gilbert Ritschard for the help page)} 28 | % Add one or more standard keywords, see file 'KEYWORDS' in the 29 | % R documentation directory. 30 | 31 | \keyword{internal} 32 | \keyword{Event Sequences} 33 | -------------------------------------------------------------------------------- /man/TraMineRInternal.Rd: -------------------------------------------------------------------------------- 1 | \name{TraMineRInternal} 2 | %%\alias{TraMineRInternalCheckArgs} 3 | \alias{TraMineRInternalLayout} 4 | \alias{TraMineRInternalLegend} 5 | \alias{TraMineRInternalNodeInit} 6 | \alias{TraMineRInternalSeqeage} 7 | \alias{TraMineRInternalSeqgbar} 8 | \alias{TraMineRInternalSplitInit} 9 | \alias{TraMineRInternalWeightedInertiaDist} 10 | %- Also NEED an '\alias' for EACH other topic documented here. 11 | \title{Access to TraMineR internal functions} 12 | \description{ 13 | Functions allowing other packages to access some TraMineR internal functions. 14 | Corresponding functions are respectively \code{TraMineR.setlayout}, 15 | \code{TraMineR.Legend}, \code{DTNInit}, \code{seqeage}, \code{seqgbar}, 16 | \code{DTNsplit}, and \code{tmrWeightedInertiaDist}. For experts only. 17 | } 18 | \usage{ 19 | %%TraMineRInternalCheckArgs(...) 20 | TraMineRInternalLayout(...) 21 | TraMineRInternalLegend(...) 22 | TraMineRInternalNodeInit(...) 23 | TraMineRInternalSeqeage(...) 24 | TraMineRInternalSeqgbar(...) 25 | TraMineRInternalSplitInit(...) 26 | TraMineRInternalWeightedInertiaDist(diss, diss.size, is.dist, individuals, sweights, var) 27 | } 28 | %- maybe also 'usage' for other objects documented here. 29 | \arguments{ 30 | \item{\dots}{Arguments passed to or from other methods.} 31 | \item{diss}{See tmrWeightedInertiaDist().} 32 | \item{diss.size}{See tmrWeightedInertiaDist().} 33 | \item{is.dist}{See tmrWeightedInertiaDist().} 34 | \item{individuals}{See tmrWeightedInertiaDist().} 35 | \item{sweights}{See tmrWeightedInertiaDist().} 36 | \item{var}{See tmrWeightedInertiaDist().} 37 | } 38 | -------------------------------------------------------------------------------- /R/TraMineR-trunc.R: -------------------------------------------------------------------------------- 1 | ## SEQUENCE TRUNCATION 2 | 3 | TraMineR.trunc <- function(seqdata, mstate, sl, left = "DEL", right = "DEL", 4 | gaps = "DEL", neutral = "#", void = "%") { 5 | 6 | sidx <- 1:sl 7 | 8 | ## Index des missing et index des etats valides 9 | na.pos <- sidx[mstate] 10 | notna.pos <- sidx[!mstate] 11 | lc <- 0 12 | 13 | if(length(notna.pos)<1) { ## only missing values in sequence 14 | c1 <- 0 15 | rc <- 1 16 | mm <- NULL # most probably not used 17 | } 18 | else { 19 | 20 | ## Position of first valid state 21 | c1 <- notna.pos[1] 22 | 23 | if (c1>1) 24 | lc <- c1-1 25 | # else lc=0 26 | 27 | rc <- max(notna.pos)+1 28 | mm <- na.pos[na.pos > lc+1 & na.pos < rc-1] 29 | } 30 | 31 | seqdata.trunc <- seqdata 32 | 33 | if (!is.na(left) & lc>0) { 34 | if (left=="DEL") seqdata.trunc[1:lc] <- void 35 | else if (left=="NEUTRAL") seqdata.trunc[1:lc] <- neutral 36 | else seqdata.trunc[1:lc] <- left 37 | } 38 | 39 | if (!is.na(right) & rc<=sl) { 40 | if (right=="DEL") seqdata.trunc[rc:sl] <- void 41 | else if (right=="NEUTRAL") seqdata.trunc[rc:sl] <- neutral 42 | else seqdata.trunc[rc:sl] <- right 43 | } 44 | 45 | if (!is.na(gaps) & length(mm>0)) { 46 | if (gaps=="DEL") seqdata.trunc[mm] <- void 47 | else if (gaps=="NEUTRAL") seqdata.trunc[mm] <- neutral 48 | else seqdata.trunc[mm] <- gaps 49 | } 50 | 51 | ndel <- sum(seqdata.trunc==void, na.rm=TRUE) 52 | 53 | if (ndel>0) { 54 | seqdata.trunc <- seqdata.trunc[seqdata.trunc!=void] 55 | seqdata.trunc <- c(seqdata.trunc,rep(void,ndel)) 56 | } 57 | 58 | return(seqdata.trunc) 59 | } 60 | -------------------------------------------------------------------------------- /R/seqmeant.R: -------------------------------------------------------------------------------- 1 | ## ============== 2 | ## Mean durations 3 | ## ============== 4 | 5 | seqmeant <- function(seqdata, weighted=TRUE, with.missing=FALSE, prop=FALSE, serr=FALSE) { 6 | 7 | if (!inherits(seqdata,"stslist")) 8 | stop("seqmeant: seqdata is not a sequence object, use seqdef function to create one") 9 | 10 | istatd <- suppressMessages(seqistatd(seqdata, with.missing=with.missing, prop=prop)) 11 | 12 | weights <- attr(seqdata, "weights") 13 | 14 | if (!weighted || is.null(weights)) 15 | weights <- rep(1, nrow(seqdata)) 16 | ## Also takes into account that in unweighted sequence objects created with 17 | ## older TraMineR versions the weights attribute is a vector of 1 18 | ## instead of NULL 19 | if (all(weights==1)) 20 | weighted <- FALSE 21 | 22 | wtot <- sum(weights) 23 | 24 | mtime <- apply(istatd*weights,2,sum) 25 | 26 | res <- mtime/wtot 27 | 28 | res <- as.matrix(res) 29 | colnames(res) <- "Mean" 30 | 31 | col <- cpal(seqdata) 32 | if (with.missing) { 33 | col <- c(col, attr(seqdata,"missing.color")) 34 | } 35 | 36 | if(serr){ 37 | w2tot <- sum(weights^2) 38 | vcent <- t(t(istatd) - mtime/wtot) 39 | var <- apply(weights*(vcent^2),2,sum) * wtot/(wtot^2 - w2tot) 40 | sd <- sqrt(var) 41 | SE <- sqrt(var/wtot) 42 | res <- cbind(res,var,sd,SE) 43 | colnames(res) <- c("Mean", "Var", "Stdev", "SE") 44 | } 45 | 46 | attr(res,"nbseq") <- sum(weights) 47 | attr(res,"cpal") <- col 48 | attr(res,"xtlab") <- colnames(seqdata) 49 | attr(res,"weighted") <- weighted 50 | attr(res,"se") <- serr 51 | 52 | class(res) <- c("stslist.meant", "matrix") 53 | 54 | return(res) 55 | } 56 | -------------------------------------------------------------------------------- /man/actcal.tse.Rd: -------------------------------------------------------------------------------- 1 | \name{actcal.tse} 2 | \docType{data} 3 | \alias{actcal.tse} 4 | \title{Example data set: Activity calendar from the Swiss Household Panel (time stamped event format)} 5 | \description{ 6 | This data set contains events defined from the state sequences in the actcal data set. It was created with the code shown in the examples section. It is provided to symplify example of event sequence mining. 7 | } 8 | \usage{data(actcal.tse)} 9 | \format{Time stamped events derived from state sequences in the actcal data set.} 10 | \source{Swiss Household Panel} 11 | \seealso{ 12 | \code{\link{seqformat}, \link{actcal}}} 13 | %\references{} 14 | \examples{ 15 | data(actcal) 16 | actcal.seq <- seqdef(actcal[,13:24]) 17 | 18 | ## Defining the transition matrix 19 | transition <- seqetm(actcal.seq, method="transition") 20 | transition[1,1:4] <- c("FullTime" , "Decrease,PartTime", 21 | "Decrease,LowPartTime", "Stop") 22 | transition[2,1:4] <- c("Increase,FullTime", "PartTime" , 23 | "Decrease,LowPartTime", "Stop") 24 | transition[3,1:4] <- c("Increase,FullTime", "Increase,PartTime", 25 | "LowPartTime" , "Stop") 26 | transition[4,1:4] <- c("Start,FullTime" , "Start,PartTime" , 27 | "Start,LowPartTime" , "NoActivity") 28 | transition 29 | 30 | ## Converting STS data to TSE 31 | actcal.tse <- seqformat(actcal, 13:24, from = "STS",to = "TSE", 32 | tevent = transition) 33 | 34 | ## Defining the event sequence object 35 | actcal.eseq <- seqecreate(id=actcal.tse$id, 36 | time=actcal.tse$time, event=actcal.tse$event) 37 | } 38 | \author{Gilbert Ritschard and Matthias Studer} 39 | \keyword{Datasets} 40 | -------------------------------------------------------------------------------- /man/seqstatl.Rd: -------------------------------------------------------------------------------- 1 | \name{seqstatl} 2 | \alias{seqstatl} 3 | \title{List of distinct states or events (alphabet) in a sequence data set.} 4 | \description{ 5 | Returns a list containing distinct states or events found in a data frame or matrix containing sequence data, the alphabet. 6 | } 7 | \usage{ 8 | seqstatl(data, var=NULL, format='STS') 9 | } 10 | \arguments{ 11 | \item{data}{a data frame, matrix, or character string vector containing sequence data (tibble will be converted with \code{as.data.frame}).} 12 | \item{var}{the list of columns containing the sequences. Default \code{NULL} means all columns. Whether the sequences are in the compressed (character strings) or extended format is automatically detected from the number of columns.} 13 | \item{format}{the format of the sequence data set. One of \code{"STS"}, \code{"SPS"}, \code{"DSS"}. Default is \code{"STS"}. The \code{seqstatl} function uses the \code{\link{seqformat}} function to translate between formats when necessary.} 14 | } 15 | 16 | \references{ 17 | Gabadinho, A., G. Ritschard, N. S. Müller and M. Studer (2011). Analyzing and Visualizing State Sequences in R with TraMineR. \emph{Journal of Statistical Software} \bold{40}(4), 1-37. 18 | 19 | Gabadinho, A., G. Ritschard, M. Studer and N. S. Müller (2009). Mining Sequence Data in 20 | \code{R} with the \code{TraMineR} package: A user's guide. Department of Econometrics and 21 | Laboratory of Demography, University of Geneva. 22 | } 23 | 24 | \seealso{ 25 | \code{\link{seqformat}} 26 | } 27 | \examples{ 28 | data(actcal) 29 | seqstatl(actcal,13:24) 30 | } 31 | 32 | \author{Alexis Gabadinho} 33 | 34 | \keyword{Data handling} 35 | -------------------------------------------------------------------------------- /man/ex2.Rd: -------------------------------------------------------------------------------- 1 | \name{ex2} 2 | \docType{data} 3 | \alias{ex2} 4 | \alias{ex2.weighted} 5 | \alias{ex2.unweighted} 6 | 7 | \title{Example data sets with weighted and unweighted sequence data} 8 | 9 | \description{ 10 | Example data sets used to demonstrate the handling of weights. The \code{'ex2.weighted'} data set contains 6 sequences with weights inflating to 100 sequences (sum of weights is 100). The second data frame \code{'ex2.unweighted'} contains the corresponding 100 sequences. 11 | 12 | The sequences are, in both data frames, in the \code{'seq'} column, and weights in the \code{'weight'} column of \code{'ex2.weighted'}. 13 | 14 | The alphabet is made of four possible states: \code{A, B, C} and \code{D}. 15 | \cr 16 | 17 | These data sets are mainly intended to test and illustrate the handling of weights in TraMineR's functions. Weighted results obtained with '\code{ex2.weighted}' data set should be exactly the same as unweighted results obtained with the '\code{ex2.unweighted}' data set. 18 | } 19 | 20 | \usage{data(ex2)} 21 | \format{ 22 | The command \code{data(ex2)} generates two data frames: 23 | \cr 24 | \code{ex2.weighted}: a data frame with 6 rows, 1 variable containing sequences as character strings, 1 weight variable. 25 | \cr 26 | \code{ex2.unweighted}: a data frame with 100 rows, 1 variable containing sequences as character strings. 27 | 28 | } 29 | 30 | \examples{ 31 | data(ex2) 32 | 33 | ex2w.seq <- seqdef(ex2.weighted, 1, weights=ex2.weighted$weight) 34 | ex2u.seq <- seqdef(ex2.unweighted) 35 | } 36 | 37 | 38 | \source{The brain of the TraMineR package team.} 39 | \author{Gilbert Ritschard and Alexis Gabadinho} 40 | 41 | \keyword{Datasets} 42 | -------------------------------------------------------------------------------- /R/seqprep.R: -------------------------------------------------------------------------------- 1 | ## =========================== 2 | ## Treatment of missing values 3 | ## =========================== 4 | 5 | seqprep <- function(seqdata, left=NA, right="DEL", gaps=NA, 6 | neutral="#", missing=NA, void="%", nr="*") { 7 | 8 | nbseq <- nrow(seqdata) 9 | sl <- ncol(seqdata) 10 | 11 | message(" [>] preparing ",nbseq, " sequences") 12 | message(" [>] coding void elements with '", void, "' and missing values with '", nr,"'") 13 | 14 | if (is.na(missing)) { 15 | mstate <- is.na(seqdata) 16 | } 17 | else { 18 | mstate <- seqdata==missing 19 | } 20 | 21 | allmiss <- NULL 22 | for (i in 1:nbseq) { 23 | nbmiss <- sum(mstate[i,], na.rm=TRUE) 24 | # if (nbmiss>0 && nbmiss 0){ 34 | seqdata[i,] <- TraMineR.trunc(seqdata=seqdata[i,], mstate=mstate[i,], sl=sl, 35 | left=left, right=right, gaps=gaps, 36 | neutral=neutral, void=void) 37 | } 38 | } 39 | 40 | nempty <- length(allmiss) 41 | if (nempty>0) { 42 | dots <- "" 43 | if (nempty > 10){ 44 | allmiss <- allmiss[1:10] 45 | dots <- ", ..." 46 | } 47 | message(" [!!] ",nempty," empty sequence(s) with index: ", paste(allmiss, collapse=","),dots,"\n may produce inconsistent results.") 48 | } 49 | 50 | ## Setting a new code for missing statuses 51 | if (is.na(missing)) seqdata[is.na(seqdata)] <- nr 52 | else seqdata[seqdata==missing] <- nr 53 | 54 | return(seqdata) 55 | } 56 | -------------------------------------------------------------------------------- /man/seqe.Rd: -------------------------------------------------------------------------------- 1 | \name{is.eseq} 2 | \alias{is.eseq} 3 | \alias{is.seqe} 4 | \alias{is.seqelist} 5 | \alias{print.eseq} 6 | \alias{print.seqelist} 7 | \alias{levels.seqelist} 8 | \alias{[.seqelist} 9 | \alias{levels.eseq} 10 | \alias{Math.eseq} 11 | \alias{Math.seqelist} 12 | \alias{Ops.eseq} 13 | \alias{Ops.seqelist} 14 | \alias{Summary.eseq} 15 | \alias{Summary.seqelist} 16 | 17 | 18 | \title{Event sequence object} 19 | \description{ 20 | TraMineR uses an internal event sequence object for all its operations 21 | with event sequences such as mining frequent subsequences. The 22 | function \code{is.seqelist} checks wether the argument is an event 23 | sequence object while \code{is.eseq} checks wether the argument is a 24 | single element of an event sequence object. 25 | %\code{as.seqelist} transform a list of event sequences into an internal event sequence object. 26 | 27 | There is a print method that can be applied to such event sequence object. 28 | 29 | is.seqe() has been removed in TraMineR 2.x, use is.eseq() instead. 30 | } 31 | \usage{ 32 | is.eseq(eseq, s) 33 | is.seqelist(eseq, s) 34 | } 35 | \arguments{ 36 | \item{eseq}{Sequence} 37 | \item{s}{Deprecated. Use \code{eseq} instead.} 38 | } 39 | \seealso{\code{\link{str.seqelist}}, \code{\link{str.eseq}},\code{\link{as.character.seqelist}} and \code{\link{as.character.eseq}} to convert sequences from an event sequence object into character strings} 40 | %\examples{} 41 | 42 | \author{Matthias Studer (with Gilbert Ritschard for the help page)} 43 | % Add one or more standard keywords, see file 'KEYWORDS' in the 44 | % R documentation directory. 45 | \keyword{Event sequences} 46 | \keyword{internal} 47 | -------------------------------------------------------------------------------- /R/implicativestat.R: -------------------------------------------------------------------------------- 1 | ########################### 2 | ## Compute the implicative statistic of a rule 3 | ########################### 4 | 5 | 6 | implicativestat <- function(x, y, type="intensity", resid="standard") { 7 | if (!(type %in% c("intensity", "indice"))) { 8 | stop("type should be intensity or indice") 9 | } 10 | if (!(resid %in% c("standard", "deviance", "Freeman-Tukey", "adjusted"))) { 11 | stop("resid should be one of standard, deviance, Freeman-Tukey or ajusted") 12 | } 13 | x <- factor(x) 14 | if (length(levels(x)==1)) x <- factor(x, levels=c("0","1")) 15 | y <- factor(y) 16 | if (length(levels(y)==1)) y <- factor(y, levels=c("0","1")) 17 | xgrp <- levels(x) 18 | ygrp <- levels(y) 19 | result <- matrix(0, nrow=length(xgrp), ncol=length(ygrp)) 20 | n <- length(x) 21 | rownames(result) <- xgrp 22 | colnames(result) <- ygrp 23 | for (i in 1:length(xgrp)) { 24 | condi <- x==xgrp[i] 25 | for (j in 1:length(ygrp)) { 26 | condj <- y==ygrp[j] 27 | Nnbj <- sum((condi)&!condj) 28 | Nexpnbj <- sum(condi)*sum(!condj)/n 29 | if (resid=="standard") { 30 | indice <- (Nnbj-Nexpnbj)/sqrt(Nexpnbj) 31 | } 32 | else if (resid=="deviance") { 33 | indice <- sign(Nnbj-Nexpnbj)*sqrt(abs(2*Nnbj*log(Nnbj/Nexpnbj))) 34 | } 35 | else if (resid=="Freeman-Tukey") { 36 | indice <- sqrt(Nnbj)+sqrt(1+Nnbj)-sqrt(4*Nexpnbj+1) 37 | } 38 | else if (resid=="adjusted") { 39 | indice <- (Nnbj-Nexpnbj)/sqrt(Nexpnbj*(sum(condj)/n)*(1-sum(condi)/n)) 40 | } 41 | 42 | 43 | if (type=="indice") { 44 | result[i, j] <- indice 45 | } 46 | else { 47 | result[i, j] <- pnorm(-indice) 48 | } 49 | } 50 | } 51 | return(result) 52 | } 53 | 54 | -------------------------------------------------------------------------------- /man/seqnum.Rd: -------------------------------------------------------------------------------- 1 | \name{seqnum} 2 | \alias{seqnum} 3 | \title{Transform into a sequence object with numerical alphabet.} 4 | 5 | \description{ 6 | The function \code{seqnum} transforms the provided state sequence object into an equivalent sequence object in which the original alphabet is replaced with an alphabet of numbers ranging from \code{0} to \code{(nbstates-1)}. 7 | } 8 | \usage{ 9 | seqnum(seqdata, with.missing=FALSE) 10 | } 11 | \arguments{ 12 | \item{seqdata}{a state sequence object as defined by the \code{\link{seqdef}} function.} 13 | 14 | \item{with.missing}{logical: Should missing elements in the sequences be turned into numerical values as well? The code for missing values in the sequences is retrieved from the \code{'nr'} attribute of \code{seqdata}.} 15 | } 16 | 17 | \details{ 18 | The first state (for example \code{'A'}) is coded with the value \code{0}, the second state (for example \code{'B'}) is coded with the value \code{1}, etc... The function returns a sequence object containing the original sequences coded with the new numerical alphabet 19 | ranging from \code{0} to \code{(nbstates-1)}} 20 | 21 | \seealso{\code{\link{seqdef}}, \code{\link{alphabet} } } 22 | 23 | \examples{ 24 | data(actcal) 25 | actcal.seq <- seqdef(actcal,13:24) 26 | 27 | ## The first 10 sequences in the actcal.seq 28 | ## sequence object 29 | actcal.seq[1:10,] 30 | alphabet(actcal.seq) 31 | 32 | ## The first 10 sequences in the actcal.seq 33 | ## sequence object with numerical alphabet 34 | seqnum(actcal.seq[1:10,]) 35 | 36 | ## states A,B,C,D are now coded 0,1,2,3 37 | alphabet(seqnum(actcal.seq)) 38 | } 39 | 40 | \author{Alexis Gabadinho} 41 | 42 | \keyword{Data handling} 43 | \keyword{State sequences} 44 | -------------------------------------------------------------------------------- /man/disstreeleaf.Rd: -------------------------------------------------------------------------------- 1 | \name{disstreeleaf} 2 | \alias{disstreeleaf} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{Terminal node membership} 5 | \description{ 6 | Get the terminal node membership of each case. 7 | } 8 | \usage{ 9 | disstreeleaf(tree, label=FALSE, collapse=", ") 10 | } 11 | %- maybe also 'usage' for other objects documented here. 12 | \arguments{ 13 | \item{tree}{A tree (\code{disstree} or \code{DissTreeNode} object).} 14 | \item{label}{Logical. Should leaf memberships be labelled with classification rules?} 15 | \item{collapse}{Character string. Separator between categories in class of categorical values.} 16 | } 17 | \details{ 18 | \code{disstreeleaf} returns the terminal node membership of the cases either as the leaf number of the terminal node to which the cases are assigned or, when \code{label=TRUE}, as the classification rule leading to the assigned terminal node. In the latter case, \code{collapse} is used as separator between categorical values in classes of categorical values. The default \code{collapse} is \code{", "}. It is advisable to change this default when categorical values contain commas. 19 | } 20 | 21 | \value{Either a vector of leaf numbers or a factor. When \code{label=FALSE} (default), vector of assigned terminal node numbers. When \code{label=TRUE}, a factor with levels labelled with classification rules. 22 | } 23 | 24 | 25 | \seealso{\code{\link{disstree}} and examples therein, \code{\link{disstree.get.rules}}, and \code{\link{disstree.assign}}.} 26 | \author{Matthias Studer and Gilbert Ritschard)} 27 | % Add one or more standard keywords, see file 'KEYWORDS' in the 28 | % R documentation directory. 29 | \keyword{Dissimilarity-based analysis} 30 | -------------------------------------------------------------------------------- /man/seqdur.Rd: -------------------------------------------------------------------------------- 1 | \name{seqdur} 2 | \alias{seqdur} 3 | \title{Extract state durations from a sequence object.} 4 | \description{ 5 | Extracts states durations from a sequence object. Returns a matrix containing the states durations for the sequences. The states durations in \code{'D-D-D-D-A-A-A-A-A-A-A-D'} are 4,7,1. Distinct states can be extracted with the \code{\link{seqdss}} function. 6 | } 7 | \usage{ 8 | seqdur(seqdata, with.missing=FALSE) 9 | } 10 | \arguments{ 11 | \item{seqdata}{a sequence object as defined by the \code{\link{seqdef}} function.} 12 | \item{with.missing}{Should non-void missing values be considered as regular states? See Details.} 13 | 14 | } 15 | \details{ 16 | When \code{with.missing=FALSE} (default) missing values are ignored and a substring \code{AA***A} for example will be considered as a spell \code{AAA} of duration 3. When \code{with.missing=TRUE}, durations are also computed for spells of missing values (gaps in sequences). 17 | 18 | See \link{seqdef} on options for handling missing values when creating sequence objects. 19 | } 20 | 21 | \value{a matrix containing the states durations for each distinct state in each sequence.} 22 | 23 | \seealso{ 24 | \code{\link{seqdss}}. 25 | } 26 | 27 | \examples{ 28 | ## Creating a sequence object from columns 13 to 24 29 | ## in the 'actcal' example data set 30 | ## Here we retain the first 10 sequences only. 31 | data(actcal) 32 | actcal.seq <- seqdef(actcal[1:10,13:24]) 33 | 34 | ## Retrieving the spell durations 35 | actcal.dur <- seqdur(actcal.seq) 36 | 37 | ## Displaying the durations for the first 10 sequences 38 | actcal.dur 39 | } 40 | 41 | \author{Alexis Gabadinho and Gilbert Ritschard} 42 | 43 | \keyword{Longitudinal characteristics} 44 | -------------------------------------------------------------------------------- /man/seqistatd.Rd: -------------------------------------------------------------------------------- 1 | \name{seqistatd} 2 | \alias{seqistatd} 3 | \title{State frequencies in each individual sequence} 4 | \description{ 5 | Returns the state frequencies (total durations) for each sequence in the sequence object. 6 | } 7 | \usage{ 8 | seqistatd(seqdata, with.missing=FALSE, prop=FALSE) 9 | } 10 | \arguments{ 11 | \item{seqdata}{a sequence object (see \code{\link{seqdef}} function).} 12 | \item{with.missing}{logical: if set as \code{TRUE}, total durations are also computed for the missing status (gaps in the sequences). See \link{seqdef} on options for handling missing values when creating sequence objects.} 13 | \item{prop}{logical: if \code{TRUE}, proportions of time spent in each state are returned instead of absolute values. This option is specially useful when sequences contain missing states, since the sum of the state durations may not be the same for all sequences.} 14 | } 15 | 16 | \references{ 17 | Gabadinho, A., G. Ritschard, N. S. Müller and M. Studer (2011). Analyzing and Visualizing State Sequences in R with TraMineR. \emph{Journal of Statistical Software} \bold{40}(4), 1-37. 18 | 19 | Ritschard, G. (2023), "Measuring the nature of individual sequences", \emph{Sociological Methods and Research}, 52(4), 2016-2049. \doi{10.1177/00491241211036156}. 20 | } 21 | 22 | %\seealso{} 23 | 24 | \examples{ 25 | data(actcal) 26 | actcal.seq <- seqdef(actcal,13:24) 27 | seqistatd(actcal.seq[1:10,]) 28 | 29 | ## Example using "with.missing" argument 30 | data(ex1) 31 | ex1.seq <- seqdef(ex1, 1:13, weights=ex1$weights) 32 | 33 | seqistatd(ex1.seq) 34 | seqistatd(ex1.seq, with.missing=TRUE) 35 | 36 | } 37 | 38 | \author{Alexis Gabadinho} 39 | 40 | \keyword{Longitudinal characteristics} 41 | \keyword{State sequences} 42 | 43 | -------------------------------------------------------------------------------- /R/seqetm.R: -------------------------------------------------------------------------------- 1 | seqetm <- function(seqdata, method = "transition", use.labels = TRUE, sep = ">", 2 | bp = "", ep = "end", seq) { 3 | 4 | TraMineR.check.depr.args(alist(seqdata = seq)) 5 | 6 | statl <- alphabet(seqdata)#seqstatl(seqdata) 7 | nr <- attr(seqdata, "nr") 8 | has.nr <- any(seqdata==nr) 9 | if (has.nr) { 10 | statl <- c(statl, nr) 11 | } 12 | void <- attr(seqdata, "void") 13 | has.void <- any(seqdata==void) 14 | if (has.void) { 15 | statl <- c(statl, void) 16 | } 17 | nbstat <- length(statl) 18 | tevent <- matrix(nrow=nbstat, ncol=nbstat) 19 | rownames(tevent) <- statl 20 | colnames(tevent) <- statl 21 | alphabet <- statl 22 | if (use.labels && inherits(seqdata, "stslist")) { 23 | #label<-alphabet(seqdata) 24 | label <- attr(seqdata, "labels") 25 | if (has.nr) { 26 | label <- c(label, nr) 27 | } 28 | if (has.void) { 29 | label <- c(label, void) 30 | } 31 | if(length(label)==length(alphabet)){ 32 | alphabet <- label 33 | } 34 | else if(length(label)>0){ 35 | warning("Length of the labels and of the alphabet are not equal") 36 | } 37 | } 38 | if(any(grepl(",", alphabet))){ 39 | warning(" [!] Alphabet and/or state labels should not contain commas ',' which are reserved for separating multiple events of a same transition!\n") 40 | } 41 | for(i in 1:nbstat){ 42 | for(j in 1:nbstat){ 43 | if(i==j){ 44 | tevent[i,j] <- alphabet[[i]] 45 | }else{ 46 | if(method=="transition"){ 47 | tevent[i,j] <- paste(alphabet[[i]], alphabet[[j]], sep=sep) 48 | }else if(method == "period"){ 49 | tevent[i,j] <- paste(ep, alphabet[[i]], ",", bp, alphabet[[j]], sep="") 50 | }else if(method == "state"){ 51 | tevent[i,j] <- alphabet[[j]] 52 | } 53 | } 54 | } 55 | } 56 | return(tevent) 57 | 58 | } 59 | -------------------------------------------------------------------------------- /man/seqconc.Rd: -------------------------------------------------------------------------------- 1 | \name{seqconc} 2 | \alias{seqconc} 3 | \title{Concatenate vectors of states or events into a character string} 4 | \description{ 5 | Concatenate vectors of states or events into a character string. In the string, each state is separated by 'sep'. The void elements in the input sequences are eliminated. 6 | } 7 | \usage{ 8 | seqconc(data, var=NULL, sep="-", vname="Sequence", void=NA) 9 | } 10 | \arguments{ 11 | \item{data}{A data frame or matrix containing sequence data (tibble will be converted with \code{as.data.frame}).} 12 | 13 | \item{var}{List of the columns containing the sequences. Default is \code{NULL} in which case all columns are retained. Whether the sequences are in the compressed (character strings) or extended format is automatically detected by counting the number of columns.} 14 | 15 | \item{sep}{Character used as separator. By default, "\code{-}".} 16 | 17 | \item{vname}{an optional name for the variable containing the sequences. By default, \code{"Sequence"}.} 18 | 19 | \item{void}{the code used for void elements appearing in the sequences (see \cite{Gabadinho et al. (2009)} for more details on missing values and void elements in sequences). Default is \code{NA}.} 20 | 21 | } 22 | \value{a vector of character strings, one for each row in the input data.} 23 | 24 | \author{Alexis Gabadinho} 25 | 26 | \references{ 27 | Gabadinho, A., G. Ritschard, M. Studer and N. S. Müller (2009). Mining Sequence Data in \code{R} with the \code{TraMineR} package: A user's guide. \emph{Department of Econometrics and Laboratory of Demography, University of Geneva}. 28 | } 29 | 30 | \seealso{ 31 | \code{\link{seqdecomp}}. 32 | } 33 | \examples{ 34 | data(actcal) 35 | actcal.string <- seqconc(actcal,13:24) 36 | head(actcal.string) 37 | } 38 | \keyword{Data handling} 39 | -------------------------------------------------------------------------------- /man/seqelength.Rd: -------------------------------------------------------------------------------- 1 | \name{seqelength} 2 | \alias{seqelength} 3 | \alias{seqelength<-} 4 | %- Also NEED an '\alias' for EACH other topic documented here. 5 | \title{Lengths of event sequences} 6 | \description{ 7 | The length of an event sequence is its time span, i.e., the total time of observation. This information is useful to perform for instance a survival analysis. The function 8 | \code{seqelength} retrieves the lengths of the provided sequences, while 9 | \code{seqelength <-} sets the length of the sequences. 10 | } 11 | \usage{ 12 | seqelength(eseq, s) 13 | seqelength(eseq, s) <- value 14 | } 15 | %- maybe also 'usage' for other objects documented here. 16 | \arguments{ 17 | \item{eseq}{An event sequence object (\code{seqelist}).} 18 | \item{value}{A list of sequence lengths.} 19 | \item{s}{Deprecated. Use \code{eseq} instead.} 20 | } 21 | \value{ 22 | A numeric vector with the lengths of the sequences. 23 | } 24 | %\references{ ~put references to the literature/web site here ~ } 25 | %\author{ ~~who you are~~ } 26 | %\note{ ~~further notes~~} 27 | 28 | %\seealso{ ~~objects to See Also as \code{\link{help}}, ~~~ } 29 | \examples{ 30 | data(actcal.tse) 31 | actcal.eseq <- seqecreate(actcal.tse) 32 | ## Since end.event is not specified, contains no sequence lengths 33 | ## We set them manually as 12 for all sequences 34 | sl <- numeric() 35 | sl[1:2000] <- 12 36 | seqelength(actcal.eseq) <- sl 37 | actcal.eseq[1:10] 38 | ## Retrieve lengths 39 | slen <- seqelength(actcal.eseq) 40 | summary(slen) 41 | } 42 | %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 43 | \author{Matthias Studer (with Gilbert Ritschard for the help page)} 44 | % Add one or more standard keywords, see file 'KEYWORDS' in the 45 | % R documentation directory. 46 | \keyword{Event sequences} 47 | \keyword{Longitudinal characteristics} 48 | -------------------------------------------------------------------------------- /man/seqeweight.Rd: -------------------------------------------------------------------------------- 1 | \name{seqeweight} 2 | \alias{seqeweight} 3 | \alias{seqeweight<-} 4 | %- Also NEED an '\alias' for EACH other topic documented here. 5 | \title{ 6 | Setting or retrieving weights of an event sequence object. 7 | } 8 | \description{ 9 | Event sequence objects can be weighted. Weights are used by other functions such as \code{\link{seqefsub}} or \code{\link{seqecmpgroup}} to compute weighted statistics. 10 | } 11 | \usage{ 12 | seqeweight(eseq, s) 13 | seqeweight(eseq, s) <- value 14 | } 15 | %- maybe also 'usage' for other objects documented here. 16 | \arguments{ 17 | \item{eseq}{An event sequence object (\code{seqelist}).} 18 | \item{value}{Numerical vector containing weights} 19 | \item{s}{Deprecated. Use \code{eseq} instead.} 20 | } 21 | \value{ 22 | \code{seqeweight} returns a numerical vector containing the weights associated to each event sequence. 23 | } 24 | 25 | \examples{ 26 | ##Starting with states sequences 27 | ##Loading data 28 | data(biofam) 29 | ## Creating state sequences 30 | biofam.seq <- seqdef(biofam,10:25,informat='STS') 31 | 32 | ## Creating event sequences from biofam 33 | biofam.eseq <- seqecreate(biofam.seq, weighted=FALSE) 34 | 35 | ## Using the weights 36 | seqeweight(biofam.eseq) <- biofam$wp00tbgs 37 | 38 | ## Now seqefsub accounts for weights unless weighted is set to FALSE 39 | fsubseq <- seqefsub(biofam.eseq, pmin.support=0.01) 40 | 41 | ## Searching for weighted susbsequences which best 42 | ## discriminate the birth cohort 43 | discr <- seqecmpgroup(fsubseq, group=biofam$birthyr>=1940) 44 | plot(discr[1:15]) 45 | 46 | } 47 | \author{Matthias Studer (with Gilbert Ritschard for the help page)} 48 | % Add one or more standard keywords, see file 'KEYWORDS' in the 49 | % R documentation directory. 50 | \keyword{Sequence-object attributes} 51 | \keyword{Event sequences} 52 | -------------------------------------------------------------------------------- /src/NMSdistance.h: -------------------------------------------------------------------------------- 1 | #ifndef NMSDISTANCECALCULATOR_H 2 | #define NMSDISTANCECALCULATOR_H 3 | #include "distancecalculator.h" 4 | 5 | class SUBSEQdistance: public DistanceCalculator{ 6 | protected: 7 | double *selfmatvect; 8 | double *kvect; 9 | double *kweights; 10 | int distMethod; 11 | int distTransform; 12 | public: 13 | SUBSEQdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS):DistanceCalculator(normS, Ssequences, seqdim, lenS),kweights(NULL), distMethod(0){ 14 | selfmatvect= new double[nseq*maxlen]; 15 | kvect = new double[maxlen]; 16 | } 17 | SUBSEQdistance(SUBSEQdistance *dc):DistanceCalculator(dc),kweights(dc->kweights), distMethod(dc->distMethod){ 18 | selfmatvect= new double[nseq*maxlen]; 19 | memcpy(this->selfmatvect, dc->selfmatvect, nseq*maxlen*sizeof(double)); 20 | kvect = new double[maxlen]; 21 | 22 | } 23 | //virtual DistanceCalculator* copy(){return new SUBSEQdistance(this);} 24 | virtual ~SUBSEQdistance(){ 25 | delete[] selfmatvect; 26 | delete[] kvect; 27 | } 28 | void resetKvect(){ 29 | for(int k=0;k kvect[k]=0.0; 31 | } 32 | } 33 | virtual void computeattr(const int&is, const int& js)=0; 34 | virtual void setParameters(SEXP params); 35 | double distance(const int&is, const int& js); 36 | }; 37 | 38 | 39 | class NMSdistance: public SUBSEQdistance{ 40 | protected: 41 | int zmatsize; 42 | double *hmat; 43 | double *vmat; 44 | int *zmat; 45 | 46 | 47 | public: 48 | NMSdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);//: SUBSEQdistance( normS, Ssequences, seqdim, lenS){} 49 | NMSdistance(NMSdistance * dc); 50 | virtual DistanceCalculator* copy(){return new NMSdistance(this);} 51 | virtual ~NMSdistance(); 52 | void computeattr(const int&is, const int& js); 53 | }; 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /R/subseqelist.R: -------------------------------------------------------------------------------- 1 | is.subseqelist<-function(eseq){ 2 | # return(.Call(C_istmrsequence,eseq)) 3 | return(inherits(eseq, "subseqelist")) 4 | } 5 | 6 | createsubseqelist<-function(eseq, constraint, subseq, data, type="frequent"){ 7 | if(!is.seqelist(eseq)) { 8 | stop(" [!] eseq should be a seqelist") 9 | } 10 | ret <- list() 11 | ret$eseq <- eseq 12 | ret$constraint <- constraint 13 | ret$subseq <- subseq 14 | ret$data <- as.data.frame(data, optional=TRUE) 15 | ret$type <- type 16 | class(ret$subseq) <- c("seqelist", "list") 17 | # attr(ret$subseq,"dictionnary")<-attr(seq,"dictionnary") 18 | class(ret) <- "subseqelist" 19 | return(ret) 20 | } 21 | print.subseqelist<-function(x,...){ 22 | z <- data.frame(data.frame(Subsequence=as.character(x$subseq), check.names=FALSE), x$data, row.names=NULL, check.names=FALSE) 23 | print(z, ...) 24 | cat("\nComputed on", length(x$eseq), "event sequences\n") 25 | print(x$constraint,...) 26 | } 27 | 28 | "[.subseqelist" <- function(x, i,j, drop=FALSE) { 29 | # If only 1 subscript is given, the result will still be a Surv object 30 | # If the second is given extract the relevant columns as a matrix 31 | if (missing(j)) { 32 | ret <- createsubseqelist(x$eseq, x$constraint, x$subseq[i,drop=drop], data=x$data[i,], type=x$type) 33 | if(!is.null(x$labels)) { 34 | ret$labels<-x$labels 35 | } 36 | if(!is.null(x$bonferroni)) { 37 | ret$bonferroni<-x$bonferroni 38 | } 39 | class(ret)<-class(x) 40 | return(ret) 41 | } else { 42 | class(x) <- NULL 43 | NextMethod("[") 44 | } 45 | } 46 | 47 | plot.subseqelist<-function(x, freq=NULL, cex=1,...){ 48 | slegend <- as.character(x$subseq) 49 | if(is.null(freq)) { 50 | freq<-x$data[,1] 51 | } 52 | barpos <- barplot(freq, names.arg=c(""), ...) 53 | text(x=barpos, y=0.02, labels=slegend, srt=90, adj=c(0,0.5), cex=cex) 54 | } 55 | -------------------------------------------------------------------------------- /R/seqintegr.R: -------------------------------------------------------------------------------- 1 | seqintegration <- function(...){ 2 | msg.stop("Seqintegration is obsolete, use seqintegr instead!") 3 | } 4 | 5 | seqintegr <- function(seqdata, state=NULL, pow=1, with.missing=FALSE){ 6 | if (!inherits(seqdata, "stslist")) { 7 | stop("[!] seqdata is not a sequence object, see seqdef function to create one") 8 | } 9 | alph <- alphabet(seqdata) 10 | nr <- attr(seqdata, "nr") 11 | if (with.missing) { 12 | alph <- c(alph, nr) 13 | } 14 | if (length(state)>1) 15 | msg.stop("When non null, 'state' must be a single state") 16 | if (!is.null(state)){ 17 | if (!state %in% alph){ 18 | msg.stop("state ", state, " not in the alphabet!") 19 | } 20 | } 21 | nbstat <- ifelse(is.null(state),length(alph),1) 22 | nbseq <- nrow(seqdata) 23 | 24 | iseqtab <- matrix(nrow = nbseq, ncol = nbstat) 25 | if (is.null(state)) 26 | colnames(iseqtab) <- alph 27 | else 28 | colnames(iseqtab) <- state 29 | 30 | rownames(iseqtab) <- rownames(seqdata) 31 | ## message(" [>] computing state distribution for ", nbseq, " sequences ...") 32 | integVector <- (1:ncol(seqdata))^pow 33 | suminteg <- sum(integVector, na.rm = TRUE) 34 | 35 | ## when with.missing=FALSE we do not account for positions occupied by missings 36 | miss <- c(nr, attr(seqdata,"void")) 37 | if(!with.missing) 38 | suminteg <- suminteg - apply(seqdata, 1, function(x) sum(integVector[x %in% miss], na.rm = TRUE)) 39 | 40 | if(is.null(state)){ 41 | for(i in 1:nbstat) { 42 | iseqtab[, i] <- apply(seqdata, 1, function(x) sum(integVector[x == alph[i]], na.rm = 43 | TRUE))/suminteg 44 | } 45 | } else { 46 | iseqtab[, 1] <- apply(seqdata, 1, function(x) sum(integVector[x == state], na.rm = TRUE))/suminteg 47 | } 48 | 49 | return(iseqtab) 50 | } 51 | -------------------------------------------------------------------------------- /R/seqtree.R: -------------------------------------------------------------------------------- 1 | seqtree <- function(formula, data = NULL, weighted = TRUE, min.size = 0.05, 2 | max.depth = 5, R = 1000, pval = 0.01, weight.permutation = "replicate", 3 | seqdist.args = list(method = "LCS", norm = "auto"), diss = NULL, squared = FALSE, 4 | first = NULL, minSize, maxdepth, seqdist_arg) { 5 | 6 | TraMineR.check.depr.args(alist(min.size = minSize, max.depth = maxdepth, seqdist.args = seqdist_arg)) 7 | 8 | ##formula.call <- formula 9 | tterms <- terms(formula) 10 | seqdata <- eval(formula[[2]], data, parent.frame()) # to force evaluation 11 | if (!inherits(seqdata, "stslist")) { 12 | stop("Left hand term in formula should be a stslist object (see seqdef)") 13 | } 14 | if(is.null(diss)){ 15 | seqdist.args$seqdata <- seqdata 16 | diss <- do.call(seqdist, seqdist.args) 17 | } 18 | if(weighted){ 19 | weights <- attr(seqdata, "weights") 20 | } 21 | else { 22 | weights <- NULL 23 | } 24 | dissmatrix <- diss 25 | formula[[2]] <- NULL 26 | ## Model matrix from forumla 27 | predictor <- as.data.frame(model.frame(formula, data, drop.unused.levels = TRUE, na.action=NULL)) 28 | tree <- DTNdisstree(dissmatrix=dissmatrix, predictor=predictor, terms=tterms, 29 | weights=weights, min.size=min.size, max.depth=max.depth, R=R, 30 | pval=pval, object=seqdata, weight.permutation=weight.permutation, 31 | squared=squared, first=first) 32 | class(tree) <- c("seqtree", class(tree)) 33 | return(tree) 34 | 35 | } 36 | 37 | 38 | print.seqtree <- function(x, digits = getOption("digits") - 2, medoid=TRUE, ...){ 39 | stslistmedoid <- function(object, index) { 40 | x <- seqconc(object[index,], void=attr(object,"void")) 41 | x <- suppressMessages(seqformat(x, from = "STS", to = "SPS", compress = TRUE)) 42 | return(x) 43 | } 44 | print.disstree(x, digits = digits, medoid=medoid, medoid.fun=stslistmedoid,...) 45 | } 46 | -------------------------------------------------------------------------------- /man/TraMineR.check.depr.args.Rd: -------------------------------------------------------------------------------- 1 | \name{TraMineR.check.depr.args} 2 | \alias{TraMineR.check.depr.args} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{ 5 | Checking and managing deprecated arguments 6 | } 7 | \description{ 8 | Checks the presence of deprecated arguments, assigns value of a deprecated argument to the corresponding new argument name, and issues warning messages. 9 | } 10 | \usage{ 11 | TraMineR.check.depr.args(arg.pairs) 12 | } 13 | %- maybe also 'usage' for other objects documented here. 14 | \arguments{ 15 | \item{arg.pairs}{List of pairs of old and new argument names 16 | \cr(e.g. \code{alist(newname1 = oldname1, newname2 = oldname2)})} 17 | } 18 | 19 | \details{To be used inside functions. For developers only. 20 | 21 | For each specified pair of new and old argument names, the function checks if the old 22 | argument name is specified. If so and the new one is not, a warning message is raised 23 | and the argument value is assigned to the new argument name. If one of the names declared in check.depr.args() arguments si not an argument of the parent function 24 | or if both the new and old argument names are specified an error is raised. 25 | 26 | The function does not detect when the new and the old argument names are specified 27 | together and the new argument value is its default value. In this case, 28 | the value associated with the old argument name is assigned to the new name and a warning 29 | message is raised. 30 | 31 | The function works whether the argument names are explicitly declared or not in the call to the checked function. 32 | 33 | The only requirement for the function to work is that the deprecated arguments should be listed WITHOUT default values in the definition of the checked function. 34 | } 35 | 36 | \author{Pierre-Alexandre Fonta, Gilbert Ritschard} 37 | 38 | \value{None.} -------------------------------------------------------------------------------- /R/seqsubsn.R: -------------------------------------------------------------------------------- 1 | ## ============================================= 2 | ## Number of distinct subsequences in a sequence 3 | ## ============================================= 4 | 5 | nsubs <- function (x, nbstat, statlist, void, nr, with.missing) { 6 | l <- vector(mode="integer", nbstat) 7 | x <- x[x!=void] 8 | if (!with.missing) x <- x[x!=nr] 9 | slength <- length(x) 10 | if (slength == 0) return(1) ## empty sequence has one subseq (the empty one) 11 | 12 | N <- vector(mode="integer",(slength+1)) 13 | N[1] <- 1 14 | 15 | for (i in 2:(slength+1)) { 16 | N[i] <- 2*N[i-1] 17 | cidx <- which(statlist==x[i-1]) 18 | 19 | if (l[cidx[1]] > 0) N[i] <- N[i] - N[l[cidx[1]]] 20 | l[cidx[1]] <- i-1 21 | } 22 | return(N[(slength+1)]) 23 | } 24 | 25 | 26 | seqsubsn <- function(seqdata, DSS=TRUE, with.missing=FALSE) { 27 | 28 | if (!inherits(seqdata,"stslist")) 29 | stop("data is not a sequence object, see seqdef function to create one") 30 | 31 | ## Since v 2.0.13 we use thw with.missing argument 32 | ## with.missing=FALSE 33 | ##nr <- attr(seqdata,"nr") 34 | ##if (any(seqdata==nr)) { 35 | ## message(" [!] found missing state in the sequence(s), adding missing state to the alphabet") 36 | ## with.missing=TRUE 37 | ##} 38 | 39 | if (DSS==TRUE) { 40 | seqdata <- suppressMessages(seqdss(seqdata, with.missing=with.missing)) 41 | } 42 | 43 | ## alphabet 44 | #sl <- attr(seqdata,"alphabet") 45 | #if (with.missing) { 46 | # sl <- c(sl, nr) 47 | #} 48 | sl <- alphabet(seqdata, with.missing=with.missing) 49 | 50 | ns <- length(sl) 51 | 52 | void <- attr(seqdata,"void") 53 | nr <- attr(seqdata,"nr") 54 | 55 | result <- apply(seqdata, 1, nsubs, nbstat=ns, statlist=sl, void=void, nr=nr, with.missing=with.missing) 56 | 57 | result <- as.matrix(result) 58 | colnames(result) <- "Subseq." 59 | rownames(result) <- rownames(seqdata) 60 | 61 | return(result) 62 | } 63 | -------------------------------------------------------------------------------- /R/seqlegend.R: -------------------------------------------------------------------------------- 1 | ## ========================================= 2 | ## Plotting the legend for a sequence object 3 | ## ========================================= 4 | 5 | seqlegend <- function(seqdata, with.missing = "auto", cpal = NULL, 6 | missing.color = NULL, ltext = NULL, position = "topleft", cex = 1, 7 | boxes=TRUE, fontsize, ...) { 8 | 9 | TraMineR.check.depr.args(alist(cex = fontsize)) 10 | 11 | 12 | if (!inherits(seqdata,"stslist")) 13 | stop("data is not a sequence object, use seqdef function to create one") 14 | 15 | if (is.null(cpal)) 16 | cpal <- attr(seqdata,"cpal") 17 | 18 | if (is.null(ltext)) 19 | ltext <- attr(seqdata,"labels") 20 | 21 | if (is.null(missing.color)) 22 | missing.color <- attr(seqdata,"missing.color") 23 | 24 | ## Adding an entry for missing in the legend 25 | nr <- attr(seqdata,"nr") 26 | 27 | if ((with.missing=="auto" && any(seqdata==nr)) || with.missing==TRUE) { 28 | cpal <- c(cpal,missing.color) 29 | ltext <- c(ltext,"missing") 30 | ## statl <- c(statl,nr) 31 | ## nbstat <- nbstat+1 32 | } 33 | 34 | oolist <- list(...) 35 | if (! "col" %in% names(oolist)) oolist[["col"]] <- cpal 36 | if (! "x" %in% names(oolist)) oolist[["x"]] <- position 37 | if (! "legend" %in% names(oolist)) oolist[["legend"]] <- ltext 38 | oolist <- c(list(cex=cex), oolist) 39 | plot(0, type= "n", axes=FALSE, xlab="", ylab="") 40 | if (boxes) { 41 | #legend(position, fill=cpal, legend=ltext, cex=cex,...) 42 | if (! "fill" %in% names(oolist)) oolist[["fill"]] <- cpal 43 | res <- do.call(legend,oolist) 44 | } else { 45 | if (! "lty" %in% names(oolist)) oolist[["lty"]] <- 1 46 | if (! "lwd" %in% names(oolist)) oolist[["lwd"]] <- 15 47 | if (! "seg.len" %in% names(oolist)) oolist[["seg.len"]] <- .4 48 | if (! "x.intersp" %in% names(oolist)) oolist[["x.intersp"]] <- 1.5 49 | res <- do.call(legend,oolist) 50 | } 51 | invisible(res) 52 | } 53 | -------------------------------------------------------------------------------- /R/seqstatd.R: -------------------------------------------------------------------------------- 1 | ## States frequency by time unit 2 | 3 | seqstatd <- function(seqdata, weighted=TRUE, with.missing=FALSE, norm=TRUE) { 4 | 5 | if (!inherits(seqdata,"stslist")) 6 | stop("data is not a sequence object, use seqdef function to create one") 7 | 8 | ## Retrieving the alphabet 9 | statl <- attr(seqdata,"alphabet") 10 | col <- cpal(seqdata) 11 | 12 | if (with.missing) { 13 | statl <- c(statl, attr(seqdata,"nr")) 14 | col <- c(col, attr(seqdata,"missing.color")) 15 | } 16 | 17 | nbstat <- length(statl) 18 | 19 | seql <- ncol(seqdata) 20 | 21 | sd <- matrix(nrow=nbstat,ncol=seql) 22 | row.names(sd) <- statl 23 | colnames(sd) <- colnames(seqdata) 24 | 25 | ## Weights 26 | weights <- attr(seqdata, "weights") 27 | 28 | if (!weighted || is.null(weights)) { 29 | weights <- rep(1.0, nrow(seqdata)) 30 | } 31 | ## Also takes into account that in unweighted sequence objects created with 32 | ## older TraMineR versions the weights attribute is a vector of 1 33 | ## instead of NULL 34 | if (all(weights==1)) 35 | weighted <- FALSE 36 | 37 | for (i in 1:nbstat) 38 | for (j in 1:seql) 39 | sd[i,j] <- sum((seqdata[,j]==statl[i])*weights) 40 | 41 | ## sd <- apply(seqdata,2,table) 42 | N <- apply(sd,2,sum) 43 | for (i in 1:seql) sd[,i] <- sd[,i]/N[i] 44 | 45 | E <- apply(sd,2,entropy) 46 | ## Maximum entropy is the entropy of the alphabet 47 | if (norm==TRUE) { 48 | E.max <- entropy(rep(1/nbstat,nbstat)) 49 | E <- E/E.max 50 | } 51 | 52 | res <- list(sd,N,E) 53 | names(res) <- c("Frequencies", "ValidStates", "Entropy") 54 | 55 | class(res) <- c("stslist.statd","list") 56 | 57 | attr(res,"nbseq") <- sum(weights) 58 | attr(res,"cpal") <- col 59 | attr(res,"xtlab") <- colnames(seqdata) 60 | attr(res,"xtstep") <- attr(seqdata,"xtstep") 61 | attr(res,"tick.last") <- attr(seqdata,"tick.last") 62 | attr(res,"weighted") <- weighted 63 | attr(res,"norm") <- norm 64 | 65 | return(res) 66 | } 67 | -------------------------------------------------------------------------------- /man/stlab.Rd: -------------------------------------------------------------------------------- 1 | \name{stlab} 2 | \alias{stlab} 3 | \alias{stlab<-} 4 | \title{Get or set the state labels of a sequence object} 5 | \description{ 6 | This function gets or sets the state labels of a sequence object, that is, the long labels used when displaying the state legend in plotting functions. 7 | } 8 | \details{The state legend is plotted either automatically by the plot functions provided for visualizing sequence objects or with the \code{\link{seqlegend}} function. A long label is associated to each state of the alphabet and displayed in the legend. The state labels are defined when creating the sequence object, either automatically using the values found in the data or by specifying a user defined vector of labels. The \code{stlab} function can be used to get or set the state labels of a previously defined sequence object.} 9 | \usage{ 10 | stlab(seqdata) 11 | stlab(seqdata) <- value 12 | } 13 | \arguments{ 14 | \item{seqdata}{a state sequence object as defined by the \code{\link{seqdef}} function.} 15 | \item{value}{a vector of character strings containing the labels, of length equal to the number of states in the alphabet. Each string is attributed to the corresponding state in the alphabet, the order being the one returned by the \code{\link{alphabet}}.} 16 | } 17 | \value{For 'stlab' a vector containing the labels. 18 | 19 | For 'stlab<-' the updated sequence object. 20 | } 21 | \seealso{ 22 | \code{\link{seqdef}} 23 | } 24 | \examples{ 25 | ## Creating a sequence object with the columns 13 to 24 26 | ## in the 'actcal' example data set 27 | ## The color palette is automatically set 28 | data(actcal) 29 | actcal.seq <- seqdef(actcal,13:24) 30 | 31 | ## Retrieving the color palette 32 | stlab(actcal.seq) 33 | seqiplot(actcal.seq) 34 | 35 | ## Changing the state labels 36 | stlab(actcal.seq) <- c("Full time","Part time (19-36 hours)", 37 | "Part time (1-18 hours)", "No work") 38 | seqiplot(actcal.seq) 39 | } 40 | \keyword{Sequence-object attributes} 41 | -------------------------------------------------------------------------------- /man/actcal.Rd: -------------------------------------------------------------------------------- 1 | \name{actcal} 2 | \docType{data} 3 | \alias{actcal} 4 | \title{Example data set: Activity calendar from the Swiss Household Panel} 5 | 6 | \description{ 7 | This data set contains 2000 individual sequences of monthly activity statuses from 8 | January to December 2000.} 9 | 10 | \details{ 11 | The data set is a subsample of the data collected by the Swiss Household Panel (SHP). 12 | 13 | The state column (variable) names are `\code{jan00}', `\code{feb00}', etc... and correspond to columns 13 to 24. 14 | 15 | There are four possible states:\cr 16 | \cr 17 | A = Full-time paid job (> 37 hours)\cr 18 | B = Long part-time paid job (19-36 hours) \cr 19 | C = Short part-time paid job (1-18 hours) \cr 20 | D = Unemployed (no work) \cr 21 | 22 | The data set contains also the following covariates:\cr 23 | 24 | \tabular{ll}{ 25 | \code{age00} \tab (age in 2000) \cr 26 | \code{educat00} \tab (education level) \cr 27 | \code{civsta00} \tab (civil status) \cr 28 | \code{nbadul00} \tab (number of adults in household) \cr 29 | \code{nbkid00} \tab (number of children) \cr 30 | \code{aoldki00} \tab (age of oldest kid) \cr 31 | \code{ayouki00} \tab (age of youngest kid) \cr 32 | \code{region00} \tab (residence region) \cr 33 | \code{com2.00} \tab (residence commune type) \cr 34 | \code{sex} \tab (sex of respondent)\cr 35 | \code{birthy} \tab (birth year) \cr 36 | } 37 | } 38 | \usage{data(actcal)} 39 | \format{A data frame with 2000 rows, 12 state variables, 1 id variable and 11 covariates.} 40 | \source{Swiss Household Panel} 41 | \references{\url{https://forscenter.ch/projects/swiss-household-panel/}} 42 | 43 | \examples{ 44 | data(actcal) 45 | # labels for plot color legends 46 | actcal.lab <- c("> 37 hours", "19-36 hours", "1-18 hours", "no work") 47 | # state sequence object of the first six sequences 48 | actcal.seq <- seqdef(actcal[1:6 ,13:24], labels=actcal.lab) 49 | 50 | print(actcal.seq) 51 | } 52 | 53 | \author{Gilbert Ritschard and Alexis Gabadinho} 54 | \keyword{Datasets} 55 | -------------------------------------------------------------------------------- /man/cpal.Rd: -------------------------------------------------------------------------------- 1 | \name{cpal} 2 | \alias{cpal} 3 | \alias{cpal<-} 4 | \title{Get or set the color palette of a sequence object} 5 | \description{ 6 | This function gets or sets the color palette of a sequence object, that is, the list of colors used to represent the states. 7 | } 8 | \details{In the plot functions provided for visualizing sequence objects, a different color is associated to each state of the alphabet. The color palette is defined when creating the sequence object, either automatically or by specifying a user defined color vector. The \code{cpal} function can be used to get or set the color palette of a previously defined sequence object.} 9 | \usage{ 10 | cpal(seqdata) 11 | cpal(seqdata) <- value 12 | } 13 | \arguments{ 14 | \item{seqdata}{a state sequence object as defined by the \code{\link{seqdef}} function.} 15 | \item{value}{a vector containing the colors, of length equal to the number of states in the alphabet. The colors can be passed as character strings representing color names such as returned by the \code{\link[grDevices]{colors}} function, as hexadecimal values or as RGB vectors using the \code{\link{rgb}} function. Each color is attributed to the corresponding state in the alphabet, the order being the one returned by the \code{\link{alphabet}}.} 16 | } 17 | \value{For '\code{cpal(seqdata)}' a vector containing the colors. 18 | 19 | For '\code{cpal(seqdata) <-}' the updated sequence object. 20 | } 21 | \seealso{ 22 | \code{\link{seqdef}} 23 | } 24 | \examples{ 25 | ## Creating a sequence object with the columns 13 to 24 26 | ## in the 'actcal' example data set 27 | ## The color palette is automatically set 28 | data(actcal) 29 | actcal.seq <- seqdef(actcal,13:24) 30 | 31 | ## Retrieving the color palette 32 | cpal(actcal.seq) 33 | seqiplot(actcal.seq) 34 | 35 | ## Setting a user defined color palette 36 | cpal(actcal.seq) <- c("blue","red", "green", "yellow") 37 | seqiplot(actcal.seq) 38 | } 39 | 40 | \author{Alexis Gabadinho} 41 | 42 | \keyword{Sequence-object attributes} 43 | -------------------------------------------------------------------------------- /man/seqfcheck.Rd: -------------------------------------------------------------------------------- 1 | \name{seqfcheck} 2 | \alias{seqfcheck} 3 | \title{Check if sequences are in the compressed or extended format} 4 | \description{Check whether \emph{seqdata} contains sequences in the compressed format (as character strings with states separated by a separator) or in the extended format (sequences stored in a matrix with each successive state in a separate column.) For a more detailed description of the compressed and extended format, see \cite{Gabadinho, 2009}. 5 | } 6 | \details{ 7 | Whether the sequence(s) are in compressed format is checked by counting the number of columns and searching for the \code{'-'} or \code{':'} separator. The function returns the separator if it has been found in the data. If the data contains more than one column, the data is supposed to be in the extended format, and \code{'X'} is returned, unless some state codes contain the \code{'-'} character (e.g., states coded with negative integer values), in which case \code{'-X'} is returned. 8 | 9 | } 10 | \usage{ 11 | seqfcheck(seqdata) 12 | } 13 | \arguments{ 14 | \item{seqdata}{a vector, data frame or matrix containing sequence data.} 15 | } 16 | \value{a character string coding the format of the sequence data, either \code{':'},\code{'-'},\code{'X'} or \code{'-X'}.} 17 | \seealso{ 18 | \code{\link{seqconc}}, \code{\link{seqdecomp}} 19 | } 20 | 21 | \references{ 22 | Gabadinho, A., G. Ritschard, M. Studer and N. S. Müller (2009). Mining Sequence Data in \code{R} with \code{TraMineR}: A user's guide. \emph{Department of Econometrics and Laboratory of Demography, University of Geneva}. 23 | } 24 | 25 | \examples{ 26 | ## The sequences in the actcal data set 27 | ## are in the extended format 28 | data(actcal) 29 | head(actcal[,13:24]) 30 | seqfcheck(actcal[,13:24]) 31 | 32 | ## The sequences in the famform data set 33 | ## are in the compressed format 34 | data(famform) 35 | famform 36 | seqfcheck(famform) 37 | } 38 | 39 | \author{Alexis Gabadinho} 40 | 41 | \keyword{Data handling} 42 | \keyword{internal} 43 | -------------------------------------------------------------------------------- /src/ffunctions.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | // TraMineR 2, Pierre-Alexandre Fonta (2017): not deleted because it used by seqLLCS() in the JSS article 5 | void cLCS(int *iseq, int *jseq , double *length, int *result) { 6 | 7 | int np,mp,max,a,b,i,j,n,m,temp; 8 | n = (int)length[1]; 9 | m = (int)length[0]; 10 | np = n + 1; 11 | mp = m + 1; 12 | // Rprintf("m = %i, n = %i \n", m, n); 13 | // int** L = (int**) malloc(np * sizeof(int*)); 14 | // for (k = 0; k< mp;k++) L[k] = (int*)malloc(mp * sizeof(int)); 15 | int L [mp][np]; 16 | //Rprintf("Creation du tableau ok"); 17 | for (i = 0; i OM 63 | ## ---------- 64 | ccosts <- seqsubm(biofam.seq, method = "CONSTANT", cval = 2) 65 | ccosts 66 | 67 | biofam.om2 <- seqdist(biofam.seq, method = "OM", indel = 1, sm = ccosts) 68 | biofam.om2[1:10, 1:10] 69 | 70 | all.equal(biofam.om2, biofam.lcs) 71 | 72 | ## --- 73 | ## DHD 74 | ## --- 75 | dhdcosts <- seqsubm(biofam.seq, method = "TRATE", time.varying = TRUE) 76 | 77 | biofam.dhd <- seqdist(biofam.seq, method = "DHD", sm = dhdcosts) 78 | 79 | 80 | ## devAskNewPage(oask) 81 | -------------------------------------------------------------------------------- /R/seqdss.R: -------------------------------------------------------------------------------- 1 | ## ======================================= 2 | ## Extracts distinct states from sequences 3 | ## ======================================= 4 | 5 | seqdss <- function(seqdata, with.missing=FALSE) { 6 | 7 | if (!inherits(seqdata,"stslist")) 8 | stop("data is NOT a sequence object, see seqdef function to create one") 9 | 10 | nbseq <- nrow(seqdata) 11 | 12 | sl <- seqlength(seqdata, with.missing=TRUE) 13 | maxsl <- max(sl) 14 | 15 | void <- attr(seqdata, "void") 16 | statl <- attr(seqdata,"alphabet") 17 | nr <- attr(seqdata, "nr") 18 | 19 | trans <- matrix(void, nrow=nbseq, ncol=maxsl) 20 | 21 | if (with.missing) { 22 | statl <- c(statl, nr) 23 | } 24 | 25 | seqdatanum <- seqasnum(seqdata, with.missing=with.missing) 26 | 27 | if (!with.missing) 28 | seqdatanum[is.na(seqdatanum)] <- -99 29 | 30 | maxcol <- 0 31 | for (i in 1:nbseq) { 32 | idx <- 1 33 | j <- 1 34 | 35 | tmpseq <- seqdatanum[i,] 36 | 37 | while (idx <= sl[i]) { 38 | iseq <- tmpseq[idx] 39 | 40 | while (idx < sl[i] & (tmpseq[idx+1]==iseq || tmpseq[idx+1]==-99)) { 41 | idx <- idx+1 42 | } 43 | 44 | ## The range of the numeric alphabet 45 | ## obtained with seqasnum is 0..n 46 | if (iseq!=-99) { 47 | trans[i,j] <- statl[(iseq+1)] 48 | j <- j+1 49 | } 50 | idx <- idx+1 51 | } 52 | if (j>maxcol) {maxcol <- j} 53 | 54 | } 55 | ## drop=FALSE ensures that the result is a matrix even if trans has only one row 56 | trans <- trans[,1:(maxcol-1), drop=FALSE] 57 | 58 | trans <- 59 | suppressMessages( 60 | seqdef(trans, alphabet=alphabet(seqdata), 61 | labels=stlab(seqdata), 62 | missing=nr, right=NA, 63 | cnames=paste("ST",seq(1:(maxcol-1)),sep=""), 64 | cpal=cpal(seqdata), 65 | id=rownames(seqdata), 66 | weights=attr(seqdata, "weights"))) 67 | 68 | return(trans) 69 | } 70 | -------------------------------------------------------------------------------- /R/seqmodst.R: -------------------------------------------------------------------------------- 1 | ## ==================== 2 | ## Modal state sequence 3 | ## ==================== 4 | 5 | seqmodst <- function(seqdata, weighted=TRUE, with.missing=FALSE) { 6 | 7 | if (!inherits(seqdata,"stslist")) 8 | stop("data is not a sequence object, see seqdef function to create one", call.=FALSE) 9 | 10 | slength <- ncol(seqdata) 11 | statl <- alphabet(seqdata) 12 | cnames <- colnames(seqdata) 13 | 14 | if (with.missing) { 15 | statl <- c(statl, attr(seqdata,"nr")) 16 | } 17 | 18 | ## State distribution 19 | freq <- seqstatd(seqdata, weighted, with.missing)$Frequencies 20 | 21 | ctype <- matrix(nrow=1, ncol=slength) 22 | stfreq <- matrix(nrow=1, ncol=slength) 23 | colnames(stfreq) <- cnames 24 | rownames(stfreq) <- "Freq." 25 | colnames(ctype) <- cnames 26 | 27 | ## Constructing the transversal modal sequence 28 | for (i in 1:slength) { 29 | smax <- which(freq[,i]==max(freq[,i]))[1] 30 | stfreq[,i] <- freq[smax,i] 31 | ctype[,i] <- statl[smax] 32 | } 33 | 34 | res <- suppressMessages(seqdef(ctype, alphabet=alphabet(seqdata), 35 | missing=attr(seqdata,"nr"), nr=attr(seqdata,"nr"), 36 | left=NA, gaps=NA, right=NA, 37 | labels=stlab(seqdata), 38 | cpal=cpal(seqdata), missing.color=attr(seqdata,"missing.color"), 39 | xtstep=attr(seqdata, "xtstep"), 40 | tick.last=attr(seqdata, "tick.last"))) 41 | 42 | nbocc <- length(seqfind(res, seqdata)) 43 | 44 | ## Distance to modal state sequence 45 | ## if (dist) 46 | ## dist.modst <- seqdist(seqdata, refseq=res, ...) 47 | ## else 48 | ## dist.modst <- NULL 49 | 50 | class(res) <- c("stslist.modst", class(res)) 51 | 52 | attr(res, "Frequencies") <- stfreq 53 | ## attr(res, "Distances") <- dist.modst 54 | attr(res, "Occurrences") <- nbocc 55 | 56 | ## Weights 57 | weights <- attr(seqdata, "weights") 58 | 59 | if (!weighted || is.null(weights)) { 60 | weights <- rep(1.0, nrow(seqdata)) 61 | } 62 | if (all(weights==1)) 63 | weighted <- FALSE 64 | 65 | attr(res, "nbseq") <- sum(weights) 66 | attr(res,"weighted") <- weighted 67 | 68 | return(res) 69 | } 70 | -------------------------------------------------------------------------------- /man/seqecontain.Rd: -------------------------------------------------------------------------------- 1 | \name{seqecontain} 2 | \alias{seqecontain} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{Check if sequence contains events} 5 | \description{ 6 | Check if an event sequence or subsequence contains given events 7 | } 8 | \usage{ 9 | seqecontain(eseq, event.list, unknown.exclude = FALSE, 10 | seq, eventList, exclude) 11 | } 12 | %- maybe also 'usage' for other objects documented here. 13 | \arguments{ 14 | \item{eseq}{A event sequence object (\code{seqelist}) or a an event subsequence object (\code{subseqelist})} 15 | \item{event.list}{A list of events} 16 | \item{unknown.exclude}{if \code{TRUE} the search is exclusive and returns \code{FALSE} for any subsequence containing an event that is not in \code{event.list}} 17 | \item{seq}{Deprecated. Use \code{eseq} instead.} 18 | \item{eventList}{Deprecated. Use \code{event.list} instead.} 19 | \item{exclude}{Deprecated. Use \code{unknown.exclude} instead.} 20 | } 21 | \details{ 22 | Checks, for each provided event sequence, if it contains one of the events in \code{event.list}. 23 | If \code{unknown.exclude} is \code{TRUE}, \code{seqecontain} looks if all events of the subsequence are in \code{event.list}. 24 | } 25 | \value{ 26 | A logical vector. 27 | } 28 | \seealso{ 29 | \code{\link{seqecreate}} for creating event sequence objects and \code{\link{seqefsub}} for creating event subsequence objects. 30 | } 31 | \examples{ 32 | data(actcal.tse) 33 | actcal.eseq <- seqecreate(actcal.tse) 34 | 35 | ##Searching for frequent subsequences, that is appearing at least 20 times 36 | fsubseq <- seqefsub(actcal.eseq,min.support=20) 37 | 38 | ##looking for subsequence with FullTime 39 | seqecontain(fsubseq,c("FullTime")) 40 | 41 | } 42 | % Add one or more standard keywords, see file 'KEYWORDS' in the 43 | % R documentation directory. 44 | %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line 45 | 46 | \author{Matthias Studer (with Gilbert Ritschard for the help page)} 47 | % Add one or more standard keywords, see file 'KEYWORDS' in the 48 | % R documentation directory. 49 | \keyword{Event sequences} 50 | -------------------------------------------------------------------------------- /man/seqstatf.Rd: -------------------------------------------------------------------------------- 1 | \name{seqstatf} 2 | \alias{seqstatf} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{State frequencies in the whole sequence data set} 5 | \description{ 6 | Overall frequency of each state of the alphabet in the state sequence object. 7 | } 8 | \usage{ 9 | seqstatf(seqdata, weighted = TRUE, with.missing=FALSE) 10 | } 11 | %- maybe also 'usage' for other objects documented here. 12 | \arguments{ 13 | \item{seqdata}{a sequence object as defined by the \code{\link{seqdef}} function.} 14 | \item{weighted}{Logical. Should frequencies account for weights when present in the state sequence object 15 | (see \code{\link{seqdef}}). Default is \code{TRUE}.} 16 | \item{with.missing}{Logical. Should non void missing states be treated as regular values? Default is \code{FALSE}.} 17 | } 18 | \details{ 19 | The \code{seqstatf} function computes the (weighted) count and frequency of each state of the alphabet in \code{seqdata}, i.e., the (weighted) sum of the occurrences of a state in \code{seqdata}. 20 | } 21 | \value{ 22 | A data frame with as many rows as states in the alphabet and two columns, one for the count (Freq) and one for the percentage frequencies (Percent). 23 | } 24 | 25 | \seealso{ 26 | \code{\link{seqstatd}} for the state distribution by time point (position), \code{\link{seqistatd}} for the state distribution within each sequence. 27 | } 28 | 29 | \examples{ 30 | ## Creating a sequence object from the actcal data set 31 | data(actcal) 32 | actcal.lab <- c("> 37 hours", "19-36 hours", "1-18 hours", "no work") 33 | actcal.seq <- seqdef(actcal, 13:24, labels=actcal.lab) 34 | 35 | ## States frequencies 36 | seqstatf(actcal.seq) 37 | 38 | ## Example with weights 39 | data(ex1) 40 | ex1.seq <- seqdef(ex1, 1:13, weights=ex1$weights) 41 | 42 | ## Unweighted 43 | seqstatf(ex1.seq, weighted=FALSE) 44 | 45 | ## Weighted 46 | seqstatf(ex1.seq, weighted=TRUE) 47 | } 48 | 49 | \author{Alexis Gabadinho} 50 | 51 | % Add one or more standard keywords, see file 'KEYWORDS' in the 52 | % R documentation directory. 53 | \keyword{State sequences} 54 | \keyword{Global characteristics} 55 | 56 | -------------------------------------------------------------------------------- /man/seqsubsn.Rd: -------------------------------------------------------------------------------- 1 | \name{seqsubsn} 2 | \alias{seqsubsn} 3 | \title{Number of distinct subsequences in a sequence.} 4 | \description{ 5 | Computes the number of distinct subsequences in a sequence using Elzinga's algorithm. 6 | } 7 | \details{ 8 | The function first searches for missing states in the sequences and if found, adds the missing state to the alphabet for the extraction of the distinct subsequences. A missing state in a sequence is considered as the occurrence of an additional symbol of the alphabet, and two or more consecutive missing states are considered as two or more occurrences of the same state. The \code{with.missing=TRUE} argument is used for calling the \code{\link{seqdss}} function when \code{DSS=TRUE}. 9 | } 10 | \usage{ 11 | seqsubsn(seqdata, DSS=TRUE, with.missing=FALSE) 12 | } 13 | \arguments{ 14 | \item{seqdata}{a state sequence object as defined by the \code{\link{seqdef}} function.} 15 | 16 | \item{DSS}{if \code{TRUE}, the sequences of Distinct Successive States (DSS, see \code{\link{seqdss}}) are first extracted (e.g., the DSS contained in \code{'D-D-D-D-A-A-A-A-A-A-A-D'} is \code{'D-A-D'}), and the number of distinct subsequences in the DSS is computed. If \code{FALSE}, the number of distinct subsequences is computed from sequences as they appear in the input sequence object. Hence the number of distinct subsequences is in most cases much higher with the \code{DSS=FALSE} option.} 17 | 18 | \item{with.missing}{logical: should non-void missing values be treated as a regular state? 19 | If \code{FALSE} (default) missing values are ignored.} 20 | 21 | } 22 | \value{Vector with the number of distinct subsequences for each sequence in the input state sequence object.} 23 | \seealso{ 24 | \code{\link{seqdss}}. 25 | } 26 | \examples{ 27 | data(actcal) 28 | actcal.seq <- seqdef(actcal,13:24) 29 | 30 | ## Number of subsequences with DSS=TRUE 31 | seqsubsn(actcal.seq[1:10,]) 32 | 33 | ## Number of subsequences with DSS=FALSE 34 | seqsubsn(actcal.seq[1:10,],DSS=FALSE) 35 | } 36 | 37 | \author{Alexis Gabadinho (with Gilbert Ritschard for the help page)} 38 | 39 | \keyword{Longitudinal characteristics} 40 | -------------------------------------------------------------------------------- /man/seqdss.Rd: -------------------------------------------------------------------------------- 1 | \name{seqdss} 2 | \alias{seqdss} 3 | \title{Extract sequences of distinct successive states} 4 | \description{ 5 | Extract the sequence of distinct successive states from each sequence in a object. 6 | } 7 | \details{ 8 | Returns a sequence object containing the sequences of distinct successive states (DSS). The spell durations are not taken into account. E.g., the DSS contained in \code{'D-D-D-D-A-A-A-A-A-A-A-D'} is \code{'D-A-D'}. Associated durations can be extracted with the \code{\link{seqdur}} function. 9 | 10 | When \code{{with.missing=TRUE}}, non-void missing values are considered as a regular state of the alphabet. For example, the DSS of \code{A-A-*-*-*-B-B-C-C-D} is \code{A-*-B-C-D}. 11 | 12 | When \code{with.missing=FALSE} (default) missing values are ignored and a substring \code{A-A-*-*-*A} for example will be considered as a single spell in \code{A} while the DSS of this substring would be \code{A-*-A} whith \code{with.missing=TRUE}. 13 | 14 | See \link{seqdef} on options for handling missing values when creating sequence objects. 15 | 16 | } 17 | \usage{ 18 | seqdss(seqdata, with.missing=FALSE) 19 | } 20 | \arguments{ 21 | \item{seqdata}{a sequence object as defined by the \code{\link{seqdef}} function.} 22 | \item{with.missing}{Should non-void missing values be considered as regular states? See Details.} 23 | } 24 | \value{a sequence object containing the distinct state sequence (DSS) for each sequence in the object given as argument. } 25 | \seealso{ 26 | \code{\link{seqdur}}. 27 | } 28 | \examples{ 29 | ## Creating a sequence object from columns 13 to 24 30 | ## in the 'actcal' example data set 31 | ## Here we retain the first 10 sequences only. 32 | data(actcal) 33 | actcal.seq <- seqdef(actcal[1:10,13:24]) 34 | 35 | ## Retrieving the DSS 36 | actcal.dss <- seqdss(actcal.seq) 37 | 38 | ## Displaying the DSS for the first 10 sequences 39 | actcal.dss 40 | 41 | ## Example with with.missing argument 42 | data(ex1) 43 | ex1.seq <- seqdef(ex1[, 1:13]) 44 | 45 | seqdss(ex1.seq) 46 | seqdss(ex1.seq, with.missing=TRUE) 47 | } 48 | \author{Alexis Gabadinho and Gilbert Ritschard} 49 | 50 | \keyword{Longitudinal characteristics} 51 | -------------------------------------------------------------------------------- /src/chisq.cpp: -------------------------------------------------------------------------------- 1 | #include "TraMineR.h" 2 | 3 | 4 | 5 | // Getting a element by its names 6 | 7 | 8 | extern "C" { 9 | 10 | SEXP tmrChisq(SEXP ChiTableS, SEXP tdimS, SEXP margeS) { 11 | TMRLOG(5, "tmrChisq\n"); 12 | SEXP distS; 13 | int n = INTEGER(tdimS)[0]; 14 | int n1 = n-1; 15 | PROTECT(distS=Rf_allocVector(REALSXP, n*(n-1)/2)); 16 | double * dist=REAL(distS); 17 | int col =INTEGER(tdimS)[1]; 18 | double * chitable=REAL(ChiTableS); 19 | double * marge=REAL(margeS); 20 | 21 | for(int i=0; i< n1; i++){ 22 | int base_index= TMRDISTINDEX(i+1, 1, n); 23 | for(int j = i+1; j < n; j++){ 24 | 25 | double cmpres=0; 26 | for(int c=0; c < col;c++){ 27 | double diff=chitable[MINDICE(i, c, n)]-chitable[MINDICE(j, c, n)]; 28 | cmpres += diff*diff/marge[c]; 29 | } 30 | dist[base_index+j] = sqrt(cmpres); 31 | } 32 | } 33 | UNPROTECT(1); 34 | return distS; 35 | } 36 | 37 | SEXP tmrChisqRef(SEXP ChiTableS, SEXP tdimS, SEXP margeS, SEXP refid) { 38 | TMRLOG(5, "tmrChisq\n"); 39 | SEXP distS; 40 | int n = INTEGER(tdimS)[0]; 41 | int nseq = n; 42 | int na = n; 43 | //int rseq= INTEGER(refid)[0]-1; 44 | int rseq1= INTEGER(refid)[0]; 45 | int rseq2= INTEGER(refid)[1]; 46 | if (rseq1 < rseq2) { 47 | nseq = rseq1; 48 | na = nseq * (rseq2 - rseq1); 49 | } else { 50 | rseq1 = rseq1 - 1; 51 | } 52 | //int n1 = n-1; 53 | PROTECT(distS=Rf_allocVector(REALSXP, na)); 54 | double * dist=REAL(distS); 55 | int col =INTEGER(tdimS)[1]; 56 | double * chitable=REAL(ChiTableS); 57 | double * marge=REAL(margeS); 58 | 59 | for (int rseq=rseq1; rseq",colnames(tevent)[i],sep="")]] <- ll 35 | } else { 36 | levent[[paste(rownames(tevent)[j],">",colnames(tevent)[i],sep="")]] <- as.character(tevent[j,i]) 37 | } 38 | } 39 | } 40 | for (i in 1:nseq) { 41 | ## First status=> entrance event (diagonal of tevent) 42 | s1 <- seqdata[i,1] 43 | e1 <- levent[[paste(s1,">",s1,sep="")]] 44 | 45 | if (!is.null(e1) && !is.na(e1[1])) { ## if NA, we don't generate an event 46 | for (k in e1){ 47 | ids[myi] <- id[i] 48 | times[myi] <- 0 49 | events[myi] <- k 50 | myi <- myi+1 51 | } 52 | } #end if 53 | ## Rest of the sequence 54 | for (j in 1:(slength-1)) { 55 | s1 <- seqdata[i,j] ## Status at time t 56 | s2 <- seqdata[i,j+1] ## Status at time t+1 57 | 58 | if (!is.na(s1) && !is.na(s2) && s2!=s1 && s1!="" && s2!="") { 59 | ev <- levent[[paste(s1,">",s2,sep="")]] 60 | if(!is.na(ev[1])){ 61 | for (k in ev){ 62 | ids[myi] <- id[i] 63 | times[myi] <- j 64 | events[myi] <- k 65 | myi <- myi+1 66 | } 67 | } 68 | } 69 | } 70 | } 71 | 72 | sel <- 1:(myi-1) 73 | trans <- data.frame(id=ids[sel],time=times[sel],event=events[sel]) 74 | return(trans) 75 | } 76 | -------------------------------------------------------------------------------- /man/plot.subseqelistchisq.Rd: -------------------------------------------------------------------------------- 1 | \name{plot.subseqelistchisq} 2 | \alias{plot.subseqelistchisq} 3 | \title{Plot discriminant subsequences} 4 | \description{ 5 | Plot the result of \code{\link{seqecmpgroup}} 6 | } 7 | \usage{ 8 | \method{plot}{subseqelistchisq}(x, ylim = "uniform", rows = NA, cols = NA, 9 | resid.levels = c(0.05,0.01), 10 | cpal = brewer.pal(1 + 2 * length(resid.levels), "RdBu"), vlegend = NULL, 11 | cex.legend = 1, ptype = "freq", legend.title = NULL, 12 | with.legend = TRUE, residlevels, legendcol, legend.cex, ...) 13 | } 14 | %- maybe also 'usage' for other objects documented here. 15 | \arguments{ 16 | \item{x}{The subsequences to plot (a \code{subseqelist} object).} 17 | \item{ylim}{if \code{"uniform"} all axes have same limits.} 18 | \item{rows}{Number of graphic rows} 19 | \item{cols}{Number of graphic columns} 20 | \item{resid.levels}{Significance levels used to colorize the Pearson residual} 21 | \item{cpal}{Color palette used to color the results} 22 | \item{vlegend}{When \code{TRUE} the legend is printed vertically, when \code{FALSE} it is printed horizontally. If \code{NULL} (default) the best position will be chosen.} 23 | \item{cex.legend}{Scale parameters for text legend.} 24 | \item{ptype}{If set to \code{"resid"}, Pearson residuals are plotted instead of frequencies} 25 | \item{legend.title}{Legend title.} 26 | \item{with.legend}{Logical. Should legend be displayed?} 27 | \item{residlevels}{Deprecated. Use \code{resid.levels} instead.} 28 | \item{legendcol}{Deprecated. Use \code{vlegend} instead.} 29 | \item{legend.cex}{Deprecated. Use \code{cex.legend} instead.} 30 | \item{\dots}{Additional parameters passed to \code{\link{barplot}}} 31 | } 32 | %\details{} 33 | \value{ 34 | nothing 35 | } 36 | %\references{ ~put references to the literature/web site here ~ } 37 | %\author{ ~~who you are~~ } 38 | %\note{ ~~further notes~~ 39 | 40 | % ~Make other sections like Warning with \section{Warning }{....} ~ 41 | %} 42 | \seealso{\code{\link{seqecmpgroup}}} 43 | 44 | \author{Matthias Studer (with Gilbert Ritschard for the help page)} 45 | % Add one or more standard keywords, see file 'KEYWORDS' in the 46 | % R documentation directory. 47 | \keyword{Event sequences} 48 | \keyword{Method} 49 | -------------------------------------------------------------------------------- /man/seqibad.Rd: -------------------------------------------------------------------------------- 1 | \name{seqibad} 2 | \alias{seqibad} 3 | % 4 | \author{Gilbert Ritschard} 5 | % 6 | \title{Badness index} 7 | % 8 | \description{ 9 | Badness index of each sequence, i.e. the sum of undesirableness of each state weighted by the potential to integrate that state in the sequence. 10 | } 11 | \usage{ 12 | seqibad(seqdata, pow=1, with.missing=FALSE, ...) 13 | } 14 | \arguments{ 15 | \item{seqdata}{a state sequence object (\code{stslist}) as returned by \code{\link[TraMineR]{seqdef}}.} 16 | 17 | \item{pow}{real. Exponent applied to the position in the sequence. Higher value increase the importance of recency (see \code{\link{seqintegration}}). Default is 1.} 18 | 19 | \item{with.missing}{logical: should non-void missing values be treated as a regular state? If \code{FALSE} (default) missing values are ignored.} 20 | 21 | \item{...}{arguments such as \code{stprec} or \code{state.order} required by \code{\link{seqprecstart}} to determine/normalize the state undesirableness degrees.} 22 | 23 | } 24 | \details{ 25 | For each sequence, the badness is the sum of the undesirableness of each state weighted by the potential to integrate the state. As long as \code{pow} is strictly greater than zero, the undesirableness of states occurring at the end of the sequence get higher weights than those at the beginning. The index reaches its maximum 1 for a sequence made of a single spell in the worst state and the minimum 0 for a sequence made of a single spell is the most favorable state. 26 | 27 | } 28 | 29 | \value{ 30 | A vector with the badness index for each sequence. 31 | } 32 | 33 | \references{ 34 | Ritschard, G. (2023), "Measuring the nature of individual sequences", \emph{Sociological Methods and Research}, 52(4), 2016-2049. \doi{10.1177/00491241211036156}. 35 | } 36 | 37 | \seealso{ 38 | \code{\link{seqintegr}}, \code{\link{seqidegrad}}, \code{\link{seqprecarity}} 39 | } 40 | \examples{ 41 | data(ex1) 42 | sx <- seqdef(ex1[,1:13], right="DEL") 43 | 44 | seqibad(sx) ## using original alphabet order 45 | seqibad(sx, stprec=c(1,2,3,6)) ## user defined undesirableness values 46 | seqibad(sx, with.missing=TRUE, state.order=c('A','B','C','D')) 47 | 48 | } 49 | \keyword{Longitudinal characteristics} 50 | -------------------------------------------------------------------------------- /demo/Events.R: -------------------------------------------------------------------------------- 1 | require(grDevices); require(graphics) 2 | oask <- devAskNewPage(dev.interactive(orNone = TRUE)) 3 | 4 | library(TraMineR) 5 | data(actcal) 6 | actcal.seq <- seqdef(actcal[,13:24]) 7 | transition <- seqetm(actcal.seq, method="transition") 8 | transition[1,1:4] <- c("FullTime", "Decrease,PartTime", 9 | "Decrease,LowPartTime", "Stop") 10 | transition[2,1:4] <- c("Increase,FullTime", "PartTime", 11 | "Decrease,LowPartTime", "Stop") 12 | transition[3,1:4] <- c("Increase,FullTime", "Increase,PartTime", 13 | "LowPartTime", "Stop") 14 | transition[4,1:4] <- c("Start,FullTime", "Start,PartTime", 15 | "Start,LowPartTime", "NoActivity") 16 | print(transition) 17 | 18 | actcal.eseq <- seqecreate(actcal.seq, tevent=transition) 19 | 20 | fsubseq <- seqefsub(actcal.eseq, min.support=100) 21 | msubcount <- seqeapplysub(fsubseq, method="count") 22 | #First lines... 23 | msubcount[1:9, 6:7] 24 | ## Using time constraints 25 | ## Searching subsequences starting in summer (between June and September) 26 | fsubseq <- seqefsub(actcal.eseq, min.support=10, 27 | constraint=seqeconstraint(age.min=6, age.max=9)) 28 | fsubseq[1:10] 29 | ## Searching subsequences occurring in summer (between June and September) 30 | fsubseq <- seqefsub(actcal.eseq, min.support=10, 31 | constraint=seqeconstraint(age.min=6, age.max=9, age.max.end=9)) 32 | fsubseq[1:10] 33 | ## Searching subsequences enclosed in a 6 months period 34 | ## and with a maximum gap of 2 months 35 | fsubseq <- seqefsub(actcal.eseq, min.support=10, 36 | constraint=seqeconstraint(max.gap=2, window.size=6)) 37 | fsubseq[1:10] 38 | 39 | ## Looking for frequent subsequences 40 | fsubseq <- seqefsub(actcal.eseq, pmin.support=0.01) 41 | ## Frequences of 10 first subsequences 42 | omar <- par(mar=c(5,4,4,2)+.1) 43 | plot(fsubseq[1:10], col="cyan") 44 | par(omar) 45 | ## looking for subsequence with FullTime 46 | seqecontain(fsubseq, c("FullTime")) 47 | 48 | 49 | ## Looking for the discriminating subsequences for sex 50 | sexsubseq <- seqecmpgroup(fsubseq, group=actcal$sex, 51 | method="bonferroni") 52 | ## Plotting the ten most discriminating subsequences in 2 x 4 format 53 | ## frequencies 54 | plot(sexsubseq[1:10],ptype="freq") 55 | ## residuals 56 | plot(sexsubseq[1:10],ptype="resid") 57 | 58 | devAskNewPage(oask) 59 | -------------------------------------------------------------------------------- /src/treeeventnode.h: -------------------------------------------------------------------------------- 1 | #ifndef TREEEVENTNODE_H 2 | #define TREEEVENTNODE_H 3 | #include "treeeventmap.h" 4 | #include 5 | #include 6 | #include "eventseq.h" 7 | #include "constraint.h" 8 | 9 | 10 | class TreeEventMap; 11 | class TreeEventNode { 12 | //type of the event 13 | int type; 14 | //CurrentSupport 15 | double support; 16 | //Last sequence that has incremented the support of this subsequence 17 | int lastID; 18 | //Next event at same time (but event bigger!) 19 | TreeEventMap brother; 20 | //Next event with gap 21 | TreeEventMap child; 22 | static int nodeCount; 23 | public: 24 | static int getNodeCount(); 25 | //Ctor 26 | TreeEventNode(const int& t); 27 | //Dtor 28 | virtual ~TreeEventNode(); 29 | //Ajoute une séquence et l'ensemble des sous-séquences qui la compose. Méthode récursive (dernier paramètre = paramètre interne) 30 | //void addSequence(Sequence *s,const double &maxGap,const double &windowSize); 31 | //Ajoute une séquence et l'ensemble des sous-séquences qui la compose. Méthode récursive (dernier paramètre = paramètre interne) 32 | // void addSequenceInternal(Sequence *s, SequenceEventNode * en, const double &maxGap,const double &windowSize,const double & ageMax, const double &gapConsumed, const double& currentAge, const int& k, const int¤tK); 33 | void addSequenceInternal(Sequence *s, SequenceEventNode * en, Constraint * cst, const double &gapConsumed, const double& currentAge, const int& k, const int¤tK); 34 | 35 | //Simplifie l'arbre pour enlever l'ensemble des sous-séquences qui ne satisfont pas le support minimum (nb occurrences) 36 | void simplifyTree(double minSup); 37 | //Give an overview of this tree (paramètre prof==profondeur, interne) 38 | void print(const int & prof=0, const bool& isbrother=true); 39 | //Type of this event 40 | const int& getType() { 41 | return this->type; 42 | } 43 | //Actual support of this event 44 | const double& getSupport() { 45 | return this->support; 46 | } 47 | int countSubsequence(double minSup); 48 | void getSubsequences(SEXP result,double * isupport, Sequence *s, int *index,const double &step, SEXP classname,EventDictionary * ed); 49 | void clearSupport(); 50 | }; 51 | #endif // TREEEVENTNODE_H 52 | -------------------------------------------------------------------------------- /man/seqmaintokens.Rd: -------------------------------------------------------------------------------- 1 | \name{seqmaintokens} 2 | \alias{seqmaintokens} 3 | %- Also NEED an '\alias' for EACH other topic documented here. 4 | \title{Indexes of most frequent tokens 5 | %% ~~function to do ... ~~ 6 | } 7 | \description{Extracts the indexes of the most frequent token, either the \code{k} most frequent tokens or the tokens that occur on average more than \code{mint} times. 8 | } 9 | \usage{ 10 | seqmaintokens(seqdata, k=8L, mint=NULL, ...) 11 | } 12 | \arguments{ 13 | \item{seqdata}{state sequence \code{stslist} object as produced by \code{\link{seqdef}}. 14 | } 15 | \item{k}{Integer. Number of main states. 16 | } 17 | \item{mint}{Real. Minimal mean number of occurrences per sequence. 18 | } 19 | \item{...}{Additional arguments passed to \code{\link{seqmeant}} 20 | } 21 | } 22 | 23 | \details{ 24 | When \code{mint} is \code{NULL}, indexes of the \code{k} most frequent tokens. Otherwise, indexes of tokens occurring on average more than \code{tmin} times are returned as long as their number does not exceed \code{k}. If more than \code{k}, indexes of the \code{k} most frequent are returned. 25 | } 26 | 27 | \value{ 28 | Vector of indexes of the most frequent tokens repecting order of the alphabet. 29 | } 30 | 31 | \seealso{\code{\link{seqmeant}} 32 | } 33 | 34 | %%\references{ 35 | %% ~put references to the literature/web site here ~ 36 | %%} 37 | 38 | \author{ 39 | Gilbert Ritschard 40 | } 41 | 42 | \examples{ 43 | data(biofam) 44 | b.lab <- c("Parent", 45 | "Left", 46 | "Married", 47 | "Left+Marr", 48 | "Child", 49 | "Left+Child", 50 | "Left+Marr+Child", 51 | "Divorced" 52 | ) 53 | b.short <- c("P","L","M","L+M","C","L+C","L+M+C","D") 54 | set.seed(5) 55 | cases <- sample(nrow(biofam),100) 56 | b.seq <- seqdef(biofam[cases,10:25], labels=b.lab, states=b.short, 57 | weights=biofam[cases,"wp00tbgs"]) 58 | 59 | ## Tokens occurring at least once on average 60 | alphabet(b.seq)[seqmaintokens(b.seq, mint=1)] 61 | #[1] "P" "L" "L+M" "L+M+C" 62 | 63 | ## Three more frequent tokens 64 | main.tokens <- seqmaintokens(b.seq, k=3) 65 | ## Labels of main tokens 66 | attr(b.seq,"labels")[main.tokens] 67 | #[1] "Parent" "Left" "Left+Marr+Child" 68 | ## Colors of main tokens 69 | cpal(b.seq)[main.tokens] 70 | #[1] "#7FC97F" "#BEAED4" "#BF5B17"} -------------------------------------------------------------------------------- /src/prefixtree.cpp: -------------------------------------------------------------------------------- 1 | #include "prefixtree.h" 2 | #include "eventseq.h" 3 | #include "constraint.h" 4 | //#include "subsequence.h" 5 | 6 | PrefixTree::PrefixTree() { 7 | //ctor 8 | } 9 | //dtor 10 | PrefixTree::~PrefixTree() { 11 | /*TreeEventMapIterator it; 12 | for (it = child.begin();it != child.end();it++) { 13 | delete it->second; 14 | }*/ 15 | this->child.clearAllPointers(); 16 | } 17 | 18 | //void PrefixTree::addSequence(Sequence *s,const double &maxGap,const double &windowSize, const double & ageMin, const double & ageMax,const double & ageMaxEnd, const int& k) { 19 | void PrefixTree::addSequence(Sequence *s,Constraint * cst, const int& k) { 20 | 21 | //subsequences (actually first symbol) 22 | if (!s->hasEvent())return; 23 | SequenceEventNode * e=s->getEvent(); 24 | //Iterator to search for brother and child 25 | TreeEventMapIterator it; 26 | TreeEventNode *ten=NULL; 27 | double age=0; 28 | while (e!=NULL) { 29 | age+=e->getGap(); 30 | //Only start when ageMin is reached 31 | if (age>cst->getageMaxBegin()) break; 32 | if (age>=cst->getageMinBegin()) { 33 | it=this->child.find(e->getType()); 34 | if (it!=this->child.end()) { 35 | it->second->addSequenceInternal(s,e,cst,0,age,k, 2); 36 | } else if (k==1) { //Build new node only when k==1 37 | ten=new TreeEventNode(e->getType()); 38 | this->child[e->getType()]=ten; 39 | //Rprintf("Adding event %i\n",ten->getType()); 40 | ten->addSequenceInternal(s,e,cst,0,age,k, 2); 41 | } 42 | } 43 | e=e->getNext();//Get Next element 44 | //Rprintf("Adding starting at event %i",ten->getType()); 45 | } 46 | } 47 | 48 | 49 | 50 | void PrefixTree::simplifyTree(double minSup) { 51 | 52 | this->child.simplifyTreeMap(minSup); 53 | } 54 | 55 | //Give an overview of this tree (paramètre prof==profondeur, interne) 56 | void PrefixTree::print() { 57 | this->child.print(0,true); 58 | } 59 | 60 | 61 | 62 | int PrefixTree::countSubsequence(double minSup) { 63 | return this->child.countSubsequence(minSup); 64 | } 65 | 66 | 67 | void PrefixTree::getSubsequences(SEXP result,double * support, int *index, SEXP classname,EventDictionary * ed) { 68 | this->child.getSubsequences(result,support,NULL,index,0,classname,ed); 69 | } 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /R/seqrf.R: -------------------------------------------------------------------------------- 1 | ## creating rf sequence object 2 | seqrf <- function(seqdata, diss, k=NULL, sortv="mds", weights=NULL, 3 | weighted=TRUE, 4 | grp.meth = "prop", squared = FALSE, pow = NULL){ 5 | 6 | if (is.null(weights) & weighted) weights <- attr(seqdata,"weights") 7 | if (!weighted) weights <- NULL 8 | ## computing sortv 9 | if (!is.null(sortv)) { 10 | if (length(sortv)==1){ 11 | if (sortv %in% c("from.start", "from.end")) { 12 | end <- if (sortv=="from.end") { max(seqlength(seqdata)) } else { 1 } 13 | beg <- if (sortv=="from.end") { 1 } else { max(seqlength(seqdata)) } 14 | 15 | sortv <- do.call(order, as.data.frame(seqdata)[,end:beg]) 16 | sortv <- order(sortv) 17 | } else if (sortv != "mds") { 18 | stop(call.=FALSE, "If of length one, sortv must be one of 'mds', 'from.start', and 'from.end'") 19 | } 20 | } else if (length(sortv)!=nrow(seqdata)) { 21 | stop(call.=FALSE, "sortv must contain one value for each row in the sequence object ") 22 | } else { 23 | if (is.factor(sortv)) { sortv <- as.integer(sortv) } 24 | } 25 | } 26 | rf <- dissrf(diss, k=k, sortv=sortv, weights=weights, 27 | grp.meth = grp.meth, squared = squared, pow = pow) 28 | seqtoplot <- seqdata[rf[["medoids"]],] 29 | attr(seqtoplot,"weights") <- rf[["heights"]] 30 | srf <- list(seqtoplot=seqtoplot,rf=rf) 31 | class(srf) <- c("seqrf",class(srf)) 32 | if (inherits(rf,"dissrfprop")){ 33 | class(srf) <- c("seqrfprop",class(srf)) 34 | } 35 | else 36 | class(srf) <- c("seqrfcrisp",class(srf)) 37 | 38 | return(srf) 39 | } 40 | 41 | ###### 42 | print.seqrf <- function(x, ...){ 43 | print(x[["seqtoplot"]], ...) 44 | } 45 | 46 | ##### 47 | summary.seqrf <- function(object, format="SPS", dist.idx = 1:10, ...){ 48 | #limit <- max(seqlength(seqdss(x[["seqtoplot"]]))) 49 | sry <- summary(object[["rf"]], dist.idx = dist.idx) 50 | meds <- suppressMessages(seqformat(object[["seqtoplot"]], to=format, compress=TRUE, 51 | SPS.out = list(xfix = "", sdsep = "/"))) 52 | mnames <- sub("\\..","",x=rownames(meds)) 53 | ##print(id) 54 | medoids <- data.frame(index=sry[["medoids"]],names=mnames,medoids=meds) 55 | rownames(medoids)<-NULL 56 | sry[["medoids"]] <- medoids 57 | return(sry) 58 | } 59 | -------------------------------------------------------------------------------- /R/plot.stslist.meant.R: -------------------------------------------------------------------------------- 1 | plot.stslist.meant <- function(x, cpal = NULL, ylab = NULL, yaxis = TRUE, 2 | xaxis = TRUE, cex.axis = par("cex.axis"), ylim = NULL, bar.labels = NULL, 3 | cex.barlab = cex.axis, offset.barlab = .1, cex.plot, ...) { 4 | 5 | TraMineR.check.depr.args(alist(cex.axis = cex.plot)) 6 | 7 | ## Storing the optional graphical parameters in a list 8 | glist <- list(...) 9 | parlist <- par() 10 | glist <- glist[names(glist) %in% names(parlist)] 11 | 12 | sep.ylab <- (isFALSE(yaxis) && (is.null(ylab) || !is.na(ylab))) 13 | cex.lab <- par("cex.lab") 14 | if ("cex.lab" %in% names(list(...))) cex.lab <- list(...)[["cex.lab"]] 15 | 16 | n <- attr(x,"nbseq") 17 | seql <- length(attr(x,"xtlab")) 18 | errbar <- attr(x, "se") 19 | 20 | weighted <- attr(x, "weighted") 21 | if (weighted) {wlab <- "weighted "} 22 | else {wlab <- NULL} 23 | 24 | if (is.null(ylab)) 25 | ylab <- paste("Mean time (", wlab, "n=",round(n,2),")",sep="") 26 | 27 | if (is.null(ylim)) 28 | ylim <- c(0,seql) 29 | 30 | if (is.null(cpal)) 31 | cpal <- attr(x,"cpal") 32 | 33 | if (sep.ylab) { 34 | sylab <- ylab 35 | ylab <- NA 36 | } 37 | 38 | mt <- as.vector(x[,"Mean"]) 39 | mp <- barplot(mt, 40 | ## mgp=c(2.5,0.6,0), 41 | names.arg=if (xaxis) rownames(x) else NULL, 42 | cex.names=cex.axis, 43 | cex.axis=cex.axis, 44 | col=cpal, 45 | ylim=ylim, 46 | ylab=ylab, 47 | axes=FALSE, 48 | ...) 49 | 50 | ## Plotting the axes 51 | ## axis(1, at=1:nbstat, labels=ltext, cex.axis=cex.axis) 52 | 53 | if (yaxis){ 54 | plist <- list(side=2, at=round(seq(0, max(ylim), length.out=6),0), cex.axis=cex.axis) 55 | do.call(axis, args=c(plist,glist)) 56 | #axis(2, at=round(seq(0, max(ylim), length.out=6),0), cex.axis=cex.axis, ...) 57 | } 58 | 59 | if (errbar){ 60 | se.mt <- x[,"SE"] 61 | xx <- 1:nrow(x) 62 | df=attr(x,"nbseq")-1 63 | if(df >= 1) { 64 | qt <- qt(.975, df=df) 65 | tmr.errbar(1.2*xx - .5, mt, mt-qt*se.mt, mt+qt*se.mt, add=TRUE) 66 | } 67 | else { 68 | warning(paste("Error bars not displayed because df =", df, "too small")) 69 | } 70 | } 71 | 72 | if (!is.null(bar.labels)) { 73 | text(mp, mt + offset.barlab*ylim[2], format(bar.labels), xpd = TRUE, cex = cex.barlab) 74 | 75 | } 76 | if (sep.ylab) 77 | title(ylab=sylab, line=1, cex.lab=cex.lab) 78 | 79 | } 80 | -------------------------------------------------------------------------------- /R/seqtab.R: -------------------------------------------------------------------------------- 1 | ## ========================= 2 | ## Sequences frequency table 3 | ## ========================= 4 | 5 | seqtab <- function(seqdata, idxs = 1:10, weighted = TRUE, format = "SPS", tlim) { 6 | 7 | TraMineR.check.depr.args(alist(idxs = tlim)) 8 | 9 | if (!inherits(seqdata,"stslist")) 10 | stop("data is not a sequence object, use seqdef function to create one") 11 | 12 | if(is.null(idxs) || (length(idxs)>1 && min(idxs)<1) || any(idxs<0)) 13 | stop("idxs should be a non negative integer or a strictly positive vector.") 14 | 15 | 16 | ## Eliminating empty sequences 17 | seqdata <- seqdata[rowSums(seqdata!=attr(seqdata,"nr"))!=0,] 18 | 19 | ## Weights 20 | weights <- attr(seqdata, "weights") 21 | 22 | if (!weighted || is.null(weights)) { 23 | weights <- rep(1.0, nrow(seqdata)) 24 | } 25 | ## Also takes into account that in unweighted sequence objects created with 26 | ## older TraMineR versions the weights attribute is a vector of 1 27 | ## instead of NULL 28 | if (all(weights==1)) 29 | weighted <- FALSE 30 | 31 | if (seqfcheck(seqdata)=="-X") 32 | warning("'-' character in state codes may cause invalid results") 33 | 34 | if (format=="SPS") { 35 | seqlist <- suppressMessages(seqformat(seqdata, from = "STS", to = "SPS", 36 | SPS.out = list(xfix = "", sdsep = "/"), compress = TRUE)) 37 | } 38 | else if (format=="STS") 39 | seqlist <- seqconc(seqdata) 40 | else 41 | stop("Format must be one of: STS or SPS") 42 | 43 | Freq <- tapply(weights, seqlist, sum) 44 | 45 | Freq <- sort(Freq, decreasing=TRUE) 46 | Percent <- Freq/sum(Freq)*100 47 | 48 | nbuseq <- length(Freq) 49 | 50 | if (idxs[1]==0 || max(idxs)>nbuseq) { 51 | idxs <- 1:nbuseq 52 | } 53 | 54 | ##if (nbuseq >1){ 55 | idxf <- match(names(Freq), seqlist)[idxs] 56 | res <- seqdata[idxf,] 57 | ##} 58 | ##else { 59 | ## res <- names(Freq) 60 | ##} 61 | table <- data.frame(Freq, Percent)[idxs,] 62 | 63 | ## ================================== 64 | ## DEFINING CLASS AND SOME ATTRIBUTES 65 | ## ================================== 66 | class(res) <- c("stslist.freq",class(res)) 67 | 68 | ## Setting the weights of the object equal to the frequencies 69 | attr(res, "weights") <- table$Freq 70 | 71 | attr(res,"freq") <- table 72 | attr(res,"nbseq") <- sum(weights) 73 | attr(res,"weighted") <- weighted 74 | attr(res,"idxs") <- idxs 75 | attr(res,"idxf") <- idxf 76 | attr(res,"format") <- format 77 | 78 | return(res) 79 | } 80 | -------------------------------------------------------------------------------- /man/seqtransn.Rd: -------------------------------------------------------------------------------- 1 | \name{seqtransn} 2 | \alias{seqtransn} 3 | \title{Number of transitions in a sequence} 4 | \description{ 5 | Computes the number of transitions (state changes) in each sequence of a sequence object. 6 | } 7 | \usage{ 8 | seqtransn(seqdata, with.missing=FALSE, norm=FALSE, pweight=FALSE) 9 | } 10 | \arguments{ 11 | \item{seqdata}{a state sequence object as defined by the 12 | \code{\link{seqdef}} function.} 13 | 14 | \item{with.missing}{logical: should non-void missing values be treated as a regular state? If \code{FALSE} (default) missing values are ignored.} 15 | 16 | \item{norm}{logical. If set as \code{TRUE}, the number of transitions is 17 | divided by its theoretical maximum, length of the sequence minus 18 | 1. When the length of the sequence is 1, the normalized value is set as 0.} 19 | 20 | \item{pweight}{logical. EXPERIMENTAL! If set as \code{TRUE}, return count of transitions weighted 21 | by their probability to not occur to give higher weights to rare transitions.} 22 | } 23 | 24 | \details{A transition in a sequence is a state change between 25 | time/position \eqn{t} and \eqn{t+1}. For example, the sequence 26 | \code{"A-A-A-A-B-B-A-D-D-D"} contains 3 transitions. The maximum 27 | number of transitions a sequence can contain is \eqn{\ell-1}{l-1} 28 | where \eqn{\ell}{l} is the length of the sequence. The number of 29 | transitions is obtained by subtracting 1 to the length of the sequence 30 | of distinct successive states (DSS). } 31 | 32 | \value{a one column matrix with the number of transitions 33 | in each sequence.} 34 | 35 | \references{ 36 | Gabadinho, A., G. Ritschard, N. S. Müller and M. Studer (2011). Analyzing and Visualizing State Sequences in R with TraMineR. \emph{Journal of Statistical Software} \bold{40}(4), 1-37. 37 | } 38 | 39 | \seealso{ 40 | \code{\link{seqdss}}. 41 | } 42 | 43 | \examples{ 44 | ## Creating a sequence object from columns 13 to 24 45 | ## in the 'actcal' example data set 46 | data(actcal) 47 | actcal.seq <- seqdef(actcal,13:24) 48 | 49 | ## Computing the number of transitions 50 | actcal.trans <- seqtransn(actcal.seq) 51 | 52 | ## Displaying number of transitions in the first 10 sequences 53 | actcal.trans[1:10] 54 | 55 | ## Example with with.missing argument 56 | data(ex1) 57 | ex1.seq <- seqdef(ex1, 1:13) 58 | 59 | seqtransn(ex1.seq) 60 | seqtransn(ex1.seq, with.missing=TRUE) 61 | } 62 | 63 | \author{Alexis Gabadinho (with Gilbert Ritschard for the help page)} 64 | 65 | \keyword{Longitudinal characteristics} 66 | --------------------------------------------------------------------------------