├── data
    ├── ex1.rda
    ├── ex2.rda
    ├── actcal.RData
    ├── biofam.RData
    ├── ex1.txt.gz
    ├── famform.rda
    ├── mvad.RData
    ├── bfspell.RData
    └── actcal.tse.RData
├── inst
    └── doc
    │   └── TraMineR-state-sequence.pdf
├── demo
    ├── 00Index
    ├── Seqdist.R
    └── Events.R
├── R
    ├── TraMineR-length.R
    ├── debuglevel.R
    ├── seqHtplot.R
    ├── entropy.R
    ├── seqe2tse.R
    ├── seqfplot.R
    ├── TraMineR-mscore.R
    ├── cpal.R
    ├── seqmsplot.R
    ├── seqrfplot.R
    ├── seqrplot.R
    ├── stlab.R
    ├── seqmtplot.R
    ├── seqcomp.R
    ├── seqedist.R
    ├── seqshift.R
    ├── seqdplot.R
    ├── seqiplot.R
    ├── stslist.meant-methods.R
    ├── dist2matrix.R
    ├── TraMineR-extractVer.R
    ├── seqdim.R
    ├── seqgbar.R
    ├── cpal-set.R
    ├── seqgen.R
    ├── stlab-set.R
    ├── seqpm.R
    ├── read.tda.mdist.R
    ├── seqxtract.R
    ├── seqecontain.R
    ├── checktriangleineq.R
    ├── seqfcheck.R
    ├── stslist.freq-methods.R
    ├── seqsep.R
    ├── seqfpos.R
    ├── seqfposend.R
    ├── seqlength.R
    ├── alphabet.R
    ├── seqhasmiss.R
    ├── zzz.R
    ├── seqibad.R
    ├── seqstatf.R
    ├── seqfind.R
    ├── TraMineRInternal.R
    ├── seqasnum.R
    ├── seqici.R
    ├── seqmpos.R
    ├── seqeid.R
    ├── seqLLCS.R
    ├── seqLLCP.R
    ├── seqformat-STS_to_SRS.R
    ├── seqlength-align.R
    ├── seqient.R
    ├── alphabet-set.R
    ├── stslist.modst-methods.R
    ├── seqientdiff.R
    ├── seqmaintokens.R
    ├── stslist.statd-methods.R
    ├── seqistatd.R
    ├── dissvar.R
    ├── TraMineR-check_helpers.R
    ├── seqformat-STS_to_DSS.R
    ├── seqelength.R
    ├── seqformat-SPS_to_STS.R
    ├── seqivolatility.R
    ├── seqstatl.R
    ├── seqconc.R
    ├── seqdecomp.R
    ├── seqeweight.R
    ├── seqnum.R
    ├── diss.rep-methods.R
    ├── seqtransn.R
    ├── TraMineR-legend.R
    ├── stslist.rep-methods.R
    ├── TraMineR-group.R
    ├── seqformat-STS_to_SPS.R
    ├── seqdur.R
    ├── TraMineR-trunc.R
    ├── seqmeant.R
    ├── seqprep.R
    ├── implicativestat.R
    ├── seqetm.R
    ├── subseqelist.R
    ├── seqintegr.R
    ├── seqtree.R
    ├── seqsubsn.R
    ├── seqlegend.R
    ├── seqstatd.R
    ├── seqdss.R
    ├── seqmodst.R
    ├── seqformat-STS_to_TSE.R
    ├── seqrf.R
    ├── plot.stslist.meant.R
    └── seqtab.R
├── src
    ├── distancecalculator.cpp
    ├── traminerdebug.cpp
    ├── TWEDdistance.h
    ├── OMdistance.h
    ├── DHDdistance.cpp
    ├── LCPdistance.h
    ├── LCPdistance.cpp
    ├── NMSMSTdistance.h
    ├── NMSMSTSoftdistanceII.h
    ├── DHDdistance.h
    ├── NMSMSTSoftdistance.h
    ├── NMSDURSoftdistance.h
    ├── treeeventmap.h
    ├── constraint.h
    ├── OMvdistance.h
    ├── dist2matrix.c
    ├── eventdictionary.h
    ├── prefixtree.h
    ├── distanceobject.cpp
    ├── checktriangleineq.c
    ├── OMPerdistance.h
    ├── constraint.cpp
    ├── eventdictionary.cpp
    ├── OMPerdistanceII.h
    ├── NMSdistance.h
    ├── ffunctions.c
    ├── chisq.cpp
    ├── treeeventnode.h
    └── prefixtree.cpp
└── man
    ├── read.tda.mdist.Rd
    ├── seqdim.Rd
    ├── seqsep.Rd
    ├── ex1.Rd
    ├── seqeid.Rd
    ├── seqhasmiss.Rd
    ├── seqLLCS.Rd
    ├── seqfind.Rd
    ├── seqfpos.Rd
    ├── seqcomp.Rd
    ├── seqLLCP.Rd
    ├── seqgen.Rd
    ├── seqmpos.Rd
    ├── famform.Rd
    ├── seqlength.Rd
    ├── seqlength-align.Rd
    ├── is.stslist.Rd
    ├── seqfposend.Rd
    ├── bfspell.Rd
    ├── seqdecomp.Rd
    ├── disstree-get-rules.Rd
    ├── seqpm.Rd
    ├── disstree-assign.Rd
    ├── print.stslist.Rd
    ├── plot.subseqelist.Rd
    ├── str.eseq.Rd
    ├── TraMineRInternal.Rd
    ├── actcal.tse.Rd
    ├── seqstatl.Rd
    ├── ex2.Rd
    ├── seqe.Rd
    ├── seqnum.Rd
    ├── disstreeleaf.Rd
    ├── seqdur.Rd
    ├── seqistatd.Rd
    ├── seqconc.Rd
    ├── seqelength.Rd
    ├── seqeweight.Rd
    ├── TraMineR.check.depr.args.Rd
    ├── stlab.Rd
    ├── actcal.Rd
    ├── cpal.Rd
    ├── seqfcheck.Rd
    ├── seqecontain.Rd
    ├── seqstatf.Rd
    ├── seqsubsn.Rd
    ├── seqdss.Rd
    ├── plot.subseqelistchisq.Rd
    ├── seqibad.Rd
    ├── seqmaintokens.Rd
    └── seqtransn.Rd


/data/ex1.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/ex1.rda


--------------------------------------------------------------------------------
/data/ex2.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/ex2.rda


--------------------------------------------------------------------------------
/data/actcal.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/actcal.RData


--------------------------------------------------------------------------------
/data/biofam.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/biofam.RData


--------------------------------------------------------------------------------
/data/ex1.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/ex1.txt.gz


--------------------------------------------------------------------------------
/data/famform.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/famform.rda


--------------------------------------------------------------------------------
/data/mvad.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/mvad.RData


--------------------------------------------------------------------------------
/data/bfspell.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/bfspell.RData


--------------------------------------------------------------------------------
/data/actcal.tse.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/TraMineR/HEAD/data/actcal.tse.RData


--------------------------------------------------------------------------------
/inst/doc/TraMineR-state-sequence.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cran/TraMineR/HEAD/inst/doc/TraMineR-state-sequence.pdf


--------------------------------------------------------------------------------
/demo/00Index:
--------------------------------------------------------------------------------
1 | Rendering   Examples for desribing and visualazing sequences
2 | Seqdist   Examples for computing distances between sequences
3 | Events   Examples for analysing sequences of events
4 | 
5 | 


--------------------------------------------------------------------------------
/R/TraMineR-length.R:
--------------------------------------------------------------------------------
 1 | 
 2 | TraMineR.length <- function(seq, void) {
 3 | 
 4 | 	totlength <- length(seq)
 5 | 	nvoid <- sum(seq==void, na.rm=TRUE)
 6 | 	l <- totlength - nvoid
 7 | 
 8 | 	return(l)
 9 | 
10 | }
11 | 


--------------------------------------------------------------------------------
/R/debuglevel.R:
--------------------------------------------------------------------------------
 1 | 
 2 | debuglevel <- function(level=NULL) {
 3 | 	if(is.null(level)){
 4 | 		return(.Call(C_getTraMineRDebugLevel))
 5 | 	}
 6 | 	.Call(C_setTraMineRDebugLevel,as.integer(level))
 7 | 	return(level)
 8 | 	
 9 | }
10 | 


--------------------------------------------------------------------------------
/R/seqHtplot.R:
--------------------------------------------------------------------------------
1 | ## ======================
2 | ## Plotting entropy index
3 | ## ======================
4 | 
5 | seqHtplot <- function(seqdata, group = NULL, main = "auto", ...) {
6 | 	seqplot(seqdata, group=group, type="Ht", main=main, ...)
7 | }
8 | 


--------------------------------------------------------------------------------
/R/entropy.R:
--------------------------------------------------------------------------------
 1 | ## Compute the entropy of a distribution
 2 | 
 3 | entropy <- function(distrib, base=exp(1))
 4 | 	{
 5 | 		distrib <- distrib[distrib!=0]
 6 | 		p <- distrib/sum(distrib)
 7 | 		e <- -sum(p*log(p, base=base))
 8 | 		return(e)
 9 | 	}
10 | 


--------------------------------------------------------------------------------
/R/seqe2tse.R:
--------------------------------------------------------------------------------
1 | seqe2TSE <- function(eseq){
2 | 	tse <- .Call(C_tmrseqetotse,  unlist(list(eseq)))
3 | 	ll <- levels(eseq)
4 | 	tse <- data.frame(id=tse[[1]], timestamp=tse[[2]], event=factor(tse[[3]], levels=1:length(ll), labels=ll))
5 | 	return(tse)
6 | }
7 | 


--------------------------------------------------------------------------------
/src/distancecalculator.cpp:
--------------------------------------------------------------------------------
1 | #include "distancecalculator.h"
2 | 
3 | void DistanceCalculator::finalizeDistanceCalculator(SEXP ptr){
4 | 	DistanceCalculator * sdo;
5 | 	sdo= static_cast<DistanceCalculator *>(R_ExternalPtrAddr(ptr));
6 | 	delete sdo;
7 | }
8 | 


--------------------------------------------------------------------------------
/R/seqfplot.R:
--------------------------------------------------------------------------------
1 | ## ================================
2 | ## PLot of the sequences frequency
3 | ## ================================
4 | 
5 | seqfplot <- function(seqdata, group = NULL, main = "auto", ...) {
6 | 	seqplot(seqdata, group=group, type="f", main=main, ...)
7 | }
8 | 


--------------------------------------------------------------------------------
/R/TraMineR-mscore.R:
--------------------------------------------------------------------------------
 1 | 
 2 | TraMineR.mscore <- function(seq, slength, statelist, freq) {
 3 | 	mscore <- 0
 4 | 
 5 | 	for (i in 1:slength) {
 6 | 		si <- which(seq[i]==statelist)
 7 | 		if (length(si)==1)
 8 | 			mscore <- mscore+freq[si, i]
 9 | 	}
10 | 	return(mscore)
11 | }
12 | 
13 | 


--------------------------------------------------------------------------------
/R/cpal.R:
--------------------------------------------------------------------------------
 1 | ## ================================================
 2 | ## Retrieve the color palette from a sequence object
 3 | ## ================================================
 4 | 
 5 | cpal <- function(seqdata) {
 6 | 	palette <- attr(seqdata,"cpal")
 7 | 	return(palette)
 8 | }
 9 | 
10 | 


--------------------------------------------------------------------------------
/R/seqmsplot.R:
--------------------------------------------------------------------------------
1 | ## ====================================
2 | ## PLOT OF THE SEQUENCE OF MODAL STATES
3 | ## ====================================
4 | 
5 | seqmsplot <- function(seqdata, group = NULL, main = "auto", ...) {
6 | 	seqplot(seqdata, group=group, type="ms", main=main, ...)
7 | }
8 | 


--------------------------------------------------------------------------------
/R/seqrfplot.R:
--------------------------------------------------------------------------------
1 | ## =============================
2 | ## Plotting medoids of relative frequeny group of sequences
3 | ## =============================
4 | 
5 | seqrfplot <- function(seqdata, group = NULL, main = "auto", ...) {
6 | 	seqplot(seqdata, group=group, type="rf", main=main, ...)
7 | }
8 | 


--------------------------------------------------------------------------------
/R/seqrplot.R:
--------------------------------------------------------------------------------
1 | ## ============================================
2 | ## PLOT A REPRESENTATIVE SEQUENCE
3 | ## ============================================
4 | 
5 | seqrplot <- function(seqdata, group = NULL, main = "auto", ...) {
6 | 	seqplot(seqdata, group=group, type="r", main=main, ...)
7 | }
8 | 


--------------------------------------------------------------------------------
/R/stlab.R:
--------------------------------------------------------------------------------
 1 | ## ================================================
 2 | ## Retrieve the state labels from a sequence object
 3 | ## ================================================
 4 | 
 5 | stlab <- function(seqdata) {
 6 | 	statelab <- attr(seqdata,"labels")
 7 | 	return(statelab)
 8 | }
 9 | 
10 | 


--------------------------------------------------------------------------------
/R/seqmtplot.R:
--------------------------------------------------------------------------------
1 | ## ============================================
2 | ## PLOT OF THE STATES DISTRIBUTION BY TIME UNIT
3 | ## ============================================
4 | 
5 | seqmtplot <- function(seqdata, group = NULL, main = "auto", ...) {
6 | 	seqplot(seqdata, group=group, type="mt", main=main, ...)
7 | 
8 | }
9 | 


--------------------------------------------------------------------------------
/R/seqcomp.R:
--------------------------------------------------------------------------------
 1 | ## =======================
 2 | ## Comparing two sequences
 3 | ## =======================
 4 | 
 5 | seqcomp <- function(x, y) {
 6 | 
 7 | 	lx <- seqlength(x)		
 8 | 	ly <- seqlength(y)
 9 | 
10 | 	if (lx!=ly) 
11 | 		return(FALSE)
12 | 	else	if (sum(x[1:lx]==y[1:ly])==lx) 
13 | 		return(TRUE)
14 | 	else 
15 | 		return(FALSE)
16 | }	
17 | 


--------------------------------------------------------------------------------
/R/seqedist.R:
--------------------------------------------------------------------------------
 1 | seqedist <- function(eseq, idcost, vparam, interval=TRUE, norm=TRUE){
 2 |     norm <- as.integer(norm)
 3 |     interval <- as.integer(interval)
 4 |     return(.Call(C_tmrseqedist, eseq, idcost, vparam, norm, interval));
 5 | }
 6 | 
 7 | seqeage <- function(eseq, event.list) {
 8 | 	return(.Call(C_tmreventinseq, eseq, as.integer(event.list)))
 9 | }
10 | 


--------------------------------------------------------------------------------
/src/traminerdebug.cpp:
--------------------------------------------------------------------------------
 1 | #include "TraMineR.h"
 2 | int TRAMINER_DEBUG_LEVEL=TRAMINER_DEBUG_LEVEL_DEFAULT;
 3 | 
 4 | extern "C"{
 5 | 	SEXP setTraMineRDebugLevel(SEXP level){
 6 | 		TRAMINER_DEBUG_LEVEL= INTEGER(level)[0];
 7 | 		return R_NilValue;
 8 | 	}
 9 | 	
10 | 	SEXP getTraMineRDebugLevel(void){
11 | 		return Rf_ScalarReal(TRAMINER_DEBUG_LEVEL);
12 | 	}
13 | }
14 | 


--------------------------------------------------------------------------------
/R/seqshift.R:
--------------------------------------------------------------------------------
 1 | ## ==========================================
 2 | ## shifting sequence
 3 | ## function used by seqformat, option to='SRS'
 4 | ## ===========================================
 5 | 
 6 | seqshift <- function (seq, nbshift) {
 7 | 	seql <- length(seq)
 8 | 	
 9 | 	sseq <- c(rep(NA,(seql-nbshift)), seq[1:nbshift])
10 | 
11 | 	return(sseq)
12 | }
13 | 
14 | 
15 | 


--------------------------------------------------------------------------------
/R/seqdplot.R:
--------------------------------------------------------------------------------
 1 | ## ========================
 2 | ## State distribution plot
 3 | ## ========================
 4 | 
 5 | seqdplot <- function(seqdata, group=NULL, main="auto", ...) {
 6 | 	seqplot(seqdata, group=group, type="d", main=main, ...)
 7 | }
 8 | 
 9 | seqdHplot <- function(seqdata, group=NULL, main="auto", ...) {
10 | 	seqplot(seqdata, group=group, type="dH", main=main, ...)
11 | }
12 | 


--------------------------------------------------------------------------------
/R/seqiplot.R:
--------------------------------------------------------------------------------
 1 | ## =============================
 2 | ## Plotting individual sequences
 3 | ## =============================
 4 | 
 5 | seqiplot <- function(seqdata, group = NULL, main = "auto", ...) {
 6 | 	seqplot(seqdata, group=group, type="i", main=main, ...)
 7 | }
 8 | seqIplot <- function(seqdata, group = NULL, main = "auto", ...) {
 9 | 	seqplot(seqdata, group=group, type="I", main=main, ...)
10 | }
11 | 


--------------------------------------------------------------------------------
/R/stslist.meant-methods.R:
--------------------------------------------------------------------------------
 1 | ## ============================
 2 | ## Methods for stsmeant objects
 3 | ## ============================
 4 | 
 5 | print.stslist.meant <- function(x, digits=2, ...) {
 6 | 
 7 | 	cn <- colnames(x)
 8 | 	## Conversion for printing without attributes
 9 | 	x <- as.matrix(x[1:nrow(x),1:ncol(x)])
10 | 	colnames(x) <- cn
11 | 
12 | 	NextMethod("print", digits=digits,...)
13 | }
14 | 


--------------------------------------------------------------------------------
/R/dist2matrix.R:
--------------------------------------------------------------------------------
 1 | ###########################
 2 | ## Transform dist object to a matrix using fast and efficiant C code
 3 | ###########################
 4 | 
 5 | dist2matrix <- function(dist) {
 6 | 	if (inherits(dist, "dist")) {
 7 | 		return(.Call(C_dist2matrix, as.double(dist), attr(dist, "Size")))
 8 | 	}
 9 | 	else if (is.matrix(dist)) {
10 | 		return(dist)
11 | 	}
12 | 	stop("dist should be a matrix or a \"dist\" object")
13 | }


--------------------------------------------------------------------------------
/R/TraMineR-extractVer.R:
--------------------------------------------------------------------------------
 1 | extract.ver <- function(x) {
 2 | 
 3 | 	ver.list <- strsplit(x, "\\.")
 4 | 	ver.unit <- ver.list[[1]][1]
 5 | 	if(length(grep("-", ver.list[[1]][2]))>0) {
 6 | 		ver.dec.list <- strsplit(ver.list[[1]][2], "-")
 7 | 		ver.dec <- ver.dec.list[[1]][1]
 8 | 		ver.bug <- ver.dec.list[[1]][2]
 9 | 		return(c(ver.unit, ver.dec, ver.bug))
10 | 	}
11 | 
12 | 	ver.dec <- ver.list[[1]][2]
13 | 	return(c(ver.unit, ver.dec))
14 | }
15 | 


--------------------------------------------------------------------------------
/R/seqdim.R:
--------------------------------------------------------------------------------
 1 | ## RETURNS THE DIMENSION OF ONE OR MORE SEQUENCES
 2 | 
 3 | seqdim <- function(seqdata) {
 4 | 	## search for compressed sequences
 5 | 	format <- seqfcheck(seqdata)
 6 | 
 7 | 	if (format %in% c(":","-")) seqdata <- seqdecomp(seqdata,sep=format)
 8 | 
 9 | 	if (is.vector(seqdata)) sdim <- c(1,length(seqdata))
10 | 	else sdim <- c(nrow(seqdata),ncol(seqdata))
11 | 
12 | 	names(sdim) <- c("Rows","Columns")
13 | 
14 | 	return(sdim)
15 | 
16 | 	}
17 | 


--------------------------------------------------------------------------------
/R/seqgbar.R:
--------------------------------------------------------------------------------
 1 | ## ========================================================
 2 | ## Creates a vector of 0 and 1 used for plotting a sequence
 3 | ## ========================================================
 4 | 
 5 | seqgbar <- function(seq, statl, seql) {
 6 | 
 7 | 	nbstat <- length(statl)
 8 | 	
 9 | 	gbar <- vector("integer", seql*nbstat)
10 | 
11 | 	for (j in 1:seql)
12 | 		gbar[((j-1)*nbstat)+which(statl==seq[j])] <- 1
13 | 
14 | 	return(gbar)
15 | }
16 | 	
17 | 


--------------------------------------------------------------------------------
/R/cpal-set.R:
--------------------------------------------------------------------------------
 1 | ## ====================================
 2 | ## Function to set the color palette
 3 | ## of a sequence object
 4 | ## ==================================== 
 5 | 
 6 | "cpal<-" <- function(seqdata, value) {
 7 | 
 8 | 	if (!inherits(seqdata,"stslist")) 
 9 | 		stop("data is not a sequence object, see seqdef function to create one")
10 | 
11 | 	nbstate <- length(alphabet(seqdata))
12 | 	
13 | 	if (length(value)!=nbstate) 
14 | 		stop("number of colors must be",nbstate)
15 | 	else
16 | 		attr(seqdata,"cpal") <- value
17 | 
18 | 	seqdata
19 | } 
20 | 


--------------------------------------------------------------------------------
/R/seqgen.R:
--------------------------------------------------------------------------------
 1 | ## ============================
 2 | ## Random sequences generation
 3 | ## ============================
 4 | 
 5 | seqgen <- function(n, length, alphabet, p=NULL) {
 6 | 
 7 | 	## FROM http://www.cs.chalmers.se/~dubhashi/Courses/BioAlg/Markov%20chains%20and%20DNA%20sequence%20analysis%20in%20R.htm
 8 | 
 9 | 	m <- matrix(nrow=n, ncol=length)
10 | 
11 | 	for (i in 1:n) 
12 | 		m[i,] <- sample(alphabet, length, replace=TRUE, p)
13 | 
14 | 	colnames(m) <- paste("[",1:length,"]", sep="")
15 | 	rownames(m) <- 1:n
16 | 
17 | 	return(m)
18 | }
19 | 		
20 | 


--------------------------------------------------------------------------------
/R/stlab-set.R:
--------------------------------------------------------------------------------
 1 | ## ====================================
 2 | ## Function to set the state labels
 3 | ## of a sequence object
 4 | ## ==================================== 
 5 | 
 6 | "stlab<-" <- function(seqdata, value) {
 7 | 
 8 | 	if (!inherits(seqdata,"stslist")) 
 9 | 		stop("data is not a sequence object, see seqdef function to create one")
10 | 
11 | 	nbstate <- length(alphabet(seqdata))
12 | 	
13 | 	if (length(value)!=nbstate) 
14 | 		stop("number of labels must be",nbstate)
15 | 	else
16 | 		attr(seqdata,"labels") <- value
17 | 
18 | 	seqdata
19 | } 
20 | 


--------------------------------------------------------------------------------
/man/read.tda.mdist.Rd:
--------------------------------------------------------------------------------
 1 | \name{read.tda.mdist}
 2 | \alias{read.tda.mdist}
 3 | \title{Read a distance matrix produced by TDA.}
 4 | \description{This function reads a distance matrix produced by TDA into an R object. When computing OM distances in TDA, the output is a 'half' matrix stored in a text file as a vector.}
 5 | \usage{
 6 | read.tda.mdist(file)
 7 | }
 8 | \arguments{
 9 |   \item{file}{the path to the file containing TDA output.}
10 | }
11 | \value{a R matrix containing the distances.}
12 | %\seealso{\code{\link{}}
13 | %\examples{}
14 | \keyword{Data handling}
15 | 


--------------------------------------------------------------------------------
/R/seqpm.R:
--------------------------------------------------------------------------------
 1 | ## RECHERCHE DE SOUS-SEQUENCES
 2 | 
 3 | seqpm <- function(seqdata, pattern, sep="") {
 4 | 
 5 | 	if (!inherits(seqdata,"stslist")) {
 6 | 		stop("data is not a sequence object, use 'seqdef' function to create one")
 7 | 	}
 8 | 
 9 | 	seqdata <- seqconc(seqdata,sep=sep)
10 | 
11 | 	pm <- grep(pattern,seqdata)
12 | 	nbocc <- length(pm)
13 | 
14 | 	message(" [>] pattern ",pattern," has been found in ", nbocc," sequences")
15 | 
16 | 	res <- list(data.frame(pattern,nbocc),pm)
17 | 	names(res) <- c("MTab","MIndex")
18 | 
19 | 
20 | 	return(res)
21 | 	}
22 | 	
23 | 	
24 | 


--------------------------------------------------------------------------------
/R/read.tda.mdist.R:
--------------------------------------------------------------------------------
 1 | ## ========================================
 2 | ## Read a TDA output into a distance matrix
 3 | ## ========================================
 4 | 
 5 | read.tda.mdist <- function(file) {
 6 | 	tmp <- read.table(file=file,header=FALSE)
 7 | 	Y <- nrow(tmp)
 8 | 	d <- 1+8*Y
 9 | 	n <- (1+sqrt(d))/2
10 | 
11 | 	cat(" [>] reading a",n,"x",n,"matrix\n")
12 | 	m <- matrix(nrow=n,ncol=n)
13 | 	diag(m) <- 0
14 | 
15 | 	for (i in 1:Y) {
16 | 		m[tmp[i,"V1"],tmp[i,"V2"]] <- tmp[i,"V5"]
17 | 	}
18 | 
19 | 	m[upper.tri(m)] <- t(m)[upper.tri(m)]
20 | 
21 | 	return(m)
22 | }
23 | 		
24 | 			
25 | 


--------------------------------------------------------------------------------
/R/seqxtract.R:
--------------------------------------------------------------------------------
 1 | ## =================================
 2 | ## EXTRACT SEQUENCES FROM A DATA SET
 3 | ## =================================
 4 | 
 5 | seqxtract <- function(data, var, data.frame=FALSE) {
 6 | 
 7 |     ## tibble transformed into data frame
 8 |     if (inherits(data, "tbl_df")) data <- as.data.frame(data) 	
 9 | 	## Extracting the sequences from the data set
10 | 	if (missing(var) || is.null(var) || is.na(var[1]))
11 | 		seqdata <- data
12 | 	else
13 | 		seqdata <- subset(data,,var)
14 | 
15 | 	if (data.frame==FALSE)
16 | 		seqdata <- as.matrix(seqdata)
17 | 
18 | 	return(seqdata)
19 | 
20 | 	}
21 | 


--------------------------------------------------------------------------------
/R/seqecontain.R:
--------------------------------------------------------------------------------
 1 | seqecontain <- function(eseq, event.list, unknown.exclude = FALSE, seq,
 2 |   eventList, exclude) {
 3 | 
 4 |   TraMineR.check.depr.args(alist(eseq = seq, event.list = eventList, unknown.exclude = exclude))
 5 | 
 6 |   if(is.subseqelist(eseq))eseq <- eseq$subseq
 7 |   if(!is.seqelist(eseq))stop("eseq should be a seqelist. See help on seqecreate.")
 8 |   dict<-levels.seqelist(eseq)
 9 | 
10 |   elist<-factor(event.list,levels=dict)
11 |   if(unknown.exclude)excl=as.integer(c(1))
12 |   else excl=as.integer(c(0))
13 |   return(.Call(C_tmrsequencecontainevent, eseq, as.integer(elist), excl))
14 | }
15 | 


--------------------------------------------------------------------------------
/R/checktriangleineq.R:
--------------------------------------------------------------------------------
 1 | ## Check all possible 3 distance and check for triangle inequality consistency
 2 | 
 3 | checktriangleineq <- function(mat, warn=TRUE, indices = FALSE, tol = 1e-7) {
 4 | 	## Take care to get a matrix
 5 | 	mat <- dist2matrix(mat)
 6 | 	ind <- .Call(C_checktriangleineq, mat, as.integer(nrow(mat)), as.double(tol))
 7 | 	if(is.null(ind)){
 8 | 		return(TRUE)
 9 | 	}
10 | 	if (warn) {
11 | 		warning("At least the indices [", ind[1], ", ", ind[2], "] does not respect the triangle inequality when going through ", ind[3])
12 | 	}
13 | 	if (indices) {
14 | 		return(ind)
15 | 	}
16 | 	return(FALSE)
17 | 	
18 | }


--------------------------------------------------------------------------------
/R/seqfcheck.R:
--------------------------------------------------------------------------------
 1 | ## CHECK IF THE SEQUENCES ARE IN THE COMPRESSED FORMAT
 2 | 
 3 | seqfcheck <- function(seqdata) {
 4 | 	seqdata <- as.matrix(seqdata)
 5 | 
 6 | 	if (is.numeric(seqdata)) { 
 7 | 		if (any(seqdata<0,na.rm=TRUE)) format <- "-X"
 8 | 		else format <- "X"
 9 | 	}
10 | 	else {
11 | 		if (ncol(seqdata)==1) {
12 | 			if (length(grep("-",seqdata))>0) format <- "-"
13 | 			else if (length(grep(":",seqdata))>0) format <- ":"
14 | 			else format <- "?"
15 | 		}
16 | 		else {
17 | 			if (length(grep("-",seqdata))>0) format <- "-X"
18 | 			else format <- "X"
19 | 		}
20 | 	}
21 | 
22 | 	return(format)
23 | }
24 | 
25 | 
26 | 


--------------------------------------------------------------------------------
/src/TWEDdistance.h:
--------------------------------------------------------------------------------
 1 | #ifndef TWEDDISTANCECALCULATOR_H
 2 | #define TWEDDISTANCECALCULATOR_H
 3 | #include "distancecalculator.h"
 4 | #include "OMdistance.h"
 5 | 
 6 | class TWEDdistance: public OMdistance{
 7 |   protected:
 8 |     double nu;
 9 | 	double lambda;
10 | 	
11 |   public:
12 | 	TWEDdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);
13 | 	TWEDdistance(TWEDdistance *dc);
14 | 	virtual DistanceCalculator * copy(){return new TWEDdistance(this);}
15 | 	virtual void setParameters(SEXP params);
16 |     virtual ~TWEDdistance();
17 |     virtual double distance(const int&is, const int& js);
18 | };
19 | 
20 | 
21 | #endif
22 | 


--------------------------------------------------------------------------------
/R/stslist.freq-methods.R:
--------------------------------------------------------------------------------
 1 | ## ===========================
 2 | ## Methods for stsstatd objects
 3 | ## ===========================
 4 | 
 5 | print.stslist.freq <- function(x, digits=2, ...) {
 6 | 	table <- attr(x,"freq")
 7 | 	print(table, digits=digits, ...)
 8 | }
 9 | 
10 | "[.stslist.freq" <- function(x,i,j,drop=FALSE) {
11 | 	## Column subscript are not allowed
12 | 	if (!missing(j))
13 | 		stop(" [!] Column subscripts are not allowed", call.=FALSE)
14 | 
15 | 	x <- NextMethod("[")
16 | 
17 | 	if (!missing(i)) {
18 | 		attr(x,"weights") <- attr(x,"weights")[i]
19 | 		attr(x,"freq") <- attr(x,"freq")[i,]
20 | 	}
21 | 	
22 | 	return(x)
23 |  }
24 | 


--------------------------------------------------------------------------------
/R/seqsep.R:
--------------------------------------------------------------------------------
 1 | 
 2 | seqsep <- function (seqdata,sl=1, sep="-") {
 3 | 	for (i in 1:length(seqdata)) {
 4 | 		oseq <- seqdata[i]
 5 | 		seql <- nchar(oseq)
 6 | 
 7 | 		if ((seql %% sl)==0) {
 8 | 			nbs <- seql/sl
 9 | 			if (nbs>0) subseq <- substr(oseq,1,sl)
10 | 			if (nbs>1) {
11 | 				for (j in 2:nbs) {
12 | 					start <- ((j-1)*sl)+1
13 | 					stop <- start+(sl-1)							
14 | 					subseq <- paste(subseq,sep,substr(oseq,start,stop),sep="")
15 | 					}
16 | 				}
17 | 					seqdata[i] <- subseq
18 | 				}
19 | 			else 
20 | 				stop("Number of characters does not match number of states*states length in sequence",i)
21 | 		}	
22 | 	return(seqdata)
23 | }
24 | 


--------------------------------------------------------------------------------
/R/seqfpos.R:
--------------------------------------------------------------------------------
 1 | ## ===============================================================
 2 | ## Search for the first occurence of a given element in a sequence
 3 | ## ===============================================================
 4 | 
 5 | statefpos <- function (seqdata, state) {
 6 | 	pos <- which(seqdata==state)
 7 | 	if (length(pos)==0) fpos <- NA
 8 | 	else fpos <- min(pos)
 9 | 	return(fpos)
10 | 	}
11 | 
12 | 
13 | seqfpos <- function (seqdata, state) {
14 | 
15 | 	if (!inherits(seqdata,"stslist")) {
16 | 		stop("data is not a sequence object, use 'seqdef' function to create one")
17 | 	}
18 | 
19 | 	fpos <- apply(seqdata,1,statefpos,state)
20 | 
21 | 	return(fpos)
22 | 	}
23 | 


--------------------------------------------------------------------------------
/R/seqfposend.R:
--------------------------------------------------------------------------------
 1 | seqfposend <- function(seqdata,state, with.missing=FALSE, lead=0, from.seq.start=TRUE){
 2 |   if (!inherits(seqdata,"stslist")) {
 3 |   	stop("data is not a sequence object, use 'seqdef' function to create one")
 4 |   }
 5 |   s.dss <- seqdss(seqdata, with.missing=with.missing)
 6 |   pos <- seqfpos(s.dss,state)
 7 |   s.dur <- seqdur(seqdata, with.missing)
 8 |   if (from.seq.start) {
 9 |     ## cumulated duration
10 |     s.dur <- t(apply(s.dur,1,cumsum))
11 |   }
12 |   tl <- vector(length=nrow(s.dur))
13 |   for (i in 1:nrow(s.dur)){
14 |     tl[i] <- ifelse(is.na(pos[i]),0,s.dur[i,pos[i]] + lead)
15 |   }
16 |   return(tl)
17 | }
18 | 


--------------------------------------------------------------------------------
/R/seqlength.R:
--------------------------------------------------------------------------------
 1 | ## ==================================
 2 | ## Returns a vector with the lengths
 3 | ## of the sequences in seqdata
 4 | ## ==================================
 5 | 
 6 | seqlength <- function(seqdata, with.missing=TRUE) {
 7 | 
 8 | 	if (!inherits(seqdata,"stslist"))
 9 | 		stop("data is not a sequence object, use 'seqdef' function to create one")
10 | 
11 | 	sl <- ncol(seqdata)-rowSums( seqdata==attr(seqdata,"void"), na.rm=TRUE )
12 | 
13 |   if (!with.missing)
14 | 	   sl <- sl-rowSums( seqdata==attr(seqdata,"nr"), na.rm=TRUE )
15 | 
16 | 	sl <- as.matrix(sl)
17 | 	colnames(sl) <- "Length"
18 | 	rownames(sl) <- rownames(seqdata)
19 | 
20 | 	return(sl)
21 | }
22 | 


--------------------------------------------------------------------------------
/R/alphabet.R:
--------------------------------------------------------------------------------
 1 | ## ============================================
 2 | ## Retrieve the alphabet from a sequence object
 3 | ## ============================================
 4 | 
 5 | alphabet <- function(seqdata, with.missing=FALSE) {
 6 | 
 7 | 	if (inherits(seqdata,c("stslist","PSTf"))){
 8 |     statl <- attr(seqdata,"alphabet")
 9 |     if (isTRUE(with.missing)) statl <- c(statl, attr(seqdata,"nr"))
10 |   }
11 |   else if (inherits(seqdata,"seqelist")){
12 |     statl <- levels(seqdata)
13 |   }
14 |   else {
15 | 		stop("seqdata should be a state sequence object, an event sequence object, or a suffix tree. Use seqdef or seqecreate.")
16 |   }
17 | 
18 | return(statl)
19 | }
20 | 	
21 | 


--------------------------------------------------------------------------------
/man/seqdim.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqdim}
 2 | \alias{seqdim}
 3 | \title{Dimension of a set of sequences}
 4 | \description{
 5 | Returns the number of sequences (rows) and the maximum length of a set of sequences.
 6 | }
 7 | \details{
 8 | The function will first search for separators '-' or ':' in the sequences in order to detect wether they are in the compressed or extended format.
 9 | }
10 | \usage{
11 |  seqdim(seqdata)
12 | }
13 | \arguments{
14 |   \item{seqdata}{a set of sequences.}
15 | }
16 | \value{a vector with the number of sequences and the maximum sequence length.}
17 | %\seealso{}
18 | %\examples{}
19 | 
20 | \author{Alexis Gabadinho}
21 | 
22 | \keyword{Sequence-object attributes}
23 | 


--------------------------------------------------------------------------------
/man/seqsep.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqsep}
 2 | \alias{seqsep}
 3 | \title{Adds separators to sequences stored as character string}
 4 | \description{
 5 | Adds separators to sequences stored as character string.
 6 | }
 7 | \usage{
 8 |  seqsep(seqdata, sl=1, sep="-")
 9 | }
10 | \arguments{
11 |   \item{seqdata}{a dataframe or matrix containing sequence data, as vectors of states or events.}
12 |   \item{sl}{the length of the states (the number of characters used to represent them). Default is 1.}
13 |   \item{sep}{the character used as separator. Set by default as \code{"-"}.}
14 | }
15 | \seealso{
16 |  \code{\link{seqdecomp}}.
17 | }
18 | \examples{
19 | seqsep("ABAAAAAAD")}
20 | \keyword{Data handling}
21 | 


--------------------------------------------------------------------------------
/src/OMdistance.h:
--------------------------------------------------------------------------------
 1 | #ifndef OMDISTANCECALCULATOR_H
 2 | #define OMDISTANCECALCULATOR_H
 3 | #include "distancecalculator.h"
 4 | 
 5 | class OMdistance: public DistanceCalculator{
 6 |   protected:
 7 | 	double * fmat;
 8 |     double * scost;
 9 |     int alphasize;
10 |     double indel;
11 | 	int fmatsize;
12 | 	double maxscost;
13 | 	
14 |   public:
15 | 	OMdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);
16 | 	OMdistance(OMdistance *dc);
17 | 	virtual DistanceCalculator* copy(){return new OMdistance(this);}
18 | 	virtual void setParameters(SEXP params);
19 |     virtual ~OMdistance();
20 |     virtual double distance(const int&is, const int& js);
21 | };
22 | 
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/R/seqhasmiss.R:
--------------------------------------------------------------------------------
 1 | seqhasmiss <- function(seqdata){
 2 |   if (!is.stslist(seqdata)){
 3 |     msg.stop("seqdata must be a stslist object!" )
 4 |   }
 5 |   misscode <- c(attr(seqdata,"nr"),attr(seqdata,"void"))
 6 |   nrcode <- attr(seqdata,"nr")
 7 |   voidcode <- attr(seqdata,"void")
 8 |   has.miss <- apply(seqdata,1,function(x) any(x %in% misscode))
 9 |   has.nr <- apply(seqdata,1,function(x) any(x %in% nrcode))
10 |   has.void <- apply(seqdata,1,function(x) any(x %in% voidcode))
11 |   cat("There are ",sum(has.miss)," sequences with missings\n", sum(has.nr),"have nr's,", sum(has.void), " have voids")
12 |   return(invisible(list(has.miss=has.miss,has.nr=has.nr,has.void=has.void)))
13 | }
14 | 


--------------------------------------------------------------------------------
/R/zzz.R:
--------------------------------------------------------------------------------
 1 | ## .First.lib <- function(lib, pkg) {library.dynam("TraMineR", pkg, lib)}
 2 | 
 3 | .onAttach <- function(libname, pkgname){
 4 | 	suppressWarnings(descr <- utils::packageDescription("TraMineR"))
 5 | 	Tver <- extract.ver(descr$Version)
 6 | 	if(as.numeric(Tver[2])%%2==0) {
 7 | 		state <- "stable"
 8 | 	}
 9 | 	else {
10 | 		state <- "development"
11 | 	}
12 | 	builtDate <- strsplit(strsplit(descr$Built, ";")[[1]][3], " ")[[1]][2]
13 | 	packageStartupMessage("\n",descr$Package," ", state, " version ", descr$Version, " (Built: ", builtDate, ")")
14 | 	packageStartupMessage("Website: ", descr$URL)
15 | 	packageStartupMessage("Please type 'citation(\"TraMineR\")' for citation information.\n")
16 | }
17 | 


--------------------------------------------------------------------------------
/R/seqibad.R:
--------------------------------------------------------------------------------
 1 | seqibad <- function(seqdata, pow=1, with.missing=FALSE, ...){
 2 | 	if (!inherits(seqdata, "stslist")) {
 3 |         stop("[!] seqdata is not a sequence object, see seqdef function to create one")
 4 |   }
 5 |   alph <- alphabet(seqdata, with.missing=with.missing)
 6 |   lalph <- length(alph)
 7 | 
 8 |   stprec <- suppressMessages(seqprecstart(seqdata, with.missing=with.missing, ...))
 9 |   integr <- suppressMessages(seqintegr(seqdata, with.missing=with.missing, pow=pow))
10 | 
11 |   bad <- stprec[1] * integr[,1]
12 | 
13 |   if (lalph > 1) {
14 |     for (i in 2:lalph){
15 |       bad <- bad + stprec[i] * integr[,i]
16 |     }
17 |   }
18 | 
19 |   return(bad)
20 | 
21 | }
22 | 


--------------------------------------------------------------------------------
/src/DHDdistance.cpp:
--------------------------------------------------------------------------------
 1 | #include "DHDdistance.h"
 2 | 
 3 | double DHDdistance::distance(const int&is, const int& js){
 4 | 
 5 | 
 6 |     int m=slen[is];
 7 |     int n=slen[js];
 8 |     // Computing min length
 9 |     int minimum = m;
10 |     if (n<m) minimum = n;
11 |     double cost=0;
12 |     for (int i=0;i<minimum;i++) {
13 |         cost += scost[ARINDICE(sequences[MINDICE(is,i,nseq)], sequences[MINDICE(js,i,nseq)], i, alphasize)];
14 |     }
15 |     TMRLOG(5, "DHD distance");
16 | 	
17 |     //return normalizeDistance(cost, maxdist, ml, nl);
18 | 	// GR maxdist is the sum of maximum scost per position, i.e the length weighted by max cost
19 |     return normalizeDistance(cost, maxdist, maxdist, maxdist);
20 | }
21 | 


--------------------------------------------------------------------------------
/R/seqstatf.R:
--------------------------------------------------------------------------------
 1 | ## =========================
 2 | ## Sequences frequency table
 3 | ## =========================
 4 | 
 5 | seqstatf <- function(seqdata, weighted=TRUE, with.missing=FALSE) {
 6 | 
 7 | 	if (!is.stslist(seqdata))
 8 | 		stop("data is not a sequence object, use seqdef function to create one")
 9 | 
10 | 	istatd <- suppressMessages(seqistatd(seqdata, with.missing=with.missing))
11 | 
12 | 	## Weights
13 | 	weights <- attr(seqdata, "weights")
14 | 
15 | 	if (!weighted || is.null(weights))
16 | 		weights <- rep(1, nrow(seqdata))
17 | 
18 | 	istatd <- istatd*weights
19 | 
20 | 	Freq <- colSums(istatd)
21 | 
22 | 	Percent <- Freq/sum(Freq)*100
23 | 
24 | 	res <- data.frame(Freq, Percent)
25 | 
26 | 	return(res)
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/src/LCPdistance.h:
--------------------------------------------------------------------------------
 1 | #ifndef LCPDISTANCECALCULATOR_H
 2 | #define LCPDISTANCECALCULATOR_H
 3 | #include "distancecalculator.h"
 4 | 
 5 | class LCPdistance: public DistanceCalculator{
 6 |   protected:
 7 | 	int sign;
 8 |   public:
 9 | 	LCPdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS):DistanceCalculator(normS, Ssequences, seqdim, lenS), sign(0){}
10 | 	LCPdistance(LCPdistance *dc):DistanceCalculator(dc), sign(dc->sign){}
11 | 	virtual DistanceCalculator* copy(){return new LCPdistance(this);}
12 | 	virtual void setParameters(SEXP params){
13 | 		sign = INTEGER(getListElement(params, "sign"))[0];
14 | 	}
15 |     virtual ~LCPdistance(){}
16 | 	virtual double distance(const int&is, const int& js);
17 | };
18 | 
19 | #endif
20 | 


--------------------------------------------------------------------------------
/R/seqfind.R:
--------------------------------------------------------------------------------
 1 | ## =================================
 2 | ## Find the ocurrences of a sequence
 3 | ## =================================
 4 | 
 5 | seqfind <- function(x,y) {
 6 | 
 7 | 	if (!inherits(x,"stslist")) 
 8 | 		stop("x is not a sequence object, use 'seqdef' function to create one")
 9 | 
10 | 	if (!inherits(y,"stslist")) 
11 | 		stop("y is not a sequence object, use 'seqdef' function to create one")
12 | 	
13 | 	x.void <- attr(x,"void")
14 | 	y.void <- attr(y,"void")
15 | 
16 | 	x.conc <- as.vector(seqconc(x, void=x.void))
17 | 	y.conc <- as.vector(seqconc(y, void=y.void))
18 | 
19 | 	occ <- NULL
20 | 
21 | 	for (i in 1:length(x.conc))
22 | 		occ <- c(occ,which(y.conc==x.conc[i]))
23 | 
24 | 	return(occ)
25 | }
26 | 	 
27 | 	
28 | 	
29 | 


--------------------------------------------------------------------------------
/R/TraMineRInternal.R:
--------------------------------------------------------------------------------
 1 | TraMineRInternalNodeInit <- function(...){
 2 | 	return(DTNInit(...))
 3 | }
 4 | 
 5 | TraMineRInternalSplitInit <- function(...){
 6 | 	return(DTNsplit(...))
 7 | }
 8 | 
 9 | TraMineRInternalLayout <- function(...){
10 | 	return(TraMineR.setlayout(...))
11 | }
12 | 
13 | TraMineRInternalSeqeage <- function(...){
14 | 	return(seqeage(...))
15 | }
16 | 
17 | TraMineRInternalLegend <- function(...){
18 | 	return(TraMineR.legend(...))
19 | }
20 | 
21 | TraMineRInternalSeqgbar <- function(...){
22 | 	return(seqgbar(...))
23 | }
24 | 
25 | TraMineRInternalWeightedInertiaDist <- function(diss, diss.size, is.dist, individuals, sweights, var) {
26 |   return(.Call(C_tmrWeightedInertiaDist, diss, diss.size, is.dist, individuals, sweights, var))
27 | }
28 | 


--------------------------------------------------------------------------------
/R/seqasnum.R:
--------------------------------------------------------------------------------
 1 | ## TRANSFORMATION DE SEQUENCES DE CARACTERES EN SEQUENCES NUMERIQUES
 2 | ## Returns a matrix with integer coding of the sequences
 3 | 
 4 | ## ToDo: Check if different from seqnum
 5 | ## seqnum returns an stslist object with a numeric alphabet
 6 | ## seqasnum returns a numeric matrix
 7 | 
 8 | seqasnum <- function(seqdata, with.missing=FALSE) {
 9 | 
10 | 	mnum <- matrix(NA,nrow=seqdim(seqdata)[1],ncol=seqdim(seqdata)[2])
11 | 
12 | 	rownames(mnum) <- rownames(seqdata)
13 | 	colnames(mnum) <- colnames(seqdata)
14 | 
15 | 	statl <- attr(seqdata,"alphabet")
16 | 
17 | 	if (with.missing)
18 | 		statl <- c(statl, attr(seqdata, "nr"))
19 | 
20 | 	for (i in 1:length(statl)) {
21 | 		mnum[seqdata==statl[i]] <- i-1
22 | 		}
23 | 
24 | 	return(mnum)
25 | 
26 | 	}
27 | 


--------------------------------------------------------------------------------
/R/seqici.R:
--------------------------------------------------------------------------------
 1 | ## ================
 2 | ## Complexity index
 3 | ## ================
 4 | 
 5 | seqici <- function(seqdata, with.missing=FALSE, silent=TRUE) {
 6 | 
 7 | 	if (!inherits(seqdata,"stslist"))
 8 | 		stop("data is NOT a sequence object, see seqdef function to create one")
 9 | 
10 | 	if(!silent) message(" [>] computing complexity index for ",nrow(seqdata)," sequences ...")
11 | 
12 | 	## Number of transitions
13 | 	trans <- seqtransn(seqdata, with.missing=with.missing, norm=TRUE)
14 | 
15 | 	## Longitudinal Entropy
16 | 	ient <- suppressMessages(
17 | 		seqient(seqdata, with.missing=with.missing, norm=TRUE)
18 | 	)
19 | 
20 | 	## Complexity index
21 | 	comp.index <- sqrt(trans * ient)
22 | 
23 | 	colnames(comp.index) <- "C"
24 | 
25 | 	return(comp.index)
26 | }
27 | 


--------------------------------------------------------------------------------
/R/seqmpos.R:
--------------------------------------------------------------------------------
 1 | ## ============================
 2 | ## Number of matching positions
 3 | ## ============================
 4 | 
 5 | seqmpos <- function(seq1, seq2, with.missing=FALSE) {
 6 | 
 7 | 	if (!inherits(seq1,"stslist") | !inherits(seq2,"stslist")) 
 8 | 		stop("sequences must be sequence objects")
 9 | 
10 | 	## Defining the positions to compare with logical values
11 | 	## void positions are set to FALSE and hence will not be
12 | 	## counted in the sum of matching positions
13 | 	comp1 <- seq1!=attr(seq1,"void")
14 | 	comp2 <- seq2!=attr(seq2,"void")
15 | 
16 | 	if (!with.missing) {
17 | 		comp1 <- comp1 & seq1!=attr(seq1,"nr")
18 | 		comp2 <- comp2 & seq2!=attr(seq2,"nr")
19 | 	}
20 | 
21 | 	mpos <- sum(seq1==seq2 & comp1 & comp2)
22 | 
23 | 	return(mpos)
24 | }
25 | 


--------------------------------------------------------------------------------
/src/LCPdistance.cpp:
--------------------------------------------------------------------------------
 1 | #include "LCPdistance.h"
 2 | 
 3 | double LCPdistance::distance(const int&is, const int& js){
 4 | 
 5 |     int m=slen[is];
 6 |     int n=slen[js];
 7 |     // Computing min length
 8 |     int minimum = m;
 9 |     if (n<m) minimum = n;
10 |     int i;
11 |     if (sign>0) {
12 |         i=0;
13 |         while (sequences[MINDICE(is,i,nseq)]==sequences[MINDICE(js,i,nseq)] && i<minimum) {
14 |             i++;
15 |         }
16 |     } else {
17 |         i=1;
18 |         while (sequences[MINDICE(is,(m-i),nseq)]==sequences[MINDICE(js,(n-i),nseq)] && i<=minimum) {
19 |             i++;
20 |         }
21 |         i--;
22 |     }
23 |     return normalizeDistance((double)n+(double)m-2.0*(double)i, (double)n+(double)m, (double)m, (double)n);
24 | }
25 | 


--------------------------------------------------------------------------------
/R/seqeid.R:
--------------------------------------------------------------------------------
 1 | ## ========================================
 2 | ## Return a id of given subsequences
 3 | ## ========================================
 4 | 
 5 | seqeid <- function(eseq, s) {
 6 | 
 7 |   TraMineR.check.depr.args(alist(eseq = s))
 8 | 
 9 |   tmrsequenceid.internal <- function(eseq) {
10 |     if(is.eseq(eseq)){
11 |       return(.Call(C_tmrsequencegetid, eseq))
12 |     }
13 |     return(NA)
14 |   }
15 |   #message("Event sequence analysis module is still experimental")
16 |   if(is.seqelist(eseq)){
17 |     return(as.integer(sapply(unlist(eseq),tmrsequenceid.internal)))
18 |   }else if(is.eseq(eseq)){
19 |     return(tmrsequenceid.internal(eseq))
20 |   }else{
21 |     stop("eseq should be a seqelist. See help on seqecreate.")
22 |   }
23 |   return(NA)
24 | }
25 | 


--------------------------------------------------------------------------------
/man/ex1.Rd:
--------------------------------------------------------------------------------
 1 | \name{ex1}
 2 | \docType{data}
 3 | \alias{ex1}
 4 | \title{Example data set with missing values and weights}
 5 | 
 6 | \description{
 7 | Example data set used in \code{\link{seqdef}} to demonstrate the handling of missing values and weights.
 8 | 
 9 | The state columns (variable) are named \code{'P1'} to \code{'P13'}.
10 | 
11 | The alphabet is made of four possible states: \code{A, B, C} and \code{D}.
12 | \cr
13 | 
14 | The data set contains also case weights (variable \code{weights}). The sum of the weights is 60.
15 | }
16 | \usage{data(ex1)}
17 | 
18 | \format{A data frame with 7 rows, 13 state variables, 1 weight variable.}
19 | 
20 | \source{The brain of the TraMineR package team.}
21 | \author{Gilbert Ritschard and Alexis Gabadinho}
22 | 
23 | \keyword{Datasets}
24 | 


--------------------------------------------------------------------------------
/R/seqLLCS.R:
--------------------------------------------------------------------------------
 1 | # TraMineR 2, Pierre-Alexandre Fonta (2017):
 2 | # not deleted because it used in the JSS article
 3 | 
 4 | ## ========================================
 5 | ## Length of the longest common subsequence
 6 | ## ========================================
 7 | 
 8 | seqLLCS <- function(seq1, seq2) {
 9 |   if (!inherits(seq1, "stslist") | !inherits(seq2, "stslist")) {
10 |     stop(" [!] sequences must be sequence objects")
11 |   }
12 |   a1 <- alphabet(seq1)
13 |   a2 <- alphabet(seq2)
14 |   if (length(a1) != length(a2) || any(a1 != a2)) {
15 |     stop(" [!] The alphabet of both sequences have to be same.")
16 |   }
17 |   l1 <- seqlength(seq1)
18 |   l2 <- seqlength(seq2)
19 |   result <- .C(C_cLCS, as.integer(seq1), as.integer(seq2), as.double(c(l1, l2)),
20 |     result = as.integer(0))$result
21 |   return(result)
22 | }
23 | 


--------------------------------------------------------------------------------
/R/seqLLCP.R:
--------------------------------------------------------------------------------
 1 | # TraMineR 2, Pierre-Alexandre Fonta (2017):
 2 | # not deleted because it used in the JSS article
 3 | 
 4 | ## ==================================================
 5 | ## Calculates the Length of the Longest Common Prefix
 6 | ## ==================================================
 7 | 
 8 | seqLLCP <- function(seq1, seq2) {
 9 |   if (!inherits(seq1, "stslist") | !inherits(seq2, "stslist")) {
10 |     stop(" [!] sequences must be sequence objects")
11 |   }
12 |   a1 <- alphabet(seq1)
13 |   a2 <- alphabet(seq2)
14 |   if (length(a1) != length(a2) || any(a1 != a2)) {
15 |     stop(" [!] The alphabet of both sequences have to be same.")
16 |   }
17 |   l1 <- seqlength(seq1)
18 |   l2 <- seqlength(seq2)
19 |   result <- .C(C_cLCP, as.integer(seq1), as.integer(seq2), as.double(c(l1, l2)),
20 |     result = as.integer(0))$result
21 |   return(result)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/seqeid.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqeid}
 2 | \alias{seqeid}
 3 | \title{Retrieve unique ids from an event sequence object.}
 4 | \description{
 5 |   Retrieve the unique ids from an event sequence object or from a list of event sequence object.
 6 | }
 7 | \usage{
 8 | seqeid(eseq, s)
 9 | }
10 | \arguments{
11 |   \item{eseq}{An event sequence object (as created with \code{\link{seqecreate}}) or a list of event sequence objects}
12 |   \item{s}{Deprecated. Use \code{eseq} instead.}
13 | }
14 | %\seealso{}
15 | \examples{
16 | data(actcal.tse)
17 | actcal.eseq <- seqecreate(actcal.tse)
18 | sid <- seqeid(actcal.eseq)
19 | length(sid)
20 | head(sid)
21 | }
22 | 
23 | \author{Matthias Studer (with Gilbert Ritschard for the help page)}
24 | % Add one or more standard keywords, see file 'KEYWORDS' in the
25 | % R documentation directory.
26 | \keyword{Event sequences}
27 | \keyword{Sequence-object attributes}
28 | 


--------------------------------------------------------------------------------
/man/seqhasmiss.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqhasmiss}
 2 | \alias{seqhasmiss}
 3 | \title{Count and identification of sequences with nr and void missings}
 4 | \description{The function prints the counts of sequences with included (nr) and ignored (void) missings. It returns invisibly three logical vectors with \code{TRUE} for sequences with nr or void missing (\code{has.miss}), included missing (\code{has.nr}), and ignored missing (\code{has.void}) respectively.
 5 | }
 6 | 
 7 | \usage{
 8 | seqhasmiss(seqdata)
 9 | }
10 | \arguments{
11 |  \item{seqdata}{State sequence object of class \code{stslist}.}
12 | }
13 | \value{
14 |  List of three logical indicator vectors: \code{has.miss}, \code{has.nr}, and \code{has.void}.
15 | }
16 | \examples{
17 | data(ex1)
18 | s <- seqdef(ex1[,1:12])
19 | res <- seqhasmiss(s)
20 | res$has.nr
21 | }
22 | 
23 | \author{Gilbert Ritschard}
24 | 
25 | \keyword{State sequences}
26 | 


--------------------------------------------------------------------------------
/R/seqformat-STS_to_SRS.R:
--------------------------------------------------------------------------------
 1 | # Should only be used through seqformat()
 2 | 
 3 | ## ==============================
 4 | ## Convert from STS to SRS format
 5 | ## ==============================
 6 | 
 7 | STS_to_SRS <- function(seqdata,nrep) {
 8 | 
 9 | 	nbseq <- seqdim(seqdata)[1]
10 | 	seql <- seqdim(seqdata)[2]
11 | 
12 | 	if (is.null(nrep)) nrep <- 0
13 | 
14 | 	out <- data.frame(id=1:nbseq,idx=(1+nrep),t(apply(seqdata,1,seqshift,(1+nrep))))
15 | 
16 | 	names(out) <- c("id","idx",paste("T-",seq(seql-1,1),sep=""),"T")
17 | 
18 | 	for (i in (2+nrep):seql) {
19 | 			tmp <- data.frame(id=seq(1:nbseq),idx=i,t(apply(seqdata,1,seqshift,i)))
20 | 			names(tmp) <- c("id","idx",paste("T-",seq(seql-1,1),sep=""),"T")
21 | 			out <- rbind(out,tmp)
22 | 		}
23 | 
24 | 	out <- out[order(out$id,out$idx),]
25 | 	names(out) <- c("id","idx",paste("T-",seq(seql-1,1),sep=""),"T")
26 | 
27 | 	return(out)
28 | }
29 | 
30 | 


--------------------------------------------------------------------------------
/R/seqlength-align.R:
--------------------------------------------------------------------------------
 1 | seqlength.align <- function(seq.list){
 2 | 
 3 |   nsl <- length(seq.list)
 4 |   if (!is.list(seq.list))
 5 |     stop("seq.list should be a list of two or more stslist objects")
 6 | 
 7 | 
 8 |   nr <- nrow(seq.list[[1]])
 9 |   lgth <- matrix(NA,nrow=nr,ncol=nsl)
10 |   for (i in 1:nsl) {
11 |     if (!is.stslist(seq.list[[i]]))
12 |         stop("At least one element of seq.list is not a stslist object!")
13 |     if (nrow(seq.list[[i]]) != nr)
14 |         stop("Sequence objects must all have same number of sequences")
15 |     lgth[,i] <- seqlength(seq.list[[i]])
16 |   }
17 |   lgthmin <- apply(lgth,1,min)
18 |   for (i in 1:nsl) {
19 |     for (j in 1:nr) {
20 |       if (lgth[j,i] > lgthmin[j] )
21 |         seq.list[[i]][j,(lgthmin[j]+1):lgth[j,i]] <- attr(seq.list[[i]],"void")
22 |     }
23 |   }
24 | 
25 | 
26 |   return(seq.list)
27 | }
28 | 
29 | 


--------------------------------------------------------------------------------
/R/seqient.R:
--------------------------------------------------------------------------------
 1 | ## =======================
 2 | ## Within Sequence Entropy
 3 | ## =======================
 4 | 
 5 | seqient <- function(seqdata, norm=TRUE, base=exp(1), with.missing=FALSE, silent=TRUE) {
 6 | 
 7 | 	if (!inherits(seqdata,"stslist"))
 8 | 		stop("data is NOT a sequence object, see seqdef function to create one")
 9 | 
10 | 	statl <- attr(seqdata,"alphabet")
11 | 
12 | 	if (with.missing) {
13 | 		statl <- c(statl, attr(seqdata,"nr"))
14 | 	}
15 | 
16 | 	if (!silent) message(" [>] computing entropy for ",nrow(seqdata)," sequences ...")
17 | 
18 | 	iseqtab <- suppressMessages(seqistatd(seqdata, with.missing=with.missing))
19 | 
20 | 	ient <- apply(iseqtab,1,entropy, base=base)
21 | 	ient <- as.matrix(ient)
22 | 	if (norm==TRUE) {
23 | 		emax <- log(length(statl))
24 | 		ient <- ient/emax
25 | 		}
26 | 
27 | 	colnames(ient) <- "Entropy"
28 | 	rownames(ient) <- rownames(seqdata)
29 | 
30 | 	return(ient)
31 | 
32 | 	}
33 | 


--------------------------------------------------------------------------------
/R/alphabet-set.R:
--------------------------------------------------------------------------------
 1 | ## ====================================
 2 | ## Function to set the alphabet
 3 | ## of a sequence object
 4 | ## ====================================
 5 | 
 6 | "alphabet<-" <- function(seqdata, value) {
 7 | 
 8 |   if (any(duplicated(value)))
 9 | 		stop("[!] duplicated element(s) in value assigned to alphabet: ",value[duplicated(value)])
10 | 
11 | 	if (!inherits(seqdata,"stslist"))
12 | 		stop("[!] data is not a sequence object, see seqdef function to create one")
13 | 
14 | 	nbstate <- length(alphabet(seqdata))
15 | 	
16 | 	if (length(value)!=nbstate)
17 | 		stop("[!] number of states must be ",nbstate)
18 | 	else {
19 | 		for (i in 1:dim(seqdata)[2])
20 | 			levels(seqdata[,i])[1:nbstate] <- value
21 | 
22 | 		if (isTRUE(all.equal(attr(seqdata,"alphabet"),attr(seqdata,"labels"))))
23 | 			attr(seqdata,"labels") <- value
24 | 
25 | 		attr(seqdata,"alphabet") <- value
26 | 	}
27 | 
28 | 	seqdata
29 | 
30 | }
31 | 


--------------------------------------------------------------------------------
/src/NMSMSTdistance.h:
--------------------------------------------------------------------------------
 1 | #ifndef NMSMSTDISTANCECALCULATOR_H
 2 | #define NMSMSTDISTANCECALCULATOR_H
 3 | #include "NMSdistance.h"
 4 | 
 5 | class NMSMSTdistance: public SUBSEQdistance{
 6 |   protected:
 7 | 	double *e;
 8 | 	double *e1;
 9 | 	double *t;
10 | 	double *t1;
11 | 	int rowsize;
12 | 	double * seqdur;
13 | 	
14 |   public:
15 |   	NMSMSTdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){}
16 |   	NMSMSTdistance(NMSMSTdistance* dc);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){}
17 | 	virtual DistanceCalculator* copy(){return new NMSMSTdistance(this);}
18 | 	//NMSMSTdistance(const int& pnorm, int * psequences, const int & pnseq,  int * pslen, const int &pmaxlen, double *pseqdur);
19 |     virtual ~NMSMSTdistance();
20 | 	virtual void setParameters(SEXP params);
21 | 	void computeattr(const int&is, const int& js);
22 | };
23 | 
24 | #endif
25 | 


--------------------------------------------------------------------------------
/R/stslist.modst-methods.R:
--------------------------------------------------------------------------------
 1 | ## ===========================
 2 | ## Methods for stsstatd objects
 3 | ## ===========================
 4 | 
 5 | print.stslist.modst <- function(x, digits=2, ...) {
 6 | 
 7 | 	cat("  [Modal state sequence]\n")
 8 | 	## x <- NextMethod("print",...)
 9 | 	print.stslist(x,...)	
10 | 
11 | 	cat("\n  [State frequencies]\n")
12 | 	print(attr(x,"Frequencies"), digits=digits)
13 | }
14 | 
15 | "[.stslist.modst" <- function(x,i,j,drop=FALSE) {
16 | 	## Specialized only for column subscript
17 | 	## If one column we keep the original data.frame method
18 | 	## Otherwise we copy attributes and update "start" value
19 | 	if (!missing(i))
20 | 		stop("row subscripts not allowed!", call.=FALSE)
21 | 	
22 | 	if (!missing(j)) {
23 | 		freq <- attr(x,"Frequencies")
24 | 
25 | 		## Applying method
26 | 	     x <- NextMethod("[")
27 | 	
28 | 		## Adapting frequencies
29 | 		attr(x,"Frequencies") <- freq[j]
30 | 	}
31 | 	
32 | 	return(x)
33 |  }
34 | 


--------------------------------------------------------------------------------
/R/seqientdiff.R:
--------------------------------------------------------------------------------
 1 | ## =======================
 2 | ## Within Sequence Entropy
 3 | ## =======================
 4 | 
 5 | seqientdiff <- function(seqdata, norm=TRUE) {
 6 | 
 7 | 	if (!inherits(seqdata, "stslist"))
 8 | 		stop("data is NOT a sequence object, see seqdef function to create one")
 9 | 
10 |     entropydiff <- function(dur, norm) {
11 | 		len <- sum(dur)
12 | 		## If we don't check, the funcion returns NA (division by 0)
13 | 		ent <- entropy(dur)
14 | 		if(ent>0 && norm) {
15 | 			## The maximum entropy is when length of the DSS=length of the sequence 
16 | 			p <- 1/len
17 | 			entmax <- (-len)*(p*log(p))
18 | 			ent <- ent/entmax
19 | 		}
20 | 		return(ent)
21 |     }
22 | 	iseqtab <- seqdur(seqdata)
23 | 	iseqtab[is.na(iseqtab)] <- 0
24 | 	ient <- apply(iseqtab,1,entropydiff, norm=norm)
25 |     
26 | 	ient <- as.matrix(ient)
27 | 	colnames(ient) <- "Hdss"
28 | 	rownames(ient) <- paste("[",seq(1:length(ient)),"]",sep="")
29 | 	return(ient)
30 | }	
31 | 


--------------------------------------------------------------------------------
/R/seqmaintokens.R:
--------------------------------------------------------------------------------
 1 | seqmaintokens <- function(seqdata, k=8L, mint=NULL, ...) {
 2 | 
 3 |     if (!inherits(seqdata,"stslist")){
 4 | 		stop("data is NOT a state sequence object, see seqdef function to create one",
 5 |             call. = FALSE)
 6 | 	}
 7 |     if (!k>=1){
 8 |         stop("k must be a strictly positive integer!",
 9 |             call. = FALSE)
10 |     }
11 | 
12 |     meant <- seqmeant(seqdata, ...)
13 |     if (!is.null(mint)){
14 |         main.tokens <- which(meant >= mint)
15 |         if (length(main.tokens) > k){
16 |             meant.o <- order(meant[main.tokens], decreasing=TRUE)
17 |             main.tokens <- sort(meant.o[1:k])
18 |         }
19 |         else if (length(main.tokens) < 1)
20 |             message(" !!! No token occurs more than mint times on average!")
21 |     } else {
22 |         meant.o <- order(meant, decreasing=TRUE)
23 |         main.tokens <- sort(meant.o[1:k])
24 |     }
25 |     return(main.tokens)
26 | }
27 | 


--------------------------------------------------------------------------------
/src/NMSMSTSoftdistanceII.h:
--------------------------------------------------------------------------------
 1 | #ifndef NMSMSTSoftdistanceIICALCULATOR_H
 2 | #define NMSMSTSoftdistanceIICALCULATOR_H
 3 | #include "NMSdistance.h"
 4 | 
 5 | class NMSMSTSoftdistanceII: public SUBSEQdistance{
 6 |   protected:
 7 | 	double *e;
 8 | 	double *e1;
 9 | 	int rowsize;
10 | 	double * softmatch;
11 | 	int alphasize;
12 | 	
13 |   public:
14 |   	NMSMSTSoftdistanceII(NMSMSTSoftdistanceII *dc);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){}
15 |   	NMSMSTSoftdistanceII(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){}
16 | 	virtual DistanceCalculator* copy(){return new NMSMSTSoftdistanceII(this);}
17 | 	//NMSMSTdistance(const int& pnorm, int * psequences, const int & pnseq,  int * pslen, const int &pmaxlen, double *pseqdur);
18 |     virtual ~NMSMSTSoftdistanceII();
19 | 	virtual void setParameters(SEXP params);
20 | 	void computeattr(const int&is, const int& js);
21 | };
22 | 
23 | #endif
24 | 


--------------------------------------------------------------------------------
/R/stslist.statd-methods.R:
--------------------------------------------------------------------------------
 1 | ## ===========================
 2 | ## Methods for stsstatd objects
 3 | ## ===========================
 4 | 
 5 | print.stslist.statd <- function(x, digits=2, ...) {
 6 | 
 7 | 	## computing max length of state label
 8 | 	## to align valid states and entropy tables
 9 | 	rl <- max(nchar(rownames(x$Frequencies)))
10 | 	## width <- max(nchar(colnames(x$Frequencies)), 4)	
11 | 
12 | 	ident1 <- rep(" ",rl)
13 | 	ident2 <- paste(rep(" ",rl-1), collapse="")
14 | 
15 | 	cat(ident1,"[State frequencies]\n")
16 | 	print(x$Frequencies, digits=digits)
17 | 
18 | 	VS <- t(as.matrix(x$ValidStates))
19 | 	rownames(VS) <- paste("N",ident2,sep="")
20 | 	cat("\n", ident1,"[Valid states]\n")
21 | 	print(VS, digits=digits)
22 | 	
23 | 	H <- t(as.matrix(x$Entropy))	
24 | 	rownames(H) <- paste("H",ident2,sep="")
25 | 	cat("\n", ident1,"[Entropy index]\n")
26 | 	print(H, digits=digits)
27 | }
28 | 
29 | "[.stslist.statd" <- function(...) {
30 | 	stop(" [!] Operation not allowed", call.=FALSE)
31 | } 
32 | 


--------------------------------------------------------------------------------
/R/seqistatd.R:
--------------------------------------------------------------------------------
 1 | ## ======================================
 2 | ## State distribution for each individual
 3 | ## ======================================
 4 | 
 5 | seqistatd <- function(seqdata, with.missing=FALSE, prop=FALSE) {
 6 | 
 7 | 	if (!inherits(seqdata,"stslist")) {
 8 | 		stop("data is not a sequence object, see seqdef function to create one")
 9 | 	return()
10 | 	}
11 | 
12 | 	statl <- alphabet(seqdata)
13 | 	if (with.missing) {
14 | 		statl <- c(statl, attr(seqdata,"nr"))
15 | 	}
16 | 
17 | 	nbstat <- length(statl)
18 | 	nbseq <- nrow(seqdata)
19 | 
20 | 	iseqtab <- matrix(nrow=nbseq, ncol=nbstat)
21 | 
22 | 	colnames(iseqtab) <- statl
23 | 	rownames(iseqtab) <- rownames(seqdata)
24 | 
25 | 	message(" [>] computing state distribution for ", nbseq," sequences ...")
26 | 
27 | 	for (i in 1:nbstat) {
28 | 		iseqtab[,i] <- apply(seqdata,1,function(x) sum(x==statl[i],na.rm=TRUE))
29 | 	}
30 | 
31 | 	if (prop) {
32 | 		iseqtab <- iseqtab/rowSums(iseqtab)
33 | 	}
34 | 	
35 | 	return(iseqtab)
36 | 
37 | }	
38 | 


--------------------------------------------------------------------------------
/src/DHDdistance.h:
--------------------------------------------------------------------------------
 1 | #ifndef DHDDISTANCECALCULATOR_H
 2 | #define DHDDISTANCECALCULATOR_H
 3 | #include "distancecalculator.h"
 4 | 
 5 | class DHDdistance: public DistanceCalculator{
 6 |   protected:
 7 |     double * scost;
 8 |     int alphasize;
 9 | 	double maxdist;
10 | 	
11 |   public:
12 | 	DHDdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS)
13 | 		:DistanceCalculator(normS, Ssequences, seqdim, lenS), scost(NULL), alphasize(0), maxdist(0){}
14 | 	DHDdistance(DHDdistance *dc): DistanceCalculator(dc),  scost(dc->scost), alphasize(dc->alphasize), maxdist(dc->maxdist){}
15 | 	virtual DistanceCalculator* copy(){return new DHDdistance(this);}
16 | 	virtual void setParameters(SEXP params){
17 | 		scost = REAL(getListElement(params, "scost"));
18 | 		alphasize = INTEGER(getListElement(params, "alphasize"))[0];
19 | 		maxdist = REAL(getListElement(params, "maxdist"))[0];
20 | 	}
21 |     virtual ~DHDdistance(){}
22 |     virtual double distance(const int&is, const int& js);
23 | };
24 | 
25 | #endif
26 | 


--------------------------------------------------------------------------------
/src/NMSMSTSoftdistance.h:
--------------------------------------------------------------------------------
 1 | #ifndef NMSMSTSOFTDISTANCECALCULATOR_H
 2 | #define NMSMSTSOFTDISTANCECALCULATOR_H
 3 | #include "NMSdistance.h"
 4 | 
 5 | class NMSMSTSoftdistance: public SUBSEQdistance{
 6 |   protected:
 7 | 	double *e;
 8 | 	double *e1;
 9 | 	double *t;
10 | 	double *t1;
11 | 	int rowsize;
12 | 	double * seqdur;
13 | 	double * softmatch;
14 | 	int alphasize;
15 | 	
16 |   public:
17 |   	NMSMSTSoftdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){}
18 |   	NMSMSTSoftdistance(NMSMSTSoftdistance *dc);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){}
19 | 	virtual DistanceCalculator* copy(){return new NMSMSTSoftdistance(this);}
20 | 	//NMSMSTdistance(const int& pnorm, int * psequences, const int & pnseq,  int * pslen, const int &pmaxlen, double *pseqdur);
21 |     virtual ~NMSMSTSoftdistance();
22 | 	virtual void setParameters(SEXP params);
23 | 	void computeattr(const int&is, const int& js);
24 | };
25 | 
26 | #endif
27 | 


--------------------------------------------------------------------------------
/R/dissvar.R:
--------------------------------------------------------------------------------
 1 | ###########################
 2 | ## Compute discrepancy
 3 | ###########################
 4 | 
 5 | dissvar <- function(diss, weights=NULL, squared=FALSE) {
 6 | 	if (squared) {
 7 | 		diss <- diss^2
 8 | 	}
 9 | 	if (is.null(weights)) {
10 | 		if (inherits(diss, "dist")) {
11 | 			return(sum(diss)/(attr(diss, "Size")^2))
12 | 		} else if (is.matrix(diss)) {
13 | 			return(sum(diss)/(2*(nrow(diss)^2)))
14 | 		} else {
15 | 			stop("diss argument should be a dist object or a dissimilarity matrix")
16 | 		}
17 | 	} 
18 | 	else {
19 | 		isdist <- inherits(diss, "dist")
20 | 		if (isdist) {
21 | 			n <- attr(diss, "Size")
22 | 		} else if (is.matrix(diss)) {
23 | 			n <- nrow(diss)
24 | 		} else {
25 | 			stop("diss argument should be a dist object or a dissimilarity matrix")
26 | 		}
27 | 		if(is.null(weights)) {
28 | 			weights <- rep(1, n)
29 | 		}
30 | 		dvar <- .Call(C_tmrWeightedInertiaDist, diss, as.integer(n), as.integer(isdist), as.integer(1:n), as.double(weights), as.integer(TRUE))
31 | 		return(dvar)
32 | 	}
33 | }


--------------------------------------------------------------------------------
/man/seqLLCS.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqLLCS}
 2 | \alias{seqLLCS}
 3 | \title{Compute the length of the longest common subsequence of two sequences}
 4 | \description{
 5 | Returns the length of the longest common subsequence of two sequences. This attribute is described in \cite{Elzinga (2008)}.
 6 | }
 7 | \usage{
 8 | seqLLCS(seq1, seq2)
 9 | }
10 | \arguments{
11 |   \item{seq1}{a sequence from a sequence object}
12 |   \item{seq2}{a sequence from a sequence object}
13 | }
14 | 
15 | \value{an integer being the length of the longest common subsequence of the two sequences.}
16 | 
17 | \references{
18 | Elzinga, Cees H. (2008). Sequence analysis: Metric representations of categorical time
19 | series. \emph{Technical Report}, Department of Social Science Research Methods, Vrije Universiteit, Amsterdam.
20 | }
21 | 
22 | \seealso{\code{\link{seqdist}}}
23 | 
24 | \examples{
25 | LCS.ex <- c("S-U-S-M-S-U", "U-S-SC-MC", "S-U-M-S-SC-UC-MC")
26 | LCS.ex <- seqdef(LCS.ex)
27 | seqLLCS(LCS.ex[1,],LCS.ex[3,])
28 | }
29 | \keyword{Dissimilarity measures}


--------------------------------------------------------------------------------
/man/seqfind.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqfind}
 2 | \alias{seqfind}
 3 | \title{Indexes of state sequence(s) x in state sequence object y}
 4 | \description{
 5 | Finds the row indexes of state sequence(s) x in the state sequence object y. 
 6 | }
 7 | \usage{
 8 |  seqfind(x, y)
 9 | }
10 | \arguments{
11 |   \item{x}{a state sequence object containing one or more sequences (\code{\link{seqdef}}).}
12 |   \item{y}{a state sequence object.}
13 | }
14 | \seealso{
15 |  \code{}.
16 | }
17 | \value{row index(es) of sequence(s) x in the set of sequences y.}
18 | 
19 | \examples{
20 | data(mvad)
21 | mvad.shortlab <- c("EM", "FE", "HE", "JL", "SC", "TR")
22 | mvad.seq <- seqdef(mvad, states=mvad.shortlab, 15:86)
23 | 
24 | ## Finding occurrences of sequence 176 in mvad.seq
25 | seqfind(mvad.seq[176,],mvad.seq)
26 | 
27 | ## Finding occurrences of sequence 1 to 8 in mvad.seq
28 | seqfind(mvad.seq[1:8,],mvad.seq)
29 | }
30 | 
31 | \author{Alexis Gabadinho (with Gilbert Ritschard for the help page)}
32 | 
33 | \keyword{Data handling}
34 | \keyword{State sequences} 
35 | 


--------------------------------------------------------------------------------
/R/TraMineR-check_helpers.R:
--------------------------------------------------------------------------------
 1 | # Check if an object is a valid data frame or matrix index.
 2 | #
 3 | # Otherwise, an error is raised and an information message is displayed.
 4 | #
 5 | # @param x     Object to test.
 6 | #        name  Name of the object to test.
 7 | #
 8 | # @return Nothing.
 9 | #
10 | # @author Pierre-Alexandre Fonta (2016-2017)
11 | 
12 | checkindex <- function(x, name) {
13 |   if (!is.index(x))
14 |     msg.stop(aprint(name), "must be a positive integer or a string")
15 | }
16 | 
17 | # Check if an object is a vector of valid data frame or matrix indexes.
18 | #
19 | # Otherwise, an error is raised and an information message is displayed.
20 | #
21 | # @param x     Object to test.
22 | #        name  Name of the object to test.
23 | #
24 | # @return Nothing.
25 | #
26 | # @author Pierre-Alexandre Fonta (2016-2017)
27 | 
28 | checkindexes <- function(x, name) {
29 |   if (!is.vector(x) || !xor(is.positive.integers(x), is.strings(x)))
30 |     msg.stop(aprint(name), "must be a vector of positive integers or strings")
31 | }
32 | 


--------------------------------------------------------------------------------
/R/seqformat-STS_to_DSS.R:
--------------------------------------------------------------------------------
 1 | # Should only be used through seqformat()
 2 | 
 3 | ## ==============================
 4 | ## Convert from STS to DSS format
 5 | ## ==============================
 6 | 
 7 | STS_to_DSS <- function(seqdata,
 8 | 	left=NA, right="DEL", gaps=NA, missing=NA, void="%", nr="*") {
 9 | 
10 | 	nbseq <- seqdim(seqdata)[1]
11 | 	maxsl <- seqdim(seqdata)[2]
12 | 
13 | 	out <- matrix(NA, nrow=nbseq, ncol=maxsl)
14 | 
15 | 	rownames(out) <- paste("[",seq(1:nbseq),"]",sep="")
16 | 
17 | 	## PREPARING THE DATA
18 | 	seqdata <- as.matrix(seqdata)
19 | 	seqdata <- seqprep(seqdata, missing=missing, left=left, gaps=gaps, right=right, void=void, nr=nr)
20 | 
21 | 	for (i in 1:nbseq) {
22 | 		idx <- 1
23 | 		j <- 1
24 | 		tmpseq <- seqdata[i,]
25 | 		sl <- TraMineR.length(tmpseq, void)
26 | 
27 | 		while (j <= sl) {
28 | 			iseq <- tmpseq[j]
29 | 
30 | 			out[i,idx] <- iseq
31 | 
32 | 			while (j < sl & tmpseq[j+1]==iseq) {
33 | 				j <- j+1
34 | 			}
35 | 
36 | 			j <- j+1
37 | 			idx <- idx+1
38 | 		}
39 | 	}
40 | 	return(out)
41 | }
42 | 
43 | 


--------------------------------------------------------------------------------
/R/seqelength.R:
--------------------------------------------------------------------------------
 1 | ## ========================================
 2 | ## Get and set length of eseq
 3 | ## ========================================
 4 | 
 5 | seqelength <- function(eseq, s) {
 6 | 
 7 |   TraMineR.check.depr.args(alist(eseq = s))
 8 | 
 9 | 	seqelength.internal<-function(eseq){
10 | 		if(is.eseq(eseq)){
11 | 			return(.Call(C_tmrsequencegetlength, eseq))
12 | 		}
13 | 		return(-1)
14 | 	}
15 | 	if(is.seqelist(eseq)){
16 | 		as.numeric(sapply(unlist(eseq),seqelength.internal))
17 | 	}else if(is.eseq(eseq)){
18 | 		as.numeric(seqelength.internal(eseq))
19 | 	}else{
20 | 		stop("eseq should be a seqelist. See help on seqecreate.")
21 | 	}
22 | }
23 | 
24 | "seqelength<-" <- function(eseq, s, value){
25 | 
26 |   TraMineR.check.depr.args(alist(eseq = s))
27 | 
28 | 	if(!is.seqelist(eseq)) {
29 | 		stop("eseq should be a seqelist. See help on seqecreate.")
30 | 	}
31 | 	if(length(eseq)!=length(value)) {
32 | 		stop("eseq and len should be of the same size.")
33 | 	}
34 | 	.Call(C_tmrsequencesetlength, eseq, as.double(value))
35 | 	return(eseq)
36 | }
37 | 


--------------------------------------------------------------------------------
/R/seqformat-SPS_to_STS.R:
--------------------------------------------------------------------------------
 1 | # Should only be used through seqformat()
 2 | 
 3 | SPS_to_STS <- function(seqdata, spsformat, missing = "*") {
 4 |   nseq <- nrow(seqdata)
 5 |   trans <- matrix("", nrow = nseq, ncol = 1)
 6 |   xfix <- spsformat$xfix
 7 |   if (!identical(xfix, ""))
 8 |     xfix <- paste0("[", xfix, "]")
 9 |   sdsep <- spsformat$sdsep
10 |   for (i in 1:nseq) {
11 |     tmpseq <- na.omit(seqdata[i, ])
12 |     if (length(tmpseq)>0) {
13 |       for (s in 1:length(tmpseq)) {
14 |         sps <- strsplit(gsub(xfix, "", tmpseq[s]), split = sdsep)[[1]]
15 |         seq <- sps[1]
16 |         if (seq %in% missing)
17 |           seq <- NA
18 |         dur <- as.integer(sps[2])
19 |         trans[i] <-
20 |           if (s == 1)
21 |             paste(trans[i], seq, sep = "")
22 |           else
23 |             paste(trans[i], seq, sep = "-")
24 |         if (dur > 1)
25 |           for (r in 2:dur)
26 |             trans[i] <- paste(trans[i], "-", seq, sep = "")
27 |       }
28 |     }
29 |   }
30 |   sts <- seqdecomp(trans)
31 |   return(sts)
32 | }
33 | 


--------------------------------------------------------------------------------
/man/seqfpos.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqfpos}
 2 | \alias{seqfpos}
 3 | \title{Search for the first occurrence of a given element in a sequence}
 4 | \description{
 5 | Returns a vector containing the position of the first occurrence of the given element in each of the sequences in the data set.
 6 | }
 7 | \details{the state to search for has to be passed as a character string, and must be one of the state returned by the \code{\link{alphabet}} function. If the state is not contained in a sequence, NA is returned for this sequence.}
 8 | \usage{
 9 |  seqfpos(seqdata, state)
10 | }
11 | \arguments{
12 |   \item{seqdata}{a sequence object (see \code{\link{seqdef}} function).}
13 |   \item{state}{the state element to search in the sequences}
14 | }
15 | 
16 | \examples{
17 | data(biofam)
18 | biofam.seq <- seqdef(biofam,10:25)
19 | 
20 | ## Searching for the first occurrence of state 1
21 | ## in each of the 5 first sequence of the biofam data set.
22 | seqfpos(biofam.seq[1:5,],"1")
23 | }
24 | 
25 | \author{Alexis Gabadinho}
26 | 
27 | \keyword{Longitudinal characteristics}
28 | 


--------------------------------------------------------------------------------
/man/seqcomp.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqcomp}
 2 | \alias{seqcomp}
 3 | \title{Compare two state sequences}
 4 | \description{
 5 | Check whether two state sequences are identical.
 6 | }
 7 | \usage{
 8 |  seqcomp(x, y)
 9 | }
10 | \arguments{
11 |   \item{x}{a state sequence object containing a single sequence (typically the row of a main sequence
12 |       object, see \code{\link{seqdef}})}
13 |   \item{y}{a state sequence object containing a single sequence (typically the row of a main sequence
14 |       object, see \code{\link{seqdef}})}
15 | }
16 | 
17 | \value{\code{TRUE} if sequences are identical, \code{FALSE} otherwise}
18 | 
19 | \seealso{
20 |  \code{\link{seqfind}, \link{seqfpos}, \link{seqpm}}
21 | }
22 | \examples{
23 | data(mvad)
24 | mvad.shortlab <- c("EM", "FE", "HE", "JL", "SC", "TR")
25 | mvad.seq <- seqdef(mvad, states=mvad.shortlab, 15:86)
26 | 
27 | ## Comparing sequences 1 and 2 in mvad.seq
28 | seqcomp(mvad.seq[1,],mvad.seq[2,])
29 | 
30 | ## Comparing sequences 176 and 211 in mvad.seq
31 | seqcomp(mvad.seq[176,],mvad.seq[211,])
32 | }
33 | \keyword{Data handling}
34 | 


--------------------------------------------------------------------------------
/man/seqLLCP.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqLLCP}
 2 | \alias{seqLLCP}
 3 | \title{Compute the length of the longest common prefix of two sequences}
 4 | \description{
 5 | Returns the length of the longest common prefix of two sequences. This attribute is described in \cite{Elzinga (2008)}.
 6 | }
 7 | \usage{
 8 | seqLLCP(seq1, seq2)
 9 | }
10 | \arguments{
11 |   \item{seq1}{a sequence from a sequence object.}
12 |   \item{seq2}{a sequence from a sequence object.}
13 | }
14 | 
15 | \value{an integer being the length of the longest common prefix of the two sequences.}
16 | \seealso{\code{\link{seqdist}}}
17 | 
18 | \references{
19 | Elzinga, Cees H. (2008). Sequence analysis: Metric representations of categorical time
20 | series. \emph{Technical Report}, Department of Social Science Research Methods, Vrije Universiteit, Amsterdam.
21 | }
22 | 
23 | \examples{
24 | data(famform)
25 | famform.seq <- seqdef(famform)
26 | 
27 | ## The LCP's length between sequences 1 and 2
28 | ## in the famform sequence object is 2
29 | seqLLCP(famform.seq[1,],famform.seq[2,])
30 | }
31 | \keyword{Dissimilarity measures}
32 | 


--------------------------------------------------------------------------------
/src/NMSDURSoftdistance.h:
--------------------------------------------------------------------------------
 1 | #ifndef NMSDURSOFTDISTANCECALCULATOR_H
 2 | #define NMSDURSOFTDISTANCECALCULATOR_H
 3 | #include "NMSdistance.h"
 4 | 
 5 | class NMSDURSoftdistance: public SUBSEQdistance{
 6 |   protected:
 7 | 	double *e;
 8 | 	double *e1;
 9 | 	double *t_i;
10 | 	double *t1_i;
11 | 	double *t1_j;
12 | 	double *t_j;
13 | 	double *t_ij;
14 | 	int rowsize;
15 | 	double * seqdur;
16 | 	double * softmatch;
17 | 	int alphasize;
18 | 	
19 |   public:
20 |   	NMSDURSoftdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){}
21 |   	NMSDURSoftdistance(NMSDURSoftdistance * dc);//:SUBSEQdistance(normS, Ssequences, seqdim, lenS), seqdur(NULL){}
22 | 	virtual DistanceCalculator* copy(){return new NMSDURSoftdistance(this);}
23 | 	//NMSDURdistance(const int& pnorm, int * psequences, const int & pnseq,  int * pslen, const int &pmaxlen, double *pseqdur);
24 |     virtual ~NMSDURSoftdistance();
25 | 	virtual void setParameters(SEXP params);
26 | 	void computeattr(const int&is, const int& js);
27 | };
28 | 
29 | #endif
30 | 


--------------------------------------------------------------------------------
/R/seqivolatility.R:
--------------------------------------------------------------------------------
 1 | seqivolatility <- function(seqdata, w=.5, with.missing=FALSE, adjust=TRUE){
 2 | 
 3 | 	if (!inherits(seqdata,"stslist"))
 4 | 		stop(" [!] data is NOT a sequence object, see seqdef function to create one")
 5 |   if (!is.logical(adjust))
 6 | 		stop(" [!] adjust should be logical")
 7 |   if (w>1 | w<0)
 8 | 		stop(" [!] w should be in the range [0, 1]!")
 9 | 
10 | 
11 |   alph <- alphabet(seqdata)
12 |   ##void <- attr(seqdata,"void")
13 | 	nr <- attr(seqdata,"nr")
14 |   if (with.missing)
15 |     alph <- c(alph,nr)
16 |   alph.size <- length(alph)
17 | 
18 | 
19 |   transp <- suppressMessages(
20 |       seqtransn(seqdata, with.missing=with.missing, norm=TRUE))
21 |   sdist <- suppressMessages(
22 |       seqistatd(seqdata, with.missing=with.missing))
23 |   nvisit <- rowSums(sdist>0)
24 |   if (adjust) {
25 |     ret <- ifelse(nvisit - 1 <= 0, 0, (nvisit - 1)/(alph.size -1))
26 |     ret <- w * ret + (1-w) * transp
27 |   } else {
28 |     ret <- w * nvisit/alph.size + (1-w) * transp
29 |   }
30 | 
31 |   return(as.vector(ret))
32 | }
33 | 


--------------------------------------------------------------------------------
/src/treeeventmap.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREEEVENTMAP_H
 2 | #define TREEEVENTMAP_H
 3 | //#include "treeeventnode.h"
 4 | //#include <map>
 5 | #include "eventseq.h"
 6 | extern "C"{
 7 | 	#include <R.h>
 8 | 	#include <Rinternals.h>
 9 | }
10 | 
11 | //using namespace std;
12 | 
13 | //Forward declaration
14 | class TreeEventNode;
15 | //Iterateur pour la map
16 | typedef std::map<int,TreeEventNode*>::iterator TreeEventMapIterator;
17 | //Définition de type pour aleger le code
18 | //Une map sur le type d'événements, et la classe événement
19 | class TreeEventMap: public std::map<int,TreeEventNode*> {
20 | 
21 | public:
22 | TreeEventMap():std::map<int,TreeEventNode*>() {}
23 |     ~TreeEventMap() {}
24 |     void simplifyTreeMap(const double &minSup);
25 |     void print(const int & prof, const bool& isbrother);
26 |     int countSubsequence(double minSup);
27 |     void getSubsequences(SEXP result,double * support, Sequence *s2, int *index,const double &step, SEXP classname,EventDictionary * ed);
28 |     void clearAllPointers();
29 |     void clearSupport();
30 | 
31 | };
32 | 
33 | #endif // TREEEVENTMAP_H
34 | 


--------------------------------------------------------------------------------
/src/constraint.h:
--------------------------------------------------------------------------------
 1 | #ifndef _CONSTRAINT_INCLUDED_
 2 | #define _CONSTRAINT_INCLUDED_
 3 | #include <R.h>
 4 | #include <Rinternals.h>
 5 | 
 6 | class Constraint {
 7 | 
 8 | protected:
 9 | 	double maxGap;
10 | 	double windowSize;
11 | 	double ageMinBegin;
12 | 	double ageMaxBegin;
13 | 	double ageMaxEnd;
14 | 	int countMethod;
15 | public:
16 | 	Constraint(const double &mg, 
17 | 		   const double &ws, 
18 | 		   const double &aminb, 
19 | 		   const double &amaxb, 
20 | 		   const double &amaxe, 
21 | 		   const int &cmethod);
22 | 	virtual ~Constraint() 
23 | 	  {
24 | 	    
25 | 	  }
26 | 	inline double getmaxGap() 
27 | 	{
28 | 	  return(this->maxGap);
29 | 	}
30 | 	inline double getwindowSize() 
31 | 	{
32 | 	  return(this->windowSize);
33 | 	}
34 | 	inline double getageMinBegin() 
35 | 	{
36 | 	  return(this->ageMinBegin);
37 | 	}
38 | 	inline double getageMaxBegin()
39 | 	{
40 | 	  return(this->ageMaxBegin);
41 | 	}
42 | 	inline double getageMaxEnd() 
43 | 	{
44 | 	  return(this->ageMaxEnd);
45 | 	}
46 | 	inline int getcountMethod() 
47 | 	{
48 | 	  return(this->countMethod);
49 | 	}
50 | };
51 | #endif
52 | 


--------------------------------------------------------------------------------
/src/OMvdistance.h:
--------------------------------------------------------------------------------
 1 | #ifndef OMvdistanceCALCULATOR_H
 2 | #define OMvdistanceCALCULATOR_H
 3 | #include "OMdistance.h"
 4 | 
 5 | class OMvdistance: public OMdistance{
 6 | 	double * seqdur;
 7 | 	double * indellist;
 8 | 	int sublink;
 9 |   public:
10 | 	OMvdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);
11 | 	OMvdistance(OMvdistance *dc);
12 | 	virtual DistanceCalculator* copy(){return new OMvdistance(this);}
13 | 	virtual void setParameters(SEXP params);
14 |     virtual ~OMvdistance();
15 |     virtual double distance(const int&is, const int& js);
16 | 	inline double getIndel(const int& indice, const int& state){
17 | 		return this->indellist[state]*seqdur[indice];
18 | 	}
19 |   inline double getSubCost(const int& i_state, const int& j_state, const int& i_state_indice, const int& j_state_indice){
20 |     if (sublink==1) {
21 |       return(scost[MINDICE(i_state, j_state, alphasize)] * (seqdur[i_state_indice]+seqdur[j_state_indice]));
22 |     }
23 |     return(scost[MINDICE(i_state, j_state, alphasize)] * sqrt(seqdur[i_state_indice]*seqdur[j_state_indice]));
24 |   }
25 | };
26 | 
27 | #endif
28 | 


--------------------------------------------------------------------------------
/src/dist2matrix.c:
--------------------------------------------------------------------------------
 1 | #include "TraMineR.h"
 2 | 
 3 | SEXP dist2matrix(SEXP diss, SEXP diss_size) {
 4 |     int n=INTEGER(diss_size)[0];
 5 |     SEXP ans;
 6 |     PROTECT(ans = allocMatrix(REALSXP, n, n));
 7 |     double *matrix=REAL(ans);
 8 |     double *dmat=REAL(diss);
 9 | //  Rprintf("mlen = %i \n", mlen);
10 | //  Rprintf("ilen = %i \n", ilen);
11 |     int i, j, i_indiv, base_indice;
12 |     double r;
13 |     for (i=0;i<n;i++) {
14 |         i_indiv=i+1;
15 |         base_indice=n*(i_indiv-1) - i_indiv*(i_indiv-1)/2 -i_indiv-1;
16 |         matrix[i+i*n]=0;
17 |         for (j=i+1;j<n;j++) {
18 | //      Rprintf("index coord(%i,%i)=%f\n",indiv[i],indiv[j], distmatrix[TMRMATRIXINDEX(indiv[i],indiv[j],mlen)]);
19 | //      Rprintf("cindex coord(%i,%i)=%f, (%i)\n",i,j, distmatrix[TMRMATRIXINDEX(indiv[i],indiv[j],mlen)],TMRMATRIXINDEX(indiv[i],indiv[j],mlen));
20 |             r=dmat[base_indice+j+1];
21 |             matrix[j+i*n]=r;
22 |             matrix[i+j*n]=r;
23 |         }
24 |     }
25 | //  Rprintf("Sum = %f\n",(*result));
26 |     UNPROTECT(1);
27 |     return ans;
28 | //   Rprintf("Inertia = %f\n",(*result));
29 | }
30 | 


--------------------------------------------------------------------------------
/man/seqgen.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqgen}
 2 | \alias{seqgen}
 3 | \title{Random sequences generation}
 4 | \description{
 5 | Generates random sequences.
 6 | }
 7 | \details{
 8 | Each sequence is generated by choosing a set of random numbers (with min=1 and max=length of the alphabet) using the \code{\link{runif}} function. When the probability distribution is not specified, the uniform probability distribution giving same probability to each state is used to generate the sequences.
 9 | }
10 | \usage{
11 |  seqgen(n, length, alphabet, p)
12 | }
13 | \arguments{
14 |   \item{n}{number of sequences to generate}
15 |   \item{length}{sequences length}
16 |   \item{alphabet}{the alphabet from which the sequences are generated}
17 |   \item{p}{an optional vector of probabilities for the states in the alphabet. Must be of the same length as the alphabet. If not specified, equal probabilities are used.}
18 | }
19 | \value{a sequence object.}
20 | %\seealso{}
21 | \examples{
22 | seq <- seqgen(1000,10,1:4,c(0.2,0.1,0.3,0.4))
23 | seqstatd(seqdef(seq))
24 | }
25 | 
26 | \author{Alexis Gabadinho (with Gilbert Ritschard for the help page)}
27 | \keyword{Data handling}
28 | \keyword{State sequences} 
29 | 


--------------------------------------------------------------------------------
/src/eventdictionary.h:
--------------------------------------------------------------------------------
 1 | #ifndef EVENTDICTIONARY_H
 2 | #define EVENTDICTIONARY_H
 3 | #include <map>
 4 | #include <set>
 5 | #include <string>
 6 | #include <Rinternals.h>
 7 | #include "TraMineR.h"
 8 | 
 9 | 
10 | 
11 | class EventDictionary: public std::map<int,std::string> {
12 |     int numseq;
13 | public:
14 | EventDictionary():numseq(0) {}
15 |     EventDictionary(SEXP flist);
16 |     virtual ~EventDictionary();
17 |     bool codeExists(const int &code) const;
18 |     //int sprint(char * buffer, const char* start, const int&code)const;
19 |     void addSequence() {
20 |         this->numseq++;
21 |     }
22 |     void removeSequence() {
23 |         this->numseq--;
24 |     }
25 |     bool shouldDelete() {
26 |         return this->numseq<1;
27 |     }
28 |     SEXP getDictionary()const;
29 | 
30 | protected:
31 | 
32 | private:
33 | 
34 | };
35 | typedef EventDictionary::iterator EventDictionaryIterator;
36 | 
37 | class EventSet: public std::set<int>{
38 | 
39 | public:
40 | 	EventSet():std::set<int>(){}
41 | 	inline bool contain(const int& code) const{
42 | 		return this->find(code)!=this->end();
43 | 	}
44 | 	void add(SEXP elist);
45 | 
46 | };
47 | #endif // EVENTDICTIONARY_H
48 | 


--------------------------------------------------------------------------------
/R/seqstatl.R:
--------------------------------------------------------------------------------
 1 | ## ============================================
 2 | ## Returns the alphabet for a sequence data set
 3 | ## ============================================
 4 | 
 5 | seqstatl <- function(data, var=NULL, format='STS') {
 6 | 
 7 | 	listform <- c("STS","SPS","SPELL","DSS")
 8 | 	if (!format %in% listform)
 9 | 		stop("Input format must be one of: ", listform)
10 | 
11 | 	## Extracting the sequences from the data set
12 | 	seqdata <- seqxtract(data, var)
13 | 
14 | 	##
15 | 	if (format=='SPS') seqdata <- seqformat(seqdata, from = "SPS", to = "STS")
16 | 
17 | 	## Convert into the extended format to list states/events
18 | 	if (seqfcheck(seqdata) %in% c(":","-"))
19 | 		seqdata <- seqdecomp(seqdata, sep=seqfcheck(seqdata))
20 | 
21 | 	statl <- sort(unique(as.vector(seqdata)))
22 | 
23 | 	## IF states are numeric values, sort them as integer
24 | 	## (if sorted as characters, 10 will be after 1,
25 | 	## not after 9)
26 | 	#statnum <- suppressWarnings(as.integer(statl))
27 | 	## using as.numeric to avoid issue with non-integer state value
28 |     statnum <- suppressWarnings(as.numeric(statl))
29 | 
30 | 	if (all(is.na(statnum)==FALSE)) statl <- sort(statnum)
31 | 
32 | 	return(statl)
33 | 	}
34 | 


--------------------------------------------------------------------------------
/man/seqmpos.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqmpos}
 2 | \alias{seqmpos}
 3 | \title{Number of matching positions between two sequences.}
 4 | \description{
 5 | Returns the number of common elements, i.e., same states appearing at the same position in the two sequences.
 6 | }
 7 | \usage{
 8 |  seqmpos(seq1, seq2, with.missing=FALSE)
 9 | }
10 | \arguments{
11 |   \item{seq1}{a sequence from a sequence object.}
12 |   \item{seq2}{a sequence from a sequence object.}
13 |   \item{with.missing}{if \code{TRUE}, gaps appearing at the same position in both sequences are also considered as common elements}.
14 | }
15 | \seealso{
16 |  \code{\link{seqLLCP}}, \code{\link{seqLLCS} }.
17 | }
18 | \examples{
19 | data(famform)
20 | famform.seq <- seqdef(famform)
21 | 
22 | seqmpos(famform.seq[1,],famform.seq[2,])
23 | seqmpos(famform.seq[2,],famform.seq[4,])
24 | 
25 | ## Example with gaps in sequences
26 | a <- c(NA,"A",NA,"B","C")
27 | b <- c(NA,"C",NA,"B","C")
28 | 
29 | ex1.seq <- seqdef(rbind(a,b))
30 | 
31 | seqmpos(ex1.seq[1,], ex1.seq[2,])
32 | seqmpos(ex1.seq[1,], ex1.seq[2,], with.missing=TRUE)
33 | 
34 | }
35 | 
36 | \author{Alexis Gabadinho (with Gilbert Ritschard for help page)}
37 | 
38 | \keyword{Dissimilarity measures}
39 | 


--------------------------------------------------------------------------------
/src/prefixtree.h:
--------------------------------------------------------------------------------
 1 | #ifndef PREFIXTREE_H
 2 | #define PREFIXTREE_H
 3 | #include "eventseq.h"
 4 | #include "treeeventnode.h"
 5 | #include "treeeventmap.h"
 6 | #include<R.h>
 7 | #include "eventdictionary.h"
 8 | #include "constraint.h"
 9 | 
10 | class TreeEventNode;
11 | 
12 | 
13 | /**
14 |  Prefix Tree base node
15 | */
16 | 
17 | 
18 | class PrefixTree {
19 |     //Start of the tree
20 |     TreeEventMap child;
21 | public:
22 |     //Ctor
23 |     PrefixTree();
24 |     //Dtor
25 |     virtual ~PrefixTree();
26 |    // void addSequence(Sequence *s,const double &maxGap,const double &windowSize, const double & ageMin, const double & ageMax,const double & ageMaxEnd, const int& k);
27 |     void addSequence(Sequence *s, Constraint *cst, const int& k);
28 |     void simplifyTree(double minSup);
29 |     int countSubsequence(double minSup);
30 |     //Give an overview of this tree (paramètre prof==profondeur, interne)
31 |     void print();
32 |     //Type of this event
33 |     void getSubsequences(SEXP result,double * support, int *index, SEXP classname, EventDictionary * ed);
34 |     void clearSupport() {
35 |         this->child.clearSupport();
36 |     }
37 | };
38 | 
39 | #endif // PREFIXTREE_H
40 | 


--------------------------------------------------------------------------------
/man/famform.Rd:
--------------------------------------------------------------------------------
 1 | \name{famform}
 2 | \docType{data}
 3 | \alias{famform}
 4 | \title{Example data set: sequences of family formation}
 5 | \description{
 6 | This data set contains 5 sequences of family formation histories, used by Elzinga (2008) to introduce several metrics for computing distances between sequences. These sequences don't contain information about the duration spent in each state, they contain only distinct successive states.
 7 | }
 8 | \details{
 9 | The sequences are in `STS' format and stored in character strings with states separated with `-'. The alphabet is made of the five tokens M, MC, S, SC, and U.
10 | 
11 | This data set is used in TraMineR's manual to crosscheck some results with those presented by Elzinga.
12 | }
13 | \usage{data(famform)}
14 | \format{A data frame with 5 rows and 1 variable.}
15 | \source{Elzinga (2008)}
16 | \references{
17 | Elzinga, Cees H. (2008). Sequence analysis: Metric representations of categorical time
18 | series. Non published manuscript. VU University, Amsterdam.
19 | }
20 | 
21 | \examples{
22 | data(famform)
23 | ff.seq <- seqdef(famform)
24 | seqiplot(ff.seq)
25 | }
26 | 
27 | \author{Gilbert Ritschard and Alexis Gabadinho}
28 | \keyword{Datasets}
29 | 


--------------------------------------------------------------------------------
/src/distanceobject.cpp:
--------------------------------------------------------------------------------
 1 | /* #include "distanceobject.h"
 2 | 
 3 | DistanceObject::DistanceObject(SEXP magicIndexS, SEXP magicSeqS){
 4 |   this->magicIndex=INTEGER(magicIndexS);
 5 |   this->magicSeq=INTEGER(magicSeqS);
 6 |   this->finalnseq=Rf_length(magicSeqS);
 7 |   PROTECT(ans = Rf_allocVector(REALSXP, (finalnseq*(finalnseq-1)/2)));
 8 |   result=REAL(ans);
 9 | }
10 | DistanceObject::~DistanceObject(){
11 |   UNPROTECT(1);
12 | }
13 | 
14 | void DistanceObject::setDistance(const int &is,const int &js, const double& cmpres){
15 |       int j_start=magicIndex[js];
16 | 		  int j_end=magicIndex[js+1];
17 | 		  int i_start=magicIndex[is];
18 | 		  int i_end=magicIndex[is+1];
19 | 		  int i_index, j_index, i, j, base_index;
20 | 		  for(i=i_start;i<i_end;i++){
21 | 		    i_index=magicSeq[i];
22 | 		    //n*(i-1) - i*(i-1)/2 + j-i
23 | 		    for(j=j_start;j<j_end;j++){
24 | 		      j_index=magicSeq[j];
25 | 		      if(i_index!=j_index){
26 | 		        base_index=distIndex(i_index,j_index,finalnseq);
27 | //		        REprintf("Unique (%d,%d) => (%d,%d)(%d) => %f \n",is,js,i_index,j_index,(base_index),cmpres);
28 |             result[base_index]=cmpres;
29 |           }		      
30 |         }
31 |       }
32 | }
33 |  */
34 |  
35 | 


--------------------------------------------------------------------------------
/man/seqlength.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqlength}
 2 | \alias{seqlength}
 3 | \title{Sequence length}
 4 | \description{
 5 |   Returns the length of sequences.
 6 | }
 7 | \details{
 8 | The length of a sequence is computed by counting its number of non void elements, i.e. including non-void missing values. The \code{seqlength} function returns a vector containing the length of each sequence in the provided sequence object.
 9 | }
10 | \usage{
11 |  seqlength(seqdata, with.missing=TRUE)
12 | }
13 | \arguments{
14 |   \item{seqdata}{a sequence object created with the \code{\link{seqdef}} function.}
15 | 
16 | 	\item{with.missing}{logical: should non-void missing values be treated as a regular state?
17 |       Default is \code{TRUE}.
18 |       If \code{FALSE} missing values are considered as void.}
19 | }
20 | \seealso{\code{\link{seqlength.align}}
21 | }
22 | \examples{
23 | ## Loading the 'famform' example data set
24 | data(famform)
25 | 
26 | ## Defining a sequence object with the 'famform' data set
27 | ff.seq <- seqdef(famform)
28 | 
29 | ## Retrieving the length of the sequences
30 | ## in the ff.seq sequence object
31 | seqlength(ff.seq)
32 | }
33 | 
34 | \author{Alexis Gabadinho and Gilbert Ritschard}
35 | \keyword{Longitudinal characteristics}
36 | 


--------------------------------------------------------------------------------
/R/seqconc.R:
--------------------------------------------------------------------------------
 1 | ## ========================================
 2 | ## Concatenates vectors of states or events
 3 | ## into character strings
 4 | ## ========================================
 5 | 
 6 | sconc <- function(seqdata, sep, void) {
 7 | 
 8 | 	if (is.na(void))
 9 | 		vi <- !is.na(seqdata)
10 | 	else if (!is.null(void))
11 | 		vi <- seqdata!=void
12 | 	else vi <- 1:length(seqdata)
13 | 
14 | 	return(paste(seqdata[vi], collapse=sep))
15 | 	}
16 | 
17 | seqconc <- function (data, var=NULL, sep="-", vname="Sequence", void=NA) {
18 | 
19 | 	if (inherits(data,"stslist")) {
20 | 		void <- attr(data,"void")
21 | 		cseq <- apply(data, 1, sconc, sep, void)
22 | 		cseq <- as.matrix(cseq)
23 | 		rownames(cseq) <- rownames(data)
24 | 	}
25 | 	else {
26 | 		seqdata <- seqxtract(data, var)
27 | 
28 | 		if (seqdim(seqdata)[1]==1)
29 | 			cseq <- sconc(seqdata,sep, void)
30 | 		else
31 | 			cseq <- apply(seqdata, 1, sconc, sep, void)
32 | 
33 | 		cseq <- as.matrix(cseq)
34 | 
35 | 		## Rows and column names for the output
36 |     if (is.null(rownames(data))){
37 | 		  rownames(cseq) <- paste("[",seq(1:length(cseq)),"]",sep="")
38 |     } else {
39 |       rownames(cseq) <- rownames(data)
40 |     }
41 | 	}
42 | 
43 | 	colnames(cseq) <- vname
44 | 
45 | 	return(cseq)
46 | }
47 | 


--------------------------------------------------------------------------------
/man/seqlength-align.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqlength.align}
 2 | \alias{seqlength.align}
 3 | \title{Align sequence length across domains}
 4 | \description{
 5 |   Sets lengths of sequences of multiple domains as the shortest lengths across domains.
 6 | }
 7 | \details{
 8 | Sequences in the sequence objects are assumed to be ordered conformably. The length of the \eqn{i}-th sequence in each domain is set as the length of the shortest \eqn{i}-th sequence of the domains. The reduction of length is done by filling end positions with voids.
 9 | }
10 | \usage{
11 |  seqlength.align(seq.list)
12 | }
13 | \arguments{
14 |   \item{seq.list}{list of sequence objects (of class \code{stslist}) created with the \code{\link{seqdef}} function. The sequence objects must all have the same number of sequences.}
15 | }
16 | \seealso{\code{\link{seqlength}}}
17 | \examples{
18 | ## Using the ex1 data set with sequences of different length
19 | data(ex1)
20 | s1 <- seqdef(ex1[,1:13])
21 | seqlength(s1)
22 | 
23 | ## sequence object s2 with a shorter 1st sequence
24 | 
25 | s2 <- s1
26 | s2[1,8:13] <- attr(s2,"void")
27 | seqlength(s2)
28 | 
29 | ## aligning sequence lengths
30 | seqlength.align(list(s1,s2))
31 | 
32 | }
33 | 
34 | \author{Gilbert Ritschard}
35 | \keyword{Longitudinal characteristics}
36 | 


--------------------------------------------------------------------------------
/src/checktriangleineq.c:
--------------------------------------------------------------------------------
 1 | #include "TraMineR.h"
 2 | 
 3 | SEXP checktriangleineq(SEXP mat, SEXP matsize, SEXP tolS) {
 4 |     int n=INTEGER(matsize)[0];
 5 | 	double tol = REAL(tolS)[0];
 6 |     SEXP ans;
 7 |     
 8 |     double *matrix=REAL(mat);
 9 | //  Rprintf("mlen = %i \n", mlen);
10 | //  Rprintf("ilen = %i \n", ilen);
11 |     int i, j, z, i_indiv, j_indiv;
12 |     double d;
13 |     for (i=0;i<n;i++) {
14 |         i_indiv=i*n;
15 |         matrix[i+i*n]=0;
16 |         for (j=i+1;j<n;j++) {
17 | 			d = matrix[j+i_indiv];
18 | 			j_indiv=j*n;
19 | 			for (z=0; z<n; z++) {
20 | 				if (d-(matrix[i_indiv+ z] + matrix[j_indiv+z]) >= tol) {
21 | 					PROTECT(ans = Rf_allocVector(INTSXP, 3));
22 | 					INTEGER(ans)[0] =i+1;
23 | 					INTEGER(ans)[1] =j+1;
24 | 					INTEGER(ans)[2] =z+1;
25 | 					UNPROTECT(1);
26 | 					return ans;
27 | 				}
28 | 
29 | 			}
30 | //      Rprintf("index coord(%i,%i)=%f\n",indiv[i],indiv[j], distmatrix[TMRMATRIXINDEX(indiv[i],indiv[j],mlen)]);
31 | //      Rprintf("cindex coord(%i,%i)=%f, (%i)\n",i,j, distmatrix[TMRMATRIXINDEX(indiv[i],indiv[j],mlen)],TMRMATRIXINDEX(indiv[i],indiv[j],mlen));
32 |         }
33 |     }
34 | //  Rprintf("Sum = %f\n",(*result));
35 |    return R_NilValue;
36 | //   Rprintf("Inertia = %f\n",(*result));
37 | }
38 | 


--------------------------------------------------------------------------------
/R/seqdecomp.R:
--------------------------------------------------------------------------------
 1 | ## =====================================================
 2 | ## Translate sequences as character strings into vectors
 3 | ## (one column (variable) for each state/event)
 4 | ## =====================================================
 5 | 
 6 | seqdecomp <- function(data, var=NULL, sep="-", miss="NA", vnames=NULL) {
 7 | 
 8 |   rownames <- rownames(data)
 9 | 	## Extracting the sequences from the data set
10 | 	seqdata <- seqxtract(data, var)
11 | 
12 | 	seqdata <- as.vector(seqdata)
13 | 	nbseq <- length(seqdata)
14 | 
15 | 	## Splitting the character strings
16 | 	tmp <- strsplit(seqdata, split=sep)
17 | 
18 | 	## We first look for the max sequence length
19 | 	sl <- sapply(tmp,length)
20 | 	lmax <- max(sl)
21 | 	
22 | 	sdecomp <- matrix(nrow=nbseq, ncol=lmax)
23 |   if (is.null(rownames)){
24 | 	  rownames(sdecomp) <- paste("[",seq(1:nbseq),"]",sep="")
25 |   } else {
26 |     rownames(sdecomp) <- rownames
27 |   }
28 | 	if (is.null(vnames))
29 | 		colnames(sdecomp) <- paste("[",seq(1:lmax),"]",sep="")
30 | 	else
31 | 		colnames(sdecomp) <- vnames
32 | 
33 | 	for (i in 1:nbseq) {
34 | 		seq <- tmp[[i]]
35 | 		seq[seq %in% miss] <- NA
36 | 		if (sl[i] < lmax) seq <- c(seq,rep(NA,lmax-sl[i]))
37 | 		sdecomp[i,] <- seq
38 | 		}
39 | 
40 | 	return(sdecomp)
41 | 
42 | }
43 | 
44 | 	
45 | 


--------------------------------------------------------------------------------
/man/is.stslist.Rd:
--------------------------------------------------------------------------------
 1 | \name{is.stslist}
 2 | \alias{is.stslist}
 3 | \title{Test if is a proper state sequence (stslist) object}
 4 | \description{The function tests whether \code{x} is of class \code{stslist} and if its \code{weights} attribute has the expected length and names.
 5 | }
 6 | 
 7 | \usage{
 8 | is.stslist(x)
 9 | }
10 | \arguments{
11 |  \item{x}{object to be tested.}
12 |  }
13 | \value{
14 |  Logical: result of the test.
15 | }
16 | \seealso{
17 |  \code{\link{seqdef}}
18 | }
19 | \examples{
20 | ## Creating a sequence object with the columns 13 to 24
21 | ## in the 'actcal' example data set
22 | data(biofam)
23 | biofam <- biofam[sample(nrow(biofam),300),]
24 | biofam.lab <- c("Parent", "Left", "Married", "Left+Marr",
25 |                 "Child", "Left+Child", "Left+Marr+Child", "Divorced")
26 | biofam.seq <- seqdef(biofam[,10:25], weights=biofam$wp00tbgs)
27 | 
28 | is.stslist(biofam.seq) #TRUE
29 | 
30 | attr(biofam.seq,"weights") <- NULL
31 | is.stslist(biofam.seq) #TRUE
32 | 
33 | attr(biofam.seq,"weights") <- rep(1, nrow(biofam.seq))
34 | is.stslist(biofam.seq) #FALSE
35 | 
36 | w <- rep(1, nrow(biofam.seq))
37 | names(w) <- rownames(biofam.seq)
38 | attr(biofam.seq,"weights") <- w
39 | is.stslist(biofam.seq) #TRUE
40 | 
41 | }
42 | 
43 | \author{Gilbert Ritschard}
44 | 
45 | \keyword{State sequences}
46 | 


--------------------------------------------------------------------------------
/man/seqfposend.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqfposend}
 2 | \alias{seqfposend}
 3 | \title{End of first spell in given state}
 4 | \description{Returns the position in the sequences of end of first spell in a given state
 5 | }
 6 | 
 7 | \usage{
 8 | seqfposend(seqdata, state, with.missing=FALSE, lead=0, from.seq.start=TRUE)
 9 | }
10 | 
11 | \arguments{
12 |     \item{seqdata}{State sequence object of class \code{stslist} as produced by \code{\link{seqdef}}.}
13 |     \item{state}{Element of the alphabet of \code{seqdata}.}
14 |     \item{with.missing}{Logical. Should non-void missing values be considered as regular states? See \code{\link{seqdss}} and \code{\link{seqdur}}.}
15 |     \item{lead}{Integer. Value to be added to the end position.}
16 |     \item{from.seq.start}{Logical. Should position be computed from the start of the sequence? Default is \code{TRUE}. If \code{FALSE}, position is computed from the start of the spell.}
17 | }
18 | 
19 | \value{
20 |     Vector of integers giving position of end of spell in the sequences.
21 | }
22 | 
23 | \seealso{
24 |     \code{\link{seqfpos}}
25 | }
26 | 
27 | \author{Gilbert Ritschard}
28 | 
29 | \examples{
30 | ## End of spell in further education (FE) in first 10 mvad sequences
31 | 
32 | data(mvad)
33 | m.seq <- seqdef(mvad[1:10,17:86])
34 | seqfposend(m.seq, state="FE")
35 | 
36 | }
37 | 
38 | 


--------------------------------------------------------------------------------
/man/bfspell.Rd:
--------------------------------------------------------------------------------
 1 | \name{bfspell}
 2 | \docType{data}
 3 | \alias{bfspell}
 4 | \alias{bfspell20}
 5 | \alias{bfpdata20}
 6 | 
 7 | \title{Example data set: First 20 biofam sequences in SPELL form}
 8 | 
 9 | \description{First 20 sequences of the \code{\link{biofam}} data set in SPELL form. The data serve to illustrate the use of \code{\link{seqformat}} for converting SPELL data into STS (horizontal) form.
10 | }
11 | 
12 | 
13 | \details{
14 | The states are coded with the following short labels\cr
15 | \cr
16 | P = "Parent" \cr
17 | L = "Left" \cr
18 | M = "Married" \cr
19 | LM = "Left+Marr" \cr
20 | C = "Child" \cr
21 | LC = "Left+Child" \cr
22 | LMC = "Left+Marr+Child" \cr
23 | D = "Divorced" \cr
24 | \cr
25 | The data is a SPELL representation of \code{biofam[1:20,10:25]}, corresponding to 20 family life sequences between ages 15 and 30.
26 | }
27 | 
28 | \usage{
29 | data(bfspell)
30 | }
31 | 
32 | \format{A data set with two data frames: \code{bfspell20} with one row per spell and \code{bfpdata20} with one row per id. The \code{bfspell20} data frame contains the spell data themselves (4 variables \code{id}, \code{begin}, \code{end}, \code{states}) and \code{bfpdata20} the year when aged 15 (2 variables \code{id}, \code{when15}).
33 | }
34 | \seealso{
35 | \code{\link{biofam}}
36 | }
37 | \author{Gilbert Ritschard and Alexis Gabadinho}
38 | 
39 | \keyword{Datasets}
40 | 


--------------------------------------------------------------------------------
/R/seqeweight.R:
--------------------------------------------------------------------------------
 1 | ## ========================================
 2 | ## Get and set weight of eseq
 3 | ## ========================================
 4 | 
 5 | seqeweight <- function(eseq, s) {
 6 | 
 7 |   TraMineR.check.depr.args(alist(eseq = s))
 8 | 
 9 | 	seqeweight.internal<-function(eseq){
10 | 		if(is.eseq(eseq)) {
11 | 			return(.Call(C_tmrsequencegetweight, eseq))
12 | 		}
13 | 		return(-1)
14 | 	}
15 | 
16 | 	if (is.seqelist(eseq)) {
17 | 		as.numeric(sapply(unlist(eseq),seqeweight.internal))
18 | 	}else if(is.eseq(eseq)) {
19 | 		as.numeric(seqeweight.internal(eseq))
20 | 	} else {
21 | 		stop(" [!] eseq should be a seqelist. See help on seqecreate.")
22 | 	}
23 | }
24 | 
25 | "seqeweight<-" <- function(eseq, s, value) {
26 | 
27 |   TraMineR.check.depr.args(alist(eseq = s))
28 | 
29 | 	if(!is.seqelist(eseq)) {
30 | 		stop(" [!] eseq should be a seqelist. See help on seqecreate.")
31 | 	}
32 | 	if(length(eseq)!=length(value)) {
33 | 		stop(" [!] eseq and weights should be of the same size.")
34 | 	}
35 | 	.Call(C_tmrsequencesetweight, eseq, as.double(value))
36 | 	return(eseq)
37 | }
38 | 
39 | seqeisweighted <- function(eseq, s) {
40 | 
41 |   TraMineR.check.depr.args(alist(eseq = s))
42 | 
43 | 	if(!is.seqelist(eseq)) {
44 | 		stop(" [!] eseq should be a seqelist. See help on seqecreate.")
45 | 	}
46 | 	weights <- seqeweight(eseq)
47 | 	return(any(weights!=1))
48 | }
49 | 


--------------------------------------------------------------------------------
/R/seqnum.R:
--------------------------------------------------------------------------------
 1 | ## ========================================
 2 | ## Change the alphabet of a sequence object
 3 | ## ========================================
 4 | 
 5 | seqnum <- function(seqdata, with.missing=FALSE) {
 6 | 	if (!inherits(seqdata,"stslist"))
 7 | 		stop("data is not a sequence object, see seqdef function to create one")
 8 | 
 9 | 	alphabet.orig <- attr(seqdata,"alphabet")
10 |   levels.orig <- levels(seqdata[[1]])
11 | 
12 | 	nbstat <- length(alphabet.orig)
13 | 
14 | 	alphabet.new <- 0:(nbstat-1)
15 | 	levels.new <- alphabet.new
16 | 
17 | 	if (with.missing) {
18 | 		## Changing missing code into numerical value
19 | 		nr.new <- which(levels.orig==attr(seqdata,"nr"))-1
20 | 		levels.new <- c(levels.new, nr.new)
21 | 		attr(seqdata,"nr") <- nr.new
22 | 	}
23 | 	else
24 | 		levels.new <- c(levels.new, attr(seqdata,"nr"))
25 | 
26 | 	levels.new <- c(levels.new,attr(seqdata,"void"))
27 | 
28 | 	if (length(alphabet.new)!=nbstat)
29 | 		stop("lengths of old and new alphabet are different")
30 | 
31 | 	#for (i in 1:seqdim(seqdata)[2])
32 | 	#	#seqdata[,i] <- factor(seqdata[,i], levels=levels.orig, labels=levels.new)
33 | 	#	seqdata[[i]]  <- factor(seqdata[[i]], levels=levels.orig, labels=levels.new)
34 | 
35 |   seqdata[] <- lapply(seqdata, factor, levels=levels.orig, labels=levels.new)
36 | 
37 | 	attr(seqdata,"alphabet") <- alphabet.new
38 | 
39 | 	return(seqdata)
40 | 
41 | 	}
42 | 


--------------------------------------------------------------------------------
/R/diss.rep-methods.R:
--------------------------------------------------------------------------------
 1 | ## ===========================
 2 | ## Methods for stsstatd objects
 3 | ## ===========================
 4 | 
 5 | print.diss.rep <- function(x, ...) {
 6 | 	criterion <- attr(x,"criterion")
 7 | 	n <- attr(x,"n")
 8 | 	quality <- attr(x,"Quality")
 9 | 
10 | 	cat(" [>] criterion:",criterion,"\n")
11 | 	cat(" [>]", n,"objects in the original data set\n")
12 | 	cat(" [>]", length(x),"representative(s)\n")
13 | 	cat(" [>] overall quality:", round(quality*100,2),"\n")
14 | 	cat(" [>] representative(s) index(es):", x[1:length(x)],"\n")
15 | }
16 | 
17 | summary.diss.rep <- function(object, ...) {
18 | 	criterion <- attr(object,"criterion")
19 | 	n <- attr(object,"n")
20 | 
21 | 	cat(" [>] criterion:",criterion,"\n")
22 | 	cat(" [>]", n,"objects in the original data set\n")
23 | 	cat(" [>]", nrow(object),"representative object(s)\n")
24 | 	cat(" [>] statistics for the representative set:\n\n")
25 | 	print(attr(object,"Statistics"), digits=3, ...)
26 | 	cat("\n    na: number of assigned objects\n")
27 | 	cat("    nb: number of objects in the neighborhood\n")
28 | 	cat("    SD: sum of the na distances to the representative\n")
29 | 	cat("    MD: mean of the na distances to the representative\n")
30 | 	cat("    DC: sum of the na distances to the center of the complete set\n")
31 | 	cat("    V: discrepancy of the subset\n")
32 | 	cat("    Q: quality of the representative\n")
33 | }
34 | 


--------------------------------------------------------------------------------
/man/seqdecomp.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqdecomp}
 2 | \alias{seqdecomp}
 3 | \title{Convert a character string into a vector of states or events}
 4 | \description{
 5 |   States can be represented by any substring that does not include the \code{sep} value. An empty separator \code{sep = ""} can only be used when each state is coded with a single character.
 6 | }
 7 | \usage{
 8 |  seqdecomp(data, var=NULL, sep='-', miss="NA", vnames=NULL)
 9 | }
10 | \arguments{
11 |  \item{data}{a dataframe, matrix, or character string vector containing sequence data (tibble will be converted with \code{as.data.frame}).}
12 |   \item{var}{the list of columns containing the sequences. Default is \code{NULL}, ie all the columns. Whether the sequences are in the compressed (character strings) or extended format is automatically detected by counting the number of columns.}
13 |   \item{sep}{the between states/events separator used in the input data set. Default is '\code{-}'.}
14 |   \item{miss}{the symbol for missing values (if any) used in the input data set. Default is \code{NA}.}
15 |   \item{vnames}{optional names for the column/variables of the output data set. Default is \code{NULL}.}
16 | }
17 | \seealso{
18 |  \code{\link{seqconc}}.
19 | }
20 | \examples{
21 | # 1 sequence of length 4
22 | seqdecomp("A-BB-C-DD")
23 | 
24 | # 2 sequences of length 6
25 | seqdecomp(c("ABBCDD","BBCCAD"),sep="")
26 | }
27 | \keyword{Data handling}
28 | 


--------------------------------------------------------------------------------
/R/seqtransn.R:
--------------------------------------------------------------------------------
 1 | ## =====================================
 2 | ## Number of transitions in the sequence
 3 | ## =====================================
 4 | 
 5 | seqtransn <- function(seqdata, with.missing=FALSE, norm=FALSE, pweight=FALSE) {
 6 | 
 7 | 	if (!inherits(seqdata,"stslist"))
 8 | 		stop("data is NOT a sequence object, see seqdef function to create one")
 9 | 
10 | 	## Number of transitions
11 | 	dss <- seqdss(seqdata, with.missing=with.missing)
12 | 	dssl <- seqlength(dss)
13 | 	nbseq <- nrow(dss)
14 | 
15 | 	if (pweight) {
16 | 		tr <- seqtrate(seqdata)
17 | 		dss.num <- seqasnum(dss)+1
18 | 		trans <- matrix(0, nrow=nbseq, ncol=1)
19 | 		rownames(trans) <- rownames(seqdata)
20 | 
21 | 		for (i in 1:nbseq) {
22 | 			if (dssl[i]>1) {
23 | 				for (j in 2:dssl[i]) {
24 | 					trans[i] <- trans[i] + (1-tr[dss.num[i,j-1], dss.num[i,j]])
25 | 				}
26 | 			}
27 | 		}
28 | 	}
29 | 	else {
30 | 		trans <- dssl-1
31 | 		if (any(dssl==0)) {
32 | 			trans[dssl==0] <- 0
33 | 		}
34 | 	}
35 | 
36 | 	if (norm) {
37 | 		seql <- seqlength(seqdata)
38 | 		trans <- trans/(seql-1)
39 | 		if (any(seql==1)) {
40 | 			trans[seql==1] <- 0
41 | 		}
42 | 	}
43 | 
44 | 	colnames(trans) <- "Trans."
45 | 
46 | 	return(trans)
47 | }
48 | 
49 | trans.pweight <- function(seqdata, tr) {
50 | 	res <- 0
51 | 
52 | 	for (i in 2:seqlength(seqdata)) {
53 | 		res <- res + (1-tr[seqdata[i-1], seqdata[i]])
54 | 	}
55 | 
56 | 	return(res)
57 | }
58 | 


--------------------------------------------------------------------------------
/man/disstree-get-rules.Rd:
--------------------------------------------------------------------------------
 1 | \name{disstree.get.rules}
 2 | \alias{disstree.get.rules}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{Tree classification rules}
 5 | \description{
 6 |   Tree classification rules.
 7 | }
 8 | \usage{
 9 | disstree.get.rules(tree, collapse="; ")
10 | }
11 | %- maybe also 'usage' for other objects documented here.
12 | \arguments{
13 |   \item{tree}{A tree (\code{disstree} or \code{DissTreeNode} object).}
14 |   \item{collapse}{Character string. Separator between categories in class of categorical values.}
15 | }
16 | \details{
17 | \code{disstree.get.rules} extracts the classification rules defined by a tree grown from a dissimilarity matrix and returns them as a vector of character strings. The rules are expressed as R commands and the i-th rule, for example, can be applied using \code{\link{eval}(parse(text=rule[i]))}. Rules are built through a call to \code{\link{disstreeleaf}}.
18 | }
19 | 
20 | \value{Character vector with the rules as R commands and an attribute \code{covariates} providing the names of the variables involved in the rules.
21 | }
22 | 
23 | 
24 | \seealso{\code{\link{disstree}} and examples therein, \code{\link{disstreeleaf}}, \code{\link{disstree.assign}}}
25 | \author{Gilbert Ritschard)}
26 | % Add one or more standard keywords, see file 'KEYWORDS' in the
27 | % R documentation directory.
28 | \keyword{Dissimilarity-based analysis}
29 | 


--------------------------------------------------------------------------------
/src/OMPerdistance.h:
--------------------------------------------------------------------------------
 1 | #ifndef OMPERDISTANCECALCULATOR_H
 2 | #define OMPERDISTANCECALCULATOR_H
 3 | #include "OMdistance.h"
 4 | 
 5 | class OMPerdistance: public OMdistance{
 6 | 	double timecost;
 7 | 	double * seqdur;
 8 | 	double * indellist;
 9 | 	int * seqlen;
10 |   public:
11 | 	OMPerdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);
12 | 	OMPerdistance(OMPerdistance *dc);
13 | 	virtual void setParameters(SEXP params);
14 |     virtual ~OMPerdistance();
15 |     virtual double distance(const int&is, const int& js);
16 | 	inline double getIndel(const int& indice, const int& state){
17 | 		return this->indellist[state]+timecost*(seqdur[indice]);
18 | 	}
19 | 	virtual DistanceCalculator* copy(){return new OMPerdistance(this);}
20 | 	inline double getSubCost(const int& i_state, const int& j_state, const int& i_state_indice, const int& j_state_indice){
21 | 		
22 | 		if(i_state==j_state){
23 | 			double diffdur= seqdur[i_state_indice]-seqdur[j_state_indice];
24 | 			if(diffdur<0) {
25 | 				return -1.0*(timecost*diffdur);
26 | 			}else{
27 | 				return(timecost*diffdur);
28 | 			}
29 | 		}else{
30 | 			//double commondur =fmin2(seqdur[i_state_indice], seqdur[j_state_indice]);
31 | 			//return(commondur*scost[MINDICE(i_state, j_state, alphasize)] + diffdur*timecost);
32 | 			return(scost[MINDICE(i_state, j_state, alphasize)] + (seqdur[i_state_indice]+seqdur[j_state_indice])*timecost);
33 | 		}
34 | 	}
35 | };
36 | 
37 | #endif
38 | 


--------------------------------------------------------------------------------
/man/seqpm.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqpm}
 2 | \alias{seqpm}
 3 | \title{Find substring patterns in sequences}
 4 | \description{
 5 |   Search for a pattern (substring) into sequences.
 6 | }
 7 | \usage{
 8 |  seqpm(seqdata, pattern, sep="")
 9 | }
10 | \arguments{
11 |   \item{seqdata}{a sequence object as defined by the \code{\link{seqdef}} function.}
12 |   \item{pattern}{a character string representing the pattern (substring) to search for.}
13 |   \item{sep}{state separator used in the pattern definition.}
14 | }
15 | \details{
16 | 	This function searches a pattern (a character string) into a set of sequences and returns the results as a list with two elements: '\code{Nbmatch}' the number of occurrences of the pattern and '\code{MatchesIndex}' the vector of indexes (row numbers) of the sequences that match the pattern (see examples below).
17 | 	}
18 | 
19 | \value{a list with two elements (see details).}
20 | 
21 | %\seealso{}
22 | \examples{
23 | data(actcal)
24 | actcal.seq <- seqdef(actcal,13:24)
25 | 
26 | ## search for pattern "DAAD"
27 | ## (no work-full time work-full time work-no work)
28 | ## results are stored in the 'daad' object
29 | daad <- seqpm(actcal.seq,"DAAD")
30 | 
31 | ## Looking at the sequences
32 | ## containing the pattern
33 | actcal.seq[daad$MIndex,]
34 | 
35 | ## search for pattern "AD"
36 | ## (full time work-no work)
37 | seqpm(actcal.seq,"AD")
38 | }
39 | 
40 | \author{Alexis Gabadinho}
41 | \keyword{State sequences}
42 | 


--------------------------------------------------------------------------------
/R/TraMineR-legend.R:
--------------------------------------------------------------------------------
 1 | ## ====================
 2 | ## Plotting the legend
 3 | ## ====================
 4 | 
 5 | TraMineR.legend <- function(pos, text, colors, cex=1, leg.ncol = NULL, ... ) {
 6 | 
 7 | 	nbstat <- length(text)
 8 | 
 9 | 	## Computing some parameters for the legend's plotting '
10 | 
11 |   if (is.null(leg.ncol)) {
12 | 	   if (pos=="bottom") {
13 |   		if (nbstat > 6)
14 |   			nbcol <- 3
15 |   		else
16 |   			nbcol <- 2
17 | 
18 |   		leg.ncol <- ceiling(nbstat/nbcol)
19 | 	   }
20 | 	   else
21 | 		  leg.ncol <- 1
22 |    }
23 | 
24 | 	## leg.inset <- -0.2 + ((2-leg.ncol)*0.025)
25 | 
26 | 	## Setting graphical parameters while saving them in savepar
27 | 	savepar <- par(mar = c(1, 1, 0.5, 1) + 0.1, xpd=FALSE)
28 | 
29 | 	## Restoring graphical parameters
30 | 	on.exit(par(savepar))
31 | 
32 | 	plot(0, type = "n", axes = FALSE, xlab = "", ylab = "")
33 | 		## legend(position, fill = cpal, legend = ltext, cex = fontsize)
34 | 
35 |     oolist <- list(...)
36 |     oolist <- oolist[!names(oolist) %in% c("x","y","legend","ncol","fill","border", "lty", "lwd")]
37 | 
38 |     legargs <- list(x=pos, legend=text, fill=colors, ncol=leg.ncol, cex=cex, border="black")
39 |     legargs <- c(legargs,oolist)
40 | 
41 |     do.call(legend, legargs)
42 | 
43 | 
44 | ## 	legend(pos,
45 | ## 		## inset=c(0,leg.inset),
46 | ## 		legend=text,
47 | ## 		fill=colors,
48 | ## 		ncol=leg.ncol,
49 | ## 		##bty="o",
50 | ## 		cex=cex,
51 | ## 		...)
52 | 
53 | }
54 | 


--------------------------------------------------------------------------------
/R/stslist.rep-methods.R:
--------------------------------------------------------------------------------
 1 | ## ===========================
 2 | ## Methods for stsstatd objects
 3 | ## ===========================
 4 | 
 5 | print.stslist.rep <- function(x, ...) {
 6 | 	criterion <- attr(x,"criterion")
 7 | 	nbseq <- attr(x,"nbseq")
 8 | 	quality <- attr(x,"Quality")
 9 | 
10 | 	cat("\n [>] criterion:",criterion,"\n")
11 | 	cat(" [>]", nbseq,"sequence(s) in the original data set\n")
12 | 	cat(" [>]", nrow(x),"representative sequence(s)\n")
13 | 	cat(" [>] overall quality:", round(quality*100,2),"\n\n")
14 | 	NextMethod(x,...)
15 | }
16 | 
17 | summary.stslist.rep <- function(object, ...) {
18 | 	criterion <- attr(object,"criterion")
19 | 	nbseq <- attr(object,"nbseq")
20 | 	quality <- attr(object,"Quality")
21 | 
22 | 	cat("\n [>] criterion:",criterion,"\n")
23 | 	cat(" [>]", nbseq,"sequence(s) in the original data set\n")
24 | 	cat(" [>]", nrow(object),"representative sequences\n")
25 | 	cat(" [>] overall quality:", quality,"\n")
26 | 	cat(" [>] statistics for the representative set:\n\n")
27 | 	print(attr(object,"Statistics"), digits=3, ...)
28 | 	cat("\n    na: number of assigned objects\n")
29 | 	cat("    nb: number of objects in the neighborhood\n")
30 | 	cat("    SD: sum of the na distances to the representative\n")
31 | 	cat("    MD: mean of the na distances to the representative\n")
32 | 	cat("    DC: sum of the na distances to the center of the complete set\n")
33 | 	cat("    V: discrepancy of the subset\n")
34 | 	cat("    Q: quality of the representative\n")
35 | }
36 | 


--------------------------------------------------------------------------------
/R/TraMineR-group.R:
--------------------------------------------------------------------------------
 1 | ## Returns a factor from one variable or a list of variable
 2 | ## some code taken from the tapply function
 3 | 
 4 | group <- function (INDEX, factor=TRUE) {
 5 | 
 6 | 	if (!is.factor(INDEX)) {
 7 | 		if (!is.list(INDEX)) { INDEX <- list(INDEX) }
 8 | 
 9 | 		il <- sapply(INDEX, length)
10 | 
11 | 		if (min(il)!=max(il)) {
12 | 			stop(" [!] all factors must have the same length")
13 | 		}
14 | 
15 | 		if (factor) {
16 | 			group <- rep("", length(INDEX[[1]]))
17 | 			for (i in seq_along(INDEX)) {
18 | 		        	index <- as.factor(INDEX[[i]])
19 | 		        	group <- paste(group, index)
20 | 				group[is.na(index)] <- NA
21 | 			}
22 | 			group <- as.factor(group)
23 | 		}
24 | 		else {
25 | 			## fromt apply
26 | 			nI <- length(INDEX)
27 | 
28 | 			namelist <- vector("list", nI)
29 | 			names(namelist) <- names(INDEX)
30 | 			extent <- integer(nI)
31 | 			one <- 1L
32 | 
33 | 			group <- rep.int(one, length(INDEX[[1]]))
34 | 			ngroup <- one
35 | 
36 | 			for (i in seq_along(INDEX)) {
37 | 		        	index <- as.factor(INDEX[[i]])
38 | 		        	namelist[[i]] <- levels(index)
39 | 		        	extent[i] <- nlevels(index)
40 | 		        	group <- group + ngroup * (as.integer(index) - one)
41 | 		        	ngroup <- ngroup * nlevels(index)
42 | 			}
43 | 		}
44 | 	} else {
45 | 		## Eliminate the unused levels
46 | 		uf <- unique(INDEX)
47 | 		fl <- levels(INDEX)[levels(INDEX) %in% uf]
48 | 		group <- factor(INDEX, levels=fl)
49 | 	}
50 | 
51 | 	return(group)
52 | }
53 | 
54 | 


--------------------------------------------------------------------------------
/src/constraint.cpp:
--------------------------------------------------------------------------------
 1 | #include "constraint.h"
 2 | /*
 3 |  * constraint.cpp
 4 |  *
 5 |  *  Created on: Jun 6, 2011
 6 |  *  Author: nmuller modified by buergin
 7 |  */
 8 | 
 9 | Constraint::Constraint(const double &mg, 
10 | 		       const double &ws, 
11 | 		       const double &aminb, 
12 | 		       const double &amaxb, 
13 | 		       const double &amaxe, 
14 | 		       const int &cmethod) 
15 | {
16 |   // if (wSize==-1)wSize=DBL_MAX;
17 |   // if (mGap==-1)mGap=DBL_MAX;
18 |   // if (aMax==-1)aMax=DBL_MAX;
19 |   // if (aMaxEnd==-1)aMaxEnd=DBL_MAX;
20 |   if (mg==-1) 
21 |     {
22 |       this->maxGap=DBL_MAX;
23 |     } else {
24 |     this->maxGap=mg;
25 |   }
26 |   if (ws==-1) 
27 |     {
28 |       this->windowSize=DBL_MAX;
29 |     } else {
30 |     this->windowSize=ws;
31 |   }
32 |     
33 |   if (aminb==-1) 
34 |     {
35 |       this->ageMinBegin=-DBL_MAX;
36 |     } else {
37 |     this->ageMinBegin=aminb;
38 |   }
39 | 
40 |   if (amaxb==-1) 
41 |     {
42 |       this->ageMaxBegin=DBL_MAX;
43 |     } else {
44 |     this->ageMaxBegin=amaxb;
45 |   }
46 |   if (amaxe==-1) 
47 |     {
48 |       this->ageMaxEnd=DBL_MAX;
49 |     } else {
50 |     this->ageMaxEnd=amaxe;
51 |   }
52 |   if (cmethod==-1) 
53 |     {
54 |       this->countMethod=1;
55 |     } else {
56 |     this->countMethod=cmethod;
57 |   }  
58 |   //	this->maxGap = mg;
59 |   //	this->windowSize = ws;
60 |   //	this->ageMinBegin = aminb;
61 |   //	this->ageMaxBegin = amaxb;
62 |   //	this->ageMaxEnd = amaxe;
63 | }
64 | 
65 | 
66 | 


--------------------------------------------------------------------------------
/R/seqformat-STS_to_SPS.R:
--------------------------------------------------------------------------------
 1 | # Should only be used through seqformat()
 2 | 
 3 | ## ==============================
 4 | ## Convert from STS to SPS format
 5 | ## ==============================
 6 | 
 7 | STS_to_SPS <- function(seqdata, spsformat,
 8 | 	left=NA, right="DEL", gaps=NA, missing=NA, void="%", nr="*") {
 9 | 
10 | 	nbseq <- seqdim(seqdata)[1]
11 | 	maxsl <- seqdim(seqdata)[2]
12 | 
13 | 	out <- matrix(NA, nrow=nbseq, ncol=maxsl)
14 | 
15 | 	if (is.null(rownames(seqdata))) {
16 |     rownames(out) <- paste("[",seq(1:nbseq),"]",sep="")
17 |   } else {
18 |     rownames(out) <- rownames(seqdata)
19 |   }
20 | 	colnames(out) <- paste("[",seq(1:maxsl),"]",sep="")
21 | 
22 | 	## Defining the format options
23 | 	prefix <- substring(spsformat$xfix,1,1)
24 | 	suffix <- substring(spsformat$xfix,2,2)
25 | 	stdursep <- spsformat$sdsep
26 | 
27 | 	## PREPARING THE DATA
28 | 	seqdata <- as.matrix(seqdata)
29 | 	seqdata <- seqprep(seqdata, missing=missing, left=left, gaps=gaps, right=right, void=void, nr=nr)
30 | 
31 | 	for (i in 1:nbseq) {
32 | 		idx <- 1
33 | 		j <- 1
34 | 
35 | 		tmpseq <- seqdata[i,]
36 | 		sl <- TraMineR.length(tmpseq, void)
37 | 
38 | 		while (j <= sl) {
39 | 			iseq <- tmpseq[j]
40 | 
41 | 			dur <- 1
42 | 			while (j < sl & tmpseq[j+1]==iseq) {
43 | 				dur <- dur+1
44 | 				j <- j+1
45 | 			}
46 | 
47 | 			## adding suffix
48 | 			sps <- paste(prefix, iseq, stdursep, dur, suffix, sep="")
49 | 
50 | 			out[i,idx] <- sps
51 | 
52 | 			j <- j+1
53 | 			idx <- idx+1
54 | 		}
55 | 	}
56 | 
57 | 	return(out)
58 | }
59 | 


--------------------------------------------------------------------------------
/src/eventdictionary.cpp:
--------------------------------------------------------------------------------
 1 | #include "eventdictionary.h"
 2 | 
 3 | EventDictionary::EventDictionary(SEXP flist) :numseq(0){
 4 |     for (int i = 0; i < Rf_length(flist); i++) {
 5 |         this->insert(std::make_pair(i+1,std::string(CHAR(STRING_ELT(flist, i)))));// add to respect R indice system
 6 |     }
 7 |     //ctor
 8 | }
 9 | 
10 | EventDictionary::~EventDictionary() {
11 |     //dtor
12 | }
13 | 
14 | bool EventDictionary::codeExists(const int &code) const {
15 |     return this->find(code)==this->end();
16 | }
17 | /*int EventDictionary::sprint(char * buffer, const char* start, const int&code) const {
18 |     const_iterator it=this->find(code);
19 | 	if (it!=this->end()) {
20 | 		return sprintf(buffer,"%s%s",start,it->second.c_str());
21 | 	}
22 | 	return sprintf(buffer,"%s%i",start,code);
23 | }*/
24 | 
25 | SEXP EventDictionary::getDictionary()const{
26 | 	SEXP ret;
27 | 	int s=this->size();
28 | 	//REprintf((char*)"size %i\n",s);
29 | 	PROTECT(ret = Rf_allocVector(STRSXP, s));
30 | 	for(const_iterator it=this->begin();it!=this->end();it++){
31 | 		//REprintf((char*)"Code %i=%s\n",it->first,it->second.c_str());
32 | 		if(it->first<=s){
33 | 			SET_STRING_ELT(ret, it->first-1, Rf_mkChar(it->second.c_str()));
34 | 			//REprintf((char*)"Code %i=%s\n",it->first,it->second.c_str());
35 | 		}
36 | 	}
37 |     UNPROTECT(1);
38 |     return ret;
39 | 
40 | }
41 | 
42 | void EventSet::add(SEXP elist){
43 | 	int * code=INTEGER(elist);
44 | 	for (int i = 0; i < Rf_length(elist); i++) {
45 |         this->insert(code[i]);// add to respect R indice system
46 |     }
47 | }
48 | 


--------------------------------------------------------------------------------
/src/OMPerdistanceII.h:
--------------------------------------------------------------------------------
 1 | #ifndef OMPERDISTANCEIICALCULATOR_H
 2 | #define OMPERDISTANCEIICALCULATOR_H
 3 | #include "OMdistance.h"
 4 | 
 5 | class OMPerdistanceII: public OMdistance{
 6 | 	double timecost;
 7 | 	double * seqdur;
 8 | 	double * indellist;
 9 | 	double * tokdeplist;
10 | 	int * seqlen;
11 |   public:
12 | 	OMPerdistanceII(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);
13 | 	OMPerdistanceII(OMPerdistanceII *dc);
14 | 	virtual void setParameters(SEXP params);
15 |     virtual ~OMPerdistanceII();
16 |     virtual double distance(const int&is, const int& js);
17 | 	inline double getIndel(const int& indice, const int& state){
18 | 		return this->indellist[state]+timecost*tokdeplist[state]*(seqdur[indice]);
19 | 	}
20 | 	virtual DistanceCalculator* copy(){return new OMPerdistanceII(this);}
21 | 	inline double getSubCost(const int& i_state, const int& j_state, const int& i_state_indice, const int& j_state_indice){
22 | 		
23 | 		if(i_state==j_state){
24 | 			double diffdur= seqdur[i_state_indice]-seqdur[j_state_indice];
25 | 			if(diffdur<0) {
26 | 				return -1.0*(timecost*diffdur*tokdeplist[i_state]);
27 | 			}else{
28 | 				return(timecost*diffdur*tokdeplist[i_state]);
29 | 			}
30 | 		}else{
31 | 			//double commondur =fmin2(seqdur[i_state_indice], seqdur[j_state_indice]);
32 | 			//return(commondur*scost[MINDICE(i_state, j_state, alphasize)] + diffdur*timecost);
33 | 			return(scost[MINDICE(i_state, j_state, alphasize)] + (tokdeplist[i_state]*seqdur[i_state_indice]+tokdeplist[j_state]*seqdur[j_state_indice])*timecost);
34 | 		}
35 | 	}
36 | };
37 | 
38 | #endif
39 | 


--------------------------------------------------------------------------------
/man/disstree-assign.Rd:
--------------------------------------------------------------------------------
 1 | \name{disstree.assign}
 2 | \alias{disstree.assign}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{Assign rules to profiles provided}
 5 | \description{
 6 |   Find the tree classification rule that applies to provided cases.
 7 | }
 8 | \usage{
 9 | disstree.assign(rules, profile, covar=attr(rules,"covariates"))
10 | }
11 | %- maybe also 'usage' for other objects documented here.
12 | \arguments{
13 |   \item{rules}{Character vector. List of classification rules such as those returned by \code{\link{disstree.get.rules}}.}
14 |   \item{profile}{Data frame. Profiles of cases to be classified with the rules.}
15 |   \item{covar}{Character vector. List of names of covariates used by the rules.}
16 | }
17 | \details{
18 | \code{rules} must be given as strings of R commands. Use \code{\link{disstree.get.rules}} to get the classification rules of a tree of class \code{disstree}.
19 | 
20 | Rules are expected to create a full partition of the space of possible values of the covariates, i.e., any profile must satisfy one and only one of the rules.
21 | 
22 | }
23 | 
24 | \value{A vector of length equal to the number of rows of \code{profile} with for each case the index of the rule that applies.
25 | }
26 | 
27 | 
28 | \seealso{\code{\link{disstree}} and examples therein, \code{\link{disstreeleaf}}, \code{\link{disstree.get.rules}}}
29 | \author{Gilbert Ritschard)}
30 | % Add one or more standard keywords, see file 'KEYWORDS' in the
31 | % R documentation directory.
32 | \keyword{Dissimilarity-based analysis}
33 | 


--------------------------------------------------------------------------------
/man/print.stslist.Rd:
--------------------------------------------------------------------------------
 1 | \name{print.stslist}
 2 | \alias{print.stslist}
 3 | \title{Print method for state sequence objects}
 4 | \description{
 5 | This is the print method for state sequence objects of class \code{stslist} created by the \code{seqdef} function.
 6 | }
 7 | \usage{
 8 | \method{print}{stslist}(x, format='STS', extended=FALSE, ...)
 9 | }
10 | \arguments{
11 |   \item{x}{A state sequence (\code{stslist}) object.}
12 | 
13 |   \item{format}{String: print format. One of \code{"STS"} (default) or \code{"SPS"}.}
14 | 
15 |   \item{extended}{Logical: should the output be printed in extended matrix form?}
16 |   
17 |   \item{...}{Additional print arguments.}
18 | }
19 | 
20 | \seealso{\code{\link{seqdef}}, \code{\link{plot.stslist}}
21 | }
22 | 
23 | \examples{
24 | ## Defining a sequence object with the data in columns 10 to 25
25 | ## (family status from age 15 to 30) in the biofam data set
26 | data(biofam)
27 | biofam <- biofam[500:600,] ## using a subsample only
28 | biofam.lab <- c("Parent", "Left", "Married", "Left+Marr",
29 |     "Child", "Left+Child", "Left+Marr+Child", "Divorced")
30 | biofam.shortlab <- c("P","L","M","LM","C","LC","LMC","D")
31 | biofam.seq <- seqdef(biofam, 10:25, states=biofam.shortlab, 
32 |   labels=biofam.lab)
33 | 
34 | ## Print of first 5 sequences
35 | print(biofam.seq[1:5,])
36 | print(biofam.seq[1:5,], extended=TRUE)
37 | print(biofam.seq[1:5,], format="SPS")
38 | print(biofam.seq[1:5,], format="SPS", SPS.out = list(xfix = "", sdsep = "/"))
39 | 
40 | }
41 | 
42 | \author{Gilbert Ritschard}
43 | \keyword{Method}
44 | \keyword{State sequences}
45 | 


--------------------------------------------------------------------------------
/R/seqdur.R:
--------------------------------------------------------------------------------
 1 | ## ========================================
 2 | ## Extracts states durations from sequences
 3 | ## ========================================
 4 | 
 5 | seqdur <- function(seqdata, with.missing=FALSE) {
 6 | 
 7 | 	if (!inherits(seqdata,"stslist"))
 8 | 		stop("data is not a sequence object, see seqdef function to create one")
 9 | 
10 | 	nbseq <- nrow(seqdata)
11 | 	sl <- seqlength(seqdata, with.missing=TRUE)
12 | 
13 | 	maxsl <- max(sl)
14 | 	trans <- matrix(nrow=nbseq, ncol=maxsl)
15 | 	rownames(trans) <- rownames(seqdata)
16 | 	colnames(trans) <- paste("DUR",1:maxsl, sep="")
17 | 
18 | 	seqdatanum <- seqasnum(seqdata, with.missing=with.missing)
19 | 	#if (!with.missing)
20 | 	seqdatanum[is.na(seqdatanum)] <- -99
21 | 
22 | 	maxcol <- 0
23 | 	for (i in 1:nbseq) {
24 | 		idx <- 1
25 | 		j <- 1
26 | 
27 | 		tmpseq <- seqdatanum[i,]
28 | 
29 |     while (idx <= sl[i] && tmpseq[idx]==-99) idx <- idx + 1 ## Skipping initial -99 values
30 | 
31 | 		while (idx <= sl[i]) {
32 | 			iseq <- tmpseq[idx]
33 | 			dur <- 1
34 | 
35 | 			while (idx < sl[i] && (tmpseq[idx+1]==iseq || tmpseq[idx+1]==-99)) {
36 |           if (tmpseq[idx+1]!=-99) dur <- dur+1
37 | 					idx <- idx+1
38 | 			}
39 | 
40 | 			## The range of the numeric alphabet
41 | 			## obtained with seqasnum is 0..n
42 | 			if (iseq!=-99) {
43 | 				trans[i,j] <- dur
44 | 				j <- j+1
45 | 			}
46 | 
47 | 			idx <- idx+1
48 | 		}
49 | 		if (j>maxcol) {maxcol <- j}
50 | 	}
51 | 	## drop=FALSE ensures that the result is a matrix even if trans has only one row
52 | 	trans <- trans[,1:(maxcol-1), drop=FALSE]
53 | 
54 | 	return(trans)
55 | }
56 | 


--------------------------------------------------------------------------------
/man/plot.subseqelist.Rd:
--------------------------------------------------------------------------------
 1 | \name{plot.subseqelist}
 2 | \alias{plot.subseqelist}
 3 | \title{Plot frequencies of subsequences}
 4 | \description{
 5 | Plot frequencies of subsequences.
 6 | }
 7 | \usage{
 8 | \method{plot}{subseqelist}(x, freq=NULL,cex=1,...)
 9 | }
10 | %- maybe also 'usage' for other objects documented here.
11 | \arguments{
12 |   \item{x}{The subsequences to plot (a \code{subseqelist} object}
13 |   \item{freq}{The frequencies to plot, support if \code{NULL}}
14 |   \item{cex}{Plotting text and symbols magnification. See \code{\link{par}}.}
15 |   % \item{ylim}{A 2 length vector indicating minimum and maximum y values (by default to c(0,1))}
16 |   \item{\dots}{arguments passed to \code{\link{barplot}}}
17 | }
18 | %\details{}
19 | %\references{ ~put references to the literature/web site here ~ }
20 | %\author{ ~~who you are~~ }
21 | %\note{ ~~further notes~~
22 | 
23 | % ~Make other sections like Warning with \section{Warning }{....} ~
24 | %}
25 | \seealso{
26 |     \code{\link{seqefsub}}
27 |     }
28 | \examples{
29 | ## loading data
30 | data(actcal.tse)
31 | 
32 | ## creating sequences
33 | actcal.eseq <- seqecreate(actcal.tse)
34 | 
35 | ## Looking for frequent subsequences
36 | fsubseq <- seqefsub(actcal.eseq,pmin.support=0.01)
37 | 
38 | ## Frequence of first ten subsequences
39 | plot(fsubseq[1:10], cex=2)
40 | plot(fsubseq[1:10])
41 | 
42 | }
43 | \author{Matthias Studer (with Gilbert Ritschard for the help page)}
44 | % Add one or more standard keywords, see file 'KEYWORDS' in the
45 | % R documentation directory.
46 | \keyword{Event sequences}
47 | \keyword{Method}
48 | 


--------------------------------------------------------------------------------
/man/str.eseq.Rd:
--------------------------------------------------------------------------------
 1 | \name{str.eseq}
 2 | \alias{str.eseq}
 3 | \alias{str.seqelist}
 4 | \alias{as.character.eseq}
 5 | \alias{as.character.seqelist}
 6 | 
 7 | \title{String representation of an event sequence object}
 8 | \description{
 9 |   Methods to get a string representation of an event sequence or event subsequence object.
10 |   These are the generic \code{str} and \code{as.character} methods for objects of class \code{eseq} (single event sequence) and \code{seqelist} (list of event sequences).
11 | }
12 | \details{
13 |    String representations have the following form:
14 |   \code{time-(e1,e2,...)-elapsedtime-(e3,...)...}
15 |   where \code{time} is the time elapsed from start to the first set of simultaneous events \code{(e1,e2,...)} and \code{elapsedtime} the time between two consecutive sets of simultaneous events. Time is only displayed when events are time stamped.
16 |   Use \code{str} (\code{str.seqelist}) to get a single string for a whole list, and \code{as.charcater} (\code{as.character.seqelist}) to get a vector of strings.
17 |   \code{str.eseq} and \code{as.character.eseq} have mainly an internal purpose (invoked by print methods for example).
18 | }
19 | 
20 | \seealso{\code{\link{seqecreate}} for a full example }
21 | \examples{
22 | data(actcal.tse)
23 | actcal.eseq <- seqecreate(actcal.tse)
24 | head(as.character(actcal.eseq))
25 | }
26 | 
27 | \author{Matthias Studer (with Gilbert Ritschard for the help page)}
28 | % Add one or more standard keywords, see file 'KEYWORDS' in the
29 | % R documentation directory.
30 | 
31 | \keyword{internal}
32 | \keyword{Event Sequences}
33 | 


--------------------------------------------------------------------------------
/man/TraMineRInternal.Rd:
--------------------------------------------------------------------------------
 1 | \name{TraMineRInternal}
 2 | %%\alias{TraMineRInternalCheckArgs}
 3 | \alias{TraMineRInternalLayout}
 4 | \alias{TraMineRInternalLegend}
 5 | \alias{TraMineRInternalNodeInit}
 6 | \alias{TraMineRInternalSeqeage}
 7 | \alias{TraMineRInternalSeqgbar}
 8 | \alias{TraMineRInternalSplitInit}
 9 | \alias{TraMineRInternalWeightedInertiaDist}
10 | %- Also NEED an '\alias' for EACH other topic documented here.
11 | \title{Access to TraMineR internal functions}
12 | \description{
13 |   Functions allowing other packages to access some TraMineR internal functions.
14 |   Corresponding functions are respectively \code{TraMineR.setlayout},
15 |   \code{TraMineR.Legend}, \code{DTNInit}, \code{seqeage}, \code{seqgbar},
16 |   \code{DTNsplit}, and \code{tmrWeightedInertiaDist}. For experts only.
17 | }
18 | \usage{
19 | %%TraMineRInternalCheckArgs(...)
20 | TraMineRInternalLayout(...)
21 | TraMineRInternalLegend(...)
22 | TraMineRInternalNodeInit(...)
23 | TraMineRInternalSeqeage(...)
24 | TraMineRInternalSeqgbar(...)
25 | TraMineRInternalSplitInit(...)
26 | TraMineRInternalWeightedInertiaDist(diss, diss.size, is.dist, individuals, sweights, var)
27 | }
28 | %- maybe also 'usage' for other objects documented here.
29 | \arguments{
30 |   \item{\dots}{Arguments passed to or from other methods.}
31 |   \item{diss}{See tmrWeightedInertiaDist().}
32 |   \item{diss.size}{See tmrWeightedInertiaDist().}
33 |   \item{is.dist}{See tmrWeightedInertiaDist().}
34 |   \item{individuals}{See tmrWeightedInertiaDist().}
35 |   \item{sweights}{See tmrWeightedInertiaDist().}
36 |   \item{var}{See tmrWeightedInertiaDist().}
37 | }
38 | 


--------------------------------------------------------------------------------
/R/TraMineR-trunc.R:
--------------------------------------------------------------------------------
 1 | ## SEQUENCE TRUNCATION
 2 | 
 3 | TraMineR.trunc <- function(seqdata, mstate, sl, left = "DEL", right = "DEL",
 4 |   gaps = "DEL", neutral = "#", void = "%") {
 5 | 
 6 | 	sidx <- 1:sl
 7 | 
 8 | 	## Index des missing et index des etats valides
 9 | 	na.pos <- sidx[mstate]
10 | 	notna.pos <- sidx[!mstate]
11 |   lc <- 0
12 | 
13 |   if(length(notna.pos)<1) { ## only missing values in sequence
14 |     c1 <- 0
15 |     rc <- 1
16 |     mm <- NULL # most probably not used
17 |   }
18 |   else {
19 | 
20 |   	## Position of first valid state
21 |   	c1 <- notna.pos[1]
22 | 
23 |   	if (c1>1)
24 |   		lc <- c1-1
25 |   	# else lc=0
26 | 
27 |   	rc <- max(notna.pos)+1
28 |   	mm <- na.pos[na.pos > lc+1 & na.pos < rc-1]
29 |   }
30 | 
31 | 	seqdata.trunc <- seqdata
32 | 
33 | 	if (!is.na(left) & lc>0) {
34 | 		if (left=="DEL") seqdata.trunc[1:lc] <- void
35 | 		else if (left=="NEUTRAL") seqdata.trunc[1:lc] <- neutral
36 | 		else seqdata.trunc[1:lc] <- left
37 | 	}
38 | 
39 | 	if (!is.na(right) & rc<=sl) {
40 | 		if (right=="DEL") seqdata.trunc[rc:sl] <- void
41 | 		else if (right=="NEUTRAL") seqdata.trunc[rc:sl] <- neutral
42 | 		else seqdata.trunc[rc:sl] <- right
43 | 	}
44 | 
45 | 	if (!is.na(gaps) & length(mm>0)) {
46 | 		if (gaps=="DEL") seqdata.trunc[mm] <- void
47 | 		else if (gaps=="NEUTRAL") seqdata.trunc[mm] <- neutral
48 | 		else seqdata.trunc[mm] <- gaps
49 | 	}
50 | 
51 | 	ndel <- sum(seqdata.trunc==void, na.rm=TRUE)
52 | 
53 | 	if (ndel>0) {
54 | 		seqdata.trunc <- seqdata.trunc[seqdata.trunc!=void]
55 | 		seqdata.trunc <- c(seqdata.trunc,rep(void,ndel))
56 | 	}
57 | 
58 | 	return(seqdata.trunc)
59 | }
60 | 


--------------------------------------------------------------------------------
/R/seqmeant.R:
--------------------------------------------------------------------------------
 1 | ## ==============
 2 | ## Mean durations
 3 | ## ==============
 4 | 
 5 | seqmeant <- function(seqdata, weighted=TRUE, with.missing=FALSE, prop=FALSE, serr=FALSE) {
 6 | 
 7 | 	if (!inherits(seqdata,"stslist"))
 8 | 		stop("seqmeant: seqdata is not a sequence object, use seqdef function to create one")
 9 | 
10 | 	istatd <- suppressMessages(seqistatd(seqdata, with.missing=with.missing, prop=prop))
11 | 
12 | 	weights <- attr(seqdata, "weights")
13 | 
14 | 	if (!weighted || is.null(weights))
15 | 		weights <- rep(1, nrow(seqdata))
16 | 	## Also takes into account that in unweighted sequence objects created with
17 | 	## older TraMineR versions the weights attribute is a vector of 1
18 | 	## instead of NULL
19 | 	if (all(weights==1))
20 | 		weighted <- FALSE
21 | 	
22 | 	wtot <- sum(weights)
23 | 
24 | 	mtime <- apply(istatd*weights,2,sum)
25 | 	
26 | 	res <- mtime/wtot
27 | 
28 | 	res <- as.matrix(res)
29 | 	colnames(res) <- "Mean"
30 | 
31 | 	col <- cpal(seqdata)
32 | 	if (with.missing) {
33 | 		col <- c(col, attr(seqdata,"missing.color"))
34 | 	}
35 | 
36 | 	if(serr){
37 | 	  w2tot <- sum(weights^2)
38 | 	  vcent <- t(t(istatd) - mtime/wtot)
39 | 	  var <- apply(weights*(vcent^2),2,sum) * wtot/(wtot^2 - w2tot)
40 | 	  sd <- sqrt(var)
41 | 	  SE <- sqrt(var/wtot)
42 | 	  res <- cbind(res,var,sd,SE)
43 | 	  colnames(res) <- c("Mean", "Var", "Stdev", "SE")
44 | 	}
45 | 
46 | 	attr(res,"nbseq") <- sum(weights)
47 | 	attr(res,"cpal") <- col
48 | 	attr(res,"xtlab") <- colnames(seqdata)
49 | 	attr(res,"weighted") <- weighted
50 | 	attr(res,"se") <- serr
51 | 	
52 | 	class(res) <- c("stslist.meant", "matrix")
53 | 
54 | 	return(res)
55 | }
56 | 


--------------------------------------------------------------------------------
/man/actcal.tse.Rd:
--------------------------------------------------------------------------------
 1 | \name{actcal.tse}
 2 | \docType{data}
 3 | \alias{actcal.tse}
 4 | \title{Example data set: Activity calendar from the Swiss Household Panel (time stamped event format)}
 5 | \description{
 6 | This data set contains events defined from the state sequences in the actcal data set. It was created with the code shown in the examples section. It is provided to symplify example of event sequence mining.
 7 | }
 8 | \usage{data(actcal.tse)}
 9 | \format{Time stamped events derived from state sequences in the actcal data set.}
10 | \source{Swiss Household Panel}
11 | \seealso{
12 | \code{\link{seqformat}, \link{actcal}}}
13 | %\references{}
14 | \examples{
15 | data(actcal)
16 | actcal.seq <- seqdef(actcal[,13:24])
17 | 
18 | ## Defining the transition matrix
19 | transition <- seqetm(actcal.seq, method="transition")
20 | transition[1,1:4] <- c("FullTime"         , "Decrease,PartTime",
21 |      "Decrease,LowPartTime", "Stop")
22 | transition[2,1:4] <- c("Increase,FullTime", "PartTime"         ,
23 |      "Decrease,LowPartTime", "Stop")
24 | transition[3,1:4] <- c("Increase,FullTime", "Increase,PartTime",
25 |     "LowPartTime"         , "Stop")
26 | transition[4,1:4] <- c("Start,FullTime"   , "Start,PartTime"   ,
27 |     "Start,LowPartTime"   , "NoActivity")
28 | transition
29 | 
30 | ## Converting STS data to TSE
31 | actcal.tse <- seqformat(actcal, 13:24, from = "STS",to = "TSE",
32 |   tevent = transition)
33 | 
34 | ## Defining the event sequence object
35 | actcal.eseq <- seqecreate(id=actcal.tse$id,
36 | 	time=actcal.tse$time, event=actcal.tse$event)
37 | }
38 | \author{Gilbert Ritschard and Matthias Studer}
39 | \keyword{Datasets}
40 | 


--------------------------------------------------------------------------------
/man/seqstatl.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqstatl}
 2 | \alias{seqstatl}
 3 | \title{List of distinct states or events (alphabet) in a sequence data set.}
 4 | \description{
 5 |   Returns a list containing distinct states or events found in a data frame or matrix containing sequence data, the alphabet.
 6 | }
 7 | \usage{
 8 |  seqstatl(data, var=NULL, format='STS')
 9 | }
10 | \arguments{
11 |   \item{data}{a data frame, matrix, or character string vector containing sequence data (tibble will be converted with \code{as.data.frame}).}
12 |   \item{var}{the list of columns containing the sequences. Default \code{NULL} means all columns. Whether the sequences are in the compressed (character strings) or extended format is automatically detected from the number of columns.}
13 |   \item{format}{the format of the sequence data set. One of \code{"STS"}, \code{"SPS"}, \code{"DSS"}. Default is \code{"STS"}. The \code{seqstatl} function uses the \code{\link{seqformat}} function to translate between formats when necessary.}
14 | }
15 | 
16 | \references{
17 | Gabadinho, A., G. Ritschard, N. S. Müller and M. Studer (2011). Analyzing and Visualizing State Sequences in R with TraMineR. \emph{Journal of Statistical Software} \bold{40}(4), 1-37.
18 | 
19 | Gabadinho, A., G. Ritschard, M. Studer and N. S. Müller (2009). Mining Sequence Data in
20 | \code{R} with the \code{TraMineR} package: A user's guide. Department of Econometrics and
21 | Laboratory of Demography, University of Geneva.
22 | }
23 | 
24 | \seealso{
25 |   \code{\link{seqformat}}
26 | }
27 | \examples{
28 | data(actcal)
29 | seqstatl(actcal,13:24)
30 | }
31 | 
32 | \author{Alexis Gabadinho}
33 | 
34 | \keyword{Data handling}
35 | 


--------------------------------------------------------------------------------
/man/ex2.Rd:
--------------------------------------------------------------------------------
 1 | \name{ex2}
 2 | \docType{data}
 3 | \alias{ex2}
 4 | \alias{ex2.weighted}
 5 | \alias{ex2.unweighted}
 6 | 
 7 | \title{Example data sets with weighted and unweighted sequence data}
 8 | 
 9 | \description{
10 | Example data sets used to demonstrate the handling of weights. The \code{'ex2.weighted'} data set contains 6 sequences with weights inflating to 100 sequences (sum of weights is 100). The second data frame \code{'ex2.unweighted'} contains the corresponding 100 sequences.
11 | 
12 | The sequences are, in both data frames, in the \code{'seq'} column, and weights in the \code{'weight'} column of \code{'ex2.weighted'}.
13 | 
14 | The alphabet is made of four possible states: \code{A, B, C} and \code{D}.
15 | \cr
16 | 
17 | These data sets are mainly intended to test and illustrate the handling of weights in TraMineR's functions. Weighted results obtained with '\code{ex2.weighted}' data set should be exactly the same as unweighted results obtained with the '\code{ex2.unweighted}' data set.
18 | }
19 | 
20 | \usage{data(ex2)}
21 | \format{
22 | The command \code{data(ex2)} generates two data frames:
23 | \cr
24 | \code{ex2.weighted}: a data frame with 6 rows, 1 variable containing sequences as character strings, 1 weight variable.
25 | \cr
26 | \code{ex2.unweighted}: a data frame with 100 rows, 1 variable containing sequences as character strings.
27 | 
28 | }
29 | 
30 | \examples{
31 | data(ex2)
32 | 
33 | ex2w.seq <- seqdef(ex2.weighted, 1, weights=ex2.weighted$weight)
34 | ex2u.seq <- seqdef(ex2.unweighted)
35 | }
36 | 
37 | 
38 | \source{The brain of the TraMineR package team.}
39 | \author{Gilbert Ritschard and Alexis Gabadinho}
40 | 
41 | \keyword{Datasets}
42 | 


--------------------------------------------------------------------------------
/R/seqprep.R:
--------------------------------------------------------------------------------
 1 | ## ===========================
 2 | ## Treatment of missing values
 3 | ## ===========================
 4 | 
 5 | seqprep <- function(seqdata, left=NA, right="DEL", gaps=NA,
 6 | 	neutral="#", missing=NA, void="%", nr="*") {
 7 | 
 8 | 	nbseq <- nrow(seqdata)
 9 | 	sl <- ncol(seqdata)
10 | 
11 | 	message(" [>] preparing ",nbseq, " sequences")
12 | 	message(" [>] coding void elements with '", void, "' and missing values with '", nr,"'")
13 | 
14 | 	if (is.na(missing)) {
15 | 		mstate <- is.na(seqdata)
16 | 	}
17 | 	else {
18 | 		mstate <- seqdata==missing
19 | 	}
20 | 
21 | 	allmiss <- NULL
22 | 	for (i in 1:nbseq) {
23 | 		nbmiss <- sum(mstate[i,], na.rm=TRUE)
24 | #		if (nbmiss>0 && nbmiss<sl) {
25 | #			seqdata[i,] <- TraMineR.trunc(seqdata=seqdata[i,], mstate=mstate[i,], sl=sl,
26 | #				left=left, right=right, gaps=gaps,
27 | #				neutral=neutral, void=void)
28 | #		}
29 | #		else
30 |     if (nbmiss==sl) {
31 | 			allmiss <- c(allmiss,i)
32 | 		}
33 |     if (nbmiss > 0){
34 | 			seqdata[i,] <- TraMineR.trunc(seqdata=seqdata[i,], mstate=mstate[i,], sl=sl,
35 | 				left=left, right=right, gaps=gaps,
36 | 				neutral=neutral, void=void)
37 |     }
38 | 	}
39 | 
40 |   nempty <- length(allmiss)
41 | 	if (nempty>0) {
42 |     dots <- ""
43 |     if (nempty > 10){
44 |       allmiss <- allmiss[1:10]
45 |       dots <- ", ..."
46 |     }
47 | 		message(" [!!] ",nempty," empty sequence(s) with index: ", paste(allmiss, collapse=","),dots,"\n      may produce inconsistent results.")
48 | 	}
49 | 
50 | 	## Setting a new code for missing statuses
51 | 	if (is.na(missing)) seqdata[is.na(seqdata)] <- nr
52 | 	else seqdata[seqdata==missing] <- nr
53 | 
54 | 	return(seqdata)
55 | }
56 | 


--------------------------------------------------------------------------------
/man/seqe.Rd:
--------------------------------------------------------------------------------
 1 | \name{is.eseq}
 2 | \alias{is.eseq}
 3 | \alias{is.seqe}
 4 | \alias{is.seqelist}
 5 | \alias{print.eseq}
 6 | \alias{print.seqelist}
 7 | \alias{levels.seqelist}
 8 | \alias{[.seqelist}
 9 | \alias{levels.eseq}
10 | \alias{Math.eseq}
11 | \alias{Math.seqelist}
12 | \alias{Ops.eseq}
13 | \alias{Ops.seqelist}
14 | \alias{Summary.eseq}
15 | \alias{Summary.seqelist}
16 | 
17 | 
18 | \title{Event sequence object}
19 | \description{
20 |   TraMineR uses an internal event sequence object for all its operations
21 |   with event sequences such as mining frequent subsequences. The
22 |   function \code{is.seqelist} checks wether the argument is an event
23 |   sequence object while \code{is.eseq} checks wether the argument is a
24 |   single element of an event sequence object.
25 |   %\code{as.seqelist} transform a list of event sequences into an internal event sequence object.
26 | 
27 |   There is a print method that can be applied to such event sequence object.
28 | 
29 |   is.seqe() has been removed in TraMineR 2.x, use is.eseq() instead.
30 | }
31 | \usage{
32 | is.eseq(eseq, s)
33 | is.seqelist(eseq, s)
34 | }
35 | \arguments{
36 |   \item{eseq}{Sequence}
37 |   \item{s}{Deprecated. Use \code{eseq} instead.}
38 | }
39 | \seealso{\code{\link{str.seqelist}}, \code{\link{str.eseq}},\code{\link{as.character.seqelist}} and \code{\link{as.character.eseq}} to convert sequences from an event sequence object into character strings}
40 | %\examples{}
41 | 
42 | \author{Matthias Studer (with Gilbert Ritschard for the help page)}
43 | % Add one or more standard keywords, see file 'KEYWORDS' in the
44 | % R documentation directory.
45 | \keyword{Event sequences}
46 | \keyword{internal}
47 | 


--------------------------------------------------------------------------------
/R/implicativestat.R:
--------------------------------------------------------------------------------
 1 | ###########################
 2 | ## Compute the implicative statistic of a rule
 3 | ###########################
 4 | 
 5 | 
 6 | implicativestat <- function(x, y, type="intensity", resid="standard") {
 7 | 	if (!(type %in% c("intensity", "indice"))) {
 8 | 		stop("type should be intensity or indice")
 9 | 	}
10 | 	if (!(resid %in% c("standard", "deviance", "Freeman-Tukey", "adjusted"))) {
11 | 		stop("resid should be one of standard, deviance, Freeman-Tukey or ajusted")
12 | 	}
13 | 	x <- factor(x)
14 |   if (length(levels(x)==1)) x <- factor(x, levels=c("0","1"))
15 | 	y <- factor(y)
16 |   if (length(levels(y)==1)) y <- factor(y, levels=c("0","1"))
17 | 	xgrp <- levels(x)
18 |   ygrp <- levels(y)
19 | 	result <- matrix(0, nrow=length(xgrp), ncol=length(ygrp))
20 | 	n <- length(x)
21 | 	rownames(result) <- xgrp
22 | 	colnames(result) <- ygrp
23 | 	for (i in 1:length(xgrp)) {
24 | 		condi <- x==xgrp[i]
25 | 		for (j in 1:length(ygrp)) {
26 | 			condj <- y==ygrp[j]
27 | 			Nnbj <- sum((condi)&!condj)
28 | 			Nexpnbj <- sum(condi)*sum(!condj)/n
29 | 			if (resid=="standard") {
30 | 				indice <- (Nnbj-Nexpnbj)/sqrt(Nexpnbj)
31 | 			}
32 | 			else if (resid=="deviance") {
33 | 				indice <- sign(Nnbj-Nexpnbj)*sqrt(abs(2*Nnbj*log(Nnbj/Nexpnbj)))
34 | 			}
35 | 			else if (resid=="Freeman-Tukey") {
36 | 				indice <- sqrt(Nnbj)+sqrt(1+Nnbj)-sqrt(4*Nexpnbj+1)
37 | 			}
38 | 			else if (resid=="adjusted") {
39 | 				indice <- (Nnbj-Nexpnbj)/sqrt(Nexpnbj*(sum(condj)/n)*(1-sum(condi)/n))
40 | 			}
41 | 						
42 | 			
43 | 			if (type=="indice") {
44 | 				result[i, j] <- indice
45 | 			}
46 | 			else {
47 | 				result[i, j] <- pnorm(-indice)
48 | 			}
49 | 		}
50 | 	}
51 | 	return(result)
52 | }
53 | 	
54 | 


--------------------------------------------------------------------------------
/man/seqnum.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqnum}
 2 | \alias{seqnum}
 3 | \title{Transform into a sequence object with numerical alphabet.}
 4 | 
 5 | \description{
 6 |   The function \code{seqnum} transforms the provided state sequence object into an equivalent sequence object in which the original alphabet is replaced with an alphabet of numbers ranging from \code{0} to \code{(nbstates-1)}.
 7 | }
 8 | \usage{
 9 |  seqnum(seqdata, with.missing=FALSE)
10 | }
11 | \arguments{
12 | 	\item{seqdata}{a state sequence object as defined by the \code{\link{seqdef}} function.}
13 | 
14 | 	\item{with.missing}{logical: Should missing elements in the sequences be turned into numerical values as well?  The code for missing values in the sequences is retrieved from the \code{'nr'} attribute of \code{seqdata}.}
15 | }
16 | 
17 | \details{
18 | The first state (for example \code{'A'}) is coded with the value \code{0}, the second state (for example \code{'B'}) is coded with the value \code{1}, etc... The function returns a sequence object containing the original sequences coded with the new numerical alphabet
19 | ranging from \code{0} to \code{(nbstates-1)}}
20 | 
21 | \seealso{\code{\link{seqdef}}, \code{\link{alphabet} } }
22 | 
23 | \examples{
24 | data(actcal)
25 | actcal.seq <- seqdef(actcal,13:24)
26 | 
27 | ## The first 10 sequences in the actcal.seq
28 | ## sequence object
29 | actcal.seq[1:10,]
30 | alphabet(actcal.seq)
31 | 
32 | ## The first 10 sequences in the actcal.seq
33 | ## sequence object with numerical alphabet
34 | seqnum(actcal.seq[1:10,])
35 | 
36 | ## states A,B,C,D are now coded 0,1,2,3
37 | alphabet(seqnum(actcal.seq))
38 | }
39 | 
40 | \author{Alexis Gabadinho}
41 | 
42 | \keyword{Data handling}
43 | \keyword{State sequences} 
44 | 


--------------------------------------------------------------------------------
/man/disstreeleaf.Rd:
--------------------------------------------------------------------------------
 1 | \name{disstreeleaf}
 2 | \alias{disstreeleaf}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{Terminal node membership}
 5 | \description{
 6 |   Get the terminal node membership of each case.
 7 | }
 8 | \usage{
 9 | disstreeleaf(tree, label=FALSE, collapse=", ")
10 | }
11 | %- maybe also 'usage' for other objects documented here.
12 | \arguments{
13 |   \item{tree}{A tree (\code{disstree} or \code{DissTreeNode} object).}
14 |   \item{label}{Logical. Should leaf memberships be labelled with classification rules?}
15 |   \item{collapse}{Character string. Separator between categories in class of categorical values.}
16 | }
17 | \details{
18 | \code{disstreeleaf} returns the terminal node membership of the cases either as the leaf number of the terminal node to which the cases are assigned or, when \code{label=TRUE}, as the classification rule leading to the assigned terminal node. In the latter case, \code{collapse} is used as separator between categorical values in classes of categorical values. The default \code{collapse} is \code{", "}. It is advisable to change this default when categorical values contain commas.
19 | }
20 | 
21 | \value{Either a vector of leaf numbers or a factor. When \code{label=FALSE} (default), vector of assigned terminal node numbers. When \code{label=TRUE}, a factor with levels labelled with classification rules.
22 | }
23 | 
24 | 
25 | \seealso{\code{\link{disstree}} and examples therein, \code{\link{disstree.get.rules}}, and \code{\link{disstree.assign}}.}
26 | \author{Matthias Studer and Gilbert Ritschard)}
27 | % Add one or more standard keywords, see file 'KEYWORDS' in the
28 | % R documentation directory.
29 | \keyword{Dissimilarity-based analysis}
30 | 


--------------------------------------------------------------------------------
/man/seqdur.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqdur}
 2 | \alias{seqdur}
 3 | \title{Extract state durations from a sequence object.}
 4 | \description{
 5 | Extracts states durations from a sequence object. Returns a matrix containing the states durations for the sequences. The states durations in \code{'D-D-D-D-A-A-A-A-A-A-A-D'} are 4,7,1. Distinct states can be extracted with the \code{\link{seqdss}} function.
 6 |  }
 7 | \usage{
 8 | seqdur(seqdata, with.missing=FALSE)
 9 | }
10 | \arguments{
11 |    \item{seqdata}{a sequence object as defined by the \code{\link{seqdef}} function.}
12 | 	\item{with.missing}{Should non-void missing values be considered as regular states? See Details.}
13 | 
14 | }
15 | \details{
16 | When \code{with.missing=FALSE} (default) missing values are ignored and a substring \code{AA***A} for example will be considered as a spell \code{AAA} of duration 3. When \code{with.missing=TRUE}, durations are also computed for spells of missing values (gaps in sequences).
17 | 
18 | See \link{seqdef} on options for handling missing values when creating sequence objects.
19 | }
20 | 
21 | \value{a matrix containing the states durations for each distinct state in each sequence.}
22 | 
23 | \seealso{
24 | \code{\link{seqdss}}.
25 | }
26 | 
27 | \examples{
28 | ## Creating a sequence object from columns 13 to 24
29 | ## in the 'actcal' example data set
30 | ## Here we retain the first 10 sequences only.
31 | data(actcal)
32 | actcal.seq <- seqdef(actcal[1:10,13:24])
33 | 
34 | ## Retrieving the spell durations
35 | actcal.dur <- seqdur(actcal.seq)
36 | 
37 | ## Displaying the durations for the first 10 sequences
38 | actcal.dur
39 | }
40 | 
41 | \author{Alexis Gabadinho and Gilbert Ritschard}
42 | 
43 | \keyword{Longitudinal characteristics}
44 | 


--------------------------------------------------------------------------------
/man/seqistatd.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqistatd}
 2 | \alias{seqistatd}
 3 | \title{State frequencies in each individual sequence}
 4 | \description{
 5 |   Returns the state frequencies (total durations) for each sequence in the sequence object.
 6 | }
 7 | \usage{
 8 |  seqistatd(seqdata, with.missing=FALSE, prop=FALSE)
 9 | }
10 | \arguments{
11 |   \item{seqdata}{a sequence object (see \code{\link{seqdef}} function).}
12 |   \item{with.missing}{logical: if set as \code{TRUE}, total durations are also computed for the missing status (gaps in the sequences). See \link{seqdef} on options for handling missing values when creating sequence objects.}
13 |   \item{prop}{logical: if \code{TRUE}, proportions of time spent in each state are returned instead of absolute values. This option is specially useful when sequences contain missing states, since the sum of the state durations may not be the same for all sequences.}
14 | }
15 | 
16 | \references{
17 | Gabadinho, A., G. Ritschard, N. S. Müller and M. Studer (2011). Analyzing and Visualizing State Sequences in R with TraMineR. \emph{Journal of Statistical Software} \bold{40}(4), 1-37.
18 | 
19 | Ritschard, G. (2023), "Measuring the nature of individual sequences", \emph{Sociological Methods and Research}, 52(4), 2016-2049. \doi{10.1177/00491241211036156}.
20 | }
21 | 
22 | %\seealso{}
23 | 
24 | \examples{
25 | data(actcal)
26 | actcal.seq <- seqdef(actcal,13:24)
27 | seqistatd(actcal.seq[1:10,])
28 | 
29 | ## Example using "with.missing" argument
30 | data(ex1)
31 | ex1.seq <- seqdef(ex1, 1:13, weights=ex1$weights)
32 | 
33 | seqistatd(ex1.seq)
34 | seqistatd(ex1.seq, with.missing=TRUE)
35 | 
36 | }
37 | 
38 | \author{Alexis Gabadinho}
39 | 
40 | \keyword{Longitudinal characteristics}
41 | \keyword{State sequences} 
42 | 
43 | 


--------------------------------------------------------------------------------
/R/seqetm.R:
--------------------------------------------------------------------------------
 1 | seqetm <- function(seqdata, method = "transition", use.labels = TRUE, sep = ">",
 2 |   bp = "", ep = "end", seq) {
 3 | 
 4 |   TraMineR.check.depr.args(alist(seqdata = seq))
 5 | 
 6 | 	statl <- alphabet(seqdata)#seqstatl(seqdata)
 7 | 	nr <- attr(seqdata, "nr")
 8 | 	has.nr <- any(seqdata==nr)
 9 | 	if (has.nr) {
10 | 		statl <- c(statl, nr)
11 | 	}
12 | 	void <- attr(seqdata, "void")
13 | 	has.void <- any(seqdata==void)
14 | 	if (has.void) {
15 | 		statl <- c(statl, void)
16 | 	}
17 | 	nbstat <- length(statl)
18 | 	tevent <- matrix(nrow=nbstat, ncol=nbstat)
19 | 	rownames(tevent) <- statl
20 | 	colnames(tevent) <- statl
21 | 	alphabet <- statl
22 | 	if (use.labels && inherits(seqdata, "stslist")) {
23 | 		#label<-alphabet(seqdata)
24 | 		label <- attr(seqdata, "labels")
25 | 		if (has.nr) {
26 | 			label <- c(label, nr)
27 | 		}
28 | 		if (has.void) {
29 | 			label <- c(label, void)
30 | 		}
31 | 		if(length(label)==length(alphabet)){
32 | 			alphabet <- label
33 | 		}
34 | 		else if(length(label)>0){
35 | 			warning("Length of the labels and of the alphabet are not equal")
36 | 		}
37 | 	}
38 | 	if(any(grepl(",", alphabet))){
39 | 		warning(" [!] Alphabet and/or state labels should not contain commas ',' which are reserved for separating multiple events of a same transition!\n")
40 | 	}
41 | 	for(i in 1:nbstat){
42 | 		for(j in 1:nbstat){
43 | 			if(i==j){
44 | 				tevent[i,j] <- alphabet[[i]]
45 | 			}else{
46 | 				if(method=="transition"){
47 | 					tevent[i,j] <- paste(alphabet[[i]], alphabet[[j]], sep=sep)
48 | 				}else if(method == "period"){
49 | 					tevent[i,j] <- paste(ep, alphabet[[i]], ",", bp, alphabet[[j]], sep="")
50 | 				}else if(method == "state"){
51 | 					tevent[i,j] <- alphabet[[j]]
52 | 				}
53 | 			}
54 | 		}
55 | 	}
56 | 	return(tevent)
57 | 
58 | }
59 | 


--------------------------------------------------------------------------------
/man/seqconc.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqconc}
 2 | \alias{seqconc}
 3 | \title{Concatenate vectors of states or events into a character string}
 4 | \description{
 5 | Concatenate vectors of states or events into a character string. In the string, each state is separated by 'sep'. The void elements in the input sequences are eliminated.
 6 | }
 7 | \usage{
 8 | seqconc(data, var=NULL, sep="-", vname="Sequence", void=NA)
 9 | }
10 | \arguments{
11 |   \item{data}{A data frame or matrix containing sequence data (tibble will be converted with \code{as.data.frame}).}
12 | 
13 |   \item{var}{List of the columns containing the sequences. Default is \code{NULL} in which case all  columns are retained. Whether the sequences are in the compressed (character strings) or extended format is automatically detected by counting the number of columns.}
14 | 
15 |   \item{sep}{Character used as separator. By default, "\code{-}".}
16 | 
17 |   \item{vname}{an optional name for the variable containing the sequences. By default, \code{"Sequence"}.}
18 | 
19 | 	\item{void}{the code used for void elements appearing in the sequences (see \cite{Gabadinho et al. (2009)} for more details on missing values and void elements in sequences). Default is \code{NA}.}
20 | 
21 | }
22 | \value{a vector of character strings, one for each row in the input data.}
23 | 
24 | \author{Alexis Gabadinho}
25 | 
26 | \references{
27 | Gabadinho, A., G. Ritschard, M. Studer and N. S. Müller (2009). Mining Sequence Data in \code{R} with the \code{TraMineR} package: A user's guide. \emph{Department of Econometrics and Laboratory of Demography, University of Geneva}.
28 | }
29 | 
30 | \seealso{
31 |  \code{\link{seqdecomp}}.
32 | }
33 | \examples{
34 | data(actcal)
35 | actcal.string <- seqconc(actcal,13:24)
36 | head(actcal.string)
37 | }
38 | \keyword{Data handling}
39 | 


--------------------------------------------------------------------------------
/man/seqelength.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqelength}
 2 | \alias{seqelength}
 3 | \alias{seqelength<-}
 4 | %- Also NEED an '\alias' for EACH other topic documented here.
 5 | \title{Lengths of event sequences}
 6 | \description{
 7 | The length of an event sequence is its time span, i.e., the total time of observation. This information is useful to perform for instance a survival analysis. The function
 8 | \code{seqelength} retrieves the lengths of the provided sequences, while
 9 | \code{seqelength <-} sets the length of the sequences.
10 | }
11 | \usage{
12 | seqelength(eseq, s)
13 | seqelength(eseq, s) <- value
14 | }
15 | %- maybe also 'usage' for other objects documented here.
16 | \arguments{
17 |   \item{eseq}{An event sequence object (\code{seqelist}).}
18 |   \item{value}{A list of sequence lengths.}
19 |   \item{s}{Deprecated. Use \code{eseq} instead.}
20 | }
21 | \value{
22 |   A numeric vector with the lengths of the sequences.
23 | }
24 | %\references{ ~put references to the literature/web site here ~ }
25 | %\author{ ~~who you are~~ }
26 | %\note{ ~~further notes~~}
27 | 
28 | %\seealso{ ~~objects to See Also as \code{\link{help}}, ~~~ }
29 | \examples{
30 | data(actcal.tse)
31 | actcal.eseq <- seqecreate(actcal.tse)
32 | ## Since end.event is not specified, contains no sequence lengths
33 | ## We set them manually as 12 for all sequences
34 | sl <- numeric()
35 | sl[1:2000] <- 12
36 | seqelength(actcal.eseq) <- sl
37 | actcal.eseq[1:10]
38 | ## Retrieve lengths
39 | slen <- seqelength(actcal.eseq)
40 | summary(slen)
41 | }
42 | %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
43 | \author{Matthias Studer (with Gilbert Ritschard for the help page)}
44 | % Add one or more standard keywords, see file 'KEYWORDS' in the
45 | % R documentation directory.
46 | \keyword{Event sequences}
47 | \keyword{Longitudinal characteristics}
48 | 


--------------------------------------------------------------------------------
/man/seqeweight.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqeweight}
 2 | \alias{seqeweight}
 3 | \alias{seqeweight<-}
 4 | %- Also NEED an '\alias' for EACH other topic documented here.
 5 | \title{
 6 | 	Setting or retrieving weights of an event sequence object.
 7 | }
 8 | \description{
 9 | 	Event sequence objects can be weighted. Weights are used by other functions such as \code{\link{seqefsub}} or \code{\link{seqecmpgroup}} to compute weighted statistics.
10 | }
11 | \usage{
12 | seqeweight(eseq, s)
13 | seqeweight(eseq, s) <- value
14 | }
15 | %- maybe also 'usage' for other objects documented here.
16 | \arguments{
17 |   \item{eseq}{An event sequence object (\code{seqelist}).}
18 |   \item{value}{Numerical vector containing weights}
19 |   \item{s}{Deprecated. Use \code{eseq} instead.}
20 | }
21 | \value{
22 | \code{seqeweight} returns a numerical vector containing the weights associated to each event sequence.
23 | }
24 | 
25 | \examples{
26 | ##Starting with states sequences
27 | ##Loading data
28 | data(biofam)
29 | ## Creating state sequences
30 | biofam.seq <- seqdef(biofam,10:25,informat='STS')
31 | 
32 | ## Creating event sequences from biofam
33 | biofam.eseq <- seqecreate(biofam.seq, weighted=FALSE)
34 | 
35 | ## Using the weights
36 | seqeweight(biofam.eseq) <- biofam$wp00tbgs
37 | 
38 | ## Now seqefsub accounts for weights unless weighted is set to FALSE
39 | fsubseq <- seqefsub(biofam.eseq, pmin.support=0.01)
40 | 
41 | ## Searching for weighted susbsequences which best
42 | ## discriminate the birth cohort
43 | discr <- seqecmpgroup(fsubseq, group=biofam$birthyr>=1940)
44 | plot(discr[1:15])
45 | 
46 | }
47 | \author{Matthias Studer (with Gilbert Ritschard for the help page)}
48 | % Add one or more standard keywords, see file 'KEYWORDS' in the
49 | % R documentation directory.
50 | \keyword{Sequence-object attributes}
51 | \keyword{Event sequences}
52 | 


--------------------------------------------------------------------------------
/src/NMSdistance.h:
--------------------------------------------------------------------------------
 1 | #ifndef NMSDISTANCECALCULATOR_H
 2 | #define NMSDISTANCECALCULATOR_H
 3 | #include "distancecalculator.h"
 4 | 
 5 | class SUBSEQdistance: public DistanceCalculator{
 6 | 	protected:
 7 | 		double *selfmatvect;
 8 | 		double *kvect;
 9 | 		double *kweights;
10 | 		int distMethod;
11 | 		int distTransform;
12 | 	public:
13 | 		SUBSEQdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS):DistanceCalculator(normS, Ssequences, seqdim, lenS),kweights(NULL), distMethod(0){
14 | 			selfmatvect= new double[nseq*maxlen];
15 | 			kvect =  new double[maxlen];
16 | 		}
17 | 		SUBSEQdistance(SUBSEQdistance *dc):DistanceCalculator(dc),kweights(dc->kweights), distMethod(dc->distMethod){
18 | 			selfmatvect= new double[nseq*maxlen];
19 | 			memcpy(this->selfmatvect, dc->selfmatvect, nseq*maxlen*sizeof(double));
20 | 			kvect =  new double[maxlen];
21 | 			
22 | 		}
23 | 		//virtual DistanceCalculator* copy(){return new SUBSEQdistance(this);}
24 | 		virtual ~SUBSEQdistance(){
25 | 			delete[] selfmatvect;
26 | 			delete[] kvect;
27 | 		}
28 | 		void resetKvect(){
29 | 			for(int k=0;k <maxlen;k++){
30 | 				this->kvect[k]=0.0;
31 | 			}
32 | 		}
33 | 		virtual void computeattr(const int&is, const int& js)=0;
34 | 		virtual void setParameters(SEXP params);
35 | 		double distance(const int&is, const int& js);
36 | };
37 | 
38 | 
39 | class NMSdistance: public SUBSEQdistance{
40 |   protected:
41 |     int zmatsize;
42 | 	double *hmat;
43 | 	double *vmat;
44 | 	int *zmat;
45 | 	
46 | 	
47 |   public:
48 | 	NMSdistance(SEXP normS, SEXP Ssequences, SEXP seqdim, SEXP lenS);//:	SUBSEQdistance( normS, Ssequences, seqdim, lenS){}
49 | 	NMSdistance(NMSdistance * dc);
50 | 	virtual DistanceCalculator* copy(){return new NMSdistance(this);}
51 |     virtual ~NMSdistance();
52 | 	void computeattr(const int&is, const int& js);
53 | };
54 | 
55 | #endif
56 | 


--------------------------------------------------------------------------------
/R/subseqelist.R:
--------------------------------------------------------------------------------
 1 | is.subseqelist<-function(eseq){
 2 | #   return(.Call(C_istmrsequence,eseq))
 3 | 	return(inherits(eseq, "subseqelist"))
 4 | }
 5 | 
 6 | createsubseqelist<-function(eseq, constraint, subseq, data, type="frequent"){
 7 | 	if(!is.seqelist(eseq)) {
 8 | 		stop(" [!] eseq should be a seqelist")
 9 | 	}
10 | 	ret <- list()
11 | 	ret$eseq <- eseq
12 | 	ret$constraint <- constraint
13 | 	ret$subseq <- subseq
14 | 	ret$data <- as.data.frame(data, optional=TRUE)
15 | 	ret$type <- type
16 | 	class(ret$subseq) <- c("seqelist", "list")
17 | #  attr(ret$subseq,"dictionnary")<-attr(seq,"dictionnary")
18 | 	class(ret) <- "subseqelist"
19 | 	return(ret)
20 | }
21 | print.subseqelist<-function(x,...){
22 | 	z <- data.frame(data.frame(Subsequence=as.character(x$subseq), check.names=FALSE), x$data, row.names=NULL, check.names=FALSE)
23 | 	print(z, ...)
24 | 	cat("\nComputed on", length(x$eseq), "event sequences\n")
25 | 	print(x$constraint,...)
26 | }
27 | 
28 | "[.subseqelist" <- function(x, i,j, drop=FALSE) {
29 |     # If only 1 subscript is given, the result will still be a Surv object
30 |     #  If the second is given extract the relevant columns as a matrix
31 | 	if (missing(j)) {
32 | 		ret <- createsubseqelist(x$eseq, x$constraint, x$subseq[i,drop=drop], data=x$data[i,], type=x$type)
33 | 		if(!is.null(x$labels)) {
34 | 			ret$labels<-x$labels
35 | 		}
36 | 		if(!is.null(x$bonferroni)) {
37 | 			ret$bonferroni<-x$bonferroni
38 | 		}
39 | 		class(ret)<-class(x)
40 | 		return(ret)
41 | 	} else {
42 | 		class(x) <- NULL
43 | 		NextMethod("[")
44 | 	}
45 | }
46 | 
47 | plot.subseqelist<-function(x, freq=NULL, cex=1,...){
48 | 	slegend <- as.character(x$subseq)
49 | 	if(is.null(freq)) {
50 | 		freq<-x$data[,1]
51 | 	}
52 | 	barpos <- barplot(freq, names.arg=c(""), ...)
53 | 	text(x=barpos, y=0.02, labels=slegend, srt=90, adj=c(0,0.5), cex=cex)
54 | }
55 | 


--------------------------------------------------------------------------------
/R/seqintegr.R:
--------------------------------------------------------------------------------
 1 | seqintegration <- function(...){
 2 |   msg.stop("Seqintegration is obsolete, use seqintegr instead!")
 3 | }
 4 | 
 5 | seqintegr <- function(seqdata, state=NULL, pow=1, with.missing=FALSE){
 6 | 	if (!inherits(seqdata, "stslist")) {
 7 |         stop("[!] seqdata is not a sequence object, see seqdef function to create one")
 8 |   }
 9 |   alph <- alphabet(seqdata)
10 |   nr <- attr(seqdata, "nr")
11 |   if (with.missing) {
12 |       alph <- c(alph, nr)
13 |   }
14 |   if (length(state)>1)
15 |     msg.stop("When non null, 'state' must be a single state")
16 |   if (!is.null(state)){
17 |     if (!state %in% alph){
18 |         msg.stop("state ", state, " not in the alphabet!")
19 |     }
20 |   }
21 |   nbstat <- ifelse(is.null(state),length(alph),1)
22 |   nbseq <- nrow(seqdata)
23 | 
24 |   iseqtab <- matrix(nrow = nbseq, ncol = nbstat)
25 |   if (is.null(state))
26 |     colnames(iseqtab) <- alph
27 |   else
28 |     colnames(iseqtab) <- state
29 | 
30 |   rownames(iseqtab) <- rownames(seqdata)
31 |   ## message(" [>] computing state distribution for ", nbseq, " sequences ...")
32 | 	integVector <- (1:ncol(seqdata))^pow
33 |   suminteg <- sum(integVector, na.rm = TRUE)
34 | 
35 |   ## when with.missing=FALSE we do not account for positions occupied by missings
36 |   miss <- c(nr, attr(seqdata,"void"))
37 |   if(!with.missing)
38 |     suminteg <- suminteg - apply(seqdata, 1, function(x) sum(integVector[x %in% miss], na.rm = TRUE))
39 | 
40 |   if(is.null(state)){
41 |     for(i in 1:nbstat) {
42 |       iseqtab[, i] <- apply(seqdata, 1, function(x) sum(integVector[x ==  alph[i]], na.rm =
43 |         TRUE))/suminteg
44 |     }
45 |   } else {
46 |         iseqtab[, 1] <- apply(seqdata, 1, function(x) sum(integVector[x ==  state], na.rm = TRUE))/suminteg
47 |   }
48 | 
49 |   return(iseqtab)
50 | }
51 | 


--------------------------------------------------------------------------------
/R/seqtree.R:
--------------------------------------------------------------------------------
 1 | seqtree <- function(formula, data = NULL, weighted = TRUE, min.size = 0.05,
 2 |   max.depth = 5, R = 1000, pval = 0.01, weight.permutation = "replicate",
 3 |   seqdist.args = list(method = "LCS", norm = "auto"), diss = NULL, squared = FALSE,
 4 |   first = NULL, minSize, maxdepth, seqdist_arg) {
 5 | 
 6 |   TraMineR.check.depr.args(alist(min.size = minSize, max.depth = maxdepth, seqdist.args = seqdist_arg))
 7 | 
 8 | 	##formula.call <- formula
 9 | 	tterms <- terms(formula)
10 | 	seqdata <- eval(formula[[2]], data, parent.frame()) # to force evaluation
11 | 	if (!inherits(seqdata, "stslist")) {
12 | 		stop("Left hand term in formula should be a stslist object (see seqdef)")
13 |  	}
14 | 	if(is.null(diss)){
15 | 		seqdist.args$seqdata <- seqdata
16 | 		diss <- do.call(seqdist, seqdist.args)
17 | 	}
18 | 	if(weighted){
19 | 		weights <- attr(seqdata, "weights")
20 | 	}
21 | 	else {
22 | 		weights <- NULL
23 | 	}
24 | 	dissmatrix <- diss
25 | 	formula[[2]] <- NULL
26 | 	## Model matrix from forumla
27 | 	predictor <- as.data.frame(model.frame(formula, data, drop.unused.levels = TRUE, na.action=NULL))
28 | 	tree <- DTNdisstree(dissmatrix=dissmatrix, predictor=predictor, terms=tterms,
29 | 						weights=weights, min.size=min.size, max.depth=max.depth, R=R,
30 | 						pval=pval, object=seqdata, weight.permutation=weight.permutation,
31 | 						squared=squared, first=first)
32 | 	class(tree) <- c("seqtree", class(tree))
33 | 	return(tree)
34 | 
35 | }
36 | 
37 | 
38 | print.seqtree <- function(x, digits = getOption("digits") - 2, medoid=TRUE, ...){
39 | 	stslistmedoid <- function(object, index) {
40 | 		x <- seqconc(object[index,], void=attr(object,"void"))
41 | 		x <- suppressMessages(seqformat(x, from = "STS", to = "SPS", compress = TRUE))
42 | 		return(x)
43 | 	}
44 | 	print.disstree(x, digits = digits, medoid=medoid, medoid.fun=stslistmedoid,...)
45 | }
46 | 


--------------------------------------------------------------------------------
/man/TraMineR.check.depr.args.Rd:
--------------------------------------------------------------------------------
 1 | \name{TraMineR.check.depr.args}
 2 | \alias{TraMineR.check.depr.args}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{
 5 | Checking and managing deprecated arguments
 6 | }
 7 | \description{
 8 | Checks the presence of deprecated arguments, assigns value of a deprecated argument to the corresponding new argument name, and issues warning messages.
 9 | }
10 | \usage{
11 | TraMineR.check.depr.args(arg.pairs)
12 | }
13 | %- maybe also 'usage' for other objects documented here.
14 | \arguments{
15 |   \item{arg.pairs}{List of pairs of old and new argument names
16 |   \cr(e.g. \code{alist(newname1 = oldname1, newname2 = oldname2)})}
17 | }
18 | 
19 | \details{To be used inside functions. For developers only.
20 | 
21 | For each specified pair of new and old argument names, the function checks if the old
22 | argument name is specified. If so and the new one is not, a warning message is raised
23 | and the argument value is assigned to the new argument name. If one of the names declared in check.depr.args() arguments si not an argument of the parent function
24 | or if both the new and old argument names are specified an error is raised.
25 | 
26 | The function does not detect when the new and the old argument names are specified
27 | together and the new argument value is its default value. In this case,
28 | the value associated with the old argument name is assigned to the new name and a warning
29 | message is raised.
30 | 
31 | The function works whether the argument names are explicitly declared or not in the call to the checked function.
32 | 
33 | The only requirement for the function to work is that the deprecated arguments should be listed WITHOUT default values in the definition of the checked function.
34 | }
35 | 
36 | \author{Pierre-Alexandre Fonta, Gilbert Ritschard}
37 | 
38 | \value{None.} 


--------------------------------------------------------------------------------
/R/seqsubsn.R:
--------------------------------------------------------------------------------
 1 | ## =============================================
 2 | ## Number of distinct subsequences in a sequence
 3 | ## =============================================
 4 | 
 5 | nsubs <- function (x, nbstat, statlist, void, nr, with.missing) {
 6 | 		l <- vector(mode="integer", nbstat)
 7 | 		x <- x[x!=void]
 8 |     if (!with.missing) x <- x[x!=nr]
 9 | 		slength <- length(x)
10 |     if (slength == 0) return(1) ## empty sequence has one subseq (the empty one)
11 | 
12 | 		N <- vector(mode="integer",(slength+1))
13 | 		N[1] <- 1
14 | 
15 | 		for (i in 2:(slength+1)) {
16 | 			N[i] <- 2*N[i-1]
17 | 			cidx <- which(statlist==x[i-1])
18 | 
19 | 			if (l[cidx[1]] > 0) N[i] <- N[i] - N[l[cidx[1]]]
20 | 			l[cidx[1]] <- i-1
21 | 		}
22 | 		return(N[(slength+1)])
23 | 	}
24 | 
25 | 
26 | seqsubsn <- function(seqdata, DSS=TRUE, with.missing=FALSE) {
27 | 
28 | 	if (!inherits(seqdata,"stslist"))
29 | 		stop("data is not a sequence object, see seqdef function to create one")
30 | 
31 | ## Since v 2.0.13 we use thw with.missing argument
32 | 	## with.missing=FALSE
33 | 	##nr <- attr(seqdata,"nr")
34 | 	##if (any(seqdata==nr)) {
35 | 	##	message(" [!] found missing state in the sequence(s), adding missing state to the alphabet")
36 | 	##	with.missing=TRUE
37 | 	##}
38 | 
39 | 	if (DSS==TRUE) {
40 | 		seqdata <- suppressMessages(seqdss(seqdata, with.missing=with.missing))
41 | 	}
42 | 
43 | 	## alphabet
44 | 	#sl <- attr(seqdata,"alphabet")
45 | 	#if (with.missing) {
46 | 	#	sl <- c(sl, nr)
47 | 	#}
48 |   sl <- alphabet(seqdata, with.missing=with.missing)
49 | 
50 | 	ns <- length(sl)
51 | 
52 | 	void <- attr(seqdata,"void")
53 | 	nr <- attr(seqdata,"nr")
54 | 		
55 | 	result <- apply(seqdata, 1, nsubs, nbstat=ns, statlist=sl, void=void, nr=nr, with.missing=with.missing)
56 | 
57 | 	result <- as.matrix(result)
58 | 	colnames(result) <- "Subseq."
59 | 	rownames(result) <- rownames(seqdata)
60 | 
61 | 	return(result)
62 | }
63 | 


--------------------------------------------------------------------------------
/R/seqlegend.R:
--------------------------------------------------------------------------------
 1 | ## =========================================
 2 | ## Plotting the legend for a sequence object
 3 | ## =========================================
 4 | 
 5 | seqlegend <- function(seqdata, with.missing = "auto", cpal = NULL,
 6 |   missing.color = NULL, ltext = NULL, position = "topleft", cex = 1,
 7 |   boxes=TRUE, fontsize, ...) {
 8 | 
 9 |   TraMineR.check.depr.args(alist(cex = fontsize))
10 | 
11 | 
12 | 	if (!inherits(seqdata,"stslist"))
13 | 		stop("data is not a sequence object, use seqdef function to create one")
14 | 
15 | 	if (is.null(cpal))
16 | 		cpal <- attr(seqdata,"cpal")
17 | 
18 | 	if (is.null(ltext))
19 | 		ltext <- attr(seqdata,"labels")
20 | 
21 | 	if (is.null(missing.color))
22 | 		missing.color <- attr(seqdata,"missing.color")
23 | 
24 | 	## Adding an entry for missing in the legend
25 | 	nr <- attr(seqdata,"nr")
26 | 
27 | 	if ((with.missing=="auto" && any(seqdata==nr)) || with.missing==TRUE) {
28 | 		cpal <- c(cpal,missing.color)
29 | 		ltext <- c(ltext,"missing")
30 | 	## statl <- c(statl,nr)
31 | 	## nbstat <- nbstat+1
32 | 	}
33 | 
34 |  	oolist <- list(...)
35 |   if (! "col" %in% names(oolist)) oolist[["col"]] <- cpal
36 |   if (! "x" %in% names(oolist)) oolist[["x"]] <- position
37 |   if (! "legend" %in% names(oolist)) oolist[["legend"]] <- ltext
38 |   oolist <- c(list(cex=cex), oolist)
39 | 	plot(0, type= "n", axes=FALSE, xlab="", ylab="")
40 |   if (boxes) {
41 | 	  #legend(position, fill=cpal, legend=ltext, cex=cex,...)
42 |     if (! "fill" %in% names(oolist)) oolist[["fill"]] <- cpal
43 | 	  res <- do.call(legend,oolist)
44 |   } else {
45 |  	  if (! "lty" %in% names(oolist)) oolist[["lty"]] <- 1
46 |     if (! "lwd" %in% names(oolist)) oolist[["lwd"]] <- 15
47 |     if (! "seg.len" %in% names(oolist)) oolist[["seg.len"]] <- .4
48 |     if (! "x.intersp" %in% names(oolist)) oolist[["x.intersp"]] <- 1.5
49 | 	  res <- do.call(legend,oolist)
50 |   }
51 |   invisible(res)
52 | }
53 | 


--------------------------------------------------------------------------------
/R/seqstatd.R:
--------------------------------------------------------------------------------
 1 | ## States frequency by time unit
 2 | 
 3 | seqstatd <- function(seqdata, weighted=TRUE, with.missing=FALSE, norm=TRUE) {
 4 | 
 5 | 	if (!inherits(seqdata,"stslist"))
 6 | 		stop("data is not a sequence object, use seqdef function to create one")
 7 | 	
 8 | 	## Retrieving the alphabet
 9 | 	statl <- attr(seqdata,"alphabet")
10 | 	col <- cpal(seqdata)
11 | 
12 | 	if (with.missing) {
13 | 		statl <- c(statl, attr(seqdata,"nr"))
14 | 		col <- c(col, attr(seqdata,"missing.color"))
15 | 	}
16 | 
17 | 	nbstat <- length(statl)
18 | 
19 | 	seql <- ncol(seqdata)
20 | 
21 | 	sd <- matrix(nrow=nbstat,ncol=seql)
22 | 	row.names(sd) <- statl
23 | 	colnames(sd) <- colnames(seqdata)
24 | 
25 | 	## Weights
26 | 	weights <- attr(seqdata, "weights")
27 | 
28 | 	if (!weighted || is.null(weights)) {
29 | 		weights <- rep(1.0, nrow(seqdata))
30 | 	}
31 | 	## Also takes into account that in unweighted sequence objects created with
32 | 	## older TraMineR versions the weights attribute is a vector of 1
33 | 	## instead of NULL
34 | 	if (all(weights==1))
35 | 		weighted <- FALSE
36 | 
37 | 	for (i in 1:nbstat)
38 | 		for (j in 1:seql)
39 | 			sd[i,j] <- sum((seqdata[,j]==statl[i])*weights)
40 | 
41 | 	## sd <-	apply(seqdata,2,table)
42 | 	N <- apply(sd,2,sum)
43 | 	for (i in 1:seql) sd[,i] <- sd[,i]/N[i]
44 | 
45 | 	E <- apply(sd,2,entropy)
46 | 	## Maximum entropy is the entropy of the alphabet
47 | 	if (norm==TRUE) {
48 | 		E.max <- entropy(rep(1/nbstat,nbstat))
49 | 		E <- E/E.max
50 | 	}
51 | 
52 | 	res <- list(sd,N,E)
53 | 	names(res) <- c("Frequencies", "ValidStates", "Entropy")
54 | 	
55 | 	class(res) <- c("stslist.statd","list")
56 | 
57 | 	attr(res,"nbseq") <- sum(weights)
58 | 	attr(res,"cpal") <- col
59 | 	attr(res,"xtlab") <- colnames(seqdata)
60 | 	attr(res,"xtstep") <- attr(seqdata,"xtstep")
61 | 	attr(res,"tick.last") <- attr(seqdata,"tick.last")
62 | 	attr(res,"weighted") <- weighted
63 |     attr(res,"norm") <- norm
64 | 
65 | 	return(res)
66 | }
67 | 


--------------------------------------------------------------------------------
/man/stlab.Rd:
--------------------------------------------------------------------------------
 1 | \name{stlab}
 2 | \alias{stlab}
 3 | \alias{stlab<-}
 4 | \title{Get or set the state labels of a sequence object}
 5 | \description{
 6 | This function gets or sets the state labels of a sequence object, that is, the long labels used when displaying the state legend in plotting functions.
 7 | }
 8 | \details{The state legend is plotted either automatically by the plot functions provided for visualizing sequence objects or with the \code{\link{seqlegend}} function. A long label is associated to each state of the alphabet and displayed in the legend. The state labels are defined when creating the sequence object, either automatically using the values found in the data or by specifying a user defined vector of labels. The \code{stlab} function can be used to get or set the state labels of a previously defined sequence object.}
 9 | \usage{
10 | stlab(seqdata)
11 | stlab(seqdata) <- value
12 | }
13 | \arguments{
14 |   \item{seqdata}{a state sequence object as defined by the \code{\link{seqdef}} function.}
15 |   \item{value}{a vector of character strings containing the labels, of length equal to the number of states in the alphabet. Each string is attributed to the corresponding state in the alphabet, the order being the one returned by the \code{\link{alphabet}}.}
16 | }
17 | \value{For 'stlab' a vector containing the labels.
18 | 
19 | For 'stlab<-' the updated sequence object.
20 | }
21 | \seealso{
22 |  \code{\link{seqdef}}
23 | }
24 | \examples{
25 | ## Creating a sequence object with the columns 13 to 24
26 | ## in the 'actcal' example data set
27 | ## The color palette is automatically set
28 | data(actcal)
29 | actcal.seq <- seqdef(actcal,13:24)
30 | 
31 | ## Retrieving the color palette
32 | stlab(actcal.seq)
33 | seqiplot(actcal.seq)
34 | 
35 | ## Changing the state labels
36 | stlab(actcal.seq) <- c("Full time","Part time (19-36 hours)",
37 |     "Part time (1-18 hours)", "No work")
38 | seqiplot(actcal.seq)
39 | }
40 | \keyword{Sequence-object attributes}
41 | 


--------------------------------------------------------------------------------
/man/actcal.Rd:
--------------------------------------------------------------------------------
 1 | \name{actcal}
 2 | \docType{data}
 3 | \alias{actcal}
 4 | \title{Example data set: Activity calendar from the Swiss Household Panel}
 5 | 
 6 | \description{
 7 | This data set contains 2000 individual sequences of monthly activity statuses from
 8 | January to December 2000.}
 9 | 
10 | \details{
11 | The data set is a subsample of the data collected by the Swiss Household Panel (SHP).
12 | 
13 | The state column (variable) names are `\code{jan00}', `\code{feb00}', etc... and correspond to columns 13 to 24.
14 | 
15 | There are four possible states:\cr
16 | \cr
17 | A = Full-time paid job (> 37 hours)\cr
18 | B = Long part-time paid job (19-36 hours) \cr
19 | C = Short part-time paid job (1-18 hours) \cr
20 | D = Unemployed (no work) \cr
21 | 
22 | The data set contains also the following covariates:\cr
23 | 
24 | \tabular{ll}{
25 | \code{age00}    \tab (age in 2000) \cr
26 | \code{educat00} \tab (education level) \cr
27 | \code{civsta00} \tab (civil status) \cr
28 | \code{nbadul00} \tab (number of adults in household) \cr
29 | \code{nbkid00}  \tab (number of children) \cr
30 | \code{aoldki00} \tab (age of oldest kid) \cr
31 | \code{ayouki00} \tab (age of youngest kid) \cr
32 | \code{region00} \tab (residence region) \cr
33 | \code{com2.00}  \tab (residence commune type) \cr
34 | \code{sex}      \tab (sex of respondent)\cr
35 | \code{birthy}   \tab (birth year) \cr
36 | }
37 | }
38 | \usage{data(actcal)}
39 | \format{A data frame with 2000 rows, 12 state variables, 1 id variable and 11 covariates.}
40 | \source{Swiss Household Panel}
41 | \references{\url{https://forscenter.ch/projects/swiss-household-panel/}}
42 | 
43 | \examples{
44 | data(actcal)
45 | # labels for plot color legends
46 | actcal.lab <- c("> 37 hours", "19-36 hours", "1-18 hours", "no work")
47 | # state sequence object of the first six sequences
48 | actcal.seq <- seqdef(actcal[1:6 ,13:24], labels=actcal.lab)
49 | 
50 | print(actcal.seq)
51 | }
52 | 
53 | \author{Gilbert Ritschard and Alexis Gabadinho}
54 | \keyword{Datasets}
55 | 


--------------------------------------------------------------------------------
/man/cpal.Rd:
--------------------------------------------------------------------------------
 1 | \name{cpal}
 2 | \alias{cpal}
 3 | \alias{cpal<-}
 4 | \title{Get or set the color palette of a sequence object}
 5 | \description{
 6 | This function gets or sets the color palette of a sequence object, that is, the list of colors used to represent the states.
 7 | }
 8 | \details{In the plot functions provided for visualizing sequence objects, a different color is associated to each state of the alphabet. The color palette is defined when creating the sequence object, either automatically or by specifying a user defined color vector. The \code{cpal} function can be used to get or set the color palette of a previously defined sequence object.}
 9 | \usage{
10 | cpal(seqdata)
11 | cpal(seqdata) <- value
12 | }
13 | \arguments{
14 |   \item{seqdata}{a state sequence object as defined by the \code{\link{seqdef}} function.}
15 |   \item{value}{a vector containing the colors, of length equal to the number of states in the alphabet. The colors can be passed as character strings representing color names such as returned by the \code{\link[grDevices]{colors}} function, as hexadecimal values or as RGB vectors using the \code{\link{rgb}} function. Each color is attributed to the corresponding state in the alphabet, the order being the one returned by the \code{\link{alphabet}}.}
16 | }
17 | \value{For '\code{cpal(seqdata)}' a vector containing the colors.
18 | 
19 | For '\code{cpal(seqdata) <-}' the updated sequence object.
20 | }
21 | \seealso{
22 |  \code{\link{seqdef}}
23 | }
24 | \examples{
25 | ## Creating a sequence object with the columns 13 to 24
26 | ## in the 'actcal' example data set
27 | ## The color palette is automatically set
28 | data(actcal)
29 | actcal.seq <- seqdef(actcal,13:24)
30 | 
31 | ## Retrieving the color palette
32 | cpal(actcal.seq)
33 | seqiplot(actcal.seq)
34 | 
35 | ## Setting a user defined color palette
36 | cpal(actcal.seq) <- c("blue","red", "green", "yellow")
37 | seqiplot(actcal.seq)
38 | }
39 | 
40 | \author{Alexis Gabadinho}
41 | 
42 | \keyword{Sequence-object attributes}
43 | 


--------------------------------------------------------------------------------
/man/seqfcheck.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqfcheck}
 2 | \alias{seqfcheck}
 3 | \title{Check if sequences are in the compressed or extended format}
 4 | \description{Check whether \emph{seqdata} contains sequences in the compressed format (as character strings with states separated by a separator) or in the extended format (sequences stored in a matrix with each successive state in a separate column.) For a more detailed description of the compressed and extended format, see \cite{Gabadinho, 2009}.
 5 | }
 6 | \details{
 7 | Whether the sequence(s) are in compressed format is checked by counting the number of columns and searching for the \code{'-'} or \code{':'} separator. The function returns the separator if it has been found in the data. If the data contains more than one column, the data is supposed to be in the extended format, and \code{'X'} is returned, unless some state codes contain the \code{'-'} character (e.g., states coded with negative integer values), in which case \code{'-X'} is returned.
 8 | 
 9 | }
10 | \usage{
11 | seqfcheck(seqdata)
12 | }
13 | \arguments{
14 |  \item{seqdata}{a vector, data frame or matrix containing sequence data.}
15 |  }
16 | \value{a character string coding the format of the sequence data, either \code{':'},\code{'-'},\code{'X'} or \code{'-X'}.}
17 | \seealso{
18 |  \code{\link{seqconc}}, \code{\link{seqdecomp}}
19 | }
20 | 
21 | \references{
22 | Gabadinho, A., G. Ritschard, M. Studer and N. S. Müller (2009). Mining Sequence Data in \code{R} with \code{TraMineR}: A user's guide. \emph{Department of Econometrics and Laboratory of Demography, University of Geneva}.
23 | }
24 | 
25 | \examples{
26 | ## The sequences in the actcal data set
27 | ## are in the extended format
28 | data(actcal)
29 | head(actcal[,13:24])
30 | seqfcheck(actcal[,13:24])
31 | 
32 | ## The sequences in the famform data set
33 | ## are in the compressed format
34 | data(famform)
35 | famform
36 | seqfcheck(famform)
37 | }
38 | 
39 | \author{Alexis Gabadinho}
40 | 
41 | \keyword{Data handling}
42 | \keyword{internal}
43 | 


--------------------------------------------------------------------------------
/src/ffunctions.c:
--------------------------------------------------------------------------------
 1 | #include<R.h>
 2 | #include<Rmath.h>
 3 | 
 4 | // TraMineR 2, Pierre-Alexandre Fonta (2017): not deleted because it used by seqLLCS() in the JSS article
 5 | void cLCS(int *iseq, int *jseq , double *length, int *result) {
 6 | 
 7 |     int np,mp,max,a,b,i,j,n,m,temp;
 8 |     n = (int)length[1];
 9 |     m = (int)length[0];
10 |     np = n + 1;
11 |     mp = m + 1;
12 |     //  Rprintf("m = %i, n = %i \n", m, n);
13 |     //    int** L = (int**) malloc(np * sizeof(int*));
14 |     //    for (k = 0; k< mp;k++) L[k] = (int*)malloc(mp * sizeof(int));
15 |     int L [mp][np];
16 |     //Rprintf("Creation du tableau ok");
17 |     for (i = 0; i<mp; i++) {
18 |         for (j= 0; j<np; j++) {
19 |             L[i][j] = (int)0;
20 |             //	 Rprintf("i = %i, j = %i\n", i, j);
21 |         }
22 |     }
23 |     //    Rprintf("tab : %i \n", L[0][1]);
24 | 
25 | 
26 |     for (i = 1; i <= m;i++) {
27 |         temp = iseq[i-1];
28 |         //  Rprintf("i = %i, temp = %i\n", i, temp);
29 |         for (j = 1; j<= n;j++) {
30 |             if (temp == jseq[j-1]) {
31 |                 L[i][j] = 1 + L[i-1][j-1];
32 |                 //	   Rprintf("temp = %i jseq = %i, L[%i][%i] = %i\n", temp, jseq[j-1], i, j, L[i][j]);
33 |             } else {
34 |                 //   Rprintf("égal 0 pour i %d", i);
35 |                 a = L[i-1][j];
36 |                 b = L[i][j-1];
37 |                 max = a;
38 |                 if (a < b) max = b;
39 |                 //   Rprintf("max = %i \n", max);
40 |                 L[i][j] = max;
41 |             }
42 |         }
43 |     }
44 | 
45 |     *result = L[m][n];
46 | }
47 | 
48 | 
49 | // TraMineR 2, Pierre-Alexandre Fonta (2017): not deleted because it used by seqLLCP() in the JSS article
50 | void cLCP(int *iseq, int *jseq , double *length, int *result) {
51 | 
52 | 	int minlength=imin2(length[0], length[1]);
53 |     int i=0;
54 | 
55 | 
56 |     while (i<minlength && iseq[i]==jseq[i]) {
57 | 		i++;
58 |     }
59 | 
60 |     *result = i;
61 | }
62 | 
63 | 


--------------------------------------------------------------------------------
/demo/Seqdist.R:
--------------------------------------------------------------------------------
 1 | ## ============================
 2 | ## Examples for the chapter
 3 | ## 'Measuring similarities and
 4 | ## distances between sequences'
 5 | ## in TraMineR User's Guide
 6 | ## ============================
 7 | 
 8 | ##require(grDevices); require(graphics)
 9 | ##oask <- devAskNewPage(dev.interactive(orNone = TRUE))
10 | 
11 | require(TraMineR)
12 | 
13 | ## ----
14 | ## MPOS
15 | ## ----
16 | data(famform)
17 | famform.seq <- seqdef(famform)
18 | famform.seq
19 | 
20 | ## number of matching positions
21 | seqmpos(famform.seq[1, ], famform.seq[2, ])
22 | seqmpos(famform.seq[2, ], famform.seq[4, ])
23 | 
24 | ## ---
25 | ## LCP
26 | ## ---
27 | data(famform)
28 | famform.seq <- seqdef(famform)
29 | famform.seq
30 | 
31 | seqLLCP(famform.seq[1, ], famform.seq[2, ])
32 | seqLLCP(famform.seq[3, ], famform.seq[4, ])
33 | seqLLCP(famform.seq[3, ], famform.seq[5, ])
34 | 
35 | seqdist(famform.seq, method = "LCP")
36 | 
37 | seqdist(famform.seq, method = "LCP", norm = "auto")
38 | 
39 | ## Deriving proximity measures from normalized distances
40 | 1 - seqdist(famform.seq, method = "LCP", norm = "auto")
41 | 
42 | ## ---
43 | ## LCS
44 | ## ---
45 | data(biofam)
46 | biofam.seq <- seqdef(biofam, 10:25)
47 | biofam.lcs <- seqdist(biofam.seq, method = "LCS")
48 | 
49 | ## --
50 | ## OM
51 | ## --
52 | costs <- seqsubm(biofam.seq, method = "TRATE")
53 | costs
54 | 
55 | biofam.om <- seqdist(biofam.seq, method = "OM", indel = 3, sm = costs)
56 | 
57 | object.size(biofam.om)/1024^2
58 | 
59 | round(biofam.om[1:10, 1:10], 1)
60 | 
61 | ## ----------
62 | ## LCS <=> OM
63 | ## ----------
64 | ccosts <- seqsubm(biofam.seq, method = "CONSTANT", cval = 2)
65 | ccosts
66 | 
67 | biofam.om2 <- seqdist(biofam.seq, method = "OM", indel = 1, sm = ccosts)
68 | biofam.om2[1:10, 1:10]
69 | 
70 | all.equal(biofam.om2, biofam.lcs)
71 | 
72 | ## ---
73 | ## DHD
74 | ## ---
75 | dhdcosts <- seqsubm(biofam.seq, method = "TRATE", time.varying = TRUE)
76 | 
77 | biofam.dhd <- seqdist(biofam.seq, method = "DHD", sm = dhdcosts)
78 | 
79 | 
80 | ## devAskNewPage(oask)
81 | 


--------------------------------------------------------------------------------
/R/seqdss.R:
--------------------------------------------------------------------------------
 1 | ## =======================================
 2 | ## Extracts distinct states from sequences
 3 | ## =======================================
 4 | 
 5 | seqdss <- function(seqdata, with.missing=FALSE) {
 6 | 
 7 | 	if (!inherits(seqdata,"stslist"))
 8 | 		stop("data is NOT a sequence object, see seqdef function to create one")
 9 | 
10 | 	nbseq <- nrow(seqdata)
11 | 
12 | 	sl <- seqlength(seqdata, with.missing=TRUE)
13 | 	maxsl <- max(sl)
14 | 
15 | 	void <- attr(seqdata, "void")
16 | 	statl <- attr(seqdata,"alphabet")
17 | 	nr <- attr(seqdata, "nr")
18 | 
19 | 	trans <- matrix(void, nrow=nbseq, ncol=maxsl)
20 | 
21 | 	if (with.missing) {
22 | 		statl <- c(statl, nr)
23 | 	}
24 | 
25 | 	seqdatanum <- seqasnum(seqdata, with.missing=with.missing)
26 | 
27 | 	if (!with.missing)
28 | 		seqdatanum[is.na(seqdatanum)] <- -99
29 | 
30 | 	maxcol <- 0
31 | 	for (i in 1:nbseq) {
32 | 		idx <- 1
33 | 		j <- 1
34 | 
35 | 		tmpseq <- seqdatanum[i,]
36 | 		
37 | 		while (idx <= sl[i]) {
38 | 			iseq <- tmpseq[idx]
39 | 
40 | 			while (idx < sl[i] & (tmpseq[idx+1]==iseq || tmpseq[idx+1]==-99)) {
41 | 				idx <- idx+1
42 | 			}
43 | 
44 | 			## The range of the numeric alphabet
45 | 			## obtained with seqasnum is 0..n
46 | 			if (iseq!=-99) {
47 | 				trans[i,j] <- statl[(iseq+1)]
48 | 				j <- j+1
49 | 			}
50 | 			idx <- idx+1
51 | 		}
52 | 		if (j>maxcol) {maxcol <- j}
53 | 
54 | 	}
55 | 	## drop=FALSE ensures that the result is a matrix even if trans has only one row
56 | 	trans <- trans[,1:(maxcol-1), drop=FALSE]
57 | 
58 | 	trans <-
59 |           suppressMessages(
60 |                            seqdef(trans, alphabet=alphabet(seqdata),
61 |                                   labels=stlab(seqdata),
62 |                                   missing=nr, right=NA,
63 |                                   cnames=paste("ST",seq(1:(maxcol-1)),sep=""),
64 |                                   cpal=cpal(seqdata),
65 |                                   id=rownames(seqdata),
66 |                                   weights=attr(seqdata, "weights")))
67 | 
68 | 	return(trans)
69 | }
70 | 


--------------------------------------------------------------------------------
/R/seqmodst.R:
--------------------------------------------------------------------------------
 1 | ## ====================
 2 | ## Modal state sequence
 3 | ## ====================
 4 | 
 5 | seqmodst <- function(seqdata, weighted=TRUE, with.missing=FALSE) {
 6 | 
 7 | 	if (!inherits(seqdata,"stslist"))
 8 | 		stop("data is not a sequence object, see seqdef function to create one", call.=FALSE)
 9 | 
10 | 	slength <- ncol(seqdata)
11 | 	statl <- alphabet(seqdata)
12 | 	cnames <- colnames(seqdata)
13 | 
14 | 	if (with.missing) {
15 | 		statl <- c(statl, attr(seqdata,"nr"))
16 | 	}
17 | 
18 | 	## State distribution
19 | 	freq <- seqstatd(seqdata, weighted, with.missing)$Frequencies
20 | 
21 | 	ctype <- matrix(nrow=1, ncol=slength)
22 | 	stfreq <- matrix(nrow=1, ncol=slength)
23 | 	colnames(stfreq) <- cnames
24 | 	rownames(stfreq) <- "Freq."
25 | 	colnames(ctype) <- cnames
26 | 
27 | 	## Constructing the transversal modal sequence
28 | 	for (i in 1:slength) {
29 | 		smax <- which(freq[,i]==max(freq[,i]))[1]
30 | 		stfreq[,i] <- freq[smax,i]
31 | 		ctype[,i] <- statl[smax]
32 | 	}
33 | 	
34 | 	res <- suppressMessages(seqdef(ctype, alphabet=alphabet(seqdata),
35 | 		missing=attr(seqdata,"nr"), nr=attr(seqdata,"nr"),
36 | 		left=NA, gaps=NA, right=NA,
37 | 		labels=stlab(seqdata),
38 | 		cpal=cpal(seqdata), missing.color=attr(seqdata,"missing.color"),
39 | 		xtstep=attr(seqdata, "xtstep"),
40 | 		tick.last=attr(seqdata, "tick.last")))
41 | 
42 | 	nbocc <- length(seqfind(res, seqdata))
43 | 
44 | 	## Distance to modal state sequence
45 | 	## if (dist)
46 | 	##	dist.modst <- seqdist(seqdata, refseq=res, ...)
47 | 	## else
48 | 	##	dist.modst <- NULL
49 | 
50 | 	class(res) <- c("stslist.modst", class(res))
51 | 
52 | 	attr(res, "Frequencies") <- stfreq
53 | 	## attr(res, "Distances") <- dist.modst
54 | 	attr(res, "Occurrences") <- nbocc
55 | 
56 | 	## Weights
57 | 	weights <- attr(seqdata, "weights")
58 | 
59 | 	if (!weighted || is.null(weights)) {
60 | 		weights <- rep(1.0, nrow(seqdata))
61 | 	}
62 | 	if (all(weights==1))
63 | 		weighted <- FALSE
64 | 
65 | 	attr(res, "nbseq") <- sum(weights)
66 | 	attr(res,"weighted") <- weighted
67 | 
68 | 	return(res)
69 |  }
70 | 


--------------------------------------------------------------------------------
/man/seqecontain.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqecontain}
 2 | \alias{seqecontain}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{Check if sequence contains events}
 5 | \description{
 6 |   Check if an event sequence or subsequence contains given events
 7 | }
 8 | \usage{
 9 | seqecontain(eseq, event.list, unknown.exclude = FALSE,
10 |   seq, eventList, exclude)
11 | }
12 | %- maybe also 'usage' for other objects documented here.
13 | \arguments{
14 |   \item{eseq}{A event sequence object (\code{seqelist}) or a an event subsequence object (\code{subseqelist})}
15 |   \item{event.list}{A list of events}
16 |   \item{unknown.exclude}{if \code{TRUE} the search is exclusive and returns \code{FALSE} for any subsequence containing an event that is not in \code{event.list}}
17 |   \item{seq}{Deprecated. Use \code{eseq} instead.}
18 |   \item{eventList}{Deprecated. Use \code{event.list} instead.}
19 |   \item{exclude}{Deprecated. Use \code{unknown.exclude} instead.}
20 | }
21 | \details{
22 |   Checks, for each provided event sequence, if it contains one of the events in \code{event.list}.
23 |   If \code{unknown.exclude} is \code{TRUE}, \code{seqecontain} looks if all events of the subsequence are in \code{event.list}.
24 | }
25 | \value{
26 |   A logical vector.
27 | }
28 | \seealso{
29 | \code{\link{seqecreate}} for creating event sequence objects and \code{\link{seqefsub}} for creating event subsequence objects.
30 | }
31 | \examples{
32 | data(actcal.tse)
33 | actcal.eseq <- seqecreate(actcal.tse)
34 | 
35 | ##Searching for frequent subsequences, that is appearing at least 20 times
36 | fsubseq <- seqefsub(actcal.eseq,min.support=20)
37 | 
38 | ##looking for subsequence with FullTime
39 | seqecontain(fsubseq,c("FullTime"))
40 | 
41 | }
42 | % Add one or more standard keywords, see file 'KEYWORDS' in the
43 | % R documentation directory.
44 | %\keyword{ ~kwd2 }% __ONLY ONE__ keyword per line
45 | 
46 | \author{Matthias Studer (with Gilbert Ritschard for the help page)}
47 | % Add one or more standard keywords, see file 'KEYWORDS' in the
48 | % R documentation directory.
49 | \keyword{Event sequences}
50 | 


--------------------------------------------------------------------------------
/man/seqstatf.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqstatf}
 2 | \alias{seqstatf}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{State frequencies in the whole sequence data set}
 5 | \description{
 6 | Overall frequency of each state of the alphabet in the state sequence object.
 7 | }
 8 | \usage{
 9 | seqstatf(seqdata, weighted = TRUE, with.missing=FALSE)
10 | }
11 | %- maybe also 'usage' for other objects documented here.
12 | \arguments{
13 |   \item{seqdata}{a sequence object as defined by the \code{\link{seqdef}} function.}
14 |   \item{weighted}{Logical. Should frequencies account for weights when present in the state sequence object
15 |   (see \code{\link{seqdef}}). Default is \code{TRUE}.}
16 |   \item{with.missing}{Logical. Should non void missing states be treated as regular values? Default is \code{FALSE}.}
17 | }
18 | \details{
19 | The \code{seqstatf} function computes the (weighted) count and frequency of each state of the alphabet in \code{seqdata}, i.e., the (weighted) sum of the occurrences of a state in \code{seqdata}.
20 | }
21 | \value{
22 | A data frame with as many rows as states in the alphabet and two columns, one for the count (Freq) and one for the percentage frequencies (Percent).
23 | }
24 | 
25 | \seealso{
26 | \code{\link{seqstatd}} for the state distribution by time point (position), \code{\link{seqistatd}} for the state distribution within each sequence.
27 | }
28 | 
29 | \examples{
30 | ## Creating a sequence object from the actcal data set
31 | data(actcal)
32 | actcal.lab <- c("> 37 hours", "19-36 hours", "1-18 hours", "no work")
33 | actcal.seq <- seqdef(actcal, 13:24, labels=actcal.lab)
34 | 
35 | ## States frequencies
36 | seqstatf(actcal.seq)
37 | 
38 | ## Example with weights
39 | data(ex1)
40 | ex1.seq <- seqdef(ex1, 1:13, weights=ex1$weights)
41 | 
42 | ## Unweighted
43 | seqstatf(ex1.seq, weighted=FALSE)
44 | 
45 | ## Weighted
46 | seqstatf(ex1.seq, weighted=TRUE)
47 | }
48 | 
49 | \author{Alexis Gabadinho}
50 | 
51 | % Add one or more standard keywords, see file 'KEYWORDS' in the
52 | % R documentation directory.
53 | \keyword{State sequences}
54 | \keyword{Global characteristics}
55 | 
56 | 


--------------------------------------------------------------------------------
/man/seqsubsn.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqsubsn}
 2 | \alias{seqsubsn}
 3 | \title{Number of distinct subsequences in a sequence.}
 4 | \description{
 5 | Computes the number of distinct subsequences in a sequence using Elzinga's algorithm.
 6 | }
 7 | \details{
 8 | The function first searches for missing states in the sequences and if found, adds the missing state to the alphabet for the extraction of the distinct subsequences. A missing state in a sequence is considered as the occurrence of an additional symbol of the alphabet, and two or more consecutive missing states are considered as two or more occurrences of the same state. The \code{with.missing=TRUE} argument is used for calling the \code{\link{seqdss}} function when \code{DSS=TRUE}.
 9 | }
10 | \usage{
11 |  seqsubsn(seqdata, DSS=TRUE, with.missing=FALSE)
12 | }
13 | \arguments{
14 |   \item{seqdata}{a state sequence object as defined by the \code{\link{seqdef}} function.}
15 |   
16 |   \item{DSS}{if \code{TRUE}, the sequences of Distinct Successive States (DSS, see \code{\link{seqdss}}) are first extracted (e.g., the DSS contained in \code{'D-D-D-D-A-A-A-A-A-A-A-D'} is \code{'D-A-D'}), and the number of distinct subsequences in the DSS is computed. If \code{FALSE}, the number of distinct subsequences is computed from sequences as they appear in the input sequence object. Hence the number of distinct subsequences is in most cases much higher with the \code{DSS=FALSE} option.}
17 |       
18 | 	\item{with.missing}{logical: should non-void missing values be treated as a regular state? 
19 |       If \code{FALSE} (default) missing values are ignored.}
20 |       
21 | }
22 | \value{Vector with the number of distinct subsequences for each sequence in the input state sequence object.}
23 | \seealso{
24 |  \code{\link{seqdss}}.
25 | }
26 | \examples{
27 | data(actcal)
28 | actcal.seq <- seqdef(actcal,13:24)
29 | 
30 | ## Number of subsequences with DSS=TRUE
31 | seqsubsn(actcal.seq[1:10,])
32 | 
33 | ## Number of subsequences with DSS=FALSE
34 | seqsubsn(actcal.seq[1:10,],DSS=FALSE)
35 | }
36 | 
37 | \author{Alexis Gabadinho (with Gilbert Ritschard for the help page)}
38 | 
39 | \keyword{Longitudinal characteristics}
40 | 


--------------------------------------------------------------------------------
/man/seqdss.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqdss}
 2 | \alias{seqdss}
 3 | \title{Extract sequences of distinct successive states}
 4 | \description{
 5 | Extract the sequence of distinct successive states from each sequence in a  object.
 6 | }
 7 | \details{
 8 | Returns a sequence object containing the sequences of distinct successive states (DSS). The spell durations are not taken into account. E.g., the DSS contained in \code{'D-D-D-D-A-A-A-A-A-A-A-D'} is \code{'D-A-D'}. Associated durations can be extracted with the \code{\link{seqdur}} function.
 9 | 
10 | When \code{{with.missing=TRUE}}, non-void missing values are considered as a regular state of the alphabet. For example, the DSS of \code{A-A-*-*-*-B-B-C-C-D} is \code{A-*-B-C-D}.
11 | 
12 | When \code{with.missing=FALSE} (default) missing values are ignored and a substring \code{A-A-*-*-*A} for example will be considered as a single spell in \code{A} while the DSS of this substring would be \code{A-*-A} whith \code{with.missing=TRUE}.
13 | 
14 | See \link{seqdef} on options for handling missing values when creating sequence objects.
15 | 
16 | }
17 | \usage{
18 | seqdss(seqdata, with.missing=FALSE)
19 | }
20 | \arguments{
21 |    \item{seqdata}{a sequence object as defined by the \code{\link{seqdef}} function.}
22 | 	\item{with.missing}{Should non-void missing values be considered as regular states? See Details.}
23 | }
24 | \value{a sequence object containing the distinct state sequence (DSS) for each sequence in the object given as argument. }
25 | \seealso{
26 |  \code{\link{seqdur}}.
27 | }
28 | \examples{
29 | ## Creating a sequence object from columns 13 to 24
30 | ## in the 'actcal' example data set
31 | ## Here we retain the first 10 sequences only.
32 | data(actcal)
33 | actcal.seq <- seqdef(actcal[1:10,13:24])
34 | 
35 | ## Retrieving the DSS
36 | actcal.dss <- seqdss(actcal.seq)
37 | 
38 | ## Displaying the DSS for the first 10 sequences
39 | actcal.dss
40 | 
41 | ## Example with with.missing argument
42 | data(ex1)
43 | ex1.seq <- seqdef(ex1[, 1:13])
44 | 
45 | seqdss(ex1.seq)
46 | seqdss(ex1.seq, with.missing=TRUE)
47 | }
48 | \author{Alexis Gabadinho and Gilbert Ritschard}
49 | 
50 | \keyword{Longitudinal characteristics}
51 | 


--------------------------------------------------------------------------------
/src/chisq.cpp:
--------------------------------------------------------------------------------
 1 | #include "TraMineR.h"
 2 | 
 3 | 
 4 | 
 5 | // Getting a element by its names
 6 | 
 7 | 
 8 | extern "C" {
 9 | 
10 |     SEXP tmrChisq(SEXP ChiTableS, SEXP tdimS, SEXP margeS) {
11 | 		TMRLOG(5, "tmrChisq\n");
12 |         SEXP distS;
13 | 		int n = INTEGER(tdimS)[0];
14 | 		int n1 = n-1;
15 | 		PROTECT(distS=Rf_allocVector(REALSXP, n*(n-1)/2));
16 | 		double * dist=REAL(distS);
17 | 		int col =INTEGER(tdimS)[1];
18 | 		double * chitable=REAL(ChiTableS);
19 | 		double * marge=REAL(margeS);
20 | 
21 | 		for(int i=0; i< n1; i++){
22 | 			int base_index= TMRDISTINDEX(i+1, 1, n);
23 | 			for(int j = i+1; j < n; j++){
24 | 				
25 | 				double cmpres=0;
26 | 				for(int c=0; c < col;c++){
27 | 					double diff=chitable[MINDICE(i, c, n)]-chitable[MINDICE(j, c, n)];
28 | 					cmpres += diff*diff/marge[c];
29 | 				}
30 | 				dist[base_index+j] = sqrt(cmpres);
31 | 			}
32 | 		}
33 |         UNPROTECT(1);
34 |         return distS;
35 |     }    
36 | 
37 |     SEXP tmrChisqRef(SEXP ChiTableS, SEXP tdimS, SEXP margeS, SEXP refid) {
38 | 		TMRLOG(5, "tmrChisq\n");
39 |         SEXP distS;
40 | 		int n = INTEGER(tdimS)[0];
41 | 		int nseq = n;
42 | 		int na = n;
43 | 		//int rseq= INTEGER(refid)[0]-1;
44 | 		int rseq1= INTEGER(refid)[0];
45 | 		int rseq2= INTEGER(refid)[1];
46 | 		if (rseq1 < rseq2) {
47 | 			nseq = rseq1;
48 | 			na = nseq * (rseq2 - rseq1);
49 | 		} else {
50 | 			rseq1 = rseq1 - 1;
51 | 		}
52 | 		//int n1 = n-1;
53 | 		PROTECT(distS=Rf_allocVector(REALSXP, na));
54 | 		double * dist=REAL(distS);
55 | 		int col =INTEGER(tdimS)[1];
56 | 		double * chitable=REAL(ChiTableS);
57 | 		double * marge=REAL(margeS);
58 | 
59 | 		for (int rseq=rseq1; rseq<rseq2; rseq++){
60 | 			for(int i=0; i< nseq; i++){
61 | 				//int base_index= TMRDISTINDEX(i+1, 1, n);
62 | 				//for(int j = i+1; j < n; j++){
63 | 					
64 | 				double cmpres=0;
65 | 				if(i!=rseq) {
66 | 					for(int c=0; c < col;c++){
67 | 						double diff=chitable[MINDICE(i, c, n)]-chitable[MINDICE(rseq, c, n)];
68 | 						cmpres += diff*diff/marge[c];
69 | 					}
70 | 				}
71 | 				dist[i + nseq*(rseq-rseq1)] = sqrt(cmpres);
72 | 			}
73 | 		}
74 |         UNPROTECT(1);
75 |         return distS;
76 |     }    
77 | }
78 | 


--------------------------------------------------------------------------------
/R/seqformat-STS_to_TSE.R:
--------------------------------------------------------------------------------
 1 | # Should only be used through seqformat()
 2 | 
 3 | ## ================================
 4 | ## Convert from STS to TSE format
 5 | ## Version 1.3
 6 | ## ================================
 7 | 
 8 | STS_to_TSE <- function(seqdata, id=NULL, tevent) {
 9 | 	nseq <- nrow(seqdata)
10 | 	slength <- ncol(seqdata)
11 | 
12 | 	## Transforming into a matrix dramatically increases speed
13 | 	seqdata <- as.matrix(seqdata)
14 | 
15 | 	if (is.null(id)) id <- 1:nseq
16 | 	maxsize <- nseq*slength
17 | 	ids <- vector(mode=mode(id),length=maxsize)
18 | 	times <- numeric(length=maxsize)
19 | 	events <- vector(mode=mode(tevent),length=maxsize)
20 | 	myi <- 1
21 | 
22 | 	levent <- list()
23 | 	nl <- nrow(tevent)
24 | 	nc <- ncol(tevent)
25 | 
26 | 	## Storing the cells of tevent in a list
27 | 	for (i in 1:nc){
28 | 		for (j in 1:nl) {
29 | 			if (is.character(tevent[j,i])){
30 | 				ll <- strsplit(tevent[j,i],",")[[1]]
31 | 				if(length(ll)==0){
32 | 					ll <- NA
33 | 				}
34 | 				levent[[paste(rownames(tevent)[j],">",colnames(tevent)[i],sep="")]] <- ll
35 | 			} else {
36 | 				levent[[paste(rownames(tevent)[j],">",colnames(tevent)[i],sep="")]] <- as.character(tevent[j,i])
37 | 			}
38 | 		}
39 | 	}
40 | 	for (i in 1:nseq) {
41 | 		## First status=> entrance event (diagonal of tevent)
42 |         s1 <- seqdata[i,1]
43 | 		e1 <- levent[[paste(s1,">",s1,sep="")]]
44 | 
45 | 		if (!is.null(e1) && !is.na(e1[1])) { ## if NA, we don't generate an event
46 | 			for (k in e1){
47 | 				ids[myi] <- id[i]
48 | 				times[myi] <- 0
49 | 				events[myi] <- k
50 | 				myi <- myi+1
51 | 			}
52 | 	     } #end if
53 | 		## Rest of the sequence
54 | 		for (j in 1:(slength-1)) {
55 | 			s1 <- seqdata[i,j] ## Status at time t
56 | 			s2 <- seqdata[i,j+1] ## Status at time t+1
57 | 
58 | 			if (!is.na(s1) && !is.na(s2) && s2!=s1 && s1!="" && s2!="") {
59 | 				ev <- levent[[paste(s1,">",s2,sep="")]]
60 | 				if(!is.na(ev[1])){
61 | 					for (k in ev){
62 | 						ids[myi] <- id[i]
63 | 						times[myi] <- j
64 | 						events[myi] <- k
65 | 						myi <- myi+1
66 | 					}
67 | 				}
68 | 			}
69 | 		}
70 | 	}
71 | 
72 | 	sel <- 1:(myi-1)
73 | 	trans <- data.frame(id=ids[sel],time=times[sel],event=events[sel])
74 | 	return(trans)
75 | }
76 | 


--------------------------------------------------------------------------------
/man/plot.subseqelistchisq.Rd:
--------------------------------------------------------------------------------
 1 | \name{plot.subseqelistchisq}
 2 | \alias{plot.subseqelistchisq}
 3 | \title{Plot discriminant subsequences}
 4 | \description{
 5 | Plot the result of \code{\link{seqecmpgroup}}
 6 | }
 7 | \usage{
 8 | \method{plot}{subseqelistchisq}(x, ylim = "uniform", rows = NA, cols = NA,
 9 |   resid.levels = c(0.05,0.01),
10 |   cpal = brewer.pal(1 + 2 * length(resid.levels), "RdBu"), vlegend = NULL,
11 |   cex.legend = 1, ptype = "freq", legend.title = NULL, 
12 |   with.legend = TRUE, residlevels, legendcol, legend.cex, ...)
13 | }
14 | %- maybe also 'usage' for other objects documented here.
15 | \arguments{
16 |   \item{x}{The subsequences to plot (a \code{subseqelist} object).}
17 |   \item{ylim}{if \code{"uniform"} all axes have same limits.}
18 |   \item{rows}{Number of graphic rows}
19 |   \item{cols}{Number of graphic columns}
20 |   \item{resid.levels}{Significance levels used to colorize the Pearson residual}
21 |   \item{cpal}{Color palette used to color the results}
22 |   \item{vlegend}{When \code{TRUE} the legend is printed vertically, when \code{FALSE} it is printed horizontally. If \code{NULL} (default) the best position will be chosen.}
23 |   \item{cex.legend}{Scale parameters for text legend.}
24 |   \item{ptype}{If set to \code{"resid"}, Pearson residuals are plotted instead of frequencies}
25 |   \item{legend.title}{Legend title.}
26 |   \item{with.legend}{Logical. Should legend be displayed?}
27 |   \item{residlevels}{Deprecated. Use \code{resid.levels} instead.}
28 |   \item{legendcol}{Deprecated. Use \code{vlegend} instead.}
29 |   \item{legend.cex}{Deprecated. Use \code{cex.legend} instead.}
30 |   \item{\dots}{Additional parameters passed to \code{\link{barplot}}}
31 | }
32 | %\details{}
33 | \value{
34 |   nothing
35 | }
36 | %\references{ ~put references to the literature/web site here ~ }
37 | %\author{ ~~who you are~~ }
38 | %\note{ ~~further notes~~
39 | 
40 | % ~Make other sections like Warning with \section{Warning }{....} ~
41 | %}
42 | \seealso{\code{\link{seqecmpgroup}}}
43 | 
44 | \author{Matthias Studer (with Gilbert Ritschard for the help page)}
45 | % Add one or more standard keywords, see file 'KEYWORDS' in the
46 | % R documentation directory.
47 | \keyword{Event sequences}
48 | \keyword{Method}
49 | 


--------------------------------------------------------------------------------
/man/seqibad.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqibad}
 2 | \alias{seqibad}
 3 | %
 4 | \author{Gilbert Ritschard}
 5 | %
 6 | \title{Badness index}
 7 | %
 8 | \description{
 9 |   Badness index of each sequence, i.e. the sum of undesirableness of each state weighted by the potential to integrate that state in the sequence.
10 | }
11 | \usage{
12 |   seqibad(seqdata, pow=1, with.missing=FALSE, ...)
13 | }
14 | \arguments{
15 |   \item{seqdata}{a state sequence object (\code{stslist}) as returned by \code{\link[TraMineR]{seqdef}}.}
16 | 
17 |   \item{pow}{real. Exponent applied to the position in the sequence. Higher value increase the importance of recency (see \code{\link{seqintegration}}). Default is 1.}
18 | 
19 | 	\item{with.missing}{logical: should non-void missing values be treated as a regular state? If \code{FALSE} (default) missing values are ignored.}
20 | 
21 |   \item{...}{arguments such as \code{stprec} or \code{state.order} required by \code{\link{seqprecstart}} to determine/normalize the state undesirableness degrees.}
22 | 
23 | }
24 | \details{
25 |   For each sequence, the badness is the sum of the undesirableness of each state weighted by the potential to integrate the state. As long as \code{pow} is strictly greater than zero, the undesirableness of states occurring at the end of the sequence get higher weights than those at the beginning. The index reaches its maximum 1 for a sequence made of a single spell in the worst state and the minimum 0 for a sequence made of a single spell is the most favorable state.
26 | 
27 | }
28 | 
29 | \value{
30 |   A vector with the badness index for each sequence.
31 | }
32 | 
33 | \references{
34 | Ritschard, G. (2023), "Measuring the nature of individual sequences", \emph{Sociological Methods and Research}, 52(4), 2016-2049. \doi{10.1177/00491241211036156}.
35 | }
36 | 
37 | \seealso{
38 | \code{\link{seqintegr}}, \code{\link{seqidegrad}}, \code{\link{seqprecarity}}
39 | }
40 | \examples{
41 | data(ex1)
42 | sx <- seqdef(ex1[,1:13], right="DEL")
43 | 
44 | seqibad(sx) ## using original alphabet order
45 | seqibad(sx, stprec=c(1,2,3,6)) ## user defined undesirableness values
46 | seqibad(sx, with.missing=TRUE, state.order=c('A','B','C','D'))
47 | 
48 | }
49 | \keyword{Longitudinal characteristics}
50 | 


--------------------------------------------------------------------------------
/demo/Events.R:
--------------------------------------------------------------------------------
 1 | require(grDevices); require(graphics)
 2 | oask <- devAskNewPage(dev.interactive(orNone = TRUE))
 3 | 
 4 | library(TraMineR)
 5 | data(actcal)
 6 | actcal.seq <- seqdef(actcal[,13:24])
 7 | transition <- seqetm(actcal.seq, method="transition")
 8 | transition[1,1:4] <- c("FullTime", "Decrease,PartTime",
 9 | 		"Decrease,LowPartTime", "Stop")
10 | transition[2,1:4] <- c("Increase,FullTime", "PartTime",
11 | 		"Decrease,LowPartTime", "Stop")
12 | transition[3,1:4] <- c("Increase,FullTime", "Increase,PartTime",
13 | 		"LowPartTime", "Stop")
14 | transition[4,1:4] <- c("Start,FullTime", "Start,PartTime",
15 | 		"Start,LowPartTime", "NoActivity")
16 | print(transition)
17 | 
18 | actcal.eseq <- seqecreate(actcal.seq, tevent=transition)
19 | 
20 | fsubseq <- seqefsub(actcal.eseq, min.support=100)
21 | msubcount <- seqeapplysub(fsubseq, method="count")
22 | #First lines...
23 | msubcount[1:9, 6:7]
24 | ## Using time constraints
25 | ## Searching subsequences starting in summer (between June and September)
26 | fsubseq <- seqefsub(actcal.eseq, min.support=10,
27 | 		constraint=seqeconstraint(age.min=6, age.max=9))
28 | fsubseq[1:10]
29 | ## Searching subsequences occurring in summer (between June and September)
30 | fsubseq <- seqefsub(actcal.eseq, min.support=10,
31 | 		constraint=seqeconstraint(age.min=6, age.max=9, age.max.end=9))
32 | fsubseq[1:10]
33 | ## Searching subsequences enclosed in a 6 months period
34 | ## and with a maximum gap of 2 months
35 | fsubseq <- seqefsub(actcal.eseq, min.support=10,
36 | 		constraint=seqeconstraint(max.gap=2, window.size=6))
37 | fsubseq[1:10]
38 | 
39 | ## Looking for frequent subsequences
40 | fsubseq <- seqefsub(actcal.eseq, pmin.support=0.01)
41 | ## Frequences of 10 first subsequences
42 | omar <- par(mar=c(5,4,4,2)+.1)
43 | plot(fsubseq[1:10], col="cyan")
44 | par(omar)
45 | ## looking for subsequence with FullTime
46 | seqecontain(fsubseq, c("FullTime"))
47 | 
48 | 
49 | ## Looking for the discriminating subsequences for sex
50 | sexsubseq <- seqecmpgroup(fsubseq, group=actcal$sex,
51 |                     method="bonferroni")
52 | ## Plotting the ten most discriminating subsequences in 2 x 4 format
53 | ## frequencies
54 | plot(sexsubseq[1:10],ptype="freq")
55 | ## residuals
56 | plot(sexsubseq[1:10],ptype="resid")
57 | 
58 | devAskNewPage(oask)
59 | 


--------------------------------------------------------------------------------
/src/treeeventnode.h:
--------------------------------------------------------------------------------
 1 | #ifndef TREEEVENTNODE_H
 2 | #define TREEEVENTNODE_H
 3 | #include "treeeventmap.h"
 4 | #include<R.h>
 5 | #include <Rinternals.h>
 6 | #include "eventseq.h"
 7 | #include "constraint.h"
 8 | 
 9 | 
10 | class TreeEventMap;
11 | class TreeEventNode {
12 |     //type of the event
13 |     int type;
14 |     //CurrentSupport
15 |     double support;
16 |     //Last sequence that has incremented the support of this subsequence
17 |     int lastID;
18 |     //Next event at same time (but event bigger!)
19 |     TreeEventMap brother;
20 |     //Next event with gap
21 |     TreeEventMap child;
22 |     static int nodeCount;
23 | public:
24 |     static int getNodeCount();
25 |     //Ctor
26 |     TreeEventNode(const int& t);
27 |     //Dtor
28 |     virtual ~TreeEventNode();
29 |     //Ajoute une séquence et l'ensemble des sous-séquences qui la compose. Méthode récursive (dernier paramètre = paramètre interne)
30 |     //void addSequence(Sequence *s,const double &maxGap,const double &windowSize);
31 |     //Ajoute une séquence et l'ensemble des sous-séquences qui la compose. Méthode récursive (dernier paramètre = paramètre interne)
32 | //    void addSequenceInternal(Sequence *s, SequenceEventNode * en, const double &maxGap,const double &windowSize,const double & ageMax, const double &gapConsumed,  const double& currentAge, const int& k, const int&currentK);
33 |     void addSequenceInternal(Sequence *s, SequenceEventNode * en, Constraint * cst, const double &gapConsumed,  const double& currentAge, const int& k, const int&currentK);
34 | 
35 |     //Simplifie l'arbre pour enlever l'ensemble des sous-séquences qui ne satisfont pas le support minimum (nb occurrences)
36 |     void simplifyTree(double minSup);
37 |     //Give an overview of this tree (paramètre prof==profondeur, interne)
38 |     void print(const int & prof=0, const bool& isbrother=true);
39 |     //Type of this event
40 |     const int& getType() {
41 |         return this->type;
42 |     }
43 |     //Actual support of this event
44 |     const double& getSupport() {
45 |         return this->support;
46 |     }
47 |     int countSubsequence(double minSup);
48 |     void getSubsequences(SEXP result,double * isupport, Sequence *s, int *index,const double &step, SEXP classname,EventDictionary * ed);
49 |     void clearSupport();
50 | };
51 | #endif // TREEEVENTNODE_H
52 | 


--------------------------------------------------------------------------------
/man/seqmaintokens.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqmaintokens}
 2 | \alias{seqmaintokens}
 3 | %- Also NEED an '\alias' for EACH other topic documented here.
 4 | \title{Indexes of most frequent tokens
 5 | %%  ~~function to do ... ~~
 6 | }
 7 | \description{Extracts the indexes of the most frequent token, either the \code{k} most frequent tokens or the tokens that occur on average more than \code{mint} times.
 8 | }
 9 | \usage{
10 | seqmaintokens(seqdata, k=8L, mint=NULL, ...)
11 | }
12 | \arguments{
13 |   \item{seqdata}{state sequence \code{stslist} object as produced by \code{\link{seqdef}}.
14 |   }
15 |   \item{k}{Integer. Number of main states.
16 |   }
17 |   \item{mint}{Real. Minimal mean number of occurrences per sequence.
18 |   }
19 |   \item{...}{Additional arguments passed to \code{\link{seqmeant}}
20 |   }
21 | }
22 | 
23 | \details{
24 | When \code{mint} is \code{NULL}, indexes of the \code{k} most frequent tokens. Otherwise, indexes of tokens occurring on average more than \code{tmin} times are returned as long as their number does not exceed \code{k}. If more than \code{k}, indexes of the \code{k} most frequent are returned.
25 | }
26 | 
27 | \value{
28 |  Vector of indexes of the most frequent tokens repecting order of the alphabet.
29 | }
30 | 
31 | \seealso{\code{\link{seqmeant}}
32 | }
33 | 
34 | %%\references{
35 | %% ~put references to the literature/web site here ~
36 | %%}
37 | 
38 | \author{
39 | Gilbert Ritschard
40 | }
41 | 
42 | \examples{
43 | data(biofam)
44 | b.lab <- c("Parent",
45 |            "Left",
46 |            "Married",
47 |            "Left+Marr",
48 |            "Child",
49 |            "Left+Child",
50 |            "Left+Marr+Child",
51 |            "Divorced"
52 | )
53 | b.short <- c("P","L","M","L+M","C","L+C","L+M+C","D")
54 | set.seed(5)
55 | cases <- sample(nrow(biofam),100)
56 | b.seq <- seqdef(biofam[cases,10:25], labels=b.lab, states=b.short,
57 |                 weights=biofam[cases,"wp00tbgs"])
58 | 
59 | ## Tokens occurring at least once on average
60 | alphabet(b.seq)[seqmaintokens(b.seq, mint=1)]
61 | #[1] "P"     "L"     "L+M"   "L+M+C"
62 | 
63 | ## Three more frequent tokens
64 | main.tokens <- seqmaintokens(b.seq, k=3)
65 | ## Labels of main tokens
66 | attr(b.seq,"labels")[main.tokens]
67 | #[1] "Parent"  "Left"    "Left+Marr+Child"
68 | ## Colors of main tokens
69 | cpal(b.seq)[main.tokens]
70 | #[1] "#7FC97F" "#BEAED4" "#BF5B17"} 


--------------------------------------------------------------------------------
/src/prefixtree.cpp:
--------------------------------------------------------------------------------
 1 | #include "prefixtree.h"
 2 | #include "eventseq.h"
 3 | #include "constraint.h"
 4 | //#include "subsequence.h"
 5 | 
 6 | PrefixTree::PrefixTree() {
 7 |     //ctor
 8 | }
 9 | //dtor
10 | PrefixTree::~PrefixTree() {
11 |     /*TreeEventMapIterator it;
12 |     for (it = child.begin();it != child.end();it++) {
13 |         delete it->second;
14 |     }*/
15 |     this->child.clearAllPointers();
16 | }
17 | 
18 | //void PrefixTree::addSequence(Sequence *s,const double &maxGap,const double &windowSize, const double & ageMin, const double & ageMax,const double & ageMaxEnd, const int& k) {
19 | void PrefixTree::addSequence(Sequence *s,Constraint * cst, const int& k) {
20 | 
21 | //subsequences (actually first symbol)
22 |     if (!s->hasEvent())return;
23 |     SequenceEventNode * e=s->getEvent();
24 |     //Iterator to search for brother and child
25 |     TreeEventMapIterator it;
26 |     TreeEventNode *ten=NULL;
27 |     double age=0;
28 |     while (e!=NULL) {
29 |         age+=e->getGap();
30 |         //Only start when ageMin is reached
31 |         if (age>cst->getageMaxBegin()) break;
32 |         if (age>=cst->getageMinBegin()) {
33 |             it=this->child.find(e->getType());
34 |             if (it!=this->child.end()) {
35 |                 it->second->addSequenceInternal(s,e,cst,0,age,k, 2);
36 |             } else if (k==1) { //Build new node only when k==1
37 |                 ten=new TreeEventNode(e->getType());
38 |                 this->child[e->getType()]=ten;
39 |                 //Rprintf("Adding event %i\n",ten->getType());
40 |                 ten->addSequenceInternal(s,e,cst,0,age,k, 2);
41 |             }
42 |         }
43 |         e=e->getNext();//Get Next element
44 |         //Rprintf("Adding starting at event %i",ten->getType());
45 |     }
46 | }
47 | 
48 | 
49 | 
50 | void PrefixTree::simplifyTree(double minSup) {
51 | 
52 |     this->child.simplifyTreeMap(minSup);
53 | }
54 | 
55 | //Give an overview of this tree (paramètre prof==profondeur, interne)
56 | void PrefixTree::print() {
57 |     this->child.print(0,true);
58 | }
59 | 
60 | 
61 | 
62 | int PrefixTree::countSubsequence(double minSup) {
63 |     return this->child.countSubsequence(minSup);
64 | }
65 | 
66 | 
67 | void PrefixTree::getSubsequences(SEXP result,double * support, int *index, SEXP classname,EventDictionary * ed) {
68 |     this->child.getSubsequences(result,support,NULL,index,0,classname,ed);
69 | }
70 | 
71 | 
72 | 
73 | 
74 | 


--------------------------------------------------------------------------------
/R/seqrf.R:
--------------------------------------------------------------------------------
 1 | ## creating rf sequence object
 2 | seqrf <- function(seqdata, diss, k=NULL, sortv="mds", weights=NULL,
 3 |                   weighted=TRUE,
 4 |                   grp.meth = "prop", squared = FALSE, pow = NULL){
 5 | 
 6 |   if (is.null(weights) & weighted) weights <- attr(seqdata,"weights")
 7 |   if (!weighted) weights <- NULL
 8 |   ## computing sortv
 9 |   if (!is.null(sortv)) {
10 |     if (length(sortv)==1){
11 |         if (sortv %in% c("from.start", "from.end")) {
12 |           end <- if (sortv=="from.end") { max(seqlength(seqdata)) } else { 1 }
13 |           beg <- if (sortv=="from.end") { 1 } else { max(seqlength(seqdata)) }
14 | 
15 |           sortv <- do.call(order, as.data.frame(seqdata)[,end:beg])
16 |           sortv <- order(sortv)
17 |         } else if (sortv != "mds") {
18 |             stop(call.=FALSE, "If of length one, sortv must be one of 'mds', 'from.start', and 'from.end'")
19 |         }
20 |     } else if (length(sortv)!=nrow(seqdata)) {
21 |       stop(call.=FALSE, "sortv must contain one value for each row in the sequence object ")
22 |     } else {
23 |       if (is.factor(sortv)) { sortv <- as.integer(sortv) }
24 |     }
25 |   }
26 |   rf <- dissrf(diss, k=k, sortv=sortv, weights=weights,
27 |                grp.meth = grp.meth, squared = squared, pow = pow)
28 |   seqtoplot <- seqdata[rf[["medoids"]],]
29 |   attr(seqtoplot,"weights") <- rf[["heights"]]
30 |   srf <- list(seqtoplot=seqtoplot,rf=rf)
31 |   class(srf) <- c("seqrf",class(srf))
32 |   if (inherits(rf,"dissrfprop")){
33 |     class(srf) <- c("seqrfprop",class(srf))
34 |   }
35 |   else
36 |     class(srf) <- c("seqrfcrisp",class(srf))
37 | 
38 |   return(srf)
39 | }
40 | 
41 | ######
42 | print.seqrf <- function(x, ...){
43 |   print(x[["seqtoplot"]], ...)
44 | }
45 | 
46 | #####
47 | summary.seqrf <- function(object, format="SPS", dist.idx = 1:10, ...){
48 |   #limit <- max(seqlength(seqdss(x[["seqtoplot"]])))
49 |   sry <- summary(object[["rf"]], dist.idx = dist.idx)
50 |   meds <- suppressMessages(seqformat(object[["seqtoplot"]], to=format, compress=TRUE,
51 |                                                  SPS.out = list(xfix = "", sdsep = "/")))
52 |   mnames <- sub("\\..","",x=rownames(meds))
53 |   ##print(id)
54 |   medoids <- data.frame(index=sry[["medoids"]],names=mnames,medoids=meds)
55 |   rownames(medoids)<-NULL
56 |   sry[["medoids"]] <- medoids
57 |   return(sry)
58 | }
59 | 


--------------------------------------------------------------------------------
/R/plot.stslist.meant.R:
--------------------------------------------------------------------------------
 1 | plot.stslist.meant <- function(x, cpal = NULL, ylab = NULL, yaxis = TRUE,
 2 |   xaxis = TRUE, cex.axis = par("cex.axis"), ylim = NULL, bar.labels = NULL,
 3 |   cex.barlab = cex.axis, offset.barlab = .1, cex.plot, ...) {
 4 | 
 5 |   TraMineR.check.depr.args(alist(cex.axis = cex.plot))
 6 | 
 7 | 	## Storing the optional graphical parameters in a list
 8 | 	glist <- list(...)
 9 |     parlist <- par()
10 |     glist <- glist[names(glist) %in% names(parlist)]
11 | 
12 |   sep.ylab <- (isFALSE(yaxis) && (is.null(ylab) || !is.na(ylab)))
13 |   cex.lab <- par("cex.lab")
14 |   if ("cex.lab" %in% names(list(...))) cex.lab <- list(...)[["cex.lab"]]
15 | 
16 |   n <- attr(x,"nbseq")
17 |   seql <- length(attr(x,"xtlab"))
18 |   errbar <- attr(x, "se")
19 | 
20 |   weighted <- attr(x, "weighted")
21 |   if (weighted) {wlab <- "weighted "}
22 |   else {wlab <- NULL}
23 | 
24 |   if (is.null(ylab))
25 |     ylab <- paste("Mean time (", wlab, "n=",round(n,2),")",sep="")
26 | 
27 |   if (is.null(ylim))
28 |     ylim <- c(0,seql)
29 | 
30 |   if (is.null(cpal))
31 |     cpal <- attr(x,"cpal")
32 | 
33 |   if (sep.ylab) {
34 |       sylab <- ylab
35 |       ylab <- NA
36 |   }
37 | 
38 |   mt <- as.vector(x[,"Mean"])
39 |   mp <- barplot(mt,
40 |           ## mgp=c(2.5,0.6,0),
41 |           names.arg=if (xaxis) rownames(x) else NULL,
42 |           cex.names=cex.axis,
43 |           cex.axis=cex.axis,
44 |           col=cpal,
45 |           ylim=ylim,
46 |           ylab=ylab,
47 |           axes=FALSE,
48 |           ...)
49 | 
50 |   ## Plotting the axes
51 |   ## axis(1, at=1:nbstat, labels=ltext, cex.axis=cex.axis)
52 | 
53 |   if (yaxis){
54 |     plist <- list(side=2, at=round(seq(0, max(ylim), length.out=6),0), cex.axis=cex.axis)
55 |     do.call(axis, args=c(plist,glist))
56 |     #axis(2, at=round(seq(0, max(ylim), length.out=6),0), cex.axis=cex.axis, ...)
57 |   }
58 | 
59 |   if (errbar){
60 |     se.mt <- x[,"SE"]
61 |     xx <- 1:nrow(x)
62 |     df=attr(x,"nbseq")-1
63 |     if(df >= 1) {
64 |        qt <- qt(.975, df=df)
65 |        tmr.errbar(1.2*xx - .5, mt, mt-qt*se.mt, mt+qt*se.mt, add=TRUE)
66 |     }
67 |     else {
68 |        	warning(paste("Error bars not displayed because df =", df, "too small"))
69 |     }
70 |   }
71 | 
72 |   if (!is.null(bar.labels)) {
73 |     text(mp, mt + offset.barlab*ylim[2], format(bar.labels), xpd = TRUE, cex = cex.barlab)
74 | 
75 |   }
76 |     if (sep.ylab)
77 |         title(ylab=sylab, line=1, cex.lab=cex.lab)
78 | 
79 | }
80 | 


--------------------------------------------------------------------------------
/R/seqtab.R:
--------------------------------------------------------------------------------
 1 | ## =========================
 2 | ## Sequences frequency table
 3 | ## =========================
 4 | 
 5 | seqtab <- function(seqdata, idxs = 1:10, weighted = TRUE, format = "SPS", tlim) {
 6 | 
 7 |   TraMineR.check.depr.args(alist(idxs = tlim))
 8 | 
 9 | 	if (!inherits(seqdata,"stslist"))
10 | 		stop("data is not a sequence object, use seqdef function to create one")
11 | 
12 |   if(is.null(idxs) || (length(idxs)>1 && min(idxs)<1) || any(idxs<0))
13 | 		stop("idxs should be a non negative integer or a strictly positive vector.")
14 | 
15 | 
16 | 	## Eliminating empty sequences
17 | 	seqdata <- seqdata[rowSums(seqdata!=attr(seqdata,"nr"))!=0,]
18 | 
19 | 	## Weights
20 | 	weights <- attr(seqdata, "weights")
21 | 
22 | 	if (!weighted || is.null(weights)) {
23 | 		weights <- rep(1.0, nrow(seqdata))
24 | 	}
25 | 	## Also takes into account that in unweighted sequence objects created with
26 | 	## older TraMineR versions the weights attribute is a vector of 1
27 | 	## instead of NULL
28 | 	if (all(weights==1))
29 | 		weighted <- FALSE
30 | 
31 | 	if (seqfcheck(seqdata)=="-X")
32 | 		warning("'-' character in state codes may cause invalid results")
33 | 
34 | 	if (format=="SPS") {
35 | 		seqlist <- suppressMessages(seqformat(seqdata, from = "STS", to = "SPS",
36 | 		  SPS.out = list(xfix = "", sdsep = "/"), compress = TRUE))
37 | 	}
38 | 	else if (format=="STS")
39 | 		seqlist <- seqconc(seqdata)
40 | 	else
41 | 		stop("Format must be one of: STS or SPS")
42 | 
43 | 	Freq <- tapply(weights, seqlist, sum)
44 | 
45 | 	Freq <- sort(Freq, decreasing=TRUE)
46 | 	Percent <- Freq/sum(Freq)*100
47 | 
48 | 	nbuseq <- length(Freq)
49 | 
50 |   if (idxs[1]==0 || max(idxs)>nbuseq) {
51 | 		idxs <- 1:nbuseq
52 | 	}
53 | 
54 | 	##if (nbuseq >1){
55 |         idxf <- match(names(Freq), seqlist)[idxs]
56 |         res <- seqdata[idxf,]
57 |     ##}
58 |     ##else {
59 |     ##    res <- names(Freq)
60 |     ##}
61 | 	table <- data.frame(Freq, Percent)[idxs,]
62 | 
63 | 	## ==================================
64 | 	## DEFINING CLASS AND SOME ATTRIBUTES
65 | 	## ==================================
66 | 	class(res) <- c("stslist.freq",class(res))
67 | 
68 | 	## Setting the weights of the object equal to the frequencies
69 | 	attr(res, "weights") <- table$Freq
70 | 
71 | 	attr(res,"freq") <- table
72 | 	attr(res,"nbseq") <- sum(weights)
73 | 	attr(res,"weighted") <- weighted
74 | 	attr(res,"idxs") <- idxs
75 | 	attr(res,"idxf") <- idxf
76 | 	attr(res,"format") <- format
77 | 
78 | 	return(res)
79 | }
80 | 


--------------------------------------------------------------------------------
/man/seqtransn.Rd:
--------------------------------------------------------------------------------
 1 | \name{seqtransn}
 2 | \alias{seqtransn}
 3 | \title{Number of transitions in a sequence}
 4 | \description{
 5 | Computes the number of transitions (state changes) in each sequence of a sequence object.
 6 | }
 7 | \usage{
 8 | seqtransn(seqdata, with.missing=FALSE, norm=FALSE, pweight=FALSE)
 9 | }
10 | \arguments{
11 | 	\item{seqdata}{a state sequence object as defined by the
12 | \code{\link{seqdef}} function.}
13 | 
14 | 	\item{with.missing}{logical: should non-void missing values be treated as a regular state? If \code{FALSE} (default) missing values are ignored.}
15 | 
16 | 	\item{norm}{logical. If set as \code{TRUE}, the number of transitions is
17 | divided by its theoretical maximum, length of the sequence minus
18 | 1. When the length of the sequence is 1, the normalized value is set as 0.}
19 | 
20 | 	\item{pweight}{logical. EXPERIMENTAL! If set as \code{TRUE}, return count of transitions weighted
21 | by their probability to not occur to give higher weights to rare transitions.}
22 | }
23 | 
24 | \details{A transition in a sequence is a state change between
25 | time/position \eqn{t} and \eqn{t+1}. For example, the sequence
26 | \code{"A-A-A-A-B-B-A-D-D-D"} contains 3 transitions. The maximum
27 | number of transitions a sequence can contain is \eqn{\ell-1}{l-1}
28 | where \eqn{\ell}{l} is the length of the sequence. The number of
29 | transitions is obtained by subtracting 1 to the length of the sequence
30 | of distinct successive states (DSS). }
31 | 
32 | \value{a one column matrix with the number of transitions
33 | in each sequence.}
34 | 
35 | \references{
36 | Gabadinho, A., G. Ritschard, N. S. Müller and M. Studer (2011). Analyzing and Visualizing State Sequences in R with TraMineR. \emph{Journal of Statistical Software} \bold{40}(4), 1-37.
37 | }
38 | 
39 | \seealso{
40 |  \code{\link{seqdss}}.
41 | }
42 | 
43 | \examples{
44 | ## Creating a sequence object from columns 13 to 24
45 | ## in the 'actcal' example data set
46 | data(actcal)
47 | actcal.seq <- seqdef(actcal,13:24)
48 | 
49 | ## Computing the number of transitions
50 | actcal.trans <- seqtransn(actcal.seq)
51 | 
52 | ## Displaying number of transitions in the first 10 sequences
53 | actcal.trans[1:10]
54 | 
55 | ## Example with with.missing argument
56 | data(ex1)
57 | ex1.seq <- seqdef(ex1, 1:13)
58 | 
59 | seqtransn(ex1.seq)
60 | seqtransn(ex1.seq, with.missing=TRUE)
61 | }
62 | 
63 | \author{Alexis Gabadinho (with Gilbert Ritschard for the help page)}
64 | 
65 | \keyword{Longitudinal characteristics}
66 | 


--------------------------------------------------------------------------------