├── .Rinstignore ├── src ├── .gitignore ├── Makevars ├── erfz.c ├── init.c └── RobStatTM.h ├── data ├── bus.RData ├── algae.RData ├── flour.RData ├── glass.RData ├── image.RData ├── oats.RData ├── resex.RData ├── shock.RData ├── skin.RData ├── waste.RData ├── wine.RData ├── alcohol.RData ├── biochem.RData ├── hearing.RData ├── leuk.dat.RData ├── mineral.RData ├── vehicle.RData ├── neuralgia.RData ├── stackloss.RData └── breslow.dat.RData ├── sandbox └── SSRN-id3902862.pdf ├── vignettes ├── VignetteRobStatTM.pdf ├── VignetteRobStatTM.pdf.asis ├── fitmodelsusingRobStatTM.pdf ├── PolynomialOptandmOptRhoFunctions.pdf ├── fitmodelsusingRobStatTM.pdf.asis ├── OptimalBiasRobustRegressionPsiandRho.pdf ├── OptimalBiasRobustRegressionPsiandRho.pdf.asis └── PolynomialOptandmOptRhoFunctions.pdf.asis ├── .Rbuildignore ├── R ├── neuralgia.R ├── biochem.R ├── oats.R ├── flour.R ├── image.R ├── resex.R ├── mineral.R ├── skin.R ├── shock.R ├── stackloss.R ├── leuk.dat.R ├── glass.R ├── alcohol.R ├── hearing.R ├── utils.R ├── print.lsRobTest.R ├── wine.R ├── waste.R ├── algae.R ├── bus.R ├── vehicle.R ├── breslow.dat.R ├── fastmve.R ├── INVTR2.R ├── prcompRob.R ├── RobPCA_SM.R ├── WMLlogreg.R ├── lmrob.lar.R ├── lsRobTestMM.R └── MLocDis.R ├── man ├── neuralgia.Rd ├── print.lsRobTest.Rd ├── biochem.Rd ├── oats.Rd ├── flour.Rd ├── image.Rd ├── resex.Rd ├── mineral.Rd ├── huber.Rd ├── bisquare.Rd ├── skin.Rd ├── opt.Rd ├── stackloss.Rd ├── optv0.Rd ├── shock.Rd ├── mopt.Rd ├── moptv0.Rd ├── leuk.dat.Rd ├── glass.Rd ├── alcohol.Rd ├── hearing.Rd ├── cov.dcml.Rd ├── wine.Rd ├── prcompRob.Rd ├── MMPY.Rd ├── waste.Rd ├── INVTR2.Rd ├── rhoprime.Rd ├── algae.Rd ├── rhoprime2.Rd ├── SMPY.Rd ├── DCML.Rd ├── rob.linear.test.Rd ├── rho.Rd ├── bus.Rd ├── vehicle.Rd ├── WMLlogreg.Rd ├── lmrobdetMM.RFPE.Rd ├── KurtSDNew.Rd ├── SMPCA.Rd ├── MLocDis.Rd ├── fastmve.Rd ├── MMultiSHR.Rd ├── BYlogreg.Rd ├── breslow.dat.Rd ├── lsRobTestMM.Rd ├── WBYlogreg.Rd ├── refine.sm.Rd ├── drop1.lmrobdetMM.Rd ├── 
covClassic.Rd ├── Multirobu.Rd ├── lmrobM.control.Rd ├── scaleM.Rd ├── RockeMulti.Rd ├── step.lmrobdetMM.Rd ├── lmrobdetDCML.Rd └── lmrobM.Rd ├── .gitignore ├── inst └── scripts │ ├── ExactFit.R │ ├── biochem.R │ ├── MA1-AO.R │ ├── algae.R │ ├── flour.R │ ├── step.R │ ├── wine.R │ ├── ar1.R │ ├── bus.R │ ├── vehicle.R │ ├── skin.R │ ├── shock.R │ ├── leukemia.R │ ├── fitmodelsRobStatTM.R │ ├── identMA1.R │ ├── ar3.R │ ├── wood.R │ ├── resex.R │ ├── identAR2.R │ ├── oats.R │ ├── wine1.R │ ├── mineral.R │ ├── autism.R │ ├── epilepsy.R │ └── VignetteRobStatTM.R ├── NAMESPACE ├── DESCRIPTION ├── README.md └── NEWS.md /.Rinstignore: -------------------------------------------------------------------------------- 1 | inst/doc/Makefile 2 | -------------------------------------------------------------------------------- /src/.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.so 3 | *.dll 4 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_LIBS= $(LAPACK_LIBS) $(BLAS_LIBS) $(FLIBS) 2 | -------------------------------------------------------------------------------- /data/bus.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/bus.RData -------------------------------------------------------------------------------- /data/algae.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/algae.RData -------------------------------------------------------------------------------- /data/flour.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/flour.RData 
-------------------------------------------------------------------------------- /data/glass.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/glass.RData -------------------------------------------------------------------------------- /data/image.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/image.RData -------------------------------------------------------------------------------- /data/oats.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/oats.RData -------------------------------------------------------------------------------- /data/resex.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/resex.RData -------------------------------------------------------------------------------- /data/shock.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/shock.RData -------------------------------------------------------------------------------- /data/skin.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/skin.RData -------------------------------------------------------------------------------- /data/waste.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/waste.RData -------------------------------------------------------------------------------- /data/wine.RData: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/wine.RData -------------------------------------------------------------------------------- /data/alcohol.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/alcohol.RData -------------------------------------------------------------------------------- /data/biochem.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/biochem.RData -------------------------------------------------------------------------------- /data/hearing.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/hearing.RData -------------------------------------------------------------------------------- /data/leuk.dat.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/leuk.dat.RData -------------------------------------------------------------------------------- /data/mineral.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/mineral.RData -------------------------------------------------------------------------------- /data/vehicle.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/vehicle.RData -------------------------------------------------------------------------------- /data/neuralgia.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/neuralgia.RData -------------------------------------------------------------------------------- 
/data/stackloss.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/stackloss.RData -------------------------------------------------------------------------------- /data/breslow.dat.RData: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/data/breslow.dat.RData -------------------------------------------------------------------------------- /sandbox/SSRN-id3902862.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/sandbox/SSRN-id3902862.pdf -------------------------------------------------------------------------------- /vignettes/VignetteRobStatTM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/vignettes/VignetteRobStatTM.pdf -------------------------------------------------------------------------------- /vignettes/VignetteRobStatTM.pdf.asis: -------------------------------------------------------------------------------- 1 | %\VignetteIndexEntry{RobStatTM Package Vignette} 2 | %\VignetteEngine{R.rsp::asis} 3 | 4 | -------------------------------------------------------------------------------- /vignettes/fitmodelsusingRobStatTM.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/vignettes/fitmodelsusingRobStatTM.pdf -------------------------------------------------------------------------------- /vignettes/PolynomialOptandmOptRhoFunctions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/vignettes/PolynomialOptandmOptRhoFunctions.pdf 
-------------------------------------------------------------------------------- /vignettes/fitmodelsusingRobStatTM.pdf.asis: -------------------------------------------------------------------------------- 1 | %\VignetteIndexEntry{Using the fit.models Package with RobStatTM} 2 | %\VignetteEngine{R.rsp::asis} 3 | 4 | -------------------------------------------------------------------------------- /vignettes/OptimalBiasRobustRegressionPsiandRho.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/msalibian/RobStatTM/HEAD/vignettes/OptimalBiasRobustRegressionPsiandRho.pdf -------------------------------------------------------------------------------- /vignettes/OptimalBiasRobustRegressionPsiandRho.pdf.asis: -------------------------------------------------------------------------------- 1 | %\VignetteIndexEntry{Optimal Bias Robust Regression Psi and Rho} 2 | %\VignetteEngine{R.rsp::asis} 3 | 4 | -------------------------------------------------------------------------------- /vignettes/PolynomialOptandmOptRhoFunctions.pdf.asis: -------------------------------------------------------------------------------- 1 | %\VignetteIndexEntry{Polynomial Opt and mOpt Rho Functions in RobStatTM} 2 | %\VignetteEngine{R.rsp::asis} 3 | 4 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^Meta$ 2 | ^doc$ 3 | ^LICENSE\.md$ 4 | ^.*\.Rproj$ 5 | ^\.Rproj\.user$ 6 | vignettes/VignetteRobStatTM.lyx 7 | vignettes/VignetteRobStatTM.R 8 | ^README\.* 9 | sandbox 10 | -------------------------------------------------------------------------------- /R/neuralgia.R: -------------------------------------------------------------------------------- 1 | #' Neuralgia data 2 | #' 3 | #' Neuralgia data. More details here. 
4 | #' 5 | #' @docType data 6 | #' 7 | #' @usage data(neuralgia) 8 | #' 9 | #' @format An object of class \code{"data.frame"}. 10 | #' 11 | #' @references References go here. 12 | #' 13 | #' @source Source goes here. 14 | #' 15 | #' @examples 16 | #' data(neuralgia) 17 | "neuralgia" -------------------------------------------------------------------------------- /R/biochem.R: -------------------------------------------------------------------------------- 1 | #' Biochem data 2 | #' 3 | #' Two biochemical measurements on 12 men with similar weights. 4 | #' 5 | #' Format: Numeric, 12 rows, two columns 6 | #' 7 | #' @docType data 8 | #' 9 | #' @usage data(biochem) 10 | #' 11 | #' @format An object of class \code{"data.frame"}. 12 | #' 13 | #' @source Seber, G.A.F. (1984), Multivariate Observations. New York: John Wiley. 14 | #' 15 | #' @examples 16 | #' data(biochem) 17 | "biochem" 18 | -------------------------------------------------------------------------------- /R/oats.R: -------------------------------------------------------------------------------- 1 | #' Oats data 2 | #' 3 | #' Yield of grain for eight varieties of oats in five replications of a randomized-block experiment 4 | #' 5 | #' Format: Two-way ANOVA table with 8 rows and 5 columns. 6 | #' 7 | #' @docType data 8 | #' 9 | #' @usage data(oats) 10 | #' 11 | #' @format An object of class \code{"data.frame"}. 12 | #' 13 | #' @references References go here. 14 | #' 15 | #' @source Scheffe, H. (1959), Analysis of Variance. New York: John Wiley. 
16 | #' 17 | #' @examples 18 | #' data(oats) 19 | "oats" 20 | -------------------------------------------------------------------------------- /man/neuralgia.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/neuralgia.R 3 | \docType{data} 4 | \name{neuralgia} 5 | \alias{neuralgia} 6 | \title{Neuralgia data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Source goes here. 12 | } 13 | \usage{ 14 | data(neuralgia) 15 | } 16 | \description{ 17 | Neuralgia data. More details here. 18 | } 19 | \examples{ 20 | data(neuralgia) 21 | } 22 | \references{ 23 | References go here. 24 | } 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /R/flour.R: -------------------------------------------------------------------------------- 1 | #' Flour data 2 | #' 3 | #' Determinations of the copper content in wholemeal flour 4 | #' (in parts per million), sorted in ascending order. 5 | #' Format: numeric vector of size 24. 6 | #' 7 | #' @docType data 8 | #' 9 | #' @usage data(flour) 10 | #' 11 | #' @format An object of class \code{"data.frame"}. 12 | #' 13 | #' @references References go here. 14 | #' 15 | #' @source Analytical Methods Committee (1989), Robust statistics-How not to reject 16 | #' outliers, Analyst, 114, 1693-1702. 17 | #' 18 | #' @examples 19 | #' data(flour) 20 | "flour" 21 | -------------------------------------------------------------------------------- /R/image.R: -------------------------------------------------------------------------------- 1 | #' Image data 2 | #' 3 | #' These data are part of a synthetic aperture satellite radar image corresponding 4 | #' to a suburb of Munich, and contain the values corresponding to three frequency 5 | #' bands for each of 1573 pixels of a radar image. 6 | #' 7 | #' Format: 1573 cases and 3 variables. 
8 | #' 9 | #' @docType data 10 | #' 11 | #' @usage data(image) 12 | #' 13 | #' @format An object of class \code{"data.frame"}. 14 | #' 15 | #' @source Source: Frery, A. (2005), Personal communication. 16 | #' 17 | #' @examples 18 | #' data(image) 19 | "image" 20 | -------------------------------------------------------------------------------- /man/print.lsRobTest.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/print.lsRobTest.R 3 | \name{print.lsRobTest} 4 | \alias{print.lsRobTest} 5 | \title{Print an lsRobTest Object} 6 | \usage{ 7 | \method{print}{lsRobTest}(x, digits = 4, ...) 8 | } 9 | \arguments{ 10 | \item{x}{lmrobdetMM fitted model object} 11 | 12 | \item{digits}{significant digits printed, default digits = 4} 13 | 14 | \item{...}{pass through parameters} 15 | } 16 | \value{ 17 | print selected components of lmrobdetMM object 18 | } 19 | \description{ 20 | Print an lsRobTest Object 21 | } 22 | -------------------------------------------------------------------------------- /man/biochem.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/biochem.R 3 | \docType{data} 4 | \name{biochem} 5 | \alias{biochem} 6 | \title{Biochem data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Seber, G.A.F. (1984), Multivariate Observations. New York: John Wiley. 12 | } 13 | \usage{ 14 | data(biochem) 15 | } 16 | \description{ 17 | Two biochemical measurements on 12 men with similar weights. 
18 | } 19 | \details{ 20 | Format: Numeric, 12 rows, two columns 21 | } 22 | \examples{ 23 | data(biochem) 24 | } 25 | \keyword{datasets} 26 | -------------------------------------------------------------------------------- /R/resex.R: -------------------------------------------------------------------------------- 1 | #' Resex data 2 | #' 3 | #' A monthly series of inward movement of residential telephone extensions in a 4 | #' fixed geographic area from January 1966 to May 1973. 5 | #' 6 | #' Format: numeric vector of size 89. 7 | #' 8 | #' @docType data 9 | #' 10 | #' @usage data(resex) 11 | #' 12 | #' @format An object of class \code{"data.frame"}. 13 | #' 14 | #' @references Brubacher, S.R. (1974), Time series outlier detection and modeling 15 | #' with interpolation, Bell Laboratories Technical Memo. 16 | #' 17 | #' @source Source Engineering, 2nd. Edition, New York, John Wiley. 18 | #' 19 | #' @examples 20 | #' data(resex) 21 | "resex" 22 | -------------------------------------------------------------------------------- /man/oats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/oats.R 3 | \docType{data} 4 | \name{oats} 5 | \alias{oats} 6 | \title{Oats data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Scheffe, H. (1959), Analysis of Variance. New York: John Wiley. 12 | } 13 | \usage{ 14 | data(oats) 15 | } 16 | \description{ 17 | Yield of grain for eight varieties of oats in five replications of a randomized-block experiment 18 | } 19 | \details{ 20 | Format: Two-way ANOVA table with 8 rows and 5 columns. 21 | } 22 | \examples{ 23 | data(oats) 24 | } 25 | \references{ 26 | References go here.
27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /man/flour.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/flour.R 3 | \docType{data} 4 | \name{flour} 5 | \alias{flour} 6 | \title{Flour data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Analytical Methods Committee (1989), Robust statistics-How not to reject 12 | outliers, Analyst, 114, 1693-1702. 13 | } 14 | \usage{ 15 | data(flour) 16 | } 17 | \description{ 18 | Determinations of the copper content in wholemeal flour 19 | (in parts per million), sorted in ascending order. 20 | Format: numeric vector of size 24. 21 | } 22 | \examples{ 23 | data(flour) 24 | } 25 | \references{ 26 | References go here. 27 | } 28 | \keyword{datasets} 29 | -------------------------------------------------------------------------------- /R/mineral.R: -------------------------------------------------------------------------------- 1 | #' Mineral data 2 | #' 3 | #' Contents (in parts per million) of 22 chemical elements in 4 | #' 53 samples of rocks in Western Australia. 5 | #' Two columns (8 and 9) were selected for use in this book. 6 | #' 7 | #' Format: Numeric with 53 rows and 2 columns: 8 | #' 9 | #' @docType data 10 | #' 11 | #' @usage data(mineral) 12 | #' 13 | #' @format An object of class \code{"data.frame"}. 14 | #' 15 | #' @source Smith, R.E., Campbell, N.A. and Lichfield, A. (1984), Multivariate 16 | #' statistical techniques applied to pisolitic laterite geochemistry at Golden Grove, 17 | #' Western Australia, Journal of Geochemical Exploration, 22, 193-216. 
18 | #' 19 | #' @examples 20 | #' data(mineral) 21 | "mineral" 22 | -------------------------------------------------------------------------------- /man/image.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/image.R 3 | \docType{data} 4 | \name{image} 5 | \alias{image} 6 | \title{Image data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Source: Frery, A. (2005), Personal communication. 12 | } 13 | \usage{ 14 | data(image) 15 | } 16 | \description{ 17 | These data are part of a synthetic aperture satellite radar image corresponding 18 | to a suburb of Munich, and contain the values corresponding to three frequency 19 | bands for each of 1573 pixels of a radar image. 20 | } 21 | \details{ 22 | Format: 1573 cases and 3 variables. 23 | } 24 | \examples{ 25 | data(image) 26 | } 27 | \keyword{datasets} 28 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | Meta 2 | doc 3 | # History files 4 | .Rhistory 5 | .Rapp.history 6 | 7 | # Session Data files 8 | .RData 9 | # Example code in package build process 10 | *-Ex.R 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | # Output files from R CMD check 14 | /*.Rcheck/ 15 | # RStudio files 16 | .Rproj.user/ 17 | *.Rproj 18 | # produced vignettes 19 | vignettes/*.html 20 | vignettes/*.log 21 | vignettes/*.tex 22 | vignettes/*.gz 23 | vignettes/*.aux 24 | vignettes/*.out 25 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 26 | .httr-oauth 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | # Temporary files created by R markdown 31 | *.utf8.md 32 | *.knit.md 33 | .Rproj.user 34 | -------------------------------------------------------------------------------- /inst/scripts/ExactFit.R: 
-------------------------------------------------------------------------------- 1 | # ExactFit.R 2 | # EXAMPLE 5.5 3 | 4 | library(RobStatTM) 5 | set.seed(1003) 6 | n <- 100 7 | m <- 50 8 | rr <- rnorm(m) 9 | x1 <- sort(rnorm(n)) 10 | x2 <- sort(rr)*2 11 | sig <- 0.1 12 | y1 <- x1 + sig*rnorm(n) # "good" data 13 | y2 <- -x2 + sig*rnorm(m) # outliers 14 | x <- c(x1,x2) 15 | y <- c(y1,y2) 16 | out1 <- lm(y~x) # LSE 17 | out2 <- lmrobdetMM(y~x) #MM 18 | # lmrobdetMM uses the default family = "mopt" and efficiency = .95 19 | 20 | plot(y ~ x, pch=19, col='gray30') 21 | abline(out1, lwd=3, col='blue3') 22 | abline(out2, lwd=3, col='red3') 23 | text(c(-3.5,3),c(0,2),c("LS","MM"), cex=1.3, col=c('blue3', 'red3')) 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /man/resex.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/resex.R 3 | \docType{data} 4 | \name{resex} 5 | \alias{resex} 6 | \title{Resex data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Source Engineering, 2nd. Edition, New York, John Wiley. 12 | } 13 | \usage{ 14 | data(resex) 15 | } 16 | \description{ 17 | A monthly series of inward movement of residential telephone extensions in a 18 | fixed geographic area from January 1966 to May 1973. 19 | } 20 | \details{ 21 | Format: numeric vector of size 89. 22 | } 23 | \examples{ 24 | data(resex) 25 | } 26 | \references{ 27 | Brubacher, S.R. (1974), Time series outlier detection and modeling 28 | with interpolation, Bell Laboratories Technical Memo.
29 | } 30 | \keyword{datasets} 31 | -------------------------------------------------------------------------------- /R/skin.R: -------------------------------------------------------------------------------- 1 | #' Skin data 2 | #' 3 | #' These data correspond to a study of the relationship between air inspiration and 4 | #' blood circulation in the skin. 5 | #' 6 | #' Description: The covariates are the logarithms of the volume of air inspired (log VOL) 7 | #' and of the inspiration rate (log RATE). The response (column 3) is the presence or 8 | #' absence of vasoconstriction of the skin of the digits after air inspiration. Format 9 | #' Numeric, 23 rows and 3 columns. 10 | #' 11 | #' @docType data 12 | #' 13 | #' @usage data(skin) 14 | #' 15 | #' @format An object of class \code{"data.frame"}. 16 | #' 17 | #' @source Finney, D.J. (1947), The estimation from individual records of the relationship 18 | #' between dose and quantal response, Biometrika, 34, 320-334. 19 | #' 20 | #' @examples 21 | #' data(skin) 22 | "skin" 23 | -------------------------------------------------------------------------------- /R/shock.R: -------------------------------------------------------------------------------- 1 | #' Shock data 2 | #' 3 | #' Times recorded for a rat to go through a shuttlebox in successive attempts. If the 4 | #' time exceeded 5 seconds, the rat received an electric shock for the duration of the 5 | #' next attempt. The data are the number of shocks received and the average time for 6 | #' all attempts between shocks. 7 | #' 8 | #' Format: Numeric matrix with 16 rows and 2 columns 9 | #' @docType data 10 | #' 11 | #' @usage data(shock) 12 | #' 13 | #' @format An object of class \code{"data.frame"}. 14 | #' 15 | #' @references References go here. 16 | #' 17 | #' @source Bond, N.W. (1979), Impairment of shuttlebox avoidance-learning following repeated 18 | #' alcohol withdrawal episodes in rats, Pharmacology, Biochemistry and Behavior, 19 | #' 11, 589-591. 
20 | #' 21 | #' @examples 22 | #' data(shock) 23 | "shock" 24 | -------------------------------------------------------------------------------- /R/stackloss.R: -------------------------------------------------------------------------------- 1 | #' Stackloss data 2 | #' 3 | #' Observations from 21 days operation of a plant for the oxidation 4 | #' of ammonia as a stage in the production of nitric acid. 5 | #' 6 | #' Format: 21 cases and 4 continuous variables. 7 | #' Description: The columns are: 8 | #' 1. air flow 9 | #' 2. cooling water inlet temperature (C) 10 | #' 3. acid concentration (%) 11 | #' 4. Stack loss, defined as the percentage of ingoing ammonia 12 | #' that escapes unabsorbed (response) 13 | #' 14 | #' @docType data 15 | #' 16 | #' @usage data(stackloss) 17 | #' 18 | #' @format An object of class \code{"data.frame"}. 19 | #' 20 | #' @source Brownlee, K.A. (1965), Statistical Theory and Methodology in Science and 21 | #' Engineering, 2nd Edition, New York: John Wiley & Sons, Inc. 22 | #' 23 | #' @examples 24 | #' data(stackloss) 25 | "stackloss" 26 | -------------------------------------------------------------------------------- /inst/scripts/biochem.R: -------------------------------------------------------------------------------- 1 | # biochem.R 2 | # EXAMPLE 6.1 3 | # Figures 6.1, 6.2 4 | # Table 6.1 5 | 6 | data(biochem, package='RobStatTM') 7 | X <- as.matrix(biochem) 8 | colnames(X) <- c('Phosphate', 'Chloride') 9 | plot(X, pch=19, main='Biochem Data scatterplot') 10 | text(.95,4.4,"3",col = "black",cex = 1.2) 11 | 12 | qqnorm(X[,1], pch=19) 13 | qqline(X[,1], lwd=2, col='gray20') 14 | 15 | mu <- colMeans(X) 16 | cov.mat <- var(X) 17 | vv <- diag(cov.mat) 18 | rho <- cor(X)[1,2] 19 | a <- cbind(t(mu), t(vv), rho) 20 | X3 <- X[-3,] #delete obs. 
3 and recompute 21 | mu2 <- colMeans(X3) 22 | cov.mat2 <- var(X3) 23 | vv2 <- diag(cov.mat2) 24 | rho2 <- cor(X3)[1,2] 25 | a2 <- cbind(t(mu2), t(vv2), rho2) 26 | print("Means Vars, Correl") 27 | print(round(rbind(a,a2),2)) 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /man/mineral.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/mineral.R 3 | \docType{data} 4 | \name{mineral} 5 | \alias{mineral} 6 | \title{Mineral data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Smith, R.E., Campbell, N.A. and Lichfield, A. (1984), Multivariate 12 | statistical techniques applied to pisolitic laterite geochemistry at Golden Grove, 13 | Western Australia, Journal of Geochemical Exploration, 22, 193-216. 14 | } 15 | \usage{ 16 | data(mineral) 17 | } 18 | \description{ 19 | Contents (in parts per million) of 22 chemical elements in 20 | 53 samples of rocks in Western Australia. 21 | Two columns (8 and 9) were selected for use in this book. 22 | } 23 | \details{ 24 | Format: Numeric with 53 rows and 2 columns: 25 | } 26 | \examples{ 27 | data(mineral) 28 | } 29 | \keyword{datasets} 30 | -------------------------------------------------------------------------------- /R/leuk.dat.R: -------------------------------------------------------------------------------- 1 | #' Leukemia Data 2 | #' 3 | #' Records for 33 leukemia patients. 4 | #' 5 | #' Description: The following features are present: 6 | #' \code{wbc}: white blood cell count; 7 | #' \code{ag}: presence or absence of a certain 8 | #' morphological characteristic in the white cells; and 9 | #' \code{y}: binary response 10 | #' variable, equals \code{1} if the patient survives more than 52 weeks, \code{0} otherwise. 11 | #' 12 | #' Format: Numeric, 33 rows and 3 columns. 
13 | #' 14 | #' @docType data 15 | #' 16 | #' @usage data(leuk.dat) 17 | #' 18 | #' @format An object of class \code{"data.frame"}. 19 | #' 20 | #' @source Cook, R.D. and Weisberg, S. (1982). Residuals and Influence in Regression, Chapman 21 | #' and Hall; Johnson, W. (1985), Influence measures for logistic regression: another point of 22 | #' view, Biometrika, 72, 59-65. 23 | #' 24 | #' @examples 25 | #' data(leuk.dat) 26 | "leuk.dat" 27 | -------------------------------------------------------------------------------- /inst/scripts/MA1-AO.R: -------------------------------------------------------------------------------- 1 | # MA1-AO.R 2 | # Example 8.5 3 | # Figure 8.11, Table 8.4 4 | # Robust fitting of a simulated MA(1) series 5 | 6 | # Must install robustarima 7 | library(robustarima) 8 | 9 | set.seed(200) 10 | n.innov = 300 11 | n = 200 12 | theta=-0.8 13 | 14 | n.start = n.innov - n 15 | innov = rnorm(n.innov) 16 | n.start = n.innov - n 17 | 18 | ma1 = arima.sim(model = list(ma = theta), n=n, innov = innov, n.start = n.start) 19 | 20 | #ma1 <- arima.sim(model=list(ma=c(-.8)),n=200,n.innov=n.innov 21 | mac=ma1 22 | mac[20*(1:10)]=ma1[20*(1:10)]+4 23 | ma1tau=arima.rob(mac~1,q=1) 24 | ma1tau 25 | ma1ls=arima(mac,order=c(0,0,1),method="CSS") 26 | 27 | #Figure 8.21 28 | 29 | plot(1:200, mac, ylab='series', xlab='index', type='l', lty=1 ) 30 | outma=seq(20,200,20) 31 | points(outma, mac[outma]) 32 | lines(1:200, ma1tau$y.robust, lty=2 ) 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- /man/huber.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/psiFuns.R 3 | \name{huber} 4 | \alias{huber} 5 | \title{Tuning parameter for the rho loss functions} 6 | \usage{ 7 | huber(e) 8 | } 9 | \arguments{ 10 | \item{e}{the desired efficiency of the corresponding regression 11 |
estimator for Gaussian errors} 12 | } 13 | \value{ 14 | A length-1 vector with the corresponding tuning constant. 15 | } 16 | \description{ 17 | This function computes the tuning constant that yields an MM-regression 18 | estimator with a desired asymptotic efficiency when computed with a 19 | rho function in the corresponding family. The output of this 20 | function can be passed to the functions \link{lmrobdet.control}, 21 | \link{scaleM} and \link{rho}. 22 | } 23 | \examples{ 24 | # Tuning parameters for a 95\%-efficient M-estimator at a Gaussian model 25 | huber(.95) 26 | 27 | } 28 | \author{ 29 | Kjell Konis 30 | } 31 | -------------------------------------------------------------------------------- /R/glass.R: -------------------------------------------------------------------------------- 1 | #' Glass data 2 | #' 3 | #' Measurements of the presence of seven chemical constituents in 4 | #' 76 pieces of glass from nonfloat car windows. 5 | #' 6 | #' Format: 76 cases and 7 continuous variables. 7 | #' Description: The columns are: 8 | #' 1. RI refractive index 9 | #' 2. Na2O sodium oxide (unit measurement: weight percent in 10 | #' corresponding oxide, as are the rest of attributes) 11 | #' 3. MgO magnesium oxide 12 | #' 4. Al2O3 aluminum oxide 13 | #' 5. SiO2 silicon oxide 14 | #' 6. K2O potassium oxide 15 | #' 7. CaO calcium oxide 16 | #' 17 | #' @docType data 18 | #' 19 | #' @usage data(glass) 20 | #' 21 | #' @format An object of class \code{"data.frame"}. 22 | #' 23 | #' @source Hettich, S. and Bay, S.D. (1999), The UCI KDD Archive 24 | #' http://kdd.ics.uci.edu, Irvine, CA: University of California, 25 | #' Department of Information and Computer Science.
26 | #' 27 | #' @examples 28 | #' data(glass) 29 | "glass" 30 | -------------------------------------------------------------------------------- /man/bisquare.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/psiFuns.R 3 | \name{bisquare} 4 | \alias{bisquare} 5 | \title{Tuning parameter for the rho loss functions} 6 | \usage{ 7 | bisquare(e) 8 | } 9 | \arguments{ 10 | \item{e}{the desired efficiency of the corresponding regression 11 | estimator for Gaussian errors} 12 | } 13 | \value{ 14 | A length-1 vector with the corresponding tuning constant. 15 | } 16 | \description{ 17 | This function computes the tuning constant that yields an MM-regression 18 | estimator with a desired asymptotic efficiency when computed with a 19 | rho function in the corresponding family. The output of this 20 | function can be passed to the functions \link{lmrobdet.control}, 21 | \link{scaleM} and \link{rho}. 22 | } 23 | \examples{ 24 | # Tuning parameters for an 85\%-efficient M-estimator at a Gaussian model 25 | bisquare(.85) 26 | 27 | } 28 | \author{ 29 | Kjell Konis 30 | } 31 | -------------------------------------------------------------------------------- /man/skin.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/skin.R 3 | \docType{data} 4 | \name{skin} 5 | \alias{skin} 6 | \title{Skin data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Finney, D.J. (1947), The estimation from individual records of the relationship 12 | between dose and quantal response, Biometrika, 34, 320-334. 13 | } 14 | \usage{ 15 | data(skin) 16 | } 17 | \description{ 18 | These data correspond to a study of the relationship between air inspiration and 19 | blood circulation in the skin.
20 | } 21 | \details{ 22 | Description: The covariates are the logarithms of the volume of air inspired (log VOL) 23 | and of the inspiration rate (log RATE). The response (column 3) is the presence or 24 | absence of vasoconstriction of the skin of the digits after air inspiration. Format 25 | Numeric, 23 rows and 3 columns. 26 | } 27 | \examples{ 28 | data(skin) 29 | } 30 | \keyword{datasets} 31 | -------------------------------------------------------------------------------- /man/opt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/psiFuns.R 3 | \name{opt} 4 | \alias{opt} 5 | \title{Tuning parameter for a rho function in the (asymptotic bias-) optimal family} 6 | \usage{ 7 | opt(e) 8 | } 9 | \arguments{ 10 | \item{e}{the desired efficiency of the corresponding regression 11 | estimator for Gaussian errors} 12 | } 13 | \value{ 14 | A vector with named elements containing the corresponding tuning 15 | parameters. 16 | } 17 | \description{ 18 | This function computes the tuning constant that yields an MM-regression 19 | estimator with a desired asymptotic efficiency when computed with a 20 | rho function in the corresponding family. The output of this 21 | function can be passed to the functions \link{lmrobdet.control}, 22 | \link{scaleM} and \link{rho}. 23 | } 24 | \examples{ 25 | # Tuning parameters for an 85\%-efficient M-estimator at a Gaussian model 26 | opt(.85) 27 | 28 | } 29 | \author{ 30 | Kjell Konis 31 | } 32 | -------------------------------------------------------------------------------- /man/stackloss.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/stackloss.R 3 | \docType{data} 4 | \name{stackloss} 5 | \alias{stackloss} 6 | \title{Stackloss data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 
9 | } 10 | \source{ 11 | Brownlee, K.A. (1965), Statistical Theory and Methodology in Science and 12 | Engineering, 2nd Edition, New York: John Wiley & Sons, Inc. 13 | } 14 | \usage{ 15 | data(stackloss) 16 | } 17 | \description{ 18 | Observations from 21 days operation of a plant for the oxidation 19 | of ammonia as a stage in the production of nitric acid. 20 | } 21 | \details{ 22 | Format: 21 cases and 4 continuous variables. 23 | Description: The columns are: 24 | 1. air flow 25 | 2. cooling water inlet temperature (C) 26 | 3. acid concentration (%) 27 | 4. Stack loss, defined as the percentage of ingoing ammonia 28 | that escapes unabsorbed (response) 29 | } 30 | \examples{ 31 | data(stackloss) 32 | } 33 | \keyword{datasets} 34 | -------------------------------------------------------------------------------- /R/alcohol.R: -------------------------------------------------------------------------------- 1 | #' Alcohol data 2 | #' 3 | #' This data set contains physicochemical characteristics of 44 aliphatic alcohols. 4 | #' The aim of the experiment was the prediction of the solubility on the basis of 5 | #' molecular descriptors. 6 | #' 7 | #' Format: 44 cases and 7 continuous variables. 8 | #' The columns are: 9 | #' 1. SAG=solvent accessible surface-bounded molecular volume 10 | #' 2. V=volume 11 | #' 3. log PC (PC=octanol–water partitions coefficient) 12 | #' 4. P=polarizability 13 | #' 5. RM=molar refractivity 14 | #' 6. Mass 15 | #' 7. log(Solubility) (response) 16 | #' 17 | #' @docType data 18 | #' 19 | #' @usage data(alcohol) 20 | #' 21 | #' @format An object of class \code{"data.frame"}. 22 | #' 23 | #' @source Romanelli, G.P., Martino, C.M. and Castro, E.A. (2001), Modeling the 24 | #' solubility of aliphatic alcohols via molecular descriptors, Journal of the Chemical 25 | #' Society of Pakistan, 23, 195-199. 
26 | #' 27 | #' @examples 28 | #' data(alcohol) 29 | "alcohol" 30 | -------------------------------------------------------------------------------- /man/optv0.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/psiFuns.R 3 | \name{optv0} 4 | \alias{optv0} 5 | \title{Tuning parameter for a rho function in the (asymptotic bias-) optimal family} 6 | \usage{ 7 | optv0(e) 8 | } 9 | \arguments{ 10 | \item{e}{the desired efficiency of the corresponding regression 11 | estimator for Gaussian errors} 12 | } 13 | \value{ 14 | A vector with named elements containing the corresponding tuning 15 | parameters. 16 | } 17 | \description{ 18 | This function computes the tuning constant that yields an MM-regression 19 | estimator with a desired asymptotic efficiency when computed with a 20 | rho function in the corresponding family. The output of this 21 | function can be passed to the functions \link{lmrobdet.control}, 22 | \link{scaleM} and \link{rho}. 23 | } 24 | \examples{ 25 | # Tuning parameters for an 85\%-efficient M-estimator at a Gaussian model 26 | optv0(.85) 27 | 28 | } 29 | \author{ 30 | Kjell Konis 31 | } 32 | -------------------------------------------------------------------------------- /man/shock.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/shock.R 3 | \docType{data} 4 | \name{shock} 5 | \alias{shock} 6 | \title{Shock data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Bond, N.W. (1979), Impairment of shuttlebox avoidance-learning following repeated 12 | alcohol withdrawal episodes in rats, Pharmacology, Biochemistry and Behavior, 13 | 11, 589-591. 
14 | } 15 | \usage{ 16 | data(shock) 17 | } 18 | \description{ 19 | Times recorded for a rat to go through a shuttlebox in successive attempts. If the 20 | time exceeded 5 seconds, the rat received an electric shock for the duration of the 21 | next attempt. The data are the number of shocks received and the average time for 22 | all attempts between shocks. 23 | } 24 | \details{ 25 | Format: Numeric matrix with 16 rows and 2 columns 26 | } 27 | \examples{ 28 | data(shock) 29 | } 30 | \references{ 31 | References go here. 32 | } 33 | \keyword{datasets} 34 | -------------------------------------------------------------------------------- /man/mopt.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/psiFuns.R 3 | \name{mopt} 4 | \alias{mopt} 5 | \title{Tuning parameter for a rho function in the modified (asymptotic bias-) optimal family} 6 | \usage{ 7 | mopt(e) 8 | } 9 | \arguments{ 10 | \item{e}{the desired efficiency of the corresponding regression 11 | estimator for Gaussian errors} 12 | } 13 | \value{ 14 | A vector with named elements containing the corresponding tuning 15 | parameters. 16 | } 17 | \description{ 18 | This function computes the tuning constant that yields an MM-regression 19 | estimator with a desired asymptotic efficiency when computed with a 20 | rho function in the corresponding family. The output of this 21 | function can be passed to the functions \link{lmrobdet.control}, 22 | \link{scaleM} and \link{rho}. 
23 | } 24 | \examples{ 25 | # Tuning parameters for an 85\%-efficient M-estimator at a Gaussian model 26 | mopt(.85) 27 | 28 | } 29 | \author{ 30 | Kjell Konis 31 | } 32 | -------------------------------------------------------------------------------- /man/moptv0.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/psiFuns.R 3 | \name{moptv0} 4 | \alias{moptv0} 5 | \title{Tuning parameter for a rho function in the modified (asymptotic bias-) optimal family} 6 | \usage{ 7 | moptv0(e) 8 | } 9 | \arguments{ 10 | \item{e}{the desired efficiency of the corresponding regression 11 | estimator for Gaussian errors} 12 | } 13 | \value{ 14 | A vector with named elements containing the corresponding tuning 15 | parameters. 16 | } 17 | \description{ 18 | This function computes the tuning constant that yields an MM-regression 19 | estimator with a desired asymptotic efficiency when computed with a 20 | rho function in the corresponding family. The output of this 21 | function can be passed to the functions \link{lmrobdet.control}, 22 | \link{scaleM} and \link{rho}. 23 | } 24 | \examples{ 25 | # Tuning parameters for an 85\%-efficient M-estimator at a Gaussian model 26 | moptv0(.85) 27 | 28 | } 29 | \author{ 30 | Kjell Konis 31 | } 32 | -------------------------------------------------------------------------------- /R/hearing.R: -------------------------------------------------------------------------------- 1 | #' Hearing data 2 | #' 3 | #' Prevalence rates in percent for men aged 55–64 with hearing levels 4 | #' 16 decibels or more above the audiometric zero. 5 | #' 6 | #' Format: Two-way ANOVA. 7 | #' Description: The rows correspond to different frequencies and to normal speech. 8 | #' 1. 500 hertz 9 | #' 2. 1000 hertz 10 | #' 3. 2000 hertz 11 | #' 4. 3000 hertz 12 | #' 5. 4000 hertz 13 | #' 6. 6000 hertz 14 | #' 7. 
# Namespace load hook for the package.
#
# The fit.models class registration that used to live here is kept below as
# commented-out code, so the hook currently does nothing except return NULL
# invisibly. Neither `libname` nor `pkgname` is used.
.onLoad <- function(libname, pkgname) {
  # ##--------------- begin {fit.models} -----------------
  # requireNamespace("fit.models")
  # FM.add.class <- fit.models::fmclass.add.class
  # FM.register <- fit.models::fmclass.register
  #
  # FM.add.class("lmfm", "lmrobM", warn = F)
  # FM.add.class("lmfm", "lmrobdetMM", warn = F)
  # FM.add.class("lmfm", "lmrobdetDCML", warn = F)
  #
  # FM.add.class("covfm", "covClassic", warn = F)
  # FM.add.class("covfm", "covRob", warn = F)
  #
  # FM.register(fmclass = "pcompfm",
  #             classes = c("prcomp", "prcompRob"),
  #             validation.function = NULL)
  # ##--------------- end {fit.models} -------------------

  # Explicit form of the original bare invisible(): same value, same visibility.
  invisible(NULL)
}
# Trimmed mean and its estimated standard error.
#
# Arguments:
#   x     numeric vector of observations; missing values are dropped
#         (sort() removes them) and n is the number of remaining values.
#   alfa  trimming proportion per tail, 0 <= alfa < 0.5. The m = floor(n * alfa)
#         smallest and m largest observations are excluded from the mean.
#
# Value: list(mu = trimmed mean, mu.std = estimated standard error).
#
# The standard error is sqrt(A / (n - 2m) / n), where A is the sum of squared
# deviations (from mu) of the retained observations plus m times the squared
# deviation of each of the order statistics x_(m) and x_(n-m+1).
trimean <- function(x, alfa) {
  stopifnot(
    is.numeric(x),
    is.numeric(alfa), length(alfa) == 1L, !is.na(alfa),
    alfa >= 0, alfa < 0.5
  )

  xs <- sort(x)
  # Count after sorting so that n agrees with the vector actually indexed.
  n <- length(xs)
  if (n == 0L) {
    stop("'x' has no non-missing observations", call. = FALSE)
  }

  m <- floor(n * alfa)
  # alfa < 0.5 guarantees m < n / 2, so this range is increasing and non-empty.
  kept <- (m + 1):(n - m)

  mu <- mean(xs[kept])
  xs <- xs - mu

  # With m == 0 nothing is trimmed; xs[0] would be a zero-length vector and
  # would silently turn the whole result into numeric(0), so skip the tail
  # terms in that case.
  tails <- if (m > 0) m * xs[m]^2 + m * xs[n - m + 1]^2 else 0
  A <- tails + sum(xs[kept]^2)

  mu.std <- sqrt(A / (n - 2 * m) / n)
  list(mu = mu, mu.std = mu.std)
}
print(c(xbar,muM,mu25)) 32 | print("Their estimated standard deviations") 33 | print(c(smed, muMst, ss25)) 34 | 35 | print("Their 0.95 confidence intervals") 36 | print(rbind(intermean, interM, inter25)) 37 | -------------------------------------------------------------------------------- /inst/scripts/step.R: -------------------------------------------------------------------------------- 1 | # step.R 2 | # EXAMPLE 5.3 3 | 4 | # NOTE: The sequence of models in Table 5.2 of the book is correct, 5 | # but the RFPE values are wrong, and the ones computed below are correct. 6 | 7 | library(RobStatTM) 8 | 9 | cont <- lmrobdet.control(bb = 0.5, efficiency = 0.85, family = "bisquare") 10 | 11 | # We now recommend to use family "mopt" with efficiency = .95 as defaults 12 | # Using those defaults in the line above results in slightly different RFPE 13 | # numbers in the object out, but results in the same model selection 14 | 15 | set.seed(300) 16 | X <- matrix(rnorm(50*6), 50, 6) 17 | beta <- c(1,1,1,0,0,0) 18 | y <- as.vector(X %*% beta) + 1 + rnorm(50) 19 | y[1:6] <- seq(30, 55, 5) 20 | 21 | for (i in 1:6) X[i,] <- c(X[i,1:3],i/2,i/2,i/2) 22 | Z <- cbind(y,X) 23 | Z <- as.data.frame(Z) 24 | obj <- lmrobdetMM(y ~ ., data=Z, control=cont) 25 | out <- step.lmrobdetMM(obj) 26 | 27 | obj2 <- lm(y ~ ., data=Z) 28 | out2 <- step(obj2) 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | -------------------------------------------------------------------------------- /man/leuk.dat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/leuk.dat.R 3 | \docType{data} 4 | \name{leuk.dat} 5 | \alias{leuk.dat} 6 | \title{Leukemia Data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Cook, R.D. and Weisberg, S. (1982). Residuals and Influence in Regression, Chapman 12 | and Hall; Johnson, W. 
(1985), Influence measures for logistic regression: another point of 13 | view, Biometrika, 72, 59-65. 14 | } 15 | \usage{ 16 | data(leuk.dat) 17 | } 18 | \description{ 19 | Records for 33 leukemia patients. 20 | } 21 | \details{ 22 | Description: The following features are present: 23 | \code{wbc}: white blood cell count; 24 | \code{ag}: presence or absence of a certain 25 | morphological characteristic in the white cells; and 26 | \code{y}: binary response 27 | variable, equals \code{1} if the patient survives more than 52 weeks, \code{0} otherwise. 28 | 29 | Format: Numeric, 33 rows and 3 columns. 30 | } 31 | \examples{ 32 | data(leuk.dat) 33 | } 34 | \keyword{datasets} 35 | -------------------------------------------------------------------------------- /inst/scripts/wine.R: -------------------------------------------------------------------------------- 1 | # wine.R 2 | # EXAMPLE 6.2 3 | # Figure 6.3 4 | 5 | library(RobStatTM) 6 | data(wine) 7 | X <- as.matrix(wine) 8 | xbar <- colMeans(X) 9 | C <- cov(X) 10 | disC <- mahalanobis(X, xbar, C) 11 | resu <- covRobMM(X) 12 | mu <- resu$mu 13 | V <- resu$V 14 | disM <- mahalanobis(X, mu, V) 15 | 16 | resu <- covRobRocke(X); 17 | mu <- resu$mu 18 | V <- resu$V 19 | disR <- mahalanobis(X,mu,V) 20 | 21 | #Figure 6.3 22 | par(mfrow=c(2,2)) 23 | plot(disC, xlab="index", ylab="Distances", main = "Classical", cex.main=0.9, pch=19) 24 | plot(qchisq(ppoints(59),13),sort(disC),xlab="chi squared quantiles", ylab="Sorted distances", main ="Classical", cex.main=0.9, pch=19) 25 | lines(sort(disC),sort(disC)) 26 | plot(disM, xlab="index", ylab="Distances", main = "Robust", cex.main=0.9, pch=19) 27 | plot(qchisq(ppoints(59),13),sort(disM),xlab="chi squared quantiles", ylab="Sorted distances", main="Robust", cex.main=0.9, pch=19) 28 | lines(sort(disM),sort(disM)) 29 | par(mfrow=c(1,1)) 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | -------------------------------------------------------------------------------- 
#' Print an lsRobTest Object
#'
#' Prints the null hypothesis of the test, the table of individual
#' coefficient tests and the joint test for least squares bias.
#'
#' @param x an object of class \code{lsRobTest}. The components \code{test},
#'   \code{coefs} and \code{full} (with elements \code{stat}, \code{df} and
#'   \code{p.value}) are used.
#' @param digits significant digits printed, default digits = 4
#' @param ... further arguments passed on to \code{\link{format}}
#'
#' @returns \code{x}, invisibly.
#' @export
#'
print.lsRobTest <- function(x, digits = 4, ...)
{
  cat("Test for least squares bias\n")

  # isTRUE() keeps a missing `test` component from aborting the whole print:
  # the H0 line is simply skipped when the test type is not "T1" or "T2".
  if (isTRUE(x$test == "T1")) {
    cat("H0: normal regression error distribution\n")
  }
  if (isTRUE(x$test == "T2")) {
    cat("H0: composite normal/non-normal regression error distribution\n")
  }

  cat("\n")
  cat("Individual coefficient tests:\n")
  print(format(as.data.frame(x$coefs), digits = digits, ...))

  cat("\n")
  cat("Joint test for bias:\n")
  cat("Test statistic: ")
  cat(format(x$full$stat, digits = digits, ...))
  cat(" on ")
  cat(format(x$full$df, digits = digits, ...))
  cat(" DF, p-value: ")
  cat(format(x$full$p.value, digits = digits, ...))
  cat("\n")

  # Return the object itself so the method behaves like other print methods.
  invisible(x)
}
par(mfrow=c(3,1)) 34 | plot(x) 35 | title("Gaussian AR(1) series without outliers") 36 | plot(xAO) 37 | points(tt,xAO[tt],pch=1) 38 | title("Gaussian AR(1) series with 10% of additive outliers") 39 | plot(xIO) 40 | points(50,xIO[50] ,pch=1 ) 41 | title("Gaussian AR(1) series with one innovation outlier") 42 | par(mfrow=c(1,1)) 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /man/glass.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/glass.R 3 | \docType{data} 4 | \name{glass} 5 | \alias{glass} 6 | \title{Glass data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Hettich, S. and Bay, S.D. (1999), The UCI KDD Archive 12 | http://kdd.ics.uci.edu, Irvine, CA: University of California, 13 | Department of Information and Computer Science. 14 | } 15 | \usage{ 16 | data(glass) 17 | } 18 | \description{ 19 | Measurements of the presence of seven chemical constituents in 20 | 76 pieces of glass from nonfloat car windows. 21 | } 22 | \details{ 23 | Format: 76 cases and 7 continuous variables. 24 | Description: The columns are: 25 | 1. RI refractive index 26 | 2. Na2O sodium oxide (unit measurement: weight percent in 27 | corresponding oxide, as are the rest of attributes) 28 | 3. MgO magnesium oxide 29 | 4. Al2O3 aluminum oxide 30 | 5. SiO2 silicon oxide 31 | 6. K2O potassium oxide 32 | 7. CaO calcium oxide 33 | } 34 | \examples{ 35 | data(glass) 36 | } 37 | \keyword{datasets} 38 | -------------------------------------------------------------------------------- /R/wine.R: -------------------------------------------------------------------------------- 1 | #' Wine data 2 | #' 3 | #' It contains, for each of 59 wines grown in the same region in Italy, the quantities of 13 4 | #' constituents.
The original purpose of the analysis was to classify wines from different 5 | #' cultivars by means of these measurements. In this example we treat cultivar one. 6 | #' 7 | #' Format: Numeric, 59 rows and 13 columns. Description: The attributes are: 8 | #' 1. Alcohol 9 | #' 2. Malic acid 10 | #' 3. Ash 11 | #' 4. Alcalinity of ash 12 | #' 5. Magnesium 13 | #' 6. Total phenols 14 | #' 7. Flavanoids 15 | #' 8. Nonflavanoid phenols 16 | #' 9. Proanthocyanins 17 | #' 10. Color intensity 18 | #' 11. Hue 19 | #' 12. OD280/OD315 of diluted wines 20 | #' 13. Proline 21 | #' 22 | #' @docType data 23 | #' 24 | #' @usage data(wine) 25 | #' 26 | #' @format An object of class \code{"data.frame"}. 27 | #' 28 | #' @source Hettich, S. and Bay, S.D. (1999), The UCI KDD Archive http://kdd.ics.uci.edu. 29 | #' Irvine, CA: University of California, Department of Information and Computer Science. 30 | #' 31 | #' @examples 32 | #' data(wine) 33 | "wine" 34 | -------------------------------------------------------------------------------- /man/alcohol.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/alcohol.R 3 | \docType{data} 4 | \name{alcohol} 5 | \alias{alcohol} 6 | \title{Alcohol data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Romanelli, G.P., Martino, C.M. and Castro, E.A. (2001), Modeling the 12 | solubility of aliphatic alcohols via molecular descriptors, Journal of the Chemical 13 | Society of Pakistan, 23, 195-199. 14 | } 15 | \usage{ 16 | data(alcohol) 17 | } 18 | \description{ 19 | This data set contains physicochemical characteristics of 44 aliphatic alcohols. 20 | The aim of the experiment was the prediction of the solubility on the basis of 21 | molecular descriptors. 22 | } 23 | \details{ 24 | Format: 44 cases and 7 continuous variables. 25 | The columns are: 26 | 1. 
SAG=solvent accessible surface-bounded molecular volume 27 | 2. V=volume 28 | 3. log PC (PC=octanol–water partitions coefficient) 29 | 4. P=polarizability 30 | 5. RM=molar refractivity 31 | 6. Mass 32 | 7. log(Solubility) (response) 33 | } 34 | \examples{ 35 | data(alcohol) 36 | } 37 | \keyword{datasets} 38 | -------------------------------------------------------------------------------- /man/hearing.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/hearing.R 3 | \docType{data} 4 | \name{hearing} 5 | \alias{hearing} 6 | \title{Hearing data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Roberts, J. and Cohrssen, J. (1968), Hearing 12 | levels of adults, US National Center for Health Statistics Publications, 13 | Series 11, No. 31 14 | } 15 | \usage{ 16 | data(hearing) 17 | } 18 | \description{ 19 | Prevalence rates in percent for men aged 55–64 with hearing levels 20 | 16 decibels or more above the audiometric zero. 21 | } 22 | \details{ 23 | Format: Two-way ANOVA. 24 | Description: The rows correspond to different frequencies and to normal speech. 25 | 1. 500 hertz 26 | 2. 1000 hertz 27 | 3. 2000 hertz 28 | 4. 3000 hertz 29 | 5. 4000 hertz 30 | 6. 6000 hertz 31 | 7. Normal speech 32 | The columns classify the data in seven occupational groups: 33 | 1. professional–managerial 34 | 2. farm 35 | 3. clerical sales 36 | 4. craftsmen 37 | 5. operatives 38 | 6. service 39 | 7. laborers 40 | } 41 | \examples{ 42 | data(hearing) 43 | } 44 | \keyword{datasets} 45 | -------------------------------------------------------------------------------- /R/waste.R: -------------------------------------------------------------------------------- 1 | #' Waste data 2 | #' 3 | #' Waste data. 
The original data are the result of a study on production 4 | #' waste and land use by Golueke and McGauhey (1970), and contain nine variables, 5 | #' of which we consider six. 6 | #' 7 | #' Format: 40 cases and 6 continuous variables. 8 | #' Description: The columns are 9 | #' 1. industrial land (acres) 10 | #' 2. fabricated metals (acres) 11 | #' 3. trucking and wholesale trade (acres) 12 | #' 4. retail trade (acres) 13 | #' 5. restaurants and hotels (acres) 14 | #' 6. solid waste (millions of tons), response 15 | #' 16 | #' @docType data 17 | #' 18 | #' @usage data(waste) 19 | #' 20 | #' @format An object of class \code{"data.frame"}. 21 | #' 22 | #' @references Golueke, C.G. and McGauhey, P.H. (1970), Comprehensive Studies of 23 | #' Solid Waste Management, US Department of Health, Education and Welfare, Public 24 | #' Health Services Publication No. 2039. 25 | #' 26 | #' @source Golueke, C.G. and McGauhey, P.H. (1970), Comprehensive Studies of 27 | #' Solid Waste Management, US Department of Health, Education and Welfare, Public 28 | #' Health Services Publication No. 2039. 
29 | #' 30 | #' @examples 31 | #' data(waste) 32 | "waste" 33 | -------------------------------------------------------------------------------- /man/cov.dcml.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DCML.R 3 | \name{cov.dcml} 4 | \alias{cov.dcml} 5 | \title{Approximate covariance matrix of the DCML regression estimator.} 6 | \usage{ 7 | cov.dcml(res.LS, res.R, CC, sig.R, t0, p, n, control) 8 | } 9 | \arguments{ 10 | \item{res.LS}{vector of residuals from the least squares fit} 11 | 12 | \item{res.R}{vector of residuals from the robust regression fit} 13 | 14 | \item{CC}{estimated covariance matrix of the robust regression estimator} 15 | 16 | \item{sig.R}{robust estimate of the scale of the residuals} 17 | 18 | \item{t0}{mixing parameter} 19 | 20 | \item{p, n}{the dimensions of the problem, needed for the finite 21 | sample correction of the tuning constant of the M-scale} 22 | 23 | \item{control}{a list of control parameters as returned by \code{\link{lmrobdet.control}}} 24 | } 25 | \value{ 26 | The covariance matrix estimate. 27 | } 28 | \description{ 29 | The estimated covariance matrix of the DCML regression estimator. 30 | This function is used internally and not meant to be used 31 | directly. 32 | } 33 | \author{ 34 | Victor Yohai, \email{victoryohai@gmail.com} 35 | } 36 | -------------------------------------------------------------------------------- /man/wine.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/wine.R 3 | \docType{data} 4 | \name{wine} 5 | \alias{wine} 6 | \title{Wine data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Hettich, S. and Bay, S.D. (1999), The UCI KDD Archive http://kdd.ics.uci.edu. 
12 | Irvine, CA: University of California, Department of Information and Computer Science. 13 | } 14 | \usage{ 15 | data(wine) 16 | } 17 | \description{ 18 | It contains, for each of 59 wines grown in the same region in Italy, the quantities of 13 19 | constituents. The original purpose of the analysis was to classify wines from different 20 | cultivars by means of these measurements. In this example we treat cultivar one. 21 | } 22 | \details{ 23 | Format: Numeric, 59 rows and 13 columns. Description: The attributes are: 24 | 1. Alcohol 25 | 2. Malic acid 26 | 3. Ash 27 | 4. Alcalinity of ash 28 | 5. Magnesium 29 | 6. Total phenols 30 | 7. Flavanoids 31 | 8. Nonflavanoid phenols 32 | 9. Proanthocyanins 33 | 10. Color intensity 34 | 11. Hue 35 | 12. OD280/OD315 of diluted wines 36 | 13. Proline 37 | } 38 | \examples{ 39 | data(wine) 40 | } 41 | \keyword{datasets} 42 | -------------------------------------------------------------------------------- /man/prcompRob.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/prcompRob.R 3 | \name{prcompRob} 4 | \alias{prcompRob} 5 | \title{Robust Principal Components Cont'd} 6 | \usage{ 7 | prcompRob(x, rank. 
= NULL, delta.scale = 0.5, max.iter = 100L) 8 | } 9 | \arguments{ 10 | \item{x}{data matrix with observations in rows} 11 | 12 | \item{rank.}{Maximal number of principal components to be used (optional)} 13 | 14 | \item{delta.scale}{"delta" parameter of the scale M-estimator (default = 0.5)} 15 | 16 | \item{max.iter}{maximum number of iterations (default = 100)} 17 | } 18 | \value{ 19 | \item{sdev}{the standard deviation of the principal components} 20 | \item{rotation}{matrix containing the factor loadings} 21 | \item{x}{matrix containing the rotated data} 22 | \item{center}{the centering used} 23 | } 24 | \description{ 25 | This function uses the pcaRobS function to compute all principal components while 26 | behaving similarly to the prcomp function 27 | } 28 | \examples{ 29 | data(wine) 30 | 31 | p.wine <- prcompRob(wine) 32 | summary(p.wine) 33 | 34 | ## Choose only 5 35 | p5.wine <- prcompRob(wine, rank. = 5) 36 | summary(p5.wine) 37 | 38 | } 39 | \author{ 40 | Gregory Brownson, \email{gregory.brownson@gmail.com} 41 | } 42 | -------------------------------------------------------------------------------- /inst/scripts/bus.R: -------------------------------------------------------------------------------- 1 | # bus.R 2 | # EXAMPLE 6.4 3 | # Figure 6.10 4 | # Table 6.6 5 | 6 | library(RobStatTM) 7 | data(bus) 8 | X0 <- as.matrix(bus) 9 | X1 <- X0[,-9] 10 | ss <- apply(X1, 2, mad) 11 | mu <- apply(X1, 2, median) 12 | X <- scale(X1, center=mu, scale=ss) 13 | n <- dim(X)[1] 14 | p <- dim(X)[2] 15 | 16 | #Classical PCA 17 | q <- 3 #compute three components 18 | resC <- prcomp(X) 19 | prC <- as.vector(summary(resC)$importance['Cumulative Proportion', ] ) 20 | nonC <- 1 - prC #proportion of unexplained variance 21 | Xcent <- scale(X, center=resC$center, scale=FALSE) 22 | fitC <- scale(Xcent %*% resC$rotation[, 1:q] %*% t(resC$rotation[, 1:q]), center=-resC$center, scale=FALSE) #resC$fit 23 | resiC <- X - fitC 24 | dC <- rowSums(resiC^2) 25 | 26 | #Robust PCA 27 | rr 
<- pcaRobS(X, q, 0.99) 28 | propex <- rr$propex 29 | fitM <- rr$fit 30 | resiM <- X-fitM 31 | dM <- rowSums(resiM^2) 32 | alfa <- seq(from=0.1, to=0.9, by=0.1) 33 | qC <- quantile(dC,alfa) 34 | qM <- quantile(dM,alfa) 35 | print(rbind(qC,qM)) 36 | 37 | dCsor <- sort(dC) 38 | dMsor <- sort(dM) 39 | par(mfrow=c(1,1)) 40 | plot(dCsor, dMsor, xlab="Classic", ylab="Robust", log="xy") 41 | abline(0,1) 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /man/MMPY.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DCML.R 3 | \name{MMPY} 4 | \alias{MMPY} 5 | \title{MM regression estimator using Pen~a-Yohai candidates} 6 | \usage{ 7 | MMPY(X, y, control, mf) 8 | } 9 | \arguments{ 10 | \item{X}{design matrix} 11 | 12 | \item{y}{response vector} 13 | 14 | \item{control}{a list of control parameters as returned by \code{\link{lmrobdet.control}}} 15 | 16 | \item{mf}{model frame} 17 | } 18 | \value{ 19 | an \code{lmrob} object with the M-estimator 20 | obtained starting from the S-estimator computed with the 21 | Pen~a-Yohai initial candidates. The properties of the final 22 | estimator (efficiency, etc.) are determined by the tuning constants in 23 | the argument \code{control}. 24 | } 25 | \description{ 26 | This function computes MM-regression estimator using Pen~a-Yohai 27 | candidates for the initial S-estimator. This function is used 28 | internally by \code{\link{lmrobdetMM}}, and not meant to be used 29 | directly. 
30 | } 31 | \references{ 32 | \url{http://www.wiley.com/go/maronna/robust} 33 | } 34 | \seealso{ 35 | \code{\link{DCML}}, \code{\link{MMPY}}, \code{\link{SMPY}} 36 | } 37 | \author{ 38 | Victor Yohai, \email{victoryohai@gmail.com}, Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 39 | } 40 | -------------------------------------------------------------------------------- /R/algae.R: -------------------------------------------------------------------------------- 1 | #' Algae data 2 | #' 3 | #' Each of the 90 rows of the data set is a set of measurements at a river in some place in Europe. 4 | #' There are 11 predictors. The response is the logarithm of the abundance of a certain 5 | #' class of algae. Description: The columns are: 6 | #' 1. season, categorical (1,2,3,4 for winter, spring, summer and autumn) 7 | #' 2. river size (categorical) (1,2,3 for small, medium and large) 8 | #' 3. fluid velocity (categorical) (1,2,3 for low, medium and high) 9 | #' 4-11 (numeric): content of nitrogen in the form of nitrates, nitrites and ammonia, and other 10 | #' chemical compounds. 11 | #' Col. 12 is the response: abundance of a type of algae (type 6 in the complete file). For 12 | #' simplicity we deleted the rows with missing values and took the logarithm of the response. 13 | #' 14 | #' Format: 90 rows, 12 columns (3 categorical, 9 numeric) 15 | #' 16 | #' @docType data 17 | #' 18 | #' @usage data(algae) 19 | #' 20 | #' @format An object of class \code{"data.frame"}. 21 | #' 22 | #' @references References go here. 23 | #' 24 | #' @source Hettich, S. and Bay, S.D. (1999), The UCI KDD Archive http://kdd.ics.uci.edu. 25 | #' Irvine, CA: University of California, Department of Information and Computer Science. 
26 | #' 27 | #' @examples 28 | #' data(algae) 29 | "algae" 30 | -------------------------------------------------------------------------------- /inst/scripts/vehicle.R: -------------------------------------------------------------------------------- 1 | # vehicle.R 2 | # EXAMPLE 6.3 3 | # Figure 6.7 4 | 5 | library(RobStatTM) 6 | data(vehicle) 7 | X <- as.matrix(vehicle) 8 | n <- dim(X)[1] 9 | p <- dim(X)[2] 10 | xbar <- colMeans(X) 11 | C <- cov(X) 12 | disC1 <- mahalanobis(X,xbar,C); disC=sort(disC1) #Classical estimator 13 | resu <- covRobRocke(X) #Rocke estimator 14 | muR <- resu$mu 15 | VR <- resu$V 16 | disR1 <- mahalanobis(X, muR, VR) 17 | disR <- sort(disR1) 18 | resu <- rrcov::CovMcd(X) 19 | muM <- slot(resu, 'center') 20 | VM <- slot(resu, 'cov') 21 | disM1 <- slot(resu, 'mah') 22 | disM <- sort(disM1) 23 | resu <- rrcov::CovSest(X, method= "bisquare") #S-estimator 24 | muS <- slot(resu, 'center') 25 | VS <- slot(resu, 'cov') 26 | disS1 <- mahalanobis(X, muS, VS) 27 | disS <-sort(disS1) 28 | 29 | # Figure 6.7 30 | par(mfrow=c(1,3)) 31 | qua <- qchisq(ppoints(n), p) 32 | plot(qua,disC, xlab="Chi squared quantiles", ylab="Sorted distances", main="Classical", cex.main=0.9, pch=19) 33 | abline(0,1) 34 | plot(qua, disS, xlab="Chi squared quantiles", ylab="Sorted distances", main="S-Bisquare", cex.main=0.9, pch=19) 35 | abline(0,1) 36 | plot(qua, disR, xlab="Chi squared quantiles", ylab="Sorted distances", main="Rocke", cex.main=0.9, pch=19) 37 | abline(0,1) 38 | par(mfrow=c(1,1)) 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /man/waste.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/waste.R 3 | \docType{data} 4 | \name{waste} 5 | \alias{waste} 6 | \title{Waste data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Golueke, C.G. 
and McGauhey, P.H. (1970), Comprehensive Studies of 12 | Solid Waste Management, US Department of Health, Education and Welfare, Public 13 | Health Services Publication No. 2039. 14 | } 15 | \usage{ 16 | data(waste) 17 | } 18 | \description{ 19 | Waste data. The original data are the result of a study on production 20 | waste and land use by Golueke and McGauhey (1970), and contain nine variables, 21 | of which we consider six. 22 | } 23 | \details{ 24 | Format: 40 cases and 6 continuous variables. 25 | Description: The columns are 26 | 1. industrial land (acres) 27 | 2. fabricated metals (acres) 28 | 3. trucking and wholesale trade (acres) 29 | 4. retail trade (acres) 30 | 5. restaurants and hotels (acres) 31 | 6. solid waste (millions of tons), response 32 | } 33 | \examples{ 34 | data(waste) 35 | } 36 | \references{ 37 | Golueke, C.G. and McGauhey, P.H. (1970), Comprehensive Studies of 38 | Solid Waste Management, US Department of Health, Education and Welfare, Public 39 | Health Services Publication No. 2039. 40 | } 41 | \keyword{datasets} 42 | -------------------------------------------------------------------------------- /man/INVTR2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/INVTR2.R 3 | \name{INVTR2} 4 | \alias{INVTR2} 5 | \title{Robust R^2 coefficient of determination} 6 | \usage{ 7 | INVTR2(RR2, family, cc) 8 | } 9 | \arguments{ 10 | \item{RR2}{the proportional difference in loss functions (a naive robust R^2 coefficient).} 11 | 12 | \item{family}{family string specifying the name of the family of loss function to be used (current valid 13 | options are "bisquare", "opt" and "mopt").} 14 | 15 | \item{cc}{tuning parameters to be computed according to efficiency and / or breakdown 16 | considerations. 
See \link{lmrobdet.control}, \link{bisquare}, \link{mopt} 17 | and \link{opt}.} 18 | } 19 | \value{ 20 | An unbiased version of the robust R^2 coefficient of determination. 21 | } 22 | \description{ 23 | This function computes a robust version of the R^2 coefficient of determination. 24 | It is used internally by \code{\link{lmrobdetMM}}, 25 | and not meant to be used directly. 26 | } 27 | \details{ 28 | This function computes a robust version of the R^2 coefficient. 29 | It is used internally by \code{\link{lmrobdetMM}}, 30 | and not meant to be used directly. 31 | } 32 | \references{ 33 | \url{http://www.wiley.com/go/maronna/robust} 34 | } 35 | \author{ 36 | Victor Yohai, \email{victoryohai@gmail.com} 37 | } 38 | -------------------------------------------------------------------------------- /man/rhoprime.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/psiFuns.R 3 | \name{rhoprime} 4 | \alias{rhoprime} 5 | \title{The first derivative of the rho function} 6 | \usage{ 7 | rhoprime(u, family, cc, standardize = FALSE) 8 | } 9 | \arguments{ 10 | \item{u}{point or vector at which rho is to be evaluated} 11 | 12 | \item{family}{family string specifying the name of the family of loss function to be used (current valid 13 | options are "bisquare", "opt" and "mopt").} 14 | 15 | \item{cc}{tuning parameters to be computed according to efficiency and / or breakdown 16 | considerations. See \link{lmrobdet.control}, \link{bisquare}, \link{mopt} 17 | and \link{opt}.} 18 | 19 | \item{standardize}{logical value determining whether the rho function is to be 20 | standardized so that its maximum value is 1. 
See \code{Mpsi}.} 21 | } 22 | \value{ 23 | The value of the first derivative of \code{rho} evaluated at \code{u} 24 | } 25 | \description{ 26 | The first derivative of the rho function 27 | } 28 | \examples{ 29 | # Evaluate the derivative of a rho function tuned for 85\% efficiency 30 | rhoprime(u=1.1, family='bisquare', cc=bisquare(.85)) 31 | # Evaluate the derivative of a rho function tuned for 50\% breakdown 32 | rhoprime(u=1.1, family='opt', cc=lmrobdet.control(bb=.5, family='opt')$tuning.chi) 33 | 34 | } 35 | \author{ 36 | Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 37 | } 38 | -------------------------------------------------------------------------------- /R/bus.R: -------------------------------------------------------------------------------- 1 | #' Bus data 2 | #' 3 | #' This data set corresponds to a study in automatic vehicle recognition. 4 | #' Each of the 218 rows corresponds to a view of a bus silhouette, and contains 5 | #' 18 attributes of the image. It was decided to exclude variable 9 and divide the 6 | #' remaining variables by their MADN’s. 7 | #' 8 | #' Description: The following features were extracted from the silhouettes. 9 | #' 1. compactness 10 | #' 2. circularity 11 | #' 3. distance circularity 12 | #' 4. radius ratio 13 | #' 5. principal axis aspect ratio 14 | #' 6. maximum length aspect ratio 15 | #' 7. scatter ratio 16 | #' 8. elongatedness 17 | #' 9. principal axis rectangularity 18 | #' 10. maximum length rectangularity 19 | #' 11. scaled variance along major axis 20 | #' 12. scaled variance along minor axis 21 | #' 13. scaled radius of gyration 22 | #' 14. skewness about major axis 23 | #' 15. skewness about minor axis 24 | #' 16. kurtosis about minor axis 25 | #' 17. kurtosis about major axis 26 | #' 18. hollows ratio 27 | #' 28 | #' Format: Numeric, 218 rows and 18 columns. 29 | #' 30 | #' @docType data 31 | #' 32 | #' @usage data(bus) 33 | #' 34 | #' @format An object of class \code{"data.frame"}. 
35 | #' 36 | #' @source Hettich, S. and Bay, S.D. (1999), The UCI KDD Archive http://kdd.ics.uci.edu. 37 | #' Irvine, CA: University of California, Department of Information and Computer Science. 38 | #' 39 | #' @examples 40 | #' data(bus) 41 | "bus" 42 | -------------------------------------------------------------------------------- /R/vehicle.R: -------------------------------------------------------------------------------- 1 | #' Vehicle data 2 | #' 3 | #' The original data set contains an ensemble of shape feature extractors to the 4 | #' 2D silhouettes of different vehicles. The purpose is to classify a given 5 | #' silhouette as one of four types of vehicle, using a set of 18 features extracted 6 | #' from the silhouette. Here we deal with the "van" type, which has 217 cases. 7 | #' Description; The following features were extracted from the silhouettes. 8 | #' 1. compactness 9 | #' 2. circularity 10 | #' 3. distance circularity 11 | #' 4. radius ratio 12 | #' 5. principal axis aspect ratio 13 | #' 6. maximum length aspect ratio 14 | #' 7. scatter ratio 15 | #' 8. elongatedness 16 | #' 9. principal axis rectangularity 17 | #' 10. maximum length rectangularity 18 | #' 11. scaled variance along major axis 19 | #' 12. scaled variance along minor axis 20 | #' 13. scaled radius of gyration 21 | #' 14. skewness about major axis 22 | #' 15. skewness about minor axis 23 | #' 16. kurtosis about minor axis 24 | #' 17. kurtosis about major axis 25 | #' 18. hollows ratio 26 | #' 27 | #' Format: Numeric, 217 rows and 18 columns. 28 | #' 29 | #' @docType data 30 | #' 31 | #' @usage data(vehicle) 32 | #' 33 | #' @format An object of class \code{"data.frame"}. 34 | #' 35 | #' @source Turing Institute, Glasgow, and are available at https://archive.ics.uci.edu/ml/datasets/Statlog+(Vehicle+Silhouettes). 
36 | #' 37 | #' @examples 38 | #' data(vehicle) 39 | "vehicle" 40 | -------------------------------------------------------------------------------- /man/algae.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/algae.R 3 | \docType{data} 4 | \name{algae} 5 | \alias{algae} 6 | \title{Algae data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Hettich, S. and Bay, S.D. (1999), The UCI KDD Archive http://kdd.ics.uci.edu. 12 | Irvine, CA: University of California, Department of Information and Computer Science. 13 | } 14 | \usage{ 15 | data(algae) 16 | } 17 | \description{ 18 | Each of the 90 rows of the data set is a set of measurements at a river in some place in Europe. 19 | There are 11 predictors. The response is the logarithm of the abundance of a certain 20 | class of algae. Description: The columns are: 21 | 1. season, categorical (1,2,3,4 for winter, spring, summer and autumn) 22 | 2. river size (categorical) (1,2,3 for small, medium and large) 23 | 3. fluid velocity (categorical) (1,2,3 for low, medium and high) 24 | 4-11 (numeric): content of nitrogen in the form of nitrates, nitrites and ammonia, and other 25 | chemical compounds. 26 | Col. 12 is the response: abundance of a type of algae (type 6 in the complete file). For 27 | simplicity we deleted the rows with missing values and took the logarithm of the response. 28 | } 29 | \details{ 30 | Format: 90 rows, 12 columns (3 categorical, 9 numeric) 31 | } 32 | \examples{ 33 | data(algae) 34 | } 35 | \references{ 36 | References go here. 
37 | } 38 | \keyword{datasets} 39 | -------------------------------------------------------------------------------- /man/rhoprime2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/psiFuns.R 3 | \name{rhoprime2} 4 | \alias{rhoprime2} 5 | \title{The second derivative of the rho function} 6 | \usage{ 7 | rhoprime2(u, family, cc, standardize = FALSE) 8 | } 9 | \arguments{ 10 | \item{u}{point or vector at which rho is to be evaluated} 11 | 12 | \item{family}{family string specifying the name of the family of loss function to be used (current valid 13 | options are "bisquare", "opt" and "mopt").} 14 | 15 | \item{cc}{tuning parameters to be computed according to efficiency and / or breakdown 16 | considerations. See \link{lmrobdet.control}, \link{bisquare}, \link{mopt} 17 | and \link{opt}.} 18 | 19 | \item{standardize}{logical value determining whether the rho function is to be 20 | standardized so that its maximum value is 1. 
See \code{Mpsi}.} 21 | } 22 | \value{ 23 | The value of the second derivative of \code{rho} evaluated at \code{u} 24 | } 25 | \description{ 26 | The second derivative of the rho function 27 | } 28 | \examples{ 29 | # Evaluate the 2nd derivative of a rho function tuned for 85\% efficiency 30 | rhoprime2(u=1.1, family='bisquare', cc=bisquare(.85)) 31 | # Evaluate the 2nd derivative of a rho function tuned for 50\% breakdown 32 | rhoprime2(u=1.1, family='opt', cc=lmrobdet.control(bb=.5, family='opt')$tuning.chi) 33 | 34 | } 35 | \author{ 36 | Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 37 | } 38 | -------------------------------------------------------------------------------- /man/SMPY.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DCML.R 3 | \name{SMPY} 4 | \alias{SMPY} 5 | \title{SM regression estimator using Pen~a-Yohai candidates} 6 | \usage{ 7 | SMPY(mf, y, control, split) 8 | } 9 | \arguments{ 10 | \item{mf}{model frame} 11 | 12 | \item{y}{response vector} 13 | 14 | \item{control}{a list of control parameters as returned by \code{\link{lmrobdet.control}}} 15 | 16 | \item{split}{a list as returned by \code{splitFrame} containing the continuous and 17 | dummy components of the design matrix} 18 | } 19 | \value{ 20 | an \code{lmrob} object with the M-estimator 21 | obtained starting from the MS-estimator computed with the 22 | Pen~a-Yohai initial candidates. The properties of the final 23 | estimator (efficiency, etc.) are determined by the tuning constants in 24 | the argument \code{control}. 25 | } 26 | \description{ 27 | This function computes a robust regression estimator when there 28 | are categorical / dummy explanatory variables. It uses Pen~a-Yohai 29 | candidates for the S-estimator. This function is used 30 | internally by \code{\link{lmrobdetMM}}, and not meant to be used 31 | directly. 
32 | } 33 | \references{ 34 | \url{http://www.wiley.com/go/maronna/robust} 35 | } 36 | \seealso{ 37 | \code{\link{DCML}}, \code{\link{MMPY}}, \code{\link{SMPY}} 38 | } 39 | \author{ 40 | Victor Yohai, \email{victoryohai@gmail.com}, Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 41 | } 42 | -------------------------------------------------------------------------------- /man/DCML.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DCML.R 3 | \name{DCML} 4 | \alias{DCML} 5 | \title{DCML regression estimator} 6 | \usage{ 7 | DCML(x, y, z, z0, control) 8 | } 9 | \arguments{ 10 | \item{x}{design matrix} 11 | 12 | \item{y}{response vector} 13 | 14 | \item{z}{robust fit as returned by \code{\link{MMPY}} or \code{\link{SMPY}}} 15 | 16 | \item{z0}{least squares fit as returned by \code{\link{lm.fit}}} 17 | 18 | \item{control}{a list of control parameters as returned by \code{\link{lmrobdet.control}}} 19 | } 20 | \value{ 21 | a list with the following components 22 | \item{coefficients}{the vector of regression coefficients} 23 | \item{cov}{the estimated covariance matrix of the DCML regression estimator} 24 | \item{residuals}{the vector of regression residuals from the DCML fit} 25 | \item{scale}{a robust residual (M-)scale estimate} 26 | \item{t0}{the mixing proportion between the least squares and robust regression estimators} 27 | } 28 | \description{ 29 | This function computes the DCML regression estimator. This function is used 30 | internally by \code{\link{lmrobdetDCML}}, and not meant to be used 31 | directly. 
32 | } 33 | \references{ 34 | \url{http://www.wiley.com/go/maronna/robust} 35 | } 36 | \seealso{ 37 | \code{\link{DCML}}, \code{\link{MMPY}}, \code{\link{SMPY}} 38 | } 39 | \author{ 40 | Victor Yohai, \email{victoryohai@gmail.com}, Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 41 | } 42 | -------------------------------------------------------------------------------- /inst/scripts/skin.R: -------------------------------------------------------------------------------- 1 | # skin.R 2 | # Example 7.2 3 | # Figure 7.5 4 | 5 | library(RobStatTM) 6 | data(skin) 7 | 8 | Xskin <- as.matrix( skin[, 1:2] ) 9 | yskin <- skin$vasoconst 10 | #weighted M fit 11 | skinWBY <- logregWBY(Xskin, yskin, intercept=1) 12 | 13 | # ML fit 14 | skinML <- glm(formula=vasoconst~logVOL+logRATE, family = binomial, data = skin) 15 | 16 | #Figure 7.5 17 | dev1 <- abs(skinWBY$residual.deviances) 18 | dev2 <- abs(resid(skinML, type='deviance')) 19 | sdev1 <- sort(dev1) 20 | sdev2 <- sort(dev2) 21 | tt <- c(18,4) 22 | uu <- order(tt) 23 | xuu <- ppoints(39)[38:39] 24 | yuu1 <- sdev1[38:39] 25 | yuu2 <- sdev2[38:39] 26 | tx <- c("18","4") 27 | 28 | plot(ppoints(39), sdev1, type="b", pch=1, xlab="quantiles", ylab= "deviance residuals") 29 | lines(ppoints(39), sdev2, type="b", pch=2) 30 | text(xuu, yuu1 + .1, tt) 31 | text(xuu, yuu2 + .1, tt) 32 | legend(x="topleft", legend=c("weighted M","maximum likelihood"), pch=c(1,2)) 33 | 34 | 35 | # other estimates 36 | # M fit 37 | skinBY <- logregBY(Xskin, yskin, intercept=1) 38 | 39 | #cubif fit 40 | ufact <- 1.1 41 | ctrl <- robcbi::cubinf.control(ufact=ufact) 42 | yy <- skin$vasoconst 43 | XX <- cbind(rep(1, length(yy)), skin$logVOL, skin$logRATE) 44 | skinCUBIF <- robcbi::cubinf(XX, yy, family=binomial(), null.dev = FALSE, control=ctrl) 45 | 46 | 47 | # weighted ML fit 48 | skinWML <- logregWML(Xskin, yskin, intercept=1) 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 
-------------------------------------------------------------------------------- /man/rob.linear.test.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lmrobdet.R 3 | \name{lmrobdetLinTest} 4 | \alias{lmrobdetLinTest} 5 | \alias{rob.linear.test} 6 | \title{Robust likelihood ratio test for linear hypotheses} 7 | \usage{ 8 | lmrobdetLinTest(object1, object2) 9 | } 10 | \arguments{ 11 | \item{object1}{an \code{lmrobdetMM} or \code{lmrobM} object with the fit corresponding to the complete model} 12 | 13 | \item{object2}{an \code{lmrobdetMM} or \code{lmrobM} object with the fit corresponding to the model 14 | restricted under the null linear hypothesis.} 15 | } 16 | \value{ 17 | A list with the following components: c("test","chisq.pvalue","f.pvalue","df") 18 | \item{test}{The value of the F-statistic} 19 | \item{f.pvalue}{p-value based on the F distribution} 20 | \item{chisq.pvalue}{p-value based on the chi-squared distribution} 21 | \item{df}{degrees of freedom} 22 | } 23 | \description{ 24 | This function computes a robust likelihood ratio test for linear hypotheses. 
25 | } 26 | \examples{ 27 | data(oats) 28 | cont <- lmrobdet.control(bb = 0.5, efficiency = 0.85, family = "bisquare") 29 | oats1M <- lmrobM(response1 ~ variety+block, control=cont, data=oats) 30 | oats1M_var <- lmrobM(response1 ~ block, control=cont, data=oats) 31 | ( anov1M_var <- rob.linear.test(oats1M, oats1M_var) ) 32 | 33 | } 34 | \references{ 35 | \url{http://www.wiley.com/go/maronna/robust} 36 | } 37 | \author{ 38 | Victor Yohai, \email{vyohai@gmail.com} 39 | } 40 | -------------------------------------------------------------------------------- /man/rho.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/psiFuns.R 3 | \name{rho} 4 | \alias{rho} 5 | \title{Rho functions} 6 | \usage{ 7 | rho(u, family = " bisquare", cc, standardize = TRUE) 8 | } 9 | \arguments{ 10 | \item{u}{point or vector at which rho is to be evaluated} 11 | 12 | \item{family}{family string specifying the name of the family of loss function to be used (current valid 13 | options are "bisquare", "opt" and "mopt").} 14 | 15 | \item{cc}{tuning parameters to be computed according to efficiency and / or breakdown 16 | considerations. See \link{lmrobdet.control}, \link{bisquare}, \link{mopt} 17 | and \link{opt}.} 18 | 19 | \item{standardize}{logical value determining whether the rho function is to be 20 | standardized so that its maximum value is 1. See \code{Mpsi}.} 21 | } 22 | \value{ 23 | The value(s) of \code{rho} at \code{u} 24 | } 25 | \description{ 26 | This function returns the value of the "rho" loss function used 27 | to compute either an M-scale estimator or a robust regression 28 | estimator. It currently can be used to compute the bisquare, optimal 29 | and modified optimal loss functions. 
30 | } 31 | \examples{ 32 | # Evaluate rho tuned for 85\% efficiency 33 | rho(u=1.1, family='bisquare', cc=bisquare(.85)) 34 | # Evaluate rho tuned for 50\% breakdown 35 | rho(u=1.1, family='opt', cc=lmrobdet.control(bb=.5, family='opt')$tuning.chi) 36 | 37 | } 38 | \author{ 39 | Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 40 | } 41 | -------------------------------------------------------------------------------- /man/bus.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bus.R 3 | \docType{data} 4 | \name{bus} 5 | \alias{bus} 6 | \title{Bus data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Hettich, S. and Bay, S.D. (1999), The UCI KDD Archive http://kdd.ics.uci.edu. 12 | Irvine, CA: University of California, Department of Information and Computer Science. 13 | } 14 | \usage{ 15 | data(bus) 16 | } 17 | \description{ 18 | This data set corresponds to a study in automatic vehicle recognition. 19 | Each of the 218 rows corresponds to a view of a bus silhouette, and contains 20 | 18 attributes of the image. It was decided to exclude variable 9 and divide the 21 | remaining variables by their MADN’s. 22 | } 23 | \details{ 24 | Description: The following features were extracted from the silhouettes. 25 | 1. compactness 26 | 2. circularity 27 | 3. distance circularity 28 | 4. radius ratio 29 | 5. principal axis aspect ratio 30 | 6. maximum length aspect ratio 31 | 7. scatter ratio 32 | 8. elongatedness 33 | 9. principal axis rectangularity 34 | 10. maximum length rectangularity 35 | 11. scaled variance along major axis 36 | 12. scaled variance along minor axis 37 | 13. scaled radius of gyration 38 | 14. skewness about major axis 39 | 15. skewness about minor axis 40 | 16. kurtosis about minor axis 41 | 17. kurtosis about major axis 42 | 18. hollows ratio 43 | 44 | Format: Numeric, 218 rows and 18 columns. 
45 | } 46 | \examples{ 47 | data(bus) 48 | } 49 | \keyword{datasets} 50 | -------------------------------------------------------------------------------- /man/vehicle.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/vehicle.R 3 | \docType{data} 4 | \name{vehicle} 5 | \alias{vehicle} 6 | \title{Vehicle data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Turing Institute, Glasgow, and are available at https://archive.ics.uci.edu/ml/datasets/Statlog+(Vehicle+Silhouettes). 12 | } 13 | \usage{ 14 | data(vehicle) 15 | } 16 | \description{ 17 | The original data set contains an ensemble of shape feature extractors to the 18 | 2D silhouettes of different vehicles. The purpose is to classify a given 19 | silhouette as one of four types of vehicle, using a set of 18 features extracted 20 | from the silhouette. Here we deal with the "van" type, which has 217 cases. 21 | Description; The following features were extracted from the silhouettes. 22 | 1. compactness 23 | 2. circularity 24 | 3. distance circularity 25 | 4. radius ratio 26 | 5. principal axis aspect ratio 27 | 6. maximum length aspect ratio 28 | 7. scatter ratio 29 | 8. elongatedness 30 | 9. principal axis rectangularity 31 | 10. maximum length rectangularity 32 | 11. scaled variance along major axis 33 | 12. scaled variance along minor axis 34 | 13. scaled radius of gyration 35 | 14. skewness about major axis 36 | 15. skewness about minor axis 37 | 16. kurtosis about minor axis 38 | 17. kurtosis about major axis 39 | 18. hollows ratio 40 | } 41 | \details{ 42 | Format: Numeric, 217 rows and 18 columns. 
43 | } 44 | \examples{ 45 | data(vehicle) 46 | } 47 | \keyword{datasets} 48 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | S3method(drop1,lmrobdetMM) 4 | S3method(hatvalues,lmrob) 5 | S3method(print,lmrobdetMM) 6 | S3method(print,lsRobTest) 7 | S3method(print,prcompRob) 8 | S3method(print,summary.covClassic) 9 | S3method(print,summary.covRob) 10 | S3method(print,summary.lmrobdetMM) 11 | S3method(print,summary.prcompRob) 12 | S3method(summary,covClassic) 13 | S3method(summary,covRob) 14 | S3method(summary,lmrobdetMM) 15 | S3method(summary,prcompRob) 16 | export(BYlogreg) 17 | export(DCML) 18 | export(INVTR2) 19 | export(KurtSDNew) 20 | export(MLocDis) 21 | export(MMPY) 22 | export(MMultiSHR) 23 | export(Multirobu) 24 | export(RockeMulti) 25 | export(SMPCA) 26 | export(SMPY) 27 | export(WBYlogreg) 28 | export(WMLlogreg) 29 | export(bisquare) 30 | export(cov.dcml) 31 | export(covClassic) 32 | export(covRob) 33 | export(covRobMM) 34 | export(covRobRocke) 35 | export(fastmve) 36 | export(huber) 37 | export(initPP) 38 | export(lmrobM) 39 | export(lmrobM.control) 40 | export(lmrobdet.control) 41 | export(lmrobdetDCML) 42 | export(lmrobdetLinTest) 43 | export(lmrobdetMM) 44 | export(lmrobdetMM.RFPE) 45 | export(locScaleM) 46 | export(logregBY) 47 | export(logregWBY) 48 | export(logregWML) 49 | export(lsRobTestMM) 50 | export(mopt) 51 | export(moptv0) 52 | export(opt) 53 | export(optv0) 54 | export(pcaRobS) 55 | export(prcompRob) 56 | export(refine.sm) 57 | export(rho) 58 | export(rhoprime) 59 | export(rhoprime2) 60 | export(rob.linear.test) 61 | export(scaleM) 62 | export(step.lmrobdetMM) 63 | import(stats) 64 | useDynLib(RobStatTM, .registration = TRUE) 65 | -------------------------------------------------------------------------------- /inst/scripts/shock.R: 
-------------------------------------------------------------------------------- 1 | # shock.R 2 | # EXAMPLE 4.1 3 | # Figures 4.1 4 | # Table 4.1 5 | 6 | library(RobStatTM) 7 | 8 | data(shock) 9 | cont <- lmrobdet.control(bb = 0.5, efficiency = 0.85, family = "bisquare") 10 | 11 | #LS fit 12 | shockls <- lm(time ~ n.shocks, data = shock) 13 | 14 | #LS fit without outliers 15 | shockls124 <- lm(time ~ n.shocks, data = shock, subset = -c(1, 2, 4)) 16 | 17 | #-------------------- 18 | #Figure 4.1 19 | plot(time ~ n.shocks, data=shock, xlab="number of shocks", ylab="average time", pch=19, cex=1.3) 20 | abline(shockls124, lwd=2, col='gray30') 21 | abline(shockls, lwd=2, col='tomato') 22 | text(shock[c(1,2,4),1],shock[c(1,2,4),2]-.4, labels=c("1","2","4"), cex=1.2) 23 | text(1,10.5,"LS", col='tomato', cex=1.3) 24 | text(1,7.2,"LS -", col='gray30', cex=1.3) 25 | #--------------------------------- 26 | 27 | #L1 fit 28 | shockl1 <- quantreg::rq(time~n.shocks , data=shock) 29 | 30 | # M fit 31 | shockrob <- lmrobM(time ~ n.shocks, data = shock,control=cont) 32 | 33 | 34 | #-------------------------- 35 | #Figure 4.3 36 | plot(time ~ n.shocks, data=shock, xlab="number of shocks", ylab="average time", pch=19, cex=1.3) 37 | abline(shockls124, lwd=2, col='gray30') 38 | abline(shockls, lwd=2, col='tomato') 39 | abline(shockrob, lwd=2, col='green') 40 | abline(shockl1, lwd=2, col='blue') 41 | text(shock[c(1,2,4),1],shock[c(1,2,4),2]-.4, labels=c("1","2","4"), cex=1.3) 42 | text(1,10.5,"LS", cex=1.3) 43 | text(1,7.2,"LS -", cex=1.3) 44 | text(1,8.3,"L1", cex=1.3) 45 | text(1,7.7,"M", cex=1.3) 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /inst/scripts/leukemia.R: -------------------------------------------------------------------------------- 1 | # leukemia.R 2 | # Example 7.1 3 | # Figure 7.4 4 | # Table 7.1 5 | 6 | 7 | library(RobStatTM) 8 | 9 | # Must install robust 10 
| data(leuk.dat) 11 | 12 | Xleuk <-as.matrix( leuk.dat[, 1:2] ) 13 | yleuk <- leuk.dat$y 14 | # weighted M fit 15 | leukWBY <- logregWBY(Xleuk, yleuk, intercept=1) 16 | pr1 <- as.vector( leukWBY$fitted.values ) 17 | 18 | # ML fit 19 | leukML <- glm(formula = y ~ wbc + ag, family = binomial, data = leuk.dat) 20 | 21 | #-------------------------------- 22 | #Figure 7.4 23 | 24 | dev1 <- abs(leukWBY$residual.deviances) 25 | dev2 <- abs(resid(leukML, type='deviance')) 26 | n <- length(dev1) 27 | ord1 <- order(dev1) 28 | sdev1 <- sort(dev1) # dev1[ord1] 29 | sdev2 <- sort(dev2) #[ord2] # typo in leukemia.R 30 | 31 | plot(ppoints(n), sdev1, type="b",pch=1,xlab="quantiles", ylab= " deviance residuals") 32 | lines(ppoints(n), sdev2, type="b",pch=2) 33 | xuu <- ppoints(n)[n] 34 | text(xuu - .03, max(sdev1) + .1, ord1[n]) 35 | text(xuu, max(sdev2) + .3, order(dev2)[n]) 36 | legend(x="topleft",legend=c("weighted M","maximum likelihood"), pch=c(1,2)) 37 | 38 | 39 | #other estimates 40 | #M fit 41 | leukBY <- logregBY(Xleuk, yleuk, intercept=1) 42 | 43 | #cubif fit 44 | ufact <- 1.1 45 | ctrl <- robcbi::cubinf.control(ufact=ufact) 46 | yy <- leuk.dat$y 47 | XX <- cbind(rep(1, length(yy)), leuk.dat$wbc, leuk.dat$ag) 48 | leukCUBIF <- robcbi::cubinf(XX, yy, family=binomial(), null.dev = FALSE, control=ctrl) 49 | 50 | 51 | #weighted ML fit 52 | leukWML <- logregWML(Xleuk, yleuk, intercept=1) 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | -------------------------------------------------------------------------------- /inst/scripts/fitmodelsRobStatTM.R: -------------------------------------------------------------------------------- 1 | # Section 1: Introduction 2 | install.packages("fit.models") 3 | library(RobStatTM) 4 | library("fit.models") 5 | 6 | # Section 2: fit.models for LS and lmrobdetMM model fits 7 | fmclass.add.class("lmfm","lmrobdetMM") 8 | LSfit <- lm(zinc ~ copper, data = mineral) 9 | control <- lmrobdet.control(family = "mopt",efficiency = 
0.95) 10 | # The choices "mopt" and 0.95 are defaults 11 | robfit <- lmrobdetMM(zinc ~ copper, control = control, data = mineral) 12 | fmLSrob <- fit.models(LSfit,robfit) 13 | class(fmLSrob) 14 | names(fmLSrob) 15 | round(coef(fmLSrob),3) 16 | summary(fmLSrob) 17 | 18 | help(summary.lmfm) 19 | help(plot.lmfm) 20 | 21 | plot(fmLSrob) # View plot types sequentially at the RStudio Console 22 | plot(fmLSrob,which.plots = "ask") # Choose plot types at Console 23 | plot(fmLSrob,which.plots = 10) 24 | plot(fmLSrob,which.plots = 7) 25 | plot(fmLSrob,which.plots = 1) 26 | plot(fmLSrob, which.plots = c(3,4)) # View two plot types at Console 27 | 28 | fmLSonly <- fit.models(LSfit) 29 | class(fmLSonly) 30 | names(fmLSonly) 31 | coef(fmLSonly) 32 | plot(fmLSonly, which.plots = "ask") 33 | 34 | # Section 3: fit.models for covClassic and covRob 35 | data(wine) 36 | wine3 <- wine[,1:3] 37 | fmCovRob <- fit.models(Classic = covClassic(wine3), 38 | Robust = covRob(wine3,type = "auto")) 39 | class(fmCovRob) 40 | summary(fmCovRob) 41 | 42 | names(fmCovRob) 43 | names(fmCovRob$Classic) 44 | names(fmCovRob$Robust) 45 | 46 | help(plot.covfm) 47 | 48 | plot(fmCovRob,which.plot = 1) 49 | plot(fmCovRob,which.plot = 2) 50 | plot(fmCovRob,which.plot = 3) 51 | plot(fmCovRob,which.plot = 4) 52 | 53 | 54 | -------------------------------------------------------------------------------- /man/WMLlogreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/WMLlogreg.R 3 | \name{logregWML} 4 | \alias{logregWML} 5 | \alias{WMLlogreg} 6 | \title{Weighted likelihood estimator for the logistic model} 7 | \usage{ 8 | logregWML(x0, y, intercept = 1) 9 | } 10 | \arguments{ 11 | \item{x0}{p x n matrix of explanatory variables, p is the number of explanatory variables, n is the number of observations} 12 | 13 | \item{y}{response vector} 14 | 15 | \item{intercept}{1 or 0 indicating if an 
intercept is included or not} 16 | } 17 | \value{ 18 | A list with the following components: 19 | \item{coefficients}{vector of regression coefficients} 20 | \item{standard.deviation}{standard deviations of the regression coefficient estimators} 21 | \item{fitted.values}{vector with the probabilities of success} 22 | \item{residual.deviances}{residual deviances} 23 | \item{cov}{covariance matrix of the regression estimates} 24 | \item{objective}{value of the objective function at the minimum} 25 | \item{xweights}{vector of zeros and ones used to compute the weighted maximum likelihood estimator} 26 | } 27 | \description{ 28 | This function computes a weighted likelihood estimator for the logistic model, where 29 | the weights penalize high leverage observations. In this version the weights are zero or one. 30 | } 31 | \examples{ 32 | data(skin) 33 | Xskin <- as.matrix( skin[, 1:2] ) 34 | yskin <- skin$vasoconst 35 | skinWML <- logregWML(Xskin, yskin, intercept=1) 36 | skinWML$coefficients 37 | skinWML$standard.deviation 38 | 39 | } 40 | \references{ 41 | \url{http://www.wiley.com/go/maronna/robust} 42 | } 43 | \author{ 44 | Victor Yohai 45 | } 46 | -------------------------------------------------------------------------------- /man/lmrobdetMM.RFPE.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RFPE.R 3 | \name{lmrobdetMM.RFPE} 4 | \alias{lmrobdetMM.RFPE} 5 | \title{Robust Final Prediction Error} 6 | \usage{ 7 | lmrobdetMM.RFPE(object, scale = NULL, bothVals = FALSE) 8 | } 9 | \arguments{ 10 | \item{object}{an object of class \code{lmrobdetMM} or \code{lmrobM}.} 11 | 12 | \item{scale}{a numeric value specifying the scale estimate used to compute the RFPE. Usually this 13 | should be the scale estimate from an encompassing model. 
If \code{NULL}, the scale estimate in 14 | \code{object} is used.} 15 | 16 | \item{bothVals}{a logical value: if \code{TRUE} the function returns the two terms of the RFPE expression separately (equation 17 | (5.39) in the reference book); otherwise, the value of RFPE is returned.} 18 | } 19 | \value{ 20 | If the argument \code{bothVals} is \code{FALSE}, the robust final prediction error (numeric). Otherwise, 21 | the two terms of the RFPE expression in equation (5.39), Section 5.6.2 of Maronna 22 | et al. (2019), \url{http://www.wiley.com/go/maronna/robust}, are returned separately 23 | in a list with components named \code{minRhoMM.C} and \code{penaltyRFPE} 24 | } 25 | \description{ 26 | This function computes the robust Final Prediction Errors (RFPE) for a robust regression fit using M-estimates. 27 | } 28 | \examples{ 29 | data(coleman, package='robustbase') 30 | m2 <- lmrobdetMM(Y ~ ., data=coleman) 31 | lmrobdetMM.RFPE(m2) 32 | 33 | } 34 | \references{ 35 | \url{http://www.wiley.com/go/maronna/robust} 36 | } 37 | \seealso{ 38 | \code{\link{lmrobdetMM}} 39 | } 40 | \author{ 41 | Victor Yohai, \email{victoryohai@gmail.com}, Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 42 | } 43 | -------------------------------------------------------------------------------- /inst/scripts/identMA1.R: -------------------------------------------------------------------------------- 1 | # identMA1.R 2 | # Example 8.4 3 | # Figures 8.9,8.10 4 | 5 | # Must install robustarima 6 | library(robustarima) 7 | 8 | set.seed(600) 9 | n.innov = 300 10 | n = 200 11 | theta= 0.8 12 | 13 | n.start = n.innov - n 14 | innov = rnorm(n.innov) 15 | 16 | x= arima.sim(model = list(ma = theta), n, innov = innov, n.start = n.start) 17 | ao = ifelse(runif(n)>.1, 0, rnorm(n,6,1)) 18 | ao = sign(runif(n,-1,1))*ao 19 | y = x + ao 20 | 21 | 22 | no=sum(ao!=0) 23 | # Figure 8.9 24 | par(mfrow=c(2,1)) 25 | plot(x, ylab=expression(x[t]),ylim=c(-9,9)) 26 | plot(y, ylab=expression(y[t]),ylim=c(-9,9)) 27 | 
ao.times = (1:n)[ao != 0] 28 | points(ao.times, y[ao != 0]) 29 | par(mfrow=c(1,1)) 30 | 31 | 32 | 33 | 34 | 35 | out=arima.rob(y~1, auto.ar=TRUE) 36 | #Figure 8.10 37 | par(mfrow=c(4,2)) 38 | acf1=acf(x ,10, plot=FALSE ) 39 | plot(acf1, main="acf of x") 40 | acf2=acf(x,10,"partial", plot=FALSE) 41 | plot(acf2, main="pcf of x") 42 | acf3=acf(y,10, plot=FALSE) 43 | plot(acf3, main="acf of y") 44 | acf4=acf(y,10,"partial", plot=FALSE) 45 | plot(acf4, main="pcf of y") 46 | #out$y.robust is the series filtered y 47 | acf5=acf(out$y.robust,10, plot=FALSE) 48 | plot(acf5, main="acf based on procedure (a)") 49 | acf6=acf(out$y.robust,10,"partial", plot=FALSE) 50 | plot(acf6, main="pcf based on procedure (a)") 51 | 52 | 53 | 54 | 55 | 56 | 57 | tank1=ARMAacf(ar =out$model$ar , lag.max = 10, pacf = FALSE) 58 | 59 | tank2=ARMAacf(ar = out$model$ar , lag.max = 10, pacf = TRUE) 60 | 61 | 62 | 63 | acf7=acf5 64 | acf8=acf6 65 | acf7$acf[,,1]=as.matrix(tank1) 66 | plot(acf7, main="acf based on procedure (b)") 67 | 68 | acf8$acf[,,1]=as.matrix(tank2) 69 | plot(acf8, main="pcf based on procedure (b)") 70 | 71 | 72 | 73 | 74 | -------------------------------------------------------------------------------- /man/KurtSDNew.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/KurtSDNew.R 3 | \name{initPP} 4 | \alias{initPP} 5 | \alias{KurtSDNew} 6 | \title{Robust multivariate location and scatter estimators} 7 | \usage{ 8 | initPP(X, muldirand = 20, muldifix = 10, dirmin = 1000) 9 | } 10 | \arguments{ 11 | \item{X}{a data matrix with observations in rows.} 12 | 13 | \item{muldirand}{used to determine the number of random directions (candidates), which 14 | is \code{max(p*muldirand, dirmin)}, where \code{p} is the number of columns in \code{X}.} 15 | 16 | \item{muldifix}{used to determine the number of random directions (candidates), which 17 | is \code{min(n, 
2*muldifix*p)}.} 18 | 19 | \item{dirmin}{minimum number of random directions} 20 | } 21 | \value{ 22 | A list with the following components: 23 | \item{idx}{A zero/one vector with ones in the positions of the suspected outliers} 24 | \item{disma}{Robust squared Mahalanobis distances} 25 | \item{center}{Robust mean estimate} 26 | \item{cova}{Robust covariance matrix estimate} 27 | \item{t}{Outlyingness of data points} 28 | } 29 | \description{ 30 | This function computes robust multivariate location and scatter 31 | estimators using both random and deterministic starting points. 32 | } 33 | \details{ 34 | This function computes robust multivariate location and scatter 35 | using both Pen~a-Prieto and random candidates. 36 | } 37 | \examples{ 38 | data(bus) 39 | X0 <- as.matrix(bus) 40 | X1 <- X0[,-9] 41 | tmp <- initPP(X1) 42 | round(tmp$cova[1:10, 1:10], 3) 43 | tmp$center 44 | 45 | } 46 | \references{ 47 | \url{http://www.wiley.com/go/maronna/robust} 48 | } 49 | \author{ 50 | Ricardo Maronna, \email{rmaronna@retina.ar}, based on original code 51 | by D. Pen~a and J. 
Prieto 52 | } 53 | -------------------------------------------------------------------------------- /inst/scripts/ar3.R: -------------------------------------------------------------------------------- 1 | # ar3.R 2 | # Simulated AR3 data 3 | # true parameters 4/3, -5/6 , 1/6 4 | # Table 8.1 5 | 6 | library(RobStatTM) 7 | # Must install robustarima 8 | library(robustarima) 9 | 10 | 11 | set.seed(600) 12 | n.innov = 300 13 | n = 200 14 | phi=c(4/3, -5/6,1/6 ) 15 | 16 | n.start = n.innov - n 17 | innov = rnorm(n.innov) 18 | cont=lmrobdet.control(bb = 0.5, efficiency = 0.85, family = "bisquare") 19 | ar3= arima.sim(model = list(ar = phi), n, innov = innov, n.start = n.start) 20 | 21 | 22 | 23 | 24 | #no outliers 25 | ar3lm=lm(ar3[4:200]~ ar3[3:199]+ar3[2:198]+ar3[1:197]) 26 | ar3mm=lmrobdetMM(ar3[4:200]~ ar3[3:199]+ar3[2:198]+ar3[1:197] ) 27 | ar3tau=arima.rob(ar3~1, p=3) 28 | ar3tau$regcoef=ar3tau$regcoef*(1-sum(ar3tau$model$ar)) 29 | 30 | summary(ar3lm) 31 | summary(ar3mm) 32 | ar3tau 33 | 34 | #5% of outliers 35 | 36 | 37 | ao=rep(0,n) 38 | tt=seq(20,200,20) 39 | ao[tt]=4 40 | 41 | 42 | 43 | ar3c5 =ar3 +ao 44 | ar3c5lm=lm(ar3c5[4:200]~ ar3c5[3:199]+ar3c5[2:198]+ar3c5[1:197]) 45 | ar3c5mm=lmrobdetMM(ar3c5[4:200]~ ar3c5[3:199]+ar3c5[2:198]+ar3c5[1:197], control=cont ) 46 | ar3c5tau=arima.rob(ar3c5~1, p=3) 47 | ar3c5tau$regcoef=ar3c5tau$regcoef*(1-sum(ar3c5tau$model$ar)) 48 | summary(ar3c5lm) 49 | summary(ar3c5mm) 50 | ar3c5tau 51 | 52 | 53 | #10% of outliers 54 | 55 | 56 | ao=rep(0,n) 57 | tt=seq(10,200,10) 58 | ao[tt]=4 59 | 60 | ar3c10=ar3 +ao 61 | ar3c10lm=lm(ar3c10[4:200]~ ar3c10[3:199]+ar3c10[2:198]+ar3c10[1:197]) 62 | ar3c10mm=lmrobdetMM(ar3c10[4:200]~ ar3c10[3:199]+ar3c10[2:198]+ar3c10[1:197],control=cont) 63 | ar3c10tau=arima.rob(ar3c10~1, p=3) 64 | ar3c10tau$regcoef=ar3c10tau$regcoef*(1-sum(ar3c10tau$model$ar)) 65 | summary(ar3c10lm) 66 | summary(ar3c10mm) 67 | ar3c10tau 68 | 69 | 70 | 71 | 72 | 73 | 74 | 
-------------------------------------------------------------------------------- /man/SMPCA.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RobPCA_SM.R 3 | \name{pcaRobS} 4 | \alias{pcaRobS} 5 | \alias{SMPCA} 6 | \title{Robust principal components} 7 | \usage{ 8 | pcaRobS(X, ncomp, desprop = 0.9, deltasca = 0.5, maxit = 100) 9 | } 10 | \arguments{ 11 | \item{X}{a data matrix with observations in rows.} 12 | 13 | \item{ncomp}{desired (maximum) number of components} 14 | 15 | \item{desprop}{desired (minimum) proportion of explained variability (default = 0.9)} 16 | 17 | \item{deltasca}{"delta" parameter of the scale M-estimator (default=0.5)} 18 | 19 | \item{maxit}{maximum number of iterations (default= 100)} 20 | } 21 | \value{ 22 | A list with the following components: 23 | \item{q}{The actual number of principal components} 24 | \item{propex}{The actual proportion of unexplained variability} 25 | \item{eigvec}{Eigenvectors, in a \code{p x q} matrix} 26 | \item{fit}{an \code{n x p} matrix with the rank-q approximation to \code{X}} 27 | \item{repre}{An \code{n x q} matrix with representation of data in R^q (scores)} 28 | \item{propSPC}{A vector of length \code{p} with the cumulative explained variance from initial SPC} 29 | } 30 | \description{ 31 | This function computes robust principal components based on the minimization of 32 | the "residual" M-scale. 33 | } 34 | \examples{ 35 | data(bus) 36 | X0 <- as.matrix(bus) 37 | X1 <- X0[,-9] 38 | ss <- apply(X1, 2, mad) 39 | mu <- apply(X1, 2, median) 40 | X <- scale(X1, center=mu, scale=ss) 41 | q <- 3 #compute three components 42 | rr <- pcaRobS(X, q, 0.99) 43 | round(rr$eigvec, 3) 44 | 45 | } 46 | \references{ 47 | \url{http://www.wiley.com/go/maronna/robust} 48 | } 49 | \author{ 50 | Ricardo Maronna, \email{rmaronna@retina.ar}, based on original code 51 | by D. Pen~a and J. 
Prieto 52 | } 53 | -------------------------------------------------------------------------------- /inst/scripts/wood.R: -------------------------------------------------------------------------------- 1 | # wood.R 2 | # EXAMPLE 5.2 3 | # Figures 5.8 - 5.12 4 | # wood data from robustbase 5 | 6 | # N.B. The 4 figures produced by the code below are very similar to Figures 7 | # 5.8 - 5.12 in the book, except the vertical scales are different due to 8 | # the use of the plot method 9 | 10 | library(RobStatTM) 11 | data(wood, package='robustbase') 12 | cont <- lmrobdet.control(bb = 0.5, efficiency = 0.85, family = "bisquare") 13 | 14 | # We now recommend to use family "mopt" with efficiency = .95 as defaults 15 | # Using those results in almost no change in Figures 5.11-5.12 16 | 17 | #MM fit 18 | woodMM <- lmrobdetMM(y ~ ., data=wood, control=cont) 19 | 20 | #LS fit 21 | woodLS <- lm(y ~ ., data=wood) 22 | 23 | #------------------------------------------------------- 24 | #Fig 5.8 25 | # Nothing happens in this figure 26 | # text(woodLS$fitted[28]+.03,woodLSresst[28],"28") 27 | sigmaLS <- summary(woodLS)$sigma 28 | plot(woodLS, which=1, add.smooth=FALSE, pch=19, id.n=2, cex.id = 1.2) 29 | abline(h=c(-2.5, 0, 2.5) * sigmaLS, lty=2, lwd=2) 30 | 31 | #---------------------------- 32 | #Figure 5.9 33 | plot(woodLS, which=2, pch=19) 34 | 35 | 36 | #------------------------------------------- 37 | # Fig. 
5.10 38 | plot(woodMM, which=4, add.smooth=FALSE, pch=19, cex.id=1.3, id.n=4) 39 | 40 | 41 | #-------------------------- 42 | #Figure 5.11 43 | plot(woodMM, which=2, add.smooth=FALSE, pch=19, cex.id=1.3, id.n=4) 44 | abline(h=c(-2.5, 0, 2.5) * woodMM$scale, lty=2) 45 | #---------------------------- 46 | 47 | #Figure 5.12 48 | wq <- abs(resid(woodMM)) > 2.5 * woodMM$scale # logical mask: still correct when no residual exceeds the cutoff 49 | plot(sort(abs(resid(woodLS)[!wq] ) ), sort(abs(resid(woodMM)[!wq])), pch=19, xlim=c(0, .05), ylim=c(0, .05)) 50 | abline(0,1) 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /inst/scripts/resex.R: -------------------------------------------------------------------------------- 1 | # resex.R 2 | # Example 8.6 3 | # Figures 8.12, 8.13 4 | # Table 8.5 5 | 6 | # Must install robustarima 7 | library(robustarima) 8 | 9 | # resex=scan("resex0.txt") uses wrong .txt file, correct code below 10 | # resex=scan("resex.txt") replaced below 11 | data(resex,package='RobStatTM') 12 | 13 | #filtered tau 14 | resxar=arima.rob(formula = resex~ 1, p = 2, sd = 1, sfreq = 12) 15 | 16 | #autoregressive coefficients 17 | arcoeftau=resxar$model$ar 18 | arcoeftau 19 | #mean of the differenced series 20 | meantau=resxar$regcoef 21 | names(meantau)="mean" 22 | meantau 23 | #intercept of the differenced series 24 | # intercepttau=meantau*(1-sum(arcoef)) incorrect arcoef, correct code below 25 | intercepttau=meantau*(1-sum(arcoeftau)) 26 | names(intercepttau)="intercept" 27 | # intercept # no object intercept, correct code below 28 | intercepttau 29 | #sorted innovations 30 | innovtau=sort(abs(resxar$innov[15:89])) 31 | 32 | #Figure 8.12 33 | 34 | plot(1:89, resex, type="l",xlab="index", ylab="RESEX") 35 | points(1:89, resxar$y.robust ) 36 | 37 | #LS 38 | 39 | sresx=resex[13:89]-resex[1:77] 40 | resxls=lm(formula = sresx[3:77] ~ sresx[2:76] + sresx[1:75]) 41 | #autoregressive coefficients 42 | 
arcoefls=coef(resxls)[2:3] 43 | names(arcoefls)=c("AR(1)", "AR(2)") 44 | arcoefls 45 | #intercept 46 | interceptls=coef(resxls)[1] 47 | names(interceptls)="intercept" 48 | interceptls 49 | 50 | #sorted innovations 51 | innovls=sort(abs(resxls$residuals)) 52 | 53 | # Figure 8.13 54 | 55 | ttt=ppoints(72) 56 | plot(ttt,innovls[1:72], type="l", xlab="probability",ylab="quantiles") 57 | lines(ttt,innovtau[1:72],lty=1) 58 | #lines(ttt,resxgminnov[1:72],lty=1) 59 | 60 | #text(c(.5,.5,.5),c(.83,1,1.25),c("TAU","GM","LS")) 61 | text(c(.5,.5),c(.83,1.25),c("TAU","LS")) 62 | 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /man/MLocDis.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/MLocDis.R 3 | \name{locScaleM} 4 | \alias{locScaleM} 5 | \alias{MLocDis} 6 | \title{Robust univariate location and scale M-estimators} 7 | \usage{ 8 | locScaleM(x, psi = "mopt", eff = 0.95, maxit = 50, tol = 1e-04, na.rm = FALSE) 9 | } 10 | \arguments{ 11 | \item{x}{a vector of univariate observations} 12 | 13 | \item{psi}{a string indicating which score function to use. Valid options are "bisquare", "huber", 14 | "opt" and "mopt".} 15 | 16 | \item{eff}{desired asymptotic efficiency. Valid options are 0.85, 0.9 and 0.95 (default) when 17 | \code{psi} = "bisquare" or "huber", and 0.85, 0.9, 0.95 (default) and 0.99 when 18 | \code{psi} = "opt" or "mopt".} 19 | 20 | \item{maxit}{maximum number of iterations allowed.} 21 | 22 | \item{tol}{tolerance to decide convergence of the iterative algorithm.} 23 | 24 | \item{na.rm}{a logical value indicating whether \code{NA} values should be stripped before 25 | the computation proceeds. 
Defaults to \code{FALSE}} 26 | } 27 | \value{ 28 | A list with the following components: 29 | \item{mu}{The location estimate} 30 | \item{std.mu}{Estimated standard deviation of the location estimator \code{mu}} 31 | \item{disper}{M-scale/dispersion estimate} 32 | } 33 | \description{ 34 | This function computes M-estimators for location and scale. 35 | } 36 | \details{ 37 | This function computes M-estimators for location and scale. 38 | } 39 | \examples{ 40 | set.seed(123) 41 | r <- rnorm(150, sd=1.5) 42 | locScaleM(r) 43 | # 10\% of outliers, sd of good points is 1.5 44 | set.seed(123) 45 | r2 <- c(rnorm(135, sd=1.5), rnorm(15, mean=-10, sd=.5)) 46 | locScaleM(r2) 47 | 48 | } 49 | \references{ 50 | \url{http://www.wiley.com/go/maronna/robust} 51 | } 52 | \author{ 53 | Ricardo Maronna, \email{rmaronna@retina.ar} 54 | } 55 | -------------------------------------------------------------------------------- /man/fastmve.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/fastmve.R 3 | \name{fastmve} 4 | \alias{fastmve} 5 | \title{Minimum Volume Ellipsoid covariance estimator} 6 | \usage{ 7 | fastmve(x, nsamp = 500) 8 | } 9 | \arguments{ 10 | \item{x}{data matrix (n x p) with cases stored in rows.} 11 | 12 | \item{nsamp}{number of random starts for the iterative algorithm, these 13 | are constructed using subsamples of the data.} 14 | } 15 | \value{ 16 | A list with the following components: 17 | \item{center}{a vector with the robust multivariate location estimate} 18 | \item{cov}{a matrix with the robust covariance / scatter matrix estimate} 19 | \item{scale}{A scalar that equals the median of the mahalanobis distances of the 20 | data to the \code{center}, multiplied by the determinant of the covariance matrix 21 | to the power 1/p} 22 | \item{best}{Indices of the observations that correspond to the MVE estimator} 23 | \item{nsamp}{Number of random 
starts used for the iterative algorithm} 24 | \item{nsing}{Number of random subsamples (among the \code{nsamp} attempted) 25 | that failed (resulting in singular initial values)} 26 | } 27 | \description{ 28 | This function uses a fast algorithm to compute the Minimum Volume 29 | Ellipsoid (MVE) for multivariate location and scatter. 30 | } 31 | \details{ 32 | This function computes the Minimum Volume 33 | Ellipsoid (MVE) for multivariate location and scatter, using a 34 | fast algorithm related to the fast algorithm for S-regression 35 | estimators (see \code{\link[robustbase]{lmrob}}). 36 | } 37 | \examples{ 38 | data(bus) 39 | X0 <- as.matrix(bus) 40 | X1 <- X0[,-9] 41 | tmp <- fastmve(X1) 42 | round(tmp$cov[1:10, 1:10], 3) 43 | tmp$center 44 | 45 | } 46 | \references{ 47 | \url{http://www.wiley.com/go/maronna/robust} 48 | } 49 | \author{ 50 | Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 51 | } 52 | -------------------------------------------------------------------------------- /inst/scripts/identAR2.R: -------------------------------------------------------------------------------- 1 | # identAR2.R 2 | # Example 8.3 3 | # Figure 8.7, 8.8 4 | 5 | # Must install robustarima 6 | library(robustarima) 7 | 8 | set.seed(700) 9 | n.innov = 300 10 | n = 200 11 | phi=c(4/3, -5/6 ) 12 | 13 | n.start = n.innov - n 14 | innov = rnorm(n.innov) 15 | 16 | x= arima.sim(model = list(ar = phi), n, innov = innov, n.start = n.start) 17 | ao = ifelse(runif(n)>.1, 0, rnorm(n,4,1)) 18 | ao = sign(runif(n,-1,1))*ao 19 | y = x + ao 20 | 21 | no=sum(ao!=0) # Number of additive outliers in y 22 | 23 | #Figure 8.7 24 | par(mfrow=c(2,1)) 25 | plot(x, ylab=expression(x[t]),ylim=c(-9,9)) 26 | plot(y, ylab=expression(y[t]),ylim=c(-9,9)) 27 | ao.times = (1:n)[ao != 0] 28 | points(ao.times, y[ao != 0]) 29 | par(mfrow=c(1,1)) 30 | 31 | # Robust automatic AR(p) fit 32 | out=arima.rob(y~1, auto.ar=TRUE) 33 | 34 | # Warning message, that optimization in arima.rob did not converge, 35 | # did 
not affect the following code results 36 | 37 | #Figure 8.8 38 | par(mfrow=c(4,2)) 39 | acf1=acf(x ,10, plot=FALSE ) 40 | plot(acf1, main="acf of x") 41 | acf2=acf(x,10,"partial", plot=FALSE) 42 | plot(acf2, main="pcf of x") 43 | acf3=acf(y,10, plot=FALSE) 44 | plot(acf3, main="acf of y") 45 | acf4=acf(y,10,"partial", plot=FALSE) 46 | plot(acf4, main="pcf of y") 47 | 48 | # Procedure (A), out$y.robust is the filtered series y 49 | acf5=acf(out$y.robust,10, plot=FALSE) 50 | plot(acf5, main="acf based on procedure (a)") 51 | acf6=acf(out$y.robust,10,"partial", plot=FALSE) 52 | plot(acf6, main="pcf based on procedure (a)") 53 | 54 | # Procedure (B) 55 | tank1=ARMAacf(ar = out$model$ar, lag.max = 10, pacf = FALSE) 56 | tank2=ARMAacf(ar = out$model$ar, lag.max = 10, pacf = TRUE) 57 | acf7=acf5 58 | acf8=acf6 59 | acf7$acf[,,1]=as.matrix(tank1) 60 | plot(acf7, main="acf based on procedure (b)") 61 | acf8$acf[,,1]=as.matrix(tank2) 62 | plot(acf8, main="pcf based on procedure (b)") 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: RobStatTM 2 | Version: 1.0.12 3 | Date: 2025-03-11 4 | Title: Robust Statistics: Theory and Methods 5 | Authors@R: c(person("Matias", "Salibian-Barrera", role = c("cre"), email = "matias@stat.ubc.ca"), 6 | person("Victor", "Yohai", role = "aut", email = "vyohai@gmail.com"), 7 | person("Ricardo", "Maronna", role="aut", email= "rmaronna@retina.ar"), 8 | person("Doug", "Martin", role="aut", email="martinrd3d@gmail.com"), 9 | person("Gregory", "Brownson", role="aut", email="gregory.brownson@gmail.com", comment="ShinyUI"), 10 | person("Kjell", "Konis", role="aut", email="kjellk@gmail.com"), 11 | person("Kjell", "Konis", role="cph", email="kjellk@gmail.com", comment="erfi"), 12 | person("Christophe", "Croux", role="ctb", email="christophe.croux@edhec.edu", comment="WBYlogreg, 
BYlogreg"), 13 | person("Gentiane", "Haesbroeck", role="ctb", email="G.Haesbroeck@uliege.be", comment="WBYlogreg, BYlogreg"), 14 | person("Martin", "Maechler", role="cph", email="maechler@stat.math.ethz.ch", comment="lmrob.fit, lmrob..M..fit, lmrob.S"), 15 | person("Manuel", "Koller", role="cph", email="koller.manuel@gmail.com", comment="lmrob.fit, .vcov.avar1, lmrob.S, lmrob.lar"), 16 | person("Matias", "Salibian-Barrera", role="aut", email="matias@stat.ubc.ca") 17 | ) 18 | Description: Companion package for the book: "Robust Statistics: Theory and Methods, second edition", <http://www.wiley.com/go/maronna/robust>. This package contains code that implements the robust estimators discussed in the recent second edition of the book above, as well as the scripts reproducing all the examples in the book. 19 | Depends: R (>= 3.5.0) 20 | Imports: stats, pyinit, rrcov, robustbase 21 | Suggests: R.rsp 22 | LazyData: yes 23 | License: GPL (>= 3) 24 | RoxygenNote: 7.3.2 25 | Encoding: UTF-8 26 | VignetteBuilder: R.rsp 27 | -------------------------------------------------------------------------------- /man/MMultiSHR.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Multirobu.R 3 | \name{covRobMM} 4 | \alias{covRobMM} 5 | \alias{MMultiSHR} 6 | \title{MM robust multivariate location and scatter estimator} 7 | \usage{ 8 | covRobMM(X, maxit = 50, tolpar = 1e-04, corr = FALSE) 9 | } 10 | \arguments{ 11 | \item{X}{a data matrix with observations in rows.} 12 | 13 | \item{maxit}{Maximum number of iterations.} 14 | 15 | \item{tolpar}{Tolerance to decide convergence.} 16 | 17 | \item{corr}{A logical value. If \code{TRUE} a correlation matrix is included in the element \code{cor} of the returned object. 
Defaults to \code{FALSE}.} 18 | } 19 | \value{ 20 | A list with class \dQuote{covRob} containing the following elements 21 | \item{center}{The location estimate.} 22 | \item{cov}{The scatter matrix estimate, scaled for consistency at the normal distribution. Same as \code{V} below.} 23 | \item{cor}{The correlation matrix estimate, if the argument \code{corr} equals \code{TRUE}. Otherwise it is set to \code{NULL}.} 24 | \item{dist}{Robust Mahalanobis distances} 25 | \item{wts}{weights} 26 | \item{call}{an image of the call that produced the object with all the arguments named. The matched call.} 27 | \item{mu}{The location estimate. Same as \code{center} above.} 28 | \item{V}{The scatter or correlation matrix estimate, scaled for consistency at the normal distribution} 29 | } 30 | \description{ 31 | This function computes an MM robust estimator for multivariate location and scatter with the "SHR" loss function. 32 | } 33 | \details{ 34 | This function computes an MM robust estimator for multivariate location and scatter with the "SHR" loss function. 
35 | } 36 | \examples{ 37 | data(bus) 38 | X0 <- as.matrix(bus) 39 | X1 <- X0[,-9] 40 | tmp <- covRobMM(X1) 41 | round(tmp$cov[1:10, 1:10], 3) 42 | tmp$mu 43 | 44 | } 45 | \references{ 46 | \url{http://www.wiley.com/go/maronna/robust} 47 | } 48 | \author{ 49 | Ricardo Maronna, \email{rmaronna@retina.ar} 50 | } 51 | -------------------------------------------------------------------------------- /man/BYlogreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/BYlogreg.R 3 | \name{logregBY} 4 | \alias{logregBY} 5 | \alias{BYlogreg} 6 | \title{Bianco and Yohai estimator for logistic regression} 7 | \usage{ 8 | logregBY(x0, y, intercept = 1, const = 0.5, kmax = 1000, maxhalf = 10) 9 | } 10 | \arguments{ 11 | \item{x0}{matrix of explanatory variables;} 12 | 13 | \item{y}{vector of binomial responses (0 or 1);} 14 | 15 | \item{intercept}{1 or 0 indicating if an intercept is included or not} 16 | 17 | \item{const}{tuning constant used in the computation of the estimator (default=0.5);} 18 | 19 | \item{kmax}{maximum number of iterations before convergence (default=1000);} 20 | 21 | \item{maxhalf}{max number of step-halving (default=10).} 22 | } 23 | \value{ 24 | A list with the following components: 25 | \item{coefficients}{estimates for the regression coefficients} 26 | \item{standard.deviation}{standard deviations of the coefficients} 27 | \item{fitted.values}{fitted values} 28 | \item{residual.deviances}{residual deviances} 29 | \item{components}{logical value indicating whether convergence was achieved} 30 | \item{objective}{value of the objective function at the minimum} 31 | } 32 | \description{ 33 | This function computes the M-estimator proposed by Bianco and Yohai for 34 | logistic regression. By default, an intercept term is included and p 35 | parameters are estimated. 
Modified by Yohai (2018) to take as initial estimator 36 | a weighted ML estimator with weights derived from the MCD estimator. 37 | For more details we refer to Croux, C., and Haesbroeck, G. (2002), 38 | "Implementing the Bianco and Yohai estimator for Logistic Regression" 39 | } 40 | \examples{ 41 | data(skin) 42 | Xskin <- as.matrix( skin[, 1:2] ) 43 | yskin <- skin$vasoconst 44 | skinBY <- logregBY(Xskin, yskin, intercept=1) 45 | skinBY$coefficients 46 | skinBY$standard.deviation 47 | 48 | } 49 | \references{ 50 | \url{http://www.wiley.com/go/maronna/robust} 51 | } 52 | \author{ 53 | Christophe Croux, Gentiane Haesbroeck, Victor Yohai 54 | } 55 | -------------------------------------------------------------------------------- /R/breslow.dat.R: -------------------------------------------------------------------------------- 1 | #' Breslow Data 2 | #' 3 | #' Patients suffering from simple or complex partial seizures 4 | #' were randomized to receive either the antiepileptic drug 5 | #' progabide or a placebo. At each of four successive post 6 | #' randomization clinic visits, the number of seizures occurring 7 | #' over the previous two weeks was reported. 8 | #' 9 | #' Description: A data frame with 59 observations on the 10 | #' following 12 variables: \code{ID}: an integer value 11 | #' specifying the patient identification number; \code{Y1}: 12 | #' an integer value, the number of seizures during the first 13 | #' two week period; \code{Y2}: an integer value, the number of 14 | #' seizures during the second two week period; \code{Y3}: an integer 15 | #' value, the number of seizures during the third two week period; 
16 | #' \code{Y4}: an integer value, the number of seizures during the 17 | #' fourth two week period; \code{Base}: an integer value giving 18 | #' the eight-week baseline seizure count; \code{Age}: an integer 19 | #' value giving the age of the parient in years; \code{Trt}: 20 | #' the treatment: a factor with levels placebo and progabide; 21 | #' \code{Ysum}: an integer value, the sum of Y1, Y2, Y3 and Y4; 22 | #' \code{sumY}: an integer value, the sum of Y1, Y2, Y3 and Y4; 23 | #' \code{Age10}: a numeric value, Age divided by 10; \code{Base4}: 24 | #' a numeric value, Base divided by 4. 25 | #' 26 | #' Format: Numeric, 59 rows and 12 columns. 27 | #' 28 | #' @docType data 29 | #' 30 | #' @usage data(breslow.dat) 31 | #' 32 | #' @format An object of class \code{"data.frame"}. 33 | #' 34 | #' @source Breslow, N. E., and Clayton, D. G. (1993), "Approximate 35 | #' Inference in Generalized Linear Mixed Models," Journal of the 36 | #' American Statistical Association, Vol. 88, No. 421, pp. 9-25. 37 | #' 38 | #' Thrall, P. F., and Vail, S. C. (1990), "Some Covariance Models 39 | #' for Longitudinal Count Data With Overdispersion," Biometrics, 40 | #' Vol. 46, pp. 657-671. 41 | #' 42 | #' @examples 43 | #' data(breslow.dat) 44 | "breslow.dat" 45 | -------------------------------------------------------------------------------- /src/erfz.c: -------------------------------------------------------------------------------- 1 | #include "Rinternals.h" 2 | #include "Rmath.h" 3 | #include 4 | #include "RobStatTM.h" 5 | 6 | SEXP R_erfi(SEXP x) 7 | { 8 | SEXP ret = R_NilValue; 9 | PROTECT(ret = allocVector(REALSXP, 1)); 10 | x = PROTECT(coerceVector(x, REALSXP)); 11 | REAL(ret)[0] = (double) erfi((double complex) REAL(x)[0]); 12 | UNPROTECT(2); 13 | return(ret); 14 | } 15 | 16 | 17 | double complex erfz(double complex z) 18 | /******************************************************************************* 19 | 20 | Evalutaes the error function for a complex argument. 
21 | 22 | Licensing: None 23 | 24 | Modified: 25 | 26 | 26 July 2017 27 | 28 | Author: 29 | 30 | Kjell P. Konis 31 | 32 | Algorithm Reference: 33 | Shanjie Zhang and Jianming Jin, 34 | Computation of Special Functions, 35 | Wiley, 1996, 36 | ISBN: 0-471-11963-6, 37 | 38 | *******************************************************************************/ 39 | { 40 | double a0 = 0.0; 41 | double complex c0 = 0.0 + 0.0*I, 42 | cer = 0.0 + 0.0*I, 43 | cl = 0.0 + 0.0*I, 44 | cr = 0.0 + 0.0*I, 45 | cs = 0.0 + 0.0*I, 46 | z1 = 0.0 + 0.0*I; 47 | int k = -1; 48 | 49 | a0 = cabs(z); 50 | c0 = cexp(-z*z); 51 | z1 = z; 52 | 53 | if(creal(z) < 0.0 ) 54 | z1 = -z; 55 | 56 | if(a0 <= 5.8) { 57 | 58 | cs = z1; 59 | cr = z1; 60 | for(k = 1; k <= 120; k++) { 61 | cr = cr * z1 * z1 / (0.5 + k); 62 | cs = cs + cr; 63 | if(cabs(cr/cs) < 1.0e-15) 64 | break; 65 | } 66 | 67 | cer = c0 * cs * M_2_SQRTPI; 68 | 69 | } else { 70 | 71 | cl = 1.0 / z1; 72 | cr = cl; 73 | for(k = 1; k <= 13; k++) { 74 | cr = -cr * (k - 0.5) / (z1*z1); 75 | cl = cl + cr; 76 | if(cabs(cr/cl) < 1.0e-15) 77 | break; 78 | } 79 | 80 | cer = 1.0 - c0 * cl / M_SQRT_PI; 81 | 82 | } 83 | 84 | if(creal(z) < 0.0) 85 | cer = -cer; 86 | 87 | return(cer); 88 | } 89 | 90 | 91 | double complex erfi(double complex z) 92 | { 93 | return(-I*erfz(I*z)); 94 | } 95 | 96 | 97 | 98 | 99 | -------------------------------------------------------------------------------- /man/breslow.dat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/breslow.dat.R 3 | \docType{data} 4 | \name{breslow.dat} 5 | \alias{breslow.dat} 6 | \title{Breslow Data} 7 | \format{ 8 | An object of class \code{"data.frame"}. 9 | } 10 | \source{ 11 | Breslow, N. E., and Clayton, D. G. (1993), "Approximate 12 | Inference in Generalized Linear Mixed Models," Journal of the 13 | American Statistical Association, Vol. 88, No. 421, pp. 9-25. 
14 | 15 | Thrall, P. F., and Vail, S. C. (1990), "Some Covariance Models 16 | for Longitudinal Count Data With Overdispersion," Biometrics, 17 | Vol. 46, pp. 657-671. 18 | } 19 | \usage{ 20 | data(breslow.dat) 21 | } 22 | \description{ 23 | Patients suffering from simple or complex partial seizures 24 | were randomized to receive either the antiepileptic drug 25 | progabide or a placebo. At each of four successive post 26 | randomization clinic visits, the number of seizures occurring 27 | over the previous two weeks was reported. 28 | } 29 | \details{ 30 | Description: A data frame with 59 observations on the 31 | following 12 variables: \code{ID}: an integer value 32 | specifying the patient identification number; \code{Y1}: 33 | an integer value, the number of seizures during the first 34 | two week period; \code{Y2}: an integer value, the number of 35 | seizures during the second two week period; \code{Y3}: an integer 36 | value, the number of seizures during the third two week period. 37 | \code{Y4}: an integer value, the number of seizures during the 38 | fourth two week period; \code{Base}: an integer value giving 39 | the eight-week baseline seizure count; \code{Age}: an integer 40 | value giving the age of the patient in years; \code{Trt}: 41 | the treatment: a factor with levels placebo and progabide; 42 | \code{Ysum}: an integer value, the sum of Y1, Y2, Y3 and Y4; 43 | \code{sumY}: an integer value, the sum of Y1, Y2, Y3 and Y4; 44 | \code{Age10}: a numeric value, Age divided by 10; \code{Base4}: 45 | a numeric value, Base divided by 4. 46 | 47 | Format: Numeric, 59 rows and 12 columns.
48 | } 49 | \examples{ 50 | data(breslow.dat) 51 | } 52 | \keyword{datasets} 53 | -------------------------------------------------------------------------------- /man/lsRobTestMM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lsRobTestMM.R 3 | \name{lsRobTestMM} 4 | \alias{lsRobTestMM} 5 | \title{Test for Least Squares Bias Using Robust MM Regressions} 6 | \usage{ 7 | lsRobTestMM(object, test = c("T", "T0"), ...) 8 | } 9 | \arguments{ 10 | \item{object}{An MM regression fitted model whose class is *lmrobdetMM*.} 11 | 12 | \item{test}{A character vector indicating which of two types of tests "T" or 13 | "T0" are used, with type "T" the default.} 14 | 15 | \item{...}{Pass through parameters} 16 | } 17 | \value{ 18 | A list with component names coefs, full, test, efficiency 19 | } 20 | \description{ 21 | Test for Least Squares Bias Using Robust MM Regressions 22 | } 23 | \details{ 24 | The original version of \code{lsRobTestMM} is the \code{lsRobTest} 25 | in the package *robust*. The function \code{lsRobTest} had options *T1* and 26 | *T2*. However, we only recommend *T2*, and deprecate *T1*. Accordingly we 27 | use *T* for the former *T2*, and use *T0* for the former *T1*, and we 28 | deprecate *T0*. 29 | 30 | The *coefs* component of the list is a matrix whose row names are 31 | the names of the regression predictor variables, and whose columns *LS*, 32 | *Robust*, *Delta*, *Std.error*, *t-stat*, *p-value* contain respectively, 33 | the least squares and robust coefficient estimates, the differences in the 34 | coefficient estimates, the standard errors of the differences, the resulting 35 | t-statistic values, and the resulting z-test p-values.
36 | 37 | The *full* component of the list is itself a list with components the full 38 | model quadratic form chi-squared statistic value (*stat*), the degrees of 39 | freedom (*df*), and the full model p value (*p.value*). 40 | 41 | The *test* component of the list is a character value indicating which of the 42 | tests *T* and *T0* has been computed. 43 | 44 | The *efficiency* component of the list is *NULL* when test *T* has been used, 45 | and is equal to the normal distribution efficiency of the *lmrobdetMM* 46 | estimate when test *T0* has been used. 47 | } 48 | \examples{ 49 | args(lsRobTestMM) 50 | } 51 | \author{ 52 | Kjell Konis 53 | } 54 | -------------------------------------------------------------------------------- /man/WBYlogreg.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/WBYlogreg.R 3 | \name{logregWBY} 4 | \alias{logregWBY} 5 | \alias{WBYlogreg} 6 | \title{Bianco and Yohai estimator for logistic regression} 7 | \usage{ 8 | logregWBY(x0, y, intercept = 1, const = 0.5, kmax = 1000, maxhalf = 10) 9 | } 10 | \arguments{ 11 | \item{x0}{matrix of explanatory variables;} 12 | 13 | \item{y}{vector of binomial responses (0 or 1);} 14 | 15 | \item{intercept}{1 or 0 indicating if an intercept is included or not} 16 | 17 | \item{const}{tuning constant used in the computation of the estimator (default=0.5);} 18 | 19 | \item{kmax}{maximum number of iterations before convergence (default=1000);} 20 | 21 | \item{maxhalf}{max number of step-halving (default=10).} 22 | } 23 | \value{ 24 | A list with the following components: 25 | \item{coefficients}{estimates for the regression coefficients} 26 | \item{standard.deviation}{standard deviations of the coefficients} 27 | \item{fitted.values}{fitted values} 28 | \item{residual.deviances}{residual deviances} 29 | \item{components}{logical value indicating whether convergence was achieved} 30
| \item{objective}{value of the objective function at the minimum} 31 | } 32 | \description{ 33 | This function computes the weighted M-estimator of Bianco and Yohai in logistic regression. 34 | By default, an intercept term is included and p parameters are estimated. Modified by 35 | Yohai (2018) to take as initial estimator a weighted ML estimator computed with weights 36 | derived from the MCD estimator of the continuous explanatory variables. The same weights 37 | are used to compute the final weighted M-estimator. For more details we refer to 38 | Croux, C., and Haesbroeck, G. (2002), "Implementing the Bianco and Yohai estimator for 39 | Logistic Regression" 40 | } 41 | \examples{ 42 | data(skin) 43 | Xskin <- as.matrix( skin[, 1:2] ) 44 | yskin <- skin$vasoconst 45 | skinWBY <- logregWBY(Xskin, yskin, intercept=1) 46 | skinWBY$coeff 47 | skinWBY$standard.deviation 48 | 49 | } 50 | \references{ 51 | \url{http://www.wiley.com/go/maronna/robust} 52 | } 53 | \author{ 54 | Christophe Croux, Gentiane Haesbroeck, Victor Yohai 55 | } 56 | -------------------------------------------------------------------------------- /man/refine.sm.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lmrobdet.R 3 | \name{refine.sm} 4 | \alias{refine.sm} 5 | \title{IRWLS iterations for S- or M-estimators} 6 | \usage{ 7 | refine.sm( 8 | x, 9 | y, 10 | initial.beta, 11 | initial.scale, 12 | k = 50, 13 | conv = 1, 14 | b, 15 | cc, 16 | family, 17 | step = "M", 18 | tol 19 | ) 20 | } 21 | \arguments{ 22 | \item{x}{design matrix} 23 | 24 | \item{y}{vector of responses} 25 | 26 | \item{initial.beta}{vector of initial regression estimates} 27 | 28 | \item{initial.scale}{initial residual scale estimate. 
If missing the (scaled) median of 29 | the absolute residuals is used.} 30 | 31 | \item{k}{maximum number of refining steps to be performed} 32 | 33 | \item{conv}{an integer indicating whether to check for convergence (1) at each step, 34 | or to force running k steps (0)} 35 | 36 | \item{b}{tuning constant for the M-scale estimator, used if iterations are for an S-estimator.} 37 | 38 | \item{cc}{tuning constant for the rho function.} 39 | 40 | \item{family}{string specifying the name of the family of loss function to be used (current 41 | valid options are "bisquare", "opt" and "mopt")} 42 | 43 | \item{step}{a string indicating whether the iterations are to compute an S-estimator 44 | ('S') or an M-estimator ('M')} 45 | 46 | \item{tol}{tolerance to detect convergence (relative difference of consecutive vectors of parameters)} 47 | } 48 | \value{ 49 | A list with the following components: 50 | \item{beta.rw}{The updated vector of regression coefficients} 51 | \item{scale.rw}{The corresponding estimated residual scale} 52 | \item{converged}{A logical value indicating whether the algorithm 53 | converged} 54 | } 55 | \description{ 56 | This function performs iterative improvements for S- or 57 | M-estimators. 58 | } 59 | \details{ 60 | This function performs iterative improvements for S- or 61 | M-estimators. Both iterations are formally the same, the 62 | only difference is that for M-iterations the residual 63 | scale estimate remains fixed, while for S-iterations 64 | it is updated at each step. In this case, we follow 65 | the Fast-S algorithm of Salibian-Barrera and Yohai 66 | and use one-step updates for the M-scale, as opposed 67 | to a full computation. This is an internal function. 68 | } 69 | \author{ 70 | Matias Salibian-Barrera, \email{matias@stat.ubc.ca}.
71 | } 72 | -------------------------------------------------------------------------------- /R/fastmve.R: -------------------------------------------------------------------------------- 1 | #' Minimum Volume Ellipsoid covariance estimator 2 | #' 3 | #' This function uses a fast algorithm to compute the Minimum Volume 4 | #' Ellipsoid (MVE) for multivariate location and scatter. 5 | #' 6 | #' This function computes the Minimum Volume 7 | #' Ellipsoid (MVE) for multivariate location and scatter, using a 8 | #' fast algorithm related to the fast algorithm for S-regression 9 | #' estimators (see \code{\link[robustbase]{lmrob}}). 10 | #' 11 | #' @param x data matrix (n x p) with cases stored in rows. 12 | #' @param nsamp number of random starts for the iterative algorithm, these 13 | #' are constructed using subsamples of the data. 14 | #' 15 | #' @return A list with the following components: 16 | #' \item{center}{a vector with the robust multivariate location estimate} 17 | #' \item{cov}{a matrix with the robust covariance / scatter matrix estimate} 18 | #' \item{scale}{A scalar that equals the median of the Mahalanobis distances of the 19 | #' data to the \code{center}, multiplied by the determinant of the covariance matrix 20 | #' to the power 1/p} 21 | #' \item{best}{Indices of the observations that correspond to the MVE estimator} 22 | #' \item{nsamp}{Number of random starts used for the iterative algorithm} 23 | #' \item{nsing}{Number of random subsamples (among the \code{nsamp} attempted) 24 | #' that failed (resulting in singular initial values)} 25 | #' 26 | #' @author Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 27 | #' @references \url{http://www.wiley.com/go/maronna/robust} 28 | #' 29 | #' @examples 30 | #' data(bus) 31 | #' X0 <- as.matrix(bus) 32 | #' X1 <- X0[,-9] 33 | #' tmp <- fastmve(X1) 34 | #' round(tmp$cov[1:10, 1:10], 3) 35 | #' tmp$center 36 | #' 37 | #' @export 38 | fastmve <- function(x, nsamp=500) { 39 | n <- nrow(x) 40 | p <- ncol(x)
41 | n2 <- floor(n/2) 42 | nind <- p +1 43 | tmp <- .C('r_fast_mve', as.double(x), 44 | as.integer(n), as.integer(p), as.integer(nsamp), 45 | nsing = as.integer(0), ctr = as.double(rep(0,p)), 46 | cov = as.double(rep(0,p*p)), 47 | scale = as.double(0), best=as.integer(rep(0,n)), 48 | as.integer(nind), as.integer(n2), PACKAGE='RobStatTM') 49 | mve.cov <- matrix(tmp$cov, p, p) 50 | return(list(center= tmp$ctr, cov=mve.cov, scale=tmp$scale, 51 | best=tmp$best[seq_len(n2)], 52 | nsamp=nsamp, nsing = tmp$nsing)) 53 | } 54 | 55 | -------------------------------------------------------------------------------- /man/drop1.lmrobdetMM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RFPE.R 3 | \name{drop1.lmrobdetMM} 4 | \alias{drop1.lmrobdetMM} 5 | \title{RFPE of submodels of an \code{\link{lmrobdetMM}} fit} 6 | \usage{ 7 | \method{drop1}{lmrobdetMM}(object, scope, scale, keep, ...) 8 | } 9 | \arguments{ 10 | \item{object}{the \code{MM} element (of class \code{lmrob}) in an object of class \code{\link{lmrobdetMM}}.} 11 | 12 | \item{scope}{an optional \code{formula} giving the terms to be considered for dropping. Typically 13 | this argument is omitted, in which case all possible terms are dropped (without breaking hierarchy 14 | rules). The \code{scope} can also be a character vector of term labels. If the argument is supplied as a 15 | formula, any \code{.} is interpreted relative to the formula implied by the \code{object} argument.} 16 | 17 | \item{scale}{an optional residual scale estimate. If missing the residual 18 | scale estimate in \code{object} is used.} 19 | 20 | \item{keep}{a character vector of names of components that should be saved for each subset model. 21 | Only names from the set \code{"coefficients"}, \code{"fitted"} and \code{"residuals"} 22 | are allowed. If \code{keep == TRUE}, the complete set is saved.
The default behavior is 23 | not to keep anything.} 24 | 25 | \item{\dots}{additional parameters to match generic method \code{drop1}} 26 | } 27 | \value{ 28 | An anova object consisting of the term labels, the degrees of freedom, and Robust Final 29 | Prediction Errors (RFPE) for each subset model. If \code{keep} is missing, the anova object is 30 | returned. If \code{keep} is present, a list with components \code{"anova"} and \code{"keep"} is returned. 31 | In this case, the \code{"keep"} component is a matrix of mode \code{"list"}, with a column for each 32 | subset model, and a row for each component kept. 33 | } 34 | \description{ 35 | This function computes the RFPE for the MM-estimators obtained with \code{\link{lmrobdetMM}} by 36 | recomputing it, successively removing each of a number of specified terms. 37 | It is used internally by \code{\link{step.lmrobdetMM}} and not meant to be used 38 | directly. 39 | } 40 | \references{ 41 | \url{http://www.wiley.com/go/maronna/robust} 42 | } 43 | \seealso{ 44 | \code{\link{lmrobdetMM}} 45 | } 46 | \author{ 47 | Victor Yohai, \email{victoryohai@gmail.com}, Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 48 | } 49 | -------------------------------------------------------------------------------- /inst/scripts/oats.R: -------------------------------------------------------------------------------- 1 | # oats.R 2 | # EXAMPLE 4.2 3 | # Figures 4.2, 4.4 4 | 5 | library(RobStatTM) 6 | data(oats) 7 | 8 | ## LS regression 9 | oats1LS <- lm(response1 ~ variety+block, data=oats) 10 | oats1LS_var <- lm(response1 ~ block, data=oats) 11 | oats1LS_block <- lm(response1 ~ variety, data=oats) 12 | oats2LS <- lm(response2 ~ variety+block, data=oats) 13 | oats2LS_var <- lm(response2 ~ block, data=oats) 14 | oats2LS_block <- lm(response2 ~ variety, data=oats) 15 | sigma1LS <- summary(oats1LS)$sigma 16 | sigma2LS <- summary(oats2LS)$sigma 17 | 18 | ## Classical ANOVA tests 19 | anov1_var <- anova(oats1LS, oats1LS_var) 20 |
anov1_block <- anova(oats1LS, oats1LS_block) 21 | anov2_var <- anova(oats2LS, oats2LS_var) 22 | anov2_block <- anova(oats2LS, oats2LS_block) 23 | 24 | ## M regressions 25 | cont <- lmrobdet.control(bb = 0.5, efficiency = 0.85, family = "bisquare") 26 | oats1M <- lmrobM(response1 ~ variety+block, control=cont, data=oats) 27 | oats1M_var <- lmrobM(response1 ~ block, control=cont, data=oats) 28 | oats1M_block <- lmrobM(response1 ~ variety, control=cont, data=oats) 29 | oats2M <- lmrobM(response2 ~ variety+block, control=cont, data=oats) 30 | oats2M_var <- lmrobM(response2 ~ block, control=cont, data=oats) 31 | oats2M_block <- lmrobM(response2 ~ variety, control=cont, data=oats) 32 | sM2 <- oats2M$scale 33 | 34 | ## Robust ANOVA tests 35 | anov1M_var <- rob.linear.test(oats1M, oats1M_var) 36 | anov1M_block <- rob.linear.test(oats1M, oats1M_block) 37 | anov2M_var <- rob.linear.test(oats2M, oats2M_var) 38 | anov2M_block <- rob.linear.test(oats2M, oats2M_block) 39 | 40 | plot(oats2LS, which=2) 41 | abline(h=0, lty=2) 42 | 43 | 44 | tmp <- qqnorm(resid(oats2M)/sM2, ylab="Standardized residuals", pch=19, col='gray30') 45 | qqline(resid(oats2M)/sM2) 46 | abline(h=c(-2.5, 0, 2.5), lty=2) 47 | w <- c(24,36,1,35,20) 48 | text(tmp$x[w] + .1, tmp$y[w] + .1, w) 49 | 50 | 51 | A <- matrix(0, 2, 4); A[1, ] <- c(anov1_var[2, 6], anov1M_var$F.pvalue, anov1_block[2, 6], anov1M_block$F.pvalue) 52 | 53 | A[2, ] <- c(anov2_var[2, 6], anov2M_var$F.pvalue, anov2_block[2, 6], anov2M_block$F.pvalue) 54 | rownames(A) <- c("Original", "Altered") 55 | colnames(A) <- c("F(rows)", "Robust(rows)", "F(cols)", "Robust(cols)") 56 | 57 | "classical and robust p-values of ANOVA tests" 58 | A 59 | 60 | # Comment: Due to changes in the code that were made after the book's printing, 61 | # not all p-values coincide with those in the example in the book.
62 | 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /R/INVTR2.R: -------------------------------------------------------------------------------- 1 | #' Robust R^2 coefficient of determination 2 | #' 3 | #' This function computes a robust version of the R^2 coefficient of determination. 4 | #' It is used internally by \code{\link{lmrobdetMM}}, 5 | #' and not meant to be used directly. 6 | #' 7 | #' This function computes a robust version of the R^2 coefficient. 8 | #' It is used internally by \code{\link{lmrobdetMM}}, 9 | #' and not meant to be used directly. 10 | #' 11 | #' @param RR2 the proportional difference in loss functions (a naive robust R^2 coefficient). 12 | #' @param family string specifying the name of the family of loss function to be used (current valid 13 | #' options are "bisquare", "opt" and "mopt"). 14 | #' @param cc tuning parameters to be computed according to efficiency and / or breakdown 15 | #' considerations. See \link{lmrobdet.control}, \link{bisquare}, \link{mopt} 16 | #' and \link{opt}. 17 | 18 | #' 19 | #' @return An unbiased version of the robust R^2 coefficient of determination.
20 | #' 21 | #' @rdname INVTR2 22 | #' @author Victor Yohai, \email{victoryohai@gmail.com} 23 | #' @references \url{http://www.wiley.com/go/maronna/robust} 24 | #' 25 | #' @export 26 | INVTR2 <- function(RR2, family, cc) { 27 | 28 | hh <- function(v, family, cc, z) return( rho(v/z, family=family, cc)*dnorm(v) ) 29 | 30 | TR2 <- function(R2, family, cc) { 31 | a <- Erhobic(family, cc, 1) 32 | b <- Erhobic(family, cc, sqrt(1-R2)) 33 | return( (b-a)/ (b*(1-a)) ) 34 | } 35 | 36 | # compute E(rho(u/zz, cc)) for u ~ N(0,1): closed form for the bisquare family, numerical integration otherwise 37 | Erhobic <- function(family, cc, zz) { 38 | if( family == 'bisquare') { 39 | dd <- cc * zz 40 | a0 <- 2*pnorm(dd)-1 41 | a2 <- (-2)*dd*dnorm(dd)+a0 42 | a4 <- (-2)*(dd^3)*dnorm(dd)+3*a2 43 | a6 <- (-2)*(dd^5)*dnorm(dd)+5*a4 44 | ee <- (a6/dd^6)+(3*a2/dd^2)-(3*a4/dd^4)+1-a0 45 | } else { 46 | ee <- 2*(integrate(hh, 0, cc[3]*zz, family=family, cc=cc, z=zz)$value+1-pnorm(cc[3]*zz)) 47 | } 48 | return( ee ) 49 | } 50 | 51 | ff <- function(x, y, family, cc) return( TR2(x, family, cc) - y ) 52 | aa <- TR2(.99999, family, cc) 53 | bb <- TR2(.00001, family, cc) 54 | if( RR2 > .99 ) R2 <- 1 55 | if ( RR2 < bb ) R2 <- 0 56 | if( (RR2 <= .99) && (RR2 >= bb) ) 57 | R2 <- uniroot(ff, c(bb/2, aa+((1-aa)/2)), y=RR2, family=family, cc=cc)$root #R2 <- uniroot(ff,c( .000000001,.999999999),y=RR2,cc=cc)$root 58 | return(R2) 59 | } 60 | 61 | 62 | # INVTR2(0.7367034,3.44) 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /man/covClassic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Multirobu.R 3 | \name{covClassic} 4 | \alias{covClassic} 5 | \title{Classical Covariance Estimation} 6 | \usage{ 7 | covClassic( 8 | data, 9 | corr = FALSE, 10 | center = TRUE, 11 | distance = TRUE, 12 | na.action = na.fail, 13 | unbiased = TRUE 14 | ) 15 | } 16 | \arguments{
17 | \item{data}{a numeric matrix or data frame containing the data.} 18 | 19 | \item{corr}{a logical flag. If \code{corr = TRUE} then the estimated correlation matrix is computed.} 20 | 21 | \item{center}{a logical flag or a numeric vector of length \code{p} (where \code{p} is the number of columns of \code{x}) specifying the center. If \code{center = TRUE} then the center is estimated. Otherwise the center is taken to be 0.} 22 | 23 | \item{distance}{a logical flag. If \code{distance = TRUE} the Mahalanobis distances are computed.} 24 | 25 | \item{na.action}{a function to filter missing data. The default \code{na.fail} produces an error if missing values are present. An alternative is \code{na.omit} which deletes observations that contain one or more missing values.} 26 | 27 | \item{unbiased}{a logical flag. If \code{TRUE} the unbiased estimator is returned (computed with denominator equal to \code{n-1}), else the MLE (computed with denominator equal to \code{n}) is returned.} 28 | } 29 | \value{ 30 | a list with class \dQuote{covClassic} containing the following elements: 31 | \item{center}{a numeric vector containing the estimate of the location vector.} 32 | \item{cov}{a numeric matrix containing the estimate of the covariance matrix.} 33 | \item{cor}{a numeric matrix containing the estimate of the correlation matrix if the argument \code{corr = TRUE}. Otherwise it is set to \code{NULL}.} 34 | \item{dist}{a numeric vector containing the squared Mahalanobis distances. Only present if \code{distance = TRUE} in the \code{call}.} 35 | \item{call}{an image of the call that produced the object with all the arguments named. The matched call.} 36 | } 37 | \description{ 38 | Compute an estimate of the covariance/correlation matrix and location 39 | vector using classical methods. 40 | } 41 | \details{ 42 | Its main intention is to return an object compatible to that 43 | produced by \code{\link{covRob}}, but fit using classical methods. 
44 | } 45 | \note{ 46 | Originally, and in S-PLUS, this function was called \code{cov}; it has 47 | been renamed, as that did mask the function in the standard package \pkg{stats}. 48 | } 49 | \examples{ 50 | data(wine) 51 | round( covClassic(wine)$cov, 2) 52 | 53 | } 54 | -------------------------------------------------------------------------------- /man/Multirobu.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Multirobu.R 3 | \name{covRob} 4 | \alias{covRob} 5 | \alias{Multirobu} 6 | \title{Robust multivariate location and scatter estimators} 7 | \usage{ 8 | covRob(X, type = "auto", maxit = 50, tol = 1e-04, corr = FALSE) 9 | } 10 | \arguments{ 11 | \item{X}{a data matrix with observations in rows.} 12 | 13 | \item{type}{a string indicating which estimator to compute. Valid options 14 | are "Rocke" for Rocke's S-estimator, "MM" for an MM-estimator with a 15 | SHR rho function, or "auto" (default) which selects "Rocke" if the number 16 | of variables is greater than or equal to 10, and "MM" otherwise.} 17 | 18 | \item{maxit}{Maximum number of iterations, defaults to 50.} 19 | 20 | \item{tol}{Tolerance for convergence, defaults to 1e-4.} 21 | 22 | \item{corr}{A logical value. If \code{TRUE} a correlation matrix is included in the element \code{cor} of the returned object. Defaults to \code{FALSE}.} 23 | } 24 | \value{ 25 | A list with class \dQuote{covClassic} with the following components: 26 | \item{center}{The location estimate.} 27 | \item{cov}{The scatter matrix estimate, scaled for consistency at the normal distribution.} 28 | \item{cor}{The correlation matrix estimate, if the argument \code{corr} equals \code{TRUE}. Otherwise it is set to \code{NULL}.} 29 | \item{dist}{Robust Mahalanobis distances} 30 | \item{wts}{weights} 31 | \item{call}{an image of the call that produced the object with all the arguments named.
The matched call.} 32 | \item{mu}{The location estimate. Same as \code{center} above.} 33 | \item{V}{The scatter matrix estimate, scaled for consistency at the normal distribution. Same as \code{cov} above.} 34 | } 35 | \description{ 36 | This function computes robust estimators for multivariate location and scatter. 37 | } 38 | \details{ 39 | This function computes robust estimators for multivariate location and scatter. 40 | The default behaviour (\code{type = "auto"}) computes a "Rocke" estimator 41 | (as implemented in \code{\link{covRobRocke}}) if the number 42 | of variables is greater than or equal to 10, and an MM-estimator with a 43 | SHR rho function (as implemented in \code{\link{covRobMM}}) otherwise. 44 | } 45 | \examples{ 46 | data(bus) 47 | X0 <- as.matrix(bus) 48 | X1 <- X0[,-9] 49 | tmp <- covRob(X1) 50 | round(tmp$cov[1:10, 1:10], 3) 51 | tmp$mu 52 | 53 | } 54 | \references{ 55 | \url{http://www.wiley.com/go/maronna/robust} 56 | } 57 | \seealso{ 58 | \code{\link{covRobRocke}}, \code{\link{covRobMM}} 59 | } 60 | \author{ 61 | Ricardo Maronna, \email{rmaronna@retina.ar} 62 | } 63 | -------------------------------------------------------------------------------- /man/lmrobM.control.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/lmrobdet.R 3 | \name{lmrobM.control} 4 | \alias{lmrobM.control} 5 | \title{Tuning parameters for lmrobM} 6 | \usage{ 7 | lmrobM.control( 8 | bb = 0.5, 9 | efficiency = 0.99, 10 | family = "opt", 11 | tuning.chi, 12 | tuning.psi, 13 | max.it = 100, 14 | rel.tol = 1e-07, 15 | mscale_tol = 1e-06, 16 | mscale_maxit = 50, 17 | trace.lev = 0 18 | ) 19 | } 20 | \arguments{ 21 | \item{bb}{tuning constant (between 0 and 1/2) for the M-scale used to compute the residual 22 | scale estimator. Defaults to 0.5.} 23 | 24 | \item{efficiency}{desired asymptotic efficiency of the final regression M-estimator. 
Defaults to 0.99.} 25 | 26 | \item{family}{string specifying the name of the family of loss function to be used (current valid 27 | options are "bisquare", "opt" and "mopt"). Incomplete entries will be matched to 28 | the current valid options.} 29 | 30 | \item{tuning.chi}{tuning constant for the function used to compute the M-scale 31 | used for the residual scale estimator. If missing, it is computed inside \code{lmrobdet.control} to match 32 | the value of \code{bb} according to the family of rho functions specified in \code{family}.} 33 | 34 | \item{tuning.psi}{tuning parameters for the regression M-estimator computed with a rho function 35 | as specified with argument \code{family}. If missing, it is computed inside \code{lmrobdet.control} to match 36 | the value of \code{efficiency} according to the family of rho functions specified in \code{family}. 37 | Appropriate values for \code{tuning.psi} for a given desired efficiency for Gaussian errors 38 | can be constructed using the functions \link{bisquare}, \link{mopt} and \link{opt}.} 39 | 40 | \item{max.it}{maximum number of IRWLS iterations for the M-estimator} 41 | 42 | \item{rel.tol}{relative convergence tolerance for the IRWLS iterations for the M-estimator} 43 | 44 | \item{mscale_tol}{Convergence tolerance for the M-scale algorithm. See \code{\link{scaleM}}.} 45 | 46 | \item{mscale_maxit}{Maximum number of iterations for the M-scale algorithm. See \code{\link{scaleM}}.} 47 | 48 | \item{trace.lev}{positive values (increasingly) provide details on the progress of the M-algorithm} 49 | } 50 | \value{ 51 | A list with the necessary tuning parameters. 52 | } 53 | \description{ 54 | This function sets tuning parameters for the M estimators of regression implemented 55 | in \code{\link{lmrobM}}.
56 | } 57 | \examples{ 58 | data(coleman, package='robustbase') 59 | m2 <- lmrobM(Y ~ ., data=coleman, control=lmrobM.control()) 60 | m2 61 | summary(m2) 62 | 63 | } 64 | \author{ 65 | Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 66 | } 67 | -------------------------------------------------------------------------------- /man/scaleM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/DCML.R 3 | \name{scaleM} 4 | \alias{scaleM} 5 | \title{M-scale estimator} 6 | \usage{ 7 | scaleM( 8 | u, 9 | delta = 0.5, 10 | family = "bisquare", 11 | max.it = 100, 12 | tol = 1e-06, 13 | tolerancezero = .Machine$double.eps, 14 | tuning.chi = lmrobdet.control(family = family, bb = delta)$tuning.chi 15 | ) 16 | } 17 | \arguments{ 18 | \item{u}{vector of residuals} 19 | 20 | \item{delta}{the right hand side of the M-scale equation} 21 | 22 | \item{family}{string specifying the name of the family of loss function to be used (current valid 23 | options are "bisquare", "opt" and "mopt").} 24 | 25 | \item{max.it}{maximum number of iterations allowed} 26 | 27 | \item{tol}{relative tolerance for convergence} 28 | 29 | \item{tolerancezero}{smallest (in absolute value) non-zero value accepted as a scale. Defaults to \code{.Machine$double.eps}} 30 | 31 | \item{tuning.chi}{the tuning object as returned 32 | by \code{\link{lmrobdet.control}}, \code{\link{bisquare}}, \code{\link{mopt}}, 33 | or \code{\link{opt}}. It defaults to the value that results 34 | in a consistent scale estimator for the specified \code{family} 35 | of loss functions and breakdown point as set by \code{delta}.} 36 | } 37 | \value{ 38 | The scale estimate value at the last iteration or at convergence. 39 | } 40 | \description{ 41 | This function computes an M-scale, which is a robust 42 | scale (spread) estimator. 
43 | M-estimators of scale are a robust alternative to 44 | the sample standard deviation. Given a vector of 45 | residuals \code{r}, the M-scale estimator \code{s} 46 | solves the non-linear equation \code{mean(rho(r/s, cc))=delta}, 47 | where \code{delta} determines the breakdown point of the 48 | estimator, and \code{cc} is a tuning parameter 49 | calculated to obtain consistency under a Gaussian model. 50 | The breakdown point of the estimator is \code{min(delta, 1-delta)}, 51 | so the optimal choice for \code{delta} is 0.5. To obtain a 52 | consistent estimator the constant 53 | \code{cc} is chosen such that E(rho(Z, cc)) = delta, where 54 | Z is a standard normal random variable. 55 | } 56 | \details{ 57 | The iterative algorithm starts from the scaled median of 58 | the absolute values of the input vector, and then 59 | cycles through the equation \code{s_{k+1}^2 = s_k^2 * mean(rho(r/s_k, cc)) / delta}. 60 | } 61 | \examples{ 62 | set.seed(123) 63 | r <- rnorm(150, sd=1.5) 64 | scaleM(r) 65 | sd(r) 66 | # 10\% of outliers, sd of good points is 1.5 67 | set.seed(123) 68 | r2 <- c(rnorm(135, sd=1.5), rnorm(15, mean=-5, sd=.5)) 69 | scaleM(r2, family='opt') 70 | sd(r2) 71 | 72 | } 73 | \author{ 74 | Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 75 | } 76 | -------------------------------------------------------------------------------- /man/RockeMulti.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Multirobu.R 3 | \name{covRobRocke} 4 | \alias{covRobRocke} 5 | \alias{RockeMulti} 6 | \title{Rocke's robust multivariate location and scatter estimator} 7 | \usage{ 8 | covRobRocke( 9 | X, 10 | initial = "K", 11 | maxsteps = 5, 12 | propmin = 2, 13 | qs = 2, 14 | maxit = 50, 15 | tol = 1e-04, 16 | corr = FALSE 17 | ) 18 | } 19 | \arguments{ 20 | \item{X}{a data matrix with observations in rows.} 21 | 22 | \item{initial}{A character 
indicating the initial estimator. Valid options are 'K' (default) 23 | for the Pena-Prieto 'KSD' estimate, and 'mve' for the Minimum Volume Ellipsoid.} 24 | 25 | \item{maxsteps}{Maximum number of steps for the line search section of the algorithm.} 26 | 27 | \item{propmin}{Regulates the proportion of weights computed from the initial estimator that 28 | will be different from zero. The number of observations with initial non-zero weights will 29 | be at least p (the number of columns of X) times propmin.} 30 | 31 | \item{qs}{Tuning parameter for Rocke's loss functions.} 32 | 33 | \item{maxit}{Maximum number of iterations.} 34 | 35 | \item{tol}{Tolerance to decide convergence.} 36 | 37 | \item{corr}{A logical value. If \code{TRUE} a correlation matrix is included in the element \code{cor} of the returned object. Defaults to \code{FALSE}.} 38 | } 39 | \value{ 40 | A list with class \dQuote{covRob} containing the following elements: 41 | \item{center}{The location estimate.} 42 | \item{cov}{The scatter matrix estimate, scaled for consistency at the normal distribution.} 43 | \item{cor}{The correlation matrix estimate, if the argument \code{corr} equals \code{TRUE}. Otherwise it is set to \code{NULL}.} 44 | \item{dist}{Robust Mahalanobis distances.} 45 | \item{wts}{weights} 46 | \item{call}{an image of the call that produced the object with all the arguments named. The matched call.} 47 | \item{mu}{The location estimate. Same as \code{center} above.} 48 | \item{V}{The scatter (or correlation) matrix estimate, scaled for consistency at the normal distribution. Same as \code{cov} above.} 49 | \item{sig}{sig} 50 | \item{gamma}{Final value of the constant gamma that regulates the efficiency.} 51 | } 52 | \description{ 53 | This function computes Rocke's robust estimator for multivariate location and scatter. 54 | } 55 | \details{ 56 | This function computes Rocke's robust estimator for multivariate location and scatter. 
57 | } 58 | \examples{ 59 | data(bus) 60 | X0 <- as.matrix(bus) 61 | X1 <- X0[,-9] 62 | tmp <- covRobRocke(X1) 63 | round(tmp$cov[1:10, 1:10], 3) 64 | tmp$mu 65 | 66 | } 67 | \references{ 68 | \url{http://www.wiley.com/go/maronna/robust} 69 | } 70 | \author{ 71 | Ricardo Maronna, \email{rmaronna@retina.ar} 72 | } 73 | -------------------------------------------------------------------------------- /inst/scripts/wine1.R: -------------------------------------------------------------------------------- 1 | # wine1.R 2 | # Example 6.5 and 6.6 3 | 4 | # Must install GSE 5 | 6 | library(RobStatTM) 7 | data(wine) 8 | X <- as.matrix(wine) 9 | DM <- dim(X) 10 | n <- DM[1] 11 | p <- DM[2] 12 | 13 | 14 | #omitted data Figure 6.11 15 | #GSE 16 | set.seed(2400) 17 | RR <- matrix(runif(n*p)<.2,n,p) 18 | X2 <- X 19 | for (i in 1:n) { 20 | for(j in 1:p) { 21 | if(RR[i,j]) X2[i,j] <- NA 22 | } 23 | } 24 | 25 | qq <- qchisq(.999, p) 26 | 27 | out2 <- GSE::GSE(X2) 28 | md2 <- GSE::getDistAdj(out2) 29 | smd2 <- sort(md2) 30 | v2 <- (md2 > qq) 31 | z2 <- 1:n 32 | z2 <- z2[v2] 33 | #number of outliers 34 | no2 <- length(z2) 35 | 36 | #EM estimator 37 | out3 <- GSE::CovEM(X2) 38 | md3 <- GSE::getDistAdj(out3) 39 | smd3 <- sort(md3) 40 | v3 <- ( md3 > qq ) 41 | z3 <- 1:n 42 | z3 <- z3[v3] 43 | #number of outliers 44 | no3 <- length(z3) 45 | 46 | #Figure 6.11 47 | abc <- qchisq(ppoints(n), p) 48 | par(mfrow=c(2,2)) 49 | plot(md3, xlab="Index", ylab="Adjusted distances", pch=19) 50 | abline(h=qq) 51 | plot(abc, abc, xlab="Chi-square quantiles", ylab="Adjusted distance quantiles", type='n', pch=19) 52 | points(abc,smd3) 53 | abline(0,1) 54 | plot(md2, xlab="Index", ylab="Adjusted distances", pch=19) 55 | abline(h=qq) 56 | plot(abc, smd2, xlab="Chi square quantile", ylab="Adjusted distance quantiles", pch=19) 57 | abline(0,1) 58 | par(mfrow=c(1,1)) 59 | 60 | # NOTE: The difference between the plots made by the script above and 61 | # the plots in Figure 6.11 of the book are due to the use of 
a different 62 | # seed for the generation of random numbers. 63 | 64 | 65 | #---------------------------------------------------------- 66 | #Analysis with independent contamination Figure 6.12 67 | 68 | #MM 69 | set.seed(2400) 70 | out <- covRobMM(X) 71 | md <- out$dist 72 | smd <- sort(md) 73 | v <- ( md> qq ) 74 | z <- 1:n 75 | z <- z[v] 76 | # number of outliers 77 | no <- length(z) 78 | 79 | #TSGS 80 | out4 <- GSE::TSGS(X, method="bisquare", init="emve", filter="UBF-DDC") 81 | mu4 <- GSE::getLocation(out4) 82 | Sigma4 <- GSE::getScatter(out4) 83 | winef4 <- GSE::getFiltDat(out4) 84 | md4 <- mahalanobis(X, mu4, Sigma4) 85 | smd4 <- sort(md4) 86 | v4 <- ( md4 > qq ) 87 | z4 <- 1:n 88 | z4 <- z4[v4] 89 | # number of outliers 90 | no4 <- length(z4) 91 | 92 | 93 | #Figure 6.12 94 | abc <- qchisq(ppoints(n),p) 95 | par(mfrow=c(2,2)) 96 | plot(md, xlab="Index", ylab="Adjusted distances", pch=19) 97 | abline(h = qq) #lines(c(0,60),c(qq,qq)) 98 | plot(abc, smd, xlab="Chi square quantiles", ylab="Adjusted distance quantiles", pch=19) 99 | abline(0,1) 100 | plot(md4, xlab="Index", ylab="Adjusted distances", pch=19) 101 | abline(h=qq) 102 | plot(abc, smd4, xlab="Chi square quantile", ylab="Adjusted distance quantiles", pch=19) 103 | abline(0,1) 104 | par(mfrow=c(1,1)) 105 | -------------------------------------------------------------------------------- /inst/scripts/mineral.R: -------------------------------------------------------------------------------- 1 | # mineral.R 2 | # EXAMPLE 5.1 3 | # Figures 5.1 - 5.7 4 | # Table 5.1 5 | 6 | library(RobStatTM) 7 | 8 | data(mineral) 9 | cont <- lmrobdet.control(bb = 0.5, efficiency = 0.85, family = "bisquare") 10 | 11 | # We now recommend to use family "mopt" with efficiency = .95 as defaults, 12 | # and these are now the defaults in lmrobdet.control(). 
13 | # Using these defaults in the code line above produces following good results: 14 | # Decreases slope of ROB line in Figure 5.4 (less influence by outliers) 15 | # Just makes outlier 15 larger in Figure 5.5 (quite reasonable) 16 | # Just makes outlier 15 larger in Figure 5.6 (quite reasonable) 17 | # In Figure 5.7 the smaller LS and robust residuals are closer to one another 18 | # and some of the larger LS residuals are larger 19 | 20 | #LS fit 21 | mineralls <- lm(zinc ~ copper, data=mineral) 22 | 23 | #L1 fit 24 | minerall1 <- quantreg::rq(zinc ~ copper, data=mineral) 25 | 26 | #LS without outlier fit 27 | minerallssin <- lm(zinc ~ copper, data = mineral[-15, ]) 28 | 29 | #Fig 5.1. 30 | plot(zinc ~ copper, data=mineral, pch=19, cex=1.3) 31 | abline(mineralls, lwd=2, col='red') 32 | abline(minerall1, lwd=2, col='blue') 33 | abline(minerallssin, lwd=2, col='green4') 34 | text(c(600,600,600), c(29,55,82), c("LS-15","L1", "LS"), cex=1.3) 35 | text(mineral[15,1], mineral[15,2]-6, "15", cex=1.3) 36 | 37 | 38 | 39 | #Figure 5.2 40 | plot(mineralls, which=2, add.smooth=FALSE, pch=19, id.n=2, cex.id = 1.2) 41 | abline(h=c(2.5, 0, -2.5), lty=2, lwd=2) 42 | 43 | #Figure 5.3 44 | sigmaLS <- summary( mineralls)$sigma 45 | plot(mineralls, which=1, add.smooth=FALSE, pch=19, id.n=2, cex.id = 1.2) 46 | abline(h=c(2.5, 0, -2.5)*sigmaLS, lty=2, lwd=2) 47 | 48 | #-------------------------------------------- 49 | 50 | 51 | #MM fit 52 | mineralMM <- lmrobdetMM(zinc ~ copper, data = mineral, control=cont) 53 | 54 | #Fig 5.4 55 | plot(zinc ~ copper, data=mineral, pch=19, cex=1.3) 56 | abline(minerallssin, lwd=2, col='green4') 57 | abline(mineralMM, lwd=2, col='magenta') 58 | text(c(600,600), c(45,29), c("ROB","LS-15"), cex=1.2) 59 | text(mineral[15,1], mineral[15,2]-6, "15", cex=1.2) 60 | #------------------------------------------------------------ 61 | 62 | 63 | #------------------------------------------------------------ 64 | #Fig 5.5 65 | plot(mineralMM, which=4, 
add.smooth=FALSE, pch=19, cex.id=1.3) 66 | 67 | #----------------------------------------------- 68 | 69 | #Fig 5.6 70 | plot(mineralMM, which=2, pch=19, id.n=3, cex.id=1.2) 71 | abline(h=c(-2.5, 0, 2.5) * mineralMM$scale, lty=2, lwd=2) 72 | 73 | #---------------------------------------------------------- 74 | 75 | #Fig 5.7 76 | plot(sort(abs(resid(mineralls)))[-53], sort(abs(resid(mineralMM)))[-53], xlab="Least Squares residuals", 77 | ylab="Robust residuals", pch=19, cex=1.3) 78 | abline(0, 1, lwd=2, col='red') 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /inst/scripts/autism.R: -------------------------------------------------------------------------------- 1 | # autism 2 | # Example 6.7 3 | # Tables 6.8,6.9 4 | 5 | library(RobStatTM) 6 | 7 | # Must install robustvarComp 8 | # Must install nlme 9 | # Must install WWWGbook 10 | 11 | data(autism, package='WWGbook') 12 | autism <- autism[complete.cases(autism),] 13 | completi <- table(autism$childid)==5 14 | completi <- names(completi[completi]) 15 | indici <- as.vector(unlist(sapply(completi, function(x) which(autism$childid==x)))) 16 | ind <- rep(FALSE, nrow(autism)) 17 | ind[indici] <- TRUE 18 | autism <- subset(autism, subset=ind) ## complete cases 41 19 | attach(autism) 20 | sicdegp.f <- factor(sicdegp) 21 | age.f <- factor(age) 22 | age.2 <- age - 2 23 | sicdegp2 <- sicdegp 24 | sicdegp2[sicdegp == 3] <- 0 25 | sicdegp2[sicdegp == 2] <- 2 26 | sicdegp2[sicdegp == 1] <- 1 27 | sicdegp2.f <- factor(sicdegp2) 28 | autism.updated <- subset(data.frame(autism, sicdegp2.f, age.2), !is.na(vsae)) 29 | autism.grouped <- nlme::groupedData(vsae ~ age.2 | childid, data=autism.updated, order.groups = FALSE) 30 | p <- 5 31 | n <- 41 32 | z1 <- rep(1, p) 33 | z2 <- c(0, 1, 3, 7, 11) 34 | z3 <- z2^2 35 | K <- list() 36 | K[[1]] <- tcrossprod(z1,z1) 37 | K[[2]] <- tcrossprod(z2,z2) 38 | K[[3]] <- 
tcrossprod(z3,z3) 39 | K[[4]] <- tcrossprod(z1,z2) + tcrossprod(z2,z1) 40 | K[[5]] <- tcrossprod(z1,z3) + tcrossprod(z3,z1) 41 | K[[6]] <- tcrossprod(z3,z2) + tcrossprod(z2,z3) 42 | names(K) <- c("Int", "age", "age2", "Int:age", "Int:age2", "age:age2") 43 | 44 | groups <- cbind(rep(1:p, each=n), rep((1:n), p)) 45 | 46 | ## Composite Tau 47 | tmp.ctrl <- robustvarComp::varComprob.control(lower=c(0.01,0.01,0.01,-Inf,-Inf,-Inf)) 48 | AutismCompositeTau <- robustvarComp::varComprob(vsae ~ age.2 + I(age.2^2) 49 | + sicdegp2.f + age.2:sicdegp2.f + I(age.2^2):sicdegp2.f, 50 | groups = groups, data = autism.grouped, varcov = K, 51 | control=tmp.ctrl) 52 | # The Warning messages produced by the above function indicates a small change to the code 53 | # that is recommended, but the results are not effected by this Warning. 54 | # That code change will be made in a future release of RobStatTM 55 | summary(AutismCompositeTau) 56 | 57 | ## Classic S 58 | tmp2.ctrl <- robustvarComp::varComprob.control(method="S", psi="rocke", cov.init="covOGK", 59 | lower=c(0.01,0.01,0.01,-Inf,-Inf,-Inf)) 60 | AutismS <- robustvarComp::varComprob(vsae ~ age.2 + I(age.2^2) 61 | + sicdegp2.f + age.2:sicdegp2.f + I(age.2^2):sicdegp2.f, 62 | groups = groups, data = autism.grouped, varcov = K, 63 | control=tmp2.ctrl) 64 | # The Warning message produced by the above function is a notification, and not a 65 | # true "warning". 
This will be fixed in a future release of RobStatTM 66 | summary(AutismS) 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /src/init.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "RobStatTM.h" 3 | 4 | #define CDEF(name) {#name, (DL_FUNC) &name, sizeof(name ## _t)/sizeof(name ## _t[0]), name ##_t} 5 | #define CALLDEF(name, n) {#name, (DL_FUNC) &name, n} 6 | 7 | 8 | static R_NativePrimitiveArgType R_lmrob_S_t[] = { 9 | REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, REALSXP, REALSXP, 10 | /* rrhoc */ REALSXP, INTSXP, REALSXP, 11 | /* best_r */ INTSXP, INTSXP, INTSXP, 12 | /* K_s */ INTSXP, INTSXP, INTSXP, 13 | /* rel_tol*/ REALSXP, REALSXP, 14 | /* converged */ LGLSXP, INTSXP, INTSXP, INTSXP, INTSXP 15 | }; 16 | 17 | static R_NativePrimitiveArgType R_lmrob_MM_t[] = { 18 | REALSXP, REALSXP, INTSXP, INTSXP, 19 | /* beta_initial */ REALSXP, REALSXP, 20 | /* beta_m */ REALSXP, REALSXP, 21 | /* max_it */ INTSXP, REALSXP, INTSXP, 22 | /* loss */ REALSXP, REALSXP, LGLSXP, INTSXP, INTSXP, INTSXP 23 | }; 24 | 25 | static R_NativePrimitiveArgType R_find_D_scale_t[] = { 26 | REALSXP, REALSXP, REALSXP, INTSXP, REALSXP, 27 | /* c */ REALSXP, INTSXP, INTSXP, REALSXP, 28 | /* max_k */ INTSXP, LGLSXP 29 | }; 30 | 31 | static R_NativePrimitiveArgType R_calc_fitted_t[] = { 32 | REALSXP, REALSXP, REALSXP, INTSXP, INTSXP, INTSXP, 33 | INTSXP, INTSXP 34 | }; 35 | 36 | static R_NativePrimitiveArgType R_lmrob_M_S_t[] = { 37 | REALSXP, REALSXP, REALSXP, REALSXP, 38 | INTSXP, INTSXP, INTSXP, INTSXP, INTSXP, 39 | REALSXP, REALSXP, REALSXP, 40 | REALSXP, INTSXP, REALSXP, 41 | INTSXP, INTSXP, REALSXP, REALSXP, 42 | LGLSXP, INTSXP, 43 | LGLSXP, LGLSXP, LGLSXP, INTSXP, INTSXP 44 | }; 45 | 46 | static R_NativePrimitiveArgType R_subsample_t[] = { 47 | REALSXP, REALSXP, INTSXP, INTSXP, 48 | REALSXP, INTSXP, INTSXP, INTSXP, 49 | 
REALSXP, REALSXP, INTSXP, 50 | REALSXP, REALSXP, INTSXP, INTSXP, 51 | INTSXP, LGLSXP, INTSXP, INTSXP, REALSXP, 52 | LGLSXP 53 | }; 54 | 55 | 56 | static R_NativePrimitiveArgType r_fast_mve_t[] = { 57 | REALSXP, INTSXP, INTSXP, INTSXP, 58 | INTSXP, REALSXP, REALSXP, REALSXP, 59 | INTSXP, INTSXP, INTSXP 60 | }; 61 | 62 | static const R_CMethodDef CEntries[] = { 63 | CDEF(r_fast_mve), 64 | CDEF(R_lmrob_S), 65 | CDEF(R_lmrob_MM), 66 | CDEF(R_find_D_scale), 67 | CDEF(R_calc_fitted), 68 | CDEF(R_lmrob_M_S), 69 | CDEF(R_subsample), 70 | {NULL, NULL, 0} 71 | }; 72 | 73 | 74 | static const R_CallMethodDef CallEntries[] = { 75 | CALLDEF(R_rho_inf, 2), 76 | CALLDEF(R_psifun, 4), 77 | CALLDEF(R_chifun, 4), 78 | CALLDEF(R_wgtfun, 3), 79 | CALLDEF(R_erfi, 1), 80 | {NULL, NULL, 0} 81 | }; 82 | 83 | 84 | static const R_FortranMethodDef FortEntries[] = { 85 | {"rslarsbi", (DL_FUNC) &F77_SUB(rllarsbi), 18}, 86 | {"dqrdc2", (DL_FUNC) &F77_SUB(dqrdc2), 9}, 87 | {NULL, NULL, 0} 88 | }; 89 | 90 | 91 | void R_init_RobStatTM(DllInfo *dll) 92 | { 93 | R_registerRoutines(dll, CEntries, CallEntries, FortEntries, NULL); 94 | R_useDynamicSymbols(dll, FALSE); 95 | } 96 | -------------------------------------------------------------------------------- /R/prcompRob.R: -------------------------------------------------------------------------------- 1 | #' Robust Principal Components Cont'd 2 | #' 3 | #' This function uses the pcaRobS function to compute all principal components while 4 | #' behaving similarly to the prcomp function 5 | #' 6 | #' @export prcompRob 7 | #' @aliases prcompRob 8 | #' @rdname prcompRob 9 | #' 10 | #' @param x data matrix with observations in rows 11 | #' @param rank. 
Maximal number of principal components to be used (optional) 12 | #' @param delta.scale "delta" parametor of the scale M-estimator (default = 0.5) 13 | #' @param max.iter maximum number of iterations (default = 100) 14 | #' 15 | #' @return 16 | #' \item{sdev}{the standard deviation of the principal components} 17 | #' \item{rotation}{matrix containing the factor loadings} 18 | #' \item{x}{matrix containing the rotated data} 19 | #' \item{center}{the centering used} 20 | #' 21 | #' @author Gregory Brownson, \email{gregory.brownson@gmail.com} 22 | #' 23 | #' @examples 24 | #' data(wine) 25 | #' 26 | #' p.wine <- prcompRob(wine) 27 | #' summary(p.wine) 28 | #' 29 | #' ## Choose only 5 30 | #' p5.wine <- prcompRob(wine, rank. = 5) 31 | #' summary(p5.wine) 32 | #' 33 | 34 | prcompRob <- function(x, rank. = NULL, delta.scale = 0.5, max.iter = 100L) { 35 | ncomp <- if (is.null(rank.)) { 36 | ncol(x) 37 | } else { 38 | rank. 39 | } 40 | 41 | X = pcaRobS(x, ncomp = ncomp, desprop = 1.0, deltasca = delta.scale, maxit = max.iter) 42 | 43 | # X.scaled <- scale(data, center = center, scale = scale) 44 | 45 | n <- ncol(X$eigvec) 46 | 47 | pc.index <- sapply(1:n, function(i) paste("PC", i)) 48 | 49 | z <- list() 50 | 51 | z$sdev <- sqrt(diag(var(X$repre))) 52 | z$rotation <- X$eigvec 53 | colnames(z$rotation) <- pc.index 54 | 55 | z$center <- X$mu 56 | z$x <- X$repre 57 | 58 | class(z) <- "prcompRob" 59 | 60 | z 61 | } 62 | 63 | #' @export 64 | print.prcompRob <- function(x, print.x = FALSE, ...) { 65 | cat("Standard deviations:\n") 66 | print(x$sdev, ...) 67 | 68 | cat("\nEigenvectors:\n") 69 | print(x$rotation, ...) 70 | 71 | if (print.x && length(x$x)) { 72 | cat("\nRotated data:\n") 73 | print(x$x, ...) 74 | } 75 | 76 | invisible(x) 77 | } 78 | 79 | #' @export 80 | summary.prcompRob <- function(object, ...) { 81 | chkDots(...) 
82 | 83 | vars <- object$sdev^2 84 | vars <- vars/sum(vars) 85 | 86 | importance <- rbind("Standard deviation" = object$sdev, 87 | "Proportion of Variance" = round(vars, 5), 88 | "Cumulative Proportion" = round(cumsum(vars), 5)) 89 | 90 | colnames(importance) <- colnames(object$rotation) 91 | 92 | object$importance <- importance 93 | 94 | class(object) <- "summary.prcompRob" 95 | object 96 | } 97 | 98 | #' @export 99 | print.summary.prcompRob <- function(x, digits = max(3L, getOption("digits") - 3L), ...) 100 | { 101 | cat("Importance of components:\n") 102 | print(x$importance, digits = digits, ...) 103 | invisible(x) 104 | } -------------------------------------------------------------------------------- /src/RobStatTM.h: -------------------------------------------------------------------------------- 1 | /* Copied from robustbase.h */ 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | /* For internationalized messages */ 8 | #ifdef ENABLE_NLS 9 | #include 10 | #define _(String) dgettext ("Matrix", String) 11 | #else 12 | #define _(String) (String) 13 | #define dngettext(pkg, String, StringP, N) (N > 1 ? 
StringP : String) 14 | #endif 15 | 16 | double complex erfz(double complex z); 17 | double complex erfi(double complex z); 18 | SEXP R_erfi(SEXP x); 19 | 20 | SEXP R_rho_inf(SEXP cc, SEXP ipsi); 21 | 22 | void R_lmrob_S(double *X, double *y, int *n, int *P, 23 | int *nRes, double *scale, double *beta_s, 24 | double *C, int *iipsi, double *bb, 25 | int *best_r, int *Groups, int *N_group, 26 | int *K_s, int *max_k, int *max_it_scale, 27 | double *rel_tol, double *inv_tol, 28 | // ^^^^^^^^^ = refine.tol in R 29 | int* converged, int *trace_lev, int *mts, int *ss, int *cutoff); 30 | 31 | void R_lmrob_M_S(double *X1, double *X2, double *y, double *res, 32 | int *n, int *p1, int *p2, int *nRes, int *max_it_scale, 33 | double *scale, double *b1, double *b2, 34 | double *rho_c, int *ipsi, double *bb, 35 | int *K_m_s, int *max_k, double *rel_tol, double *inv_tol, 36 | int *converged, int *trace_lev, 37 | int *orthogonalize, int *subsample, 38 | int *descent, int *mts, int *ss); 39 | 40 | void R_lmrob_MM(double *X, double *y, int *n, int *P, 41 | double *beta_initial, double *scale, 42 | double *beta_m, double *resid, 43 | int *max_it, 44 | double *rho_c, int *ipsi, double *loss, double *rel_tol, 45 | int *converged, int *trace_lev, int *mts, int *ss); 46 | 47 | void R_subsample(const double *x, const double *y, int *n, int *m, 48 | double *beta, int *ind_space, int *idc, int *idr, 49 | double *lu, double *v, int *p, 50 | double *_Dr, double *_Dc, int *_rowequ, int *_colequ, 51 | int *status, int *sample, int *mts, int *ss, double *tol_inv, 52 | int *solve); 53 | 54 | SEXP R_psifun(SEXP x_, SEXP c_, SEXP ipsi_, SEXP deriv_); 55 | SEXP R_chifun(SEXP x_, SEXP c_, SEXP ipsi_, SEXP deriv_); 56 | SEXP R_wgtfun(SEXP x_, SEXP c_, SEXP ipsi_); 57 | 58 | 59 | void R_find_D_scale(double *rr, double *kkappa, double *ttau, int *llength, 60 | double *sscale, double *cc, int *iipsi, int *ttype, double *rel_tol, 61 | int *max_k, int *converged); 62 | 63 | void R_calc_fitted(double *XX, 
double *bbeta, double *RR, int *nn, int *pp, int *nnrep, 64 | int *nnproc, int *nnerr); 65 | 66 | void F77_NAME(rllarsbi)( 67 | double *X, double *Y, int *N, int *NP, int *MDX, int *MDT, 68 | double *TOL, int *NIT, int *K, int *KODE, double *SIGMA, double *THETA, 69 | double *RS, double *SC1, double *SC2, double *SC3, double *SC4, 70 | double *BET0); 71 | 72 | void r_fast_mve(double *xx, int *nn, int *pp, int *nnsamp, 73 | int *nsingular, double *ctr, double *ccov, double *scale, 74 | int *best_ind, int *nnind, int *nn2); 75 | 76 | void F77_NAME(dqrdc2)(double *xw, int *nind, int *nind2, int *p, 77 | double *tol, int *rank, double *qraux, int *pivot, double *work); 78 | 79 | -------------------------------------------------------------------------------- /inst/scripts/epilepsy.R: -------------------------------------------------------------------------------- 1 | # epilepsy.R 2 | # Example 7.3 3 | # Breslow data 4 | 5 | library(RobStatTM) 6 | data(breslow.dat) 7 | 8 | 9 | #CUBIF Estimator 10 | yy <- breslow.dat[, 10] 11 | xx1 <- breslow.dat[, 11] 12 | xx2 <- breslow.dat[, 12] 13 | xx3 <- breslow.dat[, 8]=="progabide" 14 | xx4 <- xx2*xx3 15 | 16 | XX <- cbind(rep(1,59), xx1, xx2, xx3, xx4) 17 | colnames(XX) <- c("intercept","Age10","Base4","Progabide","interac.Base4-Progabide") 18 | 19 | ufact <- 1.1 20 | ctrl <- robcbi::cubinf.control(ufact=ufact) 21 | epiCUBIF <- robcbi::cubinf(XX, yy, family=poisson(), null.dev = FALSE, control=ctrl) 22 | epiCUBIF_coefficients <- epiCUBIF$coefficients 23 | epiCUBIF_ste <- sqrt(diag(epiCUBIF$cov)) 24 | epiCUBIF_deviances <- sign(yy-epiCUBIF$fitted)*sqrt(2*(yy*log( pmax(yy,1))-yy-yy*log(epiCUBIF$fitted)+epiCUBIF$fitted)) 25 | 26 | #ML estimator 27 | epiMV <- glm(yy~xx1+xx2+xx3+xx4,family=poisson) 28 | epiMV_coefficients <- epiMV$coefficients 29 | tt <- summary(epiMV) 30 | epiMV_ste <- sqrt(diag(tt$cov.unscaled)) 31 | epiMV_deviances <- tt$deviance.resid 32 | 33 | # NOTE 1: The MLE values in Table 7.3 of the book are incorrect. 
34 | 35 | #RQL estimator 36 | epiRQL <- robustbase::glmrob(yy~xx1+xx2+xx3+xx4,family=poisson) 37 | epiRQL_coefficients <- epiRQL$coefficients 38 | epiRQL_ste <- sqrt(diag(epiRQL$cov)) 39 | epiRQL_deviances <- residuals(epiRQL) 40 | 41 | 42 | #MT estimator 43 | epiMT <- robustbase::glmrob(yy~xx1+xx2+xx3+xx4, family=poisson, method="MT") 44 | epiMT_coefficients <- epiMT$coefficients 45 | epiMT_ste <- sqrt(diag(epiMT$cov)) 46 | epiMT_deviances <- sign(yy-epiMT$fitted)*sqrt(2*(yy*log( pmax(yy,1))-yy-yy*log(epiMT$fitted)+epiMT$fitted)) 47 | 48 | 49 | #epiMP 50 | # NOTE 2: There is no R code for the MP estimator, so the following was computed with MATLAB 51 | epiMP_coefficients <- c(2.0078, 0.0707, 0.1346, -0.4898 , 0.0476) 52 | epiMP_fitted <- exp(XX%*%epiMP_coefficients) 53 | epiMP_deviances <- sign(yy-epiMP_fitted)*sqrt(2*(yy*log( pmax(yy,1))-yy-yy*log(epiMP_fitted)+epiMP_fitted)) 54 | 55 | #boxplot Figure 7.6 56 | par(mfrow=c(1,2), cex=0.6) 57 | # devC <- cbind(abs(epiMV_deviances),abs(epiCUBIF_deviances),abs(epiMT_deviances),abs(epiRQL_deviances),abs(epiMP_deviances)) 58 | # boxplot(devC,names=c("ML","CUBIF","MT","QL","MP"),ylab="Absolute Deviance Residuals",main="with outliers") 59 | # boxplot(devC,names=c("ML","CUBIF","MT","QL","MP"),ylab="Absolute Deviance Residuals",main="without outliers",outline=FALSE) 60 | devC <- cbind(abs(epiMV_deviances),abs(epiMT_deviances),abs(epiRQL_deviances),abs(epiMP_deviances)) 61 | boxplot(devC,names=c("ML","MT","QL","MP"),ylab="Absolute Deviance Residuals",main="with outliers") 62 | boxplot(devC,names=c("ML","MT","QL","MP"),ylab="Absolute Deviance Residuals",main="without outliers",outline=FALSE) 63 | par(mfrow = c(1,1)) 64 | 65 | # Figure 7.7 66 | sdev1 <- sort(abs(epiMT_deviances)) 67 | sdev2 <- sort(abs(epiMV_deviances)) 68 | 69 | plot(ppoints(59)[1:48],sdev1[1:48], type="b",pch=1,xlab="quantiles", ylab= "deviance residuals") 70 | lines(ppoints(59)[1:48],sdev2[1:48], type="b",pch=2) 71 | legend(x="topleft",legend=c("MT","ML"), 
pch=c(1,2)) 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | -------------------------------------------------------------------------------- /R/RobPCA_SM.R: -------------------------------------------------------------------------------- 1 | #' Robust principal components 2 | #' 3 | #' This function computes robust principal components based on the minimization of 4 | #' the "residual" M-scale. 5 | #' 6 | #' @export SMPCA pcaRobS 7 | #' @aliases SMPCA pcaRobS 8 | #' @rdname SMPCA 9 | #' 10 | #' @param X a data matrix with observations in rows. 11 | #' @param ncomp desired (maximum) number of components 12 | #' @param desprop desired (minimum) proportion of explained variability (default = 0.9) 13 | #' @param maxit maximum number of iterations (default= 100) 14 | #' @param deltasca "delta" parameter of the scale M-estimator (default=0.5) 15 | #' 16 | #' @return A list with the following components: 17 | #' \item{q}{The actual number of principal components} 18 | #' \item{propex}{The actual proportion of unexplained variability} 19 | #' \item{eigvec}{Eigenvectors, in a \code{p x q} matrix} 20 | #' \item{fit}{an \code{n x p} matrix with the rank-q approximation to \code{X}} 21 | #' \item{repre}{An \code{n x q} matrix with representation of data in R^q (scores)} 22 | #' \item{propSPC}{A vector of length \code{p} with the cumulative explained variance from initial SPC} 23 | #' 24 | #' @author Ricardo Maronna, \email{rmaronna@retina.ar}, based on original code 25 | #' by D. Pen~a and J. 
Prieto 26 | #' 27 | #' @references \url{http://www.wiley.com/go/maronna/robust} 28 | #' 29 | #' @examples 30 | #' data(bus) 31 | #' X0 <- as.matrix(bus) 32 | #' X1 <- X0[,-9] 33 | #' ss <- apply(X1, 2, mad) 34 | #' mu <- apply(X1, 2, median) 35 | #' X <- scale(X1, center=mu, scale=ss) 36 | #' q <- 3 #compute three components 37 | #' rr <- pcaRobS(X, q, 0.99) 38 | #' round(rr$eigvec, 3) 39 | #' 40 | pcaRobS <- SMPCA <-function(X, ncomp, desprop=0.9, deltasca=0.5, maxit=100) { 41 | 42 | Wf <- function(r){ return((1-r)^2*(r<=1)) } #Bisquare weights for r=resid^2 43 | 44 | n=dim(X)[1]; p=dim(X)[2] 45 | tol=1.e-4; 46 | #Inicial 47 | sp= rrcov::PcaLocantore(X, k=p) 48 | mu0=sp@center; lamL=sp@eigenvalues 49 | lamL=lamL/sum(lamL); propSPC=cumsum(lamL) 50 | q1=sum(propSPCtol) { 65 | iter=iter+1; 66 | #update mu 67 | mu=colSums(X*ww)/sum(ww) 68 | Xcen=scale(X, center=mu, scale=FALSE) 69 | #update B 70 | C=t(Xcen)%*%diag(ww)%*%Xcen 71 | B <- svd(C, nu=q, nv=q)$u # B=rARPACK::eigs_sym(C, q)$vectors 72 | fit=Xcen%*%B%*%t(B) 73 | res=Xcen-fit #residuals 74 | rr=colSums(t(res)^2) 75 | sig=mscale(sqrt(rr),delta=deltasca, tuning.chi = 1, family='bisquare') ^2 76 | del1=1-sig/sig0; #print(c(iter,del1)) 77 | U=diag(q)-abs(t(B)%*%B0) 78 | del2=mean(abs(U)); del=max(c(del1,del2)) 79 | sig0=sig; B0=B 80 | ww=Wf(rr/sig); 81 | repre=Xcen%*%B #represnt. in R^2 82 | # print(c(iter, del1, del2)) 83 | } #endo 84 | propex=1-sig/sigini #prop. var. 
expli 85 | fit=scale(fit, center=-mu, scale=FALSE) 86 | resu=list(eigvec=B, fit=fit, repre=repre, propex=propex, propSPC=propSPC, mu=mu, q=q) 87 | return(resu) 88 | } 89 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RobStatTM 2 | 3 | This repository contains a development version of the companion package to the 2nd edition of 4 | the book [Robust Statistics: Theory and Methods](https://www.wiley.com/go/maronna/robust), by Ricardo Maronna, Doug Martin, Victor Yohai and Matias Salibian-Barrera. 5 | 6 | * The latest official version of the package is available on [CRAN](https://cran.r-project.org/package=RobStatTM). You should probably use [that version](https://cran.r-project.org/package=RobStatTM). 7 | * To install the "development" version on GitHub, use 8 | ``` 9 | devtools::install_github("msalibian/RobStatTM") 10 | ``` 11 | * The scripts reproducing the examples and figures in the book can be found in the folder `inst/scripts`. 12 | 13 | #### Bug reports 14 | 15 | We use [GitHub issues](https://guides.github.com/features/issues/) to track and solve potential bugs in our package. When submitting your bug report please: 16 | 17 | 1. Check if your issue / bug has been fixed by trying to reproduce it using the latest version of the package. 18 | 19 | 3. Isolate the problem by creating a **minimal reproducible example** (see below) 20 | 21 | 4. Create an [issue](https://guides.github.com/features/issues/) for this repository. Refer to [this page](https://help.github.com/en/articles/creating-an-issue) for instructions on how to create a GitHub issue. 22 | 23 | A good bug report should not require others to contact you to find more information. Please 24 | try to be as detailed as possible in your report. What is your environment? What steps will 25 | reproduce the issue? What outcome did you expect and what outcome did you get? 
26 | 27 | ###### Example: 28 | 29 | > A short and descriptive bug report title 30 | > 31 | > A summary of the issue and the OS environment in which it occurs. 32 | > Include the steps required to reproduce the bug. 33 | > 34 | > 1. This is the first step 35 | > 2. This is the second step 36 | > 3. Further steps, etc. 37 | > 38 | > Any other information you want to share that is relevant to the issue being 39 | > reported. This might include the lines of code that you have identified as 40 | > causing the bug, and potential solutions. 41 | 42 | 43 | ##### Minimal reproducible examples 44 | 45 | (This section is adapted from [Rob Hyndman's notes on minimal reproducible examples](https://robjhyndman.com/hyndsight/minimal-reproducible-examples/)). 46 | 47 | A Minimal reproducible example (MRE) is intended to reproduce an error using the smallest amount of 48 | code possible. To check that your MRE code is reproducible, try running it in a fresh R 49 | session before you submit the issue. Using minimal reproducible examples 50 | saves package developers time in wading through messy code that is not 51 | relevant to the apparent bug. 52 | 53 | A MRE should consist of a single R script file that can be run without error in a fresh R 54 | session, and should contain the following three sections: 55 | 56 | 57 | * Packages to be loaded. 58 | * The shortest amount of code that reproduces the problem. 59 | * The output of `sessionInfo()` as a comment. 60 | 61 | Please remove anything that is not necessary to reproduce the problem. 62 | 63 | Try to use one of the built-in datasets if possible. If you need to include 64 | some data, then use `dput()` so 65 | the data can be included as part of the same text file. In 66 | most cases, you do not need to include all of 67 | your data, just a small subset that will allow the problem to be reproduced. 68 | 69 | If you randomly generate some data, use `set.seed(somenumber)`. 
#' Weighted likelihood estimator for the logistic model
#'
#' This function computes a weighted likelihood estimator for the logistic model, where
#' the weights penalize high leverage observations. In this version the weights are zero or one.
#'
#' Columns of \code{x0} that only take the values 0 and 1 are treated as dummy
#' variables and are not used to measure leverage. With a single remaining column
#' leverage is the absolute deviation from the median divided by the MAD; with
#' several remaining columns it is the squared Mahalanobis distance based on
#' \code{robustbase::covMcd}. Observations beyond the 0.975 chi-square quantile
#' (with as many degrees of freedom as columns used) receive weight zero.
#' Rows with non-finite values in \code{x0} or \code{y} are removed before fitting.
#'
#' @export WMLlogreg logregWML
#' @aliases WMLlogreg logregWML
#' @rdname WMLlogreg
#'
#' @param x0 n x p matrix of explanatory variables, n is the number of observations,
#' p is the number of explanatory variables
#' @param y response vector
#' @param intercept 1 or 0 indicating if an intercept is included or not
#'
#' @return A list with the following components:
#' \item{coefficients}{vector of regression coefficients}
#' \item{standard.deviation}{standard deviations of the regression coefficient estimators}
#' \item{fitted.values}{vector with the probabilities of success}
#' \item{residual.deviances}{residual deviances}
#' \item{cov}{covariance matrix of the regression estimates}
#' \item{xweights}{vector of zeros and ones (stored as logical values) used to compute the weighted maximum likelihood estimator}
#'
#' @author Victor Yohai
#' @references \url{http://www.wiley.com/go/maronna/robust}
#'
#' @examples
#' data(skin)
#' Xskin <- as.matrix( skin[, 1:2] )
#' yskin <- skin$vasoconst
#' skinWML <- logregWML(Xskin, yskin, intercept=1)
#' skinWML$coeff
#' skinWML$standard.deviation
#'
logregWML <- WMLlogreg <- function (x0, y, intercept = 1)
{
  if (!(isTRUE(intercept == 1) || isTRUE(intercept == 0)))
    stop("intercept must be 1 or 0")
  ttx <- colnames(x0)
  if (!is.numeric(y))
    y <- as.numeric(y)
  if (!is.null(dim(y))) {
    if (ncol(y) != 1)
      stop("y is not onedimensional")
    y <- as.vector(y)
  }
  n <- length(y)
  x0 <- as.matrix(x0)
  if (nrow(x0) != n)
    stop("Number of observations in x and y not equal")

  ## keep only the rows where both the covariates and the response are finite
  ok <- is.finite(rowSums(x0)) & is.finite(y)
  if (!all(ok)) {
    x0 <- x0[ok, , drop = FALSE]
    y <- y[ok]
  }
  n <- nrow(x0)
  if (n == 0)
    stop("All observations have missing values!")
  family <- binomial()

  ## columns that only take the values 0 and 1 are dummies: they are
  ## left out of the leverage computation
  tt <- which(colSums(x0 == 0 | x0 == 1) != n)
  p1 <- length(tt)

  if (p1 == 0) {
    ## nothing to measure leverage on: every observation is kept
    wrd <- rep(TRUE, n)
  } else if (p1 == 1) {
    x00 <- x0[, tt]
    sx <- mad(x00)
    if (sx == 0)
      stop("the MAD of the continuous explanatory variable is zero; leverage weights cannot be computed")
    rdx <- abs(x00 - median(x00)) / sx
    ## qnorm(.9875)^2 equals qchisq(.975, 1)
    wrd <- rdx <= qnorm(.9875)
  } else {
    x00 <- x0[, tt, drop = FALSE]
    mcdx <- robustbase::covMcd(x00, alpha = .75)
    rdx <- mahalanobis(x00, center = mcdx$center, cov = mcdx$cov)
    ## the distances live in p1 dimensions, so the cutoff uses p1 degrees
    ## of freedom (not the total number of columns)
    wrd <- rdx <= qchisq(0.975, p1)
  }

  ## drop = FALSE keeps a one-column design as a matrix; the names x000 and
  ## y00 are kept because they become part of the coefficient names
  x000 <- x0[wrd, , drop = FALSE]
  colnames(x000) <- ttx
  y00 <- y[wrd]

  if (intercept == 1) {
    out <- glm(y00 ~ x000, family = family)
    x <- cbind(Intercept = 1, x0)
  } else {
    out <- glm(y00 ~ x000 - 1, family = family)
    x <- x0
  }
  out1 <- summary(out)

  ## fitted probabilities and deviance residuals for all (finite) observations,
  ## including those that received weight zero
  fitted.linear <- x %*% out$coefficients
  fitted.linear <- pmin(fitted.linear, 1e2)
  fitted.values <- exp(fitted.linear) / (1 + exp(fitted.linear))
  residual.deviances <- sign(y - fitted.values) *
    sqrt(-2 * (y * log(fitted.values) + (1 - y) * log(1 - fitted.values)))

  list(xweights = wrd,
       coefficients = out$coefficients,
       standard.deviation = sqrt(diag(out1$cov.unscaled)),
       fitted.values = t(fitted.values),
       cov = out1$cov.unscaled,
       residual.deviances = t(residual.deviances))
}
-------------------------------------------------------------------------------- /man/step.lmrobdetMM.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/RFPE.R 3 | \name{step.lmrobdetMM} 4 | \alias{step.lmrobdetMM} 5 | \alias{step.lmrobdet} 6 | \title{Robust stepwise using RFPE} 7 | \usage{ 8 | step.lmrobdetMM( 9 | object, 10 | scope, 11 | direction = c("both", "backward", "forward"), 12 | trace = TRUE, 13 | keep = NULL, 14 | steps = 1000, 15 | whole.path = FALSE 16 | ) 17 | } 18 | \arguments{ 19 | \item{object}{a robust fit as returned by \code{\link{lmrobdetMM}}} 20 | 21 | \item{scope}{either a formula or a list with elements \code{lower} and \code{upper} each of 22 | which is a formula. The terms in the right-hand-side of \code{lower} are always included 23 | in the model and the additional terms in the right-hand-side of \code{upper} are the 24 | candidates for inclusion/exclusion from the model. If a single formula is given, it is 25 | taken to be \code{upper}, and \code{lower} is set to the empty model. The \code{.} operator 26 | is interpreted in the context of the formula in \code{object}.} 27 | 28 | \item{direction}{the direction of stepwise search. Currently only \code{backward} stepwise 29 | searches are implemented.} 30 | 31 | \item{trace}{logical. If \code{TRUE} information about each step is printed on the screen.} 32 | 33 | \item{keep}{a filter function whose input is a fitted model object and the associated AIC statistic, and whose output is arbitrary. Typically keep will select a subset of the components of the object and return them. The default is not to keep anything.} 34 | 35 | \item{steps}{maximum number of steps to be performed. Defaults to 1000, which should mean as many as needed.} 36 | 37 | \item{whole.path}{if \code{FALSE} (default) variables are dropped until the RFPE fails to improve.
If \code{TRUE} the best variable to be dropped is removed, even if this does not improve the RFPE.} 38 | } 39 | \value{ 40 | If \code{whole.path == FALSE} the function returns the robust fit as obtained by \code{lmrobdetMM} using the final model. 41 | If \code{whole.path == TRUE} a list is returned containing the RFPE of each model on the sequence 42 | of submodels. The names of the components of this list are the formulas that correspond to each model. 43 | } 44 | \description{ 45 | This function performs stepwise model selection on a robustly fitted 46 | linear model using the RFPE 47 | criterion and the robust regression estimators computed with 48 | \code{\link{lmrobdetMM}}. Only backwards stepwise is currently implemented. 49 | } 50 | \details{ 51 | Presently only backward stepwise selection is supported. During each step the 52 | Robust Final Prediction Error (as computed by the function \code{lmrobdetMM.RFPE}) is 53 | calculated for the current model and for each sub-model achievable by deleting a 54 | single term. If the argument \code{whole.path} is \code{FALSE}, the function steps 55 | to the sub-model with the lowest 56 | Robust Final Prediction Error or, if the current model has the lowest Robust Final 57 | Prediction Error, terminates. If the argument \code{whole.path} is \code{TRUE}, the 58 | function steps through all smaller submodels removing, at each step, the variable 59 | that most reduces the Robust Final Prediction Error. The scale estimate from \code{object} 60 | is used to compute the Robust Final Prediction Error throughout the procedure. 
61 | } 62 | \examples{ 63 | cont <- lmrobdet.control(bb = 0.5, efficiency = 0.85, family = "bisquare") 64 | set.seed(300) 65 | X <- matrix(rnorm(50*6), 50, 6) 66 | beta <- c(1,1,1,0,0,0) 67 | y <- as.vector(X \%*\% beta) + 1 + rnorm(50) 68 | y[1:6] <- seq(30, 55, 5) 69 | for (i in 1:6) X[i,] <- c(X[i,1:3],i/2,i/2,i/2) 70 | Z <- cbind(y,X) 71 | Z <- as.data.frame(Z) 72 | obj <- lmrobdetMM(y ~ ., data=Z, control=cont) 73 | out <- step.lmrobdetMM(obj) 74 | 75 | } 76 | \references{ 77 | \url{http://www.wiley.com/go/maronna/robust} 78 | } 79 | \seealso{ 80 | \code{\link{DCML}}, \code{\link{MMPY}}, \code{\link{SMPY}} 81 | } 82 | \author{ 83 | Victor Yohai, \email{victoryohai@gmail.com}, Matias Salibian-Barrera, \email{matias@stat.ubc.ca} 84 | } 85 | -------------------------------------------------------------------------------- /inst/scripts/VignetteRobStatTM.R: -------------------------------------------------------------------------------- 1 | ## ----include=FALSE------------------------------------------------------- 2 | library(knitr) 3 | opts_chunk$set( 4 | keep.source=TRUE, 5 | tidy=TRUE, 6 | message=FALSE, 7 | fig.path='Plots/', 8 | fig.align='center', 9 | fig.width=4, 10 | fig.height=3, 11 | fig.keep='last', 12 | fig.show='hide', 13 | dev.args=list(pointsize=10), 14 | tidy.opts=list(width.cutoff=40), 15 | cache=FALSE) 16 | options(width=70) 17 | 18 | 19 | ## ----eval = FALSE,echo = T----------------------------------------------- 20 | ## install.packages("RobStatTM") 21 | 22 | 23 | ## ----echo = T------------------------------------------------------------ 24 | library("RobStatTM") 25 | 26 | 27 | ## ----eval = FALSE, echo = T---------------------------------------------- 28 | ## system.file("scripts", package = "RobStatTM") 29 | 30 | 31 | ## ----echo = T------------------------------------------------------------ 32 | data(shock) 33 | head(shock,2) 34 | 35 | 36 | ## ----echo = T------------------------------------------------------------ 37 | data(wood, package 
= "robustbase") 38 | head(wood,1) 39 | 40 | 41 | ## ----echo = T------------------------------------------------------------ 42 | minerall1 <- quantreg::rq(zinc ~ copper, data=mineral) 43 | 44 | 45 | ## ----echo = T------------------------------------------------------------ 46 | LSfit <- lm(zinc ~ copper, data = mineral) 47 | control <- lmrobdet.control(family = "mopt",eff = 0.95) 48 | robfit <- lmrobdetMM(zinc ~ copper, control = control, data = mineral) 49 | fmLSrob <- fit.models(LSfit,robfit) 50 | class(fmLSrob) 51 | summary(fmLSrob) 52 | 53 | 54 | ## ----echo = T,eval = F,results = F--------------------------------------- 55 | ## help(summary.lmfm) 56 | 57 | 58 | ## ----echo = T,eval = F,results = F--------------------------------------- 59 | ## help(plot.lmfm) 60 | 61 | 62 | ## ----echo = T------------------------------------------------------------ 63 | args(plot.lmfm) 64 | 65 | 66 | ## ----echo = 2,results = F------------------------------------------------ 67 | png(file = "Plots/.png", width = 6, height = 4, units = "in", 68 | pointsize = 6, res = 600) 69 | plot(fmLSrob,which.plots = 11) 70 | dev.off() 71 | 72 | 73 | ## ----echo = 2,results = F------------------------------------------------ 74 | png(file = "Plots/qqplotResiduals.png", width = 6, height = 4, units = "in", 75 | pointsize = 6, res = 600) 76 | plot(fmLSrob,which.plots = 2) 77 | dev.off() 78 | 79 | 80 | ## ----echo = T,warning = F------------------------------------------------ 81 | library(robust) # This is only needed until the package fit.models is updated in CRAN 82 | 83 | 84 | ## ----echo = T------------------------------------------------------------ 85 | data(wine) 86 | wine5 <- wine[,1:5] 87 | cov.fm <- fit.models(Classic = covClassic(wine5),Robust = covRob(wine5,type = "auto")) 88 | class(cov.fm) 89 | summary(cov.fm) 90 | 91 | 92 | ## ----echo = T------------------------------------------------------------ 93 | names(cov.fm) 94 | names(cov.fm$Classic) 95 | names(cov.fm$Robust) 96 | 97 
lmrob.lar <- function(x, y, control = lmrob.control(), mf = NULL)
{
  ## LAR : Least Absolute Residuals -- i.e. L_1 M-estimate
  ## this function is identical to lmRob.lar of the robust package
  ##
  ## x       : design matrix (coerced with as.matrix)
  ## y       : response, one entry per row of x
  ## control : list of control parameters; only control$rel.tol is read here
  ## mf      : not used in the body of this function
  ##
  ## Returns a list with components coefficients, scale, residuals, iter,
  ## status and converged (the latter is always set to TRUE).

  x <- as.matrix(x)
  p <- ncol(x)
  n <- nrow(x)
  stopifnot(p > 0, n >= p, length(y) == n, is.numeric(control$rel.tol))
  storage.mode(x) <- "double"
  storage.mode(y) <- "double"

  ## work arrays handed to the Fortran routine
  work.n <- double(n)
  work.p <- double(p)

  fortran.out <- .Fortran(rslarsbi, ##-> ../src/rllarsbi.f
                          x,
                          y,
                          as.integer(n),
                          as.integer(p),
                          as.integer(n),
                          as.integer(n),
                          as.double(control$rel.tol),
                          NIT = integer(1),
                          K = integer(1),
                          KODE = integer(1),
                          SIGMA = double(1),
                          THETA = work.n,
                          RS = work.n,
                          SC1 = work.n,
                          SC2 = work.p,
                          SC3 = work.p,
                          SC4 = work.p,
                          BET0 = as.double(0.773372647623)) ## = pnorm(0.75)

  if (fortran.out[["KODE"]] > 1)
    stop("calculations stopped prematurely in rllarsbi\n",
         "(probably because of rounding errors).")

  ## keep THETA, SIGMA, RS, NIT, KODE (in this order) under friendlier names
  fit <- fortran.out[c("THETA", "SIGMA", "RS", "NIT", "KODE")]
  names(fit) <- c("coefficients", "scale", "residuals", "iter", "status")
  fit$converged <- TRUE
  ## THETA was allocated with length n; only the first p entries are kept
  length(fit$coefficients) <- p
  fit
}

splitFrame <- function(mf, x = model.matrix(mt, mf),
                       type = c("f","fi", "fii"))
{
  ## Split the columns of a model matrix into
  ##   x1: the intercept, the factors and (depending on `type`) interactions
  ##       of / with factors
  ##   x2: the remaining (continuous) columns
  ##
  ## mf   : model frame carrying a "terms" attribute
  ## x    : model matrix; by default it is built from `mf`
  ## type : "f"   do not include interactions cat * cont in x1
  ##        "fi"  include interactions cat * cont in x1
  ##        "fii" also include continuous variables that interact with factors
  ##
  ## Returns a list with x1, x2 and the logical column selector x1.idx.

  ## `mt` has to be defined before `x` is first used, because the default
  ## value of `x` refers to it
  mt <- attr(mf, "terms")
  type <- match.arg(type)
  x <- as.matrix(x)

  term.tab <- attr(mt, "factors")
  is.fac <- attr(mt, "dataClasses") == "factor"

  if (!any(is.fac)) { ## There are no factors
    return(list(x1.idx = rep.int(FALSE, ncol(x)),
                x1 = matrix(, nrow(x), 0L),
                x2 = x))
  }

  if (type == "fi") {
    x1.terms <- which(is.fac %*% term.tab > 0)
  } else if (type == "fii") {
    cat.tab <- term.tab
    cat.tab[cat.tab > 0] <- 1L ## fix triple+ interactions
    cat.tab[, is.fac %*% term.tab == 0] <- 0L
    ## TRUE when term number j has to go into x1
    goes.to.x1 <- function(j) {
      used <- term.tab[, j] > 0
      ## if any of the components is a factor: include in x1
      if (any(is.fac[used]))
        return(TRUE)
      ## otherwise look for an interaction of this term with a categorical var.
      hits <- colSums(cat.tab[used, , drop = FALSE]) >= sum(used)
      ## ... and include it in x1 if no other continuous variables are involved
      any(hits) &&
        !all(colSums(term.tab[!is.fac & !used, hits, drop = FALSE]) > 0)
    }
    x1.terms <- which(vapply(seq_len(ncol(term.tab)), goes.to.x1, logical(1)))
  } else if (type == "f") {
    n.cat <- is.fac %*% term.tab
    n.cont <- (!is.fac) %*% term.tab
    x1.terms <- which(n.cat & !n.cont)
  } else {
    stop("unknown split type")
  }

  x1.idx <- attr(x, "assign") %in% c(0, x1.terms) ## also include intercept
  names(x1.idx) <- colnames(x)

  list(x1 = x[, x1.idx, drop = FALSE],
       x1.idx = x1.idx,
       x2 = x[, !x1.idx, drop = FALSE])
}
If not found in \code{data}, model variables 27 | are taken from \code{environment(formula)}, which usually is the 28 | root environment of the current R session.} 29 | 30 | \item{subset}{an optional vector specifying a subset of observations to be used.} 31 | 32 | \item{weights}{an optional vector of weights to be used in the fitting process.} 33 | 34 | \item{na.action}{a function that indicates what should happen when the data contain NAs. 35 | The default is set by the \link{na.action} setting of \code{\link[base]{options}}, and is 36 | \code{na.fail} if that is unset.} 37 | 38 | \item{model}{logical value indicating whether to return the model frame} 39 | 40 | \item{x}{logical value indicating whether to return the model matrix} 41 | 42 | \item{y}{logical value indicating whether to return the vector of responses} 43 | 44 | \item{singular.ok}{logical value. If \code{FALSE} a singular fit produces an error.} 45 | 46 | \item{contrasts}{an optional list. See the \code{contrasts.arg} of \link{model.matrix.default}.} 47 | 48 | \item{offset}{this can be used to specify an a priori known component to be included 49 | in the linear predictor during fitting.
An offset term can be included in the formula 50 | instead or as well, and if both are specified their sum is used.} 51 | 52 | \item{control}{a list specifying control parameters as returned by the function 53 | \link{lmrobdet.control}.} 54 | } 55 | \value{ 56 | A list with the following components: 57 | \item{coefficients}{The estimated vector of regression coefficients} 58 | \item{scale}{The estimated scale of the residuals} 59 | \item{residuals}{The vector of residuals associated with the robust fit} 60 | \item{converged}{Logical value indicating whether IRWLS iterations for the MM-estimator have converged} 61 | \item{iter}{Number of IRWLS iterations for the MM-estimator} 62 | \item{rweightsMM}{Robustness weights for the MM-estimator} 63 | \item{fitted.values}{Fitted values associated with the robust fit} 64 | \item{rank}{Numeric rank of the fitted linear model} 65 | \item{cov}{The estimated covariance matrix of the regression estimates} 66 | \item{df.residual}{The residual degrees of freedom} 67 | \item{contrasts}{(only where relevant) the contrasts used} 68 | \item{xlevels}{(only where relevant) a record of the levels of the factors used in fitting} 69 | \item{call}{the matched call} 70 | \item{model}{if requested, the model frame used} 71 | \item{x}{if requested, the model matrix used} 72 | \item{y}{if requested, the response vector used} 73 | \item{na.action}{(where relevant) information returned by model.frame on the special handling of NAs} 74 | } 75 | \description{ 76 | This function computes robust Distance Constrained Maximum Likelihood 77 | estimators for linear models. 78 | } 79 | \details{ 80 | This function computes Distance Constrained Maximum Likelihood regression estimators 81 | computed using an MM-regression estimator based on Pen~a-Yohai 82 | candidates (instead of subsampling ones). 
#' @title Test for Least Squares Bias Using Robust MM Regressions
#'
#' @param object An MM regression fitted model whose class is *lmrobdetMM*.
#' @param test A character vector indicating which of two type of tests "T" or
#' "T0" are used, with type "T" the default.
#' @param ... Pass through parameters (currently not used by this function)
#'
#' @returns A list with component names coefs, full, test, efficiency
#'
#' @details The original version of \code{lsRobTestMM} is the \code{lsRobTest}
#' in the package *robust*. The function \code{lsRobTest} had options *T1* and
#' *T2*. However, we only recommend *T2*, and deprecate *T1*. Accordingly we
#' use *T* for the former *T2*, and use *T0* for the former *T1*, and we
#' deprecate *T0*.
#'
#' The *coefs* component of the list is a matrix whose row names are
#' the names of the regression predictor variables (the intercept, if present,
#' is excluded), and whose columns *LS*, *Robust*, *Delta*, *Std. Error*,
#' *Stat*, *p-value* contain respectively,
#' the least squares and robust coefficient estimates, the differences in the
#' coefficient estimates, the standard errors of the differences, the resulting
#' t-statistic values, and the resulting z-test p-values.
#'
#' The *full* component of the list is itself a list with components the full
#' model quadratic form chi-squared statistic value (*stat*), the degrees of
#' freedom (*df*), and the full model p value (*p.value*).
#'
#' The *test* component of the list is a character value indicating which of the
#' tests *T* and *T0* has been computed.
#'
#' The *efficiency* component of the list is the normal distribution efficiency
#' computed from the family and tuning constant stored in the control list of
#' the *lmrobdetMM* fit. It enters the test statistic only when test *T0* is
#' used.
#'
#' @author Kjell Konis
#'
#' @export
#'
#' @examples
#' args(lsRobTestMM)
lsRobTestMM <- function(object, test = c("T", "T0"), ...)
{
  test <- match.arg(test)

  # family: one of bisquare, opt and mopt
  family <- object$control$family
  tune <- object$control$tuning.psi
  eff <- computeGaussianEfficiencyFromFamily(family, tune)

  # Least squares fit of the same model on the same model frame.
  # lm() resolves its `weights` argument in `data` and then in the
  # environment of the formula, NOT in this function's frame, so writing
  # `weights = object$weights` would look for a variable called `object` in
  # the wrong place. do.call() puts the weights vector itself into the call.
  ls.args <- list(formula(object$terms), data = object$model)
  if (!is.null(object$weights))
    ls.args$weights <- object$weights
  LS <- do.call(lm, ls.args)

  rmm <- residuals(object)
  rls <- residuals(LS)
  rob.sigma <- object$scale
  rw <- object$rweights

  X <- model.matrix(object)
  n <- nrow(X)
  p <- ncol(X)

  if (!is.null(object$weights)) {
    X <- X * sqrt(object$weights)
  }

  # weighted second-moment matrix of the design, using the robustness weights
  V <- crossprod(rw * X, X) / sum(rw)
  V.inv <- solve(V)

  # quantities shared by both tests
  scaled.res <- rmm / rob.sigma
  psi <- rhoprime(scaled.res, family = family, cc = tune)
  d <- mean(rhoprime2(scaled.res, family = family, cc = tune))

  if (test == "T0") {
    tau <- n * mean((psi / d)^2) / (n - p)
    mat <- (1 - eff) / n * tau * rob.sigma^2 * V.inv
  } else {
    delta2 <- mean((rls - (rob.sigma * psi) / d)^2)
    mat <- delta2 / n * V.inv
  }

  brob <- coef(object)
  coef.names <- names(brob)
  bls <- coef(LS)
  x <- bls - brob

  # the intercept is not part of the test
  if (isTRUE(attr(object$terms, "intercept") == 1)) {
    brob <- brob[-1]
    bls <- bls[-1]
    x <- x[-1]
    mat <- mat[-1, -1, drop = FALSE]
    coef.names <- coef.names[-1]
  }

  se <- sqrt(diag(mat))
  uniV <- x / se
  coefs <- cbind(bls, brob, x, se, uniV, 2 * pnorm(-abs(uniV)))
  dimnames(coefs) <- list(coef.names, c("LS", "Robust", "Delta", "Std. Error", "Stat", "p-value"))

  # quadratic form x' mat^{-1} x; upper tail taken directly for accuracy
  stat <- drop(crossprod(x, solve(mat, x)))
  df <- length(x)

  ans <- list(coefs = coefs,
              full = list(stat = stat, df = df,
                          p.value = pchisq(stat, df, lower.tail = FALSE)),
              test = test,
              efficiency = eff)

  # class here is not changed yet, also the postfix in the name of print
  oldClass(ans) <- "lsRobTest"
  ans
}
35 | The default is set by the \link{na.action} setting of \code{\link[base]{options}}, and is 36 | \code{na.fail} if that is unset.} 37 | 38 | \item{model}{logical value indicating whether to return the model frame} 39 | 40 | \item{x}{logical value indicating whether to return the model matrix} 41 | 42 | \item{y}{logical value indicating whether to return the vector of responses} 43 | 44 | \item{singular.ok}{logical value. If \code{FALSE} a singular fit produces an error.} 45 | 46 | \item{contrasts}{an optional list. See the \code{contrasts.arg} of \link{model.matrix.default}.} 47 | 48 | \item{offset}{this can be used to specify an a priori known component to be included 49 | in the linear predictor during fitting. An offset term can be included in the formula 50 | instead or as well, and if both are specified their sum is used.} 51 | 52 | \item{control}{a list specifying control parameters as returned by the function 53 | \link{lmrobM.control}.} 54 | } 55 | \value{ 56 | A list with the following components: 57 | \item{coefficients}{The estimated vector of regression coefficients} 58 | \item{scale}{The estimated scale of the residuals} 59 | \item{residuals}{The vector of residuals associated with the robust fit} 60 | \item{converged}{Logical value indicating whether IRWLS iterations for the MM-estimator have converged} 61 | \item{iter}{Number of IRWLS iterations for the MM-estimator} 62 | \item{rweights}{Robustness weights for the MM-estimator} 63 | \item{fitted.values}{Fitted values associated with the robust fit} 64 | \item{rank}{Numeric rank of the fitted linear model} 65 | \item{cov}{The estimated covariance matrix of the regression estimates} 66 | \item{df.residual}{The residual degrees of freedom} 67 | \item{contrasts}{(only where relevant) the contrasts used} 68 | \item{xlevels}{(only where relevant) a record of the levels of the factors used in fitting} 69 | \item{call}{the matched call} 70 | \item{model}{if requested, the model frame used} 71 | 
\item{x}{if requested, the model matrix used} 72 | \item{y}{if requested, the response vector used} 73 | \item{na.action}{(where relevant) information returned by model.frame on the special handling of NAs} 74 | } 75 | \description{ 76 | This function computes a robust regression 77 | estimator for a linear models with fixed designs. 78 | } 79 | \details{ 80 | This function computes robust regression estimators for linear 81 | models with fixed designs. It computes an L1 estimator, 82 | and uses it as a starting point to find a minimum of a 83 | re-descending M estimator. The scale is set to a quantile of the 84 | absolute residuals from the L1 estimator. 85 | This function makes use of the functions \code{lmrob.fit}, 86 | \code{lmrob..M..fit}, \code{.vcov.avar1}, \code{lmrob.S} and 87 | \code{lmrob.lar}, from robustbase, 88 | along with utility functions used by these functions, 89 | modified so as to include use of the analytic form of the 90 | optimal psi and rho functions (for the optimal psi function , see 91 | Section 5.8.1 of Maronna, Martin, Yohai and Salibian Barrera, 2019) 92 | } 93 | \examples{ 94 | data(shock) 95 | cont <- lmrobM.control(bb = 0.5, efficiency = 0.85, family = "bisquare") 96 | shockrob <- lmrobM(time ~ n.shocks, data = shock, control=cont) 97 | shockrob 98 | summary(shockrob) 99 | 100 | } 101 | \references{ 102 | \url{http://www.wiley.com/go/maronna/robust} 103 | } 104 | \author{ 105 | Victor Yohai, \email{vyohai@gmail.com}, based on \code{lmrob} 106 | } 107 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # Version 1.0.12 2 | - added lsRobTestMM() 3 | 4 | # Version 1.0.11 5 | - fixed C code to work with _R_USE_STRICT_R_HEADERS_ = true, (STRICT_R_HEADERS = 1) 6 | 7 | # Version 1.0.10 8 | 9 | - scaleM() and mscale() are now different functions. mscale() is no longer exported. 
10 | - The argument "tuning.chi" of scaleM() has been removed, and it is now computed internally to obtain a consistent scale estimator for the chosen combination of "family" and "delta". 11 | 12 | # Version 1.0.9 13 | 14 | - The robustarima Package Status: This package, which is used in the example scripts "ar1.R", "ar3.R", "resex.R", "identAR2.R", "identMA1.R" and "MA1-AO.R", is currently archived by CRAN due to a small problem in one of package’s supporting functions. We anticipate having the package back on CRAN in 2024. Meanwhile, the package is available at the repository https://github.com/spkaluzny/robustarima of package maintainer Stephen Kaluzny. To install robustarima, you need to have first installed the package devtools. Then you can install the utility packages splusTimeDate and splusTimeSeries, and robustarima with: 15 | ``` 16 | > devtools::install_github("spkaluzny/splusTimeDate") 17 | > devtools::install_github("spkaluzny/splusTimeSeries") 18 | > devtools::install_github("spkaluzny/robustarima") 19 | ``` 20 | 21 | # Version 1.0.8 22 | 23 | - Argument "length" of seq() fixed to "length.out" 24 | - Fixed Rprintf() and REprintf() bad arguments in lmrob.c 25 | 26 | # Version 1.0.7 27 | 28 | - The opt and mopt loss functions, known as rho functions, specified by the choice family = "opt" or family = "mopt" in the function lmrobdet.control are now calculated using polynomials, rather than using the standard normal error function (erf) as in versions of RobStatTM prior to 1.0.7. The numerical results one now gets with the opt or mopt choices will differ by small amounts from those in earlier package versions. Users who wish to replicate results from releases prior to 1.0.7 may do so by using the family arguments family = "optV0" or family = "moptV0". Note that the derivative of the rho loss function, known as the "psi" function, is still an analytic function. For further details, see the Vignette "polynomialRhoFunctions". 
29 | 30 | # Version 1.0.6 31 | 32 | - locScaleM produces a warning and returns 0 for the estimated scale when more 33 | than half of the input values are equal to each other. 34 | 35 | # Version 1.0.5 36 | 37 | - Replace legacy S-compatibility deprecated macros DOUBLE_* in 38 | R_ext/Constants.h (included by R.h) by the standard C99 constants 39 | 40 | # Version 1.0.4 41 | 42 | - For the function `lmrobdetMM.RFPE`, when the argument `bothVals` is set 43 | to `TRUE`, the name of the first element in the returned list has been 44 | changed to `minRhoMM.C` (from `minRhoMM`). 45 | 46 | # Version 1.0.3 (November 2021) 47 | 48 | - The Shiny User Interface capability has been removed, and correspondingly the 49 | Shiny Interface to RobStatTM Vignette has also been removed. The reason for 50 | doing so is that the Shiny capability resulted in too many package dependencies. 51 | We anticipate that Greg Brownson (gregory.brownson@gmail.com), the creator of 52 | the RobStatTM Shiny UI, will 53 | make that capability available in an independent package complement to RobStatTM. 54 | 55 | - The package fit.models has been removed as a dependency, i.e., it is no 56 | longer listed as Depends. The reason for this is that including fit.models in 57 | RobStatTM created too many dependencies. The fit.models package's stand-alone 58 | functionality works very well with RobStatTM. Correspondingly, the use of 59 | fit.models has been removed from the "Vignette for Command Line Use of 60 | RobStatTM.pdf" document, and is now provided in the separate 61 | vignette "fit.models using RobStatTM.pdf". 
62 | 63 | - In `src/lmrob.c` add USE_FC_LEN_T and use FCONE when calling BLAS and 64 | LAPACK Fortran functions (https://cran.r-project.org/doc/manuals/r-devel/R-exts.html#Fortran-character-strings) 65 | 66 | - Re-ordered the elements in the object returned by `covRob` and `Multirobu`, renamed the 67 | argument `cor` to `corr`, and now the correlation matrix (if the argument 68 | `corr = TRUE`) is returned in the element `cor`. 69 | 70 | - Renamed the returned entry `weights` to `wts` and changed the order of other entries 71 | in the object returned by `covClassic` 72 | 73 | - Removed comment on the help page of `lmrobdetMM.RFPE` referring to it being for internal use. 74 | This was not correct. The function can be used directly. 75 | 76 | - `lmrobdetMM.RFPE` now includes the argument `bothVals`. If set to `TRUE` the 77 | function returns a list with the two terms (named `minRhoMM` and `penaltyRFPE`) 78 | that added together equal the RFPE (see equation (5.39) in Section 79 | 5.6.2 of the book "Robust Statistics: Theory and Methods (with R)"). 80 | If `bothVals` is `FALSE` then the function returns a scalar with the RFPE value. 81 | 82 | - Objects returned by `covClassic` and `covRob` now include an element `call` with 83 | an image of the call that produced the object with all the arguments named (the 84 | matched call). 85 | 86 | - The function `lmrobLinTest` has been renamed to `lmrobdetLinTest` 87 | 88 | - Fixed a bug producing undesired behavior when an exact fit (more than half of the 89 | data lying perfectly on a line) was detected. 90 | 91 | - The help page for lmrobdetMM was revised to describe all entries in the 92 | returned object. 93 | 94 | 95 | # Version 1.0.1: 96 | - Fix an issue with exact fits if the M-scale estimate is (close to) 0. 97 | - Includes help pages for all datasets. 
98 | 99 | -------------------------------------------------------------------------------- /R/MLocDis.R: -------------------------------------------------------------------------------- 1 | #' Robust univariate location and scale M-estimators 2 | #' 3 | #' This function computes M-estimators for location and scale. 4 | #' 5 | #' This function computes M-estimators for location and scale. 6 | #' 7 | #' @export MLocDis locScaleM 8 | #' @aliases MLocDis locScaleM 9 | #' @rdname MLocDis 10 | #' 11 | #' @param x a vector of univariate observations 12 | #' @param psi a string indicating which score function to use. Valid options are "bisquare", "huber", 13 | #' "opt" and "mopt". 14 | #' @param eff desired asymptotic efficiency. Valid options are 0.85, 0.9 and 0.95 (default) when 15 | #' \code{psi} = "bisquare" or "huber", and 0.85, 0.9, 0.95 (default) and 0.99 when 16 | #' \code{psi} = "opt" or "mopt". 17 | #' @param maxit maximum number of iterations allowed. 18 | #' @param tol tolerance to decide convergence of the iterative algorithm. 19 | #' @param na.rm a logical value indicating whether \code{NA} values should be stripped before 20 | #' the computation proceeds. 
Defaults to \code{FALSE} 21 | #' 22 | #' @return A list with the following components: 23 | #' \item{mu}{The location estimate} 24 | #' \item{std.mu}{Estimated standard deviation of the location estimator \code{mu}} 25 | #' \item{disper}{M-scale/dispersion estimate} 26 | #' 27 | #' @author Ricardo Maronna, \email{rmaronna@retina.ar} 28 | #' 29 | #' @references \url{http://www.wiley.com/go/maronna/robust} 30 | #' 31 | #' @examples 32 | #' set.seed(123) 33 | #' r <- rnorm(150, sd=1.5) 34 | #' locScaleM(r) 35 | #' # 10% of outliers, sd of good points is 1.5 36 | #' set.seed(123) 37 | #' r2 <- c(rnorm(135, sd=1.5), rnorm(15, mean=-10, sd=.5)) 38 | #' locScaleM(r2) 39 | #' 40 | locScaleM <- MLocDis <- function(x, psi="mopt", eff=0.95, maxit=50, tol=1.e-4, na.rm = FALSE) { 41 | kpsi <- switch(psi, bisquare = 1, huber = 2, opt = 3, mopt = 4, optv0 = 5, moptv0 = 6, 7) 42 | # if (psi=="bisquare") kpsi=1 43 | # if (psi=="huber") kpsi=2 44 | # } else {print(c(psi, " No such psi")); kpsi=0 45 | # } 46 | # Next 6 lines taken from mean.default() 47 | if (!is.numeric(x)) { 48 | warning("argument is not numeric: returning NA") 49 | return(NA_real_) 50 | } 51 | if (na.rm) 52 | x <- x[!is.na(x)] 53 | if(kpsi == 7) stop(paste0(psi, ' - No such rho function')) 54 | if(kpsi %in% c(1, 2)) { # Start of Ricardo's code 55 | kBis=c(3.44, 3.88, 4.685) 56 | kHub=c(0.732, 0.981, 1.34) 57 | kk=rbind(kBis, kHub) 58 | efis=c(0.85, 0.90, 0.95) 59 | if (is.element(eff, efis)) {keff=match(eff,efis); 60 | } else {print(c(eff, " No such eff")); keff=0 61 | } 62 | if (kpsi>0 & keff>0) { 63 | ktun=kk[kpsi, keff] 64 | mu0=median(x); sig0=mad(x) 65 | if (sig0<1.e-10) { 66 | resu <- list(mu = mu0, std.mu = 0, disper = 0) 67 | num.rep <- sum( x == median(x) ) 68 | perc.rep <- num.rep / length(x) 69 | wrn <- paste0(num.rep, ' elements ', round(perc.rep*100, 1), '% in the input vector are equal to ', median(x), '.') 70 | wrn <- paste0(wrn, ' If percent is 50% or greater, the values of std.mu and disper will 
be 0, and user may wish to apply locScaleM to data with tied values removed.') 71 | warning(wrn) 72 | return(resu) 73 | } else { #initialize 74 | dife=1.e10; iter=0 75 | while (dife>tol & iter tol && iter < maxit) { 111 | iter <- iter + 1 112 | resi <- (x - mu0) / sig0 113 | ww <- Mwgt(resi, cc, family) #RobStatTM:::Mwgt(resi, cc, family) 114 | mu <- sum(ww * x) / sum(ww) 115 | dife <- abs(mu - mu0) / sig0 116 | mu0 <- mu 117 | } 118 | } 119 | pp <- rhoprime(resi, family, cc) 120 | n <- length(x) 121 | a <- mean(pp^2) 122 | b <- mean(rhoprime2(resi, family, cc)) 123 | sigmu <- sqrt(sig0^2 * a / (n*b^2)) 124 | f <- function(u, family, cc) { 125 | if( (family == "opt") | (family == "mopt") ) { 126 | cc["c2"] <- u } else { cc["c"] <- u } 127 | integrate(function(x, fam, cc) rho(x, fam, cc) * dnorm(x), -Inf, Inf, fam = family, cc = cc)$value - 0.5 128 | } 129 | 130 | tmpc <- uniroot(f, c(0.01, 10), family = family, cc = cc)$root 131 | if( (family == "opt") | (family == "mopt") ) { 132 | cc["c2"] <- tmpc } else { cc["c"] <- tmpc } 133 | scat <- scaleM(x - mu, delta = 0.5, family = family) 134 | resu <- list(mu = mu, std.mu = sigmu, disper = scat) 135 | } else { print(c(eff, " No such eff")) 136 | resu <- NA 137 | } 138 | } 139 | return(resu) 140 | } # end function 141 | 142 | wfun <- function(x,k) { #weight function 143 | if (k==1) ww=(1-x^2)^2 *(abs(x)<=1) 144 | else ww=(abs(x)<=1)+(abs(x)>1)/(abs(x)+1.e-20) 145 | return(ww) 146 | } 147 | 148 | psif <- function(x,k) {return(x*wfun(x,k))} 149 | 150 | psipri <- function(x,k) { 151 | if (k==1) pp= (((1 - (x^2))^2) - 4 * (x^2) * (1 - (x^2))) * (abs(x) < 1) 152 | else pp=(abs(x)<=1) 153 | return(pp) 154 | } 155 | --------------------------------------------------------------------------------