├── roxy.R ├── data ├── crab.rda ├── wine.rda └── powerplant.rda ├── tests ├── testthat.R └── testthat │ ├── test.interpolate.R │ ├── test.arrangeC.R │ ├── test.makepath.R │ └── test.similarityweight.R ├── .gitignore ├── .travis.yml ├── .RBuildignore ├── demo ├── 00Index ├── condvis-regression.R └── condvis-classification.R ├── NAMESPACE ├── R ├── factordist.R ├── weightcolor.R ├── interpolate.R ├── dist1.R ├── factor2color.R ├── cont2color.R ├── diagnosticplots.R ├── ceplot.static.R ├── predict.R ├── makepath.R ├── arrangeC.R ├── savingby2d.R ├── helpers.R ├── similarityweight.R ├── plotxsres.R ├── ceplot.shiny.R ├── copied.R ├── ceplot.R ├── ceplot.interactive.R ├── plotxc.R ├── condtour.R └── plotxs.R ├── inst └── CITATION ├── man ├── interpolate.Rd ├── factor2color.Rd ├── wine.Rd ├── dist1.Rd ├── cont2color.Rd ├── plotxc.pcp.Rd ├── makepath.Rd ├── powerplant.Rd ├── condvis-package.Rd ├── crab.Rd ├── arrangeC.Rd ├── savingby2d.Rd ├── plotxc.Rd ├── similarityweight.Rd ├── condtour.Rd ├── plotxs.Rd └── ceplot.Rd ├── DESCRIPTION ├── README.md └── NEWS /roxy.R: -------------------------------------------------------------------------------- 1 | roxygen2::roxygenise("../condvis") 2 | -------------------------------------------------------------------------------- /data/crab.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markajoc/condvis/HEAD/data/crab.rda -------------------------------------------------------------------------------- /data/wine.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markajoc/condvis/HEAD/data/wine.rda -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(condvis) 3 | test_check("condvis") 4 | -------------------------------------------------------------------------------- /data/powerplant.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/markajoc/condvis/HEAD/data/powerplant.rda -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | src/condvis.so 2 | src/daisy.o 3 | src-i386/ 4 | src-x64/ 5 | .Rapp.history 6 | *.tar.gz 7 | condvis.Rcheck/ 8 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: r 2 | r: 3 | - oldrel 4 | - release 5 | - devel 6 | env: 7 | global: 8 | - R_CHECK_ARGS="--as-cran" 9 | cache: packages 10 | -------------------------------------------------------------------------------- /.RBuildignore: -------------------------------------------------------------------------------- 1 | .git/ 2 | .gitignore 3 | src/daisy.o 4 | src/condvis.so 5 | crancomments.txt 6 | .Rapp.history 7 | ^\.travis\.yml$ 8 | roxy.R 9 | ^.*\.tar\.gz 10 | condvis.Rcheck/ 11 | -------------------------------------------------------------------------------- /demo/00Index: -------------------------------------------------------------------------------- 1 | condvis-classification example of using ceplot to take sections through a fitted classification model 2 | condvis-regression example of using ceplot to take sections through a fitted regression model 3 | -------------------------------------------------------------------------------- /demo/condvis-regression.R: -------------------------------------------------------------------------------- 1 | library(condvis) 2 | data(powerplant) 3 | ## fit a model 4 | models <- list( 5 | lm = lm(PE ~ ., data = powerplant), 6 | lmquad = lm(PE ~ . + I(AT^2), data = powerplant)) 7 | ## visualise sections along 'AT' 8 | ceplot(data = powerplant, model = models, sectionvars = "AT") 9 | -------------------------------------------------------------------------------- /tests/testthat/test.interpolate.R: -------------------------------------------------------------------------------- 1 | context("interpolate") 2 | 3 | test_that("interpolate throws an error with a negative 'ninterp'", { 4 | expect_error(interpolate(1:5, ninterp = -1)) 5 | }) 6 | 7 | test_that("interpolate works on a numeric vector of length 2", { 8 | expect_equal(interpolate(x = 1:2, ninterp = 3), c(1, 1.25, 1.50, 1.75, 2)) 9 | }) 10 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | export(arrangeC, ceplot, cont2color, factor2color, interpolate, dist1, plotxc, 2 | plotxs, savingby2d, similarityweight, condtour, makepath) 3 | S3method(interpolate, numeric) 4 | S3method(interpolate, integer) 5 | S3method(interpolate, factor) 6 | S3method(interpolate, character) 7 | import(graphics, grDevices, stats) 8 | importFrom(utils, head, tail) 9 | importFrom(MASS, parcoord) 10 | -------------------------------------------------------------------------------- /demo/condvis-classification.R: -------------------------------------------------------------------------------- 1 | library(condvis) 2 | ## demo requires 'e1071' package 3 | library(e1071) 4 | data(wine) 5 | wine$Class <- as.factor(wine$Class) 6 | ## fit a model 7 | model <- svm(Class ~ Alcohol + Malic + Ash + Magnesium + 8 | Phenols + Flavanoids, data = wine) 9 | ## visualise sections along 'Alcohol' and 'Phenols' 10 | ceplot(data = wine, model = model, sectionvars = c("Alcohol", "Phenols"), 11 | threshold = 1.5) 12 | -------------------------------------------------------------------------------- /R/factordist.R: -------------------------------------------------------------------------------- 1 | ## Function to calculate a rough Hamming-style distance between categorical 2 | ## elements of data points. Not currently in use 2016-05-31. 3 | 4 | factormatchratio <- 5 | function(x, X) 6 | { 7 | x <- as.matrix(x) 8 | X <- if (is.null(dim(X))) 9 | matrix(X, ncol = length(x)) 10 | else as.matrix(X) 11 | mp <- rowSums(X == matrix(x, ncol = length(x), nrow = nrow(X), byrow = TRUE) 12 | ) / length(x) 13 | } 14 | -------------------------------------------------------------------------------- /tests/testthat/test.arrangeC.R: -------------------------------------------------------------------------------- 1 | context("arrangeC") 2 | 3 | test_that("arrangeC fails with less than 5 rows", { 4 | expect_error(arrangeC(mtcars[1:4, ])) 5 | }) 6 | 7 | test_that("arrangeC just returns colnames for ncols less than 3", { 8 | expect_equal(unlist(arrangeC(mtcars[, 1, drop = FALSE])), colnames(mtcars)[1]) 9 | expect_equal(unlist(arrangeC(mtcars[, 1:2, drop = FALSE])), colnames(mtcars[, 10 | 1:2])) 11 | }) 12 | 13 | test_that("arrangeC returns list of vectors with length <= 2", { 14 | expect_is(arrangeC(mtcars), "list") 15 | expect_true(all(vapply(arrangeC(mtcars), length, integer(1)) <= 2)) 16 | }) 17 | -------------------------------------------------------------------------------- /tests/testthat/test.makepath.R: -------------------------------------------------------------------------------- 1 | context("makepath") 2 | 3 | data(powerplant) 4 | 5 | ncentroids <- 20 6 | ninterp <- 4 7 | pathobject <- makepath(x = powerplant, ncentroids = ncentroids, ninterp = 8 | ninterp) 9 | 10 | test_that("makepath returns dataframes with correct names", { 11 | expect_equal(names(pathobject$path), names(powerplant)) 12 | expect_equal(names(pathobject$centers), names(powerplant)) 13 | }) 14 | 15 | test_that("makepath returns the correct number of centers and path points", { 16 | expect_equal(nrow(pathobject$centers), ncentroids) 17 | expect_equal(nrow(pathobject$path), ncentroids + (ncentroids - 1L) * ninterp) 18 | }) 19 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | bibentry(bibtype = "Article", 2 | title = "Conditional Visualization for Statistical Models: An Introduction to the {condvis} Package in {R}", 3 | author = c(person(given = "Mark", 4 | family = "O'Connell", 5 | email = "mark_ajoc@yahoo.ie"), 6 | person(given = c("Catherine", "B."), 7 | family = "Hurley"), 8 | person(given = "Katarina", 9 | family = "Domijan")), 10 | journal = "Journal of Statistical Software", 11 | year = "2017", 12 | volume = "81", 13 | number = "5", 14 | pages = "1--20", 15 | doi = "10.18637/jss.v081.i05", 16 | 17 | header = "To cite condvis in publications use:" 18 | ) 19 | -------------------------------------------------------------------------------- /man/interpolate.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/interpolate.R 3 | \name{interpolate} 4 | \alias{interpolate} 5 | \alias{interpolate.character} 6 | \alias{interpolate.factor} 7 | \alias{interpolate.integer} 8 | \alias{interpolate.numeric} 9 | \title{Interpolate} 10 | \usage{ 11 | interpolate(x, ...) 12 | 13 | \method{interpolate}{numeric}(x, ninterp = 4L, ...) 14 | 15 | \method{interpolate}{integer}(x, ninterp = 4L, ...) 16 | 17 | \method{interpolate}{factor}(x, ninterp = 4L, ...) 18 | 19 | \method{interpolate}{character}(x, ninterp = 4L, ...) 20 | } 21 | \arguments{ 22 | \item{x}{A numeric or factor vector.} 23 | 24 | \item{...}{Not used.} 25 | 26 | \item{ninterp}{The number of points to interpolate between observations. It 27 | should be an even number for sensible results on a factor/character vector.} 28 | } 29 | \description{ 30 | Interpolate a numeric or factor vector. 31 | } 32 | 33 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: condvis 2 | Type: Package 3 | Title: Conditional Visualization for Statistical Models 4 | Version: 0.5-1 5 | Date: 2018-09-13 6 | Authors@R: c( 7 | person("Mark", "O'Connell", email = "mark_ajoc@yahoo.ie", 8 | role = c("aut", "cre")), 9 | person("Catherine", "Hurley", role = "aut"), 10 | person("Katarina", "Domijan", role = "aut"), 11 | person("Achim", "Zeileis", role = "ctb", comment = "spineplot, see copied.R"), 12 | person("R Core Team", role = "ctb", comment = "barplot, see copied.R") ) 13 | Depends: 14 | R (>= 2.1.0) 15 | Imports: 16 | graphics, 17 | grDevices, 18 | stats, 19 | utils, 20 | MASS 21 | Suggests: 22 | RColorBrewer, 23 | shiny, 24 | scagnostics, 25 | cluster, 26 | hdrcde, 27 | gplots, 28 | TSP, 29 | DendSer, 30 | testthat 31 | Description: Exploring fitted models by interactively taking 2-D and 3-D 32 | sections in data space. 33 | License: GPL (>= 2) 34 | LazyData: false 35 | BugReports: https://github.com/markajoc/condvis/issues 36 | URL: http://markajoc.github.io/condvis/ 37 | RoxygenNote: 5.0.1.9000 38 | -------------------------------------------------------------------------------- /R/weightcolor.R: -------------------------------------------------------------------------------- 1 | ## Function to weight colours according to a weight vector. Not exported. 2 | 3 | weightcolor <- 4 | function(col, weights, breaks) 5 | { 6 | n <- length(weights) 7 | col <- rep(col, length.out = n) 8 | 9 | ## Discretise `weights`. We just want 3 different shades, as it becomes 10 | ## difficult to differentiate between them otherwise. 11 | 12 | weights <- c(0, 0.4, 0.7, 1)[findInterval(weights, c(0, .Machine$double.eps, 13 | 0.4, 0.7, 1), rightmost.closed = TRUE)] 14 | 15 | ## We won't perform calculations on elements with `weight` == 0. 16 | 17 | weightsgr0 <- which(weights > 0) 18 | data.order <- weightsgr0[order(weights[weightsgr0])] 19 | 20 | ## Linearly fade the colours in `col` to white in RGB space according to their 21 | ## `weights`. 22 | 23 | newcol <- (col2rgb(col[data.order]) * matrix(rep(weights[data.order], 3), 24 | nrow = 3, byrow = TRUE) / 255) + matrix(rep(1 - weights[data.order], 3), 25 | nrow = 3, byrow = TRUE) 26 | data.colour <- rep(NA, n) 27 | data.colour[data.order] <- rgb(t(newcol)) 28 | 29 | ## Return the weighted colours with the order as attribute. 30 | 31 | structure(data.colour, order = data.order) 32 | } 33 | -------------------------------------------------------------------------------- /man/factor2color.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/factor2color.R 3 | \name{factor2color} 4 | \alias{factor2color} 5 | \title{Assign colours to factor vector} 6 | \usage{ 7 | factor2color(x, colors = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{A factor vector.} 11 | 12 | \item{colors}{The colours to use. Defaults to a qualitative colour scheme; 13 | either \code{"Set3"} from \code{RColorBrewer} if available, or 14 | \code{\link{rainbow}} otherwise.} 15 | } 16 | \value{ 17 | A character vector of colours. 18 | } 19 | \description{ 20 | This function takes a factor vector and returns suitable colours 21 | representing the factor levels. Default is to try to use 22 | \code{RColorBrewer} for colours, and \code{\link{rainbow}} otherwise. Can 23 | provide custom colours. 24 | } 25 | \details{ 26 | Uses the \code{RColorBrewer} package if installed. Coerces \code{x} 27 | to factor with a warning. 28 | } 29 | \examples{ 30 | plot(iris[, c("Petal.Length", "Petal.Width")], pch = 21, 31 | bg = factor2color(iris$Species)) 32 | legend("topleft", legend = levels(iris$Species), 33 | fill = factor2color(as.factor(levels(iris$Species)))) 34 | 35 | } 36 | \seealso{ 37 | \code{\link{cont2color}} 38 | } 39 | 40 | -------------------------------------------------------------------------------- /man/wine.Rd: -------------------------------------------------------------------------------- 1 | \name{wine} 2 | \alias{wine} 3 | 4 | \title{ 5 | Italian wine data 6 | } 7 | 8 | \description{ 9 | \code{Class} 3 different cultivars\cr 10 | \code{Alcohol} Alcohol\cr 11 | \code{Malic} Malic acid\cr 12 | \code{Ash} Ash\cr 13 | \code{Alcalinity} Alcalinity of ash\cr 14 | \code{Magnesium} Magnesium\cr 15 | \code{Phenols} Total phenols\cr 16 | \code{Flavanoids} Flavanoids\cr 17 | \code{Nonflavanoid} Nonflavanoid phenols\cr 18 | \code{Proanthocyanins} Proanthocyanins\cr 19 | \code{Intensity} Color intensity\cr 20 | \code{Hue} Hue\cr 21 | \code{OD280} OD280/OD315 of diluted wines\cr 22 | \code{Proline} Proline\cr 23 | } 24 | 25 | \format{ 26 | 178 observations on 14 variables. 27 | } 28 | 29 | \source{ 30 | UCI repository. 31 | \url{https://archive.ics.uci.edu/ml/datasets/Wine} 32 | } 33 | 34 | \references{ 35 | S. Aeberhard, D. Coomans and O. de Vel (1992), Comparison of Classifiers in 36 | High Dimensional Settings, \emph{Technical Report} \bold{92}-02, Dept. of 37 | Computer Science and Dept. of Mathematics and Statistics, James Cook 38 | University of North Queensland. 39 | } 40 | 41 | \examples{ 42 | data(wine) 43 | pairs(wine[, -1], col = factor2color(wine$Class), cex = 0.2) 44 | } 45 | 46 | \keyword{ wine } 47 | -------------------------------------------------------------------------------- /man/dist1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dist1.R 3 | \name{dist1} 4 | \alias{dist1} 5 | \title{Minkowski distance} 6 | \usage{ 7 | dist1(x, X, p = 2, inf = FALSE) 8 | } 9 | \arguments{ 10 | \item{x}{A numeric vector describing point coordinates.} 11 | 12 | \item{X}{A numeric matrix describing coordinates for several points.} 13 | 14 | \item{p}{The power in Minkowski distance, defaults to 2 for Euclidean 15 | distance.} 16 | 17 | \item{inf}{Logical; switch for calculating maximum norm distance (sometimes 18 | known as Chebychev distance) which is the limit of Minkowski distance as 19 | \eqn{p} tends to infinity.} 20 | } 21 | \value{ 22 | A numeric vector. These are distance^p, for speed of computation. 23 | } 24 | \description{ 25 | Calculate Minkowski distance between one point and a set of 26 | other points. 27 | } 28 | \examples{ 29 | x <- runif(5000) 30 | y <- runif(5000) 31 | 32 | x1 <- 0.5 33 | y1 <- 0.5 34 | 35 | dev.new(width = 4, height = 5.3) 36 | par(mfrow = c(2, 2)) 37 | 38 | for(p in c(0.5, 1, 2, 10)){ 39 | d <- dist1(x = c(x1, y1), X = cbind(x, y), p = p) ^ (1/p) 40 | col <- rep("black", length(x)) 41 | col[d < 0.3] <- "red" 42 | plot(x, y, pch = 16, col = col, asp = 1, main = paste("p = ", p, sep = "")) 43 | } 44 | 45 | } 46 | \seealso{ 47 | \code{\link{similarityweight}} 48 | } 49 | 50 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## condvis: Conditional Visualisation for Statistical Models 2 | 3 | [![BuildStatus](https://travis-ci.org/markajoc/condvis.svg?branch=devel)](https://travis-ci.org/markajoc/condvis) 4 | [![License](http://img.shields.io/badge/license-GPL%20%28%3E=%202%29-brightgreen.svg?style=flat)](http://www.gnu.org/licenses/gpl-2.0.html) 5 | [![CRAN](http://www.r-pkg.org/badges/version/condvis)](https://cran.r-project.org/package=condvis) 6 | [![Downloads](http://cranlogs.r-pkg.org/badges/condvis?color=brightgreen)](http://www.r-pkg.org/pkg/condvis) 7 | 8 | ### Interactively exploring fitted models 9 | 10 | Interactively take 2-D and 3-D sections in data space, showing where fitted 11 | models intersect the section, and observed data near the section according to 12 | a distance measure. See package [website](https://markajoc.github.io/condvis/) 13 | for examples. 14 | 15 | Works on Windows, Mac OS and Linux. 16 | 17 | Requirements: 18 | * Windows: the standard graphics device is sufficient. 19 | * Mac OS: XQuartz device, [website](http://www.xquartz.org/) 20 | * Linux: X11, included in some distributions. 21 | 22 | Installation: 23 | ```r 24 | install.packages("condvis") 25 | ``` 26 | 27 | Example to get started: 28 | ```r 29 | library(condvis) 30 | data(mtcars) 31 | m <- lm(mpg ~ wt + hp, data = mtcars) 32 | ceplot(data = mtcars, model = m, sectionvars = "hp") 33 | ``` 34 | -------------------------------------------------------------------------------- /man/cont2color.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cont2color.R 3 | \name{cont2color} 4 | \alias{cont2color} 5 | \title{Assign colours to numeric vector} 6 | \usage{ 7 | cont2color(x, xrange = NULL, breaks = NULL, colors = NULL) 8 | } 9 | \arguments{ 10 | \item{x}{A numeric vector.} 11 | 12 | \item{xrange}{The range to use for the colour scale.} 13 | 14 | \item{breaks}{The number of breaks at which to change colour.} 15 | 16 | \item{colors}{The colours to use. Defaults to a diverging colour scheme; 17 | either \code{"PiYG"} from \code{RColorBrewer} if available, or 18 | \code{\link{cm.colors}} otherwise.} 19 | } 20 | \value{ 21 | A character vector of colours. 22 | } 23 | \description{ 24 | This function assigns colours on a linear scale to a numeric 25 | vector. Default is to try to use \code{RColorBrewer} for colours, and 26 | \code{\link{cm.colors}} otherwise. Can provide custom range, breaks and colours. 27 | } 28 | \details{ 29 | Uses the \code{RColorBrewer} package if installed. Coerces \code{x} 30 | to numeric with a warning. 31 | } 32 | \examples{ 33 | x <- runif(200) 34 | plot(x, col = cont2color(x, c(0,1))) 35 | 36 | plot(x, col = cont2color(x, c(0,0.5))) 37 | 38 | plot(sort(x), col = cont2color(sort(x), c(0.25,0.75)), pch = 16) 39 | abline(h = c(0.25, 0.75), lty = 3) 40 | 41 | } 42 | \seealso{ 43 | \code{\link{factor2color}} 44 | } 45 | 46 | -------------------------------------------------------------------------------- /man/plotxc.pcp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotxc.R 3 | \name{plotxc.pcp} 4 | \alias{plotxc.full} 5 | \alias{plotxc.pcp} 6 | \title{Condition selector plot} 7 | \usage{ 8 | plotxc.pcp(Xc, Xc.cond, select.colour = NULL, select.lwd = 3, 9 | cex.axis = NULL, cex.lab = NULL, tck = NULL, select.cex = 1, ...) 10 | 11 | plotxc.full(Xc, Xc.cond, select.colour = NULL, select.lwd = 3, 12 | cex.axis = NULL, cex.lab = NULL, tck = NULL, select.cex = 0.6, ...) 13 | } 14 | \arguments{ 15 | \item{Xc}{A dataframe.} 16 | 17 | \item{Xc.cond}{A dataframe with one row and same names as \code{Xc}.} 18 | 19 | \item{select.colour}{Colour to highlight \code{Xc.cond}} 20 | 21 | \item{select.lwd}{Line weight to highlight \code{Xc.cond}} 22 | 23 | \item{cex.axis}{Axis text scaling} 24 | 25 | \item{cex.lab}{Label text scaling} 26 | 27 | \item{tck}{Plot axis tick size} 28 | 29 | \item{select.cex}{Plot symbol size} 30 | 31 | \item{...}{not used.} 32 | } 33 | \value{ 34 | Produces a plot, and returns a list containing the relevant 35 | information to update the plot at a later stage. 36 | } 37 | \description{ 38 | Multivariate data visualisations used to select sections for 39 | \code{\link{ceplot}}. Basically visualises a dataset and highlights a 40 | single point. 41 | } 42 | \seealso{ 43 | \code{\link{ceplot}}, \code{\link{plotxs}}, \code{\link{plotxc}} 44 | } 45 | 46 | -------------------------------------------------------------------------------- /man/makepath.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/makepath.R 3 | \name{makepath} 4 | \alias{makepath} 5 | \title{Make a default path for conditional tour} 6 | \usage{ 7 | makepath(x, ncentroids, ninterp = 4) 8 | } 9 | \arguments{ 10 | \item{x}{A dataframe} 11 | 12 | \item{ncentroids}{The number of centroids to use as path nodes.} 13 | 14 | \item{ninterp}{The number of points to linearly interpolate between path 15 | nodes.} 16 | } 17 | \value{ 18 | A list with two dataframes: \code{centers} giving the path nodes, and 19 | \code{path} giving the full interpolated path. 20 | } 21 | \description{ 22 | Provides a default path (a set of sections), useful as input to 23 | a conditional tour (\code{\link{condtour}}). Clusters the data using 24 | k-means or partitioning around medoids (from the \code{cluster} package). 25 | The cluster centres/prototypes are then ordered to create a sensible way to 26 | visit each section as smoothly as possible. Ordering uses either the 27 | \code{DendSer} or \code{TSP} package. Linear interpolation is then used to 28 | create intermediate points between the path nodes. 29 | } 30 | \examples{ 31 | d <- data.frame(x = runif(500), y = runif(500)) 32 | plot(d) 33 | mp1 <- makepath(d, 5) 34 | points(mp1$centers, type = "b", col = "blue", pch = 16) 35 | mp2 <- makepath(d, 40) 36 | points(mp2$centers, type = "b", col = "red", pch = 16) 37 | 38 | } 39 | \seealso{ 40 | \code{\link{condtour}} 41 | } 42 | 43 | -------------------------------------------------------------------------------- /R/interpolate.R: -------------------------------------------------------------------------------- 1 | #' @title Interpolate 2 | #' 3 | #' @description Interpolate a numeric or factor vector. 4 | #' 5 | #' @param x A numeric or factor vector. 6 | #' @param ninterp The number of points to interpolate between observations. It 7 | #' should be an even number for sensible results on a factor/character vector. 8 | #' @param ... Not used. 9 | 10 | interpolate <- 11 | function (x, ...) 12 | { 13 | UseMethod("interpolate", x) 14 | } 15 | 16 | #' @rdname interpolate 17 | #' @method interpolate numeric 18 | 19 | interpolate.numeric <- 20 | function (x, ninterp = 4L, ...) 21 | { 22 | if (ninterp < 0) 23 | stop("'ninterp' should be >= 0") 24 | xdiff <- diff(x) / (ninterp + 1L) 25 | cumsum(c(x[1L], rep(xdiff, each = ninterp + 1L))) 26 | } 27 | 28 | #' @rdname interpolate 29 | #' @method interpolate integer 30 | 31 | interpolate.integer <- interpolate.numeric 32 | 33 | #' @rdname interpolate 34 | #' @method interpolate factor 35 | 36 | interpolate.factor <- 37 | function (x, ninterp = 4L, ...) 38 | { 39 | if (ninterp < 0) 40 | stop("'ninterp' should be >= 0") 41 | if (!identical(ninterp %% 2, 0)) 42 | warning("'ninterp' should be even for factor/character vector") 43 | unlist(list(rep(head(x, 1L), 1L + floor(ninterp / 2L)), 44 | rep(head(tail(x, -1L), -1L), each = ninterp + 1L), 45 | rep(tail(x, 1L), 1L + ceiling(ninterp / 2L)))) 46 | } 47 | 48 | #' @rdname interpolate 49 | #' @method interpolate character 50 | 51 | interpolate.character <- interpolate.factor 52 | -------------------------------------------------------------------------------- /R/dist1.R: -------------------------------------------------------------------------------- 1 | #' @title Minkowski distance 2 | #' 3 | #' @description Calculate Minkowski distance between one point and a set of 4 | #' other points. 5 | #' 6 | #' @param x A numeric vector describing point coordinates. 7 | #' @param X A numeric matrix describing coordinates for several points. 8 | #' @param p The power in Minkowski distance, defaults to 2 for Euclidean 9 | #' distance. 10 | #' @param inf Logical; switch for calculating maximum norm distance (sometimes 11 | #' known as Chebychev distance) which is the limit of Minkowski distance as 12 | #' \eqn{p} tends to infinity. 13 | #' 14 | #' @return A numeric vector. These are distance^p, for speed of computation. 15 | #' 16 | #' @examples 17 | #' x <- runif(5000) 18 | #' y <- runif(5000) 19 | #' 20 | #' x1 <- 0.5 21 | #' y1 <- 0.5 22 | #' 23 | #' dev.new(width = 4, height = 5.3) 24 | #' par(mfrow = c(2, 2)) 25 | #' 26 | #' for(p in c(0.5, 1, 2, 10)){ 27 | #' d <- dist1(x = c(x1, y1), X = cbind(x, y), p = p) ^ (1/p) 28 | #' col <- rep("black", length(x)) 29 | #' col[d < 0.3] <- "red" 30 | #' plot(x, y, pch = 16, col = col, asp = 1, main = paste("p = ", p, sep = "")) 31 | #'} 32 | #' 33 | #' @seealso \code{\link{similarityweight}} 34 | 35 | 36 | dist1 <- 37 | function (x, X, p = 2, inf = FALSE) 38 | { 39 | X <- if (is.null(dim(X))) 40 | matrix(X, ncol = length(x)) 41 | else as.matrix(X) 42 | dif <- abs(X - matrix(as.numeric(x), nrow = nrow(X), ncol = ncol(X), byrow = 43 | TRUE)) 44 | if (inf) 45 | return(apply(dif, 1, max)) 46 | tmp <- dif ^ p 47 | rowSums(tmp) 48 | } 49 | -------------------------------------------------------------------------------- /R/factor2color.R: -------------------------------------------------------------------------------- 1 | #' @title Assign colours to factor vector 2 | #' 3 | #' @description This function takes a factor vector and returns suitable colours 4 | #' representing the factor levels. Default is to try to use 5 | #' \code{RColorBrewer} for colours, and \code{\link{rainbow}} otherwise. Can 6 | #' provide custom colours. 7 | #' 8 | #' @param x A factor vector. 9 | #' @param colors The colours to use. Defaults to a qualitative colour scheme; 10 | #' either \code{"Set3"} from \code{RColorBrewer} if available, or 11 | #' \code{\link{rainbow}} otherwise. 12 | #' 13 | #' @return A character vector of colours. 14 | #' 15 | #' @details Uses the \code{RColorBrewer} package if installed. Coerces \code{x} 16 | #' to factor with a warning. 17 | #' 18 | #' @examples 19 | #' plot(iris[, c("Petal.Length", "Petal.Width")], pch = 21, 20 | #' bg = factor2color(iris$Species)) 21 | #' legend("topleft", legend = levels(iris$Species), 22 | #' fill = factor2color(as.factor(levels(iris$Species)))) 23 | #' 24 | #' @seealso \code{\link{cont2color}} 25 | 26 | factor2color <- 27 | function (x, colors = NULL) 28 | { 29 | x <- if (!is.factor(x)){ 30 | as.factor(x) 31 | warning("'x' has been coerced to a factor.") 32 | } else x 33 | n <- nlevels(x) 34 | colors <- if (is.null(colors)){ 35 | if (requireNamespace("RColorBrewer", quietly = TRUE)) 36 | RColorBrewer::brewer.pal(n = max(n, 3L, na.rm = TRUE), name = "Set3")[1L: 37 | n] 38 | else rainbow(n) 39 | } else rep(colors, length.out = n) 40 | vapply(x, function(y) colors[levels(x) == as.character(y)], character(1L)) 41 | } 42 | -------------------------------------------------------------------------------- /man/powerplant.Rd: -------------------------------------------------------------------------------- 1 | \name{powerplant} 2 | \alias{powerplant} 3 | 4 | \title{ 5 | Tuefekci's powerplant data 6 | } 7 | 8 | \description{ 9 | The dataset contains 9568 data points collected from a Combined Cycle Power 10 | Plant over 6 years (2006-2011), when the power plant was set to work with full 11 | load. Features consist of hourly average ambient variables Temperature (T), 12 | Ambient Pressure (AP), Relative Humidity (RH) and Exhaust Vacuum (V) to 13 | predict the net hourly electrical energy output (EP) of the plant. 14 | 15 | A combined cycle power plant (CCPP) is composed of gas turbines (GT), steam 16 | turbines (ST) and heat recovery steam generators. In a CCPP, the electricity 17 | is generated by gas and steam turbines, which are combined in one cycle, and 18 | is transferred from one turbine to another. While the Vacuum is collected from 19 | and has effect on the Steam Turbine, the other three of the ambient variables 20 | affect the GT performance. 21 | } 22 | 23 | \format{ 24 | 9568 observations on 5 continuous variables. 25 | } 26 | 27 | \source{ 28 | UCI repository. 29 | \url{https://archive.ics.uci.edu/ml/datasets/Combined+Cycle+Power+Plant} 30 | } 31 | 32 | \references{ 33 | Tuefekci, P. (2014), Prediction of full load electrical power output of a base 34 | load operated combined cycle power plant using machine learning methods, 35 | \emph{International Journal of Electrical Power & Energy Systems}, \bold{60}, 36 | pp. 126-140, ISSN 0142-0615. 37 | } 38 | 39 | \examples{ 40 | data(powerplant) 41 | head(powerplant) 42 | } 43 | 44 | \keyword{ powerplant } 45 | -------------------------------------------------------------------------------- /man/condvis-package.Rd: -------------------------------------------------------------------------------- 1 | \name{condvis-package} 2 | \alias{condvis-package} 3 | \alias{condvis} 4 | \docType{package} 5 | \title{ 6 | Conditional Visualization for Statistical Models 7 | } 8 | \description{ 9 | Exploring statistical models by interactively taking 2-D and 3-D sections in 10 | data space. The main functions for end users are \code{\link{ceplot}} (see 11 | example below) and \code{\link{condtour}}. Requires 12 | \href{http://www.xquartz.org/}{XQuartz} on Mac OS, and X11 on Linux. A website 13 | for the package is available at \href{http://markajoc.github.io/condvis/}{ 14 | markajoc.github.io/condvis}. Source code is available to browse at 15 | \href{https://github.com/markajoc/condvis/}{GitHub}. Bug reports and feature 16 | requests are very welcome at 17 | \href{https://github.com/markajoc/condvis/issues}{GitHub}. 18 | } 19 | \details{ 20 | \tabular{ll}{ 21 | Package: \tab condvis\cr 22 | Type: \tab Package\cr 23 | Version: \tab 0.5-1\cr 24 | Date: \tab 2018-09-13\cr 25 | License: \tab GPL (>= 2)\cr 26 | } 27 | } 28 | \author{ 29 | Mark O'Connell , Catherine Hurley , 30 | Katarina Domijan . 31 | } 32 | \keyword{ package } 33 | \examples{ 34 | \dontrun{ 35 | mtcars$cyl <- as.factor(mtcars$cyl) 36 | mtcars$am <- as.factor(mtcars$am) 37 | 38 | library(mgcv) 39 | model1 <- list( 40 | quadratic = lm(mpg ~ cyl + am + qsec + wt + I(wt^2), data = mtcars), 41 | additive = gam(mpg ~ cyl + am + qsec + s(wt), data = mtcars)) 42 | 43 | ceplot(data = mtcars, model = model1, sectionvars = "wt") 44 | } 45 | } 46 | \references{ 47 | O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 48 | Visualization for Statistical Models: An Introduction to the 49 | \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 50 | \strong{81}(5), pp. 1-20. . 51 | } 52 | -------------------------------------------------------------------------------- /R/cont2color.R: -------------------------------------------------------------------------------- 1 | #' @title Assign colours to numeric vector 2 | #' 3 | #' @description This function assigns colours on a linear scale to a numeric 4 | #' vector. Default is to try to use \code{RColorBrewer} for colours, and 5 | #' \code{\link{cm.colors}} otherwise. Can provide custom range, breaks and colours. 6 | #' 7 | #' @param x A numeric vector. 8 | #' @param xrange The range to use for the colour scale. 9 | #' @param breaks The number of breaks at which to change colour. 10 | #' @param colors The colours to use. Defaults to a diverging colour scheme; 11 | #' either \code{"PiYG"} from \code{RColorBrewer} if available, or 12 | #' \code{\link{cm.colors}} otherwise. 13 | #' 14 | #' @return A character vector of colours. 15 | #' 16 | #' @details Uses the \code{RColorBrewer} package if installed. Coerces \code{x} 17 | #' to numeric with a warning. 18 | #' 19 | #' @examples 20 | #' x <- runif(200) 21 | #' plot(x, col = cont2color(x, c(0,1))) 22 | #' 23 | #' plot(x, col = cont2color(x, c(0,0.5))) 24 | #' 25 | #' plot(sort(x), col = cont2color(sort(x), c(0.25,0.75)), pch = 16) 26 | #' abline(h = c(0.25, 0.75), lty = 3) 27 | #' 28 | #' @seealso \code{\link{factor2color}} 29 | 30 | cont2color <- 31 | function (x, xrange = NULL, breaks = NULL, colors = NULL) 32 | { 33 | x <- if (!is.numeric(x)){ 34 | as.numeric(x) 35 | warning("'x' has been coerced to numeric.") 36 | } else x 37 | xrange <- if (is.null(xrange)) 38 | range(x) 39 | else xrange 40 | breaks <- if(is.null(breaks)) 41 | 11 42 | else breaks 43 | br <- c(min(x, min(xrange)) - 1, seq(min(xrange), max(xrange), length.out = 44 | breaks - 1), max(x, max(xrange)) + 1) 45 | colors <- if (is.null(colors)){ 46 | if (requireNamespace("RColorBrewer", quietly = TRUE)) 47 | RColorBrewer::brewer.pal(n = max(breaks, 3L, na.rm = TRUE), name = "PuOr") 48 | else cm.colors(breaks) 49 | } else rep(colors, length.out = breaks) 50 | as.character(cut(x, br, labels = colors, include.lowest = TRUE)) 51 | } 52 | -------------------------------------------------------------------------------- /R/diagnosticplots.R: -------------------------------------------------------------------------------- 1 | ## NOT EXPORTED. 2 | 3 | ## plotap a diagnostic plot to be used with condtour. The plot shows the sum of 4 | ## the similarity weights for each section along the entire path. The plot is 5 | ## given its own update method. 6 | 7 | plotap <- 8 | function (k, pathindex = 1, lcol = "blue") 9 | { 10 | rsk <- rowSums(k) / ncol(k) 11 | plot(rsk, type = "l", xlab = "Path index", ylab = "sum of k/n") 12 | abline(v = pathindex, col = lcol) 13 | structure(list(k = k, rsk = rsk, pathindex = pathindex, device = dev.cur(), 14 | screen = screen(), mar = par()$mar, usr = par()$usr, lcol = lcol), 15 | class = "ap") 16 | } 17 | 18 | update.ap <- 19 | function (object, pathindex = NULL, ...) 20 | { 21 | if (dev.cur() != object$device) 22 | dev.set(object$device) 23 | screen(n = object$screen, new = FALSE) 24 | par(mar = object$mar) 25 | par(usr = object$usr) 26 | if (!is.null(pathindex)){ 27 | abline(v = object$pathindex, col = "white") 28 | refreshindex <- max((object$pathindex - 5), 1):min((object$pathindex + 5), 29 | nrow(object$k)) 30 | points(refreshindex, object$rsk[refreshindex], type = "l") 31 | abline(v = pathindex, col = object$lcol) 32 | box() 33 | object$pathindex <- pathindex 34 | } 35 | object 36 | } 37 | 38 | ## plotmaxk a diagnostic plot to be used with condtour. This is a static plot. 39 | ## Shows the deciles of the maximum similarity weight given to the data by the 40 | ## current conditional tour. If too many observations are reaching maximum 41 | ## similarity weight of 1, the 'threshold' used in similarityweight might be too 42 | ## big. If no observations are reaching maximum similarity weights of 0.3, we 43 | ## may not see any data on the sections. 44 | 45 | plotmaxk <- 46 | function (maxk) 47 | { 48 | seq01 <- seq(0, 1, 0.1) 49 | q <- quantile(maxk, probs = seq01) 50 | plot(q, seq01, type = "l", ylab = "proportion of data", xlab = 51 | "max k attained", ylim = c(0, 1)) 52 | points(q, seq01, pch = 16) 53 | } 54 | 55 | #update.maxk <- 56 | #function (object, ...) 57 | #{ 58 | # 59 | #} 60 | -------------------------------------------------------------------------------- /man/crab.Rd: -------------------------------------------------------------------------------- 1 | \name{crab} 2 | \alias{crab} 3 | 4 | \title{ 5 | Brockmann's crab data 6 | } 7 | 8 | \description{ 9 | Abstract from original paper: Horseshoe crabs arrive on the beach in pairs and 10 | spawn in the high intertidal during the springtime, new and full moon high 11 | tides. Unattached males also come to the beach, crowd around the nesting 12 | couples and compete with attached males for fertilizations. Satellite males 13 | form large groups around some couples while ignoring others, resulting in a 14 | nonrandom distribution that cannot be explained by local environmental 15 | conditions or habitat selection. In experimental manipulations, pairs that had 16 | satellites regained them after they had been removed whereas pairs with no 17 | satellites continued nesting alone, which means that satellites were not 18 | simply accumulating around the pairs that had been on the beach the longest. 19 | Manipulations also revealed that satellites were not just copying the 20 | behaviour of other males. Based on the evidence from observations and 21 | experiments, the most likely explanation for the nonrandom distribution of 22 | satellite males among nesting pairs is that unattached males are 23 | preferentially attracted to some females over others. Females with many 24 | satellites were larger and in better condition, but did not lay more eggs, 25 | than females with few or no satellites.\cr\cr 26 | \code{satellites} response variable; number of satellites around female 27 | crab\cr 28 | \code{color} color of crab\cr 29 | \code{spine} condition of spine\cr 30 | \code{weight} weight of crab\cr 31 | \code{width} width of carapace 32 | } 33 | 34 | \format{ 35 | 173 observations on 5 variables. 36 | } 37 | 38 | \source{ 39 | https://onlinecourses.science.psu.edu/stat504/node/169 40 | } 41 | 42 | \references{ 43 | Brockmann, H. (1996), "Satellite male groups in horseshoe crabs," 44 | \emph{Ethology}, \bold{102}-1, pp. 1-21. 45 | } 46 | 47 | \examples{ 48 | data(crab) 49 | } 50 | 51 | \keyword{ crab } 52 | -------------------------------------------------------------------------------- /man/arrangeC.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/arrangeC.R 3 | \name{arrangeC} 4 | \alias{arrangeC} 5 | \title{Make a list of variable pairings for condition selecting plots 6 | produced by plotxc} 7 | \usage{ 8 | arrangeC(data, method = "default") 9 | } 10 | \arguments{ 11 | \item{data}{A dataframe} 12 | 13 | \item{method}{The character name for the method to use for measuring 14 | bivariate dependency, passed to \code{\link{savingby2d}}.} 15 | } 16 | \value{ 17 | A list containing character vectors giving variable pairings. 18 | } 19 | \description{ 20 | This function arranges a number of variables in pairs, ordered 21 | by their bivariate relationships. The goal is to discover which variable 22 | pairings are most helpful in avoiding extrapolations when exploring the data 23 | space. Variable pairs with strong bivariate dependencies (not necessarily 24 | linear) are chosen first. The bivariate dependency is measured using 25 | \code{\link{savingby2d}}. Each variable appears in the output only once. 26 | } 27 | \details{ 28 | If \code{data} is so big as to make \code{arrangeC} very slow, a 29 | random sample of rows is used instead. The bivariate dependency measures 30 | are rough, and the ordering algorithm is a simple greedy one, so it is not 31 | worth allowing it too much time. This function exists mainly to provide a 32 | helpful default ordering/pairing for \code{\link{ceplot}}. 33 | } 34 | \examples{ 35 | data(powerplant) 36 | 37 | pairings <- arrangeC(powerplant) 38 | 39 | dev.new(height = 2, width = 2 * length(pairings)) 40 | par(mfrow = c(1, length(pairings))) 41 | 42 | for (i in seq_along(pairings)){ 43 | plotxc(powerplant[, pairings[[i]]], powerplant[1, pairings[[i]]], 44 | select.col = NA) 45 | } 46 | 47 | } 48 | \references{ 49 | O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 50 | Visualization for Statistical Models: An Introduction to the 51 | \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 52 | \strong{81}(5), pp. 1-20. . 53 | } 54 | \seealso{ 55 | \code{\link{savingby2d}} 56 | } 57 | 58 | -------------------------------------------------------------------------------- /R/ceplot.static.R: -------------------------------------------------------------------------------- 1 | ## Function to make a static snapshot of ceplot.interactive 2 | 3 | ceplot.static <- 4 | function (data, model, response = NULL, S = NULL, C = NULL, weights = NULL, col 5 | = "black", cex.axis = NULL, cex.lab = NULL, tck = NULL, view3d = FALSE, 6 | theta3d = 45, phi3d = 20, Corder = "default", xc.cond = NULL, select.colour = 7 | "blue", select.cex = 1, conf = FALSE, probs = FALSE, xsplotpar = NULL, 8 | modelpar = NULL, xcplotpar = NULL) 9 | { 10 | plotlegend <- length(S) == 2 11 | uniqC <- unique(unlist(C)) 12 | xc.cond <- if (is.null(xc.cond)) 13 | data[1, !colnames(data) %in% c(S, response)] 14 | else xc.cond 15 | #data.frame(lapply(data[, !colnames(data) %in% c(S, response)], mode1)) 16 | xcplots <- list() 17 | close.screen(all.screens = T) 18 | n.selector.cols <- ceiling(length(C) / 4L) 19 | selector.colwidth <- 2 20 | height <- 8 21 | width <- height + 0.5 + selector.colwidth * n.selector.cols 22 | xcwidth <- selector.colwidth * n.selector.cols / width 23 | main <- split.screen(figs = matrix(c(0, 1 - xcwidth, 1 - xcwidth, 1, 24 | 0, 0, 1, 1), ncol = 4)) 25 | selectors <- split.screen(figs = c(4, n.selector.cols), screen = main[2]) 26 | dev.hold() 27 | if (length(uniqC) > 0){ 28 | for(i in seq_along(C)){ 29 | screen(selectors[i]) 30 | xcplots[[i]] <- plotxc(xc = data[, C[[i]]], xc.cond = xc.cond[1L, C[[i]]], 31 | name = colnames(data[, C[[i]], drop = FALSE]), trim = xcplotpar$trim, 32 | select.colour = select.colour, select.cex = select.cex, hist2d = 33 | xcplotpar$hist2d, fullbin = xcplotpar$fullbin) 34 | } 35 | } 36 | screen(main[1]) 37 | legendwidth <- 1.15 / height 38 | xsscreens <- if (plotlegend){ 39 | split.screen(figs = matrix(c(0, 1 - legendwidth, 1 - legendwidth, 1, 0, 0, 40 | 1, 1), ncol = 4)) 41 | } else split.screen() 42 | if (plotlegend){ 43 | screen(xsscreens[2L]) 44 | xslegend(data[, response], response) 45 | } 46 | screen(xsscreens[1L]) 47 | xsplot <- plotxs(xs = data[, S, drop = FALSE], y = data[, response, drop = 48 | FALSE], xc.cond = xc.cond, model = model, model.colour = modelpar$col, 49 | model.lwd = modelpar$lwd, model.lty = modelpar$lty, yhat = NULL, mar = NULL, 50 | weights = weights, col = col, view3d = view3d, theta3d = theta3d, phi3d = 51 | phi3d, conf = conf, probs = probs, main = xsplotpar$main, xlim = 52 | xsplotpar$xlim, ylim = xsplotpar$ylim) 53 | dev.flush() 54 | } 55 | -------------------------------------------------------------------------------- /R/predict.R: -------------------------------------------------------------------------------- 1 | ## NOT EXPORTED. 2 | 3 | ## This is a helper function to try to standardise the output from various 4 | ## predict methods. If 'ylevels' is given, it implies that we want the predicted 5 | ## class probabilities back, not just the predicted class. 6 | 7 | predict1 <- 8 | function (object, ..., ylevels = NULL) 9 | { 10 | type <- if (inherits(object, "nnet")) 11 | if (is.null(ylevels)) 12 | "raw" 13 | else "class" 14 | else if (inherits(object, "rpart")) 15 | "vector" 16 | else "response" 17 | n.trees <- if (inherits(object, "gbm")) 18 | object$n.trees 19 | else NULL 20 | if (inherits(object, "nnet") && !is.null(ylevels)){ 21 | return(factor(predict(object, ..., type = type), levels = ylevels)) 22 | } 23 | if (inherits(object, "gbm") && !is.null(ylevels)){ 24 | p1 <- predict(object, ..., n.trees = n.trees, type = type) 25 | out <- structure(factor(ylevels[apply(p1, 1, which.max)], levels = ylevels), 26 | probabilities = p1) 27 | return(out) 28 | } 29 | predict(object, ..., n.trees = n.trees, type = type) 30 | } 31 | 32 | ## This is a slightly hacky function to extract confidence bounds on predictions 33 | ## from certain classes of model, and pass them on in a standard fashion. Most 34 | ## notable is the approach for models of class "custompred", which will be a 35 | ## wrapper object for some model that has no predict method, designed to behave 36 | ## like an lm object under a predict call. 37 | 38 | confpred <- 39 | function (model, newdata) 40 | { 41 | if (identical(class(model), "lm")){ 42 | pred <- predict(object = model, newdata = newdata, interval = "confidence", 43 | type = "response") 44 | upr <- pred[, "upr"] 45 | lwr <- pred[, "lwr"] 46 | return(cbind(lwr, upr)) 47 | } 48 | if (inherits(model, "custompred")){ 49 | pred <- predict(object = model, newdata = newdata, interval = "confidence") 50 | upr <- pred[, "upr"] 51 | lwr <- pred[, "lwr"] 52 | return(cbind(lwr, upr)) 53 | } 54 | if (identical(class(model), c("glm", "lm"))){ 55 | pred <- predict(object = model, newdata = newdata, type = "link", se.fit = 56 | TRUE) 57 | upr <- model$family$linkinv(pred$fit + (2 * pred$se.fit)) 58 | lwr <- model$family$linkinv(pred$fit - (2 * pred$se.fit)) 59 | return(cbind(lwr, upr)) 60 | } 61 | if (identical(class(model), c("gam", "glm", "lm")) && "mgcv.conv" %in% names( 62 | model)){ 63 | pred <- predict(object = model, newdata = newdata, type = "link", se.fit = 64 | TRUE) 65 | upr <- model$family$linkinv(pred$fit + (2 * pred$se.fit)) 66 | lwr <- model$family$linkinv(pred$fit - (2 * pred$se.fit)) 67 | return(cbind(lwr, upr)) 68 | } 69 | NULL 70 | } 71 | -------------------------------------------------------------------------------- /man/savingby2d.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/savingby2d.R 3 | \name{savingby2d} 4 | \alias{savingby2d} 5 | \title{Assess advantage of 2-D view over 1-D view for identifying 6 | extrapolation} 7 | \usage{ 8 | savingby2d(x, y = NULL, method = "default") 9 | } 10 | \arguments{ 11 | \item{x}{A numeric or factor vector. Can also be a dataframe containing 12 | \code{x} and \code{y}, if \code{y} is \code{NULL}.} 13 | 14 | \item{y}{A numeric or factor vector.} 15 | 16 | \item{method}{Character; criterion used to quantify bivariate relationships. 17 | Can be \code{"default"}, a scagnostic measure, or \code{"DECR"} to use a 18 | density estimate confidence region.} 19 | } 20 | \value{ 21 | A number between 0 and 1. Values near 1 imply no benefit to using a 22 | 2-D view, whereas values near 0 imply that a 2-D view reveals structure 23 | hidden in the 1-D views. 24 | } 25 | \description{ 26 | A simple algorithm to evaluate the advantage of by taking a 27 | bivariate marginal view of two variables, when trying to avoid 28 | extrapolations, rather than two univariate marginal views. 29 | } 30 | \details{ 31 | If given two continuous variables, the variables are both scaled to 32 | mean 0 and variance 1. Then the returned value is the ratio of the area of 33 | the convex hull of the data to the area obtained from the product of the 34 | ranges of the two areas, i.e. the area of the bounding rectangle. 35 | 36 | If given two categorical variables, all combinations are tabulated. The 37 | returned value is the number of non-zero table entries divided by the total 38 | number of table entries. 39 | 40 | If given one categorical and one continuous variable, the returned value is 41 | the weighted mean of the range of the continuous variable within each 42 | category divided by the overall range of the continuous variable, where the 43 | weights are given by the number of observations in each level of the 44 | categorical variable. 45 | 46 | Requires package \code{scagnostics} if a scagnostics measure is specified 47 | in \code{method}. Requires package \code{hdrcde} if \code{"DECR"} (density 48 | estimate confidence region) is specified in \code{method}. These only apply 49 | to cases where \code{x} and \code{y} are both numeric. 50 | } 51 | \examples{ 52 | x <- runif(1000) 53 | y <- runif(1000) 54 | plot(x, y) 55 | savingby2d(x, y) 56 | ## value near 1, no real benefit from bivariate view 57 | 58 | x1 <- runif(1000) 59 | y1 <- x1 + rnorm(sd = 0.3, n = 1000) 60 | plot(x1, y1) 61 | savingby2d(x1, y1) 62 | ## smaller value indicates that the bivariate view reveals some structure 63 | 64 | } 65 | \references{ 66 | O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 67 | Visualization for Statistical Models: An Introduction to the 68 | \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 69 | \strong{81}(5), pp. 1-20. . 70 | } 71 | \seealso{ 72 | \code{\link{similarityweight}} 73 | } 74 | 75 | -------------------------------------------------------------------------------- /R/makepath.R: -------------------------------------------------------------------------------- 1 | #' @title Make a default path for conditional tour 2 | #' 3 | #' @description Provides a default path (a set of sections), useful as input to 4 | #' a conditional tour (\code{\link{condtour}}). Clusters the data using 5 | #' k-means or partitioning around medoids (from the \code{cluster} package). 6 | #' The cluster centres/prototypes are then ordered to create a sensible way to 7 | #' visit each section as smoothly as possible. Ordering uses either the 8 | #' \code{DendSer} or \code{TSP} package. Linear interpolation is then used to 9 | #' create intermediate points between the path nodes. 10 | #' 11 | #' @param x A dataframe 12 | #' @param ncentroids The number of centroids to use as path nodes. 13 | #' @param ninterp The number of points to linearly interpolate between path 14 | #' nodes. 15 | #' 16 | #' @return A list with two dataframes: \code{centers} giving the path nodes, and 17 | #' \code{path} giving the full interpolated path. 18 | #' 19 | #' @examples 20 | #' d <- data.frame(x = runif(500), y = runif(500)) 21 | #' plot(d) 22 | #' mp1 <- makepath(d, 5) 23 | #' points(mp1$centers, type = "b", col = "blue", pch = 16) 24 | #' mp2 <- makepath(d, 40) 25 | #' points(mp2$centers, type = "b", col = "red", pch = 16) 26 | #' 27 | #' @seealso \code{\link{condtour}} 28 | 29 | makepath <- 30 | function (x, ncentroids, ninterp = 4) 31 | { 32 | ## If we have factors, make sure 'ninterp' is odd. 33 | 34 | if (any(arefactors <- vapply(x, is.factor, logical(1L)))){ 35 | if (identical(ninterp %% 2, 0)) 36 | ninterp <- ninterp + 1 37 | } 38 | 39 | ## If we have factors, do partitioning around medoids (PAM) using the daisy 40 | ## distance from the 'cluster' package. 41 | 42 | if (any(arefactors)){ 43 | if (!requireNamespace("cluster", quietly = TRUE)) 44 | stop("requires package 'cluster'") 45 | d <- cluster::daisy(x) 46 | clustering <- cluster::pam(d, k = ncentroids) 47 | centers <- x[clustering$medoids, , drop = FALSE] 48 | 49 | ## Order the cluster centres using 'DendSer' if available. 50 | 51 | if (!requireNamespace("DendSer", quietly = TRUE)){ 52 | warning("requires package 'DendSer' to order path, left unordered") 53 | } else { 54 | d.centers <- cluster::daisy(centers) 55 | h <- hclust(d.centers, method = "single") 56 | o <- DendSer::DendSer(h, d.centers) 57 | centers <- centers[o, , drop = FALSE] 58 | } 59 | path <- as.data.frame(lapply(centers, interpolate, ninterp = ninterp)) 60 | } else { 61 | 62 | ## For all continuous variables, cluster with 'kmeans' 63 | 64 | ## Order the cluster centres with 'TSP'. 65 | 66 | if (!requireNamespace("TSP", quietly = TRUE)) 67 | stop("requires package 'TSP'") 68 | x <- scale(x) 69 | means <- attr(x, "scaled:center") 70 | sds <- attr(x, "scaled:scale") 71 | clustering <- kmeans(x[, ], centers = ncentroids) 72 | centers <- clustering$centers 73 | o <- TSP::TSP(dist(centers)) 74 | orderindex <- TSP::solve_TSP(o) 75 | centers <- centers[orderindex, , drop = FALSE] 76 | centers <- as.data.frame(t(apply(t(apply(centers, 1L, `*`, sds)), 1L, `+`, 77 | means))) 78 | rownames(centers) <- NULL 79 | path <- as.data.frame(lapply(centers, interpolate, ninterp = ninterp)) 80 | } 81 | 82 | ## Return the cluster centres and the interpolated path. 83 | 84 | list(centers = centers, path = path) 85 | } 86 | -------------------------------------------------------------------------------- /man/plotxc.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotxc.R 3 | \name{plotxc} 4 | \alias{plotxc} 5 | \title{Condition selector plot} 6 | \usage{ 7 | plotxc(xc, xc.cond, name = NULL, trim = NULL, select.colour = NULL, 8 | select.lwd = NULL, cex.axis = NULL, cex.lab = NULL, tck = NULL, 9 | select.cex = 1, hist2d = NULL, fullbin = NULL, ...) 10 | } 11 | \arguments{ 12 | \item{xc}{A numeric or factor vector, or a dataframe with two columns} 13 | 14 | \item{xc.cond}{Same type as \code{xc}, representing a single point in data 15 | space to highlight.} 16 | 17 | \item{name}{The variable name for \code{xc}} 18 | 19 | \item{trim}{Logical; if \code{TRUE}, long tails of continuous data are 20 | chopped off at the 5th and 95th percentiles.} 21 | 22 | \item{select.colour}{Colour to highlight \code{xc.cond}} 23 | 24 | \item{select.lwd}{Line weight to highlight \code{xc.cond}} 25 | 26 | \item{cex.axis}{Axis text scaling} 27 | 28 | \item{cex.lab}{Label text scaling} 29 | 30 | \item{tck}{Plot axis tick size} 31 | 32 | \item{select.cex}{Plot symbol size} 33 | 34 | \item{hist2d}{If \code{TRUE}, a scatterplot is visualised as a 2-D histogram. 35 | Default behaviour is to use a 2-D histogram if there are over 2,000 36 | observations.} 37 | 38 | \item{fullbin}{A cap on the counts in a bin for the 2-D histogram, helpful 39 | with skewed data. Larger values give more detail about data density. 40 | Defaults to 25.} 41 | 42 | \item{...}{Passed to \code{condvis:::spineplot2}.} 43 | } 44 | \value{ 45 | Produces a plot, and returns a list containing the relevant 46 | information to update the plot at a later stage. 47 | } 48 | \description{ 49 | Data visualisations used to select sections for 50 | \code{\link{ceplot}}. 51 | } 52 | \examples{ 53 | ## Histogram, highlighting the first case. 54 | 55 | data(mtcars) 56 | obj <- plotxc(mtcars[, "mpg"], mtcars[1, "mpg"]) 57 | obj$usr 58 | 59 | ## Barplot, highlighting 'cyl' = 6. 60 | 61 | plotxc(as.factor(mtcars[, "cyl"]), 6, select.colour = "blue") 62 | 63 | ## Scatterplot, highlighting case 25. 64 | 65 | plotxc(mtcars[, c("qsec", "wt")], mtcars[25, c("qsec", "wt")], 66 | select.colour = "blue", select.lwd = 1, lty = 3) 67 | 68 | ## Boxplot, where 'xc' contains one factor, and one numeric. 69 | 70 | mtcars$carb <- as.factor(mtcars$carb) 71 | plotxc(mtcars[, c("carb", "wt")], mtcars[25, c("carb", "wt")], 72 | select.colour = "red", select.lwd = 3) 73 | 74 | ## Spineplot, where 'xc' contains two factors. 75 | 76 | mtcars$gear <- as.factor(mtcars$gear) 77 | mtcars$cyl <- as.factor(mtcars$cyl) 78 | plotxc(mtcars[, c("cyl", "gear")], mtcars[25, c("cyl", "gear")], 79 | select.colour = "red") 80 | 81 | ## Effect of 'trim'. 82 | 83 | x <- c(-200, runif(400), 200) 84 | plotxc(x, 0.5, trim = FALSE, select.colour = "red") 85 | plotxc(x, 0.5, trim = TRUE, select.colour = "red") 86 | 87 | } 88 | \references{ 89 | O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 90 | Visualization for Statistical Models: An Introduction to the 91 | \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 92 | \strong{81}(5), pp. 1-20. . 93 | } 94 | \seealso{ 95 | \code{\link{ceplot}}, \code{\link{plotxs}}. 96 | 97 | \code{\link{plotxs}}, \code{\link{ceplot}}, \code{\link{condtour}} 98 | } 99 | 100 | -------------------------------------------------------------------------------- /man/similarityweight.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/similarityweight.R 3 | \name{similarityweight} 4 | \alias{similarityweight} 5 | \title{Calculate the similarity weight for a set of observations} 6 | \usage{ 7 | similarityweight(x, data, threshold = NULL, distance = NULL, 8 | lambda = NULL) 9 | } 10 | \arguments{ 11 | \item{x}{A dataframe describing arbitrary points in the space of the data 12 | (i.e., with same \code{colnames} as \code{data}).} 13 | 14 | \item{data}{A dataframe representing observed data.} 15 | 16 | \item{threshold}{Threshold distance outside which observations will 17 | be assigned similarity weight zero. This is numeric and should be > 0. 18 | Defaults to 1.} 19 | 20 | \item{distance}{The type of distance measure to be used, currently just two 21 | types of Minkowski distance: \code{"euclidean"} (default), and 22 | \code{"maxnorm"}.} 23 | 24 | \item{lambda}{A constant to multiply by the number of categorical 25 | mismatches, before adding to the Minkowski distance, to give a general 26 | dissimilarity measure. If left \code{NULL}, behaves as though \code{lambda} 27 | is set larger than \code{threshold}, meaning that one factor mismatch 28 | guarantees zero weight.} 29 | } 30 | \value{ 31 | A numeric vector or matrix, with values from 0 to 1. The similarity 32 | weights for the observations in \code{data} arranged in rows for each row 33 | in \code{x}. 34 | } 35 | \description{ 36 | Calculate the similarity weight for a set of observations, based 37 | on their distance from some arbitary points in data space. Observations which 38 | are very similar to the point under consideration are given weight 1, while 39 | observations which are dissimilar to the point are given weight zero. 40 | } 41 | \details{ 42 | Similarity weight is assigned to observations based on their 43 | distance from a given point. The distance is calculated as Minkowski 44 | distance between the numeric elements for the observations whose 45 | categorical elements match, with the option to use a more general 46 | dissimilarity measure comprising Minkowski distance and a mismatch count. 47 | } 48 | \examples{ 49 | ## Say we want to find observations similar to the first observation. 50 | ## The first observation is identical to itself, so it gets weight 1. The 51 | ## second observation is similar, so it gets some weight. The rest are more 52 | ## different, and so get zero weight. 53 | 54 | data(mtcars) 55 | similarityweight(x = mtcars[1, ], data = mtcars) 56 | 57 | ## By increasing the threshold, we can find observations which are more 58 | ## approximately similar to the first row. Note that the second observation 59 | ## now has weight 1, so we lose some ability to discern how similar 60 | ## observations are by increasing the threshold. 61 | 62 | similarityweight(x = mtcars[1, ], data = mtcars, threshold = 5) 63 | 64 | ## Can provide a number of points to 'x'. Here we see that the Mazda RX4 Wag 65 | ## is more similar to the Merc 280 than the Mazda RX4 is. 66 | 67 | similarityweight(mtcars[1:2, ], mtcars, threshold = 3) 68 | 69 | } 70 | \references{ 71 | O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 72 | Visualization for Statistical Models: An Introduction to the 73 | \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 74 | \strong{81}(5), pp. 1-20. . 75 | } 76 | \seealso{ 77 | \code{\link{dist1}} 78 | } 79 | 80 | -------------------------------------------------------------------------------- /R/arrangeC.R: -------------------------------------------------------------------------------- 1 | #' @title Make a list of variable pairings for condition selecting plots 2 | #' produced by plotxc 3 | #' 4 | #' @description This function arranges a number of variables in pairs, ordered 5 | #' by their bivariate relationships. The goal is to discover which variable 6 | #' pairings are most helpful in avoiding extrapolations when exploring the data 7 | #' space. Variable pairs with strong bivariate dependencies (not necessarily 8 | #' linear) are chosen first. The bivariate dependency is measured using 9 | #' \code{\link{savingby2d}}. Each variable appears in the output only once. 10 | #' 11 | #' @param data A dataframe 12 | #' @param method The character name for the method to use for measuring 13 | #' bivariate dependency, passed to \code{\link{savingby2d}}. 14 | #' 15 | #' @return A list containing character vectors giving variable pairings. 16 | #' 17 | #' @details If \code{data} is so big as to make \code{arrangeC} very slow, a 18 | #' random sample of rows is used instead. The bivariate dependency measures 19 | #' are rough, and the ordering algorithm is a simple greedy one, so it is not 20 | #' worth allowing it too much time. This function exists mainly to provide a 21 | #' helpful default ordering/pairing for \code{\link{ceplot}}. 22 | #' 23 | #' @seealso \code{\link{savingby2d}} 24 | #' 25 | #' @examples 26 | #' data(powerplant) 27 | #' 28 | #'pairings <- arrangeC(powerplant) 29 | #' 30 | #'dev.new(height = 2, width = 2 * length(pairings)) 31 | #'par(mfrow = c(1, length(pairings))) 32 | #' 33 | #'for (i in seq_along(pairings)){ 34 | #' plotxc(powerplant[, pairings[[i]]], powerplant[1, pairings[[i]]], 35 | #' select.col = NA) 36 | #'} 37 | #' 38 | #' @references O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 39 | #' Visualization for Statistical Models: An Introduction to the 40 | #' \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 41 | #' \strong{81}(5), pp. 1-20. . 42 | 43 | arrangeC <- function (data, method = "default") 44 | { 45 | nc.data <- ncol(data) 46 | if (nc.data <= 2L) 47 | return(list(colnames(data))) 48 | data <- na.omit(data) 49 | nr.data <- nrow(data) 50 | if (nr.data < 5) 51 | stop("'data' has less than 5 non-missing rows") 52 | 53 | ## Calculate a ceiling on the number of rows of 'data' we will use. This is just 54 | ## based on a few quick tests on a personal computer, to keep the computation 55 | ## time on the order of seconds. 56 | 57 | n <- max(36800 - 6850 * log(nc.data), 100) 58 | if(nr.data > n) 59 | data <- data[sample(1:nr.data, n, replace = TRUE), ] 60 | 61 | ## Construct a matrix of the bivariate dependencies which will be used to order 62 | ## the variables 63 | 64 | saving <- matrix(nrow = nc.data, ncol = nc.data) 65 | colnames(saving) <- rownames(saving) <- colnames(data) 66 | for (i in 1:nc.data){ 67 | for (j in i:nc.data){ 68 | saving[i, j] <- 69 | saving[j, i] <- savingby2d(data[, i], data[, j], method) 70 | } 71 | } 72 | 73 | ## Assign NA to the diagonal to avoid getting pairs of the same variable. 74 | 75 | diag(saving) <- NA 76 | 77 | ## Simple greedy ordering of pairs 78 | 79 | C <- list() 80 | i <- 1L 81 | while(ncol(saving) > 2){ 82 | pair <- which(saving == min(saving, na.rm = TRUE), arr.ind = TRUE)[1L, ] 83 | C[[i]] <- colnames(saving)[pair] 84 | saving <- saving[-pair, -pair, drop = FALSE] 85 | i <- i + 1L 86 | } 87 | C[[i]] <- colnames(saving) 88 | C 89 | } 90 | -------------------------------------------------------------------------------- /tests/testthat/test.similarityweight.R: -------------------------------------------------------------------------------- 1 | context("similarityweight") 2 | 3 | n <- 250000 4 | p <- 50 5 | 6 | dat <- data.frame( 7 | x1 = factor(sample(letters[1:3], size = n, replace = TRUE)), 8 | x2 = factor(sample(letters[4:6], size = n, replace = TRUE)), 9 | x3 = factor(sample(letters[7:9], size = n, replace = TRUE)), 10 | x4 = factor(sample(letters[10:15], size = n, replace = TRUE)), 11 | x5 = rnorm(n), 12 | x6 = rnorm(n), 13 | x7 = rnorm(n), 14 | x8 = rnorm(n) 15 | )[, sample(1:8, p, replace = TRUE)] 16 | 17 | test_that("similarityweight fails without required inputs", { 18 | expect_error(similarityweight(x = mtcars, data = NULL)) 19 | expect_error(similarityweight(x = NULL, data = NULL)) 20 | expect_error(similarityweight(x = mtcars, data = mtcars[1, ])) 21 | }) 22 | 23 | test_that("similarityweight returns the right types", { 24 | expect_is(similarityweight(x = dat[1, ], data = dat[1:20, ]), "numeric") 25 | expect_is(similarityweight(x = dat[1:5, ], data = dat[1:20, ]), "matrix") 26 | }) 27 | 28 | test_that("identical observations have weight one", { 29 | expect_equivalent(diag(similarityweight(x = dat[1:20, ], data = dat[1:20, ])), 30 | rep(1, 20)) 31 | expect_equal(diag(similarityweight(x = dat[1:10, ], data = dat[1:5, ])), 32 | diag(similarityweight(x = dat[1:5, ], data = dat[1:10, ]))) 33 | }) 34 | 35 | data(powerplant) 36 | test_that("larger threshold values give equal or larger weights", { 37 | expect_true(all(similarityweight(x = powerplant[1:50, ], data = powerplant, 38 | threshold = 0.2) >= similarityweight(x = powerplant[1:50, ], data = 39 | powerplant, threshold = 0.1))) 40 | expect_true(all(similarityweight(x = powerplant[1:50, ], data = powerplant, 41 | threshold = 0.5) >= similarityweight(x = powerplant[1:50, ], data = 42 | powerplant, threshold = 0.2))) 43 | expect_true(all(similarityweight(x = powerplant[1:50, ], data = powerplant, 44 | threshold = 3) >= similarityweight(x = powerplant[1:50, ], data = powerplant 45 | , threshold = 1))) 46 | }) 47 | 48 | test_that("setting threshold to Inf gives weight one to everything", { 49 | expect_true(all(similarityweight(x = powerplant[1:50, ], data = powerplant, 50 | threshold = Inf) == 1)) 51 | expect_true(all(similarityweight(x = dat[1, ], data = dat, threshold = Inf) 52 | == 1)) 53 | }) 54 | 55 | d <- data.frame(x = runif(500)) 56 | test_that("for 1-d continuous case, maxnorm and euclidean are identical", { 57 | expect_equal(similarityweight(d[1:2, 1, drop = FALSE], d, distance = "maxnorm" 58 | ), similarityweight(d[1:2, 1, drop = FALSE], d, distance = "euclidean")) 59 | }) 60 | 61 | test_that("internal visual weight function returns a function", { 62 | expect_is(.similarityweight(mtcars), "function") 63 | expect_error(.similarityweight()) 64 | }) 65 | 66 | zerovartest <- powerplant 67 | zerovartest$zerovar_numeric <- 1 68 | 69 | test_that("a numeric with all values equal has no impact", { 70 | expect_equal(similarityweight(zerovartest[1:5, ], zerovartest, distance = 71 | "euclidean"), similarityweight(powerplant[1:5, ], powerplant, distance = 72 | "euclidean")) 73 | }) 74 | 75 | zerovartest$zerovar_numeric <- NULL 76 | zerovartest$zerovar_factor <- as.factor("a") 77 | 78 | test_that("a factor with all values equal has no impact", { 79 | expect_equal(similarityweight(zerovartest[1:5, ], zerovartest, distance = 80 | "euclidean"), similarityweight(powerplant[1:5, ], powerplant, distance = 81 | "euclidean")) 82 | }) 83 | 84 | #test_that("similarityweight is not too slow", { 85 | # takes_less_than(3)(similarityweight(x = dat[1, ], data = dat)) 86 | # takes_less_than(6)(similarityweight(x = dat[1:15, ], data = dat)) 87 | #}) 88 | -------------------------------------------------------------------------------- /NEWS: -------------------------------------------------------------------------------- 1 | condvis: Conditional Visualisation for Statistical Models 2 | ========================================================= 3 | 4 | News 5 | --------------------------------------------------------- 6 | 7 | Version 0.5-1 2018-09-13 8 | 9 | * Changed check for zero variance (introduced in 0.5-0) to work for factors. 10 | * Fixed S3 method treatment for `interpolate`. 11 | 12 | Version 0.5-0 2018-09-01 13 | 14 | * Fixed bug where `arrangeC` would return pairs of the same variable if there 15 | were no bivariate relationships detected by `savingby2d`. 16 | * Added a generic function `interpolate`, with methods for standard data 17 | types. 18 | * Added an error for negative threshold dissimilarities in `similarityweight`. 19 | * Fixed bug where variables with zero variance caused an error in 20 | `similarityweight`. 21 | 22 | Version 0.4-3 2017-10-19 23 | 24 | * Fixed bug where `ceplot.shiny` dropped `xc.cond` to a vector and threw an 25 | error. 26 | 27 | Version 0.4-2 2017-10-11 28 | * New release for JSS publication. Small changes to documentation, and some 29 | default colours have been changed. 30 | 31 | Version 0.4-0 2016-10-17 32 | * The 2-D histogram in `plotxc` has been made optional, and also allows a cap 33 | on the bin counts, to allow for skewed data. 34 | * In `similarityweight`, the weight values are no longer discretised. All 35 | values between 0 and 1 are allowed. 36 | * New unexported function `weightcolor` used to weight colours using 37 | similarity weight. 38 | * Fixed bug; not passing `pch` to snapshot for `ceplot.interactive`. 39 | 40 | Version 0.3-5 2016-09-26 41 | * In `plotxc`, can chop off long tails for scatterplot/2-d histogram, 42 | controlled by `trim` parameter. 43 | * pass plotting parameters for models from top-level `ceplot` call. See 44 | parameter `modelpar`. 45 | * allow `xlim` and `ylim` to be passed to section plot produced by `plotxs`. 46 | * corrected typo in `?similarityweight`, sigma --> threshold. 47 | * added snapshot feature to `condtour`. 48 | * changed treatment of conditioning predictors in `condtour` to be more like 49 | `ceplot`, plus a hacky fix to `update.xcplot` for `condtour` providing 50 | predictor values instead of mouseclicks. 51 | * corrected problem in `condtour`, where interpolated path omitted the final 52 | row. Also removed rownames from cluster centres. 53 | * diagnostic plot for `condtour`, `plotap` now shows approximate proportion 54 | of data visible, rather than approximate number of observations visible. 55 | 56 | Version 0.3-4 2016-08-04 57 | * fixed incorrect parameter names in demo files. 58 | * fix to `plotxs` to allow `main` and `mar` to be specified. 59 | 60 | Version 0.3-3 2016-07-04 61 | * fixed treatment of `pch` by condtour, was previously defaulting to 1 when it 62 | should have been 21 for background colours. 63 | * can now adjust `threshold` while using `condtour`. 64 | * added help link for `rsconnect` package in Shiny application. 65 | * added fix to `update.xsplot` where the fitted model was not updating 66 | properly, due to the section information remaining static. 67 | 68 | Version 0.3-2 69 | * fixed bug with `rep` for `col` and `pch` where `length.out` was omitted, 70 | causing huge delays for `plotxs`. 71 | 72 | Version 0.3-1 73 | * removed test which timed `similarityweight` (for CRAN) 74 | 75 | Version 0.3-0 2016-07-01 76 | * removed daisy distance measure, and relevant contributors. 77 | * renamed `visualweight` to `similarityweight` and changed parameter names. 78 | * renamed parameters in `ceplot`. 79 | * Shiny implementation completely rewritten, see R/ceplot.shiny.R. 80 | * added Roxygen comments for documentation of exported functions. 81 | * added update methods to parallel coordinates and scatterplot matrix 82 | condition selector plots. 83 | * `condtour` can now handle mixed data (categorical and continuous). 84 | 85 | Version 0.2-2 2016-05-01 86 | * updated contributors for copied code. 87 | * added simple example to README. 88 | 89 | Version 0.2-1 23 Feb 2016 90 | * initial release on CRAN. 91 | -------------------------------------------------------------------------------- /man/condtour.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/condtour.R 3 | \name{condtour} 4 | \alias{condtour} 5 | \title{Conditional tour; a tour through sections in data space} 6 | \usage{ 7 | condtour(data, model, path, response = NULL, sectionvars = NULL, 8 | conditionvars = NULL, threshold = NULL, lambda = NULL, 9 | distance = c("euclidean", "maxnorm"), view3d = FALSE, 10 | Corder = "default", conf = FALSE, col = "black", pch = NULL, 11 | xsplotpar = NULL, modelpar = NULL, xcplotpar = NULL) 12 | } 13 | \arguments{ 14 | \item{data}{A dataframe.} 15 | 16 | \item{model}{A fitted model object, or a list of such objects.} 17 | 18 | \item{path}{A dataframe, describing the sections to take. Basically a 19 | dataframe with its \code{colnames} being \code{conditionvars}.} 20 | 21 | \item{response}{Character name of response variable in \code{data}.} 22 | 23 | \item{sectionvars}{Character name(s) of variables in \code{data} on which to 24 | take sections.} 25 | 26 | \item{conditionvars}{Character name(s) of variables in \code{data} on which 27 | to condition.} 28 | 29 | \item{threshold}{Threshold distance. Observed data which are a distance 30 | greater than \code{threshold} from the current section are not visible. 31 | Passed to \code{\link{similarityweight}}.} 32 | 33 | \item{lambda}{A constant to multiply by number of factor mismatches in 34 | constructing a general dissimilarity measure. If left \code{NULL}, behaves 35 | as though \code{lambda} is set greater than \code{threshold}, and so only 36 | observations whose factor levels match the current section are visible. 37 | Passed to \code{\link{similarityweight}}.} 38 | 39 | \item{distance}{The type of distance measure to use, either 40 | \code{"euclidean"} (default) or \code{"maxnorm"}.} 41 | 42 | \item{view3d}{Logical; if \code{TRUE}, plots a three-dimensional regression 43 | surface when possible.} 44 | 45 | \item{Corder}{Character name for method of ordering conditioning variables. 46 | See \code{\link{arrangeC}}.} 47 | 48 | \item{conf}{Logical; if \code{TRUE}, plots confidence bounds or equivalent 49 | when possible.} 50 | 51 | \item{col}{Colour for observed data points.} 52 | 53 | \item{pch}{Plot symbols for observed data points.} 54 | 55 | \item{xsplotpar}{Plotting parameters for section visualisation as a list, 56 | passed to \code{\link{plotxs}}. Not used.} 57 | 58 | \item{modelpar}{Plotting parameters for models as a list, passed to 59 | \code{\link{plotxs}}. Not used.} 60 | 61 | \item{xcplotpar}{Plotting parameters for condition selector plots as a list, 62 | passed to \code{\link{plotxc}}. Can specify \code{cex.axis}, \code{cex.lab} 63 | , \code{tck}, \code{col} for highlighting current section, \code{cex}.} 64 | } 65 | \value{ 66 | Produces a set of interactive plots. One device displays the current 67 | section. A second device shows the the current section in the space of the 68 | conditioning predictors given by \code{conditionvars}. A third device shows 69 | some simple diagnostic plots; one to show approximately how much data are 70 | visible on each section, and another to show what proportion of data are 71 | \emph{visited} by the tour. 72 | } 73 | \description{ 74 | Whereas \code{\link{ceplot}} allows the user to interactively 75 | choose sections to visualise, \code{condtour} allows the user to pre-select 76 | all sections to visualise, order them, and cycle through them one by one. 77 | ']' key advances the tour, and '[' key goes back. Can adjust 78 | \code{threshold} for the current section visualisation with ',' and '.' 79 | keys. 80 | } 81 | \examples{ 82 | \dontrun{ 83 | 84 | data(powerplant) 85 | library(e1071) 86 | model <- svm(PE ~ ., data = powerplant) 87 | path <- makepath(powerplant[-5], 25) 88 | condtour(data = powerplant, model = model, path = path$path, 89 | sectionvars = "AT") 90 | 91 | data(wine) 92 | wine$Class <- as.factor(wine$Class) 93 | library(e1071) 94 | model5 <- list(svm(Class ~ ., data = wine)) 95 | conditionvars1 <- setdiff(colnames(wine), c("Class", "Hue", "Flavanoids")) 96 | path <- makepath(wine[, conditionvars1], 50) 97 | condtour(data = wine, model = model5, path = path$path, sectionvars = c("Hue" 98 | , "Flavanoids"), threshold = 3) 99 | 100 | } 101 | } 102 | \seealso{ 103 | \code{\link{ceplot}}, \code{\link{similarityweight}} 104 | } 105 | 106 | -------------------------------------------------------------------------------- /man/plotxs.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/plotxs.R 3 | \name{plotxs} 4 | \alias{plotxs} 5 | \title{Visualise a section in data space} 6 | \usage{ 7 | plotxs(xs, y, xc.cond, model, model.colour = NULL, model.lwd = NULL, 8 | model.lty = NULL, model.name = NULL, yhat = NULL, mar = NULL, 9 | col = "black", weights = NULL, view3d = FALSE, theta3d = 45, 10 | phi3d = 20, xs.grid = NULL, prednew = NULL, conf = FALSE, 11 | probs = FALSE, pch = 1, residuals = FALSE, main = NULL, xlim = NULL, 12 | ylim = NULL) 13 | } 14 | \arguments{ 15 | \item{xs}{A dataframe with one or two columns.} 16 | 17 | \item{y}{A dataframe with one column.} 18 | 19 | \item{xc.cond}{A dataframe with a single row, with all columns required for 20 | passing to \code{\link{predict}} methods of models in \code{model}.} 21 | 22 | \item{model}{A fitted model object, or a list of such objects.} 23 | 24 | \item{model.colour}{Colours for fitted models. If \code{model} is a list, 25 | this should be of same length as \code{model}.} 26 | 27 | \item{model.lwd}{Line weight for fitted models. If \code{model} is a list, 28 | this should be of same length as \code{model}.} 29 | 30 | \item{model.lty}{Line style for fitted models. If \code{model} is a list, 31 | this should be of same length as \code{model}.} 32 | 33 | \item{model.name}{Character labels for models, for legend.} 34 | 35 | \item{yhat}{Fitted values for the observations in \code{y}. Calculated if 36 | needed and not provided. Only used if showing residuals, or \code{xs} has 37 | two columns.} 38 | 39 | \item{mar}{Margins for plot.} 40 | 41 | \item{col}{Colours for observed data. Should be of length \code{nrow(xs)}.} 42 | 43 | \item{weights}{Similarity weights for observed data. Should be of length 44 | \code{nrow(xs)}. Usually calculated with \code{\link{similarityweight}}.} 45 | 46 | \item{view3d}{Logical; if \code{TRUE} plots a three-dimensional 47 | regression surface if possible.} 48 | 49 | \item{theta3d, phi3d}{Angles defining the viewing direction. \code{theta3d} 50 | gives the azimuthal direction and \code{phi3d} the colatitude. See 51 | \code{\link[graphics]{persp}}.} 52 | 53 | \item{xs.grid}{The grid of values defining the part of the section to 54 | visualise. Calculated if not provided.} 55 | 56 | \item{prednew}{The \code{y} values where the models in \code{model} intersect 57 | the section. Useful when providing \code{theta3d}, \code{phi3d}, or 58 | \code{weights}, where the predict methods have been called elsewhere.} 59 | 60 | \item{conf}{Logical; if \code{TRUE} plots confidence bounds (or equivalent) 61 | for models which provide this.} 62 | 63 | \item{probs}{Logical; if \code{TRUE}, shows predicted class probabilities 64 | instead of just predicted classes. Only available if \code{xs} contains two 65 | numeric predictors and the model's predict method provides this.} 66 | 67 | \item{pch}{Plot symbols for observed data} 68 | 69 | \item{residuals}{Logical; if \code{TRUE}, plots a residual versus predictor 70 | plot instead of the usual scale of raw response.} 71 | 72 | \item{main}{Character title for plot, default is 73 | \code{"Conditional expectation"}.} 74 | 75 | \item{xlim}{Graphical parameter passed to plotting functions.} 76 | 77 | \item{ylim}{Graphical parameter passed to plotting functions.} 78 | } 79 | \value{ 80 | A list containing relevant information for updating the plot. 81 | } 82 | \description{ 83 | Visualise a section in data space, showing fitted models where 84 | they intersect the section, and nearby observations. The \code{weights} for 85 | observations can be calculated with \code{\link{similarityweight}}. This 86 | function is mainly for use in \code{\link{ceplot}} and 87 | \code{\link{condtour}}. 88 | } 89 | \examples{ 90 | data(mtcars) 91 | model <- lm(mpg ~ ., data = mtcars) 92 | plotxs(xs = mtcars[, "wt", drop = FALSE], y = mtcars[, "mpg", drop = FALSE], 93 | xc.cond = mtcars[1, ], model = list(model)) 94 | 95 | } 96 | \references{ 97 | O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 98 | Visualization for Statistical Models: An Introduction to the 99 | \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 100 | \strong{81}(5), pp. 1-20. . 101 | } 102 | \seealso{ 103 | \code{\link{plotxc}}, \code{\link{ceplot}}, \code{\link{condtour}} 104 | } 105 | 106 | -------------------------------------------------------------------------------- /R/savingby2d.R: -------------------------------------------------------------------------------- 1 | #' @title Assess advantage of 2-D view over 1-D view for identifying 2 | #' extrapolation 3 | #' 4 | #' @description A simple algorithm to evaluate the advantage of by taking a 5 | #' bivariate marginal view of two variables, when trying to avoid 6 | #' extrapolations, rather than two univariate marginal views. 7 | #' 8 | #' @param x A numeric or factor vector. Can also be a dataframe containing 9 | #' \code{x} and \code{y}, if \code{y} is \code{NULL}. 10 | #' @param y A numeric or factor vector. 11 | #' @param method Character; criterion used to quantify bivariate relationships. 12 | #' Can be \code{"default"}, a scagnostic measure, or \code{"DECR"} to use a 13 | #' density estimate confidence region. 14 | #' 15 | #' @return A number between 0 and 1. Values near 1 imply no benefit to using a 16 | #' 2-D view, whereas values near 0 imply that a 2-D view reveals structure 17 | #' hidden in the 1-D views. 18 | #' 19 | #' @details If given two continuous variables, the variables are both scaled to 20 | #' mean 0 and variance 1. Then the returned value is the ratio of the area of 21 | #' the convex hull of the data to the area obtained from the product of the 22 | #' ranges of the two areas, i.e. the area of the bounding rectangle. 23 | #' 24 | #' If given two categorical variables, all combinations are tabulated. The 25 | #' returned value is the number of non-zero table entries divided by the total 26 | #' number of table entries. 27 | #' 28 | #' If given one categorical and one continuous variable, the returned value is 29 | #' the weighted mean of the range of the continuous variable within each 30 | #' category divided by the overall range of the continuous variable, where the 31 | #' weights are given by the number of observations in each level of the 32 | #' categorical variable. 33 | #' 34 | #' Requires package \code{scagnostics} if a scagnostics measure is specified 35 | #' in \code{method}. Requires package \code{hdrcde} if \code{"DECR"} (density 36 | #' estimate confidence region) is specified in \code{method}. These only apply 37 | #' to cases where \code{x} and \code{y} are both numeric. 38 | #' 39 | #' @examples 40 | #' x <- runif(1000) 41 | #' y <- runif(1000) 42 | #' plot(x, y) 43 | #' savingby2d(x, y) 44 | #' ## value near 1, no real benefit from bivariate view 45 | #' 46 | #' x1 <- runif(1000) 47 | #' y1 <- x1 + rnorm(sd = 0.3, n = 1000) 48 | #' plot(x1, y1) 49 | #' savingby2d(x1, y1) 50 | #' ## smaller value indicates that the bivariate view reveals some structure 51 | #' 52 | #' @seealso \code{\link{similarityweight}} 53 | #' 54 | #' @references O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 55 | #' Visualization for Statistical Models: An Introduction to the 56 | #' \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 57 | #' \strong{81}(5), pp. 1-20. . 58 | 59 | savingby2d <- function (x, y = NULL, method = "default") 60 | { 61 | 62 | ## Organise inputs 63 | 64 | if(is.data.frame(x) && ncol(x) > 2L) 65 | stop("'x' should have max 2 columns.") 66 | if (is.null(y) && identical(ncol(x), 2L)){ 67 | y <- x[, 2L] 68 | x <- x[, 1L] 69 | } 70 | x <- if (is.data.frame(x)) 71 | x[, 1L] 72 | else x 73 | y <- if (is.data.frame(y)) 74 | y[, 1L] 75 | else y 76 | arefactors <- vapply(list(x, y), is.factor, logical(1L)) 77 | 78 | ## Case 1: Two factors 79 | 80 | if (all(arefactors)){ 81 | tab <- table(x, y) 82 | return(sum(tab != 0) / (ncol(tab) * nrow(tab))) 83 | } else { 84 | 85 | ## Case 2: One factor, one continuous 86 | 87 | if (any(arefactors)){ 88 | if (is.factor(x)){ 89 | fac <- x 90 | cont <- y 91 | } else { 92 | fac <- y 93 | cont <- x 94 | } 95 | totalarea <- abs(diff(range(cont))) 96 | weightbyfac <- table(fac) / length(fac) 97 | lengthbyfac <- vapply(levels(fac), function(x) { 98 | if (length(cont[as.character(fac) == x]) > 1) 99 | abs(diff(range(cont[as.character(fac) == x]))) 100 | else 0 101 | }, numeric(1)) 102 | hullarea <- sum(weightbyfac * lengthbyfac) 103 | return(hullarea / totalarea) 104 | } else { 105 | 106 | ## Case 3: Both continuous 107 | 108 | ## Default method compares area of convex hull to a bounding rectangle. 109 | 110 | if (identical(method, "default")){ 111 | if (abs(cor(x, y)) > 0.995) 112 | return(0) 113 | x.scaled <- (x - mean(x)) / sd(x) 114 | y.scaled <- (y - mean(y)) / sd(y) 115 | totalarea <- abs(diff(range(x.scaled)) * diff(range(y.scaled))) 116 | conhull <- chull(x.scaled, y.scaled) 117 | hullarea <- polygonarea(x.scaled[conhull], y.scaled[conhull]) 118 | return(hullarea / totalarea) 119 | } else { 120 | 121 | ## Scagnostic measure 122 | 123 | if (method %in% c("Outlying", "Skewed", "Clumpy", "Sparse", "Striated", 124 | "Convex", "Skinny", "Stringy", "Monotonic")){ 125 | if (requireNamespace("scagnostics", quietly = TRUE)){ 126 | ratio <- scagnostics::scagnostics.default(x, y)[method] 127 | if (method %in% c("Outlying", "Skewed", "Clumpy", "Sparse", 128 | "Striated", "Skinny", "Stringy", "Monotonic")) 129 | ratio <- 1 - ratio 130 | return(ratio) 131 | } else stop("requires package 'scagnostics'") 132 | } else { 133 | 134 | ## Density estimate confidence region 135 | 136 | if (identical(method, "DECR")){ 137 | if (requireNamespace("hdrcde", quietly = TRUE)){ 138 | o <- hdrcde::hdr.2d(x, y, prob = 0.05) 139 | return(sum(o$den$z > o$falpha) / length(o$den$z)) 140 | } else stop("requires package 'hdrcde'") 141 | } else stop("unknown 'method' specified") 142 | } 143 | } 144 | } 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /R/helpers.R: -------------------------------------------------------------------------------- 1 | ## Helper functions. NOT EXPORTED. 2 | 3 | ## Helper function for extracting variable names from model objects. 4 | 5 | cleanstring <- 6 | function(string) 7 | { 8 | if(!is.character(string)) 9 | stop("'string' should be of type character") 10 | s <- unlist(strsplit(string, split = "")) 11 | if (any(c("(", ")", "^") %in% s)){ 12 | beg <- if ("(" %in% s) 13 | which(s == "(")[1] + 1 14 | else 1 15 | end <- if (any(c(")", "^") %in% s)) 16 | which(s == ")" | s == "^")[1] - 1 17 | else length(s) 18 | s <- s[beg:end] 19 | } 20 | paste(s, collapse = "") 21 | } 22 | 23 | ## Helper function for extracting variable names from model objects 24 | 25 | getvarnames <- 26 | function (model) 27 | { 28 | if (is.null(model$terms)) 29 | stop("model has no terms slot") 30 | string1 <- deparse(model$terms[[3L]]) 31 | string2 <- unlist(strsplit(string1, split = NULL)) 32 | string3 <- paste(string2[string2 != " "], collapse = "") 33 | predictors1 <- unlist(strsplit(string3, split = "+", fixed = TRUE)) 34 | predictors2 <- unique(vapply(predictors1, cleanstring, character(1L))) 35 | response <- unlist(deparse(model$terms[[2L]])) 36 | list(response = response, predictors = predictors2) 37 | } 38 | 39 | ## These are some helper functions to be used when plotxs is representing the 40 | ## predicted class probabilities using little barcharts (i.e., probs = TRUE) 41 | 42 | myglyph <- 43 | function (x, y, xw, yw, heights, col) 44 | { 45 | left <- x - 0.5 * xw 46 | right <- x + 0.5 * xw 47 | top <- y + 0.5 * yw 48 | bottom <- y - 0.5 * yw 49 | barwidth <- xw / length(heights) 50 | barleft <- seq(left, right - barwidth, barwidth) 51 | if (any(heights < 0)) 52 | stop("cannot handle negative 'heights'") 53 | if (any(heights > 1)) 54 | heights <- heights / max(heights) 55 | rect(xleft = barleft, xright = barleft + barwidth, 56 | ybottom = bottom, ytop = bottom + heights * yw, 57 | col = col 58 | ) 59 | } 60 | 61 | myglyph2 <- 62 | function (x, y, xw, yw, heights, col) 63 | { 64 | left <- x - 0.5 * xw 65 | right <- x + 0.5 * xw 66 | top <- y + 0.5 * yw 67 | bottom <- y - 0.5 * yw 68 | barwidth <- xw / length(heights) 69 | barleft <- seq(left, right - barwidth, barwidth) 70 | if (any(heights < 0)) 71 | stop("cannot handle negative 'heights'") 72 | if (any(heights > 1)) 73 | heights <- heights / max(heights) 74 | cbind(barleft, barleft + barwidth, bottom, bottom + heights*yw, 1:length(heights)) 75 | } 76 | 77 | extractprobs <- 78 | function (model, pred) 79 | { 80 | if (any(c("svm", "gbm") %in% class(model))){ 81 | if ("probabilities" %in% names(attributes(pred))){ 82 | p <- attr(pred, "probabilities") 83 | } else stop("predictions do not have 'probabilities' attribute,\n ", 84 | "maybe svm was fitted without 'probability == TRUE'") 85 | } else stop("cannot display class probabilities for this model class") 86 | p 87 | } 88 | 89 | ## Helper function to check if a point is in a rectangle 90 | 91 | `%inrectangle%` <- 92 | function (point, rectangle) 93 | { 94 | ## Assuming (x, y) and (xleft, xright, ybottom, ytop) 95 | check1 <- point[1] >= rectangle[1] 96 | check2 <- point[1] < rectangle[2] 97 | check3 <- point[2] >= rectangle[3] 98 | check4 <- point[2] < rectangle[4] 99 | check1 && check2 && check3 && check4 100 | } 101 | 102 | ## Helper function to make a dataframe representing a section defined by xc.cond 103 | 104 | makenewdata <- 105 | function (xs, xc.cond) 106 | { 107 | if (is.null(xs) || identical(ncol(xs), 0L)) 108 | return(xc.cond) 109 | newdata <- cbind(xs, xc.cond[rep(1L, nrow(xs)), ]) 110 | colnames(newdata) <- c(colnames(xs), colnames(xc.cond)) 111 | rownames(newdata) <- NULL 112 | newdata 113 | } 114 | 115 | ## Helper function to calculate a reasonable mode for numerics and factors. Can 116 | ## be used to initialise 'xc.cond'. Best used as data.frame(lapply(data, mode1)) 117 | 118 | mode1 <- 119 | function (x, breaks = 10) 120 | { 121 | if (is.factor(x)){ 122 | ux <- unique(x) 123 | out <- ux[which.max(tabulate(match(x, ux)))] 124 | } else if (is.numeric(x)){ 125 | cx <- cut(x, breaks = breaks) 126 | ucx <- unique(cx) 127 | bin <- ucx[which.max(tabulate(match(cx, ucx)))] 128 | out <- mean(as.numeric((strsplit(gsub("]", "", gsub("\\(", "", 129 | as.character(bin))), ",")[[1]]))) 130 | } else stop("mode function expects factors or numerics only") 131 | out 132 | } 133 | 134 | ## Helper function to open a device suitable for interactivity with 135 | ## grDevices::getGraphicsEvent 136 | 137 | opendev <- 138 | function (width = 7, height = 7) 139 | { 140 | orig <- options("device") 141 | if (identical(.Platform$OS.type, "windows")){ 142 | options(device = "windows") 143 | } else { 144 | options(device = "X11") 145 | if (identical(version$os, "linux-gnu")){ 146 | X11.options(type = "Xlib") 147 | } 148 | } 149 | dev.new(width = width, height = height) 150 | options(orig) 151 | } 152 | 153 | ## This is a helper function for use in parallel coordinates plots in plotxc.pcp 154 | 155 | scale2unit <- 156 | function (x) 157 | { 158 | x <- as.numeric(x) 159 | mn <- min(x) 160 | mx <- max(x) 161 | out <- (x - mn) / (mx - mn) 162 | out 163 | } 164 | 165 | ## This is a helper function used only in savingby2d. Calculates the area of a 166 | ## polygon coming out of grDevices::chull 167 | 168 | polygonarea <- function (x, y = NULL) 169 | { 170 | if (is.null(y) && identical(ncol(x), 2L)){ 171 | y <- x[, 2L] 172 | x <- x[, 1L] 173 | } 174 | area <- 0 175 | n <- length(x) 176 | j <- n 177 | for (i in 1:n){ 178 | area <- area + (x[j] + x[i]) * (y[j] - y[i]) 179 | j <- i 180 | } 181 | abs(area) / 2 182 | } 183 | 184 | ## Timestamp function to make a string for appending to filenames 185 | 186 | timestamp1 <- function() 187 | { 188 | gsub(":", "-", gsub(" ", "_", Sys.time())) 189 | } 190 | -------------------------------------------------------------------------------- /man/ceplot.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/ceplot.R 3 | \name{ceplot} 4 | \alias{ceplot} 5 | \title{Interactive conditional expectation plot} 6 | \usage{ 7 | ceplot(data, model, response = NULL, sectionvars = NULL, 8 | conditionvars = NULL, threshold = NULL, lambda = NULL, 9 | distance = c("euclidean", "maxnorm"), type = c("default", "separate", 10 | "shiny"), view3d = FALSE, Corder = "default", selectortype = "minimal", 11 | conf = FALSE, probs = FALSE, col = "black", pch = NULL, 12 | residuals = FALSE, xsplotpar = NULL, modelpar = NULL, 13 | xcplotpar = NULL) 14 | } 15 | \arguments{ 16 | \item{data}{A dataframe containing the data to plot} 17 | 18 | \item{model}{A model object, or list of model objects} 19 | 20 | \item{response}{Character name of response in \code{data}} 21 | 22 | \item{sectionvars}{Character name of variable(s) from \code{data} on which to 23 | take a section, can be of length 1 or 2.} 24 | 25 | \item{conditionvars}{Character names of conditioning variables from 26 | \code{data}. These are the predictors which we can set to single values in 27 | order to produce a section. Can be a list of vectors of length 1 or 2. Can 28 | be a character vector, which is then paired up using 29 | \code{\link{arrangeC}}. If \code{NULL}, an attempt will be made to extract 30 | all variable names which are not \code{response} or \code{sectionvars} from 31 | \code{model}, and these will be arranged using \code{\link{arrangeC}}.} 32 | 33 | \item{threshold}{This is a threshold distance. Points further than 34 | \code{threshold} away from the current section will not be visible. Passed 35 | to \code{\link{similarityweight}}.} 36 | 37 | \item{lambda}{A constant to multiply by number of factor mismatches in 38 | constructing a general dissimilarity measure. If left \code{NULL}, behaves 39 | as though \code{lambda} is set greater than \code{threshold}, and so only 40 | observations whose factor levels match the current section are visible. 41 | Passed to \code{\link{similarityweight}}.} 42 | 43 | \item{distance}{A character vector describing the type of distance measure to 44 | use, either \code{"euclidean"} (default) or \code{"maxnorm"}.} 45 | 46 | \item{type}{This specifies the type of interactive plot. \code{"default"} 47 | places everything on one device. \code{"separate"} places condition 48 | selectors on one device and the section on another. (These two options 49 | require XQuartz on OS X). \code{"shiny"} produces a Shiny application.} 50 | 51 | \item{view3d}{Logical; if \code{TRUE} plots a three-dimensional 52 | regression surface if possible.} 53 | 54 | \item{Corder}{Character name for method of ordering conditioning variables. 55 | See \code{\link{arrangeC}}.} 56 | 57 | \item{selectortype}{Type of condition selector plots to use. Must be 58 | \code{"minimal"} if \code{type} is \code{"default"}. If \code{type} is 59 | \code{"separate"}, can be \code{"pcp"} (see \code{\link{plotxc.pcp}}) or 60 | \code{"full"} (see \code{\link{plotxc.full}}).} 61 | 62 | \item{conf}{Logical; if \code{TRUE} plots confidence bounds (or equivalent) 63 | for models which provide this.} 64 | 65 | \item{probs}{Logical; if \code{TRUE}, shows predicted class probabilities 66 | instead of just predicted classes. Only available if \code{S} specifies two 67 | numeric predictors and the model's predict method provides this.} 68 | 69 | \item{col}{Colour for observed data.} 70 | 71 | \item{pch}{Plot symbols for observed data.} 72 | 73 | \item{residuals}{Logical; if \code{TRUE}, plots a residual versus predictor 74 | plot instead of the usual scale of raw response.} 75 | 76 | \item{xsplotpar}{Plotting parameters for section visualisation as a list, 77 | passed to \code{\link{plotxs}}. Can specify \code{xlim}, \code{ylim}.} 78 | 79 | \item{modelpar}{Plotting parameters for models as a list, passed to 80 | \code{\link{plotxs}}. Not used.} 81 | 82 | \item{xcplotpar}{Plotting parameters for condition selector plots as a list, 83 | passed to \code{\link{plotxc}}. Can specify \code{col} for highlighting 84 | current section, \code{cex}, and \code{trim} (see \code{\link{plotxc}}).} 85 | } 86 | \description{ 87 | Creates an interactive conditional expectation plot, which 88 | consists of two main parts. One part is a single plot depicting a section 89 | through a fitted model surface, or conditional expectation. The other part 90 | shows small data summaries which give the current condition, which can be 91 | altered by clicking with the mouse. 92 | } 93 | \examples{ 94 | \dontrun{ 95 | ## Example 1: Multivariate regression, xs one continuous predictor 96 | 97 | mtcars$cyl <- as.factor(mtcars$cyl) 98 | 99 | library(mgcv) 100 | model1 <- list( 101 | quadratic = lm(mpg ~ cyl + hp + wt + I(wt^2), data = mtcars), 102 | additive = mgcv::gam(mpg ~ cyl + hp + s(wt), data = mtcars)) 103 | 104 | conditionvars1 <- list(c("cyl", "hp")) 105 | 106 | ceplot(data = mtcars, model = model1, response = "mpg", sectionvars = "wt", 107 | conditionvars = conditionvars1, threshold = 0.3, conf = T) 108 | 109 | ## Example 2: Binary classification, xs one categorical predictor 110 | 111 | mtcars$cyl <- as.factor(mtcars$cyl) 112 | mtcars$am <- as.factor(mtcars$am) 113 | 114 | library(e1071) 115 | model2 <- list( 116 | svm = svm(am ~ mpg + wt + cyl, data = mtcars, family = "binomial"), 117 | glm = glm(am ~ mpg + wt + cyl, data = mtcars, family = "binomial")) 118 | 119 | ceplot(data = mtcars, model = model2, sectionvars = "wt", threshold = 1, 120 | type = "shiny") 121 | 122 | ## Example 3: Multivariate regression, xs both continuous 123 | 124 | mtcars$cyl <- as.factor(mtcars$cyl) 125 | mtcars$gear <- as.factor(mtcars$gear) 126 | 127 | library(e1071) 128 | model3 <- list(svm(mpg ~ wt + qsec + cyl + hp + gear, 129 | data = mtcars, family = "binomial")) 130 | 131 | conditionvars3 <- list(c("cyl","gear"), "hp") 132 | 133 | ceplot(data = mtcars, model = model3, sectionvars = c("wt", "qsec"), 134 | threshold = 1, conditionvars = conditionvars3) 135 | 136 | ceplot(data = mtcars, model = model3, sectionvars = c("wt", "qsec"), 137 | threshold = 1, type = "separate", view3d = T) 138 | 139 | ## Example 4: Multi-class classification, xs both categorical 140 | 141 | mtcars$cyl <- as.factor(mtcars$cyl) 142 | mtcars$vs <- as.factor(mtcars$vs) 143 | mtcars$am <- as.factor(mtcars$am) 144 | mtcars$gear <- as.factor(mtcars$gear) 145 | mtcars$carb <- as.factor(mtcars$carb) 146 | 147 | library(e1071) 148 | model4 <- list(svm(carb ~ ., data = mtcars, family = "binomial")) 149 | 150 | ceplot(data = mtcars, model = model4, sectionvars = c("cyl", "gear"), 151 | threshold = 3) 152 | 153 | ## Example 5: Multi-class classification, xs both continuous 154 | 155 | data(wine) 156 | wine$Class <- as.factor(wine$Class) 157 | library(e1071) 158 | 159 | model5 <- list(svm(Class ~ ., data = wine, probability = TRUE)) 160 | 161 | ceplot(data = wine, model = model5, sectionvars = c("Hue", "Flavanoids"), 162 | threshold = 3, probs = TRUE) 163 | 164 | ceplot(data = wine, model = model5, sectionvars = c("Hue", "Flavanoids"), 165 | threshold = 3, type = "separate") 166 | 167 | ceplot(data = wine, model = model5, sectionvars = c("Hue", "Flavanoids"), 168 | threshold = 3, type = "separate", selectortype = "pcp") 169 | 170 | ## Example 6: Multi-class classification, xs with one categorical predictor, 171 | ## and one continuous predictor. 172 | 173 | mtcars$cyl <- as.factor(mtcars$cyl) 174 | mtcars$carb <- as.factor(mtcars$carb) 175 | 176 | library(e1071) 177 | model6 <- list(svm(cyl ~ carb + wt + hp, data = mtcars, family = "binomial")) 178 | 179 | ceplot(data = mtcars, model = model6, threshold = 1, sectionvars = c("carb", 180 | "wt"), conditionvars = "hp") 181 | } 182 | 183 | } 184 | \references{ 185 | O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 186 | Visualization for Statistical Models: An Introduction to the 187 | \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 188 | \strong{81}(5), pp. 1-20. . 189 | } 190 | \seealso{ 191 | \code{\link{condtour}}, \code{\link{similarityweight}} 192 | } 193 | 194 | -------------------------------------------------------------------------------- /R/similarityweight.R: -------------------------------------------------------------------------------- 1 | #' @title Calculate the similarity weight for a set of observations 2 | #' 3 | #' @description Calculate the similarity weight for a set of observations, based 4 | #' on their distance from some arbitary points in data space. Observations which 5 | #' are very similar to the point under consideration are given weight 1, while 6 | #' observations which are dissimilar to the point are given weight zero. 7 | #' 8 | #' @param x A dataframe describing arbitrary points in the space of the data 9 | #' (i.e., with same \code{colnames} as \code{data}). 10 | #' @param data A dataframe representing observed data. 11 | #' @param threshold Threshold distance outside which observations will 12 | #' be assigned similarity weight zero. This is numeric and should be > 0. 13 | #' Defaults to 1. 14 | #' @param distance The type of distance measure to be used, currently just two 15 | #' types of Minkowski distance: \code{"euclidean"} (default), and 16 | #' \code{"maxnorm"}. 17 | #' @param lambda A constant to multiply by the number of categorical 18 | #' mismatches, before adding to the Minkowski distance, to give a general 19 | #' dissimilarity measure. If left \code{NULL}, behaves as though \code{lambda} 20 | #' is set larger than \code{threshold}, meaning that one factor mismatch 21 | #' guarantees zero weight. 22 | #' 23 | #' @return A numeric vector or matrix, with values from 0 to 1. The similarity 24 | #' weights for the observations in \code{data} arranged in rows for each row 25 | #' in \code{x}. 26 | #' 27 | #' @details Similarity weight is assigned to observations based on their 28 | #' distance from a given point. The distance is calculated as Minkowski 29 | #' distance between the numeric elements for the observations whose 30 | #' categorical elements match, with the option to use a more general 31 | #' dissimilarity measure comprising Minkowski distance and a mismatch count. 32 | #' 33 | #' @examples 34 | #' ## Say we want to find observations similar to the first observation. 35 | #' ## The first observation is identical to itself, so it gets weight 1. The 36 | #' ## second observation is similar, so it gets some weight. The rest are more 37 | #' ## different, and so get zero weight. 38 | #' 39 | #' data(mtcars) 40 | #' similarityweight(x = mtcars[1, ], data = mtcars) 41 | #' 42 | #' ## By increasing the threshold, we can find observations which are more 43 | #' ## approximately similar to the first row. Note that the second observation 44 | #' ## now has weight 1, so we lose some ability to discern how similar 45 | #' ## observations are by increasing the threshold. 46 | #' 47 | #' similarityweight(x = mtcars[1, ], data = mtcars, threshold = 5) 48 | #' 49 | #' ## Can provide a number of points to 'x'. Here we see that the Mazda RX4 Wag 50 | #' ## is more similar to the Merc 280 than the Mazda RX4 is. 51 | #' 52 | #' similarityweight(mtcars[1:2, ], mtcars, threshold = 3) 53 | #' 54 | #' @seealso \code{\link{dist1}} 55 | #' 56 | #' @references O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 57 | #' Visualization for Statistical Models: An Introduction to the 58 | #' \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 59 | #' \strong{81}(5), pp. 1-20. . 60 | 61 | similarityweight <- 62 | function (x, data, threshold = NULL, distance = NULL, lambda = NULL) 63 | { 64 | if (!is.null(threshold) && threshold < 0) 65 | stop("cannot have negative dissimilarity 'threshold'") 66 | 67 | ## Initialise the internal function 68 | 69 | vwfun <- .similarityweight(xc = data) 70 | 71 | ## Make empty matrix for weights 72 | 73 | k <- matrix(nrow = nrow(x), ncol = nrow(data), dimnames = list(rownames( 74 | x), rownames(data))) 75 | 76 | ## Loop through rows of 'x' 77 | 78 | for (i in 1:nrow(x)){ 79 | k[i, ] <- do.call(vwfun, list(xc.cond = x[i, , drop = FALSE], sigma = 80 | threshold, distance = distance, lambda = lambda))$k 81 | } 82 | 83 | ## Return the matrix of weights, dropping to vector if possible 84 | 85 | k[, , drop = TRUE] 86 | } 87 | 88 | ## Internal function which does some preprocessing (particularly scaling) and 89 | ## returns a function which calculates similarity weight for a single row of a 90 | ## dataframe. 91 | 92 | .similarityweight <- 93 | function (xc) 94 | { 95 | ## Scale the dataframe and calculate a few things for later use. 96 | 97 | nrow.xc <- nrow(xc) 98 | if (nrow.xc < 2) 99 | stop("cannot apply scale to data.frame with less than 2 rows") 100 | colnames.xc <- colnames(xc) 101 | arefactors <- vapply(xc, is.factor, logical(1L)) 102 | zerovar <- vapply(xc, function (x) all(duplicated(x)[-1L]) , logical(1L)) 103 | factorindex <- arefactors & !zerovar 104 | numindex <- !arefactors & !zerovar 105 | xc.factors <- data.matrix(xc[, factorindex, drop = FALSE]) 106 | xc.num <- data.matrix(xc[, numindex, drop = FALSE]) 107 | x.scaled <- scale(xc.num) 108 | k <- rep(0, nrow.xc) 109 | 110 | ## Return a function which will calculate the weights for a single arbitrary 111 | ## point in the data space. 112 | 113 | function (xc.cond, sigma = NULL, distance = c("euclidean", "maxnorm"), 114 | lambda = NULL) 115 | { 116 | ## Set up values 117 | 118 | sigma <- if (is.null(sigma)) 119 | 1 120 | else sigma 121 | distance <- match.arg(distance) 122 | p <- if (identical(distance, "maxnorm")) 1 else 2 123 | 124 | ## If 'sigma' is Inf, return 1s for all observations 125 | 126 | if (identical(sigma, Inf)) 127 | return(list(k = rep(1, nrow.xc), sigma = sigma, distance = distance)) 128 | 129 | ## Get the arbitary point in order. 130 | 131 | xc.cond <- xc.cond[, colnames.xc, drop = FALSE] 132 | xc.cond.factors <- data.matrix(xc.cond[, factorindex, drop = FALSE]) 133 | xc.cond.num <- data.matrix(xc.cond[, numindex, drop = FALSE]) 134 | 135 | ## 'factormatches' is the index of observations on which we will calculate 136 | ## the Minkowski distance. Basically pre-filtering for speed. 137 | ## 138 | ## If 'lambda' is NULL, require all factors to be equal to even bother 139 | ## calculating Minkowski distance. 140 | ## 141 | ## If 'lambda' is supplied, only want observations with less than 142 | ## (sigma / lambda) mismatches in the factors. 143 | ## 144 | ## If there are no factors, want all rows. 145 | 146 | factormatches <- if (any(factorindex)){ 147 | if (is.null(lambda)){ 148 | which((nfactormatches <- rowSums(xc.factors == matrix(xc.cond.factors, 149 | ncol = length(xc.cond.factors), nrow = nrow.xc, byrow = TRUE))) == 150 | length(xc.cond.factors)) 151 | } else { 152 | which(length(xc.cond.factors) - (nfactormatches <- rowSums(xc.factors == 153 | matrix(xc.cond.factors, ncol = length(xc.cond.factors), nrow = nrow.xc 154 | , byrow = TRUE))) <= (sigma / lambda)) 155 | } 156 | } else {rep(TRUE, nrow.xc)} 157 | 158 | ## If any observations make it past the above filtering, calculate the 159 | ## dissimilarity 'd' as Minkowski distance plus 'lambda' times number of 160 | ## factor mismatches if 'lambda' is supplied. 161 | ## 162 | ## Convert the dissimilarity to similarity weights 'k', between 0 and 1. 163 | 164 | if ((lfm <- length(factormatches)) > 0){ 165 | if (all(factorindex)){ 166 | if (is.null(lambda)){ 167 | d <- rep(0, lfm) 168 | } else { 169 | d <- lambda * (sum(factorindex) - nfactormatches[factormatches]) ^ p 170 | } 171 | } else { 172 | xcond.scaled <- (xc.cond.num - attr(x.scaled, "scaled:center")) / attr( 173 | x.scaled, "scaled:scale") 174 | d <- dist1(xcond.scaled, x.scaled[factormatches, ], inf = identical( 175 | distance, "maxnorm")) + if (any(factorindex) && !is.null(lambda)) 176 | (lambda * (sum(factorindex) - nfactormatches[factormatches])) ^ p 177 | else 0 178 | } 179 | k[factormatches] <- pmax(0, 1 - (d ^ (1 / p)) / (sigma)) 180 | } 181 | list(k = k, sigma = sigma, distance = distance) 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /R/plotxsres.R: -------------------------------------------------------------------------------- 1 | ## This is a mangled version of plotxs, being used to sandbox some ideas about 2 | ## plotting residual v predictor type plots. Not currently used 2016-06-21. 3 | 4 | plotxsres <- 5 | function (xs, y, xc.cond, model, model.colour = NULL, model.lwd = NULL, 6 | model.lty = NULL, model.name = NULL, yhat = NULL, mar = NULL, col = "black", 7 | weights = NULL, view3d = FALSE, theta3d = 45, phi3d = 20, xs.grid 8 | = NULL, prednew = NULL, conf = FALSE, probs = FALSE, pch = 1) 9 | { 10 | ny <- nrow(y) 11 | col <- rep(col, length.out = ny) 12 | dev.hold() 13 | if (is.null(weights)){ 14 | data.order <- 1:ny 15 | data.colour <- col 16 | } else { 17 | if (!identical(length(weights), ny)) 18 | stop("'weights' should be same length as number of observations") 19 | weightsgr0 <- which(weights > 0) 20 | data.order <- weightsgr0[order(weights[weightsgr0])] 21 | newcol <- (col2rgb(col[data.order]) * matrix(rep(weights[data.order], 22 | 3), nrow = 3, byrow = TRUE) / 255) + matrix(rep(1 - weights[data.order 23 | ], 3), nrow = 3, byrow = TRUE) 24 | data.colour <- rep(NA, ny) 25 | data.colour[data.order] <- rgb(t(newcol)) 26 | } 27 | pch <- rep(pch, length.out = ny) 28 | #if (!(ncol(xs) %in% 1:2)) 29 | # stop("xs must be a dataframe with 1 or 2 columns") 30 | if (ncol(y) != 1) 31 | stop("y must be a dataframe with 1 column") 32 | model <- if (!is.list(model)) 33 | list(model) 34 | else model 35 | model.colour <- if (is.null(model.colour)){ 36 | if (requireNamespace("RColorBrewer", quietly = TRUE)) 37 | RColorBrewer::brewer.pal(n = max(length(model), 3L), name = "Dark2") 38 | else rainbow(max(length(model), 4L)) 39 | } else rep(model.colour, length.out = length(model)) 40 | model.lwd <- if (is.null(model.lwd)) 41 | rep(2, length(model)) 42 | else rep(model.lwd, length.out = length(model)) 43 | model.lty <- if (is.null(model.lty)) 44 | rep(1, length(model)) 45 | else rep(model.lty, length.out = length(model)) 46 | model.name <- if(!is.null(names(model))) 47 | names(model) 48 | else seq_along(model) 49 | data.order <- if (is.null(data.order)) 50 | 1:nrow(y) 51 | else data.order 52 | data.colour <- if (is.null(data.colour)) 53 | rep("gray", length(data.order)) 54 | else data.colour 55 | pch <- if (is.null(pch)) 56 | rep(1, length(data.order)) 57 | else rep(pch, length(data.order)) 58 | yhat <- if (is.null(yhat)) 59 | lapply(model, predict1, ylevels = NULL) 60 | else yhat 61 | color <- ybg <- NULL 62 | par(mar = c(5, 4, 3, 2)) 63 | residuals <- lapply(yhat, function(x) y[, 1L] - x) 64 | resrange <- range(unlist(lapply(residuals, range))) 65 | if (is.null(xs)){ 66 | o <- hist(y[data.order, 1L], plot = FALSE) 67 | a1 <- hist(y[, 1L], plot = FALSE) 68 | } else { 69 | if (identical(ncol(xs), 1L)){ 70 | # xs has one column 71 | if (is.factor(xs[, 1L])){ 72 | # xs is a factor 73 | if (is.factor(y[, 1L])){ 74 | # y is factor 75 | stop("residuals only defined for continuous response") 76 | } else { 77 | # y is continuous 78 | plot.type <- "cf" 79 | plot(unique(xs[, 1L]), rep(-888, length(levels(xs[, 1L]))), col = NULL 80 | , main = "Conditional residualstation", xlab = colnames(xs)[1L], ylab = 81 | colnames(y)[1L], ylim = resrange) 82 | abline(h = 0, lty = 3) 83 | if (length(data.order) > 0){ 84 | for (i in 1){ 85 | points(xs[data.order, 1L], residuals[[i]][data.order], col = 86 | data.colour[data.order], pch = pch[data.order]) 87 | } 88 | } 89 | #legend("topright", legend = model.name, col = model.colour, lwd = 90 | # model.lwd, lty = model.lty) 91 | } 92 | } else { 93 | #xs is continuous 94 | if (is.factor(y[, 1L])){ 95 | # y is factor 96 | stop("residuals only defined for continuous response") 97 | } else { 98 | # y is continuous 99 | plot.type <- "cc" 100 | plot(range(xs[, 1L]), range(y[, 1L]), col = NULL, main = 101 | "Conditional residuals", xlab = colnames(xs)[1L], ylab = colnames( 102 | y)[1L], ylim = resrange) 103 | abline(h = 0, lty = 3) 104 | if (length(data.order) > 0){ 105 | for (i in 1){ 106 | points(xs[data.order, 1L], residuals[[i]][data.order], col = 107 | data.colour[data.order], pch = pch[data.order]) 108 | } 109 | } 110 | #legend("topright", legend = model.name, col = model.colour, lwd = model.lwd, 111 | # lty = model.lty) 112 | } 113 | } 114 | } else { 115 | # xs has two columns 116 | arefactorsxs <- vapply(xs, is.factor, logical(1L)) 117 | if (all(arefactorsxs)){ 118 | # xs are both factors 119 | xrect <- as.integer(xs.grid[, 1L]) 120 | yrect <- as.integer(xs.grid[, 2L]) 121 | xoffset <- abs(diff(unique(xrect)[1:2])) / 2.1 122 | yoffset <- abs(diff(unique(yrect)[1:2])) / 2.1 123 | plot(xrect, yrect, col = NULL, xlab = colnames(xs)[1L], ylab = colnames( 124 | xs)[2L], xlim = c(min(xrect) - xoffset, max(xrect) + xoffset), xaxt = 125 | "n", bty = "n", ylim = c(min(yrect) - yoffset, max(yrect) + yoffset), 126 | yaxt = "n", main = "Conditional residuals") 127 | rect(xleft = xrect - xoffset, xright = xrect + xoffset, ybottom = yrect 128 | - yoffset, ytop = yrect + yoffset, col = color) 129 | if (length(data.order) > 0) 130 | points(jitter(as.integer(xs[data.order, 1L]), amount = 0.6 * xoffset), 131 | jitter(as.integer(xs[data.order, 2L]), amount = 0.6 * yoffset), bg = 132 | ybg, col = data.colour[data.order], pch = pch[data.order]) 133 | axis(1L, at = unique(xrect), labels = levels(xs[, 1L]), tick = FALSE) 134 | axis(2L, at = unique(yrect), labels = levels(xs[, 2L]), tick = FALSE) 135 | if (is.factor(y[, 1L])){ 136 | # y is factor 137 | plot.type <- "fff" 138 | } else { 139 | # y is continuous 140 | plot.type <- "cff" 141 | } 142 | } else { 143 | if (any(arefactorsxs)){ 144 | # xs is one factor, one continuous 145 | plot.type <- if (is.factor(y[, 1L])) 146 | "ffc" # y is factor 147 | else "cfc" # y is continuous 148 | xrect <- xs.grid[, !arefactorsxs] 149 | yrect <- as.integer(xs.grid[, arefactorsxs]) 150 | xoffset <- abs(diff(unique(xrect)[1:2])) / 2 151 | yoffset <- abs(diff(unique(yrect)[1:2])) / 2.1 152 | plot(0, 0, col = NULL, xlab = colnames(xs)[!arefactorsxs], ylab = 153 | colnames(xs)[arefactorsxs], xlim = c(min(xrect) - xoffset, max(xrect 154 | ) + xoffset), bty = "n", main = "Conditional residuals", ylim = 155 | c(min(yrect) - yoffset, max(yrect) + yoffset), yaxt = "n") 156 | rect(xleft = xrect - xoffset, xright = xrect + xoffset, ybottom = 157 | yrect - yoffset, ytop = yrect + yoffset, col = color, border = NA) 158 | if (length(data.order) > 0) 159 | points(jitter(xs[data.order, !arefactorsxs]), jitter(as.integer(xs[ 160 | data.order, arefactorsxs])), bg = ybg, col = data.colour[ 161 | data.order], pch = pch[data.order]) 162 | axis(2L, at = unique(yrect), labels = levels(xs[, arefactorsxs]), 163 | tick = FALSE) 164 | } else { 165 | # xs are both continuous 166 | if (is.factor(y[, 1L])){ 167 | # y is factor 168 | stop("residuals only defined for continuous response") 169 | } else { 170 | # y is continuous 171 | plot.type <- "ccc" 172 | if (view3d){ 173 | x.persp <- unique(xs.grid[, 1L]) 174 | y.persp <- unique(xs.grid[, 2L]) 175 | z.persp <- matrix(0, ncol = length(x.persp), nrow = length( 176 | y.persp)) 177 | par(mar = c(3, 3, 3, 3)) 178 | persp.object <- suppressWarnings(persp(x = x.persp, y = y.persp, 179 | border = rgb(0.3, 0.3, 0.3), lwd = 0.1, z = z.persp, col = NULL, 180 | zlim = c(-4, 4), xlab = colnames(xs)[1L], ylab = colnames(xs)[2L 181 | ], zlab = colnames(y)[1L], d = 10, ticktype = "detailed", main = 182 | "Residuals", theta = theta3d, phi = phi3d)) 183 | if (length(data.order) > 0){ 184 | points(trans3d(xs[data.order, 1L], xs[data.order, 2L], y[ 185 | data.order, 1L], pmat = persp.object), col = data.colour[ 186 | data.order], pch = pch[data.order]) 187 | linestarts <- trans3d(xs[data.order, 1L], xs[data.order, 2L], 188 | y[data.order, 1L], pmat = persp.object) 189 | lineends <- trans3d(xs[data.order, 1L], xs[data.order, 2L], 190 | yhat[[1]][data.order], pmat = persp.object) 191 | segments(x0 = linestarts$x, y0 = linestarts$y, x1 = lineends$x 192 | , y1 = lineends$y, col = data.colour[data.order]) 193 | } 194 | } else { 195 | xoffset <- abs(diff(unique(xs.grid[, 1L])[1:2])) / 2 196 | yoffset <- abs(diff(unique(xs.grid[, 2L])[1:2])) / 2 197 | plot(range(xs.grid[, 1L]), range(xs.grid[, 2L]), col = NULL, 198 | xlab = colnames(xs)[1L], ylab = colnames(xs)[2L], main = 199 | "Conditional expectation") 200 | rect(xleft = xs.grid[, 1L] - xoffset, xright = xs.grid[, 1L] + 201 | xoffset, ybottom = xs.grid[, 2L] - yoffset, ytop = xs.grid[, 202 | 2L] + yoffset, col = color, border = NA) 203 | if (length(data.order) > 0) 204 | points(xs[data.order, , drop = FALSE], bg = ybg, col = 205 | data.colour[data.order], pch = pch[data.order]) 206 | } 207 | } 208 | } 209 | } 210 | } 211 | } 212 | dev.flush() 213 | structure(list(xs = xs, y = y, xc.cond = xc.cond, model = model, 214 | model.colour = model.colour, model.lwd = model.lwd, model.lty = model.lty, 215 | model.name = model.name, yhat = yhat, mar = par("mar"), data.colour = 216 | data.colour, data.order = data.order, view3d = view3d, theta3d = theta3d, 217 | usr = par("usr"), phi3d = phi3d, plot.type = if (exists("plot.type")) 218 | plot.type else NULL, screen = screen(), device = dev.cur(), xs.grid = 219 | xs.grid, prednew = prednew, xs.grid = xs.grid, conf = 220 | conf, probs = probs, pch = pch, residuals = residuals), class = "xsresplot") 221 | } 222 | -------------------------------------------------------------------------------- /R/ceplot.shiny.R: -------------------------------------------------------------------------------- 1 | ceplot.shiny <- 2 | function (data, model, response = NULL, S = NULL, C = NULL, sigma = NULL, lambda 3 | = NULL, distance = "euclidean", cex.axis = NULL, cex.lab = NULL, tck = NULL, 4 | view3d = FALSE, Corder = "default", conf = FALSE, separate = TRUE, 5 | select.colour = "blue", select.cex = 1, select.lwd = 2, select.type = 6 | "minimal", probs = FALSE, col = "black", pch = 1, residuals = FALSE, xc.cond = 7 | NULL, packages = NULL, xsplotpar = NULL, modelpar = NULL, xcplotpar = NULL) 8 | { 9 | ## Check for shiny package, and stop if not installed 10 | 11 | if(!requireNamespace("shiny", quietly = TRUE)) 12 | stop("requires package 'shiny'") 13 | else if (!exists("runApp")) attachNamespace("shiny") 14 | 15 | ## Set up the initial section 16 | 17 | xc.cond <- if (is.null(xc.cond)) 18 | data[1L, !colnames(data) %in% c(S, response), drop = FALSE] 19 | else xc.cond 20 | #data.frame(lapply(data[, !colnames(data) %in% c(S, response)], mode1)) 21 | 22 | ## Set some variables and the similarityweight function 23 | 24 | uniqC <- unique(unlist(C)) 25 | xcplots <- list() 26 | plotlegend <- length(S) == 2 27 | n.selector.cols <- ceiling(length(C) / 4L) 28 | selector.colwidth <- 2 29 | height <- 8 30 | col <- rep(col, length.out = nrow(data)) 31 | need3d <- identical(length(S), 2L) && all(vapply(data[, S, drop = FALSE], 32 | is.numeric, logical(1L))) && is.numeric(data[, response[1]]) 33 | seqC <- seq_along(C) 34 | lenC <- length(C) 35 | wd <- getwd() 36 | 37 | vwfun <- .similarityweight(xc = data[, uniqC, drop = FALSE]) 38 | 39 | ## These are the packages required to call predict on all models in 'model' 40 | ## If not supplied, all packages attached to the search path at the time of 41 | ## calling ceplot are recorded. 42 | 43 | packages <- if (is.null(packages)) 44 | rev(gsub("package:", "", grep("package:", search(), value = TRUE))) 45 | else packages 46 | 47 | ## Function to create the shiny ui.R file. 'deploy' switch removes certain 48 | ## elements when deploying the application. 49 | 50 | ui <- function (deploy = FALSE) 51 | { 52 | paste0(' 53 | ## This ui.R file was created by condvis:::ceplot.shiny 54 | 55 | library(shiny) 56 | load("app.Rdata") 57 | h <- "170px" 58 | hS <- "350px" 59 | basicPage( 60 | column(4, 61 | if (need3d) { 62 | tabsetPanel( 63 | tabPanel("Contour", 64 | plotOutput("plotS", height = hS, width = hS), 65 | value = 1), 66 | tabPanel("Perspective", 67 | plotOutput("plotS3D", height = hS, width = hS), 68 | value = 2), 69 | id = "tab" 70 | ) 71 | } else plotOutput("plotS", height = hS, width = hS), 72 | conditionalPanel(condition = "input.tab == 2", 73 | numericInput("phi", "Vertical rotation: ", 20, -180, 180), 74 | numericInput("theta", "Horizontal rotation: ", 45, -180, 180)), 75 | sliderInput("threshold", "Distance threshold: ", 0.01, 5, step = 76 | 0.01, value = if (is.null(sigma)) 1 else sigma), 77 | radioButtons("distance", "Distance function type:", c("euclidean", 78 | "maxnorm")), 79 | hr(), 80 | downloadButton("download", "Download snapshot (pdf)")', 81 | if (!deploy) ',\n actionButton("openDeploy", "Deploy app"), 82 | br(), 83 | conditionalPanel(condition = "input.openDeploy % 2 == 1", 84 | radioButtons("deployLocation", "", c("to web via rsconnect", 85 | "to working directory")), 86 | textInput("appName", label = "Application name (valid directory name)"), 87 | actionButton("deployButton", "Deploy app"), 88 | br(), br(), 89 | p("Help configuring", a("rsconnect", href = 90 | "http://shiny.rstudio.com/articles/shinyapps.html", target = "_blank") 91 | ) 92 | )',' 93 | ), 94 | ', if (identical(length(S), 2L)) 'column(1, 95 | plotOutput("legend", height = hS, width = "100px") 96 | ),',' 97 | column(7, 98 | fluidRow(helpText("Condition selectors")), 99 | column(4, 100 | plotOutput("plot1", click = "plot_click1", height = h, width = h), 101 | plotOutput("plot2", click = "plot_click2", height = h, width = h), 102 | plotOutput("plot3", click = "plot_click3", height = h, width = h), 103 | tableOutput("info") 104 | ) 105 | ', if (lenC > 3){',column(4, 106 | plotOutput("plot4", click = "plot_click4", height = h, width = h), 107 | plotOutput("plot5", click = "plot_click5", height = h, width = h), 108 | plotOutput("plot6", click = "plot_click6", height = h, width = h) 109 | )'}, 110 | if (lenC > 6){',column(4, 111 | plotOutput("plot7", click = "plot_click4", height = h, width = h), 112 | plotOutput("plot8", click = "plot_click5", height = h, width = h), 113 | plotOutput("plot9", click = "plot_click6", height = h, width = h) 114 | )'}, ' 115 | ) 116 | ) 117 | ') 118 | } 119 | 120 | ## Function to create the shiny server.R file. 'deploy' switch removes certain 121 | ## elements when deploying the application. 122 | 123 | server <- function (deploy = FALSE){ 124 | paste( 125 | ' ## This server.R file was created by condvis:::ceplot.shiny 126 | 127 | library(condvis) 128 | library(shiny) 129 | 130 | ## Include the packages required for the application. If these have not been 131 | ## specified, the package list will be inferred from the search path at the 132 | ## time ceplot was called. 133 | \n ', 134 | paste(paste0("library(", packages, ")"), collapse = "\n ") 135 | ,' 136 | 137 | ## Shiny server 138 | 139 | shinyServer(function (input, output) 140 | { 141 | ## Load the objects that were in the environment of the ceplot call. 142 | 143 | #load("app.Rdata") 144 | 145 | ## Reactive value for the current condition/section 146 | 147 | rv <- reactiveValues(xc.cond = xc.cond) 148 | 149 | ## Event listeners for mouseclicks on plots 150 | ', 151 | paste(' 152 | observeEvent({input$plot_click', seqC,'}, { 153 | rv$xc.cond[, xcplots[[', seqC,']]$name] <<- condvis:::update.xcplot( 154 | xcplots[[', seqC,']], xclick = input$plot_click', seqC,'$x, yclick = 155 | input$plot_click', seqC,'$y, user = TRUE, draw = FALSE)$xc.cond.old 156 | }) 157 | ', sep = '', collapse = '\n'), 158 | ' 159 | ## Do condition selector plots. 160 | ', 161 | paste(" 162 | output$plot", seqC, " <- renderPlot({ 163 | i <- ", seqC, " 164 | xcplots[[i]] <<- plotxc(xc = data[, C[[i]]], xc.cond = rv$xc.cond[1L, 165 | C[[i]]], name = colnames(data[, C[[i]], drop = FALSE]), trim = 166 | xcplotpar$trim, select.colour = select.colour, select.cex = select.cex, 167 | hist2d = xcplotpar$hist2d, fullbin = xcplotpar$fullbin) 168 | })" 169 | , sep = "", collapse = "\n"), ' 170 | 171 | ## Next do the section visualisation. 172 | 173 | vw <- NULL 174 | output$plotS <- renderPlot({ 175 | vw <<- vwfun(xc.cond = rv$xc.cond, sigma = input$threshold, distance = 176 | input$distance, lambda = lambda) 177 | xsplot <<- condvis:::plotxs(xs = data[, S, drop = FALSE], data[, response 178 | , drop = FALSE], xc.cond = rv$xc.cond, model = model, col = col, weights 179 | = vw$k, view3d = FALSE, conf = conf, probs = probs, pch = pch, 180 | model.colour = modelpar$col, model.lwd = modelpar$lwd, model.lty = 181 | modelpar$lty, main = xsplotpar$main, xlim = xsplotpar$xlim, ylim = 182 | xsplotpar$ylim) 183 | }) 184 | 185 | ## Section visualisation for 3-D perspective mesh. 186 | 187 | output$plotS3D <- renderPlot({ 188 | vw <<- vwfun(xc.cond = rv$xc.cond, sigma = input$threshold, distance = 189 | input$distance, lambda = lambda) 190 | xsplot <<- condvis:::plotxs(xs = data[, S, drop = FALSE], data[, response 191 | , drop = FALSE], xc.cond = rv$xc.cond, model = model, col = col, 192 | weights = vw$k, view3d = TRUE, conf = conf, probs = probs, pch = pch, 193 | model.colour = modelpar$col, model.lwd = modelpar$lwd, model.lty = 194 | modelpar$lty, main = xsplotpar$main, xlim = xsplotpar$xlim, ylim = 195 | xsplotpar$ylim) 196 | }) 197 | 198 | ## Legend for section 199 | 200 | output$legend <- renderPlot({ 201 | condvis:::xslegend(y = data[, response[1]], name = response[1]) 202 | }) 203 | 204 | ## Give a basic table showing the section/condition values 205 | 206 | output$info <- renderTable({ 207 | structure(rv$xc.cond, row.names = "section") 208 | }) 209 | 210 | ## Allow the user to download a snapshot of the current visualisation 211 | 212 | output$download <- downloadHandler(filename = function() { paste0( 213 | "condvis-download-", condvis:::timestamp1(), ".pdf")}, { 214 | function(file){ 215 | n.selector.cols <- ceiling(length(C) / 4L) 216 | select.colwidth <- max(min(0.18 * n.selector.cols, 0.45), 0.2) 217 | width <- 8.5 + 2 * n.selector.cols 218 | pdf(file = file, width = width, height = 8) 219 | condvis:::ceplot.static(data = data, model = model, response = response, 220 | S = S, C = C, cex.axis = cex.axis, cex.lab = cex.lab, tck = tck, 221 | xc.cond = rv$xc.cond, weights = vw$k, col = col, select.colour = 222 | select.colour, select.cex = select.cex, conf = conf, probs = probs) 223 | dev.off() 224 | } 225 | })', 226 | if (!deploy){' 227 | 228 | ## Code after here relates to deploying the current application, and will 229 | ## not be present in a deployed application. 230 | 231 | observeEvent(input$deployButton, { 232 | folderpath <- if (input$deployLocation == "to working directory") 233 | wd 234 | else tempdir() 235 | appname <- if (input$appName == "") 236 | "condvis-shinyapp" 237 | else input$appName 238 | deploy.path <- paste0(folderpath, "/", appname) 239 | dir.create(deploy.path, showWarnings = FALSE) 240 | write(ui(deploy = TRUE), file = paste0(deploy.path, "/ui.R")) 241 | write(server(deploy = TRUE), file = paste0(deploy.path, "/server.R")) 242 | file.copy(from = paste0(app.path, "/app.Rdata"), to = paste0(deploy.path, 243 | "/app.Rdata"), overwrite = TRUE) 244 | file.copy(from = paste0(app.path, "/global.R"), to = paste0(deploy.path, 245 | "/global.R"), overwrite = TRUE) 246 | if (input$deployLocation == "to web via rsconnect"){ 247 | if (!requireNamespace("rsconnect", quietly = TRUE)) 248 | stop("requires package \'rsconnect\'") 249 | else if (!exists("deployApp")) attachNamespace("rsconnect") 250 | rsconnect::deployApp(deploy.path) 251 | } 252 | })'}, ' 253 | }) 254 | ') 255 | } 256 | 257 | ## Create a temporary directory to store the application files (including a 258 | ## snapshot of the objects in this environment in "app.Rdata") and run the 259 | ## application 260 | 261 | app.path <- paste0(tempdir(), "/condvis-shinyapp-temp") 262 | dir.create(app.path, showWarnings = FALSE) 263 | write(ui(), file = paste0(app.path, "/ui.R")) 264 | write(server(), file = paste0(app.path, "/server.R")) 265 | write("load(\"app.Rdata\", envir=.GlobalEnv)\n", file = paste0(app.path, 266 | "/global.R")) 267 | save(list = union(ls(), ls(.GlobalEnv)), file = paste0(app.path, "/app.Rdata") 268 | ) 269 | shiny::runApp(appDir = app.path) 270 | } 271 | -------------------------------------------------------------------------------- /R/copied.R: -------------------------------------------------------------------------------- 1 | ## This is code that has been copied and modified. NOT EXPORTED. 2 | 3 | ## This code is copied from graphics::barplot.default and edited to return some 4 | ## info to allow subsequent editing of the plot. Originall written by R Core 5 | ## Team. Copied and modified by Mark O'Connell, December 2015. 6 | 7 | barplot2 <- function (height, width = 1, space = NULL, names.arg = NULL, 8 | legend.text = NULL, beside = FALSE, horiz = FALSE, density = NULL, 9 | angle = 45, col = NULL, border = par("fg"), main = NULL, 10 | sub = NULL, xlab = NULL, ylab = NULL, xlim = NULL, ylim = NULL, 11 | xpd = TRUE, log = "", axes = TRUE, axisnames = TRUE, cex.axis = 12 | par("cex.axis"), cex.names = par("cex.axis"), inside = TRUE, plot = TRUE, 13 | axis.lty = 0, offset = 0, add = FALSE, args.legend = NULL, 14 | ...) 15 | { 16 | if (!missing(inside)) 17 | .NotYetUsed("inside", error = FALSE) 18 | if (is.null(space)) 19 | space <- if (is.matrix(height) && beside) 20 | c(0, 1) 21 | else 0.2 22 | space <- space * mean(width) 23 | if (plot && axisnames && is.null(names.arg)) 24 | names.arg <- if (is.matrix(height)) 25 | colnames(height) 26 | else names(height) 27 | if (is.vector(height) || (is.array(height) && (length(dim(height)) == 28 | 1))) { 29 | height <- cbind(height) 30 | beside <- TRUE 31 | if (is.null(col)) 32 | col <- "grey" 33 | } 34 | else if (is.matrix(height)) { 35 | if (is.null(col)) 36 | col <- gray.colors(nrow(height)) 37 | } 38 | else stop("'height' must be a vector or a matrix") 39 | if (is.logical(legend.text)) 40 | legend.text <- if (legend.text && is.matrix(height)) 41 | rownames(height) 42 | stopifnot(is.character(log)) 43 | logx <- logy <- FALSE 44 | if (log != "") { 45 | logx <- length(grep("x", log)) > 0L 46 | logy <- length(grep("y", log)) > 0L 47 | } 48 | if ((logx || logy) && !is.null(density)) 49 | stop("Cannot use shading lines in bars when log scale is used") 50 | NR <- nrow(height) 51 | NC <- ncol(height) 52 | if (beside) { 53 | if (length(space) == 2) 54 | space <- rep.int(c(space[2L], rep.int(space[1L], 55 | NR - 1)), NC) 56 | width <- rep_len(width, NR) 57 | } 58 | else { 59 | width <- rep_len(width, NC) 60 | } 61 | offset <- rep_len(as.vector(offset), length(width)) 62 | delta <- width/2 63 | w.r <- cumsum(space + width) 64 | w.m <- w.r - delta 65 | w.l <- w.m - delta 66 | log.dat <- (logx && horiz) || (logy && !horiz) 67 | if (log.dat) { 68 | if (min(height + offset, na.rm = TRUE) <= 0) 69 | stop("log scale error: at least one 'height + offset' value <= 0") 70 | if (logx && !is.null(xlim) && min(xlim) <= 0) 71 | stop("log scale error: 'xlim' <= 0") 72 | if (logy && !is.null(ylim) && min(ylim) <= 0) 73 | stop("log scale error: 'ylim' <= 0") 74 | rectbase <- if (logy && !horiz && !is.null(ylim)) 75 | ylim[1L] 76 | else if (logx && horiz && !is.null(xlim)) 77 | xlim[1L] 78 | else 0.9 * min(height, na.rm = TRUE) 79 | } 80 | else rectbase <- 0 81 | if (!beside) 82 | height <- rbind(rectbase, apply(height, 2L, cumsum)) 83 | rAdj <- offset + (if (log.dat) 84 | 0.9 * height 85 | else -0.01 * height) 86 | delta <- width/2 87 | w.r <- cumsum(space + width) 88 | w.m <- w.r - delta 89 | w.l <- w.m - delta 90 | if (horiz) { 91 | if (is.null(xlim)) 92 | xlim <- range(rAdj, height + offset, na.rm = TRUE) 93 | if (is.null(ylim)) 94 | ylim <- c(min(w.l), max(w.r)) 95 | } 96 | else { 97 | if (is.null(xlim)) 98 | xlim <- c(min(w.l), max(w.r)) 99 | if (is.null(ylim)) 100 | ylim <- range(rAdj, height + offset, na.rm = TRUE) 101 | } 102 | if (beside) 103 | w.m <- matrix(w.m, ncol = NC) 104 | if (plot) { 105 | dev.hold() 106 | opar <- if (horiz) 107 | par(xaxs = "i", xpd = xpd) 108 | else par(yaxs = "i", xpd = xpd) 109 | on.exit({ 110 | dev.flush() 111 | par(opar) 112 | }) 113 | if (!add) { 114 | plot.new() 115 | plot.window(xlim, ylim, log = log, ...) 116 | } 117 | xyrect <- function(x1, y1, x2, y2, horizontal = TRUE, 118 | ...) { 119 | if (horizontal) 120 | rect(x1, y1, x2, y2, ...) 121 | else rect(y1, x1, y2, x2, ...) 122 | } 123 | if (beside) 124 | xyrect(rectbase + offset, w.l, c(height) + offset, 125 | w.r, horizontal = horiz, angle = angle, density = density, 126 | col = col, border = border) 127 | else { 128 | for (i in 1L:NC) { 129 | xyrect(height[1L:NR, i] + offset[i], w.l[i], 130 | height[-1, i] + offset[i], w.r[i], horizontal = horiz, 131 | angle = angle, density = density, col = col, 132 | border = border) 133 | } 134 | } 135 | if (axisnames && !is.null(names.arg)) { 136 | at.l <- if (length(names.arg) != length(w.m)) { 137 | if (length(names.arg) == NC) 138 | colMeans(w.m) 139 | else stop("incorrect number of names") 140 | } 141 | else w.m 142 | axis(if (horiz) 143 | 2 144 | else 1, at = at.l, labels = names.arg, lty = axis.lty, 145 | cex.axis = cex.names, ...) 146 | } 147 | if (!is.null(legend.text)) { 148 | legend.col <- rep_len(col, length(legend.text)) 149 | if ((horiz & beside) || (!horiz & !beside)) { 150 | legend.text <- rev(legend.text) 151 | legend.col <- rev(legend.col) 152 | density <- rev(density) 153 | angle <- rev(angle) 154 | } 155 | xy <- par("usr") 156 | if (is.null(args.legend)) { 157 | legend(xy[2L] - xinch(0.1), xy[4L] - yinch(0.1), 158 | legend = legend.text, angle = angle, density = density, 159 | fill = legend.col, xjust = 1, yjust = 1) 160 | } 161 | else { 162 | args.legend1 <- list(x = xy[2L] - xinch(0.1), 163 | y = xy[4L] - yinch(0.1), legend = legend.text, 164 | angle = angle, density = density, fill = legend.col, 165 | xjust = 1, yjust = 1) 166 | args.legend1[names(args.legend)] <- args.legend 167 | do.call("legend", args.legend1) 168 | } 169 | } 170 | title(main = main, sub = sub, xlab = xlab, ylab = ylab, 171 | ...) 172 | if (axes) 173 | axis(if (horiz) 174 | 1 175 | else 2, cex.axis = cex.axis, ...) 176 | invisible(list(w.m = w.m, w.l = w.l, w.r = w.r, height = height)) 177 | } 178 | else list(w.m = w.m, w.l = w.l, w.r = w.r, height = height) 179 | } 180 | 181 | ## This code was copied from graphics:::spineplot.default and modified to return 182 | ## info for subsequent editing. Originally written by Achim Zeileis. Copied and 183 | ## modified by Mark O'Connell, December 2015. 184 | 185 | spineplot2 <- 186 | function (x, y = NULL, breaks = NULL, tol.ylab = 0.05, 187 | off = NULL, ylevels = NULL, col = NULL, main = "", xlab = NULL, 188 | ylab = NULL, xaxlabels = NULL, yaxlabels = NULL, xlim = NULL, 189 | ylim = c(0, 1), axes = TRUE, ...) 190 | { 191 | if (missing(y)) { 192 | if (length(dim(x)) != 2L) 193 | stop("a 2-way table has to be specified") 194 | tab <- x 195 | x.categorical <- TRUE 196 | if (is.null(xlab)) 197 | xlab <- names(dimnames(tab))[1L] 198 | if (is.null(ylab)) 199 | ylab <- names(dimnames(tab))[2L] 200 | xnam <- dimnames(tab)[[1L]] 201 | ynam <- dimnames(tab)[[2L]] 202 | ny <- NCOL(tab) 203 | nx <- NROW(tab) 204 | } 205 | else { 206 | if (!is.factor(y)) 207 | stop("dependent variable should be a factor") 208 | if (!is.null(ylevels)) 209 | y <- factor(y, levels = if (is.numeric(ylevels)) 210 | levels(y)[ylevels] 211 | else ylevels) 212 | x.categorical <- is.factor(x) 213 | if (is.null(xlab)) 214 | xlab <- deparse(substitute(x)) 215 | if (is.null(ylab)) 216 | ylab <- deparse(substitute(y)) 217 | if (x.categorical) { 218 | tab <- table(x, y) 219 | xnam <- levels(x) 220 | nx <- NROW(tab) 221 | } 222 | ynam <- levels(y) 223 | ny <- length(ynam) 224 | } 225 | if (is.null(col)) 226 | col <- gray.colors(ny) 227 | col <- rep_len(col, ny) 228 | off <- if (!x.categorical) 229 | 0 230 | else if (is.null(off)) 231 | 0.02 232 | else off/100 233 | yaxlabels <- if (is.null(yaxlabels)) 234 | ynam 235 | else rep_len(yaxlabels, ny) 236 | if (x.categorical) { 237 | xat <- c(0, cumsum(prop.table(margin.table(tab, 1)) + 238 | off)) 239 | xaxlabels <- if (is.null(xaxlabels)) 240 | xnam 241 | else rep_len(xaxlabels, nx) 242 | } 243 | else { 244 | if (!(xnumeric <- is.numeric(x))) { 245 | xorig <- x 246 | x <- as.numeric(x) 247 | } 248 | if (is.null(breaks)) { 249 | breaks <- list() 250 | } 251 | else { 252 | breaks <- as.numeric(breaks) 253 | } 254 | if (!is.list(breaks)) 255 | breaks <- list(breaks = breaks) 256 | breaks <- c(list(x = x), breaks) 257 | breaks$plot <- FALSE 258 | breaks <- do.call("hist", breaks)$breaks 259 | x1 <- cut(x, breaks = breaks, include.lowest = TRUE) 260 | xat <- c(0, cumsum(prop.table(table(x1)))) 261 | tab <- table(x1, y) 262 | nx <- NROW(tab) 263 | xaxlabels <- if (is.null(xaxlabels)) { 264 | if (xnumeric) 265 | breaks 266 | else c(xorig[1L], xorig[c(diff(as.numeric(x1)) > 267 | 0, TRUE)]) 268 | } 269 | else { 270 | rep_len(xaxlabels, nx + 1L) 271 | } 272 | } 273 | yat <- rbind(0, apply(prop.table(tab, 1), 1L, cumsum)) 274 | yat[is.na(yat)] <- 1 275 | if (is.null(xlim)) 276 | xlim <- c(0, 1 + off * (nx - 1L)) 277 | else if (any(xlim < 0) || any(xlim > 1)) { 278 | warning("x axis is on a cumulative probability scale", 279 | ", 'xlim' must be in [0,1]") 280 | if (min(xlim) > 1 || max(xlim) < 0) 281 | xlim <- c(0, 1) 282 | else xlim <- c(max(min(xlim), 0), min(max(xlim), 1)) 283 | } 284 | if (any(ylim < 0) || any(ylim > 1)) { 285 | warning("y axis is on a cumulative probability scale", 286 | ", 'ylim' must be in [0,1]") 287 | if (min(ylim) > 1 || max(ylim) < 0) 288 | ylim <- c(0, 1) 289 | else ylim <- c(max(min(ylim), 0), min(max(ylim), 1)) 290 | } 291 | dev.hold() 292 | on.exit(dev.flush()) 293 | plot(0, 0, xlim = xlim, ylim = ylim, type = "n", axes = FALSE, 294 | xaxs = "i", yaxs = "i", main = main, xlab = xlab, ylab = ylab) 295 | ybottom <- as.vector(yat[-(ny + 1L), ]) 296 | ytop <- as.vector(yat[-1L, ]) 297 | xleft <- rep(xat[1L:nx], rep(ny, nx)) 298 | xright <- rep(xat[2L:(nx + 1L)] - off, rep(ny, nx)) 299 | col <- rep(col, nx) 300 | rect(xleft, ybottom, xright, ytop, col = col, ...) 301 | if (axes) { 302 | if (x.categorical) 303 | axis(1, at = (xat[1L:nx] + xat[2L:(nx + 1L)] - off)/2, 304 | labels = xaxlabels, tick = FALSE) 305 | else axis(1, at = xat, labels = xaxlabels) 306 | yat <- yat[, 1L] 307 | equidist <- any(diff(yat) < tol.ylab) 308 | yat <- if (equidist) 309 | seq.int(1/(2 * ny), 1 - 1/(2 * ny), by = 1/ny) 310 | else (yat[-1L] + yat[-length(yat)])/2 311 | axis(2, at = yat, labels = yaxlabels, tick = FALSE) 312 | # 313 | # #axis(4) # removing this line to stop the proportion axis 314 | # 315 | } 316 | if (!x.categorical) 317 | box() 318 | names(dimnames(tab)) <- c(xlab, ylab) 319 | # 320 | # #invisible(tab) # removing this line and adding our own return 321 | # 322 | list(ybottom = ybottom, ytop = ytop, xleft = xleft, xright = xright, 323 | xnames = names(xright), ynames = rep(yaxlabels, 324 | times = length(xaxlabels)), xat = xat, yat = yat, nx = nx, off = off) 325 | } 326 | -------------------------------------------------------------------------------- /R/ceplot.R: -------------------------------------------------------------------------------- 1 | #' @title Interactive conditional expectation plot 2 | #' 3 | #' @description Creates an interactive conditional expectation plot, which 4 | #' consists of two main parts. One part is a single plot depicting a section 5 | #' through a fitted model surface, or conditional expectation. The other part 6 | #' shows small data summaries which give the current condition, which can be 7 | #' altered by clicking with the mouse. 8 | #' 9 | #' @param data A dataframe containing the data to plot 10 | #' @param model A model object, or list of model objects 11 | #' @param response Character name of response in \code{data} 12 | #' @param sectionvars Character name of variable(s) from \code{data} on which to 13 | #' take a section, can be of length 1 or 2. 14 | #' @param conditionvars Character names of conditioning variables from 15 | #' \code{data}. These are the predictors which we can set to single values in 16 | #' order to produce a section. Can be a list of vectors of length 1 or 2. Can 17 | #' be a character vector, which is then paired up using 18 | #' \code{\link{arrangeC}}. If \code{NULL}, an attempt will be made to extract 19 | #' all variable names which are not \code{response} or \code{sectionvars} from 20 | #' \code{model}, and these will be arranged using \code{\link{arrangeC}}. 21 | #' @param threshold This is a threshold distance. Points further than 22 | #' \code{threshold} away from the current section will not be visible. Passed 23 | #' to \code{\link{similarityweight}}. 24 | #' @param lambda A constant to multiply by number of factor mismatches in 25 | #' constructing a general dissimilarity measure. If left \code{NULL}, behaves 26 | #' as though \code{lambda} is set greater than \code{threshold}, and so only 27 | #' observations whose factor levels match the current section are visible. 28 | #' Passed to \code{\link{similarityweight}}. 29 | #' @param distance A character vector describing the type of distance measure to 30 | #' use, either \code{"euclidean"} (default) or \code{"maxnorm"}. 31 | #' @param type This specifies the type of interactive plot. \code{"default"} 32 | #' places everything on one device. \code{"separate"} places condition 33 | #' selectors on one device and the section on another. (These two options 34 | #' require XQuartz on OS X). \code{"shiny"} produces a Shiny application. 35 | #' @param view3d Logical; if \code{TRUE} plots a three-dimensional 36 | #' regression surface if possible. 37 | #' @param Corder Character name for method of ordering conditioning variables. 38 | #' See \code{\link{arrangeC}}. 39 | #' @param selectortype Type of condition selector plots to use. Must be 40 | #' \code{"minimal"} if \code{type} is \code{"default"}. If \code{type} is 41 | #' \code{"separate"}, can be \code{"pcp"} (see \code{\link{plotxc.pcp}}) or 42 | #' \code{"full"} (see \code{\link{plotxc.full}}). 43 | #' @param conf Logical; if \code{TRUE} plots confidence bounds (or equivalent) 44 | #' for models which provide this. 45 | #' @param probs Logical; if \code{TRUE}, shows predicted class probabilities 46 | #' instead of just predicted classes. Only available if \code{S} specifies two 47 | #' numeric predictors and the model's predict method provides this. 48 | #' @param col Colour for observed data. 49 | #' @param pch Plot symbols for observed data. 50 | #' @param residuals Logical; if \code{TRUE}, plots a residual versus predictor 51 | #' plot instead of the usual scale of raw response. 52 | #' @param xsplotpar Plotting parameters for section visualisation as a list, 53 | #' passed to \code{\link{plotxs}}. Can specify \code{xlim}, \code{ylim}. 54 | #' @param modelpar Plotting parameters for models as a list, passed to 55 | #' \code{\link{plotxs}}. Not used. 56 | #' @param xcplotpar Plotting parameters for condition selector plots as a list, 57 | #' passed to \code{\link{plotxc}}. Can specify \code{col} for highlighting 58 | #' current section, \code{cex}, and \code{trim} (see \code{\link{plotxc}}). 59 | #' 60 | #' @examples 61 | #' \dontrun{ 62 | #' ## Example 1: Multivariate regression, xs one continuous predictor 63 | #' 64 | #' mtcars$cyl <- as.factor(mtcars$cyl) 65 | #' 66 | #' library(mgcv) 67 | #' model1 <- list( 68 | #' quadratic = lm(mpg ~ cyl + hp + wt + I(wt^2), data = mtcars), 69 | #' additive = mgcv::gam(mpg ~ cyl + hp + s(wt), data = mtcars)) 70 | #' 71 | #' conditionvars1 <- list(c("cyl", "hp")) 72 | #' 73 | #' ceplot(data = mtcars, model = model1, response = "mpg", sectionvars = "wt", 74 | #' conditionvars = conditionvars1, threshold = 0.3, conf = T) 75 | #' 76 | #' ## Example 2: Binary classification, xs one categorical predictor 77 | #' 78 | #' mtcars$cyl <- as.factor(mtcars$cyl) 79 | #' mtcars$am <- as.factor(mtcars$am) 80 | #' 81 | #' library(e1071) 82 | #' model2 <- list( 83 | #' svm = svm(am ~ mpg + wt + cyl, data = mtcars, family = "binomial"), 84 | #' glm = glm(am ~ mpg + wt + cyl, data = mtcars, family = "binomial")) 85 | #' 86 | #' ceplot(data = mtcars, model = model2, sectionvars = "wt", threshold = 1, 87 | #' type = "shiny") 88 | #' 89 | #' ## Example 3: Multivariate regression, xs both continuous 90 | #' 91 | #' mtcars$cyl <- as.factor(mtcars$cyl) 92 | #' mtcars$gear <- as.factor(mtcars$gear) 93 | #' 94 | #' library(e1071) 95 | #' model3 <- list(svm(mpg ~ wt + qsec + cyl + hp + gear, 96 | #' data = mtcars, family = "binomial")) 97 | #' 98 | #' conditionvars3 <- list(c("cyl","gear"), "hp") 99 | #' 100 | #' ceplot(data = mtcars, model = model3, sectionvars = c("wt", "qsec"), 101 | #' threshold = 1, conditionvars = conditionvars3) 102 | #' 103 | #' ceplot(data = mtcars, model = model3, sectionvars = c("wt", "qsec"), 104 | #' threshold = 1, type = "separate", view3d = T) 105 | #' 106 | #' ## Example 4: Multi-class classification, xs both categorical 107 | #' 108 | #' mtcars$cyl <- as.factor(mtcars$cyl) 109 | #' mtcars$vs <- as.factor(mtcars$vs) 110 | #' mtcars$am <- as.factor(mtcars$am) 111 | #' mtcars$gear <- as.factor(mtcars$gear) 112 | #' mtcars$carb <- as.factor(mtcars$carb) 113 | #' 114 | #' library(e1071) 115 | #' model4 <- list(svm(carb ~ ., data = mtcars, family = "binomial")) 116 | #' 117 | #' ceplot(data = mtcars, model = model4, sectionvars = c("cyl", "gear"), 118 | #' threshold = 3) 119 | #' 120 | #' ## Example 5: Multi-class classification, xs both continuous 121 | #' 122 | #' data(wine) 123 | #' wine$Class <- as.factor(wine$Class) 124 | #' library(e1071) 125 | #' 126 | #' model5 <- list(svm(Class ~ ., data = wine, probability = TRUE)) 127 | #' 128 | #' ceplot(data = wine, model = model5, sectionvars = c("Hue", "Flavanoids"), 129 | #' threshold = 3, probs = TRUE) 130 | #' 131 | #' ceplot(data = wine, model = model5, sectionvars = c("Hue", "Flavanoids"), 132 | #' threshold = 3, type = "separate") 133 | #' 134 | #' ceplot(data = wine, model = model5, sectionvars = c("Hue", "Flavanoids"), 135 | #' threshold = 3, type = "separate", selectortype = "pcp") 136 | #' 137 | #' ## Example 6: Multi-class classification, xs with one categorical predictor, 138 | #' ## and one continuous predictor. 139 | #' 140 | #' mtcars$cyl <- as.factor(mtcars$cyl) 141 | #' mtcars$carb <- as.factor(mtcars$carb) 142 | #' 143 | #' library(e1071) 144 | #' model6 <- list(svm(cyl ~ carb + wt + hp, data = mtcars, family = "binomial")) 145 | #' 146 | #' ceplot(data = mtcars, model = model6, threshold = 1, sectionvars = c("carb", 147 | #' "wt"), conditionvars = "hp") 148 | #' } 149 | #' 150 | #' @seealso \code{\link{condtour}}, \code{\link{similarityweight}} 151 | #' 152 | #' @references O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 153 | #' Visualization for Statistical Models: An Introduction to the 154 | #' \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 155 | #' \strong{81}(5), pp. 1-20. . 156 | 157 | ceplot <- 158 | function (data, model, response = NULL, sectionvars = NULL, conditionvars = NULL 159 | , threshold = NULL, lambda = NULL, distance = c("euclidean", "maxnorm"), type 160 | = c("default", "separate", "shiny"), view3d = FALSE, Corder = "default", 161 | selectortype = "minimal", conf = FALSE, probs = FALSE, col = "black", pch = 162 | NULL, residuals = FALSE, xsplotpar = NULL, modelpar = NULL, xcplotpar = NULL) 163 | { 164 | ## Rename for internal 165 | 166 | S <- sectionvars 167 | C <- conditionvars 168 | sigma <- threshold 169 | 170 | ## Check for optional inputs 171 | 172 | cex.axis <- xcplotpar$cex.axis 173 | cex.lab <- xcplotpar$cex.lab 174 | tck <- xcplotpar$tck 175 | select.colour <- if (is.null(xcplotpar$col)) 176 | "blue" 177 | else xcplotpar$col 178 | select.cex <- if (is.null(xcplotpar$cex)) 179 | 1 180 | else xcplotpar$cex 181 | 182 | ## Prepare variables 183 | 184 | data <- na.omit(data) 185 | model <- if (!inherits(model, "list")) 186 | list(model) 187 | else model 188 | model.name <- if (!is.null(names(model))) 189 | names(model) 190 | else paste("model", 1:length(model), sep = "_") 191 | varnamestry <- try(getvarnames(model[[1]]), silent = TRUE) 192 | response <- if (is.null(response)) 193 | if (class(varnamestry) != "try-error") 194 | varnamestry$response[1L] 195 | else stop("could not extract response from 'model'.") 196 | else if (is.integer(response)){ 197 | colnames(data)[response] 198 | } else response 199 | 200 | type <- match.arg(type) 201 | 202 | ## Need to check here if XQuartz is available when on OS X platform. 203 | ## capabilities() is hanging up my R session when called with no XQuartz 204 | ## installed. The problem seems to be that .Internal(capabilitiesX11()) just 205 | ## keeps waiting for X11 to be found, instead of returning FALSE, when some X11 206 | ## files are installed, but X11.app/XQuartz.app is missing. 207 | ## Resorting to a rough system call of ls on the Utilities directory. 208 | 209 | if (identical(.Platform$OS.type, "unix")){ 210 | if (grepl("^darwin", R.version$os)){ 211 | # if (!capabilities("X11")) 212 | if (!any(c("XQuartz.app", "X11.app") %in% system2(command = "ls", args = 213 | "/Applications/Utilities/", stdout = TRUE))){ 214 | warning("no X11 available, setting 'type' to \"shiny\"") 215 | type <- "shiny" 216 | } 217 | } 218 | } 219 | 220 | ## If no section variables have been specified, just try and pick the first 221 | ## predictor out of the model. 222 | 223 | # S <- if(is.null(S)){ 224 | # if (!inherits(varnamestry, "try-error")){ 225 | # varnamestry$predictors[1L] 226 | # } else { 227 | # setdiff(colnames(data), response)[1L] 228 | # } 229 | # } else if (is.integer(S)){ 230 | # colnames(data)[S] 231 | # } else S 232 | 233 | ## Hierarchy for specifying C 234 | ## 1. If user supplies list, use that exactly (maybe chop length). 235 | ## 2. If user supplies vector, use that but order/group it. 236 | ## 3. If user supplies nothing, try to extract from model, then order. 237 | ## 4. Else bung in everything from the data that isn't 'response' or 'S' and 238 | ## then try to order that and show the top 20. 239 | 240 | if (is.list(C)){ 241 | C <- C[1:min(length(C), 20L)] 242 | } else if (is.vector(C)){ 243 | C <- arrangeC(data[, setdiff(C, S), drop = FALSE], method = Corder) 244 | } else if (!inherits(varnamestry, "try-error")){ 245 | possibleC <- unique(unlist(lapply(lapply(model, getvarnames), `[[`, 2))) 246 | C <- arrangeC(data[, setdiff(intersect(possibleC, colnames(data)), S), drop 247 | = FALSE], method = Corder) 248 | } else { 249 | C <- arrangeC(data[, setdiff(colnames(data), c(S, response)), drop = FALSE], 250 | method = Corder) 251 | } 252 | C <- C[1:min(length(C), 20L)] 253 | 254 | uniqC <- unique(unlist(C)) 255 | 256 | ## A few checks on choices of response, S and C. 257 | 258 | if (any(response %in% uniqC)) 259 | stop("cannot have 'response' variable in 'C'") 260 | if (any(response %in% S)) 261 | stop("cannot have 'response' variable in 'S'") 262 | if (!identical(length(intersect(S, uniqC)), 0L)) 263 | stop("cannot have variables common to both 'S' and 'C'") 264 | 265 | ## Set up col so it is a vector with length equal to nrow(data). Default pch to 266 | ## 1, or 21 for using background colour to represent observed values. 267 | 268 | nr.data <- nrow(data) 269 | col <- rep(col, length.out = nr.data) 270 | pch <- if (is.null(pch)){ 271 | if (identical(length(S), 2L)) 272 | rep(21, nr.data) 273 | else rep(1, nr.data) 274 | } else rep(pch, length.out = nr.data) 275 | 276 | ## Make the appropriate call to an internal ceplot function 277 | 278 | if (identical(type, "default")){ 279 | ceplot.interactive(data = data, model = model, response = response, S = S, 280 | C = C, sigma = sigma, lambda = lambda, distance = distance, cex.axis = 281 | cex.axis, cex.lab = cex.lab, tck = tck, view3d = view3d, Corder = Corder, 282 | conf = conf, separate = FALSE, select.colour = select.colour, select.cex = 283 | select.cex, probs = probs, col = col, pch = pch, residuals = residuals, 284 | xsplotpar = xsplotpar, modelpar = modelpar, xcplotpar = xcplotpar) 285 | } else if (identical(type, "separate")){ 286 | ceplot.interactive(data = data, model = model, response = response, S = S, 287 | C = C, sigma = sigma, lambda = lambda, distance = distance, cex.axis = 288 | cex.axis, cex.lab = cex.lab, tck = tck, view3d = view3d, Corder = Corder, 289 | conf = conf, separate = TRUE, select.colour = select.colour, select.cex = 290 | select.cex, probs = probs, col = col, pch = pch, select.type = 291 | selectortype, residuals = residuals, xsplotpar = xsplotpar, modelpar = 292 | modelpar, xcplotpar = xcplotpar) 293 | } else if (identical(type, "shiny")){ 294 | ceplot.shiny(data = data, model = model, response = response, S = S, 295 | C = C, sigma = sigma, lambda = lambda, distance = distance, cex.axis 296 | = cex.axis, cex.lab = cex.lab, tck = tck, view3d = view3d, Corder = Corder 297 | , conf = conf, separate = FALSE, select.colour = select.colour, select.cex 298 | = select.cex, probs = probs, col = col, pch = pch, residuals = residuals, 299 | xsplotpar = xsplotpar, modelpar = modelpar, xcplotpar = xcplotpar) 300 | } 301 | } 302 | -------------------------------------------------------------------------------- /R/ceplot.interactive.R: -------------------------------------------------------------------------------- 1 | ## This is the default interactive plot for visualising sections which are 2 | ## chosen interactively. NOT EXPORTED. 3 | 4 | ceplot.interactive <- 5 | function (data, model, response = NULL, S = NULL, C = NULL, sigma = NULL, lambda 6 | = NULL, distance = "euclidean", cex.axis = NULL, cex.lab = NULL, tck = NULL, 7 | view3d = FALSE, Corder = "default", conf = FALSE, separate = TRUE, 8 | select.colour = "blue", select.cex = 1, select.lwd = 2, select.type = 9 | "minimal", probs = FALSE, col = "black", pch = 1, residuals = FALSE, xc.cond = 10 | NULL, xsplotpar = NULL, modelpar = NULL, xcplotpar = NULL) 11 | { 12 | uniqC <- unique(unlist(C)) 13 | xc.cond <- if (is.null(xc.cond)) 14 | data[1, setdiff(colnames(data), c(S, response)), drop = FALSE] 15 | else xc.cond 16 | #data.frame(lapply(data[, !colnames(data) %in% c(S, response)], mode1)) 17 | xcplots <- list() 18 | coords <- matrix(ncol = 4L, nrow = length(C)) 19 | plotlegend <- length(S) == 2 20 | n.selector.cols <- ceiling(length(C) / 4L) 21 | selector.colwidth <- 2 22 | height <- 8 23 | col <- rep(col, length.out = nrow(data)) 24 | vwfun <- .similarityweight(xc = data[, uniqC, drop = FALSE]) 25 | if (separate){ 26 | 27 | ## Plot condition selectors on a separate device 28 | 29 | ## Set up section visualisation first 30 | 31 | width <- height + 0.5 * plotlegend 32 | opendev(width = width, height = height) 33 | devexp <- dev.cur() 34 | close.screen(all.screens = TRUE) 35 | legendwidth <- 1.15 / height 36 | xsscreens <- if (plotlegend){ 37 | split.screen(figs = matrix(c(0, 1 - legendwidth, 1 - legendwidth, 1, 0, 0, 38 | 1, 1), ncol = 4)) 39 | } else split.screen() 40 | if (plotlegend){ 41 | screen(xsscreens[2L]) 42 | xslegend(data[, response], response) 43 | } 44 | screen(xsscreens[1L]) 45 | vw <- vwfun(xc.cond = xc.cond, sigma = sigma, distance = distance, lambda = 46 | lambda) 47 | par(mar = c(3, 3, 3, 3)) 48 | 49 | ## Check whether response should be raw or residual 50 | 51 | if (residuals){ 52 | xsplot <- plotxsres(xs = data[, S, drop = FALSE], data[, response, drop = 53 | FALSE], xc.cond = xc.cond, model = model, col = col, weights = vw$k, 54 | view3d = view3d, conf = conf, probs = probs, pch = pch, model.colour = 55 | modelpar$col, model.lwd = modelpar$lwd, model.lty = 56 | modelpar$lty) 57 | } else { 58 | xsplot <- plotxs(xs = data[, S, drop = FALSE], data[, response, drop = 59 | FALSE], xc.cond = xc.cond, model = model, col = col, weights = vw$k, 60 | view3d = view3d, conf = conf, probs = probs, pch = pch, model.colour = 61 | modelpar$col, model.lwd = modelpar$lwd, model.lty = 62 | modelpar$lty, main = xsplotpar$main, xlim = xsplotpar$xlim, ylim = 63 | xsplotpar$ylim) 64 | } 65 | xscoords <- par("fig") 66 | 67 | ## Produce the condition selector plots. Can be either "minimal", meaning 68 | ## bivariate and univariate plots, "pcp" for parallel coordinates or "full" 69 | ## for a full scatterplot matrix. 70 | 71 | if (length(uniqC) > 0){ 72 | if (identical(select.type, "minimal")){ 73 | xcwidth <- selector.colwidth * n.selector.cols 74 | n.selector.rows <- ceiling(length(C) / n.selector.cols) 75 | xcheight <- selector.colwidth * n.selector.rows 76 | opendev(height = xcheight, width = xcwidth) 77 | close.screen(all.screens = TRUE) 78 | xcscreens <- split.screen(c(n.selector.rows, n.selector.cols)) 79 | for (i in seq_along(C)){ 80 | screen(xcscreens[i]) 81 | xcplots[[i]] <- plotxc(xc = data[, C[[i]]], xc.cond = xc.cond[1L, C[[ 82 | i]]], name = colnames(data[, C[[i]], drop = FALSE]), trim = 83 | xcplotpar$trim, select.colour = select.colour, select.cex = 84 | select.cex, hist2d = xcplotpar$hist2d, fullbin = xcplotpar$fullbin) 85 | coords[i, ] <- par("fig") 86 | } 87 | } else if (identical(select.type, "pcp")){ 88 | xcwidth <- 7 89 | xcheight <- 3 90 | opendev(height = xcheight, width = xcwidth) 91 | xcplots <- plotxc.pcp(Xc = data[, uniqC, drop = FALSE], Xc.cond = 92 | xc.cond[1, uniqC, drop = FALSE], select.colour = select.colour, 93 | select.lwd = select.lwd, cex.axis = cex.axis, cex.lab = cex.lab, tck = 94 | tck, select.cex = select.cex) 95 | } else if (identical(select.type, "full")){ 96 | xcwidth <- 7 97 | opendev(height = xcwidth, width = xcwidth) 98 | xcplots <- plotxc.full(Xc = data[, uniqC, drop = FALSE], Xc.cond = 99 | xc.cond[1, uniqC, drop = FALSE], select.colour = select.colour, 100 | select.lwd = select.lwd, cex.axis = cex.axis, cex.lab = cex.lab, tck = 101 | tck, select.cex = select.cex) 102 | } else stop("'select.type' must be one of 'minimal', 'pcp' or 'full'") 103 | devcond <- dev.cur() 104 | } 105 | } else { 106 | 107 | ## Otherwise, put everything on one device. 108 | 109 | ## Do condition selectors first 110 | 111 | width <- height + 0.5 * plotlegend + selector.colwidth * n.selector.cols 112 | opendev(width = width, height = height) 113 | close.screen(all.screens = TRUE) 114 | xcwidth <- selector.colwidth * n.selector.cols / width 115 | mainscreens <- split.screen(figs = matrix(c(0, 1 - xcwidth, 1 - xcwidth, 1, 116 | 0, 0, 1, 1), ncol = 4L)) 117 | xcscreens <- split.screen(c(4L, n.selector.cols), screen = mainscreens[2L]) 118 | if (length(uniqC) > 0){ 119 | for (i in seq_along(C)){ 120 | screen(xcscreens[i]) 121 | xcplots[[i]] <- plotxc(xc = data[, C[[i]]], xc.cond = xc.cond[1L, 122 | C[[i]]], name = colnames(data[, C[[i]], drop = FALSE]), trim = 123 | xcplotpar$trim, select.colour = select.colour, select.cex = 124 | select.cex, hist2d = xcplotpar$hist2d, fullbin = xcplotpar$fullbin) 125 | coords[i, ] <- par("fig") 126 | } 127 | } 128 | 129 | ## Do section visualisation 130 | 131 | legendwidth <- 1.15 / height 132 | xsscreens <- if (plotlegend){ 133 | split.screen(figs = matrix(c(0, 1 - legendwidth, 1 - legendwidth, 1, 0, 0, 1 134 | , 1), ncol = 4), screen = mainscreens[1L]) 135 | } else mainscreens[1L] 136 | if (plotlegend){ 137 | screen(xsscreens[2L]) 138 | xslegend(data[, response], response) 139 | } 140 | screen(xsscreens[1L]) 141 | vw <- vwfun(xc.cond = xc.cond, sigma = sigma, distance = distance, lambda = 142 | lambda) 143 | par(mar = c(3, 3, 3, 3)) 144 | if (residuals){ 145 | xsplot <- plotxsres(xs = data[, S, drop = FALSE], data[, response, drop = 146 | FALSE], xc.cond = xc.cond, model = model, col = col, weights = vw$k, 147 | view3d = view3d, conf = conf, probs = probs, pch = pch, model.colour = 148 | modelpar$col, model.lwd = modelpar$lwd, model.lty = 149 | modelpar$lty) 150 | } else { 151 | xsplot <- plotxs(xs = data[, S, drop = FALSE], data[, response, drop = 152 | FALSE], xc.cond = xc.cond, model = model, col = col, weights = vw$k, 153 | view3d = view3d, conf = conf, probs = probs, pch = pch, model.colour = 154 | modelpar$col, model.lwd = modelpar$lwd, model.lty = 155 | modelpar$lty, main = xsplotpar$main, xlim = xsplotpar$xlim, ylim = 156 | xsplotpar$ylim) 157 | } 158 | xscoords <- par("fig") 159 | xold <- NULL 160 | yold <- NULL 161 | } 162 | 163 | ## Define event handling functions; mouseclick and keystroke. 164 | 165 | mouseclick <- function (separate = FALSE) 166 | { 167 | function (buttons, x, y) 168 | { 169 | if (0 %in% buttons){ 170 | needupdate <- FALSE 171 | if (identical(select.type, "minimal")){ 172 | plotindex <- which(apply(coords, 1, `%inrectangle%`, point = c(x, y))) 173 | if ((length(plotindex) > 0) && (0 %in% buttons)){ 174 | xcplots[[plotindex]] <<- update(xcplots[[plotindex]], x, y) 175 | if (any(xc.cond[, xcplots[[plotindex]]$name] != xcplots[[plotindex] 176 | ]$xc.cond.old)){ 177 | needupdate <- TRUE 178 | xc.cond[, xcplots[[plotindex]]$name] <<- xcplots[[plotindex] 179 | ]$xc.cond.old 180 | } 181 | } 182 | } else if (select.type %in% c("pcp", "full")){ 183 | xcplots <<- update(xcplots, x, y) 184 | if (any(xc.cond[, uniqC] != xcplots$Xc.cond[, uniqC])){ 185 | needupdate <- TRUE 186 | xc.cond[, uniqC] <<- xcplots$Xc.cond 187 | } 188 | } 189 | if (needupdate){ 190 | vw <<- vwfun(xc.cond = xc.cond, sigma = vw$sigma, distance = 191 | vw$distance, lambda = lambda) 192 | xsplot <<- update(xsplot, xc.cond = xc.cond, weights = vw$k) 193 | } 194 | if (all(!separate, findInterval(x, xscoords[1:2]) == 1, identical( 195 | xsplot$plot.type, "ccc"), xsplot$view3d, 0 %in% buttons)){ 196 | if (!is.null(xold)) 197 | xsplot <<- update(xsplot, theta3d = xsplot$theta3d + 1 * (xold > x) 198 | - 1 * (xold < x), phi3d = xsplot$phi3d + 1 * (yold > y) - 1 * ( 199 | yold < y), xs.grid = xsplot$xs.grid, prednew = xsplot$prednew) 200 | xold <<- x 201 | yold <<- y 202 | } 203 | points(NULL) 204 | } 205 | } 206 | } 207 | keystroke <- function () 208 | { 209 | function (key) 210 | { 211 | 212 | ## 'q' key ends the interactive session. 213 | 214 | if (identical(key, "q")){ 215 | cat("\nInteractive session ended.\n") 216 | return(invisible(1)) 217 | } 218 | 219 | ## Direction keys rotate a 3-D perspective plot. 220 | 221 | if (identical(xsplot$plot.type, "ccc") & xsplot$view3d & key %in% c("Up", 222 | "Down", "Left", "Right")){ 223 | xsplot <<- update(xsplot, theta3d = xsplot$theta3d - 2 * (key == "Right" 224 | ) + 2 * (key == "Left"), phi3d = xsplot$phi3d - 2 * (key == "Up") + 2 225 | * (key == "Down"), xs.grid = xsplot$xs.grid, prednew = xsplot$prednew) 226 | } 227 | 228 | ## ',' and '.' decrease and increase the threshold distance used for similarity 229 | ## weight. 230 | 231 | if (key %in% c(",", ".")){ 232 | sigma <- vw$sigma + 0.01 * vw$sigma * (key == ".") - 0.01 * vw$sigma * 233 | (key == ",") 234 | vw <<- vwfun(xc.cond = xc.cond, sigma = sigma, distance = vw$distance, 235 | lambda = lambda) 236 | xsplot <<- update(xsplot, weights = vw$k, xs.grid = xsplot$xs.grid, 237 | newdata = xsplot$newdata, prednew = xsplot$prednew) 238 | } 239 | 240 | ## 's' key saves a pdf snapshot to the working directory. Saved in two files if 241 | ## 'separate' is true. 242 | 243 | if (identical(key, "s")){ 244 | if (separate){ 245 | filename <- paste("snapshot_", gsub(":", ".", gsub(" ", "_", 246 | Sys.time())), c("-expectation.pdf", "-condition.pdf"), sep = "") 247 | dev.set(devexp) 248 | devexpsize <- dev.size() 249 | pdf(file = filename[1L], width = devexpsize[1L], height = 250 | devexpsize[2L]) 251 | close.screen(all.screens = TRUE) 252 | xsscreens <- if (plotlegend){ 253 | split.screen(figs = matrix(c(0, 1 - legendwidth, 1 - legendwidth, 1, 254 | 0, 0, 1, 1), ncol = 4L)) 255 | } else split.screen() 256 | if (plotlegend){ 257 | screen(xsscreens[2L]) 258 | xslegend(data[, response], response) 259 | } 260 | screen(xsscreens[1L]) 261 | if (residuals){ 262 | plotxsres(xs = data[, S, drop = FALSE], data[, response, drop = 263 | FALSE], xc.cond = xc.cond, model = model, col = col, weights = 264 | vw$k, view3d = xsplot$view3d, theta3d = xsplot$theta3d, phi3d = 265 | xsplot$phi3d, conf = conf, probs = probs, pch = pch, model.colour 266 | = modelpar$col, model.lwd = modelpar$lwd, model.lty = modelpar$lty 267 | ) 268 | } else { 269 | plotxs(xs = data[, S, drop = FALSE], data[, response, drop = FALSE], 270 | xc.cond = xc.cond, model = model, col = col, weights = vw$k, 271 | view3d = xsplot$view3d, theta3d = xsplot$theta3d, phi3d = 272 | xsplot$phi3d, conf = conf, probs = probs, pch = pch, model.colour 273 | = modelpar$col, model.lwd = modelpar$lwd, model.lty = modelpar$lty 274 | , main = xsplotpar$main, xlim = xsplotpar$xlim, ylim = 275 | xsplotpar$ylim) 276 | } 277 | dev.off() 278 | cat(paste("\nSnapshot saved: '", filename[1L],"'", sep = "")) 279 | dev.set(devcond) 280 | devcondsize <- dev.size() 281 | pdf(file = filename[2L], width = devcondsize[1L], height = 282 | devcondsize[2L]) 283 | close.screen(all.screens = TRUE) 284 | xcscreens <- split.screen(c(n.selector.rows, n.selector.cols)) 285 | for (i in seq_along(C)){ 286 | screen(xcscreens[i]) 287 | plotxc(xc = xcplots[[i]]$xc, xc.cond = xcplots[[i]]$xc.cond.old, 288 | name = xcplots[[i]]$name, trim = FALSE, select.colour = xcplots[[i 289 | ]]$select.colour, select.cex = xcplots[[i]]$select.cex, hist2d = 290 | xcplotpar$hist2d, fullbin = xcplotpar$fullbin) 291 | } 292 | dev.off() 293 | cat(paste("\nSnapshot saved: '", filename[2L],"'", sep = "")) 294 | cat("\n") 295 | } else { 296 | filename <- paste("snapshot_", gsub(":", ".", gsub(" ", "_", 297 | Sys.time())), ".pdf", sep = "") 298 | pdf(file = filename, width = width, height = height) 299 | close.screen(all.screens = TRUE) 300 | xcwidth <- selector.colwidth * n.selector.cols / width 301 | mainscreens <- split.screen(figs = matrix(c(0, 1 - xcwidth, 1 - 302 | xcwidth, 1, 0, 0, 1, 1), ncol = 4)) 303 | xcscreens <- split.screen(c(4, n.selector.cols), screen = 304 | mainscreens[2L]) 305 | for (i in seq_along(C)){ 306 | screen(xcscreens[i]) 307 | plotxc(xc = xcplots[[i]]$xc, xc.cond = xcplots[[i]]$xc.cond.old, 308 | name = xcplots[[i]]$name, trim = FALSE, select.colour = xcplots[[i 309 | ]]$select.colour, select.cex = xcplots[[i]]$select.cex, hist2d = 310 | xcplotpar$hist2d, fullbin = xcplotpar$fullbin) 311 | } 312 | xsscreens <- if (plotlegend){ 313 | split.screen(figs = matrix(c(0, 1 - legendwidth, 1 - legendwidth, 1, 314 | 0, 0, 1, 1), ncol = 4L), screen = mainscreens[1L]) 315 | } else mainscreens[1L] 316 | if (plotlegend){ 317 | screen(xsscreens[2L]) 318 | xslegend(data[, response], response) 319 | } 320 | screen(xsscreens[1L]) 321 | if (residuals){ 322 | plotxsres(xs = data[, S, drop = FALSE], data[, response, drop = 323 | FALSE], xc.cond = xc.cond, model = model, col = col, weights = 324 | vw$k, view3d = xsplot$view3d, theta3d = xsplot$theta3d, phi3d = 325 | xsplot$phi3d, conf = conf, probs = probs, pch = 1, model.colour = 326 | modelpar$col, model.lwd = modelpar$lwd, model.lty = 327 | modelpar$lty) 328 | } else { 329 | plotxs(xs = data[, S, drop = FALSE], data[, response, drop = FALSE], 330 | xc.cond = xc.cond, model = model, col = col, weights = vw$k, 331 | view3d = xsplot$view3d, theta3d = xsplot$theta3d, phi3d = 332 | xsplot$phi3d, conf = conf, probs = probs, pch = pch, model.colour 333 | = modelpar$col, model.lwd = modelpar$lwd, model.lty = 334 | modelpar$lty, main = xsplotpar$main, xlim = xsplotpar$xlim, ylim = 335 | xsplotpar$ylim) 336 | } 337 | dev.off() 338 | cat(paste("\nSnapshot saved: '", filename,"'\n", sep = "")) 339 | } 340 | } 341 | points(NULL) 342 | } 343 | } 344 | setGraphicsEventHandlers( 345 | onMouseDown = mouseclick(separate), 346 | onMouseMove = mouseclick(separate), 347 | onKeybd = keystroke()) 348 | #getGraphicsEventEnv() 349 | getGraphicsEvent() 350 | } 351 | -------------------------------------------------------------------------------- /R/plotxc.R: -------------------------------------------------------------------------------- 1 | #' @title Condition selector plot 2 | #' 3 | #' @description Data visualisations used to select sections for 4 | #' \code{\link{ceplot}}. 5 | #' 6 | #' @param xc A numeric or factor vector, or a dataframe with two columns 7 | #' @param xc.cond Same type as \code{xc}, representing a single point in data 8 | #' space to highlight. 9 | #' @param name The variable name for \code{xc} 10 | #' @param trim Logical; if \code{TRUE}, long tails of continuous data are 11 | #' chopped off at the 5th and 95th percentiles. 12 | #' @param select.colour Colour to highlight \code{xc.cond} 13 | #' @param select.lwd Line weight to highlight \code{xc.cond} 14 | #' @param cex.axis Axis text scaling 15 | #' @param cex.lab Label text scaling 16 | #' @param tck Plot axis tick size 17 | #' @param select.cex Plot symbol size 18 | #' @param hist2d If \code{TRUE}, a scatterplot is visualised as a 2-D histogram. 19 | #' Default behaviour is to use a 2-D histogram if there are over 2,000 20 | #' observations. 21 | #' @param fullbin A cap on the counts in a bin for the 2-D histogram, helpful 22 | #' with skewed data. Larger values give more detail about data density. 23 | #' Defaults to 25. 24 | #' @param ... Passed to \code{condvis:::spineplot2}. 25 | #' 26 | #' @return Produces a plot, and returns a list containing the relevant 27 | #' information to update the plot at a later stage. 28 | #' 29 | #' @seealso \code{\link{ceplot}}, \code{\link{plotxs}}. 30 | #' 31 | #' @examples 32 | #' ## Histogram, highlighting the first case. 33 | #' 34 | #' data(mtcars) 35 | #' obj <- plotxc(mtcars[, "mpg"], mtcars[1, "mpg"]) 36 | #' obj$usr 37 | #' 38 | #' ## Barplot, highlighting 'cyl' = 6. 39 | #' 40 | #' plotxc(as.factor(mtcars[, "cyl"]), 6, select.colour = "blue") 41 | #' 42 | #' ## Scatterplot, highlighting case 25. 43 | #' 44 | #' plotxc(mtcars[, c("qsec", "wt")], mtcars[25, c("qsec", "wt")], 45 | #' select.colour = "blue", select.lwd = 1, lty = 3) 46 | #' 47 | #' ## Boxplot, where 'xc' contains one factor, and one numeric. 48 | #' 49 | #' mtcars$carb <- as.factor(mtcars$carb) 50 | #' plotxc(mtcars[, c("carb", "wt")], mtcars[25, c("carb", "wt")], 51 | #' select.colour = "red", select.lwd = 3) 52 | #' 53 | #' ## Spineplot, where 'xc' contains two factors. 54 | #' 55 | #' mtcars$gear <- as.factor(mtcars$gear) 56 | #' mtcars$cyl <- as.factor(mtcars$cyl) 57 | #' plotxc(mtcars[, c("cyl", "gear")], mtcars[25, c("cyl", "gear")], 58 | #' select.colour = "red") 59 | #' 60 | #' ## Effect of 'trim'. 61 | #' 62 | #' x <- c(-200, runif(400), 200) 63 | #' plotxc(x, 0.5, trim = FALSE, select.colour = "red") 64 | #' plotxc(x, 0.5, trim = TRUE, select.colour = "red") 65 | #' 66 | #' @seealso \code{\link{plotxs}}, \code{\link{ceplot}}, \code{\link{condtour}} 67 | #' 68 | #' @references O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 69 | #' Visualization for Statistical Models: An Introduction to the 70 | #' \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 71 | #' \strong{81}(5), pp. 1-20. . 72 | 73 | ## plotxc plots a univariate or bivariate view of predictors, 74 | ## highlighting one selected point, which represents a section in the data 75 | ## space. 76 | 77 | plotxc <- 78 | function (xc, xc.cond, name = NULL, trim = NULL, select.colour = NULL, 79 | select.lwd = NULL, cex.axis = NULL, cex.lab = NULL, tck = NULL, select.cex = 1 80 | , hist2d = NULL, fullbin = NULL, ...) 81 | { 82 | hist2d <- if (is.null(hist2d)) 83 | TRUE 84 | else hist2d 85 | trim <- if (is.null(trim)) 86 | TRUE 87 | else trim 88 | select.colour <- if (is.null(select.colour)) 89 | "black" 90 | else select.colour 91 | select.lwd <- if (is.null(select.lwd)) 92 | 2 93 | else select.lwd 94 | mar <- if (!exists("mar")) 95 | c(3, 3, 0.5, 0.5) 96 | else mar 97 | cex.axis <- if (identical(version$os, "linux-gnu")) 98 | 1 99 | else if (is.null(cex.axis)) 100 | 0.7 101 | else cex.axis 102 | cex.lab <- if (identical(version$os, "linux-gnu")) 103 | 1 104 | else if (is.null(cex.lab)) 105 | 0.8 106 | else cex.lab 107 | tck <- if (is.null(tck)) 108 | - 0.2 109 | else tck 110 | par(mar = mar) 111 | par(mgp = c(1.5, 0.5, 0)) 112 | if (is.vector(xc) | is.factor(xc)){ 113 | if (!is.factor(xc)){ 114 | 115 | ## Histogram 116 | 117 | ## To deal with long tails, we try chopping them off. 118 | if (trim){ 119 | if (diff(range(xc, na.rm = TRUE)) / diff(q1 <- quantile(xc, 120 | c(0.05, 0.95), na.rm = TRUE)) > 3){ 121 | xc <- xc[findInterval(xc, q1) == 1] 122 | } 123 | } 124 | histmp <- hist(xc, xlab = name, ylab = "", main = "", cex.axis = cex.axis, 125 | cex.lab = cex.lab, tcl = tck, mgp = c(1.5, 0.5, 0.1)) 126 | lines(x = rep(xc.cond, 2L), y = c(0, max(histmp$counts)), col = 127 | select.colour, lwd = select.lwd) 128 | plot.type <- "histogram" 129 | } else { 130 | 131 | ## Bar plot 132 | 133 | bartmp <- barplot2(table(xc), main = "", xlab = name, cex.axis = cex.axis, 134 | cex.lab = cex.lab, tcl = tck) 135 | factorcoords <- data.frame(level = levels(xc), x = - 0.5 + 1.2 * (1: 136 | length(levels(xc)))) 137 | barindex <- factorcoords$level == as.character(xc.cond) 138 | rect(xleft = bartmp$w.l[barindex], xright = bartmp$w.r[barindex], ybottom 139 | = 0, ytop = bartmp$height[barindex], col = select.colour, density = -1) 140 | plot.type <- "barplot" 141 | xc.cond <- factor(xc.cond, levels(xc)) 142 | } 143 | } else { 144 | if (is.data.frame(xc) & identical(ncol(xc), 2L)){ 145 | are.factors <- vapply(xc,is.factor, logical(1)) 146 | if (all(are.factors)){ 147 | 148 | ## Spineplot, segmented barchart 149 | 150 | sptmp <- spineplot2(table(xc), ...) 151 | xmatch <- as.character(xc.cond[, 1]) == levels(xc[, 1]) 152 | ymatch <- as.character(xc.cond[, 2]) == levels(xc[, 2]) 153 | xlev <- levels(xc[, 1])[xmatch] 154 | ylev <- levels(xc[, 2])[ymatch] 155 | match.index <- (sptmp$xnames == xlev) & (sptmp$ynames == ylev) 156 | rect(xleft = sptmp$xleft[match.index], ybottom = sptmp$ybottom[ 157 | match.index], xright = sptmp$xright[match.index], ytop = sptmp$ytop[ 158 | match.index], col = select.colour, density = -1) 159 | axis(1, at = ((sptmp$xat[1L:sptmp$nx] + sptmp$xat[2L:(sptmp$nx + 1L)] - 160 | sptmp$off)/2)[xmatch], labels = unique(sptmp$xnames)[xmatch], tick = 161 | FALSE, col.axis = select.colour) 162 | axis(2, at = sptmp$yat[ymatch], labels = unique(sptmp$ynames)[ymatch], 163 | col.axis = select.colour, tick = FALSE) 164 | plot.type <- "spineplot" 165 | } else { 166 | if (any(are.factors)){ 167 | 168 | ## Boxplot 169 | 170 | boxx <- xc[, are.factors] 171 | boxy <- xc[, !are.factors] 172 | boxtmp <- boxplot(boxy ~ boxx, xlab = name[are.factors], ylab = name[ 173 | !are.factors], cex.axis = cex.axis, cex.lab = cex.lab) 174 | factorcoords <- data.frame(level = levels(xc[, are.factors]), x = 1: 175 | length(levels(xc[, are.factors]))) 176 | abline(v = factorcoords$x[as.character(factorcoords$level) == 177 | as.character(xc.cond[,are.factors])], h = xc.cond[!are.factors], lwd 178 | = select.lwd, col = select.colour) 179 | plot.type <- "boxplot" 180 | xc <- xc[, order(!are.factors)] 181 | xc.cond <- data.frame(factor(xc.cond[, are.factors], 182 | levels(boxx)), xc.cond[, !are.factors]) 183 | name <- name[order(!are.factors)] 184 | names(xc.cond) <- name 185 | } else { 186 | 187 | ## Scatterplot, going to 2-D histogram if required/possible 188 | 189 | ## To deal with long tails, we try chopping them off. 190 | 191 | if (trim){ 192 | index1 <- index2 <- rep(TRUE, nrow(xc)) 193 | if (diff(range(xc[, 1], na.rm = TRUE)) / diff(q1 <- quantile(xc[, 1] 194 | , c(0.05, 0.95), na.rm = TRUE)) > 3){ 195 | index1 <- findInterval(xc[, 1], q1) == 1 196 | } 197 | if (diff(range(xc[, 2], na.rm = TRUE)) / diff(q2 <- quantile(xc[, 2] 198 | , c(0.05, 0.95), na.rm = TRUE)) > 3){ 199 | index2 <- findInterval(xc[, 2], q2) == 1 200 | } 201 | xc <- xc[index1 & index2, ] 202 | } 203 | 204 | if (hist2d && nrow(xc) > 2000 && requireNamespace("gplots", quietly = 205 | TRUE)){ 206 | b <- seq(0.35, 1, length.out = 16) 207 | fullbin <- if (is.null(fullbin)) 208 | 25 209 | else fullbin 210 | gplots::hist2d(xc[, 1], xc[, 2], nbins = 50, col = c("white", rgb(1 211 | - b, 1 - b, 1 - b)), xlab = colnames(xc)[1], ylab = colnames(xc)[ 212 | 2], cex.axis = cex.axis, cex.lab = cex.lab, tcl = tck, FUN = 213 | function(x) min(length(x), fullbin)) 214 | box() 215 | } else { 216 | plot.default(xc[, 1], xc[, 2], xlab = colnames(xc)[1], ylab = 217 | colnames(xc)[2], cex.axis = cex.axis, cex.lab = cex.lab, tcl = tck 218 | , cex = select.cex) 219 | } 220 | abline(v = xc.cond[1], h = xc.cond[2], lwd = select.lwd, col = 221 | select.colour) 222 | plot.type <- "scatterplot" 223 | } 224 | } 225 | } else stop("Unexpected value for 'xc'") 226 | } 227 | invisible(structure(list(xc = xc, xc.cond.old = xc.cond, name = name, 228 | select.colour = select.colour, mar = mar, select.lwd = select.lwd, 229 | select.cex = select.cex, cex.axis = cex.axis, cex.lab = cex.lab, tck = tck, 230 | device = dev.cur(), usr = par("usr"), screen = screen(), screen.coords = 231 | par("fig"), plot.type = plot.type, sptmp = if (exists("sptmp")) sptmp else 232 | NULL, factorcoords = if (exists("factorcoords")) factorcoords else NULL, 233 | histmp = if (exists("histmp")) histmp else NULL, bartmp = if (exists( 234 | "bartmp")) bartmp else NULL, boxtmp = if (exists("boxtmp")) boxtmp else 235 | NULL, hist2d = hist2d, fullbin = fullbin, ...), class = "xcplot")) 236 | } 237 | 238 | #' @title Condition selector plot 239 | #' 240 | #' @description Multivariate data visualisations used to select sections for 241 | #' \code{\link{ceplot}}. Basically visualises a dataset and highlights a 242 | #' single point. 243 | #' 244 | #' @param Xc A dataframe. 245 | #' @param Xc.cond A dataframe with one row and same names as \code{Xc}. 246 | #' @param select.colour Colour to highlight \code{Xc.cond} 247 | #' @param select.lwd Line weight to highlight \code{Xc.cond} 248 | #' @param cex.axis Axis text scaling 249 | #' @param cex.lab Label text scaling 250 | #' @param tck Plot axis tick size 251 | #' @param select.cex Plot symbol size 252 | #' @param ... not used. 253 | #' 254 | #' @return Produces a plot, and returns a list containing the relevant 255 | #' information to update the plot at a later stage. 256 | #' 257 | #' @seealso \code{\link{ceplot}}, \code{\link{plotxs}}, \code{\link{plotxc}} 258 | 259 | ## plotxc.pcp plots a parallel coordinates plot of predictors, highlighting 260 | ## one selected point, which represents a section in the data space. 261 | 262 | plotxc.pcp <- 263 | function (Xc, Xc.cond, select.colour = NULL, select.lwd = 3, 264 | cex.axis = NULL, cex.lab = NULL, tck = NULL, select.cex = 1, ...) 265 | { 266 | select.colour <- if (is.null(select.colour)) 267 | "blue" 268 | else select.colour 269 | cex.axis <- if (identical(version$os, "linux-gnu")) 270 | 1 271 | else if (is.null(cex.axis)) 272 | 0.7 273 | else cex.axis 274 | cex.lab <- if (identical(version$os, "linux-gnu")) 275 | 1 276 | else if (is.null(cex.lab)) 277 | 0.8 278 | else cex.lab 279 | tck <- if (is.null(tck)) 280 | - 0.2 281 | else tck 282 | factorindex <- which(vapply(Xc, is.factor, logical(1))) 283 | Xc.num <- vapply(Xc, as.numeric, numeric(nrow(Xc))) 284 | Xc.num.scaled <- apply(Xc.num, 2, scale2unit) 285 | Xc.cond.num <- vapply(Xc.cond, as.numeric, numeric(1L)) 286 | xcoord <- 1:ncol(Xc) 287 | ycoord <- (Xc.cond.num - apply(Xc.num, 2L, min))/(apply(Xc.num, 2L, max) - 288 | apply(Xc.num, 2L, min)) 289 | parcoord(Xc.num, main = "Condition selector") 290 | points(xcoord, ycoord, col = select.colour, type = "l", lwd = select.lwd) 291 | points(xcoord, ycoord, col = select.colour, pch = 16) 292 | invisible(structure(list(Xc = Xc, Xc.cond = Xc.cond, Xc.num.scaled = 293 | Xc.num.scaled, xc.num.max = apply(Xc.num, 2, max), xc.num.min = apply( 294 | Xc.num, 2, min), xcoord = xcoord, ycoord = ycoord, plot.type = "pcp", 295 | select.colour = select.colour, select.cex = select.cex, select.lwd = 296 | select.lwd, mar = par("mar"), usr = par("usr"), factorindex = factorindex, 297 | device = dev.cur(), screen = screen()), class = "xcplot")) 298 | } 299 | 300 | #' @rdname plotxc.pcp 301 | 302 | ## plotxc.full plots a full scatterplot matrix of predictors, highlighting 303 | ## one selected point, which represents a section in the data space. 304 | 305 | plotxc.full <- 306 | function (Xc, Xc.cond, select.colour = NULL, select.lwd = 3, 307 | cex.axis = NULL, cex.lab = NULL, tck = NULL, select.cex = 0.6, ...) 308 | { 309 | select.colour <- if (is.null(select.colour)) 310 | "blue" 311 | else select.colour 312 | cex.axis <- if (identical(version$os, "linux-gnu")) 313 | 1 314 | else if (is.null(cex.axis)) 315 | 0.7 316 | else cex.axis 317 | cex.lab <- if (identical(version$os, "linux-gnu")) 318 | 1 319 | else if (is.null(cex.lab)) 320 | 0.8 321 | else cex.lab 322 | tck <- if (is.null(tck)) 323 | - 0.2 324 | else tck 325 | factorindex <- vapply(Xc, is.factor, logical(1)) 326 | Xc.num <- vapply(Xc, as.numeric, numeric(nrow(Xc))) 327 | Xc.cond.num <- vapply(Xc.cond, as.numeric, numeric(1L)) 328 | close.screen(all.screens = TRUE) 329 | scr <- split.screen(c(ncol(Xc) + 2, ncol(Xc) + 2)) 330 | scr2 <- as.vector(matrix(scr, ncol = ncol(Xc) + 2)[c(-1, 331 | -(ncol(Xc) + 2)), c(-1, -(ncol(Xc) + 2))]) 332 | usr.matrix <- mar.matrix <- fig.matrix <- matrix(ncol = 4L, nrow = length( 333 | scr2)) 334 | rows <- rep(1:ncol(Xc), each = ncol(Xc)) 335 | cols <- rep(1:ncol(Xc), ncol(Xc)) 336 | dev.hold() 337 | for (i in seq_along(scr2)){ 338 | screen(scr2[i]) 339 | par(mar = c(0.1, 0.1, 0.1, 0.1)) 340 | par(mgp = c(3, 0.25, 0.15)) 341 | plot(Xc.num[,cols[i]], Xc.num[,rows[i]], cex = select.cex, xlab = "", 342 | ylab = "", xaxt = "n", yaxt = "n", col = if (identical( 343 | rows[i], cols[i])) NULL else "black") 344 | if (!identical(rows[i], cols[i])) 345 | abline(v = Xc.cond.num[cols[i]], h = Xc.cond.num[rows[i]], 346 | col = select.colour, lwd = select.lwd) 347 | if (identical(rows[i], 1L) & (2 * (round(cols[i] / 2)) == cols[i])) 348 | axis(3, cex.axis = 0.7, tcl = -0.2) 349 | if (identical(rows[i], ncol(Xc)) & !(2 * (round(cols[i] / 2)) == cols[i])) 350 | axis(1, cex.axis = 0.7, tcl = -0.2) 351 | if (identical(cols[i], 1L) & (2 * (round(rows[i] / 2)) == rows[i])) 352 | axis(2, cex.axis = 0.7, tcl = -0.2) 353 | if (identical(cols[i], ncol(Xc)) & !(2 * (round(rows[i] / 2)) == rows[i])) 354 | axis(4, cex.axis = 0.7, tcl = -0.2) 355 | if (identical(rows[i], cols[i])) 356 | text(x = mean(range(Xc.num[,rows[i]])), y = mean(range(Xc.num[,cols[i]])), 357 | labels = colnames(Xc.num)[rows[i]]) 358 | mar.matrix[i, ] <- par("mar") 359 | usr.matrix[i, ] <- par("usr") 360 | fig.matrix[i, ] <- par("fig") 361 | } 362 | coords <- data.frame(fig.matrix) 363 | names(coords) <- c("xleft", "xright", "ybottom", "ytop") 364 | coords$xcplots.index <- scr2 365 | dev.flush() 366 | invisible(structure(list(Xc = Xc, Xc.cond = Xc.cond, Xc.num = Xc.num, 367 | Xc.cond.num = Xc.cond.num, rows = rows, cols = cols, factorindex = 368 | factorindex, scr2 = scr2, coords = coords, plot.type = "full", device = 369 | dev.cur(), select.colour = select.colour, select.lwd = select.lwd, 370 | select.cex = select.cex, mar.matrix = mar.matrix, usr.matrix = usr.matrix), 371 | class = "xcplot")) 372 | } 373 | -------------------------------------------------------------------------------- /R/condtour.R: -------------------------------------------------------------------------------- 1 | #' @title Conditional tour; a tour through sections in data space 2 | #' 3 | #' @description Whereas \code{\link{ceplot}} allows the user to interactively 4 | #' choose sections to visualise, \code{condtour} allows the user to pre-select 5 | #' all sections to visualise, order them, and cycle through them one by one. 6 | #' ']' key advances the tour, and '[' key goes back. Can adjust 7 | #' \code{threshold} for the current section visualisation with ',' and '.' 8 | #' keys. 9 | #' 10 | #' @param data A dataframe. 11 | #' @param model A fitted model object, or a list of such objects. 12 | #' @param path A dataframe, describing the sections to take. Basically a 13 | #' dataframe with its \code{colnames} being \code{conditionvars}. 14 | #' @param response Character name of response variable in \code{data}. 15 | #' @param sectionvars Character name(s) of variables in \code{data} on which to 16 | #' take sections. 17 | #' @param conditionvars Character name(s) of variables in \code{data} on which 18 | #' to condition. 19 | #' @param threshold Threshold distance. Observed data which are a distance 20 | #' greater than \code{threshold} from the current section are not visible. 21 | #' Passed to \code{\link{similarityweight}}. 22 | #' @param lambda A constant to multiply by number of factor mismatches in 23 | #' constructing a general dissimilarity measure. If left \code{NULL}, behaves 24 | #' as though \code{lambda} is set greater than \code{threshold}, and so only 25 | #' observations whose factor levels match the current section are visible. 26 | #' Passed to \code{\link{similarityweight}}. 27 | #' @param distance The type of distance measure to use, either 28 | #' \code{"euclidean"} (default) or \code{"maxnorm"}. 29 | #' @param view3d Logical; if \code{TRUE}, plots a three-dimensional regression 30 | #' surface when possible. 31 | #' @param Corder Character name for method of ordering conditioning variables. 32 | #' See \code{\link{arrangeC}}. 33 | #' @param conf Logical; if \code{TRUE}, plots confidence bounds or equivalent 34 | #' when possible. 35 | #' @param col Colour for observed data points. 36 | #' @param pch Plot symbols for observed data points. 37 | #' @param xsplotpar Plotting parameters for section visualisation as a list, 38 | #' passed to \code{\link{plotxs}}. Not used. 39 | #' @param modelpar Plotting parameters for models as a list, passed to 40 | #' \code{\link{plotxs}}. Not used. 41 | #' @param xcplotpar Plotting parameters for condition selector plots as a list, 42 | #' passed to \code{\link{plotxc}}. Can specify \code{cex.axis}, \code{cex.lab} 43 | #' , \code{tck}, \code{col} for highlighting current section, \code{cex}. 44 | #' 45 | #' @return Produces a set of interactive plots. One device displays the current 46 | #' section. A second device shows the the current section in the space of the 47 | #' conditioning predictors given by \code{conditionvars}. A third device shows 48 | #' some simple diagnostic plots; one to show approximately how much data are 49 | #' visible on each section, and another to show what proportion of data are 50 | #' \emph{visited} by the tour. 51 | #' 52 | #' @seealso \code{\link{ceplot}}, \code{\link{similarityweight}} 53 | #' 54 | #' @examples 55 | #' \dontrun{ 56 | #' 57 | #' data(powerplant) 58 | #' library(e1071) 59 | #' model <- svm(PE ~ ., data = powerplant) 60 | #' path <- makepath(powerplant[-5], 25) 61 | #' condtour(data = powerplant, model = model, path = path$path, 62 | #' sectionvars = "AT") 63 | #' 64 | #' data(wine) 65 | #' wine$Class <- as.factor(wine$Class) 66 | #' library(e1071) 67 | #' model5 <- list(svm(Class ~ ., data = wine)) 68 | #' conditionvars1 <- setdiff(colnames(wine), c("Class", "Hue", "Flavanoids")) 69 | #' path <- makepath(wine[, conditionvars1], 50) 70 | #' condtour(data = wine, model = model5, path = path$path, sectionvars = c("Hue" 71 | #' , "Flavanoids"), threshold = 3) 72 | #' 73 | #'} 74 | 75 | condtour <- 76 | function(data, model, path, response = NULL, sectionvars = NULL, conditionvars = 77 | NULL, threshold = NULL, lambda = NULL, distance = c("euclidean", "maxnorm"), 78 | view3d = FALSE, Corder = "default", conf = FALSE, col = "black", pch = NULL, 79 | xsplotpar = NULL, modelpar = NULL, xcplotpar = NULL) 80 | { 81 | ## Rename for internal 82 | 83 | S <- sectionvars 84 | C <- conditionvars 85 | sigma <- threshold 86 | 87 | ## Check for optional inputs 88 | 89 | cex.axis <- xcplotpar$cex.axis 90 | cex.lab <- xcplotpar$cex.lab 91 | tck <- xcplotpar$tck 92 | select.colour <- if (is.null(xcplotpar$col)) 93 | "blue" 94 | else xcplotpar$col 95 | select.cex <- if (is.null(xcplotpar$select.cex)) 96 | 1 97 | else xcplotpar$select.cex 98 | 99 | ## Set up interactive functions for mousemove, mouseclick and keystroke. 100 | 101 | xold <- NULL 102 | yold <- NULL 103 | mousemove <- function () 104 | { 105 | function (buttons, x, y) 106 | { 107 | 108 | ## Rotate 3-D perspective plot from plotxs. 109 | 110 | if (all(findInterval(x, xscoords[1:2]) == 1, identical( 111 | xsplot$plot.type, "ccc"), xsplot$view3d, 0 %in% buttons)){ 112 | if (!is.null(xold)) 113 | xsplot <<- update(xsplot, theta3d = xsplot$theta3d + 1 * (xold > x) 114 | - 1 * (xold < x), phi3d = xsplot$phi3d + 1 * (yold > y) - 1 * ( 115 | yold < y), xs.grid = xsplot$xs.grid, prednew = xsplot$prednew) 116 | xold <<- x 117 | yold <<- y 118 | } 119 | points(NULL) 120 | } 121 | } 122 | mouseclick <- function () 123 | { 124 | function (buttons, x, y) 125 | { 126 | if (0 %in% buttons){ 127 | 128 | ## Clicking the mouse advances the tour by one. 129 | 130 | pathindex <<- max(min(pathindex + 1, max(pathindexrange)), min( 131 | pathindexrange)) 132 | applot <<- update(applot, pathindex = pathindex) 133 | xc.cond[, colnames(path)] <<- path[pathindex, , drop = FALSE] 134 | xsplot <<- update(xsplot, xc.cond = xc.cond, weights = k[pathindex, ]) 135 | for (i in seq_along(C)){ 136 | xcplots[[i]] <<- update(xcplots[[i]], xc.cond = path[pathindex, 137 | C[[i]]]) 138 | } 139 | vw$sigma <<- threshold 140 | } 141 | points(NULL) 142 | } 143 | } 144 | keystroke <- function () 145 | { 146 | function (key) 147 | { 148 | 149 | ## 'q' key ends the interactive session. 150 | 151 | if (identical(key, "q")){ 152 | cat("\nInteractive session ended.\n") 153 | return(invisible(1)) 154 | } 155 | 156 | ## Arrow keys rotate a 3-D perspective plot from plotxs. 157 | 158 | if (identical(xsplot$plot.type, "ccc") & xsplot$view3d & 159 | key %in% c("Up", "Down", "Left", "Right")){ 160 | xsplot <<- update(xsplot, theta3d = xsplot$theta3d - 2 * (key == "Right" 161 | ) + 2 * (key == "Left"), phi3d = xsplot$phi3d - 2 * (key == "Up") + 2 162 | * (key == "Down"), xs.grid = xsplot$xs.grid, prednew = xsplot$prednew) 163 | } 164 | 165 | ## '[' and ']' reverse and advance the tour respectively. 166 | 167 | if (key %in% c("[", "]")){ 168 | pathindex <<- max(min(pathindex + 1 * (key == "]") - 1 * (key == "["), 169 | max(pathindexrange)), min(pathindexrange)) 170 | applot <<- update(applot, pathindex = pathindex) 171 | xc.cond[, colnames(path)] <<- path[pathindex, , drop = FALSE] 172 | xsplot <<- update(xsplot, xc.cond = xc.cond, weights = k[pathindex, ]) 173 | for (i in seq_along(C)){ 174 | xcplots[[i]] <<- update(xcplots[[i]], xc.cond = path[pathindex, 175 | C[[i]]]) 176 | } 177 | vw$sigma <<- threshold 178 | } 179 | 180 | ## ',' and '.' decrease and increase the threshold distance used for 181 | ## similarity weight. 182 | 183 | if (key %in% c(",", ".")){ 184 | sigma <- vw$sigma + 0.01 * vw$sigma * (key == ".") - 0.01 * vw$sigma * 185 | (key == ",") 186 | vw <<- vwfun(xc.cond = path[pathindex, ], sigma = sigma, distance = 187 | vw$distance, lambda = lambda) 188 | xsplot <<- update(xsplot, weights = vw$k, xs.grid = xsplot$xs.grid, 189 | newdata = xsplot$newdata, prednew = xsplot$prednew) 190 | } 191 | 192 | ## Save a snapshot. 193 | 194 | if (key %in% c("s")){ 195 | filename <- paste("snapshot_", gsub(":", ".", gsub(" ", "_", 196 | Sys.time())), c("-expectation.pdf", "-condition.pdf", 197 | "-diagnostics.pdf"), sep = "") 198 | 199 | ## Snapshot of section. 200 | 201 | pdf(filename[1], height = 6, width = 6) 202 | close.screen(all.screens = TRUE) 203 | xsscreens <- if (plotlegend){ 204 | split.screen(figs = matrix(c(0, 1 - legendwidth, 1 - legendwidth, 1, 0, 0, 1 205 | , 1), ncol = 4)) 206 | } else split.screen() 207 | if (plotlegend){ 208 | screen(xsscreens[2L]) 209 | xslegend(data[, response], colnames(data)[response]) 210 | } 211 | screen(xsscreens[1L]) 212 | par(mar = c(3, 3, 3, 3)) 213 | xsplot <- plotxs(xs = data[, S, drop = FALSE], data[, response, drop = 214 | FALSE], xc.cond = xc.cond, model = model, weights = k[pathindex, ], 215 | col = col, view3d = view3d, conf = conf, pch = pch, model.colour = 216 | modelpar$col, model.lwd = modelpar$lwd, model.lty = modelpar$lty, main 217 | = xsplotpar$main, xlim = xsplotpar$xlim, ylim = xsplotpar$ylim) 218 | dev.off() 219 | cat(paste("\nSnapshot saved: '", filename[1L],"'", sep = "")) 220 | 221 | ## Snapshot of condition plots. 222 | 223 | pdf(filename[2L], width = 2 * n.selector.cols, height = 2 * 224 | n.selector.rows) 225 | close.screen(all.screens = TRUE) 226 | xcscreens <- split.screen(c(n.selector.rows, n.selector.cols)) 227 | for (i in seq_along(C)){ 228 | screen(xcscreens[i]) 229 | xcplots[[i]] <- plotxc(xc = data[, C[[i]]], xc.cond = path[pathindex, 230 | C[[i]]], name = C[[i]], select.colour = select.colour, hist2d = 231 | xcplotpar$hist2d) 232 | coords[i, ] <- par("fig") 233 | } 234 | dev.off() 235 | cat(paste("\nSnapshot saved: '", filename[2L],"'", sep = "")) 236 | 237 | ## Snapshot of diagnostic plots. 238 | 239 | pdf(filename[3L], width = 4, height = 6) 240 | close.screen(all.screens = TRUE) 241 | diagscreens <- split.screen(c(2, 1)) 242 | screen(diagscreens[1L]) 243 | par(mar = c(4, 4, 2, 2)) 244 | plotmaxk(apply(k, 2, max)) 245 | screen(diagscreens[2L]) 246 | par(mar = c(4, 4, 2, 2)) 247 | plotap(k, pathindex = pathindex) 248 | dev.off() 249 | cat(paste("\nSnapshot saved: '", filename[3L],"'", sep = "")) 250 | } 251 | points(NULL) 252 | } 253 | } 254 | 255 | ## Set up variable default values etc. 256 | 257 | data <- na.omit(data) 258 | model <- if (!identical(class(model), "list")) 259 | list(model) 260 | else model 261 | model.name <- if (!is.null(names(model))) 262 | names(model) 263 | else NULL 264 | varnamestry <- try(getvarnames(model[[1]]), silent = TRUE) 265 | response <- if (is.null(response)) 266 | if (class(varnamestry) != "try-error") 267 | which(colnames(data) == varnamestry$response[1]) 268 | else stop("could not extract response from 'model'.") 269 | else if (is.character(response)) 270 | which(colnames(data) == response) 271 | else response 272 | 273 | ## Set up conditioning predictors. 274 | 275 | ## Hierarchy for specifying C 276 | ## 1. If user supplies list, use that exactly (maybe chop length). 277 | ## 2. If user supplies vector, use that but order/group it. 278 | ## 3. If user supplies nothing, try to extract from model, then order. 279 | ## 4. Else bung in everything from the data that isn't 'response' or 'S' and 280 | ## then try to order that and show the top 20. 281 | 282 | if (is.list(C)){ 283 | C <- C[1:min(length(C), 20L)] 284 | } else if (is.vector(C)){ 285 | C <- arrangeC(data[, setdiff(C, S), drop = FALSE], method = Corder) 286 | } else if (!inherits(varnamestry, "try-error")){ 287 | possibleC <- unique(unlist(lapply(lapply(model, getvarnames), `[[`, 2))) 288 | C <- arrangeC(data[, setdiff(intersect(possibleC, colnames(data)), S), drop 289 | = FALSE], method = Corder) 290 | } else { 291 | C <- arrangeC(data[, setdiff(colnames(data), c(S, response)), drop = FALSE], 292 | method = Corder) 293 | } 294 | C <- C[1:min(length(C), 20L)] 295 | uniqC <- unique(unlist(C)) 296 | #C <- uniqC 297 | 298 | threshold <- if (is.null(threshold)) 299 | 1 300 | else threshold 301 | 302 | ## Set up col so it is a vector with length equal to nrow(data). Default pch 303 | ## to 1, or 21 for using background colour to represent observed values. 304 | 305 | nr.data <- nrow(data) 306 | col <- rep(col, length.out = nr.data) 307 | pch <- if (is.null(pch)){ 308 | if (identical(length(S), 2L)) 309 | rep(21, nr.data) 310 | else rep(1, nr.data) 311 | } else rep(pch, length.out = nr.data) 312 | 313 | distance <- match.arg(distance) 314 | pathindex <- 1 315 | pathindexrange <- c(1, nrow(path)) 316 | xc.cond <- data[, setdiff(colnames(data), c(S, response))] 317 | xc.cond[, colnames(path)] <- path[pathindex, , drop = FALSE] 318 | if (any(response %in% uniqC)) 319 | stop("cannot have 'response' variable in 'C'") 320 | if (any(response %in% S)) 321 | stop("cannot have 'response' variable in 'S'") 322 | if (!identical(length(intersect(S, uniqC)), 0L)) 323 | stop("cannot have variables common to both 'S' and 'C'") 324 | xcplots <- list() 325 | coords <- matrix(ncol = 4L, nrow = length(C)) 326 | plotlegend <- length(S) == 2 327 | n.selector.cols <- ceiling(length(C) / 4L) 328 | selector.colwidth <- 2 329 | height <- 8 330 | width <- height + 0.5 * plotlegend 331 | 332 | ## Calculate the similarity weights for the entire tour. 333 | 334 | vwfun <- .similarityweight(xc = data[, colnames(path), drop = FALSE]) 335 | vw <- list(sigma = threshold, distance = 336 | distance, lambda = lambda) 337 | 338 | k <- matrix(nrow = nrow(path), ncol = nrow(data), dimnames = list(rownames( 339 | path), rownames(data))) 340 | for (i in 1:nrow(path)){ 341 | k[i, ] <- do.call(vwfun, list(xc.cond = path[i, , drop = FALSE], sigma = 342 | threshold, distance = distance, lambda = lambda))$k 343 | } 344 | 345 | ## Do section visualisation. 346 | 347 | opendev(width = width, height = height) 348 | devexp <- dev.cur() 349 | close.screen(all.screens = TRUE) 350 | legendwidth <- 1.15 / height 351 | xsscreens <- if (plotlegend){ 352 | split.screen(figs = matrix(c(0, 1 - legendwidth, 1 - legendwidth, 1, 0, 0, 1 353 | , 1), ncol = 4)) 354 | } else split.screen() 355 | if (plotlegend){ 356 | screen(xsscreens[2L]) 357 | xslegend(data[, response], colnames(data)[response]) 358 | } 359 | screen(xsscreens[1L]) 360 | par(mar = c(3, 3, 3, 3)) 361 | xsplot <- plotxs(xs = data[, S, drop = FALSE], data[, response, drop = FALSE] 362 | , xc.cond = xc.cond, model = model, weights = k[pathindex, ], col = col, 363 | view3d = view3d, conf = conf, pch = pch, model.colour = modelpar$col, 364 | model.lwd = modelpar$lwd, model.lty = modelpar$lty, main = xsplotpar$main, 365 | xlim = xsplotpar$xlim, ylim = xsplotpar$ylim) 366 | xscoords <- par("fig") 367 | setGraphicsEventHandlers( 368 | onMouseMove = mousemove(), 369 | onKeybd = keystroke()) 370 | 371 | ## Do diagnostic plots. 372 | 373 | opendev(width = 4, height = 6) 374 | devdiag <- dev.cur() 375 | close.screen(all.screens = TRUE) 376 | diagscreens <- split.screen(c(2, 1)) 377 | screen(diagscreens[1L]) 378 | par(mar = c(4, 4, 2, 2)) 379 | plotmaxk(apply(k, 2, max)) 380 | screen(diagscreens[2L]) 381 | par(mar = c(4, 4, 2, 2)) 382 | applot <- plotap(k) 383 | setGraphicsEventHandlers( 384 | onMouseDown = mouseclick(), 385 | onKeybd = keystroke()) 386 | 387 | ## Do condition plots, so we can see where we are in the data space. 388 | 389 | xcwidth <- selector.colwidth * n.selector.cols 390 | n.selector.rows <- ceiling(length(C) / n.selector.cols) 391 | xcheight <- selector.colwidth * n.selector.rows 392 | opendev(width = xcwidth, height = xcheight) 393 | devcond <- dev.cur() 394 | close.screen(all.screens = TRUE) 395 | xcscreens <- split.screen(c(n.selector.rows, n.selector.cols)) 396 | for (i in seq_along(C)){ 397 | screen(xcscreens[i]) 398 | xcplots[[i]] <- plotxc(xc = data[, C[[i]]], xc.cond = path[pathindex, 399 | C[[i]]], name = C[[i]], select.colour = select.colour, hist2d = 400 | xcplotpar$hist2d) 401 | coords[i, ] <- par("fig") 402 | } 403 | 404 | setGraphicsEventHandlers( 405 | onMouseDown = mouseclick(), 406 | onKeybd = keystroke()) 407 | getGraphicsEventEnv() 408 | getGraphicsEvent() 409 | } 410 | -------------------------------------------------------------------------------- /R/plotxs.R: -------------------------------------------------------------------------------- 1 | #' @title Visualise a section in data space 2 | #' 3 | #' @description Visualise a section in data space, showing fitted models where 4 | #' they intersect the section, and nearby observations. The \code{weights} for 5 | #' observations can be calculated with \code{\link{similarityweight}}. This 6 | #' function is mainly for use in \code{\link{ceplot}} and 7 | #' \code{\link{condtour}}. 8 | #' 9 | #' @param xs A dataframe with one or two columns. 10 | #' @param y A dataframe with one column. 11 | #' @param xc.cond A dataframe with a single row, with all columns required for 12 | #' passing to \code{\link{predict}} methods of models in \code{model}. 13 | #' @param model A fitted model object, or a list of such objects. 14 | #' @param model.colour Colours for fitted models. If \code{model} is a list, 15 | #' this should be of same length as \code{model}. 16 | #' @param model.lwd Line weight for fitted models. If \code{model} is a list, 17 | #' this should be of same length as \code{model}. 18 | #' @param model.lty Line style for fitted models. If \code{model} is a list, 19 | #' this should be of same length as \code{model}. 20 | #' @param model.name Character labels for models, for legend. 21 | #' @param yhat Fitted values for the observations in \code{y}. Calculated if 22 | #' needed and not provided. Only used if showing residuals, or \code{xs} has 23 | #' two columns. 24 | #' @param mar Margins for plot. 25 | #' @param col Colours for observed data. Should be of length \code{nrow(xs)}. 26 | #' @param weights Similarity weights for observed data. Should be of length 27 | #' \code{nrow(xs)}. Usually calculated with \code{\link{similarityweight}}. 28 | #' @param view3d Logical; if \code{TRUE} plots a three-dimensional 29 | #' regression surface if possible. 30 | #' @param theta3d,phi3d Angles defining the viewing direction. \code{theta3d} 31 | #' gives the azimuthal direction and \code{phi3d} the colatitude. See 32 | #' \code{\link[graphics]{persp}}. 33 | #' @param xs.grid The grid of values defining the part of the section to 34 | #' visualise. Calculated if not provided. 35 | #' @param prednew The \code{y} values where the models in \code{model} intersect 36 | #' the section. Useful when providing \code{theta3d}, \code{phi3d}, or 37 | #' \code{weights}, where the predict methods have been called elsewhere. 38 | #' @param conf Logical; if \code{TRUE} plots confidence bounds (or equivalent) 39 | #' for models which provide this. 40 | #' @param probs Logical; if \code{TRUE}, shows predicted class probabilities 41 | #' instead of just predicted classes. Only available if \code{xs} contains two 42 | #' numeric predictors and the model's predict method provides this. 43 | #' @param pch Plot symbols for observed data 44 | #' @param residuals Logical; if \code{TRUE}, plots a residual versus predictor 45 | #' plot instead of the usual scale of raw response. 46 | #' @param main Character title for plot, default is 47 | #' \code{"Conditional expectation"}. 48 | #' @param xlim Graphical parameter passed to plotting functions. 49 | #' @param ylim Graphical parameter passed to plotting functions. 50 | #' 51 | #' @return A list containing relevant information for updating the plot. 52 | #' 53 | #' @examples 54 | #' data(mtcars) 55 | #' model <- lm(mpg ~ ., data = mtcars) 56 | #' plotxs(xs = mtcars[, "wt", drop = FALSE], y = mtcars[, "mpg", drop = FALSE], 57 | #' xc.cond = mtcars[1, ], model = list(model)) 58 | #' 59 | #' @seealso \code{\link{plotxc}}, \code{\link{ceplot}}, \code{\link{condtour}} 60 | #' 61 | #' @references O'Connell M, Hurley CB and Domijan K (2017). ``Conditional 62 | #' Visualization for Statistical Models: An Introduction to the 63 | #' \strong{condvis} Package in R.''\emph{Journal of Statistical Software}, 64 | #' \strong{81}(5), pp. 1-20. . 65 | 66 | plotxs <- 67 | function (xs, y, xc.cond, model, model.colour = NULL, model.lwd = NULL, 68 | model.lty = NULL, model.name = NULL, yhat = NULL, mar = NULL, col = "black", 69 | weights = NULL, view3d = FALSE, theta3d = 45, phi3d = 20, xs.grid 70 | = NULL, prednew = NULL, conf = FALSE, probs = FALSE, pch = 1, residuals = 71 | FALSE, main = NULL, xlim = NULL, ylim = NULL) 72 | { 73 | ny <- nrow(y) 74 | col <- rep(col, length.out = ny) 75 | main <- if (is.null(main)) 76 | "Conditional expectation" 77 | else main 78 | dev.hold() 79 | 80 | ## If no weights are provided, just show all data with the appropriate colour. 81 | ## Otherwise, adjust the colours according to the weights, find the 82 | ## observations with weights greater than one, and order them for plotting. 83 | 84 | if (is.null(weights)){ 85 | data.order <- 1:ny 86 | data.colour <- col 87 | } else { 88 | if (!identical(length(weights), ny)) 89 | stop("'weights' should be same length as number of observations") 90 | data.colour <- weightcolor(col, weights) 91 | data.order <- attr(data.colour, "order") 92 | } 93 | 94 | ## Organise defaults and check inputs. 95 | 96 | pch <- rep(pch, length.out = ny) 97 | if (ncol(y) != 1) 98 | stop("y must be a dataframe with 1 column") 99 | model <- if (!is.list(model)) 100 | list(model) 101 | else model 102 | model.colour <- if (is.null(model.colour)){ 103 | if (requireNamespace("RColorBrewer", quietly = TRUE)) 104 | RColorBrewer::brewer.pal(n = max(length(model), 3L), name = "Dark2") 105 | else rainbow(max(length(model), 4L)) 106 | } else rep(model.colour, length.out = length(model)) 107 | model.lwd <- if (is.null(model.lwd)) 108 | rep(2, length(model)) 109 | else rep(model.lwd, length.out = length(model)) 110 | model.lty <- if (is.null(model.lty)) 111 | rep(1, length(model)) 112 | else rep(model.lty, length.out = length(model)) 113 | model.name <- if (!is.null(names(model))) 114 | names(model) 115 | else paste("model", seq_along(model), sep = "_") 116 | mar <- if (is.null(mar)) 117 | c(5, 4, 3, 2) 118 | else mar 119 | par(mar = mar) 120 | 121 | ## If xs is NULL, show a univariate summary 122 | 123 | if (is.null(xs) || identical(ncol(xs), 0L)){ 124 | if (is.null(prednew)){ 125 | newdata <- xc.cond 126 | prednew <- lapply(model, predict1, newdata = newdata, ylevels = if ( 127 | nlevels(y[, 1L]) > 2) levels(y[, 1L]) else NULL) 128 | } 129 | fullhist <- hist(y[, 1L], border = NA) 130 | abline(v = unlist(prednew), col = model.colour, lwd = model.lwd, lty = 131 | model.lty) 132 | legend("topright", legend = model.name, col = model.colour, lwd = 133 | model.lwd, lty = model.lty) 134 | box() 135 | plot.type <- "residuals" 136 | } else { 137 | 138 | ## Otherwise, go through the various combinations of xs having one or two 139 | ## columns of factors or numerics, and y being factor or numeric. 140 | 141 | ## 'plot.type's are coded with 2 or 3 letters. The first one refers to the 142 | ## response, and the following letters refer to the predictors. For example, 143 | ## "cfc" refers to a continuous response, one factor predictor and one 144 | ## continuous predictor. 145 | 146 | if (identical(ncol(xs), 1L)){ 147 | # xs has one column 148 | if (is.null(xs.grid)){ 149 | if (!is.factor(xs[, 1L])){ 150 | xs.min <- if (is.null(xlim)) 151 | min(xs[, 1L], na.rm = TRUE) 152 | else xlim[1] 153 | xs.max <- if (is.null(xlim)) 154 | max(xs[, 1L], na.rm = TRUE) 155 | else xlim[2] 156 | xs.grid <- data.frame(seq(xs.min, xs.max, length.out = if (view3d) 20L 157 | else 50L)) 158 | } else { 159 | xs.grid <- data.frame(as.factor(levels(xs[, 1L]))) 160 | } 161 | colnames(xs.grid) <- colnames(xs) 162 | } 163 | newdata <- makenewdata(xs = xs.grid, xc.cond = xc.cond) 164 | if (is.null(prednew)) 165 | prednew <- lapply(model, predict1, newdata = newdata, ylevels = 166 | if (nlevels(y[, 1L]) > 2) levels(y[, 1L]) else NULL) 167 | if (is.factor(xs[, 1L])){ 168 | # xs is a factor 169 | if (is.factor(y[, 1L])){ 170 | # y is factor 171 | plot.type <- "ff" 172 | if (identical(nlevels(y[, 1L]), 2L)){ 173 | plot(unique(xs[, 1L]), rep(-888, length(levels(xs[, 1L]))), col = 174 | NULL, main = main, ylab = paste("Probability 175 | ", colnames(y)[1L], "=", levels(y[, 1L])[2L]), ylim = c(0, 1)) 176 | if (length(data.order) > 0) 177 | points.default((as.numeric(xs[data.order, 1L])) + rnorm(n = 178 | length(data.order), sd = 0.1), (as.integer(y[data.order, 1L]) - 179 | 1) + rnorm(n = length(data.order), sd = 0.01), col = 180 | data.colour[data.order], pch = pch[data.order]) 181 | for (i in seq_along(model)){ 182 | if ("glm" %in% class(model[[i]])){ 183 | points.default(xs.grid[, 1L], prednew[[i]], type = 'l', col = 184 | model.colour[i], lwd = model.lwd[i], lty = model.lty[i]) 185 | } else if (inherits(model[[i]], "gbm")){ 186 | points.default(xs.grid[, 1L], prednew[[i]], type = 'l', col = 187 | model.colour[i], lwd = model.lwd[i], lty = model.lty[i]) 188 | } else { 189 | points.default(xs.grid[, 1L], as.numeric(prednew[[i]]) - 1, type 190 | = 'l', col = model.colour[i], lwd = model.lwd[i], lty = 191 | model.lty[i]) 192 | } 193 | } 194 | } else { 195 | plot(range(as.numeric(xs[, 1L])) + c(0, 0.1 * abs(diff(range( 196 | as.numeric(xs[, 1L])))) ), range(as.integer(y[, 1L])), col = NULL, 197 | xlab = colnames(xs)[1L], ylab = colnames(y)[1L], yaxt = "n", main 198 | = main, xaxt = if (is.factor(xs[, 1L])) "n" 199 | else NULL) 200 | axis(2, at = 1:nlevels(y[, 1L]), labels = levels(y[, 1L])) 201 | if (is.factor(xs[, 1L])) 202 | axis(1, at = 1:nlevels(xs[, 1L]), labels = levels(xs[, 1L])) 203 | if (length(data.order) > 0) 204 | points(as.numeric(xs[data.order, 1L]), as.integer(y[data.order, 205 | 1L]), col = data.colour[data.order], pch = pch[data.order]) 206 | for (i in seq_along(model)){ 207 | points.default(as.numeric(xs.grid[, 1L]), as.integer(prednew[[i]]) 208 | , type = 'l', col = model.colour[i], lwd = model.lwd[i], lty = 209 | model.lty[i]) 210 | } 211 | } 212 | legend("topright", legend = model.name, col = model.colour, lwd = 213 | model.lwd, lty = model.lty) 214 | } else { 215 | # y is continuous 216 | plot.type <- "cf" 217 | plot(unique(xs[, 1L]), rep(-888, length(levels(xs[, 1L]))), col = NULL 218 | , main = main, xlab = colnames(xs)[1L], ylab = 219 | colnames(y)[1L], ylim = if(is.null(ylim)) range(y[, 1L]) else ylim) 220 | if (length(data.order) > 0) 221 | points(xs[data.order, 1L], y[data.order, 1L], col = data.colour[ 222 | data.order], pch = pch[data.order]) 223 | if (conf){ 224 | prednew2 <- lapply(model, confpred, newdata = newdata) 225 | for (i in seq_along(model)){ 226 | points.default(xs.grid[, 1L], prednew[[i]], type = 'l', col = 227 | model.colour[i], lwd = model.lwd[i], lty = model.lty[i]) 228 | if (all(c("lwr", "upr") %in% colnames(prednew2[[i]]))){ 229 | points.default(xs.grid[, 1L], prednew2[[i]][, "lwr"], type = 'l' 230 | , lty = 2, col = model.colour[i], lwd = max(0.8, 0.5 * 231 | model.lwd[i])) 232 | points.default(xs.grid[, 1L], prednew2[[i]][, "upr"], type = 'l' 233 | , lty = 2, col = model.colour[i], lwd = max(0.8, 0.5 * 234 | model.lwd[i])) 235 | } 236 | } 237 | } else { 238 | for (i in seq_along(model)){ 239 | points.default(xs.grid[, 1L], prednew[[i]], type = 'l', col = 240 | model.colour[i], lwd = model.lwd[i], lty = model.lty[i]) 241 | } 242 | } 243 | legend("topright", legend = model.name, col = model.colour, lwd = 244 | model.lwd, lty = model.lty) 245 | } 246 | } else { 247 | #xs is continuous 248 | if (is.factor(y[, 1L])){ 249 | # y is factor 250 | plot.type <- "fc" 251 | if (identical(nlevels(y[, 1L]), 2L)){ 252 | plot(range(xs[, 1L]) + 0.1 * abs(diff(range(xs[, 1L]))), c(0, 0), 253 | col = NULL, main = main, xlab = colnames(xs)[ 254 | 1L], ylab = paste("Probability ", colnames(y)[1L], "=", levels(y[, 255 | 1L])[2L]), ylim = c(0, 1)) 256 | if (length(data.order) > 0) 257 | points.default(xs[data.order, 1L], as.integer(y[data.order, 1L]) - 258 | 1, col = data.colour[data.order], pch = pch[data.order]) 259 | for (i in seq_along(model)){ 260 | if ("glm" %in% class(model[[i]])){ 261 | points.default(xs.grid[, 1L], prednew[[i]], type = 'l', col = 262 | model.colour[i], lwd = model.lwd[i], lty = model.lty[i]) 263 | } else if (inherits(model[[i]], "gbm")){ 264 | points.default(xs.grid[, 1L], prednew[[i]], type = 'l', col = 265 | model.colour[i], lwd = model.lwd[i], lty = model.lty[i]) 266 | } else { 267 | points.default(xs.grid[, 1L], as.numeric(prednew[[i]]) - 1, type 268 | = 'l', col = model.colour[i], lwd = model.lwd[i], lty = 269 | model.lty[i]) 270 | } 271 | } 272 | } else { 273 | plot(range(xs[, 1L]), range(as.integer(y[, 1L])), col = NULL, xlab = 274 | colnames(xs)[1L], ylab = colnames(y)[1L], yaxt = "n", main = 275 | main, xaxt = if (is.factor(xs[, 1L])) "n" 276 | else NULL) 277 | axis(2, at = 1:nlevels(y[, 1L]), labels = levels(y[, 1L])) 278 | if (is.factor(xs[, 1L])) 279 | axis(1, at = 1:nlevels(xs[, 1L]), labels = levels(xs[, 1L])) 280 | if (length(data.order) > 0) 281 | points(xs[data.order, 1L], as.integer(y[data.order, 1L]), col = 282 | data.colour[data.order], pch = pch[data.order]) 283 | for (i in seq_along(model)){ 284 | points.default(as.numeric(xs.grid[, 1L]), as.integer(prednew[[i]]) 285 | , type = 'l', col = model.colour[i], lwd = model.lwd[i], lty = 286 | model.lty[i]) 287 | } 288 | } 289 | legend("topright", legend = model.name, col = model.colour, lwd = 290 | model.lwd, lty = model.lty) 291 | } else { 292 | # y is continuous 293 | plot.type <- "cc" 294 | plot(range(xs[, 1L]), range(y[, 1L]), col = NULL, main = 295 | main, xlab = colnames(xs)[1L], ylab = colnames( 296 | y)[1L], xlim = xlim, ylim = ylim) 297 | if (length(data.order) > 0){ 298 | points(xs[data.order, 1L], y[data.order, 1L], col = data.colour[ 299 | data.order], pch = pch[data.order]) 300 | } 301 | if (conf){ 302 | prednew2 <- lapply(model, confpred, newdata = newdata) 303 | for (i in seq_along(model)){ 304 | points.default(xs.grid[, 1L], prednew[[i]], type = 'l', col = 305 | model.colour[i], lwd = model.lwd[i], lty = model.lty[i]) 306 | if (all(c("lwr", "upr") %in% colnames(prednew2[[i]]))){ 307 | points.default(xs.grid[, 1L], prednew2[[i]][, "lwr"], type = 'l' 308 | , col = model.colour[i], lwd = max(0.8, 0.5 * model.lwd[i]), 309 | lty = 2) 310 | points.default(xs.grid[, 1L], prednew2[[i]][, "upr"], type = 'l' 311 | , col = model.colour[i], lwd = max(0.8, 0.5 * model.lwd[i]), 312 | lty = 2) 313 | } 314 | } 315 | } else { 316 | for (i in seq_along(model)){ 317 | points.default(xs.grid[, 1L], prednew[[i]], type = 'l', col = 318 | model.colour[i], lwd = model.lwd[i], lty = model.lty[i]) 319 | } 320 | } 321 | pos <- if (cor(xs, y) < 0) 322 | "topright" 323 | else "bottomright" 324 | legend(pos, legend = model.name, col = model.colour, lwd = model.lwd, 325 | lty = model.lty) 326 | } 327 | } 328 | } else { 329 | # xs has two columns 330 | arefactorsxs <- vapply(xs, is.factor, logical(1L)) 331 | if (is.null(xs.grid)){ 332 | xs.grid1 <- if (!is.factor(xs[, 1L])) 333 | seq(min(xs[, 1L], na.rm = TRUE), max(xs[, 1L], na.rm = TRUE), 334 | length.out = if (view3d) {20L} else if (probs) 15 else 50L) 335 | else as.factor(levels(xs[, 1L])) 336 | xs.grid2 <- if (!is.factor(xs[, 2L])) 337 | seq(min(xs[, 2L], na.rm = TRUE), max(xs[, 2L], na.rm = TRUE), 338 | length.out = if (view3d) {20L} else if (probs) 15 else 50L) 339 | else as.factor(levels(xs[, 2L])) 340 | xs.grid <- data.frame(rep(xs.grid1, by = length(xs.grid2)), rep(xs.grid2 341 | , each = length(xs.grid1))) 342 | colnames(xs.grid) <- colnames(xs) 343 | } 344 | newdata <- makenewdata(xs = xs.grid, xc.cond = xc.cond) 345 | if (is.null(prednew)) 346 | prednew <- lapply(model, predict1, newdata = newdata, ylevels = if ( 347 | nlevels(y[, 1L]) > 2) levels(y[, 1L]) else NULL) 348 | color <- if (is.factor(y[, 1L])){ 349 | if (identical(nlevels(y[, 1L]), 2L) && inherits(model[[1L]], "glm")){ 350 | factor2color(as.factor(round(prednew[[1L]]))) 351 | } else factor2color(as.factor(prednew[[1L]])) 352 | } else cont2color(prednew[[1L]], range(y[, 1L])) 353 | ybg <- if (length(data.order) > 0){ 354 | if (is.factor(y[, 1L])) 355 | factor2color(y[data.order, 1L]) 356 | else cont2color(y[data.order, 1L], range(y[, 1L])) 357 | } else NULL 358 | if (all(arefactorsxs)){ 359 | # xs are both factors 360 | xrect <- as.integer(xs.grid[, 1L]) 361 | yrect <- as.integer(xs.grid[, 2L]) 362 | xoffset <- abs(diff(unique(xrect)[1:2])) / 2.1 363 | yoffset <- abs(diff(unique(yrect)[1:2])) / 2.1 364 | plot(xrect, yrect, col = NULL, xlab = colnames(xs)[1L], ylab = colnames( 365 | xs)[2L], xlim = c(min(xrect) - xoffset, max(xrect) + xoffset), xaxt = 366 | "n", bty = "n", ylim = c(min(yrect) - yoffset, max(yrect) + yoffset), 367 | yaxt = "n", main = main) 368 | rect(xleft = xrect - xoffset, xright = xrect + xoffset, ybottom = yrect 369 | - yoffset, ytop = yrect + yoffset, col = color) 370 | if (length(data.order) > 0) 371 | points(jitter(as.integer(xs[data.order, 1L]), amount = 0.6 * xoffset), 372 | jitter(as.integer(xs[data.order, 2L]), amount = 0.6 * yoffset), bg = 373 | ybg, col = data.colour[data.order], pch = pch[data.order]) 374 | axis(1L, at = unique(xrect), labels = levels(xs[, 1L]), tick = FALSE) 375 | axis(2L, at = unique(yrect), labels = levels(xs[, 2L]), tick = FALSE) 376 | if (is.factor(y[, 1L])){ 377 | # y is factor 378 | plot.type <- "fff" 379 | } else { 380 | # y is continuous 381 | plot.type <- "cff" 382 | } 383 | } else { 384 | if (any(arefactorsxs)){ 385 | # xs is one factor, one continuous 386 | plot.type <- if (is.factor(y[, 1L])) 387 | "ffc" # y is factor 388 | else "cfc" # y is continuous 389 | xrect <- xs.grid[, !arefactorsxs] 390 | yrect <- as.integer(xs.grid[, arefactorsxs]) 391 | xoffset <- abs(diff(unique(xrect)[1:2])) / 2 392 | yoffset <- abs(diff(unique(yrect)[1:2])) / 2.1 393 | plot(0, 0, col = NULL, xlab = colnames(xs)[!arefactorsxs], ylab = 394 | colnames(xs)[arefactorsxs], xlim = c(min(xrect) - xoffset, max(xrect 395 | ) + xoffset), bty = "n", main = main, ylim = 396 | c(min(yrect) - yoffset, max(yrect) + yoffset), yaxt = "n") 397 | rect(xleft = xrect - xoffset, xright = xrect + xoffset, ybottom = 398 | yrect - yoffset, ytop = yrect + yoffset, col = color, border = NA) 399 | if (length(data.order) > 0) 400 | points(jitter(xs[data.order, !arefactorsxs]), jitter(as.integer(xs[ 401 | data.order, arefactorsxs])), bg = ybg, col = data.colour[ 402 | data.order], pch = pch[data.order]) 403 | axis(2L, at = unique(yrect), labels = levels(xs[, arefactorsxs]), 404 | tick = FALSE) 405 | } else { 406 | # xs are both continuous 407 | if (is.factor(y[, 1L])){ 408 | # y is factor 409 | plot.type <- "fcc" 410 | if (probs){ 411 | plot(range(xs.grid[, 1L]), range(xs.grid[, 2L]), col = NULL, xlab 412 | = colnames(xs)[1L], ylab = colnames(xs)[2L], main = 413 | main) 414 | pred <- predict1(model[[1L]], newdata = newdata, probability = 415 | TRUE, ylevels = levels(y[, 1L])) 416 | p1 <- extractprobs(model[[1L]], pred) 417 | totalwidth <- abs(diff(par()$usr[1:2])) 418 | totalheight <- abs(diff(par()$usr[3:4])) 419 | o1 <- apply(cbind(xs.grid, p1), 1, function (x) myglyph2( 420 | x[1], x[2], 0.6 * totalwidth / 15, 0.6 * totalheight / 421 | 15, x[3:(2 + ncol(p1))], factor2color(as.factor(levels( 422 | y[, 1L]))))) 423 | o2 <- matrix(t(o1), ncol = 5, byrow = FALSE) 424 | rect(xleft = o2[, 1], xright = o2[, 2], ybottom = o2[, 3], 425 | ytop = o2[, 4], col = factor2color(as.factor(levels( 426 | y[, 1L])))[o2[, 5]]) 427 | } else { 428 | xoffset <- abs(diff(unique(xs.grid[, 1L])[1:2])) / 2 429 | yoffset <- abs(diff(unique(xs.grid[, 2L])[1:2])) / 2 430 | plot(range(xs.grid[, 1L]), range(xs.grid[, 2L]), col = NULL, 431 | xlab = colnames(xs)[1L], ylab = colnames(xs)[2L], 432 | main = main) 433 | rect(xleft = xs.grid[, 1L] - xoffset, xright = xs.grid[, 1L] 434 | + xoffset, ybottom = xs.grid[, 2L] - yoffset, ytop = 435 | xs.grid[, 2L] + yoffset, col = color, border = NA) 436 | if (length(data.order) > 0) 437 | points(xs[data.order, , drop = FALSE], bg = ybg, col = 438 | data.colour[data.order], pch = pch[data.order]) 439 | } 440 | } else { 441 | # y is continuous 442 | plot.type <- "ccc" 443 | if (view3d){ 444 | yhat <- if (is.null(yhat)) 445 | lapply(model[1], predict1, ylevels = NULL) 446 | else yhat 447 | z <- matrix(prednew[[1L]], ncol = 20L, byrow = FALSE) 448 | zfacet <- (z[-1, -1] + z[-1, -ncol(z)] + z[-nrow(z), -1] 449 | + z[-nrow(z), -ncol(z)]) / 4 450 | colorfacet <- cont2color(zfacet, range(y[, 1L])) 451 | par(mar = c(3, 3, 3, 3)) 452 | persp.object <- suppressWarnings(persp(x = unique(xs.grid[, 1L]) 453 | , y = unique(xs.grid[, 2L]), border = rgb(0.3, 0.3, 0.3), lwd 454 | = 0.1, z = z, col = colorfacet, zlim = range(y), xlab = 455 | colnames(xs)[1L], ylab = colnames(xs)[2L], zlab = colnames(y)[ 456 | 1L], d = 10, ticktype = "detailed", main = 457 | main, theta = theta3d, phi = phi3d)) 458 | if (length(data.order) > 0){ 459 | points(trans3d(xs[data.order, 1L], xs[data.order, 2L], y[ 460 | data.order, 1L], pmat = persp.object), col = data.colour[ 461 | data.order], pch = pch[data.order]) 462 | linestarts <- trans3d(xs[data.order, 1L], xs[data.order, 2L], 463 | y[data.order, 1L], pmat = persp.object) 464 | lineends <- trans3d(xs[data.order, 1L], xs[data.order, 2L], 465 | yhat[[1]][data.order], pmat = persp.object) 466 | segments(x0 = linestarts$x, y0 = linestarts$y, x1 = lineends$x 467 | , y1 = lineends$y, col = data.colour[data.order]) 468 | } 469 | } else { 470 | xoffset <- abs(diff(unique(xs.grid[, 1L])[1:2])) / 2 471 | yoffset <- abs(diff(unique(xs.grid[, 2L])[1:2])) / 2 472 | plot(range(xs.grid[, 1L]), range(xs.grid[, 2L]), col = NULL, 473 | xlab = colnames(xs)[1L], ylab = colnames(xs)[2L], main = 474 | main) 475 | rect(xleft = xs.grid[, 1L] - xoffset, xright = xs.grid[, 1L] + 476 | xoffset, ybottom = xs.grid[, 2L] - yoffset, ytop = xs.grid[, 477 | 2L] + yoffset, col = color, border = NA) 478 | if (length(data.order) > 0) 479 | points(xs[data.order, , drop = FALSE], bg = ybg, col = 480 | data.colour[data.order], pch = pch[data.order]) 481 | } 482 | } 483 | } 484 | } 485 | } 486 | } 487 | dev.flush() 488 | invisible(structure(list(xs = xs, y = y, xc.cond = xc.cond, model = model, 489 | model.colour = model.colour, model.lwd = model.lwd, model.lty = model.lty, 490 | model.name = model.name, yhat = yhat, mar = par("mar"), data.colour = 491 | data.colour, data.order = data.order, view3d = view3d, theta3d = theta3d, 492 | usr = par("usr"), phi3d = phi3d, plot.type = if (exists("plot.type")) 493 | plot.type else NULL, screen = screen(), device = dev.cur(), xs.grid = 494 | xs.grid, newdata = newdata, prednew = prednew, xs.grid = xs.grid, conf = 495 | conf, probs = probs, pch = pch, col = col, ny = ny), class = "xsplot")) 496 | } 497 | 498 | ## Helper function to produce a legend to go with plotxs 499 | 500 | xslegend <- 501 | function (y, name = NULL) 502 | { 503 | if (is.factor(y)){ 504 | par(mar = c(0, 0, 0, 0)) 505 | legend("left", legend = levels(y), fill = factor2color(as.factor(levels(y))) 506 | , title = if (!is.null(name)) name else "", bg = "white") 507 | } else { 508 | par(mar = c(8, 2.2, 8, 2.2)) 509 | fullrange <- abs(diff(range(y))) 510 | yrange <- seq(min(y, na.rm = TRUE) - 0.15 * fullrange, max(y, na.rm = TRUE) 511 | + 0.15 * fullrange, length.out = 80L) 512 | spacing <- abs(diff(unique(yrange[1:2]))) / 2 513 | plot(0, 0, xaxt = "n", main = if (!is.null(name)) name else "", ylab = "", 514 | col = NULL, pch = 16, xlab = "", bty = "n", xlim = c(-0.5, 0.5), ylim = 515 | range(y)) 516 | rect(xleft = -0.7, xright = 0.7, ybottom = yrange - spacing, ytop = yrange + 517 | spacing, col = cont2color(yrange, range(y)), border = NA) 518 | box() 519 | } 520 | } 521 | --------------------------------------------------------------------------------