├── .github ├── .gitignore └── workflows │ └── R-CMD-check.yaml ├── .gitignore ├── LICENSE ├── tests ├── testall.R └── testthat │ ├── _snaps │ ├── alluvial │ │ └── alluvial.png │ └── alluvial.md │ └── test-alluvial.R ├── tools ├── alluvial-1.png └── alluvial_ts-1.png ├── .Rbuildignore ├── Makefile ├── alluvial.Rproj ├── inst └── CITATION ├── R ├── Refugees.R ├── alluvial.R └── alluvial_ts.R ├── DESCRIPTION ├── vignettes ├── alluvial.bib └── alluvial.Rmd ├── man ├── Refugees.Rd ├── alluvial_ts.Rd └── alluvial.Rd ├── NAMESPACE ├── NEWS.md ├── man-roxygen ├── alluvial_ts.R └── alluvial.R ├── data └── Refugees.csv ├── README.Rmd └── README.md /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2016 2 | COPYRIGHT HOLDER: Michal Bojanowski 3 | -------------------------------------------------------------------------------- /tests/testall.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(alluvial) 3 | 4 | test_check("alluvial") 5 | -------------------------------------------------------------------------------- /tools/alluvial-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbojan/alluvial/HEAD/tools/alluvial-1.png -------------------------------------------------------------------------------- /tools/alluvial_ts-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbojan/alluvial/HEAD/tools/alluvial_ts-1.png -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | man-roxygen 2 | ^.*\.Rproj$ 3 | ^\.Rproj\.user$ 4 | README.Rmd 5 | Makefile 6 | ^\.github$ 7 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/alluvial/alluvial.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mbojan/alluvial/HEAD/tests/testthat/_snaps/alluvial/alluvial.png -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | default: 2 | 3 | %.md: %.Rmd 4 | Rscript -e 'knitr::knit("$<", output="$@")' 5 | 6 | %.html: %.md 7 | pandoc -f markdown_github -o $@ $< 8 | 9 | 10 | default: README.md 11 | 12 | preview: README.html 13 | 14 | .PHONY: default preview 15 | -------------------------------------------------------------------------------- /alluvial.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | year <- sub("-.*", "", meta$Date) 2 | note <- sprintf("R package version: %s", meta$Version) 3 | 4 | bibentry( bibtype="Manual", 5 | title="{alluvial}: R Package for Creating Alluvial Diagrams", 6 | author= c(person("Michal", "Bojanowski"), 7 | person("Robin", "Edwards") ), 8 | year = year, 9 | note = note, 10 | url = "https://github.com/mbojan/alluvial", 11 | mheader="To cite package 'allluvial' in publications use:", 12 | mfooter= paste("We have invested some time and effort in creating this package,", 13 | "please cite it when using it for data analysis or other tasks.", sep = " ") 14 | ) 15 | -------------------------------------------------------------------------------- /R/Refugees.R: -------------------------------------------------------------------------------- 1 | #' Refugees data 2 | #' 3 | #' Top 10 countries/territories of origin (excluding "Various") for period 4 | #' 2003-13 of UNHCR statistics on "Persons recognized as refugees under the 5 | #' 1951 UN Convention/1967 Protocol, the 1969 OAU Convention, in accordance 6 | #' with the UNHCR Statute, persons granted a complementary form of protection 7 | #' and those granted temporary protection." 8 | #' 9 | #' @format 10 | #' Data frame with the following columns: 11 | #' \describe{ 12 | #' \item{country}{Country or territory of origin} 13 | #' \item{year}{Year (2003-13)} 14 | #' \item{refugees}{Persons recognized as refugees under the 1951 UN Convention, etc..} 15 | #' } 16 | #' 17 | #' @source 18 | #' \url{http://data.un.org/Data.aspx?d=UNHCR&f=indID\%3aType-Ref} 19 | #' 20 | #' @docType data 21 | #' @name Refugees 22 | NULL 23 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: alluvial 2 | Type: Package 3 | Title: Alluvial Diagrams 4 | Version: 0.2-0 5 | Date: 2017-04-07 6 | Authors@R: c( 7 | person("Michał", "Bojanowski", role=c("aut", "cre"), email="michal2992@gmail.com", comment=c(ORCID="0000-0001-7503-852X")), 8 | person("Robin", "Edwards", role="aut", email="robin.edwards@ucl.ac.uk") 9 | ) 10 | Description: Creating alluvial diagrams (also known as parallel sets plots) for multivariate 11 | and time series-like data. 12 | URL: https://github.com/mbojan/alluvial 13 | BugReports: https://github.com/mbojan/alluvial/issues 14 | Imports: dplyr, tidyr 15 | Suggests: 16 | devtools, 17 | testthat, 18 | reshape2, 19 | knitr, 20 | rmarkdown, 21 | License: MIT + file LICENSE 22 | LazyLoad: yes 23 | LazyData: yes 24 | Config/testthat/edition: 3 25 | VignetteBuilder: knitr 26 | RoxygenNote: 7.1.2 27 | Encoding: UTF-8 28 | -------------------------------------------------------------------------------- /vignettes/alluvial.bib: -------------------------------------------------------------------------------- 1 | @Manual{r-graphics, 2 | title = {R: A Language and Environment for Statistical Computing}, 3 | author = {{R Core Team}}, 4 | organization = {R Foundation for Statistical Computing}, 5 | address = {Vienna, Austria}, 6 | year = {2021}, 7 | url = {https://www.R-project.org/}, 8 | } 9 | 10 | @Misc{r-ggalluvial, 11 | title = {ggalluvial: Alluvial Plots in 'ggplot2'}, 12 | author = {Jason Cory Brunson and Quentin D. Read}, 13 | year = {2020}, 14 | note = {R package version 0.12.3}, 15 | url = {http://corybrunson.github.io/ggalluvial/}, 16 | } 17 | 18 | @Article{ggalluvial-article, 19 | title = {{ggalluvial}: Layered Grammar for Alluvial Plots}, 20 | author = {Jason Cory Brunson}, 21 | year = {2020}, 22 | journal = {Journal of Open Source Software}, 23 | volume = {5}, 24 | number = {49}, 25 | pages = {2017}, 26 | doi = {10.21105/joss.02017}, 27 | } -------------------------------------------------------------------------------- /man/Refugees.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/Refugees.R 3 | \docType{data} 4 | \name{Refugees} 5 | \alias{Refugees} 6 | \title{Refugees data} 7 | \format{ 8 | Data frame with the following columns: 9 | \describe{ 10 | \item{country}{Country or territory of origin} 11 | \item{year}{Year (2003-13)} 12 | \item{refugees}{Persons recognized as refugees under the 1951 UN Convention, etc..} 13 | } 14 | } 15 | \source{ 16 | \url{http://data.un.org/Data.aspx?d=UNHCR&f=indID\%3aType-Ref} 17 | } 18 | \description{ 19 | Top 10 countries/territories of origin (excluding "Various") for period 20 | 2003-13 of UNHCR statistics on "Persons recognized as refugees under the 21 | 1951 UN Convention/1967 Protocol, the 1969 OAU Convention, in accordance 22 | with the UNHCR Statute, persons granted a complementary form of protection 23 | and those granted temporary protection." 24 | } 25 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(alluvial) 4 | export(alluvial_ts) 5 | importFrom(dplyr,"%>%") 6 | importFrom(dplyr,.data) 7 | importFrom(dplyr,arrange_) 8 | importFrom(dplyr,filter_) 9 | importFrom(dplyr,group_by_) 10 | importFrom(dplyr,select_) 11 | importFrom(dplyr,summarise_) 12 | importFrom(dplyr,ungroup) 13 | importFrom(grDevices,col2rgb) 14 | importFrom(grDevices,extendrange) 15 | importFrom(grDevices,rainbow) 16 | importFrom(grDevices,rgb) 17 | importFrom(graphics,abline) 18 | importFrom(graphics,axis) 19 | importFrom(graphics,lines) 20 | importFrom(graphics,mtext) 21 | importFrom(graphics,par) 22 | importFrom(graphics,plot) 23 | importFrom(graphics,plot.new) 24 | importFrom(graphics,plot.window) 25 | importFrom(graphics,polygon) 26 | importFrom(graphics,rect) 27 | importFrom(graphics,text) 28 | importFrom(graphics,xspline) 29 | importFrom(stats,aggregate) 30 | importFrom(stats,approx) 31 | importFrom(tidyr,gather_) 32 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # 0.2-0 2 | 3 | ## New features 4 | 5 | - `alluvial()` exposes `mar` argument that is passed to `par()` so the user can customize the plot margins (which are rather narrow by default) (#31). 6 | - `alluvial()` has two new arguments `xlim_offset` and `ylim_offset` which are passed to `xlim` and `ylim` of `plot()`. This enables adjusting the size of the plotting region and avoid label clipping in some cirumstances (#32) 7 | - The vignette has a new section on reordering the alluvia, and some other improvements. 8 | 9 | ## Minor updates 10 | 11 | - The vignette dependency on RColorBrewer has been removed. 12 | 13 | 14 | 15 | 16 | # alluvial 0.1-2 17 | 18 | ## New features 19 | 20 | - There is a vignette `vignette("alluvial", package="alluvial")` illustrating basic usage of `alluvial()`. The vignette needs `dplyr` package so it is now `Suggested`. 21 | 22 | ## Minor updates 23 | 24 | - README has been updated. It is dynamically generated from an associated `.Rmd` file. Some typos fixed. 25 | 26 | 27 | # alluvial 0.1-1 28 | 29 | First release. -------------------------------------------------------------------------------- /tests/testthat/test-alluvial.R: -------------------------------------------------------------------------------- 1 | # Oddities in input data -------------------------------------------------- 2 | 3 | test_that("Character vectors in data do not trigger warnings", { 4 | d <- data.frame( 5 | x1=rep(letters[1:2], 2), 6 | x2=rep(letters[1:2], each=2), 7 | freq=1:4 8 | ) 9 | expect_silent({ 10 | pdf(NULL) 11 | alluvial(d[,1:2], freq=d$freq) 12 | dev.off() 13 | } ) 14 | }) 15 | 16 | 17 | 18 | # Output ------------------------------------------------------------------ 19 | 20 | test_that("alluvial() returns proper value", { 21 | d <- data.frame( 22 | x1=rep(letters[1:2], 2), 23 | x2=rep(letters[1:2], each=2), 24 | freq=1:4 25 | ) 26 | pdf(NULL) 27 | expect_snapshot_value(alluvial(d[,1:2], freq=d$freq), style = "deparse") 28 | dev.off() 29 | }) 30 | 31 | test_that("alluvial() produces a proper plot", { 32 | skip_on_cran() 33 | skip_on_ci() 34 | d <- data.frame( 35 | x1=rep(letters[1:2], 2), 36 | x2=rep(letters[1:2], each=2), 37 | freq=1:4 38 | ) 39 | expect_snapshot_file({ 40 | tfile <- tempfile() 41 | png(tfile) 42 | alluvial(d[,1:2], freq=d$freq) 43 | dev.off() 44 | tfile 45 | }, 46 | "alluvial.png" 47 | ) 48 | }) 49 | -------------------------------------------------------------------------------- /tests/testthat/_snaps/alluvial.md: -------------------------------------------------------------------------------- 1 | # alluvial() returns proper value 2 | 3 | list(endpoints = structure(list(x1 = structure(c(1L, 2L, 1L, 4 | 2L, 1L, 2L, 1L, 2L), .Label = c("a", "b"), class = "factor"), 5 | x2 = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("a", 6 | "b"), class = "factor"), .bottom = c(0, 0.43, 0.095, 0.62, 7 | 0, 0.095, 0.335, 0.62), .top = c(0.095, 0.62, 0.38, 1, 0.095, 8 | 0.285, 0.62, 1), .axis = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L 9 | )), row.names = c(NA, -8L), class = "data.frame"), category_midpoints = list( 10 | x1 = structure(c(a = 0.19, b = 0.715), .Dim = 2L, .Dimnames = list( 11 | c("a", "b"))), x2 = structure(c(a = 0.1425, b = 0.6675 12 | ), .Dim = 2L, .Dimnames = list(c("a", "b")))), alluvium_midpoints = structure(list( 13 | x1 = structure(c(1L, 1L, 2L, 2L), .Label = c("a", "b"), class = "factor"), 14 | x2 = structure(c(1L, 2L, 1L, 2L), .Label = c("a", "b"), class = "factor"), 15 | .axis_from = c(1L, 1L, 1L, 1L), .axis_to = c(2L, 2L, 2L, 16 | 2L), .x = c(1.5, 1.5, 1.5, 1.5), .y = c(0.0475, 0.3575, 0.3575, 17 | 0.81), .slope = c(0, 0.266666666666667, -0.372222222222222, 18 | 0)), row.names = c(NA, -4L), class = "data.frame")) 19 | 20 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | pull_request: 7 | branches: [main, master] 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | R-CMD-check: 13 | runs-on: ${{ matrix.config.os }} 14 | 15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 16 | 17 | strategy: 18 | fail-fast: false 19 | matrix: 20 | config: 21 | - {os: macOS-latest, r: 'release'} 22 | - {os: windows-latest, r: 'release'} 23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 24 | - {os: ubuntu-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'oldrel-1'} 26 | 27 | env: 28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 29 | R_KEEP_PKG_SOURCE: yes 30 | 31 | steps: 32 | - uses: actions/checkout@v2 33 | 34 | - uses: r-lib/actions/setup-pandoc@v1 35 | 36 | - uses: r-lib/actions/setup-r@v1 37 | with: 38 | r-version: ${{ matrix.config.r }} 39 | http-user-agent: ${{ matrix.config.http-user-agent }} 40 | use-public-rspm: true 41 | 42 | - uses: r-lib/actions/setup-r-dependencies@v1 43 | with: 44 | extra-packages: rcmdcheck 45 | 46 | - uses: r-lib/actions/check-r-package@v1 47 | -------------------------------------------------------------------------------- /man-roxygen/alluvial_ts.R: -------------------------------------------------------------------------------- 1 | if( require(reshape2) ) 2 | { 3 | data(Refugees) 4 | reshape2::dcast(Refugees, country ~ year, value.var = 'refugees') 5 | d <- Refugees 6 | 7 | set.seed(39) # for nice colours 8 | cols <- hsv(h = sample(1:10/10), s = sample(3:12)/15, v = sample(3:12)/15) 9 | 10 | alluvial_ts(d) 11 | alluvial_ts(d, wave = .2, ygap = 5, lwd = 3) 12 | alluvial_ts(d, wave = .3, ygap = 5, col = cols) 13 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, rankup = TRUE) 14 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'down') 15 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'centred', grid=TRUE, 16 | grid.lwd = 5) 17 | alluvial_ts(d, wave = 0, ygap = 0, col = cols, alpha = .9, border = 'white', 18 | grid = TRUE, grid.lwd = 5) 19 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.4) 20 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.3, lab.cex = .7) 21 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.3, lab.cex=.7, 22 | leg.cex=.7, leg.col = 'white') 23 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, leg.mode = FALSE, leg.x = .1, 24 | leg.y = .7, leg.max = 3e6) 25 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'centred', alpha=.9, 26 | grid = TRUE, grid.lwd = 5, xmargin = 0.2, lab.cex = .7, xlab = '', 27 | ylab = '', border = NA, axis.cex = .8, leg.cex = .7, 28 | leg.col='white', 29 | title = "UNHCR-recognised refugees\nTop 10 countries (2003-13)\n") 30 | 31 | # non time-series example - Virginia deaths dataset 32 | d <- reshape2::melt(data.frame(age=row.names(VADeaths), VADeaths), id.vars='age')[,c(2,1,3)] 33 | names(d) = c('pop_group','age_group','deaths') 34 | alluvial_ts(d) 35 | } 36 | -------------------------------------------------------------------------------- /man-roxygen/alluvial.R: -------------------------------------------------------------------------------- 1 | # Titanic data 2 | tit <- as.data.frame(Titanic) 3 | 4 | # 2d 5 | tit2d <- aggregate( Freq ~ Class + Survived, data=tit, sum) 6 | alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8, 7 | gap.width=0.1, col= "steelblue", border="white", 8 | layer = tit2d$Survived != "Yes" ) 9 | 10 | alluvial( tit2d[,1:2], freq=tit2d$Freq, 11 | hide=tit2d$Freq < 150, 12 | xw=0.0, alpha=0.8, 13 | gap.width=0.1, col= "steelblue", border="white", 14 | layer = tit2d$Survived != "Yes" ) 15 | 16 | # 3d 17 | tit3d <- aggregate( Freq ~ Class + Sex + Survived, data=tit, sum) 18 | 19 | alluvial(tit3d[,1:3], freq=tit3d$Freq, alpha=1, xw=0.2, 20 | col=ifelse( tit3d$Survived == "No", "red", "gray"), 21 | layer = tit3d$Sex != "Female", 22 | border="white") 23 | 24 | 25 | # 4d 26 | alluvial( tit[,1:4], freq=tit$Freq, border=NA, 27 | hide = tit$Freq < quantile(tit$Freq, .50), 28 | col=ifelse( tit$Class == "3rd" & tit$Sex == "Male", "red", "gray") ) 29 | 30 | # 3d example with custom ordering 31 | # Reorder "Sex" axis according to survival status 32 | ord <- list(NULL, with(tit3d, order(Sex, Survived)), NULL) 33 | alluvial(tit3d[,1:3], freq=tit3d$Freq, alpha=1, xw=0.2, 34 | col=ifelse( tit3d$Survived == "No", "red", "gray"), 35 | layer = tit3d$Sex != "Female", 36 | border="white", ordering=ord) 37 | 38 | # Possible blocks options 39 | for (blocks in c(TRUE, FALSE, "bookends")) { 40 | 41 | # Elaborate alluvial diagram from main examples file 42 | alluvial( tit[, 1:4], freq = tit$Freq, border = NA, 43 | hide = tit$Freq < quantile(tit$Freq, .50), 44 | col = ifelse( tit$Class == "3rd" & tit$Sex == "Male", 45 | "red", "gray" ), 46 | blocks = blocks ) 47 | } 48 | 49 | 50 | # Data returned 51 | x <- alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8, 52 | gap.width=0.1, col= "steelblue", border="white", 53 | layer = tit2d$Survived != "Yes" ) 54 | points( rep(1, 16), x$endpoints[[1]], col="green") 55 | points( rep(2, 16), x$endpoints[[2]], col="blue") 56 | -------------------------------------------------------------------------------- /data/Refugees.csv: -------------------------------------------------------------------------------- 1 | "country";"year";"refugees" 2 | "Afghanistan";2003;2136043 3 | "Burundi";2003;531637 4 | "Congo DRC";2003;453465 5 | "Iraq";2003;368580 6 | "Myanmar";2003;151384 7 | "Palestine";2003;350568 8 | "Somalia";2003;402336 9 | "Sudan";2003;606242 10 | "Syria";2003;20819 11 | "Vietnam";2003;363179 12 | "Afghanistan";2004;2084109 13 | "Burundi";2004;485454 14 | "Congo DRC";2004;461042 15 | "Iraq";2004;311905 16 | "Myanmar";2004;161013 17 | "Palestine";2004;350617 18 | "Somalia";2004;389304 19 | "Sudan";2004;730647 20 | "Syria";2004;21440 21 | "Vietnam";2004;349809 22 | "Afghanistan";2005;2166149 23 | "Burundi";2005;438706 24 | "Congo DRC";2005;430929 25 | "Iraq";2005;262299 26 | "Myanmar";2005;164864 27 | "Palestine";2005;349673 28 | "Somalia";2005;395553 29 | "Sudan";2005;693632 30 | "Syria";2005;16401 31 | "Vietnam";2005;358268 32 | "Afghanistan";2006;2107519 33 | "Burundi";2006;396541 34 | "Congo DRC";2006;401914 35 | "Iraq";2006;1450905 36 | "Myanmar";2006;202826 37 | "Palestine";2006;334142 38 | "Somalia";2006;464252 39 | "Sudan";2006;686311 40 | "Syria";2006;12338 41 | "Vietnam";2006;374279 42 | "Afghanistan";2007;1909911 43 | "Burundi";2007;375715 44 | "Congo DRC";2007;370386 45 | "Iraq";2007;2279245 46 | "Myanmar";2007;191256 47 | "Palestine";2007;335219 48 | "Somalia";2007;455356 49 | "Sudan";2007;523032 50 | "Syria";2007;13671 51 | "Vietnam";2007;327776 52 | "Afghanistan";2008;1817913 53 | "Burundi";2008;281592 54 | "Congo DRC";2008;367995 55 | "Iraq";2008;1873519 56 | "Myanmar";2008;184347 57 | "Palestine";2008;333990 58 | "Somalia";2008;559153 59 | "Sudan";2008;397013 60 | "Syria";2008;15186 61 | "Vietnam";2008;328183 62 | "Afghanistan";2009;1905804 63 | "Burundi";2009;94239 64 | "Congo DRC";2009;455852 65 | "Iraq";2009;1785212 66 | "Myanmar";2009;206650 67 | "Palestine";2009;95177 68 | "Somalia";2009;678308 69 | "Sudan";2009;348500 70 | "Syria";2009;17884 71 | "Vietnam";2009;339289 72 | "Afghanistan";2010;3054709 73 | "Burundi";2010;84064 74 | "Congo DRC";2010;476693 75 | "Iraq";2010;1683575 76 | "Myanmar";2010;215644 77 | "Palestine";2010;93299 78 | "Somalia";2010;770148 79 | "Sudan";2010;379067 80 | "Syria";2010;18428 81 | "Vietnam";2010;338698 82 | "Afghanistan";2011;2664436 83 | "Burundi";2011;101288 84 | "Congo DRC";2011;491481 85 | "Iraq";2011;1428308 86 | "Myanmar";2011;214594 87 | "Palestine";2011;94121 88 | "Somalia";2011;1075148 89 | "Sudan";2011;491013 90 | "Syria";2011;19900 91 | "Vietnam";2011;337829 92 | "Afghanistan";2012;2586034 93 | "Burundi";2012;73362 94 | "Congo DRC";2012;509082 95 | "Iraq";2012;746181 96 | "Myanmar";2012;215338 97 | "Palestine";2012;94820 98 | "Somalia";2012;1136713 99 | "Sudan";2012;558195 100 | "Syria";2012;728603 101 | "Vietnam";2012;336939 102 | "Afghanistan";2013;2556507 103 | "Burundi";2013;72652 104 | "Congo DRC";2013;499320 105 | "Iraq";2013;401384 106 | "Myanmar";2013;222053 107 | "Palestine";2013;96044 108 | "Somalia";2013;1121772 109 | "Sudan";2013;636400 110 | "Syria";2013;2457255 111 | "Vietnam";2013;314105 112 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: 3 | github_document: 4 | html_preview: false 5 | bibliography: vignettes/alluvial.bib 6 | editor_options: 7 | chunk_output_type: console 8 | --- 9 | 10 | ```{r setup, include=FALSE, cache=FALSE} 11 | library(alluvial) 12 | library(dplyr) 13 | 14 | knitr::opts_chunk$set( 15 | fig.width=8, 16 | fig.height=6, 17 | fig.path = "tools/" 18 | ) 19 | 20 | knitr::render_markdown(strict=FALSE) 21 | ``` 22 | 23 | 24 | 25 | # R package for drawing alluvial diagrams 26 | 27 | 28 | [![R-CMD-check](https://github.com/mbojan/alluvial/workflows/R-CMD-check/badge.svg)](https://github.com/mbojan/alluvial/actions) 29 | [![rstudio mirror downloads](http://cranlogs.r-pkg.org/badges/alluvial?color=2ED968)](http://cranlogs.r-pkg.org/) 30 | [![cran version](http://www.r-pkg.org/badges/version/alluvial)](https://cran.r-project.org/package=alluvial) 31 | 32 | 33 | What are alluvial diagrams? See for example: 34 | 35 | * [Wikipedia](http://en.wikipedia.org/wiki/Alluvial_diagram) 36 | * My [blog post](http://bc.bojanorama.pl/2014/03/alluvial-diagrams) showing-off this package 37 | * Some discussion on [CrossValidated](http://stats.stackexchange.com/questions/12029/is-it-possible-to-create-parallel-sets-plot-using-r) 38 | 39 | This package use base R **graphics** [@r-graphics] package. For Grammar of Graphics implementation see [**ggalluvial**](https://github.com/corybrunson/ggalluvial) [@r-ggalluvial;@ggalluvial-article]. 40 | 41 | 42 | ## Examples 43 | 44 | Alluvial diagram of `datasets::Titanic` data made with `alluvial()`. Notice how each category block becomes a stacked barchart showing relative frequency of survivors. 45 | 46 | ```{r alluvial} 47 | tit <- tibble::as_tibble(Titanic) 48 | 49 | tit %>% head() %>% knitr::kable() 50 | 51 | alluvial( 52 | select(tit, Survived, Sex, Age, Class), 53 | freq=tit$n, 54 | col = ifelse(tit$Survived == "Yes", "orange", "grey"), 55 | border = ifelse(tit$Survived == "Yes", "orange", "grey"), 56 | layer = tit$Survived != "Yes", 57 | alpha = 0.8, 58 | blocks=FALSE 59 | ) 60 | ``` 61 | 62 | 63 | 64 | 65 | 66 | Alluvial diagram for multiple time series / cross-sectional data based on `alluvial::Refugees` data made with `alluvial_ts()`. 67 | 68 | ```{r alluvial_ts} 69 | Refugees %>% head() %>% knitr::kable() 70 | 71 | 72 | set.seed(39) # for nice colours 73 | cols <- hsv(h = sample(1:10/10), s = sample(3:12)/15, v = sample(3:12)/15) 74 | 75 | alluvial_ts(Refugees, wave = .3, ygap = 5, col = cols, plotdir = 'centred', alpha=.9, 76 | grid = TRUE, grid.lwd = 5, xmargin = 0.2, lab.cex = .7, xlab = '', 77 | ylab = '', border = NA, axis.cex = .8, leg.cex = .7, 78 | leg.col='white', 79 | title = "UNHCR-recognised refugees\nTop 10 countries (2003-13)\n") 80 | ``` 81 | 82 | 83 | 84 | ## Installation 85 | 86 | Install stable version from CRAN using 87 | 88 | ```{r, eval=FALSE} 89 | install.packages("alluvial") 90 | ``` 91 | 92 | or development version from GitHub using `remotes::install_github()`: 93 | 94 | ```{r, eval=FALSE} 95 | remotes::install_github("mbojan/alluvial", build_vignettes=TRUE) 96 | ``` 97 | 98 | 99 | 100 | 101 | ## References 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # R package for drawing alluvial diagrams 3 | 4 | 5 | 6 | [![R-CMD-check](https://github.com/mbojan/alluvial/workflows/R-CMD-check/badge.svg)](https://github.com/mbojan/alluvial/actions) 7 | [![rstudio mirror 8 | downloads](http://cranlogs.r-pkg.org/badges/alluvial?color=2ED968)](http://cranlogs.r-pkg.org/) 9 | [![cran 10 | version](http://www.r-pkg.org/badges/version/alluvial)](https://cran.r-project.org/package=alluvial) 11 | 12 | 13 | What are alluvial diagrams? See for example: 14 | 15 | - [Wikipedia](http://en.wikipedia.org/wiki/Alluvial_diagram) 16 | - My [blog post](http://bc.bojanorama.pl/2014/03/alluvial-diagrams) 17 | showing-off this package 18 | - Some discussion on 19 | [CrossValidated](http://stats.stackexchange.com/questions/12029/is-it-possible-to-create-parallel-sets-plot-using-r) 20 | 21 | This package use base R **graphics** (R Core Team 2021) package. For 22 | Grammar of Graphics implementation see 23 | [**ggalluvial**](https://github.com/corybrunson/ggalluvial) (Brunson and 24 | Read 2020; Brunson 2020). 25 | 26 | ## Examples 27 | 28 | Alluvial diagram of `datasets::Titanic` data made with `alluvial()`. 29 | Notice how each category block becomes a stacked barchart showing 30 | relative frequency of survivors. 31 | 32 | ``` r 33 | tit <- tibble::as_tibble(Titanic) 34 | 35 | tit %>% head() %>% knitr::kable() 36 | ``` 37 | 38 | | Class | Sex | Age | Survived | n | 39 | |:------|:-------|:------|:---------|----:| 40 | | 1st | Male | Child | No | 0 | 41 | | 2nd | Male | Child | No | 0 | 42 | | 3rd | Male | Child | No | 35 | 43 | | Crew | Male | Child | No | 0 | 44 | | 1st | Female | Child | No | 0 | 45 | | 2nd | Female | Child | No | 0 | 46 | 47 | ``` r 48 | alluvial( 49 | select(tit, Survived, Sex, Age, Class), 50 | freq=tit$n, 51 | col = ifelse(tit$Survived == "Yes", "orange", "grey"), 52 | border = ifelse(tit$Survived == "Yes", "orange", "grey"), 53 | layer = tit$Survived != "Yes", 54 | alpha = 0.8, 55 | blocks=FALSE 56 | ) 57 | ``` 58 | 59 | ![](tools/alluvial-1.png) 60 | 61 | Alluvial diagram for multiple time series / cross-sectional data based 62 | on `alluvial::Refugees` data made with `alluvial_ts()`. 63 | 64 | ``` r 65 | Refugees %>% head() %>% knitr::kable() 66 | ``` 67 | 68 | | country | year | refugees | 69 | |:------------|-----:|---------:| 70 | | Afghanistan | 2003 | 2136043 | 71 | | Burundi | 2003 | 531637 | 72 | | Congo DRC | 2003 | 453465 | 73 | | Iraq | 2003 | 368580 | 74 | | Myanmar | 2003 | 151384 | 75 | | Palestine | 2003 | 350568 | 76 | 77 | ``` r 78 | set.seed(39) # for nice colours 79 | cols <- hsv(h = sample(1:10/10), s = sample(3:12)/15, v = sample(3:12)/15) 80 | 81 | alluvial_ts(Refugees, wave = .3, ygap = 5, col = cols, plotdir = 'centred', alpha=.9, 82 | grid = TRUE, grid.lwd = 5, xmargin = 0.2, lab.cex = .7, xlab = '', 83 | ylab = '', border = NA, axis.cex = .8, leg.cex = .7, 84 | leg.col='white', 85 | title = "UNHCR-recognised refugees\nTop 10 countries (2003-13)\n") 86 | ``` 87 | 88 | ![](tools/alluvial_ts-1.png) 89 | 90 | ## Installation 91 | 92 | Install stable version from CRAN using 93 | 94 | ``` r 95 | install.packages("alluvial") 96 | ``` 97 | 98 | or development version from GitHub using `remotes::install_github()`: 99 | 100 | ``` r 101 | remotes::install_github("mbojan/alluvial", build_vignettes=TRUE) 102 | ``` 103 | 104 | ## References 105 | 106 |
107 | 108 |
109 | 110 | Brunson, Jason Cory. 2020. “ggalluvial: 111 | Layered Grammar for Alluvial Plots.” *Journal of Open Source Software* 5 112 | (49): 2017. . 113 | 114 |
115 | 116 |
117 | 118 | Brunson, Jason Cory, and Quentin D. Read. 2020. “Ggalluvial: Alluvial 119 | Plots in ’Ggplot2’.” . 120 | 121 |
122 | 123 |
124 | 125 | R Core Team. 2021. *R: A Language and Environment for Statistical 126 | Computing*. Vienna, Austria: R Foundation for Statistical Computing. 127 | . 128 | 129 |
130 | 131 |
132 | -------------------------------------------------------------------------------- /man/alluvial_ts.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/alluvial_ts.R 3 | \name{alluvial_ts} 4 | \alias{alluvial_ts} 5 | \title{Alluvial diagram for multiple time series data} 6 | \usage{ 7 | alluvial_ts( 8 | dat, 9 | wave = NA, 10 | ygap = 1, 11 | col = NA, 12 | alpha = NA, 13 | plotdir = "up", 14 | rankup = FALSE, 15 | lab.cex = 1, 16 | lab.col = "black", 17 | xmargin = 0.1, 18 | axis.col = "black", 19 | title = NA, 20 | title.cex = 1, 21 | axis.cex = 1, 22 | grid = FALSE, 23 | grid.col = "grey80", 24 | grid.lwd = 1, 25 | leg.mode = TRUE, 26 | leg.x = 0.1, 27 | leg.y = 0.9, 28 | leg.cex = 1, 29 | leg.col = "black", 30 | leg.lty = NA, 31 | leg.lwd = NA, 32 | leg.max = NA, 33 | xlab = NA, 34 | ylab = NA, 35 | xlab.pos = 2, 36 | ylab.pos = 1, 37 | lwd = 1, 38 | ... 39 | ) 40 | } 41 | \arguments{ 42 | \item{dat}{data.frame of time-series (or suitable equivalent continuously disaggregated data), with 3 columns (in order: category, time-variable, value) with <= 1 row for each category-time combination} 43 | 44 | \item{wave}{numeric, curve wavyness defined in terms of x axis data range - i.e. bezier point offset. Experiment to get this right} 45 | 46 | \item{ygap}{numeric, vertical distance between polygons - a multiple of 10\% of the mean data value} 47 | 48 | \item{col}{colour, value or vector of length matching the number of unique categories. Individual colours of vector are mapped to categories in alpha-numeric order} 49 | 50 | \item{alpha}{numeric, [0,1] polygon fill transparency} 51 | 52 | \item{plotdir}{character, string ('up', 'down' or 'centred') giving the vertical alignment of polygon stacks} 53 | 54 | \item{rankup}{logical, rank polygons on time axes upward by magnitude (largest to smallest) or not} 55 | 56 | \item{lab.cex}{numeric, category label font size} 57 | 58 | \item{lab.col}{colour, of category label} 59 | 60 | \item{xmargin}{numeric [0,1], proportional space for category labels} 61 | 62 | \item{axis.col}{colour, of axes} 63 | 64 | \item{title}{character, plot title} 65 | 66 | \item{title.cex}{numeric, plot title font size} 67 | 68 | \item{axis.cex}{numeric, font size of x-axis break labels} 69 | 70 | \item{grid}{logical, plot vertical axes} 71 | 72 | \item{grid.col}{colour, of grid axes} 73 | 74 | \item{grid.lwd}{numeric, line width of grid axes} 75 | 76 | \item{leg.mode}{logical, draw y-axis scale legend inside largest data point (TRUE default) or alternatively with custom position/value (FALSE)} 77 | 78 | \item{leg.x, leg.y}{numeric [0,1], x/y positions of legend if leg.mode = FALSE} 79 | 80 | \item{leg.cex}{numeric, legend text size} 81 | 82 | \item{leg.col}{colour, of legend lines and text} 83 | 84 | \item{leg.lty}{numeric, code for legend line type} 85 | 86 | \item{leg.lwd}{numeric, legend line width} 87 | 88 | \item{leg.max}{numeric, legend scale line width} 89 | 90 | \item{xlab, ylab}{character, x-axis / y-axis titles} 91 | 92 | \item{xlab.pos, ylab.pos}{numeric, perpendicular offset for axis titles} 93 | 94 | \item{lwd}{numeric, value or vector of length matching the number of unique categories for polygon stroke line width. Individual values of vector are mapped to categories in alpha-numeric order} 95 | 96 | \item{...}{arguments to pass to polygon()} 97 | } 98 | \description{ 99 | This is a variant of alluvial diagram suitable for multiple 100 | (cross-sectional) time series. It also works with continuous variables equivalent to time 101 | } 102 | \examples{ 103 | if( require(reshape2) ) 104 | { 105 | data(Refugees) 106 | reshape2::dcast(Refugees, country ~ year, value.var = 'refugees') 107 | d <- Refugees 108 | 109 | set.seed(39) # for nice colours 110 | cols <- hsv(h = sample(1:10/10), s = sample(3:12)/15, v = sample(3:12)/15) 111 | 112 | alluvial_ts(d) 113 | alluvial_ts(d, wave = .2, ygap = 5, lwd = 3) 114 | alluvial_ts(d, wave = .3, ygap = 5, col = cols) 115 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, rankup = TRUE) 116 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'down') 117 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'centred', grid=TRUE, 118 | grid.lwd = 5) 119 | alluvial_ts(d, wave = 0, ygap = 0, col = cols, alpha = .9, border = 'white', 120 | grid = TRUE, grid.lwd = 5) 121 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.4) 122 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.3, lab.cex = .7) 123 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.3, lab.cex=.7, 124 | leg.cex=.7, leg.col = 'white') 125 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, leg.mode = FALSE, leg.x = .1, 126 | leg.y = .7, leg.max = 3e6) 127 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'centred', alpha=.9, 128 | grid = TRUE, grid.lwd = 5, xmargin = 0.2, lab.cex = .7, xlab = '', 129 | ylab = '', border = NA, axis.cex = .8, leg.cex = .7, 130 | leg.col='white', 131 | title = "UNHCR-recognised refugees\nTop 10 countries (2003-13)\n") 132 | 133 | # non time-series example - Virginia deaths dataset 134 | d <- reshape2::melt(data.frame(age=row.names(VADeaths), VADeaths), id.vars='age')[,c(2,1,3)] 135 | names(d) = c('pop_group','age_group','deaths') 136 | alluvial_ts(d) 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /man/alluvial.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/alluvial.R 3 | \name{alluvial} 4 | \alias{alluvial} 5 | \title{Alluvial diagram} 6 | \usage{ 7 | alluvial( 8 | ..., 9 | freq, 10 | col = "gray", 11 | border = 0, 12 | layer, 13 | hide = FALSE, 14 | alpha = 0.5, 15 | gap.width = 0.05, 16 | xw = 0.1, 17 | cw = 0.1, 18 | blocks = TRUE, 19 | ordering = NULL, 20 | axis_labels = NULL, 21 | mar = c(2, 1, 1, 1), 22 | cex = par("cex"), 23 | xlim_offset = c(0, 0), 24 | ylim_offset = c(0, 0), 25 | cex.axis = par("cex.axis"), 26 | axes = TRUE, 27 | ann = TRUE, 28 | title = NULL 29 | ) 30 | } 31 | \arguments{ 32 | \item{...}{vectors or data frames, all for the same number of observations} 33 | 34 | \item{freq}{numeric, vector of frequencies of the same length as the number of observations} 35 | 36 | \item{col}{vector of colors of the stripes} 37 | 38 | \item{border}{vector of border colors for the stripes} 39 | 40 | \item{layer}{numeric, order of drawing of the stripes} 41 | 42 | \item{hide}{logical, should particular stripe be plotted} 43 | 44 | \item{alpha}{numeric, vector of transparency of the stripes} 45 | 46 | \item{gap.width}{numeric, relative width of inter-category gaps} 47 | 48 | \item{xw}{numeric, the distance from the set axis to the control points of the xspline} 49 | 50 | \item{cw}{numeric, width of the category axis} 51 | 52 | \item{blocks}{logical, whether to use blocks to tie the flows together at each category, versus contiguous ribbons (also admits character value "bookends")} 53 | 54 | \item{ordering}{list of numeric vectors allowing to reorder the alluvia on each axis separately, see Examples} 55 | 56 | \item{axis_labels}{character, labels of the axes, defaults to variable names in the data} 57 | 58 | \item{mar}{numeric, plot margins as in \code{\link{par}}} 59 | 60 | \item{cex, cex.axis}{numeric, scaling of fonts of category labels and axis labels respectively. See \code{\link{par}}.} 61 | 62 | \item{xlim_offset, ylim_offset}{numeric vectors of length 2, passed to 63 | \code{xlim} and \code{ylim} of \code{\link{plot}}, and allow for adjusting 64 | the limits of the plotting region} 65 | 66 | \item{axes}{logical, whether to draw axes, defaults to TRUE} 67 | 68 | \item{ann}{logical, whether to draw annotations: category labels. Defaults to TRUE} 69 | 70 | \item{title}{character, plot title} 71 | } 72 | \value{ 73 | Invisibly a list with elements: 74 | \item{\code{endpoints}}{A data frame with data on locations of the stripes with columns: 75 | \describe{ 76 | \item{\code{...}}{Vectors/data frames supplied to 77 | \code{alluvial} through \code{...} that define the axes} 78 | \item{\code{.bottom},\code{.top}}{Y locations of bottom 79 | and top coordinates respectively at which the stripes 80 | originate from the axis \code{.axis}} 81 | \item{\code{.axis}}{Axis number counting from the left} 82 | } } 83 | \item{\code{category_midpoints}}{List of vectors of Y 84 | locations of category block midpoints.} 85 | \item{\code{alluvium_midpoints}}{A data frame with 86 | location of midpoints on each alluvium segement with 87 | columns: 88 | \describe{ 89 | \item{\code{...}}{Vectors/data frames supplied to 90 | \code{alluvial} through the \code{...}} 91 | \item{\code{.axis_from}, \code{.axis_to}}{IDs of axes that 92 | a segment originates from and goes to} 93 | \item{\code{.x}, \code{.y}}{X and Y locations of the alluvium midpoints} 94 | \item{\code{.slope}}{The (approximate) slope of the alluvium at the midpoint} 95 | } 96 | } 97 | } 98 | \description{ 99 | Drawing alluvial diagrams, also known as parallel set plots. 100 | } 101 | \note{ 102 | Please mind that the API is planned to change to be more compatible 103 | with \pkg{dplyr} verbs. 104 | } 105 | \examples{ 106 | # Titanic data 107 | tit <- as.data.frame(Titanic) 108 | 109 | # 2d 110 | tit2d <- aggregate( Freq ~ Class + Survived, data=tit, sum) 111 | alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8, 112 | gap.width=0.1, col= "steelblue", border="white", 113 | layer = tit2d$Survived != "Yes" ) 114 | 115 | alluvial( tit2d[,1:2], freq=tit2d$Freq, 116 | hide=tit2d$Freq < 150, 117 | xw=0.0, alpha=0.8, 118 | gap.width=0.1, col= "steelblue", border="white", 119 | layer = tit2d$Survived != "Yes" ) 120 | 121 | # 3d 122 | tit3d <- aggregate( Freq ~ Class + Sex + Survived, data=tit, sum) 123 | 124 | alluvial(tit3d[,1:3], freq=tit3d$Freq, alpha=1, xw=0.2, 125 | col=ifelse( tit3d$Survived == "No", "red", "gray"), 126 | layer = tit3d$Sex != "Female", 127 | border="white") 128 | 129 | 130 | # 4d 131 | alluvial( tit[,1:4], freq=tit$Freq, border=NA, 132 | hide = tit$Freq < quantile(tit$Freq, .50), 133 | col=ifelse( tit$Class == "3rd" & tit$Sex == "Male", "red", "gray") ) 134 | 135 | # 3d example with custom ordering 136 | # Reorder "Sex" axis according to survival status 137 | ord <- list(NULL, with(tit3d, order(Sex, Survived)), NULL) 138 | alluvial(tit3d[,1:3], freq=tit3d$Freq, alpha=1, xw=0.2, 139 | col=ifelse( tit3d$Survived == "No", "red", "gray"), 140 | layer = tit3d$Sex != "Female", 141 | border="white", ordering=ord) 142 | 143 | # Possible blocks options 144 | for (blocks in c(TRUE, FALSE, "bookends")) { 145 | 146 | # Elaborate alluvial diagram from main examples file 147 | alluvial( tit[, 1:4], freq = tit$Freq, border = NA, 148 | hide = tit$Freq < quantile(tit$Freq, .50), 149 | col = ifelse( tit$Class == "3rd" & tit$Sex == "Male", 150 | "red", "gray" ), 151 | blocks = blocks ) 152 | } 153 | 154 | 155 | # Data returned 156 | x <- alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8, 157 | gap.width=0.1, col= "steelblue", border="white", 158 | layer = tit2d$Survived != "Yes" ) 159 | points( rep(1, 16), x$endpoints[[1]], col="green") 160 | points( rep(2, 16), x$endpoints[[2]], col="blue") 161 | } 162 | -------------------------------------------------------------------------------- /R/alluvial.R: -------------------------------------------------------------------------------- 1 | #' Alluvial diagram 2 | #' 3 | #' Drawing alluvial diagrams, also known as parallel set plots. 4 | #' 5 | #' @param ... vectors or data frames, all for the same number of observations 6 | #' @param freq numeric, vector of frequencies of the same length as the number of observations 7 | #' @param col vector of colors of the stripes 8 | #' @param border vector of border colors for the stripes 9 | #' @param layer numeric, order of drawing of the stripes 10 | #' @param hide logical, should particular stripe be plotted 11 | #' @param alpha numeric, vector of transparency of the stripes 12 | #' @param gap.width numeric, relative width of inter-category gaps 13 | #' @param xw numeric, the distance from the set axis to the control points of the xspline 14 | #' @param cw numeric, width of the category axis 15 | #' @param blocks logical, whether to use blocks to tie the flows together at each category, versus contiguous ribbons (also admits character value "bookends") 16 | #' @param ordering list of numeric vectors allowing to reorder the alluvia on each axis separately, see Examples 17 | #' @param axis_labels character, labels of the axes, defaults to variable names in the data 18 | #' @param mar numeric, plot margins as in \code{\link{par}} 19 | #' @param cex,cex.axis numeric, scaling of fonts of category labels and axis labels respectively. See \code{\link{par}}. 20 | #' @param xlim_offset,ylim_offset numeric vectors of length 2, passed to 21 | #' \code{xlim} and \code{ylim} of \code{\link{plot}}, and allow for adjusting 22 | #' the limits of the plotting region 23 | #' @param axes logical, whether to draw axes, defaults to TRUE 24 | #' @param ann logical, whether to draw annotations: category labels. Defaults to TRUE 25 | #' @param title character, plot title 26 | #' 27 | #' @return Invisibly a list with elements: 28 | #' \item{\code{endpoints}}{A data frame with data on locations of the stripes with columns: 29 | #' \describe{ 30 | #' \item{\code{...}}{Vectors/data frames supplied to 31 | #' \code{alluvial} through \code{...} that define the axes} 32 | #' \item{\code{.bottom},\code{.top}}{Y locations of bottom 33 | #' and top coordinates respectively at which the stripes 34 | #' originate from the axis \code{.axis}} 35 | #' \item{\code{.axis}}{Axis number counting from the left} 36 | #' } } 37 | #' \item{\code{category_midpoints}}{List of vectors of Y 38 | #' locations of category block midpoints.} 39 | #' \item{\code{alluvium_midpoints}}{A data frame with 40 | #' location of midpoints on each alluvium segement with 41 | #' columns: 42 | #' \describe{ 43 | #' \item{\code{...}}{Vectors/data frames supplied to 44 | #' \code{alluvial} through the \code{...}} 45 | #' \item{\code{.axis_from}, \code{.axis_to}}{IDs of axes that 46 | #' a segment originates from and goes to} 47 | #' \item{\code{.x}, \code{.y}}{X and Y locations of the alluvium midpoints} 48 | #' \item{\code{.slope}}{The (approximate) slope of the alluvium at the midpoint} 49 | #' } 50 | #' } 51 | #' 52 | #' @note Please mind that the API is planned to change to be more compatible 53 | #' with \pkg{dplyr} verbs. 54 | #' 55 | #' @importFrom grDevices col2rgb rgb 56 | #' @importFrom graphics plot xspline axis rect polygon text par 57 | #' @importFrom dplyr select_ group_by_ arrange_ filter_ "%>%" ungroup summarise_ .data 58 | #' @importFrom tidyr gather_ 59 | #' 60 | #' @export 61 | #' 62 | #' @example man-roxygen/alluvial.R 63 | 64 | alluvial <- function( ..., freq, 65 | col="gray", border=0, layer, hide=FALSE, alpha=0.5, 66 | gap.width=0.05, xw=0.1, cw=0.1, 67 | blocks = TRUE, 68 | ordering=NULL, 69 | axis_labels=NULL, 70 | mar = c(2, 1, 1, 1), 71 | cex=par("cex"), 72 | xlim_offset= c(0, 0), 73 | ylim_offset= c(0, 0), 74 | cex.axis=par("cex.axis"), 75 | axes=TRUE, 76 | ann=TRUE, 77 | title = NULL) 78 | { 79 | # Data and graphical parameters 80 | p <- data.frame( ..., freq=freq, col, alpha, border, hide, stringsAsFactors=FALSE) 81 | np <- ncol(p) - 5 # Number of dimensions 82 | # check if 'ordering' is of proper form 83 | if( !is.null(ordering) ) 84 | { 85 | stopifnot(is.list(ordering)) 86 | if( length(ordering) != np ) 87 | stop("'ordering' argument should have ", 88 | np, " components, has ", length(ordering)) 89 | } 90 | n <- nrow(p) 91 | # Layers determine plotting order 92 | if(missing(layer)) 93 | { 94 | layer <- 1:n 95 | } 96 | p$layer <- layer 97 | d <- p[ , 1:np, drop=FALSE] # Dimensions dframe 98 | p <- p[ , -c(1:np), drop=FALSE] # Parameteres dframe 99 | p$freq <- with(p, freq/sum(freq)) # Frequencies (weights) 100 | # Converting colors to hexcodes 101 | col <- col2rgb(p$col, alpha=TRUE) 102 | if(!identical(alpha, FALSE)) { 103 | col["alpha", ] <- p$alpha*256 104 | } 105 | p$col <- apply(col, 2, function(x) do.call(rgb, c(as.list(x), maxColorValue = 256))) 106 | # convert character vectors in data to factors 107 | isch <- sapply(d, is.character) 108 | d[isch] <- lapply(d[isch], as.factor) 109 | # Convert blocks to vector 110 | if (length(blocks) == 1) 111 | { 112 | blocks <- if (!is.na(as.logical(blocks))) 113 | { 114 | rep(blocks, np) 115 | } else if (blocks == "bookends") 116 | { 117 | c(TRUE, rep(FALSE, np - 2), TRUE) 118 | } 119 | } 120 | # Axis labels 121 | if(is.null(axis_labels)) { 122 | axis_labels <- names(d) 123 | } else { 124 | if(length(axis_labels) != ncol(d)) 125 | stop("`axis_labels` should have length ", names(d), ", has ", length(axis_labels)) 126 | } 127 | # Compute endpoints of flows (polygons) 128 | # i = dimension id 129 | # d = data frame of dimensions 130 | # f = weights 131 | # w = gap between categories 132 | getp <- function(i, d, f, w=gap.width) { 133 | # Ordering dimension ids for lexicographic sorting 134 | a <- c(i, (1:ncol(d))[-i]) 135 | # Order of rows of d starting from i-th dimension 136 | if( is.null(ordering[[i]]) ) 137 | { 138 | o <- do.call(order, d[a]) 139 | } else { 140 | d2 <- d 141 | d2[1] <- ordering[[i]] 142 | o <- do.call(order, d2[a]) 143 | } 144 | # Breakpoints on a dimension 145 | x <- c(0, cumsum(f[o])) * (1-w) 146 | # Stripe coordinates on a dimension 147 | x <- cbind(x[-length(x)], x[-1]) 148 | # By how much stripes need to be shifted upwards (gap/max(gap)) 149 | gap <- cumsum( c(0L, diff(as.numeric(d[o,i])) != 0) ) 150 | mx <- max(gap) 151 | if (mx == 0) mx <- 1 152 | # shifts 153 | gap <- gap / mx * w 154 | # add gap-related shifts to stripe coordinates on dimension i 155 | (x + gap)[order(o),] 156 | } 157 | # Calculate stripe locations on dimensions: list of data frames. A component 158 | # for a dimension. Data frame contains 'y' locations of stripes. 159 | dd <- lapply(seq_along(d), getp, d=d, f=p$freq) 160 | # Plotting 161 | op <- par(mar=mar) 162 | plot(NULL, type="n", xlim=c(1-cw, np+cw) + xlim_offset, ylim=c(0, 1) + ylim_offset, xaxt="n", yaxt="n", 163 | xaxs="i", yaxs="i", xlab='', ylab='', main=title, frame=FALSE) 164 | # For every stripe 165 | ind <- which(!p$hide)[rev(order(p[!p$hide, ]$layer))] 166 | for(i in ind ) 167 | { 168 | # For every inter-dimensional segment 169 | for(j in 1:(np-1) ) 170 | { 171 | # Draw stripe 172 | xspline( c(j, j, j+xw, j+1-xw, j+1, j+1, j+1-xw, j+xw, j) + rep(c(cw, -cw, cw), c(3, 4, 2)), 173 | c( dd[[j]][i, c(1, 2, 2)], rev(dd[[j+1]][i, c(1, 1, 2, 2)]), dd[[j]][i,c(1, 1)]), 174 | shape = c(0,0,1,1,0,0,1,1,0, 0), 175 | open=FALSE, 176 | col=p$col[i], border=p$border[i]) 177 | } 178 | } 179 | # Category blocks with labels 180 | for(j in seq_along(dd)) 181 | { 182 | ax <- lapply(split(dd[[j]], d[,j]), range) 183 | if (blocks[j]) 184 | { 185 | for(k in seq_along(ax)) 186 | { 187 | rect( j-cw, ax[[k]][1], j+cw, ax[[k]][2] ) 188 | } 189 | } else 190 | { 191 | for (i in ind) 192 | { 193 | x <- j + c(-1, 1) * cw 194 | y <- t(dd[[j]][c(i, i), ]) 195 | w <- xw * (x[2] - x[1]) 196 | xspline(x = c(x[1], x[1], x[1] + w, x[2] - w, 197 | x[2], x[2], x[2] - w, x[1] + w, x[1]), 198 | y = c(y[c(1, 2, 2), 1], y[c(2, 2, 1, 1), 2], y[c(1, 1), 1]), 199 | shape = c(0, 0, 1, 1, 0, 0, 1, 1, 0, 0), 200 | open = FALSE, col = p$col[i], border = p$border[i]) 201 | } 202 | } 203 | for(k in seq_along(ax)) 204 | { 205 | if(ann) text( j, mean(ax[[k]]), labels=names(ax)[k], cex=cex) 206 | } 207 | } 208 | # X axis 209 | if(axes) { 210 | axis(1, at= rep(c(-cw, cw), ncol(d)) + rep(seq_along(d), each=2), 211 | line=0.5, col="white", col.ticks="black", labels=FALSE) 212 | axis(1, at=seq_along(d), tick=FALSE, labels=axis_labels, cex.axis=cex.axis) 213 | } 214 | par(op) 215 | rval <- list( 216 | # Endpoints of alluvia 217 | endpoints = do.call( 218 | "rbind", 219 | lapply( 220 | seq(along=dd), 221 | function(i) { 222 | df <- as.data.frame( 223 | structure(dd[[i]], dimnames=list(NULL,c(".bottom", ".top"))), 224 | stringsAsFactors=FALSE 225 | ) 226 | df$.axis <- i 227 | cbind(d, df) 228 | } 229 | ) 230 | ) 231 | ) 232 | # Category midpoints 233 | rval$category_midpoints <- structure( 234 | lapply( 235 | seq(1, ncol(d)), 236 | function(i) { 237 | mi <- with( 238 | rval$endpoints[rval$endpoints$.axis == i , ], 239 | tapply(.bottom, d[[i]], min) 240 | ) 241 | ma <- with( 242 | rval$endpoints[ rval$endpoints$.axis == i , ], 243 | tapply(.top, d[[i]], max) 244 | ) 245 | (mi + ma)/2 246 | } 247 | ), 248 | names = names(d) 249 | ) 250 | # alluvium midpoints 251 | qs1 <- c(as.list(names(d)), list(".axis")) %>% 252 | lapply(as.name) 253 | qs2 <- as.list(names(d)) %>% 254 | lapply(as.name) 255 | 256 | rval$alluvium_midpoints <- rval$endpoints %>% 257 | tidyr::pivot_longer(dplyr::one_of(".bottom", ".top"), names_to = ".endpoint", values_to = ".value") %>% 258 | dplyr::group_by( !!!qs1 ) %>% 259 | dplyr::summarise(m = mean(.data$.value)) %>% 260 | dplyr::arrange(!!!qs1) %>% 261 | dplyr::group_by(!!!qs2) %>% 262 | dplyr::mutate( 263 | .axis_from = dplyr::lag(.data$.axis), 264 | .axis_to = .data$.axis, 265 | .x = (.data$.axis_from + .data$.axis_to)/2, 266 | .y = (.data$m + dplyr::lag(.data$m))/2, 267 | .slope = (.data$m - dplyr::lag(.data$m)) / (.data$.axis_to - .data$.axis_from - cw) 268 | ) %>% 269 | dplyr::ungroup() %>% 270 | dplyr::filter(!is.na(.data$.axis_from)) %>% 271 | dplyr::select(dplyr::one_of(names(d), ".axis_from", ".axis_to", ".x", ".y", ".slope")) %>% 272 | as.data.frame(stringsAsFactors=FALSE) 273 | 274 | invisible(rval) 275 | } 276 | 277 | -------------------------------------------------------------------------------- /R/alluvial_ts.R: -------------------------------------------------------------------------------- 1 | #' Alluvial diagram for multiple time series data 2 | #' 3 | #' This is a variant of alluvial diagram suitable for multiple 4 | #' (cross-sectional) time series. It also works with continuous variables equivalent to time 5 | #' 6 | #' @param dat data.frame of time-series (or suitable equivalent continuously disaggregated data), with 3 columns (in order: category, time-variable, value) with <= 1 row for each category-time combination 7 | #' @param wave numeric, curve wavyness defined in terms of x axis data range - i.e. bezier point offset. Experiment to get this right 8 | #' @param ygap numeric, vertical distance between polygons - a multiple of 10\% of the mean data value 9 | #' @param col colour, value or vector of length matching the number of unique categories. Individual colours of vector are mapped to categories in alpha-numeric order 10 | #' @param lwd numeric, value or vector of length matching the number of unique categories for polygon stroke line width. Individual values of vector are mapped to categories in alpha-numeric order 11 | #' @param alpha numeric, [0,1] polygon fill transparency 12 | #' @param plotdir character, string ('up', 'down' or 'centred') giving the vertical alignment of polygon stacks 13 | #' @param rankup logical, rank polygons on time axes upward by magnitude (largest to smallest) or not 14 | #' @param lab.cex numeric, category label font size 15 | #' @param lab.col colour, of category label 16 | #' @param xmargin numeric [0,1], proportional space for category labels 17 | #' @param axis.col colour, of axes 18 | #' @param axis.cex numeric, font size of x-axis break labels 19 | #' @param title character, plot title 20 | #' @param title.cex numeric, plot title font size 21 | #' @param grid logical, plot vertical axes 22 | #' @param grid.col colour, of grid axes 23 | #' @param grid.lwd numeric, line width of grid axes 24 | #' @param leg.mode logical, draw y-axis scale legend inside largest data point (TRUE default) or alternatively with custom position/value (FALSE) 25 | #' @param leg.x,leg.y numeric [0,1], x/y positions of legend if leg.mode = FALSE 26 | #' @param leg.cex numeric, legend text size 27 | #' @param leg.col colour, of legend lines and text 28 | #' @param leg.lty numeric, code for legend line type 29 | #' @param leg.lwd numeric, legend line width 30 | #' @param leg.max numeric, legend scale line width 31 | #' @param xlab,ylab character, x-axis / y-axis titles 32 | #' @param xlab.pos,ylab.pos numeric, perpendicular offset for axis titles 33 | #' @param ... arguments to pass to polygon() 34 | #' 35 | #' @importFrom grDevices extendrange rainbow 36 | #' @importFrom graphics mtext plot.new plot.window abline lines 37 | #' @importFrom stats aggregate approx 38 | #' 39 | #' @export 40 | #' 41 | #' @example man-roxygen/alluvial_ts.R 42 | 43 | 44 | alluvial_ts <- function(dat, wave = NA, ygap = 1, col = NA, alpha = NA, plotdir = 'up', rankup = FALSE, 45 | lab.cex = 1, lab.col = 'black', xmargin = .1, axis.col = 'black', title = NA, 46 | title.cex = 1, axis.cex = 1, grid = FALSE, grid.col = 'grey80', grid.lwd = 1, 47 | leg.mode = TRUE, leg.x = .1, leg.y = .9, leg.cex = 1, leg.col = 'black', leg.lty = NA, 48 | leg.lwd = NA, leg.max = NA, xlab = NA, ylab = NA, xlab.pos = 2, ylab.pos = 1, lwd = 1, ...){ 49 | 50 | orig.names <- names(dat) 51 | names(dat) <- c('item', 'time', 'val') 52 | if(is.numeric(dat$item)) dat$item <- as.character(dat$item) 53 | if(is.ordered(dat$time) | is.factor(dat$time)) { 54 | axis.labs <- levels(dat$time) 55 | dat$time <- as.numeric(dat$time) 56 | } else if(is.numeric(dat$time)) axis.labs <- sort(unique(dat$time)) else { 57 | return("Error: time variable must be numeric, factor, or ordered")} 58 | times <- sort(unique(dat$time)) 59 | 60 | dat <- dat[order(dat$item), ] 61 | datsum <- aggregate(val ~ item, dat, mean) 62 | plotorder <- order(datsum$val, decreasing = TRUE) # smallest last (on top) 63 | maxval <- pretty(max(dat$val))[2] # legend max 64 | 65 | # colours 66 | n <- length(unique(dat$item)) 67 | if(all(is.na(col))) col <- rainbow(n) 68 | if(length(col) == 1) col <- rep(col, n) 69 | col <- rgb(t(col2rgb(col)), maxColorValue = 255) # ensure hex 70 | if(!length(col) == n) return("Error: 'col' length must equal the number of unique data elements") 71 | if(!is.na(alpha)) { 72 | col[nchar(col)>7] <- substr(col[nchar(col)>7], 1, 7) 73 | col <- paste0(col, substr(rgb(0, 0, alpha), 6, 7)) 74 | } 75 | 76 | # calc vertical gap between items 77 | ymean <- mean(dat$val) 78 | ygap <- ymean * .1 * ygap 79 | 80 | # if not specified (but it really should be) 81 | if(is.na(wave)) wave <- .5 * (rev(times)[1] - times[1])/length(times) 82 | 83 | plot.y.max <- 0 # vertical plot scaling 84 | 85 | # prepare main data object 86 | d <- list() 87 | for(i in unique(dat$item)) d[[i]] <- list() 88 | 89 | # loop through periods 90 | for(i in 1:length(times)){ 91 | 92 | # 3 time variables, NA if they fall outside the data period 93 | if(i>1) t1 <- times[i-1] else t1 <- NA # prev period 94 | t2 <- times[i] # this period 95 | if(i plot.y.max) plot.y.max <- y.sum 103 | 104 | # loop through items 105 | if(plotdir == 'centred') y <- -y.sum/2 else y <- 0 # vertical scaler 106 | 107 | for(j in 1:nrow(dat.t)){ 108 | # work up/down y axes to calculate spline positions 109 | y0 <- y + ygap 110 | y1 <- y0 + dat.t$val[j] 111 | y <- y1 112 | 113 | # calculate left and right x-axis splines 114 | if(!is.na(t1)) spline.x <- c(t2 - wave, t2) else spline.x <- numeric(0) 115 | if(!is.na(t3)) spline.x <- c(spline.x, t2, t2 + wave) 116 | 117 | # update d 118 | item <- as.character(dat.t$item[j]) 119 | d[[item]]$x <- c(d[[item]]$x, spline.x) 120 | d[[item]]$y0 <- c(d[[item]]$y0, rep(y0, length(spline.x))) 121 | d[[item]]$y1 <- c(d[[item]]$y1, rep(y1, length(spline.x))) 122 | } # end items loop 123 | } # end period loop 124 | 125 | # function to ensure vertex arrays are same length, as xspline output can vary 126 | resize <- function(v, n = 500){ d<-data.frame(x = 1:length(v), y = v); 127 | approx(d, xout = seq(1, length(v), length.out = n))$y} 128 | plot.new() # required by xspline 129 | 130 | # calculate spline curves 131 | for(i in names(d)){ 132 | curves <- list() 133 | # iterate through bottom/top sets of bezier points, get curves and resize 134 | for(j in c('y0', 'y1')) curves[[j]] <- lapply(xspline(d[[i]]$x, d[[i]][[j]], shape = 1, draw = FALSE), resize) 135 | # stitch top and bottom polylines together clockwise into a polygon 136 | d[[i]]$poly <- data.frame(x = c(curves[[1]]$x, rev(curves[[2]]$x)), y = c(curves[[1]]$y, rev(curves[[2]]$y))) 137 | } 138 | 139 | # label y positions 140 | labs.l <- data.frame(t(sapply(1:length(d), FUN = function(i)t(data.frame(lab = names(d[i]), lefty = mean(c(d[[i]]$y0[1], d[[i]]$y1[1]))) ))), stringsAsFactors = FALSE) 141 | labs.r <- data.frame(t(sapply(1:length(d), FUN = function(i)t(data.frame(lab = names(d[i]), lefty = mean(c(rev(d[[i]]$y0)[1], rev(d[[i]]$y1)[1])))) )), stringsAsFactors = FALSE) 142 | names(labs.l) <- names(labs.r) <- c('item', 'y') 143 | labs.l$y <- as.numeric(as.character(labs.l$y)); labs.r$y <- as.numeric(as.character(labs.r$y)) # to numeric 144 | labs.l$col <- col[match(labs.l$item, datsum$item)] 145 | labs.r$col <- col[match(labs.r$item, datsum$item)] 146 | 147 | # line widths 148 | if(any(lwd == 0)) return("Error: 'lwd' must be greater than zero") 149 | if(length(lwd) == 1) lwd <- rep(lwd, length(d)) 150 | if(!length(lwd) == length(d)) return("Error: 'lwd' length must equal the number of unique data elements") 151 | lwd <- lwd[plotorder] 152 | 153 | # scale and orientation of axes 154 | if(plotdir == 'up') ylim <- c(0, plot.y.max) else { 155 | if(plotdir == 'down') ylim <- c(plot.y.max, 0) else { 156 | if(plotdir == 'centred') ylim <- c(-plot.y.max/2, plot.y.max/2) else { 157 | return("Incorrect specification for plotdir: please select 'up', 'down' or 'centred'") 158 | }}} 159 | xran <- range(dat$time) 160 | xlim <- extendrange(r = xran, f = xmargin/2) 161 | 162 | # plot order (biggest polygons first) 163 | d <- d[plotorder] 164 | col <- col[plotorder] 165 | 166 | # axis labels 167 | if(is.na(xlab)) xlab <- orig.names[2] 168 | if(is.na(ylab)) ylab <- orig.names[3] 169 | 170 | # plot 171 | plot.window(xlim = xlim, ylim = ylim) 172 | axis(1, times, cex.axis = axis.cex, labels = axis.labs, col = lab.col, col.axis = axis.col) 173 | if(is.na(title)) title <- paste('Alluvial plot of', orig.names[1], 'vs', orig.names[3], 'by', orig.names[2]) 174 | title(title, cex.main = title.cex) 175 | mtext(xlab, side = 1, line = xlab.pos, col = lab.col); mtext(ylab, side = 2, line = ylab.pos, col = lab.col) 176 | if(grid) abline(v = times, col = grid.col, lwd = grid.lwd) 177 | for(i in 1:length(d)) polygon(d[[i]]$poly$x, d[[i]]$poly$y, col = col[i], lwd = lwd[i], ...) 178 | text(times[1], labs.l$y, labels = labs.l$item, col = labs.l$col, pos = 2, cex = lab.cex) 179 | text(times[length(times)], labs.r$y, labels = labs.r$item, adj = 0, col = labs.r$col, pos = 4, cex = lab.cex) 180 | 181 | # legend 182 | topval <- max(dat$val) 183 | topitem <- as.character(dat$item[match(topval, dat$val)]) 184 | toptime <- dat$time[match(topval, dat$val)] 185 | 186 | if(leg.mode){ # legend plotted on maximum data point 187 | if(is.na(leg.lty)) leg.lty <- "dotted"; if(is.na(leg.lwd)) leg.lwd <- 2 188 | val_ind <- match(toptime, d[[topitem]]$x) 189 | leg_y0 <- d[[topitem]]$y0[val_ind] 190 | leg_y1 <- d[[topitem]]$y1[val_ind] 191 | leg_ym <- mean(c(leg_y0, leg_y1)) 192 | lines(rep(toptime, 2), c(leg_y0, leg_ym-(topval*.08)), lwd = leg.lwd, lend = 'butt', col = leg.col, lty = leg.lty) 193 | lines(rep(toptime, 2), c(leg_y1, leg_ym+(topval*.08)), lwd = leg.lwd, lend = 'butt', col = leg.col, lty = leg.lty) 194 | text(toptime, leg_ym, labels = formatC(topval, format = "d", big.mark = ','), pos = NULL, cex = leg.cex, col = leg.col) 195 | } else { # legend plotted in custom position 196 | if(!is.na(leg.max)) maxval <- leg.max 197 | if(is.na(leg.lty)) leg.lty <- "solid"; if(is.na(leg.lwd)) leg.lwd <- 10 198 | leg_x <- (xlim[2]-xlim[1]) * leg.x + xlim[1] 199 | leg_y <- (ylim[2]-ylim[1]) * leg.y + ylim[1] 200 | diffs <- d[[topitem]]$y1 - d[[topitem]]$y0 201 | time <- d[[topitem]]$x[match(max(diffs), diffs)] 202 | lines(data.frame(x = c(leg_x, leg_x), y = c(leg_y, leg_y + maxval)), lwd = leg.lwd, lend = 'butt', col = leg.col, lty = leg.lty) 203 | max.lab <- formatC(maxval, format = "d", big.mark = ',') 204 | text(rep(leg_x, 2), c(leg_y, leg_y + maxval), labels = c('0', max.lab), cex = leg.cex, pos = 4, offset = 1, col = leg.col) 205 | } 206 | } 207 | -------------------------------------------------------------------------------- /vignettes/alluvial.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Creating Alluvial Diagrams" 3 | author: "Michał Bojanowski" 4 | date: "`r Sys.Date()`" 5 | output: 6 | rmarkdown::html_vignette: 7 | toc: yes 8 | fig_caption: yes 9 | vignette: > 10 | %\VignetteIndexEntry{Creating Alluvial Diagrams} 11 | %\VignetteEngine{knitr::rmarkdown} 12 | %\VignetteEncoding{UTF-8} 13 | --- 14 | 15 | 16 | ```{r setup, cache=FALSE, include=FALSE} 17 | library(alluvial) 18 | library(dplyr) 19 | 20 | knitr::opts_chunk$set( 21 | fig.width=7, 22 | fig.height=4, 23 | cache=FALSE 24 | ) 25 | ``` 26 | 27 | 28 | 29 | 30 | # What is alluvial diagram? 31 | 32 | Alluvial diagram is a variant of a Parallel Coordinates Plot (PCP) but for categorical variables. Variables are assigned to vertical axes that are parallel. Values are represented with blocks on each axis. Observations are represented with *alluvia* (sing. "alluvium") spanning across all the axes. 33 | 34 | You create alluvial diagrams with function `alluvial()`. Let us use `Titanic` dataset as an example. As it is a `table`, we need to convert it to a data frame 35 | 36 | ```{r data} 37 | tit <- as.data.frame(Titanic, stringsAsFactors = FALSE) 38 | head(tit) 39 | ``` 40 | 41 | and create the alluvial diagram: 42 | 43 | ```{r example} 44 | alluvial( 45 | tit[,1:4], 46 | freq=tit$Freq, 47 | col = ifelse(tit$Survived == "Yes", "orange", "grey"), 48 | border = ifelse(tit$Survived == "Yes", "orange", "grey"), 49 | hide = tit$Freq == 0, 50 | cex = 0.7 51 | ) 52 | ``` 53 | 54 | We have four variables: 55 | 56 | - `Class` on the ship the passanger occupied 57 | - `Sex` of the passenger 58 | - `Age` of the passenger 59 | - Whether the passenger `Survived`. 60 | 61 | Vertical sizes of the blocks are proportional to the frequency, and so are the widths of the alluvia. Alluvia represent all combinations of values of the variables in the dataset. By default the vertical order of the alluvia is determined by alphabetical ordering of the values on each variable lexicographically (last variable changes first) drawn from bottom to top. In this example, the color is determined by passengers' survival status, i.e. passenger who survived are represented with orange alluvia. 62 | 63 | Alluvial diagrams are very useful in reading various conditional and uncoditional distributions in a multivariate dataset. For example, we can see that: 64 | 65 | - Most of the Crew did not survived -- majority of the height of the Crew category is covered by grey alluvia. 66 | - Majortity of the Crew where adult men. 67 | - Almost all women from the 1st Class did survive. 68 | - The women who did not survive come mostly from 3rd class. 69 | 70 | 71 | 72 | 73 | 74 | # Simple use 75 | 76 | Minimal use requires supplying data frame(s) as first argument, and a vector of frequencies as the `freq` argument. By default all alluvia are drawn using mildly transparent gray. 77 | 78 | Two variables `Class` and `Survived`: 79 | 80 | ```{r alluvial_defaults_2} 81 | # Survival status and Class 82 | tit %>% group_by(Class, Survived) %>% 83 | summarise(n = sum(Freq)) -> tit2d 84 | 85 | alluvial(tit2d[,1:2], freq=tit2d$n) 86 | ``` 87 | 88 | Three variables `Sex`, `Class`, and `Survived`: 89 | 90 | ```{r alluvial_defaults_3} 91 | # Survival status, Sex, and Class 92 | tit %>% group_by(Sex, Class, Survived) %>% 93 | summarise(n = sum(Freq)) -> tit3d 94 | 95 | alluvial(tit3d[,1:3], freq=tit3d$n) 96 | ``` 97 | 98 | 99 | 100 | 101 | # Customizing 102 | 103 | There are several ways to customize alluvial diagrams with `alluvial()` the following sections illustrate probably most common usecases. 104 | 105 | 106 | ## Customizing colors 107 | 108 | Colors of the alluvia can be customized with `col`, `border` and `alpha` arguments. For example: 109 | 110 | ```{r colors} 111 | alluvial( 112 | tit3d[,1:3], 113 | freq=tit3d$n, 114 | col = ifelse( tit3d$Sex == "Female", "pink", "lightskyblue"), 115 | border = "grey", 116 | alpha = 0.7, 117 | blocks=FALSE 118 | ) 119 | ``` 120 | 121 | 122 | 123 | 124 | 125 | ## Hiding and reordering alluvia 126 | 127 | ### Hiding 128 | 129 | With `alluvial` sometimes it is desirable to omit plotting of some of the alluvia. This is most frequently the case with larger datasets in which there are a lot of combinations of values of the variables associated with very small frequencies, or even 0s. Alluvia can be hidden with argument `hide` expecting a logical vector of length equal to the number of rows in the data. Alluvia for which `hide` is `FALSE` are not plotted. For example, to hide alluvia with frequency less than 150: 130 | 131 | ```{r alluvial_hide} 132 | alluvial(tit2d[,1:2], freq=tit2d$n, hide=tit2d$n < 150) 133 | ``` 134 | 135 | This skips drawing the alluvia corresponding to the following rows in `tit` data frame: 136 | 137 | ```{r} 138 | tit2d %>% select(Class, Survived, n) %>% 139 | filter(n < 150) 140 | ``` 141 | 142 | You can see the gaps e.g. on the "Yes" and "No" category blocks on the `Survived` axis. 143 | 144 | If you would rather omit these rows from the plot alltogether (i.e. no gaps), you need to filter your data before it is used by `alluvial()`. 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | ### Changing "layers" 155 | 156 | By default alluvia are plotted in sequence determined by the row order in the dataset. It determines which alluvia will be plotted "on top" of which others. 157 | 158 | Consider simple data: 159 | 160 | ```{r data_layer} 161 | d <- data.frame( 162 | x = c(1, 2, 3), 163 | y = c(3 ,2, 1), 164 | freq=c(1,1,1) 165 | ) 166 | d 167 | ``` 168 | 169 | As there are three rows, we will have three alluvia: 170 | 171 | ```{r layer_ex1, fig.width=3, fig.height=3, fig.show="hold"} 172 | alluvial(d[,1:2], freq=d$freq, col=1:3, alpha=1) 173 | # Reversing the order 174 | alluvial(d[ 3:1, 1:2 ], freq=d$freq, col=3:1, alpha=1) 175 | ``` 176 | 177 | Note that to keep colors matched in the same way to the alluvia we had to reverse the `col` argument too. Instead of reordering the data and keeping track of the other arguments plotting order can be adjusted with `layer` argument: 178 | 179 | ```{r layer_ex2} 180 | alluvial(d[,1:2], freq=d$freq, col=1:3, alpha=1, 181 | layer=3:1) 182 | ``` 183 | 184 | The value of `layer` is passed to `order` so it is possible to use logical vectors e.g. if you only want to put some of the flows on top. For example, for Titanic data to put all alluvia for all survivors on top we can: 185 | 186 | ```{r layer_ex3} 187 | alluvial(tit3d[,1:3], freq=tit3d$n, 188 | col = ifelse( tit3d$Survived == "Yes", "orange", "grey" ), 189 | alpha = 0.8, 190 | layer = tit3d$Survived == "No" 191 | ) 192 | ``` 193 | 194 | First layer is the one on top, second layer below the first and so on. Consequently, in the example above, `Survived == "No"` is ordered after `Survived == "Yes"` so the former is below the latter. 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | ### Adjusting vertical order of categories 204 | 205 | By default `alluvial()` orders the values on each axis in an alphabetic order. This happens irrespectively of the ordering of observations in the plotted dataset. It is possible to override the default ordering by transforming the variables of interest into `factor`s with a custom ordering of levels. 206 | 207 | Consider the following example data: 208 | 209 | ```{r catorder_data} 210 | d <- data.frame( 211 | col = c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", 212 | "#E31A1C", "#FDBF6F", "#FF7F00"), # from RColorBrewer Paired palette 213 | Temperature = rep(c("cool", "hot"), each=4), 214 | Luminance = rep(c("bright", "dark"), 4), 215 | Color = rep(c("blue", "green", "red", "orange"), each=2) 216 | ) %>% 217 | mutate( 218 | n = 1 219 | ) 220 | d 221 | ``` 222 | 223 | Plotting it with `alluvial()` with default settings will give: 224 | 225 | ```{r catorder_alluvial_defaults} 226 | alluvial( 227 | select(d, Temperature, Luminance, Color), 228 | freq=d$n, 229 | col = d$col, 230 | alpha=0.9 231 | ) 232 | ``` 233 | 234 | Let's change the order of categories of: 235 | 236 | - the `Color` axis such that it is (from the bottom): green, blue, orange, red. 237 | - the `Luminance` axis such that it is reversed. 238 | 239 | For each variable we create a factor with a vector of unique values of that variable sorted the way we want passed to `levels` argument of `factor()`: 240 | 241 | ```{r catorder_data_factor} 242 | d <- d %>% 243 | mutate( 244 | Color_f = factor(Color, levels=c("green", "blue", "red", "orange")), 245 | Luminance_f = factor(Luminance, levels=c("dark", "bright")) 246 | ) 247 | ``` 248 | 249 | ... and plot 250 | 251 | ```{r catorder_alluvial_factor} 252 | alluvial( 253 | select(d, Temperature, Luminance_f, Color_f), 254 | freq=d$n, 255 | col = d$col, 256 | alpha=0.9 257 | ) 258 | ``` 259 | 260 | Another version recognizing that in data `Color` is nested in `Temperature`. Different axis order gives clearer picture of the data structure. 261 | 262 | ```{r catorder_alluvial_factor2} 263 | alluvial( 264 | select(d, Temperature, Color_f, Luminance_f), 265 | freq=d$n, 266 | col = d$col, 267 | alpha=0.9 268 | ) 269 | ``` 270 | 271 | 272 | 273 | ### Adjusting vertical order of alluvia 274 | 275 | **This feature is experimental!** 276 | 277 | Usually the order of the variables (axes) is rather unimportant. However, having particular two variables next to each other facilitates analyzing dependency between those two variables. In alluvial diagrams the ordering of the variables determines the vertical plotting order of the alluvia. This vertical order, together with setting `blocks` to `FALSE`, can be used to turn category blocks into stacked barcharts. 278 | 279 | Consider two versions of subsets of the Titanic data that differ only in the order of variables. 280 | 281 | ```{r ordering_data} 282 | tit %>% group_by(Sex, Age, Survived) %>% 283 | summarise( n= sum(Freq)) -> x 284 | 285 | tit %>% group_by(Survived, Age, Sex) %>% 286 | summarise( n= sum(Freq)) -> y 287 | ``` 288 | 289 | In `x` we have `r paste(names(x), collapse="-")` while in `y` we have 290 | `r paste(names(y), collapse="-")`. 291 | 292 | If we color the alluvia according to the first axis, the category blocks of Age and Survived become barcharts showing relative frequencies of Men and Women within categories of Age and Survived. 293 | 294 | ```{r ordering_x} 295 | alluvial(x[,1:3], freq=x$n, 296 | col = ifelse(x$Sex == "Male", "orange", "grey"), 297 | alpha = 0.8, 298 | blocks=FALSE 299 | ) 300 | ``` 301 | 302 | Now we can see for example that 303 | 304 | - There were a little bit of more girls than boys (category `Age == "Child"`) 305 | - Among surviors there were roughly the same number of Men and Women. 306 | 307 | Argument `ordering` can be used to fully customize the ordering of each alluvium on each axis without the need to reorder the axes themselves. This feature is experimental as you can easily break things. It expects a list of numeric vectors or `NULL`s one for each variable in the data: 308 | 309 | - Value `NULL` does not change the default order on the corresponding axis. 310 | - A numeric vector should have length equal to the number of rows in the data and is determines the vertical order of the alluvia on the corresponding axis. 311 | 312 | For example: 313 | 314 | ```{r ordering_y} 315 | alluvial(y[,1:3], freq=y$n, 316 | # col = RColorBrewer::brewer.pal(8, "Set1"), 317 | col = ifelse(y$Sex == "Male", "orange", "grey"), 318 | alpha = 0.8, 319 | blocks = FALSE, 320 | ordering = list( 321 | order(y$Survived, y$Sex == "Male"), 322 | order(y$Age, y$Sex == "Male"), 323 | NULL 324 | ) 325 | ) 326 | ``` 327 | 328 | The list passed to `ordering` has has three elements corresponding to `Survived`, `Age`, and `Sex` respectively (that's the order of the variables in `y`). The elements of this list are 329 | 330 | 1. Call to `order` sorting the alluvia on the `Survived` axis. The alluvia need to be sorted according to `Survived` first (otherwise the categories "Yes" and "No" will be destroyed) 331 | and according to the `Sex` second. 332 | 2. Call to `order` sorting the alluvia on the `Age` axis. The alluvia need to be sorted according to `Age` first `Sex` second. 333 | 3. `NULL` leaves the default ordering on `Sex` axis. 334 | 335 | In the example below alluvia are colored by sex (red=Female, blue=Male) and survival status (bright=survived, dark=did not survive). Each category block is a stacked barchart showing relative freuquencies of man/women who did/did not survive. The alluvia are reordered on the last axis (Age) so that Sex categories are next each other (red together and blue together): 336 | 337 | 338 | ```{r} 339 | pal <- c("red4", "lightskyblue4", "red", "lightskyblue") 340 | 341 | tit %>% 342 | mutate( 343 | ss = paste(Survived, Sex), 344 | k = pal[ match(ss, sort(unique(ss))) ] 345 | ) -> tit 346 | 347 | 348 | alluvial(tit[,c(4,2,3)], freq=tit$Freq, 349 | hide = tit$Freq < 10, 350 | col = tit$k, 351 | border = tit$k, 352 | blocks=FALSE, 353 | ordering = list( 354 | NULL, 355 | NULL, 356 | order(tit$Age, tit$Sex ) 357 | 358 | ) 359 | ) 360 | ``` 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | # Appendix 372 | 373 | ```{r session_info} 374 | sessionInfo() 375 | ``` 376 | 377 | --------------------------------------------------------------------------------