├── .github
    ├── .gitignore
    └── workflows
    │   └── R-CMD-check.yaml
├── .gitignore
├── LICENSE
├── tests
    ├── testall.R
    └── testthat
    │   ├── _snaps
    │       ├── alluvial
    │       │   └── alluvial.png
    │       └── alluvial.md
    │   └── test-alluvial.R
├── tools
    ├── alluvial-1.png
    └── alluvial_ts-1.png
├── .Rbuildignore
├── Makefile
├── alluvial.Rproj
├── inst
    └── CITATION
├── R
    ├── Refugees.R
    ├── alluvial.R
    └── alluvial_ts.R
├── DESCRIPTION
├── vignettes
    ├── alluvial.bib
    └── alluvial.Rmd
├── man
    ├── Refugees.Rd
    ├── alluvial_ts.Rd
    └── alluvial.Rd
├── NAMESPACE
├── NEWS.md
├── man-roxygen
    ├── alluvial_ts.R
    └── alluvial.R
├── data
    └── Refugees.csv
├── README.Rmd
└── README.md


/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2016
2 | COPYRIGHT HOLDER: Michal Bojanowski
3 | 


--------------------------------------------------------------------------------
/tests/testall.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(alluvial)
3 | 
4 | test_check("alluvial")
5 | 


--------------------------------------------------------------------------------
/tools/alluvial-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mbojan/alluvial/HEAD/tools/alluvial-1.png


--------------------------------------------------------------------------------
/tools/alluvial_ts-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mbojan/alluvial/HEAD/tools/alluvial_ts-1.png


--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | man-roxygen
2 | ^.*\.Rproj$
3 | ^\.Rproj\.user$
4 | README.Rmd
5 | Makefile
6 | ^\.github$
7 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/alluvial/alluvial.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mbojan/alluvial/HEAD/tests/testthat/_snaps/alluvial/alluvial.png


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | default:
 2 | 
 3 | %.md: %.Rmd
 4 | 	Rscript -e 'knitr::knit("$<", output="$@")'
 5 | 
 6 | %.html: %.md
 7 | 	pandoc -f markdown_github -o $@ $<
 8 | 
 9 | 	
10 | default: README.md
11 | 
12 | preview: README.html
13 | 
14 | .PHONY: default preview
15 | 


--------------------------------------------------------------------------------
/alluvial.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 | 


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | year <- sub("-.*", "", meta$Date)
 2 | note <- sprintf("R package version: %s", meta$Version)
 3 | 
 4 | bibentry( bibtype="Manual",
 5 |          title="{alluvial}: R Package for Creating Alluvial Diagrams",
 6 |          author= c(person("Michal", "Bojanowski"),
 7 |                    person("Robin", "Edwards") ),
 8 |          year = year,
 9 |          note = note,
10 |          url = "https://github.com/mbojan/alluvial",
11 |          mheader="To cite package 'allluvial' in publications use:",
12 |          mfooter= paste("We have invested some time and effort in creating this package,",
13 |                         "please cite it when using it for data analysis or other tasks.", sep = " ")
14 |          )
15 | 


--------------------------------------------------------------------------------
/R/Refugees.R:
--------------------------------------------------------------------------------
 1 | #' Refugees data
 2 | #'
 3 | #' Top 10 countries/territories of origin (excluding "Various") for period
 4 | #' 2003-13 of UNHCR statistics on "Persons recognized as refugees under the
 5 | #' 1951 UN Convention/1967 Protocol, the 1969 OAU Convention, in accordance
 6 | #' with the UNHCR Statute, persons granted a complementary form of protection
 7 | #' and those granted temporary protection."
 8 | #'
 9 | #' @format
10 | #' Data frame with the following columns:
11 | #' \describe{
12 | #' \item{country}{Country or territory of origin}
13 | #' \item{year}{Year (2003-13)}
14 | #' \item{refugees}{Persons recognized as refugees under the 1951 UN Convention, etc..}
15 | #' }
16 | #'
17 | #' @source
18 | #' \url{http://data.un.org/Data.aspx?d=UNHCR&f=indID\%3aType-Ref}
19 | #'
20 | #' @docType data
21 | #' @name Refugees
22 | NULL
23 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: alluvial
 2 | Type: Package
 3 | Title: Alluvial Diagrams
 4 | Version: 0.2-0
 5 | Date: 2017-04-07
 6 | Authors@R: c(
 7 |     person("Michał", "Bojanowski", role=c("aut", "cre"), email="michal2992@gmail.com", comment=c(ORCID="0000-0001-7503-852X")),
 8 |     person("Robin", "Edwards", role="aut", email="robin.edwards@ucl.ac.uk")
 9 |     )
10 | Description: Creating alluvial diagrams (also known as parallel sets plots) for multivariate
11 |   and time series-like data.
12 | URL: https://github.com/mbojan/alluvial
13 | BugReports: https://github.com/mbojan/alluvial/issues
14 | Imports: dplyr, tidyr
15 | Suggests:
16 |     devtools,
17 |     testthat,
18 |     reshape2,
19 |     knitr,
20 |     rmarkdown,
21 | License: MIT + file LICENSE
22 | LazyLoad: yes
23 | LazyData: yes
24 | Config/testthat/edition: 3
25 | VignetteBuilder: knitr
26 | RoxygenNote: 7.1.2
27 | Encoding: UTF-8
28 | 


--------------------------------------------------------------------------------
/vignettes/alluvial.bib:
--------------------------------------------------------------------------------
 1 | @Manual{r-graphics,
 2 |     title = {R: A Language and Environment for Statistical Computing},
 3 |     author = {{R Core Team}},
 4 |     organization = {R Foundation for Statistical Computing},
 5 |     address = {Vienna, Austria},
 6 |     year = {2021},
 7 |     url = {https://www.R-project.org/},
 8 | }
 9 | 
10 | @Misc{r-ggalluvial,
11 |   title = {ggalluvial: Alluvial Plots in 'ggplot2'},
12 |   author = {Jason Cory Brunson and Quentin D. Read},
13 |   year = {2020},
14 |   note = {R package version 0.12.3},
15 |   url = {http://corybrunson.github.io/ggalluvial/},
16 | }
17 | 
18 | @Article{ggalluvial-article,
19 |   title = {{ggalluvial}: Layered Grammar for Alluvial Plots},
20 |   author = {Jason Cory Brunson},
21 |   year = {2020},
22 |   journal = {Journal of Open Source Software},
23 |   volume = {5},
24 |   number = {49},
25 |   pages = {2017},
26 |   doi = {10.21105/joss.02017},
27 | }


--------------------------------------------------------------------------------
/man/Refugees.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/Refugees.R
 3 | \docType{data}
 4 | \name{Refugees}
 5 | \alias{Refugees}
 6 | \title{Refugees data}
 7 | \format{
 8 | Data frame with the following columns:
 9 | \describe{
10 | \item{country}{Country or territory of origin}
11 | \item{year}{Year (2003-13)}
12 | \item{refugees}{Persons recognized as refugees under the 1951 UN Convention, etc..}
13 | }
14 | }
15 | \source{
16 | \url{http://data.un.org/Data.aspx?d=UNHCR&f=indID\%3aType-Ref}
17 | }
18 | \description{
19 | Top 10 countries/territories of origin (excluding "Various") for period
20 | 2003-13 of UNHCR statistics on "Persons recognized as refugees under the
21 | 1951 UN Convention/1967 Protocol, the 1969 OAU Convention, in accordance
22 | with the UNHCR Statute, persons granted a complementary form of protection
23 | and those granted temporary protection."
24 | }
25 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(alluvial)
 4 | export(alluvial_ts)
 5 | importFrom(dplyr,"%>%")
 6 | importFrom(dplyr,.data)
 7 | importFrom(dplyr,arrange_)
 8 | importFrom(dplyr,filter_)
 9 | importFrom(dplyr,group_by_)
10 | importFrom(dplyr,select_)
11 | importFrom(dplyr,summarise_)
12 | importFrom(dplyr,ungroup)
13 | importFrom(grDevices,col2rgb)
14 | importFrom(grDevices,extendrange)
15 | importFrom(grDevices,rainbow)
16 | importFrom(grDevices,rgb)
17 | importFrom(graphics,abline)
18 | importFrom(graphics,axis)
19 | importFrom(graphics,lines)
20 | importFrom(graphics,mtext)
21 | importFrom(graphics,par)
22 | importFrom(graphics,plot)
23 | importFrom(graphics,plot.new)
24 | importFrom(graphics,plot.window)
25 | importFrom(graphics,polygon)
26 | importFrom(graphics,rect)
27 | importFrom(graphics,text)
28 | importFrom(graphics,xspline)
29 | importFrom(stats,aggregate)
30 | importFrom(stats,approx)
31 | importFrom(tidyr,gather_)
32 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # 0.2-0
 2 | 
 3 | ## New features
 4 | 
 5 | - `alluvial()` exposes `mar` argument that is passed to `par()` so the user can customize the plot margins (which are rather narrow by default) (#31).
 6 | - `alluvial()` has two new arguments `xlim_offset` and `ylim_offset` which are passed to `xlim` and `ylim` of `plot()`. This enables adjusting the size of the plotting region and avoid label clipping in some cirumstances (#32)
 7 | - The vignette has a new section on reordering the alluvia, and some other improvements. 
 8 | 
 9 | ## Minor updates
10 | 
11 | - The vignette dependency on RColorBrewer has been removed.
12 | 
13 | 
14 | 
15 | 
16 | # alluvial 0.1-2
17 | 
18 | ## New features
19 | 
20 | - There is a vignette `vignette("alluvial", package="alluvial")` illustrating basic usage of `alluvial()`. The vignette needs `dplyr` package so it is now `Suggested`.
21 | 
22 | ## Minor updates
23 | 
24 | - README has been updated. It is dynamically generated from an associated `.Rmd` file. Some typos fixed.
25 | 
26 | 
27 | # alluvial 0.1-1
28 | 
29 | First release.


--------------------------------------------------------------------------------
/tests/testthat/test-alluvial.R:
--------------------------------------------------------------------------------
 1 | # Oddities in input data --------------------------------------------------
 2 | 
 3 | test_that("Character vectors in data do not trigger warnings", {
 4 |   d <- data.frame(
 5 |     x1=rep(letters[1:2], 2),
 6 |     x2=rep(letters[1:2], each=2),
 7 |     freq=1:4
 8 |   )
 9 |   expect_silent({
10 |     pdf(NULL)
11 |     alluvial(d[,1:2], freq=d$freq)
12 |     dev.off()
13 |   } )
14 | })
15 | 
16 | 
17 | 
18 | # Output ------------------------------------------------------------------
19 | 
20 | test_that("alluvial() returns proper value", {
21 |   d <- data.frame(
22 |     x1=rep(letters[1:2], 2),
23 |     x2=rep(letters[1:2], each=2),
24 |     freq=1:4
25 |   )
26 |   pdf(NULL)
27 |   expect_snapshot_value(alluvial(d[,1:2], freq=d$freq), style = "deparse")
28 |   dev.off()
29 | })
30 | 
31 | test_that("alluvial() produces a proper plot", {
32 |   skip_on_cran()
33 |   skip_on_ci()
34 |   d <- data.frame(
35 |     x1=rep(letters[1:2], 2),
36 |     x2=rep(letters[1:2], each=2),
37 |     freq=1:4
38 |   )
39 |   expect_snapshot_file({
40 |     tfile <- tempfile()
41 |     png(tfile)
42 |     alluvial(d[,1:2], freq=d$freq)
43 |     dev.off()
44 |     tfile
45 |   },
46 |     "alluvial.png"
47 |   )
48 | })
49 | 


--------------------------------------------------------------------------------
/tests/testthat/_snaps/alluvial.md:
--------------------------------------------------------------------------------
 1 | # alluvial() returns proper value
 2 | 
 3 |     list(endpoints = structure(list(x1 = structure(c(1L, 2L, 1L, 
 4 |     2L, 1L, 2L, 1L, 2L), .Label = c("a", "b"), class = "factor"), 
 5 |         x2 = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("a", 
 6 |         "b"), class = "factor"), .bottom = c(0, 0.43, 0.095, 0.62, 
 7 |         0, 0.095, 0.335, 0.62), .top = c(0.095, 0.62, 0.38, 1, 0.095, 
 8 |         0.285, 0.62, 1), .axis = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L
 9 |         )), row.names = c(NA, -8L), class = "data.frame"), category_midpoints = list(
10 |         x1 = structure(c(a = 0.19, b = 0.715), .Dim = 2L, .Dimnames = list(
11 |             c("a", "b"))), x2 = structure(c(a = 0.1425, b = 0.6675
12 |         ), .Dim = 2L, .Dimnames = list(c("a", "b")))), alluvium_midpoints = structure(list(
13 |         x1 = structure(c(1L, 1L, 2L, 2L), .Label = c("a", "b"), class = "factor"), 
14 |         x2 = structure(c(1L, 2L, 1L, 2L), .Label = c("a", "b"), class = "factor"), 
15 |         .axis_from = c(1L, 1L, 1L, 1L), .axis_to = c(2L, 2L, 2L, 
16 |         2L), .x = c(1.5, 1.5, 1.5, 1.5), .y = c(0.0475, 0.3575, 0.3575, 
17 |         0.81), .slope = c(0, 0.266666666666667, -0.372222222222222, 
18 |         0)), row.names = c(NA, -4L), class = "data.frame"))
19 | 
20 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |   pull_request:
 7 |     branches: [main, master]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | jobs:
12 |   R-CMD-check:
13 |     runs-on: ${{ matrix.config.os }}
14 | 
15 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 | 
17 |     strategy:
18 |       fail-fast: false
19 |       matrix:
20 |         config:
21 |           - {os: macOS-latest,   r: 'release'}
22 |           - {os: windows-latest, r: 'release'}
23 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
24 |           - {os: ubuntu-latest,   r: 'release'}
25 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
26 | 
27 |     env:
28 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 |       R_KEEP_PKG_SOURCE: yes
30 | 
31 |     steps:
32 |       - uses: actions/checkout@v2
33 | 
34 |       - uses: r-lib/actions/setup-pandoc@v1
35 | 
36 |       - uses: r-lib/actions/setup-r@v1
37 |         with:
38 |           r-version: ${{ matrix.config.r }}
39 |           http-user-agent: ${{ matrix.config.http-user-agent }}
40 |           use-public-rspm: true
41 | 
42 |       - uses: r-lib/actions/setup-r-dependencies@v1
43 |         with:
44 |           extra-packages: rcmdcheck
45 | 
46 |       - uses: r-lib/actions/check-r-package@v1
47 | 


--------------------------------------------------------------------------------
/man-roxygen/alluvial_ts.R:
--------------------------------------------------------------------------------
 1 | if( require(reshape2) )
 2 | {
 3 | data(Refugees)
 4 | reshape2::dcast(Refugees, country ~ year, value.var = 'refugees')
 5 | d <- Refugees
 6 | 
 7 | set.seed(39) # for nice colours
 8 | cols <- hsv(h = sample(1:10/10), s = sample(3:12)/15, v = sample(3:12)/15)
 9 | 
10 | alluvial_ts(d)
11 | alluvial_ts(d, wave = .2, ygap = 5, lwd = 3)
12 | alluvial_ts(d, wave = .3, ygap = 5, col = cols)
13 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, rankup = TRUE)
14 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'down')
15 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'centred', grid=TRUE,
16 |             grid.lwd = 5)
17 | alluvial_ts(d, wave =  0, ygap = 0, col = cols, alpha = .9, border = 'white',
18 |             grid = TRUE, grid.lwd = 5)
19 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.4)
20 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.3, lab.cex = .7)
21 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.3, lab.cex=.7,
22 |             leg.cex=.7, leg.col = 'white')
23 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, leg.mode = FALSE, leg.x = .1,
24 |             leg.y = .7, leg.max = 3e6)
25 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'centred', alpha=.9,
26 |             grid = TRUE, grid.lwd = 5, xmargin = 0.2, lab.cex = .7, xlab = '',
27 |             ylab = '', border = NA, axis.cex = .8, leg.cex = .7,
28 |             leg.col='white', 
29 |          title = "UNHCR-recognised refugees\nTop 10 countries (2003-13)\n")
30 | 
31 | # non time-series example - Virginia deaths dataset
32 | d <- reshape2::melt(data.frame(age=row.names(VADeaths), VADeaths), id.vars='age')[,c(2,1,3)]
33 | names(d) = c('pop_group','age_group','deaths')
34 | alluvial_ts(d)
35 | }
36 | 


--------------------------------------------------------------------------------
/man-roxygen/alluvial.R:
--------------------------------------------------------------------------------
 1 | # Titanic data
 2 | tit <- as.data.frame(Titanic)
 3 | 
 4 | # 2d
 5 | tit2d <- aggregate( Freq ~ Class + Survived, data=tit, sum)
 6 | alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8,
 7 |          gap.width=0.1, col= "steelblue", border="white",
 8 |          layer = tit2d$Survived != "Yes" )
 9 | 
10 | alluvial( tit2d[,1:2], freq=tit2d$Freq, 
11 |          hide=tit2d$Freq < 150,
12 |          xw=0.0, alpha=0.8,
13 |          gap.width=0.1, col= "steelblue", border="white",
14 |          layer = tit2d$Survived != "Yes" )
15 | 
16 | # 3d
17 | tit3d <- aggregate( Freq ~ Class + Sex + Survived, data=tit, sum)
18 | 
19 | alluvial(tit3d[,1:3], freq=tit3d$Freq, alpha=1, xw=0.2,
20 |          col=ifelse( tit3d$Survived == "No", "red", "gray"),
21 |          layer = tit3d$Sex != "Female",
22 |          border="white")
23 | 
24 | 
25 | # 4d
26 | alluvial( tit[,1:4], freq=tit$Freq, border=NA,
27 |          hide = tit$Freq < quantile(tit$Freq, .50),
28 |          col=ifelse( tit$Class == "3rd" & tit$Sex == "Male", "red", "gray") )
29 | 
30 | # 3d example with custom ordering
31 | # Reorder "Sex" axis according to survival status
32 | ord <- list(NULL, with(tit3d, order(Sex, Survived)), NULL)
33 | alluvial(tit3d[,1:3], freq=tit3d$Freq, alpha=1, xw=0.2,
34 |          col=ifelse( tit3d$Survived == "No", "red", "gray"),
35 |          layer = tit3d$Sex != "Female",
36 |          border="white", ordering=ord)
37 | 
38 | # Possible blocks options
39 | for (blocks in c(TRUE, FALSE, "bookends")) {
40 |     
41 |     # Elaborate alluvial diagram from main examples file
42 |     alluvial( tit[, 1:4], freq = tit$Freq, border = NA,
43 |               hide = tit$Freq < quantile(tit$Freq, .50),
44 |               col = ifelse( tit$Class == "3rd" & tit$Sex == "Male",
45 |                             "red", "gray" ),
46 |               blocks = blocks )
47 | }
48 | 
49 | 
50 | # Data returned
51 | x <- alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8,
52 |           gap.width=0.1, col= "steelblue", border="white",
53 |           layer = tit2d$Survived != "Yes" )
54 | points( rep(1, 16), x$endpoints[[1]], col="green")
55 | points( rep(2, 16), x$endpoints[[2]], col="blue")
56 | 


--------------------------------------------------------------------------------
/data/Refugees.csv:
--------------------------------------------------------------------------------
  1 | "country";"year";"refugees"
  2 | "Afghanistan";2003;2136043
  3 | "Burundi";2003;531637
  4 | "Congo DRC";2003;453465
  5 | "Iraq";2003;368580
  6 | "Myanmar";2003;151384
  7 | "Palestine";2003;350568
  8 | "Somalia";2003;402336
  9 | "Sudan";2003;606242
 10 | "Syria";2003;20819
 11 | "Vietnam";2003;363179
 12 | "Afghanistan";2004;2084109
 13 | "Burundi";2004;485454
 14 | "Congo DRC";2004;461042
 15 | "Iraq";2004;311905
 16 | "Myanmar";2004;161013
 17 | "Palestine";2004;350617
 18 | "Somalia";2004;389304
 19 | "Sudan";2004;730647
 20 | "Syria";2004;21440
 21 | "Vietnam";2004;349809
 22 | "Afghanistan";2005;2166149
 23 | "Burundi";2005;438706
 24 | "Congo DRC";2005;430929
 25 | "Iraq";2005;262299
 26 | "Myanmar";2005;164864
 27 | "Palestine";2005;349673
 28 | "Somalia";2005;395553
 29 | "Sudan";2005;693632
 30 | "Syria";2005;16401
 31 | "Vietnam";2005;358268
 32 | "Afghanistan";2006;2107519
 33 | "Burundi";2006;396541
 34 | "Congo DRC";2006;401914
 35 | "Iraq";2006;1450905
 36 | "Myanmar";2006;202826
 37 | "Palestine";2006;334142
 38 | "Somalia";2006;464252
 39 | "Sudan";2006;686311
 40 | "Syria";2006;12338
 41 | "Vietnam";2006;374279
 42 | "Afghanistan";2007;1909911
 43 | "Burundi";2007;375715
 44 | "Congo DRC";2007;370386
 45 | "Iraq";2007;2279245
 46 | "Myanmar";2007;191256
 47 | "Palestine";2007;335219
 48 | "Somalia";2007;455356
 49 | "Sudan";2007;523032
 50 | "Syria";2007;13671
 51 | "Vietnam";2007;327776
 52 | "Afghanistan";2008;1817913
 53 | "Burundi";2008;281592
 54 | "Congo DRC";2008;367995
 55 | "Iraq";2008;1873519
 56 | "Myanmar";2008;184347
 57 | "Palestine";2008;333990
 58 | "Somalia";2008;559153
 59 | "Sudan";2008;397013
 60 | "Syria";2008;15186
 61 | "Vietnam";2008;328183
 62 | "Afghanistan";2009;1905804
 63 | "Burundi";2009;94239
 64 | "Congo DRC";2009;455852
 65 | "Iraq";2009;1785212
 66 | "Myanmar";2009;206650
 67 | "Palestine";2009;95177
 68 | "Somalia";2009;678308
 69 | "Sudan";2009;348500
 70 | "Syria";2009;17884
 71 | "Vietnam";2009;339289
 72 | "Afghanistan";2010;3054709
 73 | "Burundi";2010;84064
 74 | "Congo DRC";2010;476693
 75 | "Iraq";2010;1683575
 76 | "Myanmar";2010;215644
 77 | "Palestine";2010;93299
 78 | "Somalia";2010;770148
 79 | "Sudan";2010;379067
 80 | "Syria";2010;18428
 81 | "Vietnam";2010;338698
 82 | "Afghanistan";2011;2664436
 83 | "Burundi";2011;101288
 84 | "Congo DRC";2011;491481
 85 | "Iraq";2011;1428308
 86 | "Myanmar";2011;214594
 87 | "Palestine";2011;94121
 88 | "Somalia";2011;1075148
 89 | "Sudan";2011;491013
 90 | "Syria";2011;19900
 91 | "Vietnam";2011;337829
 92 | "Afghanistan";2012;2586034
 93 | "Burundi";2012;73362
 94 | "Congo DRC";2012;509082
 95 | "Iraq";2012;746181
 96 | "Myanmar";2012;215338
 97 | "Palestine";2012;94820
 98 | "Somalia";2012;1136713
 99 | "Sudan";2012;558195
100 | "Syria";2012;728603
101 | "Vietnam";2012;336939
102 | "Afghanistan";2013;2556507
103 | "Burundi";2013;72652
104 | "Congo DRC";2013;499320
105 | "Iraq";2013;401384
106 | "Myanmar";2013;222053
107 | "Palestine";2013;96044
108 | "Somalia";2013;1121772
109 | "Sudan";2013;636400
110 | "Syria";2013;2457255
111 | "Vietnam";2013;314105
112 | 


--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | output: 
  3 |   github_document:
  4 |     html_preview: false
  5 | bibliography: vignettes/alluvial.bib
  6 | editor_options: 
  7 |   chunk_output_type: console
  8 | ---
  9 | 
 10 | ```{r setup, include=FALSE, cache=FALSE}
 11 | library(alluvial)
 12 | library(dplyr)
 13 | 
 14 | knitr::opts_chunk$set(
 15 |   fig.width=8,
 16 |   fig.height=6,
 17 |   fig.path = "tools/"
 18 | )
 19 | 
 20 | knitr::render_markdown(strict=FALSE)
 21 | ```
 22 | 
 23 | 
 24 | 
 25 | # R package for drawing alluvial diagrams
 26 | 
 27 | <!-- badges: start -->
 28 | [![R-CMD-check](https://github.com/mbojan/alluvial/workflows/R-CMD-check/badge.svg)](https://github.com/mbojan/alluvial/actions)
 29 | [![rstudio mirror downloads](http://cranlogs.r-pkg.org/badges/alluvial?color=2ED968)](http://cranlogs.r-pkg.org/)
 30 | [![cran version](http://www.r-pkg.org/badges/version/alluvial)](https://cran.r-project.org/package=alluvial)
 31 | <!-- badges: end -->
 32 | 
 33 | What are alluvial diagrams? See for example:
 34 | 
 35 | * [Wikipedia](http://en.wikipedia.org/wiki/Alluvial_diagram)
 36 | * My [blog post](http://bc.bojanorama.pl/2014/03/alluvial-diagrams) showing-off this package
 37 | * Some discussion on [CrossValidated](http://stats.stackexchange.com/questions/12029/is-it-possible-to-create-parallel-sets-plot-using-r)
 38 | 
 39 | This package use base R **graphics** [@r-graphics] package. For Grammar of Graphics implementation see [**ggalluvial**](https://github.com/corybrunson/ggalluvial) [@r-ggalluvial;@ggalluvial-article].
 40 | 
 41 | 
 42 | ## Examples
 43 | 
 44 | Alluvial diagram of `datasets::Titanic` data made with `alluvial()`. Notice how each category block becomes a stacked barchart showing relative frequency of survivors.
 45 | 
 46 | ```{r alluvial}
 47 | tit <- tibble::as_tibble(Titanic)
 48 | 
 49 | tit %>% head() %>% knitr::kable()
 50 | 
 51 | alluvial(
 52 |     select(tit, Survived, Sex, Age, Class),
 53 |     freq=tit$n,
 54 |     col = ifelse(tit$Survived == "Yes", "orange", "grey"),
 55 |     border = ifelse(tit$Survived == "Yes", "orange", "grey"),
 56 |     layer = tit$Survived != "Yes",
 57 |     alpha = 0.8,
 58 |     blocks=FALSE
 59 |   )
 60 | ```
 61 | 
 62 | 
 63 | 
 64 | 
 65 | 
 66 | Alluvial diagram for multiple time series / cross-sectional data based on `alluvial::Refugees` data made with `alluvial_ts()`.
 67 | 
 68 | ```{r alluvial_ts}
 69 | Refugees %>% head() %>% knitr::kable()
 70 | 
 71 | 
 72 | set.seed(39) # for nice colours
 73 | cols <- hsv(h = sample(1:10/10), s = sample(3:12)/15, v = sample(3:12)/15)
 74 | 
 75 | alluvial_ts(Refugees, wave = .3, ygap = 5, col = cols, plotdir = 'centred', alpha=.9,
 76 |             grid = TRUE, grid.lwd = 5, xmargin = 0.2, lab.cex = .7, xlab = '',
 77 |             ylab = '', border = NA, axis.cex = .8, leg.cex = .7,
 78 |             leg.col='white', 
 79 |             title = "UNHCR-recognised refugees\nTop 10 countries (2003-13)\n")
 80 | ```
 81 | 
 82 | 
 83 | 
 84 | ## Installation
 85 | 
 86 | Install stable version from CRAN using
 87 | 
 88 | ```{r, eval=FALSE}
 89 | install.packages("alluvial")
 90 | ```
 91 | 
 92 | or development version from GitHub using `remotes::install_github()`:
 93 | 
 94 | ```{r, eval=FALSE}
 95 | remotes::install_github("mbojan/alluvial", build_vignettes=TRUE)
 96 | ```
 97 | 
 98 | 
 99 | 
100 | 
101 | ## References
102 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # R package for drawing alluvial diagrams
  3 | 
  4 | <!-- badges: start -->
  5 | 
  6 | [![R-CMD-check](https://github.com/mbojan/alluvial/workflows/R-CMD-check/badge.svg)](https://github.com/mbojan/alluvial/actions)
  7 | [![rstudio mirror
  8 | downloads](http://cranlogs.r-pkg.org/badges/alluvial?color=2ED968)](http://cranlogs.r-pkg.org/)
  9 | [![cran
 10 | version](http://www.r-pkg.org/badges/version/alluvial)](https://cran.r-project.org/package=alluvial)
 11 | <!-- badges: end -->
 12 | 
 13 | What are alluvial diagrams? See for example:
 14 | 
 15 | -   [Wikipedia](http://en.wikipedia.org/wiki/Alluvial_diagram)
 16 | -   My [blog post](http://bc.bojanorama.pl/2014/03/alluvial-diagrams)
 17 |     showing-off this package
 18 | -   Some discussion on
 19 |     [CrossValidated](http://stats.stackexchange.com/questions/12029/is-it-possible-to-create-parallel-sets-plot-using-r)
 20 | 
 21 | This package use base R **graphics** (R Core Team 2021) package. For
 22 | Grammar of Graphics implementation see
 23 | [**ggalluvial**](https://github.com/corybrunson/ggalluvial) (Brunson and
 24 | Read 2020; Brunson 2020).
 25 | 
 26 | ## Examples
 27 | 
 28 | Alluvial diagram of `datasets::Titanic` data made with `alluvial()`.
 29 | Notice how each category block becomes a stacked barchart showing
 30 | relative frequency of survivors.
 31 | 
 32 | ``` r
 33 | tit <- tibble::as_tibble(Titanic)
 34 | 
 35 | tit %>% head() %>% knitr::kable()
 36 | ```
 37 | 
 38 | | Class | Sex    | Age   | Survived |   n |
 39 | |:------|:-------|:------|:---------|----:|
 40 | | 1st   | Male   | Child | No       |   0 |
 41 | | 2nd   | Male   | Child | No       |   0 |
 42 | | 3rd   | Male   | Child | No       |  35 |
 43 | | Crew  | Male   | Child | No       |   0 |
 44 | | 1st   | Female | Child | No       |   0 |
 45 | | 2nd   | Female | Child | No       |   0 |
 46 | 
 47 | ``` r
 48 | alluvial(
 49 |     select(tit, Survived, Sex, Age, Class),
 50 |     freq=tit$n,
 51 |     col = ifelse(tit$Survived == "Yes", "orange", "grey"),
 52 |     border = ifelse(tit$Survived == "Yes", "orange", "grey"),
 53 |     layer = tit$Survived != "Yes",
 54 |     alpha = 0.8,
 55 |     blocks=FALSE
 56 |   )
 57 | ```
 58 | 
 59 | ![](tools/alluvial-1.png)<!-- -->
 60 | 
 61 | Alluvial diagram for multiple time series / cross-sectional data based
 62 | on `alluvial::Refugees` data made with `alluvial_ts()`.
 63 | 
 64 | ``` r
 65 | Refugees %>% head() %>% knitr::kable()
 66 | ```
 67 | 
 68 | | country     | year | refugees |
 69 | |:------------|-----:|---------:|
 70 | | Afghanistan | 2003 |  2136043 |
 71 | | Burundi     | 2003 |   531637 |
 72 | | Congo DRC   | 2003 |   453465 |
 73 | | Iraq        | 2003 |   368580 |
 74 | | Myanmar     | 2003 |   151384 |
 75 | | Palestine   | 2003 |   350568 |
 76 | 
 77 | ``` r
 78 | set.seed(39) # for nice colours
 79 | cols <- hsv(h = sample(1:10/10), s = sample(3:12)/15, v = sample(3:12)/15)
 80 | 
 81 | alluvial_ts(Refugees, wave = .3, ygap = 5, col = cols, plotdir = 'centred', alpha=.9,
 82 |             grid = TRUE, grid.lwd = 5, xmargin = 0.2, lab.cex = .7, xlab = '',
 83 |             ylab = '', border = NA, axis.cex = .8, leg.cex = .7,
 84 |             leg.col='white', 
 85 |             title = "UNHCR-recognised refugees\nTop 10 countries (2003-13)\n")
 86 | ```
 87 | 
 88 | ![](tools/alluvial_ts-1.png)<!-- -->
 89 | 
 90 | ## Installation
 91 | 
 92 | Install stable version from CRAN using
 93 | 
 94 | ``` r
 95 | install.packages("alluvial")
 96 | ```
 97 | 
 98 | or development version from GitHub using `remotes::install_github()`:
 99 | 
100 | ``` r
101 | remotes::install_github("mbojan/alluvial", build_vignettes=TRUE)
102 | ```
103 | 
104 | ## References
105 | 
106 | <div id="refs" class="references csl-bib-body hanging-indent">
107 | 
108 | <div id="ref-ggalluvial-article" class="csl-entry">
109 | 
110 | Brunson, Jason Cory. 2020. “<span class="nocase">ggalluvial</span>:
111 | Layered Grammar for Alluvial Plots.” *Journal of Open Source Software* 5
112 | (49): 2017. <https://doi.org/10.21105/joss.02017>.
113 | 
114 | </div>
115 | 
116 | <div id="ref-r-ggalluvial" class="csl-entry">
117 | 
118 | Brunson, Jason Cory, and Quentin D. Read. 2020. “Ggalluvial: Alluvial
119 | Plots in ’Ggplot2’.” <http://corybrunson.github.io/ggalluvial/>.
120 | 
121 | </div>
122 | 
123 | <div id="ref-r-graphics" class="csl-entry">
124 | 
125 | R Core Team. 2021. *R: A Language and Environment for Statistical
126 | Computing*. Vienna, Austria: R Foundation for Statistical Computing.
127 | <https://www.R-project.org/>.
128 | 
129 | </div>
130 | 
131 | </div>
132 | 


--------------------------------------------------------------------------------
/man/alluvial_ts.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/alluvial_ts.R
  3 | \name{alluvial_ts}
  4 | \alias{alluvial_ts}
  5 | \title{Alluvial diagram for multiple time series data}
  6 | \usage{
  7 | alluvial_ts(
  8 |   dat,
  9 |   wave = NA,
 10 |   ygap = 1,
 11 |   col = NA,
 12 |   alpha = NA,
 13 |   plotdir = "up",
 14 |   rankup = FALSE,
 15 |   lab.cex = 1,
 16 |   lab.col = "black",
 17 |   xmargin = 0.1,
 18 |   axis.col = "black",
 19 |   title = NA,
 20 |   title.cex = 1,
 21 |   axis.cex = 1,
 22 |   grid = FALSE,
 23 |   grid.col = "grey80",
 24 |   grid.lwd = 1,
 25 |   leg.mode = TRUE,
 26 |   leg.x = 0.1,
 27 |   leg.y = 0.9,
 28 |   leg.cex = 1,
 29 |   leg.col = "black",
 30 |   leg.lty = NA,
 31 |   leg.lwd = NA,
 32 |   leg.max = NA,
 33 |   xlab = NA,
 34 |   ylab = NA,
 35 |   xlab.pos = 2,
 36 |   ylab.pos = 1,
 37 |   lwd = 1,
 38 |   ...
 39 | )
 40 | }
 41 | \arguments{
 42 | \item{dat}{data.frame of time-series (or suitable equivalent continuously disaggregated data), with 3 columns (in order: category, time-variable, value) with <= 1 row for each category-time combination}
 43 | 
 44 | \item{wave}{numeric, curve wavyness defined in terms of x axis data range - i.e. bezier point offset. Experiment to get this right}
 45 | 
 46 | \item{ygap}{numeric, vertical distance between polygons - a multiple of 10\% of the mean data value}
 47 | 
 48 | \item{col}{colour, value or vector of length matching the number of unique categories. Individual colours of vector are mapped to categories in alpha-numeric order}
 49 | 
 50 | \item{alpha}{numeric, [0,1] polygon fill transparency}
 51 | 
 52 | \item{plotdir}{character, string ('up', 'down' or 'centred') giving the vertical alignment of polygon stacks}
 53 | 
 54 | \item{rankup}{logical, rank polygons on time axes upward by magnitude (largest to smallest) or not}
 55 | 
 56 | \item{lab.cex}{numeric, category label font size}
 57 | 
 58 | \item{lab.col}{colour, of category label}
 59 | 
 60 | \item{xmargin}{numeric [0,1], proportional space for category labels}
 61 | 
 62 | \item{axis.col}{colour, of axes}
 63 | 
 64 | \item{title}{character, plot title}
 65 | 
 66 | \item{title.cex}{numeric, plot title font size}
 67 | 
 68 | \item{axis.cex}{numeric, font size of x-axis break labels}
 69 | 
 70 | \item{grid}{logical, plot vertical axes}
 71 | 
 72 | \item{grid.col}{colour, of grid axes}
 73 | 
 74 | \item{grid.lwd}{numeric, line width of grid axes}
 75 | 
 76 | \item{leg.mode}{logical, draw y-axis scale legend inside largest data point (TRUE default) or alternatively with custom position/value (FALSE)}
 77 | 
 78 | \item{leg.x, leg.y}{numeric [0,1], x/y positions of legend if leg.mode = FALSE}
 79 | 
 80 | \item{leg.cex}{numeric, legend text size}
 81 | 
 82 | \item{leg.col}{colour, of legend lines and text}
 83 | 
 84 | \item{leg.lty}{numeric, code for legend line type}
 85 | 
 86 | \item{leg.lwd}{numeric, legend line width}
 87 | 
 88 | \item{leg.max}{numeric, legend scale line width}
 89 | 
 90 | \item{xlab, ylab}{character, x-axis / y-axis titles}
 91 | 
 92 | \item{xlab.pos, ylab.pos}{numeric, perpendicular offset for axis titles}
 93 | 
 94 | \item{lwd}{numeric, value or vector of length matching the number of unique categories for polygon stroke line width. Individual values of vector are mapped to categories in alpha-numeric order}
 95 | 
 96 | \item{...}{arguments to pass to polygon()}
 97 | }
 98 | \description{
 99 | This is a variant of alluvial diagram suitable for multiple
100 | (cross-sectional) time series. It also works with continuous variables equivalent to time
101 | }
102 | \examples{
103 | if( require(reshape2) )
104 | {
105 | data(Refugees)
106 | reshape2::dcast(Refugees, country ~ year, value.var = 'refugees')
107 | d <- Refugees
108 | 
109 | set.seed(39) # for nice colours
110 | cols <- hsv(h = sample(1:10/10), s = sample(3:12)/15, v = sample(3:12)/15)
111 | 
112 | alluvial_ts(d)
113 | alluvial_ts(d, wave = .2, ygap = 5, lwd = 3)
114 | alluvial_ts(d, wave = .3, ygap = 5, col = cols)
115 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, rankup = TRUE)
116 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'down')
117 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'centred', grid=TRUE,
118 |             grid.lwd = 5)
119 | alluvial_ts(d, wave =  0, ygap = 0, col = cols, alpha = .9, border = 'white',
120 |             grid = TRUE, grid.lwd = 5)
121 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.4)
122 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.3, lab.cex = .7)
123 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.3, lab.cex=.7,
124 |             leg.cex=.7, leg.col = 'white')
125 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, leg.mode = FALSE, leg.x = .1,
126 |             leg.y = .7, leg.max = 3e6)
127 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'centred', alpha=.9,
128 |             grid = TRUE, grid.lwd = 5, xmargin = 0.2, lab.cex = .7, xlab = '',
129 |             ylab = '', border = NA, axis.cex = .8, leg.cex = .7,
130 |             leg.col='white', 
131 |          title = "UNHCR-recognised refugees\nTop 10 countries (2003-13)\n")
132 | 
133 | # non time-series example - Virginia deaths dataset
134 | d <- reshape2::melt(data.frame(age=row.names(VADeaths), VADeaths), id.vars='age')[,c(2,1,3)]
135 | names(d) = c('pop_group','age_group','deaths')
136 | alluvial_ts(d)
137 | }
138 | }
139 | 


--------------------------------------------------------------------------------
/man/alluvial.Rd:
--------------------------------------------------------------------------------
  1 | % Generated by roxygen2: do not edit by hand
  2 | % Please edit documentation in R/alluvial.R
  3 | \name{alluvial}
  4 | \alias{alluvial}
  5 | \title{Alluvial diagram}
  6 | \usage{
  7 | alluvial(
  8 |   ...,
  9 |   freq,
 10 |   col = "gray",
 11 |   border = 0,
 12 |   layer,
 13 |   hide = FALSE,
 14 |   alpha = 0.5,
 15 |   gap.width = 0.05,
 16 |   xw = 0.1,
 17 |   cw = 0.1,
 18 |   blocks = TRUE,
 19 |   ordering = NULL,
 20 |   axis_labels = NULL,
 21 |   mar = c(2, 1, 1, 1),
 22 |   cex = par("cex"),
 23 |   xlim_offset = c(0, 0),
 24 |   ylim_offset = c(0, 0),
 25 |   cex.axis = par("cex.axis"),
 26 |   axes = TRUE,
 27 |   ann = TRUE,
 28 |   title = NULL
 29 | )
 30 | }
 31 | \arguments{
 32 | \item{...}{vectors or data frames, all for the same number of observations}
 33 | 
 34 | \item{freq}{numeric, vector of frequencies of the same length as the number of observations}
 35 | 
 36 | \item{col}{vector of colors of the stripes}
 37 | 
 38 | \item{border}{vector of border colors for the stripes}
 39 | 
 40 | \item{layer}{numeric, order of drawing of the stripes}
 41 | 
 42 | \item{hide}{logical, should particular stripe be plotted}
 43 | 
 44 | \item{alpha}{numeric, vector of transparency of the stripes}
 45 | 
 46 | \item{gap.width}{numeric, relative width of inter-category gaps}
 47 | 
 48 | \item{xw}{numeric, the distance from the set axis to the control points of the xspline}
 49 | 
 50 | \item{cw}{numeric, width of the category axis}
 51 | 
 52 | \item{blocks}{logical, whether to use blocks to tie the flows together at each category, versus contiguous ribbons (also admits character value "bookends")}
 53 | 
 54 | \item{ordering}{list of numeric vectors allowing to reorder the alluvia on each axis separately, see Examples}
 55 | 
 56 | \item{axis_labels}{character, labels of the axes, defaults to variable names in the data}
 57 | 
 58 | \item{mar}{numeric, plot margins as in \code{\link{par}}}
 59 | 
 60 | \item{cex, cex.axis}{numeric, scaling of fonts of category labels and axis labels respectively. See \code{\link{par}}.}
 61 | 
 62 | \item{xlim_offset, ylim_offset}{numeric vectors of length 2, passed to
 63 | \code{xlim} and \code{ylim} of \code{\link{plot}}, and allow for adjusting
 64 | the limits of the plotting region}
 65 | 
 66 | \item{axes}{logical, whether to draw axes, defaults to TRUE}
 67 | 
 68 | \item{ann}{logical, whether to draw annotations: category labels. Defaults to TRUE}
 69 | 
 70 | \item{title}{character, plot title}
 71 | }
 72 | \value{
 73 | Invisibly a list with elements:
 74 | \item{\code{endpoints}}{A data frame with data on locations of the stripes with columns:
 75 | \describe{
 76 | \item{\code{...}}{Vectors/data frames supplied to
 77 | \code{alluvial} through \code{...} that define the axes}
 78 | \item{\code{.bottom},\code{.top}}{Y locations of bottom
 79 | and top coordinates respectively at which the stripes
 80 | originate from the axis \code{.axis}}
 81 | \item{\code{.axis}}{Axis number counting from the left}
 82 | } }
 83 | \item{\code{category_midpoints}}{List of vectors of Y
 84 | locations of category block midpoints.}
 85 | \item{\code{alluvium_midpoints}}{A data frame with
 86 | location of midpoints on each alluvium segement with
 87 | columns:
 88 | \describe{
 89 | \item{\code{...}}{Vectors/data frames supplied to
 90 | \code{alluvial} through the \code{...}}
 91 | \item{\code{.axis_from}, \code{.axis_to}}{IDs of axes that
 92 | a segment originates from and goes to}
 93 | \item{\code{.x}, \code{.y}}{X and Y locations of the alluvium midpoints}
 94 | \item{\code{.slope}}{The (approximate) slope of the alluvium at the midpoint}
 95 | }
 96 | }
 97 | }
 98 | \description{
 99 | Drawing alluvial diagrams, also known as parallel set plots.
100 | }
101 | \note{
102 | Please mind that the API is planned to change to be more compatible
103 |   with \pkg{dplyr} verbs.
104 | }
105 | \examples{
106 | # Titanic data
107 | tit <- as.data.frame(Titanic)
108 | 
109 | # 2d
110 | tit2d <- aggregate( Freq ~ Class + Survived, data=tit, sum)
111 | alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8,
112 |          gap.width=0.1, col= "steelblue", border="white",
113 |          layer = tit2d$Survived != "Yes" )
114 | 
115 | alluvial( tit2d[,1:2], freq=tit2d$Freq, 
116 |          hide=tit2d$Freq < 150,
117 |          xw=0.0, alpha=0.8,
118 |          gap.width=0.1, col= "steelblue", border="white",
119 |          layer = tit2d$Survived != "Yes" )
120 | 
121 | # 3d
122 | tit3d <- aggregate( Freq ~ Class + Sex + Survived, data=tit, sum)
123 | 
124 | alluvial(tit3d[,1:3], freq=tit3d$Freq, alpha=1, xw=0.2,
125 |          col=ifelse( tit3d$Survived == "No", "red", "gray"),
126 |          layer = tit3d$Sex != "Female",
127 |          border="white")
128 | 
129 | 
130 | # 4d
131 | alluvial( tit[,1:4], freq=tit$Freq, border=NA,
132 |          hide = tit$Freq < quantile(tit$Freq, .50),
133 |          col=ifelse( tit$Class == "3rd" & tit$Sex == "Male", "red", "gray") )
134 | 
135 | # 3d example with custom ordering
136 | # Reorder "Sex" axis according to survival status
137 | ord <- list(NULL, with(tit3d, order(Sex, Survived)), NULL)
138 | alluvial(tit3d[,1:3], freq=tit3d$Freq, alpha=1, xw=0.2,
139 |          col=ifelse( tit3d$Survived == "No", "red", "gray"),
140 |          layer = tit3d$Sex != "Female",
141 |          border="white", ordering=ord)
142 | 
143 | # Possible blocks options
144 | for (blocks in c(TRUE, FALSE, "bookends")) {
145 |     
146 |     # Elaborate alluvial diagram from main examples file
147 |     alluvial( tit[, 1:4], freq = tit$Freq, border = NA,
148 |               hide = tit$Freq < quantile(tit$Freq, .50),
149 |               col = ifelse( tit$Class == "3rd" & tit$Sex == "Male",
150 |                             "red", "gray" ),
151 |               blocks = blocks )
152 | }
153 | 
154 | 
155 | # Data returned
156 | x <- alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8,
157 |           gap.width=0.1, col= "steelblue", border="white",
158 |           layer = tit2d$Survived != "Yes" )
159 | points( rep(1, 16), x$endpoints[[1]], col="green")
160 | points( rep(2, 16), x$endpoints[[2]], col="blue")
161 | }
162 | 


--------------------------------------------------------------------------------
/R/alluvial.R:
--------------------------------------------------------------------------------
  1 | #' Alluvial diagram
  2 | #'
  3 | #' Drawing alluvial diagrams, also known as parallel set plots.
  4 | #'
  5 | #' @param ... vectors or data frames, all for the same number of observations
  6 | #' @param freq numeric, vector of frequencies of the same length as the number of observations
  7 | #' @param col vector of colors of the stripes
  8 | #' @param border vector of border colors for the stripes
  9 | #' @param layer numeric, order of drawing of the stripes
 10 | #' @param hide logical, should particular stripe be plotted
 11 | #' @param alpha numeric, vector of transparency of the stripes
 12 | #' @param gap.width numeric, relative width of inter-category gaps
 13 | #' @param xw numeric, the distance from the set axis to the control points of the xspline
 14 | #' @param cw numeric, width of the category axis
 15 | #' @param blocks logical, whether to use blocks to tie the flows together at each category, versus contiguous ribbons (also admits character value "bookends")
 16 | #' @param ordering list of numeric vectors allowing to reorder the alluvia on each axis separately, see Examples
 17 | #' @param axis_labels character, labels of the axes, defaults to variable names in the data
 18 | #' @param mar numeric, plot margins as in \code{\link{par}}
 19 | #' @param cex,cex.axis numeric, scaling of fonts of category labels and axis labels respectively. See \code{\link{par}}.
 20 | #' @param xlim_offset,ylim_offset numeric vectors of length 2, passed to
 21 | #'   \code{xlim} and \code{ylim} of \code{\link{plot}}, and allow for adjusting
 22 | #'   the limits of the plotting region
 23 | #' @param axes logical, whether to draw axes, defaults to TRUE
 24 | #' @param ann logical, whether to draw annotations: category labels. Defaults to TRUE
 25 | #' @param title character, plot title
 26 | #'
 27 | #' @return Invisibly a list with elements:
 28 | #' \item{\code{endpoints}}{A data frame with data on locations of the stripes with columns:
 29 | #' \describe{
 30 | #' \item{\code{...}}{Vectors/data frames supplied to
 31 | #' \code{alluvial} through \code{...} that define the axes}
 32 | #' \item{\code{.bottom},\code{.top}}{Y locations of bottom
 33 | #' and top coordinates respectively at which the stripes
 34 | #' originate from the axis \code{.axis}}
 35 | #' \item{\code{.axis}}{Axis number counting from the left}
 36 | #' } }
 37 | #' \item{\code{category_midpoints}}{List of vectors of Y
 38 | #' locations of category block midpoints.}
 39 | #' \item{\code{alluvium_midpoints}}{A data frame with
 40 | #' location of midpoints on each alluvium segement with
 41 | #' columns:
 42 | #' \describe{
 43 | #' \item{\code{...}}{Vectors/data frames supplied to
 44 | #' \code{alluvial} through the \code{...}}
 45 | #' \item{\code{.axis_from}, \code{.axis_to}}{IDs of axes that
 46 | #' a segment originates from and goes to}
 47 | #' \item{\code{.x}, \code{.y}}{X and Y locations of the alluvium midpoints}
 48 | #' \item{\code{.slope}}{The (approximate) slope of the alluvium at the midpoint}
 49 | #' }
 50 | #' }
 51 | #' 
 52 | #' @note Please mind that the API is planned to change to be more compatible
 53 | #'   with \pkg{dplyr} verbs.
 54 | #' 
 55 | #' @importFrom grDevices col2rgb rgb
 56 | #' @importFrom graphics plot xspline axis rect polygon text par
 57 | #' @importFrom dplyr select_ group_by_ arrange_ filter_ "%>%" ungroup summarise_ .data
 58 | #' @importFrom tidyr gather_
 59 | #'
 60 | #' @export
 61 | #'
 62 | #' @example man-roxygen/alluvial.R
 63 | 
 64 | alluvial <- function( ..., freq,
 65 |                      col="gray", border=0, layer, hide=FALSE, alpha=0.5,
 66 |                      gap.width=0.05, xw=0.1, cw=0.1,
 67 |                      blocks = TRUE,
 68 |                      ordering=NULL,
 69 |                      axis_labels=NULL,
 70 |                      mar = c(2, 1, 1, 1),
 71 |                      cex=par("cex"),
 72 |                      xlim_offset= c(0, 0),
 73 |                      ylim_offset= c(0, 0),
 74 |                      cex.axis=par("cex.axis"),
 75 |                      axes=TRUE,
 76 |                      ann=TRUE,
 77 |                      title = NULL)
 78 | {
 79 |   # Data and graphical parameters
 80 |   p <- data.frame( ..., freq=freq, col, alpha, border, hide, stringsAsFactors=FALSE)
 81 |   np <- ncol(p) - 5                    # Number of dimensions
 82 |   # check if 'ordering' is of proper form
 83 |   if( !is.null(ordering) )
 84 |   {
 85 |     stopifnot(is.list(ordering))
 86 |     if( length(ordering) != np )
 87 |       stop("'ordering' argument should have ",
 88 |            np, " components, has ", length(ordering))
 89 |   }
 90 |   n <- nrow(p)
 91 |   # Layers determine plotting order
 92 |   if(missing(layer))
 93 |   {
 94 |     layer <- 1:n
 95 |   }
 96 |   p$layer <- layer
 97 |   d <- p[ , 1:np, drop=FALSE]          # Dimensions dframe
 98 |   p <- p[ , -c(1:np), drop=FALSE]      # Parameteres dframe
 99 |   p$freq <- with(p, freq/sum(freq))    # Frequencies (weights)
100 |   # Converting colors to hexcodes
101 |   col <- col2rgb(p$col, alpha=TRUE)
102 |   if(!identical(alpha, FALSE)) {
103 |     col["alpha", ] <- p$alpha*256
104 |   }
105 |   p$col <- apply(col, 2, function(x) do.call(rgb, c(as.list(x), maxColorValue = 256)))
106 |   # convert character vectors in data to factors
107 |   isch <- sapply(d, is.character)
108 |   d[isch] <- lapply(d[isch], as.factor)
109 |   # Convert blocks to vector
110 |   if (length(blocks) == 1)
111 |   {
112 |     blocks <- if (!is.na(as.logical(blocks)))
113 |     {
114 |       rep(blocks, np)
115 |     } else if (blocks == "bookends")
116 |     {
117 |       c(TRUE, rep(FALSE, np - 2), TRUE)
118 |     }
119 |   }
120 |   # Axis labels
121 |   if(is.null(axis_labels)) {
122 |     axis_labels <- names(d)
123 |   } else {
124 |     if(length(axis_labels) != ncol(d))
125 |       stop("`axis_labels` should have length ", names(d), ", has ", length(axis_labels))
126 |   }
127 |   # Compute endpoints of flows (polygons)
128 |   # i = dimension id
129 |   # d = data frame of dimensions
130 |   # f = weights
131 |   # w = gap between categories
132 |   getp <- function(i, d, f, w=gap.width) {
133 |     # Ordering dimension ids for lexicographic sorting
134 |     a <- c(i, (1:ncol(d))[-i])
135 |     # Order of rows of d starting from i-th dimension
136 |     if( is.null(ordering[[i]]) )
137 |     {
138 |       o <- do.call(order, d[a])
139 |     } else {
140 |       d2 <- d
141 |       d2[1] <- ordering[[i]]
142 |       o <- do.call(order, d2[a])
143 |     }
144 |     # Breakpoints on a dimension
145 |     x <- c(0, cumsum(f[o])) * (1-w)
146 |     # Stripe coordinates on a dimension
147 |     x <- cbind(x[-length(x)], x[-1])
148 |     # By how much stripes need to be shifted upwards (gap/max(gap))
149 |     gap <- cumsum( c(0L, diff(as.numeric(d[o,i])) != 0) )
150 |     mx <- max(gap)
151 |     if (mx == 0) mx <- 1
152 |     # shifts
153 |     gap <- gap / mx * w
154 |     # add gap-related shifts to stripe coordinates on dimension i
155 |     (x + gap)[order(o),]
156 |   }
157 |   # Calculate stripe locations on dimensions: list of data frames. A component
158 |   # for a dimension. Data frame contains 'y' locations of stripes.
159 |   dd <- lapply(seq_along(d), getp, d=d, f=p$freq)
160 |   # Plotting
161 |   op <- par(mar=mar)
162 |   plot(NULL, type="n", xlim=c(1-cw, np+cw) + xlim_offset, ylim=c(0, 1) + ylim_offset, xaxt="n", yaxt="n",
163 |        xaxs="i", yaxs="i", xlab='', ylab='', main=title, frame=FALSE)
164 |   # For every stripe
165 |   ind <- which(!p$hide)[rev(order(p[!p$hide, ]$layer))]
166 |   for(i in ind )
167 |   {
168 |     # For every inter-dimensional segment
169 |     for(j in 1:(np-1) )
170 |     {
171 |       # Draw stripe
172 |       xspline( c(j, j, j+xw, j+1-xw, j+1, j+1, j+1-xw, j+xw, j) + rep(c(cw, -cw, cw), c(3, 4, 2)),
173 |              c( dd[[j]][i, c(1, 2, 2)], rev(dd[[j+1]][i, c(1, 1, 2, 2)]), dd[[j]][i,c(1, 1)]), 
174 |              shape = c(0,0,1,1,0,0,1,1,0, 0),
175 |              open=FALSE,
176 |              col=p$col[i], border=p$border[i])
177 |     }
178 |   }
179 |   # Category blocks with labels
180 |   for(j in seq_along(dd))
181 |   {
182 |     ax <- lapply(split(dd[[j]], d[,j]), range)
183 |     if (blocks[j])
184 |     {
185 |       for(k in seq_along(ax))
186 |       {
187 |         rect( j-cw, ax[[k]][1], j+cw, ax[[k]][2] )
188 |       }
189 |     } else
190 |     {
191 |       for (i in ind)
192 |       {
193 |         x <- j + c(-1, 1) * cw
194 |         y <- t(dd[[j]][c(i, i), ])
195 |         w <- xw * (x[2] - x[1])
196 |         xspline(x = c(x[1], x[1], x[1] + w, x[2] - w,
197 |                       x[2], x[2], x[2] - w, x[1] + w, x[1]),
198 |                 y = c(y[c(1, 2, 2), 1], y[c(2, 2, 1, 1), 2], y[c(1, 1), 1]),
199 |                 shape = c(0, 0, 1, 1, 0, 0, 1, 1, 0, 0),
200 |                 open = FALSE, col = p$col[i], border = p$border[i])
201 |       }
202 |     }
203 |     for(k in seq_along(ax))
204 |     {
205 |       if(ann) text( j, mean(ax[[k]]), labels=names(ax)[k], cex=cex)
206 |     }
207 |   }           
208 |   # X axis
209 |   if(axes) {
210 |     axis(1, at= rep(c(-cw, cw), ncol(d)) + rep(seq_along(d), each=2),
211 |          line=0.5, col="white", col.ticks="black", labels=FALSE)
212 |     axis(1, at=seq_along(d), tick=FALSE, labels=axis_labels, cex.axis=cex.axis)
213 |   }
214 |   par(op)
215 |   rval <- list(
216 |     # Endpoints of alluvia
217 |     endpoints = do.call(
218 |       "rbind",
219 |       lapply(
220 |         seq(along=dd),
221 |         function(i) {
222 |           df <- as.data.frame(
223 |             structure(dd[[i]], dimnames=list(NULL,c(".bottom", ".top"))),
224 |             stringsAsFactors=FALSE
225 |           )
226 |           df$.axis <- i
227 |           cbind(d, df)
228 |         }
229 |       )
230 |     )
231 |   )
232 |   # Category midpoints
233 |   rval$category_midpoints <- structure(
234 |     lapply(
235 |       seq(1, ncol(d)),
236 |       function(i) {
237 |         mi <- with(
238 |           rval$endpoints[rval$endpoints$.axis == i , ],
239 |           tapply(.bottom, d[[i]], min)
240 |         )
241 |         ma <- with(
242 |           rval$endpoints[ rval$endpoints$.axis == i , ],
243 |           tapply(.top, d[[i]], max)
244 |         )
245 |         (mi + ma)/2
246 |       }
247 |     ),
248 |     names = names(d)
249 |   )
250 |   # alluvium midpoints
251 |   qs1 <- c(as.list(names(d)), list(".axis")) %>%
252 |     lapply(as.name)
253 |   qs2 <- as.list(names(d)) %>%
254 |     lapply(as.name)
255 |   
256 |   rval$alluvium_midpoints <- rval$endpoints %>%
257 |     tidyr::pivot_longer(dplyr::one_of(".bottom", ".top"), names_to = ".endpoint", values_to = ".value") %>%
258 |     dplyr::group_by( !!!qs1 ) %>%
259 |     dplyr::summarise(m = mean(.data$.value)) %>%
260 |     dplyr::arrange(!!!qs1) %>%
261 |     dplyr::group_by(!!!qs2) %>%
262 |     dplyr::mutate(
263 |       .axis_from = dplyr::lag(.data$.axis),
264 |       .axis_to = .data$.axis,
265 |       .x = (.data$.axis_from + .data$.axis_to)/2,
266 |       .y = (.data$m + dplyr::lag(.data$m))/2,
267 |       .slope = (.data$m - dplyr::lag(.data$m)) / (.data$.axis_to - .data$.axis_from - cw)
268 |     ) %>%
269 |     dplyr::ungroup() %>%
270 |     dplyr::filter(!is.na(.data$.axis_from)) %>%
271 |     dplyr::select(dplyr::one_of(names(d), ".axis_from", ".axis_to", ".x", ".y", ".slope")) %>%
272 |     as.data.frame(stringsAsFactors=FALSE)
273 |   
274 |   invisible(rval)
275 | }
276 | 
277 | 


--------------------------------------------------------------------------------
/R/alluvial_ts.R:
--------------------------------------------------------------------------------
  1 | #' Alluvial diagram for multiple time series data
  2 | #'
  3 | #' This is a variant of alluvial diagram suitable for multiple
  4 | #' (cross-sectional) time series. It also works with continuous variables equivalent to time
  5 | #' 
  6 | #' @param dat data.frame of time-series (or suitable equivalent continuously disaggregated data), with 3 columns (in order: category, time-variable, value) with <= 1 row for each category-time combination
  7 | #' @param wave numeric, curve wavyness defined in terms of x axis data range - i.e. bezier point offset. Experiment to get this right
  8 | #' @param ygap numeric, vertical distance between polygons - a multiple of 10\% of the mean data value
  9 | #' @param col colour, value or vector of length matching the number of unique categories. Individual colours of vector are mapped to categories in alpha-numeric order
 10 | #' @param lwd numeric, value or vector of length matching the number of unique categories for polygon stroke line width. Individual values of vector are mapped to categories in alpha-numeric order
 11 | #' @param alpha numeric, [0,1] polygon fill transparency
 12 | #' @param plotdir character, string ('up', 'down' or 'centred') giving the vertical alignment of polygon stacks
 13 | #' @param rankup logical, rank polygons on time axes upward by magnitude (largest to smallest) or not
 14 | #' @param lab.cex numeric, category label font size
 15 | #' @param lab.col colour, of category label
 16 | #' @param xmargin numeric [0,1], proportional space for category labels
 17 | #' @param axis.col colour, of axes
 18 | #' @param axis.cex numeric, font size of x-axis break labels
 19 | #' @param title character, plot title
 20 | #' @param title.cex numeric, plot title font size
 21 | #' @param grid logical, plot vertical axes
 22 | #' @param grid.col colour, of grid axes
 23 | #' @param grid.lwd numeric, line width of grid axes
 24 | #' @param leg.mode logical, draw y-axis scale legend inside largest data point (TRUE default) or alternatively with custom position/value (FALSE)
 25 | #' @param leg.x,leg.y numeric [0,1], x/y positions of legend if leg.mode = FALSE
 26 | #' @param leg.cex numeric, legend text size
 27 | #' @param leg.col colour, of legend lines and text
 28 | #' @param leg.lty numeric, code for legend line type
 29 | #' @param leg.lwd numeric, legend line width
 30 | #' @param leg.max numeric, legend scale line width
 31 | #' @param xlab,ylab character, x-axis / y-axis titles
 32 | #' @param xlab.pos,ylab.pos numeric, perpendicular offset for axis titles
 33 | #' @param ... arguments to pass to polygon()
 34 | #' 
 35 | #' @importFrom grDevices extendrange rainbow
 36 | #' @importFrom graphics mtext plot.new plot.window abline lines
 37 | #' @importFrom stats aggregate approx
 38 | #'
 39 | #' @export
 40 | #'
 41 | #' @example man-roxygen/alluvial_ts.R
 42 | 
 43 | 
 44 | alluvial_ts <- function(dat, wave = NA, ygap = 1, col = NA, alpha = NA, plotdir = 'up', rankup = FALSE, 
 45 |                         lab.cex = 1, lab.col = 'black', xmargin = .1, axis.col = 'black', title = NA, 
 46 |                         title.cex = 1, axis.cex = 1, grid = FALSE, grid.col = 'grey80', grid.lwd = 1, 
 47 |                         leg.mode = TRUE, leg.x = .1, leg.y = .9, leg.cex = 1, leg.col = 'black', leg.lty = NA, 
 48 |                         leg.lwd = NA, leg.max = NA, xlab = NA, ylab = NA, xlab.pos = 2, ylab.pos = 1, lwd = 1, ...){
 49 |   
 50 |   orig.names <- names(dat)
 51 |   names(dat) <- c('item', 'time', 'val')
 52 |   if(is.numeric(dat$item)) dat$item <- as.character(dat$item)
 53 |   if(is.ordered(dat$time) | is.factor(dat$time)) {
 54 |     axis.labs <- levels(dat$time)
 55 |     dat$time <- as.numeric(dat$time)
 56 |   } else if(is.numeric(dat$time)) axis.labs <- sort(unique(dat$time)) else {
 57 |     return("Error: time variable must be numeric, factor, or ordered")}
 58 |   times <- sort(unique(dat$time))
 59 |   
 60 |   dat <- dat[order(dat$item), ]
 61 |   datsum <- aggregate(val ~ item, dat, mean)
 62 |   plotorder <- order(datsum$val, decreasing = TRUE) # smallest last (on top)
 63 |   maxval <- pretty(max(dat$val))[2] # legend max
 64 |   
 65 |   # colours
 66 |   n <- length(unique(dat$item))
 67 |   if(all(is.na(col))) col <- rainbow(n)
 68 |   if(length(col) == 1) col <- rep(col, n)
 69 |   col <- rgb(t(col2rgb(col)), maxColorValue = 255) # ensure hex
 70 |   if(!length(col) == n) return("Error: 'col' length must equal the number of unique data elements")
 71 |   if(!is.na(alpha)) {
 72 |     col[nchar(col)>7] <- substr(col[nchar(col)>7], 1, 7)
 73 |     col <- paste0(col, substr(rgb(0, 0, alpha), 6, 7))
 74 |   }
 75 |   
 76 |   # calc vertical gap between items
 77 |   ymean <- mean(dat$val)
 78 |   ygap <- ymean * .1 * ygap
 79 |   
 80 |   # if not specified (but it really should be)
 81 |   if(is.na(wave)) wave <- .5 * (rev(times)[1] - times[1])/length(times)
 82 |   
 83 |   plot.y.max <- 0  # vertical plot scaling
 84 |   
 85 |   # prepare main data object
 86 |   d <- list()
 87 |   for(i in unique(dat$item)) d[[i]] <- list()
 88 |   
 89 |   # loop through periods
 90 |   for(i in 1:length(times)){
 91 |     
 92 |     # 3 time variables, NA if they fall outside the data period
 93 |     if(i>1) t1 <- times[i-1] else t1 <- NA              # prev period
 94 |     t2 <- times[i]                                     # this period
 95 |     if(i<length(times)) t3 <- times[i+1] else t3 <- NA  # next period
 96 |     
 97 |     dat.t <- dat[dat$time == t2, ]
 98 |     dat.t <- if(rankup) { dat.t[order(dat.t$val, decreasing = TRUE), ] 
 99 |       } else dat.t[order(dat.t$val, decreasing = FALSE), ]
100 |     
101 |     y.sum <- sum(dat.t$val) + (nrow(dat.t) * ygap)
102 |     if(y.sum > plot.y.max) plot.y.max <- y.sum
103 |     
104 |     # loop through items
105 |     if(plotdir == 'centred') y <- -y.sum/2 else y <- 0  # vertical scaler
106 |     
107 |     for(j in 1:nrow(dat.t)){
108 |       # work up/down y axes to calculate spline positions
109 |       y0 <- y + ygap
110 |       y1 <- y0 + dat.t$val[j]
111 |       y <- y1
112 |       
113 |       # calculate left and right x-axis splines
114 |       if(!is.na(t1)) spline.x <- c(t2 - wave, t2) else spline.x <- numeric(0)
115 |       if(!is.na(t3)) spline.x <- c(spline.x, t2, t2 + wave)
116 |       
117 |       # update d
118 |       item <- as.character(dat.t$item[j])
119 |       d[[item]]$x  <- c(d[[item]]$x,  spline.x)
120 |       d[[item]]$y0 <- c(d[[item]]$y0, rep(y0, length(spline.x)))
121 |       d[[item]]$y1 <- c(d[[item]]$y1, rep(y1, length(spline.x)))
122 |     } # end items loop
123 |   } # end period loop
124 |   
125 |   # function to ensure vertex arrays are same length, as xspline output can vary
126 |   resize <- function(v, n = 500){ d<-data.frame(x = 1:length(v), y = v); 
127 |                                   approx(d, xout = seq(1, length(v), length.out = n))$y}
128 |   plot.new() # required by xspline
129 |   
130 |   # calculate spline curves
131 |   for(i in names(d)){
132 |     curves <- list()
133 |     # iterate through bottom/top sets of bezier points, get curves and resize
134 |     for(j in c('y0', 'y1')) curves[[j]] <- lapply(xspline(d[[i]]$x, d[[i]][[j]], shape = 1, draw = FALSE), resize)
135 |     # stitch top and bottom polylines together clockwise into a polygon
136 |     d[[i]]$poly <- data.frame(x = c(curves[[1]]$x, rev(curves[[2]]$x)), y = c(curves[[1]]$y, rev(curves[[2]]$y)))
137 |   }
138 |   
139 |   # label y positions
140 |   labs.l <- data.frame(t(sapply(1:length(d), FUN = function(i)t(data.frame(lab = names(d[i]), lefty = mean(c(d[[i]]$y0[1], d[[i]]$y1[1]))) ))), stringsAsFactors = FALSE)
141 |   labs.r <- data.frame(t(sapply(1:length(d), FUN = function(i)t(data.frame(lab = names(d[i]), lefty = mean(c(rev(d[[i]]$y0)[1], rev(d[[i]]$y1)[1])))) )), stringsAsFactors = FALSE)
142 |   names(labs.l) <- names(labs.r) <- c('item', 'y')
143 |   labs.l$y <- as.numeric(as.character(labs.l$y)); labs.r$y <- as.numeric(as.character(labs.r$y)) # to numeric
144 |   labs.l$col <- col[match(labs.l$item, datsum$item)]
145 |   labs.r$col <- col[match(labs.r$item, datsum$item)]
146 |   
147 |   # line widths
148 |   if(any(lwd == 0)) return("Error: 'lwd' must be greater than zero")
149 |   if(length(lwd) == 1) lwd <- rep(lwd, length(d))
150 |   if(!length(lwd) == length(d)) return("Error: 'lwd' length must equal the number of unique data elements")
151 |   lwd <- lwd[plotorder]
152 | 
153 |   # scale and orientation of axes
154 |   if(plotdir == 'up')       ylim <- c(0, plot.y.max) else {
155 |     if(plotdir == 'down')   ylim <- c(plot.y.max, 0) else {
156 |       if(plotdir == 'centred') ylim <- c(-plot.y.max/2, plot.y.max/2) else {
157 |         return("Incorrect specification for plotdir: please select 'up', 'down' or 'centred'")
158 |       }}}
159 |   xran <- range(dat$time)
160 |   xlim <- extendrange(r = xran, f = xmargin/2)
161 |   
162 |   # plot order (biggest polygons first)
163 |   d <- d[plotorder]
164 |   col <- col[plotorder]
165 |   
166 |   # axis labels
167 |   if(is.na(xlab)) xlab <- orig.names[2] 
168 |   if(is.na(ylab)) ylab <- orig.names[3]
169 |   
170 |   # plot
171 |   plot.window(xlim = xlim, ylim = ylim)
172 |   axis(1, times, cex.axis = axis.cex, labels = axis.labs, col = lab.col, col.axis = axis.col)
173 |   if(is.na(title)) title <- paste('Alluvial plot of', orig.names[1], 'vs', orig.names[3], 'by', orig.names[2])
174 |   title(title, cex.main = title.cex)
175 |   mtext(xlab, side = 1, line = xlab.pos, col = lab.col); mtext(ylab, side = 2, line = ylab.pos, col = lab.col)
176 |   if(grid) abline(v = times, col = grid.col, lwd = grid.lwd)
177 |   for(i in 1:length(d)) polygon(d[[i]]$poly$x, d[[i]]$poly$y, col = col[i], lwd = lwd[i], ...)
178 |   text(times[1], labs.l$y, labels = labs.l$item, col = labs.l$col, pos = 2, cex = lab.cex)
179 |   text(times[length(times)], labs.r$y, labels = labs.r$item, adj = 0, col = labs.r$col, pos = 4, cex = lab.cex)
180 |   
181 |   # legend
182 |   topval  <- max(dat$val)
183 |   topitem <- as.character(dat$item[match(topval, dat$val)])
184 |   toptime <- dat$time[match(topval, dat$val)]
185 |   
186 |   if(leg.mode){ # legend plotted on maximum data point
187 |     if(is.na(leg.lty)) leg.lty <- "dotted"; if(is.na(leg.lwd)) leg.lwd <- 2
188 |     val_ind <- match(toptime, d[[topitem]]$x)
189 |     leg_y0 <- d[[topitem]]$y0[val_ind]
190 |     leg_y1 <- d[[topitem]]$y1[val_ind]
191 |     leg_ym <- mean(c(leg_y0, leg_y1))
192 |     lines(rep(toptime, 2), c(leg_y0, leg_ym-(topval*.08)), lwd = leg.lwd, lend = 'butt', col = leg.col, lty = leg.lty)
193 |     lines(rep(toptime, 2), c(leg_y1, leg_ym+(topval*.08)), lwd = leg.lwd, lend = 'butt', col = leg.col, lty = leg.lty)
194 |     text(toptime, leg_ym, labels = formatC(topval, format = "d", big.mark = ','), pos = NULL, cex = leg.cex, col = leg.col)
195 |   } else {         # legend plotted in custom position
196 |     if(!is.na(leg.max)) maxval <- leg.max
197 |     if(is.na(leg.lty)) leg.lty <- "solid"; if(is.na(leg.lwd)) leg.lwd <- 10
198 |     leg_x <- (xlim[2]-xlim[1]) * leg.x + xlim[1]
199 |     leg_y <- (ylim[2]-ylim[1]) * leg.y + ylim[1]
200 |     diffs <- d[[topitem]]$y1 - d[[topitem]]$y0
201 |     time <- d[[topitem]]$x[match(max(diffs), diffs)]
202 |     lines(data.frame(x = c(leg_x, leg_x), y = c(leg_y, leg_y + maxval)), lwd = leg.lwd, lend = 'butt', col = leg.col, lty = leg.lty)
203 |     max.lab <- formatC(maxval, format = "d", big.mark = ',')
204 |     text(rep(leg_x, 2), c(leg_y, leg_y + maxval), labels = c('0', max.lab), cex = leg.cex, pos = 4, offset = 1, col = leg.col)
205 |   }
206 | }
207 | 


--------------------------------------------------------------------------------
/vignettes/alluvial.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Creating Alluvial Diagrams"
  3 | author: "Michał Bojanowski"
  4 | date: "`r Sys.Date()`"
  5 | output: 
  6 |   rmarkdown::html_vignette:
  7 |     toc: yes
  8 |     fig_caption: yes
  9 | vignette: >
 10 |   %\VignetteIndexEntry{Creating Alluvial Diagrams}
 11 |   %\VignetteEngine{knitr::rmarkdown}
 12 |   %\VignetteEncoding{UTF-8}
 13 | ---
 14 | 
 15 | 
 16 | ```{r setup, cache=FALSE, include=FALSE}
 17 | library(alluvial)
 18 | library(dplyr)
 19 | 
 20 | knitr::opts_chunk$set(
 21 |   fig.width=7, 
 22 |   fig.height=4,
 23 |   cache=FALSE
 24 | )
 25 | ```
 26 | 
 27 | 
 28 | 
 29 | 
 30 | # What is alluvial diagram?
 31 | 
 32 | Alluvial diagram is a variant of a Parallel Coordinates Plot (PCP) but for categorical variables. Variables are assigned to vertical axes that are parallel. Values are represented with blocks on each axis. Observations are represented with *alluvia* (sing. "alluvium") spanning across all the axes.
 33 | 
 34 | You create alluvial diagrams with function `alluvial()`. Let us use `Titanic` dataset as an example. As it is a `table`, we need to convert it to a data frame
 35 | 
 36 | ```{r data}
 37 | tit <- as.data.frame(Titanic, stringsAsFactors = FALSE)
 38 | head(tit)
 39 | ```
 40 | 
 41 | and create the alluvial diagram:
 42 | 
 43 | ```{r example}
 44 | alluvial(
 45 |   tit[,1:4], 
 46 |   freq=tit$Freq,
 47 |   col = ifelse(tit$Survived == "Yes", "orange", "grey"),
 48 |   border = ifelse(tit$Survived == "Yes", "orange", "grey"),
 49 |   hide = tit$Freq == 0,
 50 |   cex = 0.7
 51 | )
 52 | ```
 53 | 
 54 | We have four variables:
 55 | 
 56 | - `Class` on the ship the passanger occupied
 57 | - `Sex` of the passenger
 58 | - `Age` of the passenger
 59 | - Whether the passenger `Survived`.
 60 | 
 61 | Vertical sizes of the blocks are proportional to the frequency, and so are the widths of the alluvia. Alluvia represent all combinations of values of the variables in the dataset. By default the vertical order of the alluvia is determined by alphabetical ordering of the values on each variable lexicographically (last variable changes first) drawn from bottom to top. In this example, the color is determined by passengers' survival status, i.e. passenger who survived are represented with orange alluvia.
 62 | 
 63 | Alluvial diagrams are very useful in reading various conditional and uncoditional distributions in a multivariate dataset. For example, we can see that:
 64 | 
 65 | - Most of the Crew did not survived -- majority of the height of the Crew category is covered by grey alluvia.
 66 | - Majortity of the Crew where adult men.
 67 | - Almost all women from the 1st Class did survive.
 68 | - The women who did not survive come mostly from 3rd class.
 69 | 
 70 | 
 71 | 
 72 | 
 73 | 
 74 | # Simple use
 75 | 
 76 | Minimal use requires supplying data frame(s) as first argument, and a vector of frequencies as the `freq` argument. By default all alluvia are drawn using mildly transparent gray.
 77 | 
 78 | Two variables `Class` and `Survived`:
 79 | 
 80 | ```{r alluvial_defaults_2}
 81 | # Survival status and Class
 82 | tit %>% group_by(Class, Survived) %>%
 83 |   summarise(n = sum(Freq)) -> tit2d
 84 | 
 85 | alluvial(tit2d[,1:2], freq=tit2d$n)
 86 | ```
 87 | 
 88 | Three variables `Sex`, `Class`, and `Survived`:
 89 | 
 90 | ```{r alluvial_defaults_3}
 91 | # Survival status, Sex, and Class
 92 | tit %>% group_by(Sex, Class, Survived) %>%
 93 |   summarise(n = sum(Freq)) -> tit3d
 94 | 
 95 | alluvial(tit3d[,1:3], freq=tit3d$n)
 96 | ```
 97 | 
 98 | 
 99 | 
100 | 
101 | # Customizing
102 | 
103 | There are several ways to customize alluvial diagrams with `alluvial()` the following sections illustrate probably most common usecases.
104 | 
105 | 
106 | ## Customizing colors
107 | 
108 | Colors of the alluvia can be customized with `col`, `border` and `alpha` arguments. For example:
109 | 
110 | ```{r colors}
111 | alluvial(
112 |   tit3d[,1:3], 
113 |   freq=tit3d$n,
114 |   col = ifelse( tit3d$Sex == "Female", "pink", "lightskyblue"),
115 |   border = "grey",
116 |   alpha = 0.7,
117 |   blocks=FALSE
118 | )
119 | ```
120 | 
121 | 
122 | 
123 | 
124 | 
125 | ## Hiding and reordering alluvia
126 | 
127 | ### Hiding
128 | 
129 | With `alluvial` sometimes it is desirable to omit plotting of some of the alluvia. This is most frequently the case with larger datasets in which there are a lot of combinations of values of the variables associated with very small frequencies, or even 0s. Alluvia can be hidden with argument `hide` expecting a logical vector of length equal to the number of rows in the data. Alluvia for which `hide` is `FALSE` are not plotted. For example, to hide alluvia with frequency less than 150:
130 | 
131 | ```{r alluvial_hide}
132 | alluvial(tit2d[,1:2], freq=tit2d$n, hide=tit2d$n < 150)
133 | ```
134 | 
135 | This skips drawing the alluvia corresponding to the following rows in `tit` data frame:
136 | 
137 | ```{r}
138 | tit2d %>% select(Class, Survived, n) %>%
139 |   filter(n < 150)
140 | ```
141 | 
142 | You can see the gaps e.g. on the "Yes" and "No" category blocks on the `Survived` axis. 
143 | 
144 | If you would rather omit these rows from the plot alltogether (i.e. no gaps), you need to filter your data before it is used by `alluvial()`.
145 | 
146 | 
147 | 
148 | 
149 | 
150 | 
151 | 
152 | 
153 | 
154 | ### Changing "layers"
155 | 
156 | By default alluvia are plotted in sequence determined by the row order in the dataset. It determines which alluvia will be plotted "on top" of which others.
157 | 
158 | Consider simple data:
159 | 
160 | ```{r data_layer}
161 | d <- data.frame(
162 |   x = c(1, 2, 3),
163 |   y = c(3 ,2, 1),
164 |   freq=c(1,1,1)
165 | )
166 | d
167 | ```
168 | 
169 | As there are three rows, we will have three alluvia:
170 | 
171 | ```{r layer_ex1, fig.width=3, fig.height=3, fig.show="hold"}
172 | alluvial(d[,1:2], freq=d$freq, col=1:3, alpha=1)
173 | # Reversing the order
174 | alluvial(d[ 3:1, 1:2 ], freq=d$freq, col=3:1, alpha=1)
175 | ```
176 | 
177 | Note that to keep colors matched in the same way to the alluvia we had to reverse the `col` argument too. Instead of reordering the data and keeping track of the other arguments plotting order can be adjusted with `layer` argument:
178 | 
179 | ```{r layer_ex2}
180 | alluvial(d[,1:2], freq=d$freq, col=1:3, alpha=1, 
181 |          layer=3:1)
182 | ```
183 | 
184 | The value of `layer` is passed to `order` so it is possible to use logical vectors e.g. if you only want to put some of the flows on top. For example, for Titanic data to put all alluvia for all survivors on top we can:
185 | 
186 | ```{r layer_ex3}
187 | alluvial(tit3d[,1:3], freq=tit3d$n, 
188 |          col = ifelse( tit3d$Survived == "Yes", "orange", "grey" ),
189 |          alpha = 0.8,
190 |          layer = tit3d$Survived == "No"
191 | )
192 | ```
193 | 
194 | First layer is the one on top, second layer below the first and so on. Consequently, in the example above, `Survived == "No"` is ordered after `Survived == "Yes"` so the former is below the latter.
195 | 
196 | 
197 | 
198 | 
199 | 
200 | 
201 | 
202 | 
203 | ### Adjusting vertical order of categories
204 | 
205 | By default `alluvial()` orders the values on each axis in an alphabetic order. This happens irrespectively of the ordering of observations in the plotted dataset. It is possible to override the default ordering by transforming the variables of interest into `factor`s with a custom ordering of levels. 
206 | 
207 | Consider the following example data:
208 | 
209 | ```{r catorder_data}
210 | d <- data.frame(
211 |   col = c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99", 
212 |           "#E31A1C", "#FDBF6F", "#FF7F00"), # from RColorBrewer Paired palette
213 |   Temperature = rep(c("cool", "hot"), each=4),
214 |   Luminance = rep(c("bright", "dark"), 4),
215 |   Color = rep(c("blue", "green", "red", "orange"), each=2)
216 | ) %>%
217 |   mutate(
218 |     n = 1
219 |   )
220 | d
221 | ```
222 | 
223 | Plotting it with `alluvial()` with default settings will give:
224 | 
225 | ```{r catorder_alluvial_defaults}
226 | alluvial(
227 |   select(d, Temperature, Luminance, Color),
228 |   freq=d$n,
229 |   col = d$col,
230 |   alpha=0.9
231 | )
232 | ```
233 | 
234 | Let's change the order of categories of:
235 | 
236 | - the `Color` axis such that it is (from the bottom): green, blue, orange, red.
237 | - the `Luminance` axis such that it is reversed.
238 | 
239 | For each variable we create a factor with a vector of unique values of that variable sorted the way we want passed to `levels` argument of `factor()`:
240 | 
241 | ```{r catorder_data_factor}
242 | d <- d %>%
243 |   mutate(
244 |     Color_f = factor(Color, levels=c("green", "blue", "red", "orange")),
245 |     Luminance_f = factor(Luminance, levels=c("dark", "bright"))
246 |   )
247 | ```
248 | 
249 | ... and plot
250 | 
251 | ```{r catorder_alluvial_factor}
252 | alluvial(
253 |   select(d, Temperature, Luminance_f, Color_f),
254 |   freq=d$n,
255 |   col = d$col,
256 |   alpha=0.9
257 | )
258 | ```
259 | 
260 | Another version recognizing that in data `Color` is nested in `Temperature`. Different axis order gives clearer picture of the data structure.
261 | 
262 | ```{r catorder_alluvial_factor2}
263 | alluvial(
264 |   select(d, Temperature, Color_f, Luminance_f),
265 |   freq=d$n,
266 |   col = d$col,
267 |   alpha=0.9
268 | )
269 | ```
270 | 
271 | 
272 | 
273 | ### Adjusting vertical order of alluvia
274 | 
275 | **This feature is experimental!**
276 | 
277 | Usually the order of the variables (axes) is rather unimportant. However, having particular two variables next to each other facilitates analyzing dependency between those two variables. In alluvial diagrams the ordering of the variables determines the vertical plotting order of the alluvia. This vertical order, together with setting `blocks` to `FALSE`, can be used to turn category blocks into stacked barcharts.
278 | 
279 | Consider two versions of subsets of the Titanic data that differ only in the order of variables.
280 | 
281 | ```{r ordering_data}
282 | tit %>% group_by(Sex, Age, Survived) %>%
283 |   summarise( n= sum(Freq)) -> x
284 | 
285 | tit %>% group_by(Survived, Age, Sex) %>%
286 |   summarise( n= sum(Freq)) -> y
287 | ```
288 | 
289 | In `x` we have `r paste(names(x), collapse="-")` while in `y` we have
290 | `r paste(names(y), collapse="-")`.
291 | 
292 | If we color the alluvia according to the first axis, the category blocks of Age and Survived become barcharts showing relative frequencies of Men and Women within categories of Age and Survived. 
293 | 
294 | ```{r ordering_x}
295 | alluvial(x[,1:3], freq=x$n, 
296 |          col = ifelse(x$Sex == "Male", "orange", "grey"),
297 |          alpha = 0.8,
298 |          blocks=FALSE
299 | )
300 | ```
301 | 
302 | Now we can see for example that
303 | 
304 | - There were a little bit of more girls than boys (category `Age == "Child"`)
305 | - Among surviors there were roughly the same number of Men and Women.
306 | 
307 | Argument `ordering` can be used to fully customize the ordering of each alluvium on each axis without the need to reorder the axes themselves. This feature is experimental as you can easily break things. It expects a list of numeric vectors or `NULL`s one for each variable in the data:
308 | 
309 | - Value `NULL` does not change the default order on the corresponding axis. 
310 | - A numeric vector should have length equal to the number of rows in the data and is determines the vertical order of the alluvia on the corresponding axis.
311 | 
312 | For example:
313 | 
314 | ```{r ordering_y}
315 | alluvial(y[,1:3], freq=y$n, 
316 |          # col = RColorBrewer::brewer.pal(8, "Set1"),
317 |          col = ifelse(y$Sex == "Male", "orange", "grey"),
318 |          alpha = 0.8,
319 |          blocks = FALSE,
320 |          ordering = list(
321 |            order(y$Survived, y$Sex == "Male"),
322 |            order(y$Age, y$Sex == "Male"),
323 |            NULL
324 |          )
325 | )
326 | ```
327 | 
328 | The list passed to `ordering` has has three elements corresponding to `Survived`, `Age`, and `Sex` respectively (that's the order of the variables in `y`). The elements of this list are
329 | 
330 | 1. Call to `order` sorting the alluvia on the `Survived` axis. The alluvia need to be sorted according to `Survived` first (otherwise the categories "Yes" and "No" will be destroyed)
331 | and according to the `Sex` second.
332 | 2. Call to `order` sorting the alluvia on the `Age` axis. The alluvia need to be sorted according to `Age` first `Sex` second.
333 | 3. `NULL` leaves the default ordering on `Sex` axis.
334 | 
335 | In the example below alluvia are colored by sex (red=Female, blue=Male) and survival status (bright=survived, dark=did not survive). Each category block is a stacked barchart showing relative freuquencies of man/women who did/did not survive. The alluvia are reordered on the last axis (Age) so that Sex categories are next each other (red together and blue together):
336 | 
337 | 
338 | ```{r}
339 | pal <- c("red4", "lightskyblue4", "red", "lightskyblue")
340 | 
341 | tit %>%
342 |   mutate(
343 |     ss = paste(Survived, Sex),
344 |     k = pal[ match(ss, sort(unique(ss))) ]
345 |   ) -> tit
346 | 
347 | 
348 | alluvial(tit[,c(4,2,3)], freq=tit$Freq,
349 |          hide = tit$Freq < 10,
350 |          col = tit$k,
351 |          border = tit$k,
352 |          blocks=FALSE,
353 |          ordering = list(
354 |            NULL,
355 |            NULL,
356 |            order(tit$Age, tit$Sex )
357 | 
358 |          )
359 | )
360 | ```
361 | 
362 | 
363 | 
364 | 
365 | 
366 | 
367 | 
368 | 
369 | 
370 | 
371 | # Appendix
372 | 
373 | ```{r session_info}
374 | sessionInfo()
375 | ```
376 | 
377 | 


--------------------------------------------------------------------------------