├── .github
├── .gitignore
└── workflows
│ └── R-CMD-check.yaml
├── .gitignore
├── LICENSE
├── tests
├── testall.R
└── testthat
│ ├── _snaps
│ ├── alluvial
│ │ └── alluvial.png
│ └── alluvial.md
│ └── test-alluvial.R
├── tools
├── alluvial-1.png
└── alluvial_ts-1.png
├── .Rbuildignore
├── Makefile
├── alluvial.Rproj
├── inst
└── CITATION
├── R
├── Refugees.R
├── alluvial.R
└── alluvial_ts.R
├── DESCRIPTION
├── vignettes
├── alluvial.bib
└── alluvial.Rmd
├── man
├── Refugees.Rd
├── alluvial_ts.Rd
└── alluvial.Rd
├── NAMESPACE
├── NEWS.md
├── man-roxygen
├── alluvial_ts.R
└── alluvial.R
├── data
└── Refugees.csv
├── README.Rmd
└── README.md
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2016
2 | COPYRIGHT HOLDER: Michal Bojanowski
3 |
--------------------------------------------------------------------------------
/tests/testall.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(alluvial)
3 |
4 | test_check("alluvial")
5 |
--------------------------------------------------------------------------------
/tools/alluvial-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mbojan/alluvial/HEAD/tools/alluvial-1.png
--------------------------------------------------------------------------------
/tools/alluvial_ts-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mbojan/alluvial/HEAD/tools/alluvial_ts-1.png
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | man-roxygen
2 | ^.*\.Rproj$
3 | ^\.Rproj\.user$
4 | README.Rmd
5 | Makefile
6 | ^\.github$
7 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/alluvial/alluvial.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mbojan/alluvial/HEAD/tests/testthat/_snaps/alluvial/alluvial.png
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | default:
2 |
3 | %.md: %.Rmd
4 | Rscript -e 'knitr::knit("$<", output="$@")'
5 |
6 | %.html: %.md
7 | pandoc -f markdown_github -o $@ $<
8 |
9 |
10 | default: README.md
11 |
12 | preview: README.html
13 |
14 | .PHONY: default preview
15 |
--------------------------------------------------------------------------------
/alluvial.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 |
--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
1 | year <- sub("-.*", "", meta$Date)
2 | note <- sprintf("R package version: %s", meta$Version)
3 |
4 | bibentry( bibtype="Manual",
5 | title="{alluvial}: R Package for Creating Alluvial Diagrams",
6 | author= c(person("Michal", "Bojanowski"),
7 | person("Robin", "Edwards") ),
8 | year = year,
9 | note = note,
10 | url = "https://github.com/mbojan/alluvial",
11 | mheader="To cite package 'allluvial' in publications use:",
12 | mfooter= paste("We have invested some time and effort in creating this package,",
13 | "please cite it when using it for data analysis or other tasks.", sep = " ")
14 | )
15 |
--------------------------------------------------------------------------------
/R/Refugees.R:
--------------------------------------------------------------------------------
1 | #' Refugees data
2 | #'
3 | #' Top 10 countries/territories of origin (excluding "Various") for period
4 | #' 2003-13 of UNHCR statistics on "Persons recognized as refugees under the
5 | #' 1951 UN Convention/1967 Protocol, the 1969 OAU Convention, in accordance
6 | #' with the UNHCR Statute, persons granted a complementary form of protection
7 | #' and those granted temporary protection."
8 | #'
9 | #' @format
10 | #' Data frame with the following columns:
11 | #' \describe{
12 | #' \item{country}{Country or territory of origin}
13 | #' \item{year}{Year (2003-13)}
14 | #' \item{refugees}{Persons recognized as refugees under the 1951 UN Convention, etc..}
15 | #' }
16 | #'
17 | #' @source
18 | #' \url{http://data.un.org/Data.aspx?d=UNHCR&f=indID\%3aType-Ref}
19 | #'
20 | #' @docType data
21 | #' @name Refugees
22 | NULL
23 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: alluvial
2 | Type: Package
3 | Title: Alluvial Diagrams
4 | Version: 0.2-0
5 | Date: 2017-04-07
6 | Authors@R: c(
7 | person("Michał", "Bojanowski", role=c("aut", "cre"), email="michal2992@gmail.com", comment=c(ORCID="0000-0001-7503-852X")),
8 | person("Robin", "Edwards", role="aut", email="robin.edwards@ucl.ac.uk")
9 | )
10 | Description: Creating alluvial diagrams (also known as parallel sets plots) for multivariate
11 | and time series-like data.
12 | URL: https://github.com/mbojan/alluvial
13 | BugReports: https://github.com/mbojan/alluvial/issues
14 | Imports: dplyr, tidyr
15 | Suggests:
16 | devtools,
17 | testthat,
18 | reshape2,
19 | knitr,
20 | rmarkdown,
21 | License: MIT + file LICENSE
22 | LazyLoad: yes
23 | LazyData: yes
24 | Config/testthat/edition: 3
25 | VignetteBuilder: knitr
26 | RoxygenNote: 7.1.2
27 | Encoding: UTF-8
28 |
--------------------------------------------------------------------------------
/vignettes/alluvial.bib:
--------------------------------------------------------------------------------
1 | @Manual{r-graphics,
2 | title = {R: A Language and Environment for Statistical Computing},
3 | author = {{R Core Team}},
4 | organization = {R Foundation for Statistical Computing},
5 | address = {Vienna, Austria},
6 | year = {2021},
7 | url = {https://www.R-project.org/},
8 | }
9 |
10 | @Misc{r-ggalluvial,
11 | title = {ggalluvial: Alluvial Plots in 'ggplot2'},
12 | author = {Jason Cory Brunson and Quentin D. Read},
13 | year = {2020},
14 | note = {R package version 0.12.3},
15 | url = {http://corybrunson.github.io/ggalluvial/},
16 | }
17 |
18 | @Article{ggalluvial-article,
19 | title = {{ggalluvial}: Layered Grammar for Alluvial Plots},
20 | author = {Jason Cory Brunson},
21 | year = {2020},
22 | journal = {Journal of Open Source Software},
23 | volume = {5},
24 | number = {49},
25 | pages = {2017},
26 | doi = {10.21105/joss.02017},
27 | }
--------------------------------------------------------------------------------
/man/Refugees.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/Refugees.R
3 | \docType{data}
4 | \name{Refugees}
5 | \alias{Refugees}
6 | \title{Refugees data}
7 | \format{
8 | Data frame with the following columns:
9 | \describe{
10 | \item{country}{Country or territory of origin}
11 | \item{year}{Year (2003-13)}
12 | \item{refugees}{Persons recognized as refugees under the 1951 UN Convention, etc..}
13 | }
14 | }
15 | \source{
16 | \url{http://data.un.org/Data.aspx?d=UNHCR&f=indID\%3aType-Ref}
17 | }
18 | \description{
19 | Top 10 countries/territories of origin (excluding "Various") for period
20 | 2003-13 of UNHCR statistics on "Persons recognized as refugees under the
21 | 1951 UN Convention/1967 Protocol, the 1969 OAU Convention, in accordance
22 | with the UNHCR Statute, persons granted a complementary form of protection
23 | and those granted temporary protection."
24 | }
25 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(alluvial)
4 | export(alluvial_ts)
5 | importFrom(dplyr,"%>%")
6 | importFrom(dplyr,.data)
7 | importFrom(dplyr,arrange_)
8 | importFrom(dplyr,filter_)
9 | importFrom(dplyr,group_by_)
10 | importFrom(dplyr,select_)
11 | importFrom(dplyr,summarise_)
12 | importFrom(dplyr,ungroup)
13 | importFrom(grDevices,col2rgb)
14 | importFrom(grDevices,extendrange)
15 | importFrom(grDevices,rainbow)
16 | importFrom(grDevices,rgb)
17 | importFrom(graphics,abline)
18 | importFrom(graphics,axis)
19 | importFrom(graphics,lines)
20 | importFrom(graphics,mtext)
21 | importFrom(graphics,par)
22 | importFrom(graphics,plot)
23 | importFrom(graphics,plot.new)
24 | importFrom(graphics,plot.window)
25 | importFrom(graphics,polygon)
26 | importFrom(graphics,rect)
27 | importFrom(graphics,text)
28 | importFrom(graphics,xspline)
29 | importFrom(stats,aggregate)
30 | importFrom(stats,approx)
31 | importFrom(tidyr,gather_)
32 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # 0.2-0
2 |
3 | ## New features
4 |
5 | - `alluvial()` exposes `mar` argument that is passed to `par()` so the user can customize the plot margins (which are rather narrow by default) (#31).
6 | - `alluvial()` has two new arguments `xlim_offset` and `ylim_offset` which are passed to `xlim` and `ylim` of `plot()`. This enables adjusting the size of the plotting region and avoid label clipping in some cirumstances (#32)
7 | - The vignette has a new section on reordering the alluvia, and some other improvements.
8 |
9 | ## Minor updates
10 |
11 | - The vignette dependency on RColorBrewer has been removed.
12 |
13 |
14 |
15 |
16 | # alluvial 0.1-2
17 |
18 | ## New features
19 |
20 | - There is a vignette `vignette("alluvial", package="alluvial")` illustrating basic usage of `alluvial()`. The vignette needs `dplyr` package so it is now `Suggested`.
21 |
22 | ## Minor updates
23 |
24 | - README has been updated. It is dynamically generated from an associated `.Rmd` file. Some typos fixed.
25 |
26 |
27 | # alluvial 0.1-1
28 |
29 | First release.
--------------------------------------------------------------------------------
/tests/testthat/test-alluvial.R:
--------------------------------------------------------------------------------
1 | # Oddities in input data --------------------------------------------------
2 |
3 | test_that("Character vectors in data do not trigger warnings", {
4 | d <- data.frame(
5 | x1=rep(letters[1:2], 2),
6 | x2=rep(letters[1:2], each=2),
7 | freq=1:4
8 | )
9 | expect_silent({
10 | pdf(NULL)
11 | alluvial(d[,1:2], freq=d$freq)
12 | dev.off()
13 | } )
14 | })
15 |
16 |
17 |
18 | # Output ------------------------------------------------------------------
19 |
20 | test_that("alluvial() returns proper value", {
21 | d <- data.frame(
22 | x1=rep(letters[1:2], 2),
23 | x2=rep(letters[1:2], each=2),
24 | freq=1:4
25 | )
26 | pdf(NULL)
27 | expect_snapshot_value(alluvial(d[,1:2], freq=d$freq), style = "deparse")
28 | dev.off()
29 | })
30 |
31 | test_that("alluvial() produces a proper plot", {
32 | skip_on_cran()
33 | skip_on_ci()
34 | d <- data.frame(
35 | x1=rep(letters[1:2], 2),
36 | x2=rep(letters[1:2], each=2),
37 | freq=1:4
38 | )
39 | expect_snapshot_file({
40 | tfile <- tempfile()
41 | png(tfile)
42 | alluvial(d[,1:2], freq=d$freq)
43 | dev.off()
44 | tfile
45 | },
46 | "alluvial.png"
47 | )
48 | })
49 |
--------------------------------------------------------------------------------
/tests/testthat/_snaps/alluvial.md:
--------------------------------------------------------------------------------
1 | # alluvial() returns proper value
2 |
3 | list(endpoints = structure(list(x1 = structure(c(1L, 2L, 1L,
4 | 2L, 1L, 2L, 1L, 2L), .Label = c("a", "b"), class = "factor"),
5 | x2 = structure(c(1L, 1L, 2L, 2L, 1L, 1L, 2L, 2L), .Label = c("a",
6 | "b"), class = "factor"), .bottom = c(0, 0.43, 0.095, 0.62,
7 | 0, 0.095, 0.335, 0.62), .top = c(0.095, 0.62, 0.38, 1, 0.095,
8 | 0.285, 0.62, 1), .axis = c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L
9 | )), row.names = c(NA, -8L), class = "data.frame"), category_midpoints = list(
10 | x1 = structure(c(a = 0.19, b = 0.715), .Dim = 2L, .Dimnames = list(
11 | c("a", "b"))), x2 = structure(c(a = 0.1425, b = 0.6675
12 | ), .Dim = 2L, .Dimnames = list(c("a", "b")))), alluvium_midpoints = structure(list(
13 | x1 = structure(c(1L, 1L, 2L, 2L), .Label = c("a", "b"), class = "factor"),
14 | x2 = structure(c(1L, 2L, 1L, 2L), .Label = c("a", "b"), class = "factor"),
15 | .axis_from = c(1L, 1L, 1L, 1L), .axis_to = c(2L, 2L, 2L,
16 | 2L), .x = c(1.5, 1.5, 1.5, 1.5), .y = c(0.0475, 0.3575, 0.3575,
17 | 0.81), .slope = c(0, 0.266666666666667, -0.372222222222222,
18 | 0)), row.names = c(NA, -4L), class = "data.frame"))
19 |
20 |
--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples
2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
3 | on:
4 | push:
5 | branches: [main, master]
6 | pull_request:
7 | branches: [main, master]
8 |
9 | name: R-CMD-check
10 |
11 | jobs:
12 | R-CMD-check:
13 | runs-on: ${{ matrix.config.os }}
14 |
15 | name: ${{ matrix.config.os }} (${{ matrix.config.r }})
16 |
17 | strategy:
18 | fail-fast: false
19 | matrix:
20 | config:
21 | - {os: macOS-latest, r: 'release'}
22 | - {os: windows-latest, r: 'release'}
23 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'}
24 | - {os: ubuntu-latest, r: 'release'}
25 | - {os: ubuntu-latest, r: 'oldrel-1'}
26 |
27 | env:
28 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
29 | R_KEEP_PKG_SOURCE: yes
30 |
31 | steps:
32 | - uses: actions/checkout@v2
33 |
34 | - uses: r-lib/actions/setup-pandoc@v1
35 |
36 | - uses: r-lib/actions/setup-r@v1
37 | with:
38 | r-version: ${{ matrix.config.r }}
39 | http-user-agent: ${{ matrix.config.http-user-agent }}
40 | use-public-rspm: true
41 |
42 | - uses: r-lib/actions/setup-r-dependencies@v1
43 | with:
44 | extra-packages: rcmdcheck
45 |
46 | - uses: r-lib/actions/check-r-package@v1
47 |
--------------------------------------------------------------------------------
/man-roxygen/alluvial_ts.R:
--------------------------------------------------------------------------------
1 | if( require(reshape2) )
2 | {
3 | data(Refugees)
4 | reshape2::dcast(Refugees, country ~ year, value.var = 'refugees')
5 | d <- Refugees
6 |
7 | set.seed(39) # for nice colours
8 | cols <- hsv(h = sample(1:10/10), s = sample(3:12)/15, v = sample(3:12)/15)
9 |
10 | alluvial_ts(d)
11 | alluvial_ts(d, wave = .2, ygap = 5, lwd = 3)
12 | alluvial_ts(d, wave = .3, ygap = 5, col = cols)
13 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, rankup = TRUE)
14 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'down')
15 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'centred', grid=TRUE,
16 | grid.lwd = 5)
17 | alluvial_ts(d, wave = 0, ygap = 0, col = cols, alpha = .9, border = 'white',
18 | grid = TRUE, grid.lwd = 5)
19 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.4)
20 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.3, lab.cex = .7)
21 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.3, lab.cex=.7,
22 | leg.cex=.7, leg.col = 'white')
23 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, leg.mode = FALSE, leg.x = .1,
24 | leg.y = .7, leg.max = 3e6)
25 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'centred', alpha=.9,
26 | grid = TRUE, grid.lwd = 5, xmargin = 0.2, lab.cex = .7, xlab = '',
27 | ylab = '', border = NA, axis.cex = .8, leg.cex = .7,
28 | leg.col='white',
29 | title = "UNHCR-recognised refugees\nTop 10 countries (2003-13)\n")
30 |
31 | # non time-series example - Virginia deaths dataset
32 | d <- reshape2::melt(data.frame(age=row.names(VADeaths), VADeaths), id.vars='age')[,c(2,1,3)]
33 | names(d) = c('pop_group','age_group','deaths')
34 | alluvial_ts(d)
35 | }
36 |
--------------------------------------------------------------------------------
/man-roxygen/alluvial.R:
--------------------------------------------------------------------------------
1 | # Titanic data
2 | tit <- as.data.frame(Titanic)
3 |
4 | # 2d
5 | tit2d <- aggregate( Freq ~ Class + Survived, data=tit, sum)
6 | alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8,
7 | gap.width=0.1, col= "steelblue", border="white",
8 | layer = tit2d$Survived != "Yes" )
9 |
10 | alluvial( tit2d[,1:2], freq=tit2d$Freq,
11 | hide=tit2d$Freq < 150,
12 | xw=0.0, alpha=0.8,
13 | gap.width=0.1, col= "steelblue", border="white",
14 | layer = tit2d$Survived != "Yes" )
15 |
16 | # 3d
17 | tit3d <- aggregate( Freq ~ Class + Sex + Survived, data=tit, sum)
18 |
19 | alluvial(tit3d[,1:3], freq=tit3d$Freq, alpha=1, xw=0.2,
20 | col=ifelse( tit3d$Survived == "No", "red", "gray"),
21 | layer = tit3d$Sex != "Female",
22 | border="white")
23 |
24 |
25 | # 4d
26 | alluvial( tit[,1:4], freq=tit$Freq, border=NA,
27 | hide = tit$Freq < quantile(tit$Freq, .50),
28 | col=ifelse( tit$Class == "3rd" & tit$Sex == "Male", "red", "gray") )
29 |
30 | # 3d example with custom ordering
31 | # Reorder "Sex" axis according to survival status
32 | ord <- list(NULL, with(tit3d, order(Sex, Survived)), NULL)
33 | alluvial(tit3d[,1:3], freq=tit3d$Freq, alpha=1, xw=0.2,
34 | col=ifelse( tit3d$Survived == "No", "red", "gray"),
35 | layer = tit3d$Sex != "Female",
36 | border="white", ordering=ord)
37 |
38 | # Possible blocks options
39 | for (blocks in c(TRUE, FALSE, "bookends")) {
40 |
41 | # Elaborate alluvial diagram from main examples file
42 | alluvial( tit[, 1:4], freq = tit$Freq, border = NA,
43 | hide = tit$Freq < quantile(tit$Freq, .50),
44 | col = ifelse( tit$Class == "3rd" & tit$Sex == "Male",
45 | "red", "gray" ),
46 | blocks = blocks )
47 | }
48 |
49 |
50 | # Data returned
51 | x <- alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8,
52 | gap.width=0.1, col= "steelblue", border="white",
53 | layer = tit2d$Survived != "Yes" )
54 | points( rep(1, 16), x$endpoints[[1]], col="green")
55 | points( rep(2, 16), x$endpoints[[2]], col="blue")
56 |
--------------------------------------------------------------------------------
/data/Refugees.csv:
--------------------------------------------------------------------------------
1 | "country";"year";"refugees"
2 | "Afghanistan";2003;2136043
3 | "Burundi";2003;531637
4 | "Congo DRC";2003;453465
5 | "Iraq";2003;368580
6 | "Myanmar";2003;151384
7 | "Palestine";2003;350568
8 | "Somalia";2003;402336
9 | "Sudan";2003;606242
10 | "Syria";2003;20819
11 | "Vietnam";2003;363179
12 | "Afghanistan";2004;2084109
13 | "Burundi";2004;485454
14 | "Congo DRC";2004;461042
15 | "Iraq";2004;311905
16 | "Myanmar";2004;161013
17 | "Palestine";2004;350617
18 | "Somalia";2004;389304
19 | "Sudan";2004;730647
20 | "Syria";2004;21440
21 | "Vietnam";2004;349809
22 | "Afghanistan";2005;2166149
23 | "Burundi";2005;438706
24 | "Congo DRC";2005;430929
25 | "Iraq";2005;262299
26 | "Myanmar";2005;164864
27 | "Palestine";2005;349673
28 | "Somalia";2005;395553
29 | "Sudan";2005;693632
30 | "Syria";2005;16401
31 | "Vietnam";2005;358268
32 | "Afghanistan";2006;2107519
33 | "Burundi";2006;396541
34 | "Congo DRC";2006;401914
35 | "Iraq";2006;1450905
36 | "Myanmar";2006;202826
37 | "Palestine";2006;334142
38 | "Somalia";2006;464252
39 | "Sudan";2006;686311
40 | "Syria";2006;12338
41 | "Vietnam";2006;374279
42 | "Afghanistan";2007;1909911
43 | "Burundi";2007;375715
44 | "Congo DRC";2007;370386
45 | "Iraq";2007;2279245
46 | "Myanmar";2007;191256
47 | "Palestine";2007;335219
48 | "Somalia";2007;455356
49 | "Sudan";2007;523032
50 | "Syria";2007;13671
51 | "Vietnam";2007;327776
52 | "Afghanistan";2008;1817913
53 | "Burundi";2008;281592
54 | "Congo DRC";2008;367995
55 | "Iraq";2008;1873519
56 | "Myanmar";2008;184347
57 | "Palestine";2008;333990
58 | "Somalia";2008;559153
59 | "Sudan";2008;397013
60 | "Syria";2008;15186
61 | "Vietnam";2008;328183
62 | "Afghanistan";2009;1905804
63 | "Burundi";2009;94239
64 | "Congo DRC";2009;455852
65 | "Iraq";2009;1785212
66 | "Myanmar";2009;206650
67 | "Palestine";2009;95177
68 | "Somalia";2009;678308
69 | "Sudan";2009;348500
70 | "Syria";2009;17884
71 | "Vietnam";2009;339289
72 | "Afghanistan";2010;3054709
73 | "Burundi";2010;84064
74 | "Congo DRC";2010;476693
75 | "Iraq";2010;1683575
76 | "Myanmar";2010;215644
77 | "Palestine";2010;93299
78 | "Somalia";2010;770148
79 | "Sudan";2010;379067
80 | "Syria";2010;18428
81 | "Vietnam";2010;338698
82 | "Afghanistan";2011;2664436
83 | "Burundi";2011;101288
84 | "Congo DRC";2011;491481
85 | "Iraq";2011;1428308
86 | "Myanmar";2011;214594
87 | "Palestine";2011;94121
88 | "Somalia";2011;1075148
89 | "Sudan";2011;491013
90 | "Syria";2011;19900
91 | "Vietnam";2011;337829
92 | "Afghanistan";2012;2586034
93 | "Burundi";2012;73362
94 | "Congo DRC";2012;509082
95 | "Iraq";2012;746181
96 | "Myanmar";2012;215338
97 | "Palestine";2012;94820
98 | "Somalia";2012;1136713
99 | "Sudan";2012;558195
100 | "Syria";2012;728603
101 | "Vietnam";2012;336939
102 | "Afghanistan";2013;2556507
103 | "Burundi";2013;72652
104 | "Congo DRC";2013;499320
105 | "Iraq";2013;401384
106 | "Myanmar";2013;222053
107 | "Palestine";2013;96044
108 | "Somalia";2013;1121772
109 | "Sudan";2013;636400
110 | "Syria";2013;2457255
111 | "Vietnam";2013;314105
112 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output:
3 | github_document:
4 | html_preview: false
5 | bibliography: vignettes/alluvial.bib
6 | editor_options:
7 | chunk_output_type: console
8 | ---
9 |
10 | ```{r setup, include=FALSE, cache=FALSE}
11 | library(alluvial)
12 | library(dplyr)
13 |
14 | knitr::opts_chunk$set(
15 | fig.width=8,
16 | fig.height=6,
17 | fig.path = "tools/"
18 | )
19 |
20 | knitr::render_markdown(strict=FALSE)
21 | ```
22 |
23 |
24 |
25 | # R package for drawing alluvial diagrams
26 |
27 |
28 | [](https://github.com/mbojan/alluvial/actions)
29 | [](http://cranlogs.r-pkg.org/)
30 | [](https://cran.r-project.org/package=alluvial)
31 |
32 |
33 | What are alluvial diagrams? See for example:
34 |
35 | * [Wikipedia](http://en.wikipedia.org/wiki/Alluvial_diagram)
36 | * My [blog post](http://bc.bojanorama.pl/2014/03/alluvial-diagrams) showing-off this package
37 | * Some discussion on [CrossValidated](http://stats.stackexchange.com/questions/12029/is-it-possible-to-create-parallel-sets-plot-using-r)
38 |
39 | This package use base R **graphics** [@r-graphics] package. For Grammar of Graphics implementation see [**ggalluvial**](https://github.com/corybrunson/ggalluvial) [@r-ggalluvial;@ggalluvial-article].
40 |
41 |
42 | ## Examples
43 |
44 | Alluvial diagram of `datasets::Titanic` data made with `alluvial()`. Notice how each category block becomes a stacked barchart showing relative frequency of survivors.
45 |
46 | ```{r alluvial}
47 | tit <- tibble::as_tibble(Titanic)
48 |
49 | tit %>% head() %>% knitr::kable()
50 |
51 | alluvial(
52 | select(tit, Survived, Sex, Age, Class),
53 | freq=tit$n,
54 | col = ifelse(tit$Survived == "Yes", "orange", "grey"),
55 | border = ifelse(tit$Survived == "Yes", "orange", "grey"),
56 | layer = tit$Survived != "Yes",
57 | alpha = 0.8,
58 | blocks=FALSE
59 | )
60 | ```
61 |
62 |
63 |
64 |
65 |
66 | Alluvial diagram for multiple time series / cross-sectional data based on `alluvial::Refugees` data made with `alluvial_ts()`.
67 |
68 | ```{r alluvial_ts}
69 | Refugees %>% head() %>% knitr::kable()
70 |
71 |
72 | set.seed(39) # for nice colours
73 | cols <- hsv(h = sample(1:10/10), s = sample(3:12)/15, v = sample(3:12)/15)
74 |
75 | alluvial_ts(Refugees, wave = .3, ygap = 5, col = cols, plotdir = 'centred', alpha=.9,
76 | grid = TRUE, grid.lwd = 5, xmargin = 0.2, lab.cex = .7, xlab = '',
77 | ylab = '', border = NA, axis.cex = .8, leg.cex = .7,
78 | leg.col='white',
79 | title = "UNHCR-recognised refugees\nTop 10 countries (2003-13)\n")
80 | ```
81 |
82 |
83 |
84 | ## Installation
85 |
86 | Install stable version from CRAN using
87 |
88 | ```{r, eval=FALSE}
89 | install.packages("alluvial")
90 | ```
91 |
92 | or development version from GitHub using `remotes::install_github()`:
93 |
94 | ```{r, eval=FALSE}
95 | remotes::install_github("mbojan/alluvial", build_vignettes=TRUE)
96 | ```
97 |
98 |
99 |
100 |
101 | ## References
102 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 | # R package for drawing alluvial diagrams
3 |
4 |
5 |
6 | [](https://github.com/mbojan/alluvial/actions)
7 | [](http://cranlogs.r-pkg.org/)
9 | [](https://cran.r-project.org/package=alluvial)
11 |
12 |
13 | What are alluvial diagrams? See for example:
14 |
15 | - [Wikipedia](http://en.wikipedia.org/wiki/Alluvial_diagram)
16 | - My [blog post](http://bc.bojanorama.pl/2014/03/alluvial-diagrams)
17 | showing-off this package
18 | - Some discussion on
19 | [CrossValidated](http://stats.stackexchange.com/questions/12029/is-it-possible-to-create-parallel-sets-plot-using-r)
20 |
21 | This package use base R **graphics** (R Core Team 2021) package. For
22 | Grammar of Graphics implementation see
23 | [**ggalluvial**](https://github.com/corybrunson/ggalluvial) (Brunson and
24 | Read 2020; Brunson 2020).
25 |
26 | ## Examples
27 |
28 | Alluvial diagram of `datasets::Titanic` data made with `alluvial()`.
29 | Notice how each category block becomes a stacked barchart showing
30 | relative frequency of survivors.
31 |
32 | ``` r
33 | tit <- tibble::as_tibble(Titanic)
34 |
35 | tit %>% head() %>% knitr::kable()
36 | ```
37 |
38 | | Class | Sex | Age | Survived | n |
39 | |:------|:-------|:------|:---------|----:|
40 | | 1st | Male | Child | No | 0 |
41 | | 2nd | Male | Child | No | 0 |
42 | | 3rd | Male | Child | No | 35 |
43 | | Crew | Male | Child | No | 0 |
44 | | 1st | Female | Child | No | 0 |
45 | | 2nd | Female | Child | No | 0 |
46 |
47 | ``` r
48 | alluvial(
49 | select(tit, Survived, Sex, Age, Class),
50 | freq=tit$n,
51 | col = ifelse(tit$Survived == "Yes", "orange", "grey"),
52 | border = ifelse(tit$Survived == "Yes", "orange", "grey"),
53 | layer = tit$Survived != "Yes",
54 | alpha = 0.8,
55 | blocks=FALSE
56 | )
57 | ```
58 |
59 | 
60 |
61 | Alluvial diagram for multiple time series / cross-sectional data based
62 | on `alluvial::Refugees` data made with `alluvial_ts()`.
63 |
64 | ``` r
65 | Refugees %>% head() %>% knitr::kable()
66 | ```
67 |
68 | | country | year | refugees |
69 | |:------------|-----:|---------:|
70 | | Afghanistan | 2003 | 2136043 |
71 | | Burundi | 2003 | 531637 |
72 | | Congo DRC | 2003 | 453465 |
73 | | Iraq | 2003 | 368580 |
74 | | Myanmar | 2003 | 151384 |
75 | | Palestine | 2003 | 350568 |
76 |
77 | ``` r
78 | set.seed(39) # for nice colours
79 | cols <- hsv(h = sample(1:10/10), s = sample(3:12)/15, v = sample(3:12)/15)
80 |
81 | alluvial_ts(Refugees, wave = .3, ygap = 5, col = cols, plotdir = 'centred', alpha=.9,
82 | grid = TRUE, grid.lwd = 5, xmargin = 0.2, lab.cex = .7, xlab = '',
83 | ylab = '', border = NA, axis.cex = .8, leg.cex = .7,
84 | leg.col='white',
85 | title = "UNHCR-recognised refugees\nTop 10 countries (2003-13)\n")
86 | ```
87 |
88 | 
89 |
90 | ## Installation
91 |
92 | Install stable version from CRAN using
93 |
94 | ``` r
95 | install.packages("alluvial")
96 | ```
97 |
98 | or development version from GitHub using `remotes::install_github()`:
99 |
100 | ``` r
101 | remotes::install_github("mbojan/alluvial", build_vignettes=TRUE)
102 | ```
103 |
104 | ## References
105 |
106 |
107 |
108 |
109 |
110 | Brunson, Jason Cory. 2020. “ggalluvial:
111 | Layered Grammar for Alluvial Plots.” *Journal of Open Source Software* 5
112 | (49): 2017. .
113 |
114 |
115 |
116 |
117 |
118 | Brunson, Jason Cory, and Quentin D. Read. 2020. “Ggalluvial: Alluvial
119 | Plots in ’Ggplot2’.” .
120 |
121 |
122 |
123 |
124 |
125 | R Core Team. 2021. *R: A Language and Environment for Statistical
126 | Computing*. Vienna, Austria: R Foundation for Statistical Computing.
127 | .
128 |
129 |
130 |
131 |
132 |
--------------------------------------------------------------------------------
/man/alluvial_ts.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/alluvial_ts.R
3 | \name{alluvial_ts}
4 | \alias{alluvial_ts}
5 | \title{Alluvial diagram for multiple time series data}
6 | \usage{
7 | alluvial_ts(
8 | dat,
9 | wave = NA,
10 | ygap = 1,
11 | col = NA,
12 | alpha = NA,
13 | plotdir = "up",
14 | rankup = FALSE,
15 | lab.cex = 1,
16 | lab.col = "black",
17 | xmargin = 0.1,
18 | axis.col = "black",
19 | title = NA,
20 | title.cex = 1,
21 | axis.cex = 1,
22 | grid = FALSE,
23 | grid.col = "grey80",
24 | grid.lwd = 1,
25 | leg.mode = TRUE,
26 | leg.x = 0.1,
27 | leg.y = 0.9,
28 | leg.cex = 1,
29 | leg.col = "black",
30 | leg.lty = NA,
31 | leg.lwd = NA,
32 | leg.max = NA,
33 | xlab = NA,
34 | ylab = NA,
35 | xlab.pos = 2,
36 | ylab.pos = 1,
37 | lwd = 1,
38 | ...
39 | )
40 | }
41 | \arguments{
42 | \item{dat}{data.frame of time-series (or suitable equivalent continuously disaggregated data), with 3 columns (in order: category, time-variable, value) with <= 1 row for each category-time combination}
43 |
44 | \item{wave}{numeric, curve wavyness defined in terms of x axis data range - i.e. bezier point offset. Experiment to get this right}
45 |
46 | \item{ygap}{numeric, vertical distance between polygons - a multiple of 10\% of the mean data value}
47 |
48 | \item{col}{colour, value or vector of length matching the number of unique categories. Individual colours of vector are mapped to categories in alpha-numeric order}
49 |
50 | \item{alpha}{numeric, [0,1] polygon fill transparency}
51 |
52 | \item{plotdir}{character, string ('up', 'down' or 'centred') giving the vertical alignment of polygon stacks}
53 |
54 | \item{rankup}{logical, rank polygons on time axes upward by magnitude (largest to smallest) or not}
55 |
56 | \item{lab.cex}{numeric, category label font size}
57 |
58 | \item{lab.col}{colour, of category label}
59 |
60 | \item{xmargin}{numeric [0,1], proportional space for category labels}
61 |
62 | \item{axis.col}{colour, of axes}
63 |
64 | \item{title}{character, plot title}
65 |
66 | \item{title.cex}{numeric, plot title font size}
67 |
68 | \item{axis.cex}{numeric, font size of x-axis break labels}
69 |
70 | \item{grid}{logical, plot vertical axes}
71 |
72 | \item{grid.col}{colour, of grid axes}
73 |
74 | \item{grid.lwd}{numeric, line width of grid axes}
75 |
76 | \item{leg.mode}{logical, draw y-axis scale legend inside largest data point (TRUE default) or alternatively with custom position/value (FALSE)}
77 |
78 | \item{leg.x, leg.y}{numeric [0,1], x/y positions of legend if leg.mode = FALSE}
79 |
80 | \item{leg.cex}{numeric, legend text size}
81 |
82 | \item{leg.col}{colour, of legend lines and text}
83 |
84 | \item{leg.lty}{numeric, code for legend line type}
85 |
86 | \item{leg.lwd}{numeric, legend line width}
87 |
88 | \item{leg.max}{numeric, legend scale line width}
89 |
90 | \item{xlab, ylab}{character, x-axis / y-axis titles}
91 |
92 | \item{xlab.pos, ylab.pos}{numeric, perpendicular offset for axis titles}
93 |
94 | \item{lwd}{numeric, value or vector of length matching the number of unique categories for polygon stroke line width. Individual values of vector are mapped to categories in alpha-numeric order}
95 |
96 | \item{...}{arguments to pass to polygon()}
97 | }
98 | \description{
99 | This is a variant of alluvial diagram suitable for multiple
100 | (cross-sectional) time series. It also works with continuous variables equivalent to time
101 | }
102 | \examples{
103 | if( require(reshape2) )
104 | {
105 | data(Refugees)
106 | reshape2::dcast(Refugees, country ~ year, value.var = 'refugees')
107 | d <- Refugees
108 |
109 | set.seed(39) # for nice colours
110 | cols <- hsv(h = sample(1:10/10), s = sample(3:12)/15, v = sample(3:12)/15)
111 |
112 | alluvial_ts(d)
113 | alluvial_ts(d, wave = .2, ygap = 5, lwd = 3)
114 | alluvial_ts(d, wave = .3, ygap = 5, col = cols)
115 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, rankup = TRUE)
116 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'down')
117 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'centred', grid=TRUE,
118 | grid.lwd = 5)
119 | alluvial_ts(d, wave = 0, ygap = 0, col = cols, alpha = .9, border = 'white',
120 | grid = TRUE, grid.lwd = 5)
121 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.4)
122 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.3, lab.cex = .7)
123 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, xmargin = 0.3, lab.cex=.7,
124 | leg.cex=.7, leg.col = 'white')
125 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, leg.mode = FALSE, leg.x = .1,
126 | leg.y = .7, leg.max = 3e6)
127 | alluvial_ts(d, wave = .3, ygap = 5, col = cols, plotdir = 'centred', alpha=.9,
128 | grid = TRUE, grid.lwd = 5, xmargin = 0.2, lab.cex = .7, xlab = '',
129 | ylab = '', border = NA, axis.cex = .8, leg.cex = .7,
130 | leg.col='white',
131 | title = "UNHCR-recognised refugees\nTop 10 countries (2003-13)\n")
132 |
133 | # non time-series example - Virginia deaths dataset
134 | d <- reshape2::melt(data.frame(age=row.names(VADeaths), VADeaths), id.vars='age')[,c(2,1,3)]
135 | names(d) = c('pop_group','age_group','deaths')
136 | alluvial_ts(d)
137 | }
138 | }
139 |
--------------------------------------------------------------------------------
/man/alluvial.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/alluvial.R
3 | \name{alluvial}
4 | \alias{alluvial}
5 | \title{Alluvial diagram}
6 | \usage{
7 | alluvial(
8 | ...,
9 | freq,
10 | col = "gray",
11 | border = 0,
12 | layer,
13 | hide = FALSE,
14 | alpha = 0.5,
15 | gap.width = 0.05,
16 | xw = 0.1,
17 | cw = 0.1,
18 | blocks = TRUE,
19 | ordering = NULL,
20 | axis_labels = NULL,
21 | mar = c(2, 1, 1, 1),
22 | cex = par("cex"),
23 | xlim_offset = c(0, 0),
24 | ylim_offset = c(0, 0),
25 | cex.axis = par("cex.axis"),
26 | axes = TRUE,
27 | ann = TRUE,
28 | title = NULL
29 | )
30 | }
31 | \arguments{
32 | \item{...}{vectors or data frames, all for the same number of observations}
33 |
34 | \item{freq}{numeric, vector of frequencies of the same length as the number of observations}
35 |
36 | \item{col}{vector of colors of the stripes}
37 |
38 | \item{border}{vector of border colors for the stripes}
39 |
40 | \item{layer}{numeric, order of drawing of the stripes}
41 |
42 | \item{hide}{logical, should particular stripe be plotted}
43 |
44 | \item{alpha}{numeric, vector of transparency of the stripes}
45 |
46 | \item{gap.width}{numeric, relative width of inter-category gaps}
47 |
48 | \item{xw}{numeric, the distance from the set axis to the control points of the xspline}
49 |
50 | \item{cw}{numeric, width of the category axis}
51 |
52 | \item{blocks}{logical, whether to use blocks to tie the flows together at each category, versus contiguous ribbons (also admits character value "bookends")}
53 |
54 | \item{ordering}{list of numeric vectors allowing to reorder the alluvia on each axis separately, see Examples}
55 |
56 | \item{axis_labels}{character, labels of the axes, defaults to variable names in the data}
57 |
58 | \item{mar}{numeric, plot margins as in \code{\link{par}}}
59 |
60 | \item{cex, cex.axis}{numeric, scaling of fonts of category labels and axis labels respectively. See \code{\link{par}}.}
61 |
62 | \item{xlim_offset, ylim_offset}{numeric vectors of length 2, passed to
63 | \code{xlim} and \code{ylim} of \code{\link{plot}}, and allow for adjusting
64 | the limits of the plotting region}
65 |
66 | \item{axes}{logical, whether to draw axes, defaults to TRUE}
67 |
68 | \item{ann}{logical, whether to draw annotations: category labels. Defaults to TRUE}
69 |
70 | \item{title}{character, plot title}
71 | }
72 | \value{
73 | Invisibly a list with elements:
74 | \item{\code{endpoints}}{A data frame with data on locations of the stripes with columns:
75 | \describe{
76 | \item{\code{...}}{Vectors/data frames supplied to
77 | \code{alluvial} through \code{...} that define the axes}
78 | \item{\code{.bottom},\code{.top}}{Y locations of bottom
79 | and top coordinates respectively at which the stripes
80 | originate from the axis \code{.axis}}
81 | \item{\code{.axis}}{Axis number counting from the left}
82 | } }
83 | \item{\code{category_midpoints}}{List of vectors of Y
84 | locations of category block midpoints.}
85 | \item{\code{alluvium_midpoints}}{A data frame with
86 | location of midpoints on each alluvium segement with
87 | columns:
88 | \describe{
89 | \item{\code{...}}{Vectors/data frames supplied to
90 | \code{alluvial} through the \code{...}}
91 | \item{\code{.axis_from}, \code{.axis_to}}{IDs of axes that
92 | a segment originates from and goes to}
93 | \item{\code{.x}, \code{.y}}{X and Y locations of the alluvium midpoints}
94 | \item{\code{.slope}}{The (approximate) slope of the alluvium at the midpoint}
95 | }
96 | }
97 | }
98 | \description{
99 | Drawing alluvial diagrams, also known as parallel set plots.
100 | }
101 | \note{
102 | Please mind that the API is planned to change to be more compatible
103 | with \pkg{dplyr} verbs.
104 | }
105 | \examples{
106 | # Titanic data
107 | tit <- as.data.frame(Titanic)
108 |
109 | # 2d
110 | tit2d <- aggregate( Freq ~ Class + Survived, data=tit, sum)
111 | alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8,
112 | gap.width=0.1, col= "steelblue", border="white",
113 | layer = tit2d$Survived != "Yes" )
114 |
115 | alluvial( tit2d[,1:2], freq=tit2d$Freq,
116 | hide=tit2d$Freq < 150,
117 | xw=0.0, alpha=0.8,
118 | gap.width=0.1, col= "steelblue", border="white",
119 | layer = tit2d$Survived != "Yes" )
120 |
121 | # 3d
122 | tit3d <- aggregate( Freq ~ Class + Sex + Survived, data=tit, sum)
123 |
124 | alluvial(tit3d[,1:3], freq=tit3d$Freq, alpha=1, xw=0.2,
125 | col=ifelse( tit3d$Survived == "No", "red", "gray"),
126 | layer = tit3d$Sex != "Female",
127 | border="white")
128 |
129 |
130 | # 4d
131 | alluvial( tit[,1:4], freq=tit$Freq, border=NA,
132 | hide = tit$Freq < quantile(tit$Freq, .50),
133 | col=ifelse( tit$Class == "3rd" & tit$Sex == "Male", "red", "gray") )
134 |
135 | # 3d example with custom ordering
136 | # Reorder "Sex" axis according to survival status
137 | ord <- list(NULL, with(tit3d, order(Sex, Survived)), NULL)
138 | alluvial(tit3d[,1:3], freq=tit3d$Freq, alpha=1, xw=0.2,
139 | col=ifelse( tit3d$Survived == "No", "red", "gray"),
140 | layer = tit3d$Sex != "Female",
141 | border="white", ordering=ord)
142 |
143 | # Possible blocks options
144 | for (blocks in c(TRUE, FALSE, "bookends")) {
145 |
146 | # Elaborate alluvial diagram from main examples file
147 | alluvial( tit[, 1:4], freq = tit$Freq, border = NA,
148 | hide = tit$Freq < quantile(tit$Freq, .50),
149 | col = ifelse( tit$Class == "3rd" & tit$Sex == "Male",
150 | "red", "gray" ),
151 | blocks = blocks )
152 | }
153 |
154 |
155 | # Data returned
156 | x <- alluvial( tit2d[,1:2], freq=tit2d$Freq, xw=0.0, alpha=0.8,
157 | gap.width=0.1, col= "steelblue", border="white",
158 | layer = tit2d$Survived != "Yes" )
159 | points( rep(1, 16), x$endpoints[[1]], col="green")
160 | points( rep(2, 16), x$endpoints[[2]], col="blue")
161 | }
162 |
--------------------------------------------------------------------------------
/R/alluvial.R:
--------------------------------------------------------------------------------
1 | #' Alluvial diagram
2 | #'
3 | #' Drawing alluvial diagrams, also known as parallel set plots.
4 | #'
5 | #' @param ... vectors or data frames, all for the same number of observations
6 | #' @param freq numeric, vector of frequencies of the same length as the number of observations
7 | #' @param col vector of colors of the stripes
8 | #' @param border vector of border colors for the stripes
9 | #' @param layer numeric, order of drawing of the stripes
10 | #' @param hide logical, should particular stripe be plotted
11 | #' @param alpha numeric, vector of transparency of the stripes
12 | #' @param gap.width numeric, relative width of inter-category gaps
13 | #' @param xw numeric, the distance from the set axis to the control points of the xspline
14 | #' @param cw numeric, width of the category axis
15 | #' @param blocks logical, whether to use blocks to tie the flows together at each category, versus contiguous ribbons (also admits character value "bookends")
16 | #' @param ordering list of numeric vectors allowing to reorder the alluvia on each axis separately, see Examples
17 | #' @param axis_labels character, labels of the axes, defaults to variable names in the data
18 | #' @param mar numeric, plot margins as in \code{\link{par}}
19 | #' @param cex,cex.axis numeric, scaling of fonts of category labels and axis labels respectively. See \code{\link{par}}.
20 | #' @param xlim_offset,ylim_offset numeric vectors of length 2, passed to
21 | #' \code{xlim} and \code{ylim} of \code{\link{plot}}, and allow for adjusting
22 | #' the limits of the plotting region
23 | #' @param axes logical, whether to draw axes, defaults to TRUE
24 | #' @param ann logical, whether to draw annotations: category labels. Defaults to TRUE
25 | #' @param title character, plot title
26 | #'
27 | #' @return Invisibly a list with elements:
28 | #' \item{\code{endpoints}}{A data frame with data on locations of the stripes with columns:
29 | #' \describe{
30 | #' \item{\code{...}}{Vectors/data frames supplied to
31 | #' \code{alluvial} through \code{...} that define the axes}
32 | #' \item{\code{.bottom},\code{.top}}{Y locations of bottom
33 | #' and top coordinates respectively at which the stripes
34 | #' originate from the axis \code{.axis}}
35 | #' \item{\code{.axis}}{Axis number counting from the left}
36 | #' } }
37 | #' \item{\code{category_midpoints}}{List of vectors of Y
38 | #' locations of category block midpoints.}
39 | #' \item{\code{alluvium_midpoints}}{A data frame with
40 | #' location of midpoints on each alluvium segement with
41 | #' columns:
42 | #' \describe{
43 | #' \item{\code{...}}{Vectors/data frames supplied to
44 | #' \code{alluvial} through the \code{...}}
45 | #' \item{\code{.axis_from}, \code{.axis_to}}{IDs of axes that
46 | #' a segment originates from and goes to}
47 | #' \item{\code{.x}, \code{.y}}{X and Y locations of the alluvium midpoints}
48 | #' \item{\code{.slope}}{The (approximate) slope of the alluvium at the midpoint}
49 | #' }
50 | #' }
51 | #'
52 | #' @note Please mind that the API is planned to change to be more compatible
53 | #' with \pkg{dplyr} verbs.
54 | #'
55 | #' @importFrom grDevices col2rgb rgb
56 | #' @importFrom graphics plot xspline axis rect polygon text par
57 | #' @importFrom dplyr select_ group_by_ arrange_ filter_ "%>%" ungroup summarise_ .data
58 | #' @importFrom tidyr gather_
59 | #'
60 | #' @export
61 | #'
62 | #' @example man-roxygen/alluvial.R
63 |
64 | alluvial <- function( ..., freq,
65 | col="gray", border=0, layer, hide=FALSE, alpha=0.5,
66 | gap.width=0.05, xw=0.1, cw=0.1,
67 | blocks = TRUE,
68 | ordering=NULL,
69 | axis_labels=NULL,
70 | mar = c(2, 1, 1, 1),
71 | cex=par("cex"),
72 | xlim_offset= c(0, 0),
73 | ylim_offset= c(0, 0),
74 | cex.axis=par("cex.axis"),
75 | axes=TRUE,
76 | ann=TRUE,
77 | title = NULL)
78 | {
79 | # Data and graphical parameters
80 | p <- data.frame( ..., freq=freq, col, alpha, border, hide, stringsAsFactors=FALSE)
81 | np <- ncol(p) - 5 # Number of dimensions
82 | # check if 'ordering' is of proper form
83 | if( !is.null(ordering) )
84 | {
85 | stopifnot(is.list(ordering))
86 | if( length(ordering) != np )
87 | stop("'ordering' argument should have ",
88 | np, " components, has ", length(ordering))
89 | }
90 | n <- nrow(p)
91 | # Layers determine plotting order
92 | if(missing(layer))
93 | {
94 | layer <- 1:n
95 | }
96 | p$layer <- layer
97 | d <- p[ , 1:np, drop=FALSE] # Dimensions dframe
98 | p <- p[ , -c(1:np), drop=FALSE] # Parameteres dframe
99 | p$freq <- with(p, freq/sum(freq)) # Frequencies (weights)
100 | # Converting colors to hexcodes
101 | col <- col2rgb(p$col, alpha=TRUE)
102 | if(!identical(alpha, FALSE)) {
103 | col["alpha", ] <- p$alpha*256
104 | }
105 | p$col <- apply(col, 2, function(x) do.call(rgb, c(as.list(x), maxColorValue = 256)))
106 | # convert character vectors in data to factors
107 | isch <- sapply(d, is.character)
108 | d[isch] <- lapply(d[isch], as.factor)
109 | # Convert blocks to vector
110 | if (length(blocks) == 1)
111 | {
112 | blocks <- if (!is.na(as.logical(blocks)))
113 | {
114 | rep(blocks, np)
115 | } else if (blocks == "bookends")
116 | {
117 | c(TRUE, rep(FALSE, np - 2), TRUE)
118 | }
119 | }
120 | # Axis labels
121 | if(is.null(axis_labels)) {
122 | axis_labels <- names(d)
123 | } else {
124 | if(length(axis_labels) != ncol(d))
125 | stop("`axis_labels` should have length ", names(d), ", has ", length(axis_labels))
126 | }
127 | # Compute endpoints of flows (polygons)
128 | # i = dimension id
129 | # d = data frame of dimensions
130 | # f = weights
131 | # w = gap between categories
132 | getp <- function(i, d, f, w=gap.width) {
133 | # Ordering dimension ids for lexicographic sorting
134 | a <- c(i, (1:ncol(d))[-i])
135 | # Order of rows of d starting from i-th dimension
136 | if( is.null(ordering[[i]]) )
137 | {
138 | o <- do.call(order, d[a])
139 | } else {
140 | d2 <- d
141 | d2[1] <- ordering[[i]]
142 | o <- do.call(order, d2[a])
143 | }
144 | # Breakpoints on a dimension
145 | x <- c(0, cumsum(f[o])) * (1-w)
146 | # Stripe coordinates on a dimension
147 | x <- cbind(x[-length(x)], x[-1])
148 | # By how much stripes need to be shifted upwards (gap/max(gap))
149 | gap <- cumsum( c(0L, diff(as.numeric(d[o,i])) != 0) )
150 | mx <- max(gap)
151 | if (mx == 0) mx <- 1
152 | # shifts
153 | gap <- gap / mx * w
154 | # add gap-related shifts to stripe coordinates on dimension i
155 | (x + gap)[order(o),]
156 | }
157 | # Calculate stripe locations on dimensions: list of data frames. A component
158 | # for a dimension. Data frame contains 'y' locations of stripes.
159 | dd <- lapply(seq_along(d), getp, d=d, f=p$freq)
160 | # Plotting
161 | op <- par(mar=mar)
162 | plot(NULL, type="n", xlim=c(1-cw, np+cw) + xlim_offset, ylim=c(0, 1) + ylim_offset, xaxt="n", yaxt="n",
163 | xaxs="i", yaxs="i", xlab='', ylab='', main=title, frame=FALSE)
164 | # For every stripe
165 | ind <- which(!p$hide)[rev(order(p[!p$hide, ]$layer))]
166 | for(i in ind )
167 | {
168 | # For every inter-dimensional segment
169 | for(j in 1:(np-1) )
170 | {
171 | # Draw stripe
172 | xspline( c(j, j, j+xw, j+1-xw, j+1, j+1, j+1-xw, j+xw, j) + rep(c(cw, -cw, cw), c(3, 4, 2)),
173 | c( dd[[j]][i, c(1, 2, 2)], rev(dd[[j+1]][i, c(1, 1, 2, 2)]), dd[[j]][i,c(1, 1)]),
174 | shape = c(0,0,1,1,0,0,1,1,0, 0),
175 | open=FALSE,
176 | col=p$col[i], border=p$border[i])
177 | }
178 | }
179 | # Category blocks with labels
180 | for(j in seq_along(dd))
181 | {
182 | ax <- lapply(split(dd[[j]], d[,j]), range)
183 | if (blocks[j])
184 | {
185 | for(k in seq_along(ax))
186 | {
187 | rect( j-cw, ax[[k]][1], j+cw, ax[[k]][2] )
188 | }
189 | } else
190 | {
191 | for (i in ind)
192 | {
193 | x <- j + c(-1, 1) * cw
194 | y <- t(dd[[j]][c(i, i), ])
195 | w <- xw * (x[2] - x[1])
196 | xspline(x = c(x[1], x[1], x[1] + w, x[2] - w,
197 | x[2], x[2], x[2] - w, x[1] + w, x[1]),
198 | y = c(y[c(1, 2, 2), 1], y[c(2, 2, 1, 1), 2], y[c(1, 1), 1]),
199 | shape = c(0, 0, 1, 1, 0, 0, 1, 1, 0, 0),
200 | open = FALSE, col = p$col[i], border = p$border[i])
201 | }
202 | }
203 | for(k in seq_along(ax))
204 | {
205 | if(ann) text( j, mean(ax[[k]]), labels=names(ax)[k], cex=cex)
206 | }
207 | }
208 | # X axis
209 | if(axes) {
210 | axis(1, at= rep(c(-cw, cw), ncol(d)) + rep(seq_along(d), each=2),
211 | line=0.5, col="white", col.ticks="black", labels=FALSE)
212 | axis(1, at=seq_along(d), tick=FALSE, labels=axis_labels, cex.axis=cex.axis)
213 | }
214 | par(op)
215 | rval <- list(
216 | # Endpoints of alluvia
217 | endpoints = do.call(
218 | "rbind",
219 | lapply(
220 | seq(along=dd),
221 | function(i) {
222 | df <- as.data.frame(
223 | structure(dd[[i]], dimnames=list(NULL,c(".bottom", ".top"))),
224 | stringsAsFactors=FALSE
225 | )
226 | df$.axis <- i
227 | cbind(d, df)
228 | }
229 | )
230 | )
231 | )
232 | # Category midpoints
233 | rval$category_midpoints <- structure(
234 | lapply(
235 | seq(1, ncol(d)),
236 | function(i) {
237 | mi <- with(
238 | rval$endpoints[rval$endpoints$.axis == i , ],
239 | tapply(.bottom, d[[i]], min)
240 | )
241 | ma <- with(
242 | rval$endpoints[ rval$endpoints$.axis == i , ],
243 | tapply(.top, d[[i]], max)
244 | )
245 | (mi + ma)/2
246 | }
247 | ),
248 | names = names(d)
249 | )
250 | # alluvium midpoints
251 | qs1 <- c(as.list(names(d)), list(".axis")) %>%
252 | lapply(as.name)
253 | qs2 <- as.list(names(d)) %>%
254 | lapply(as.name)
255 |
256 | rval$alluvium_midpoints <- rval$endpoints %>%
257 | tidyr::pivot_longer(dplyr::one_of(".bottom", ".top"), names_to = ".endpoint", values_to = ".value") %>%
258 | dplyr::group_by( !!!qs1 ) %>%
259 | dplyr::summarise(m = mean(.data$.value)) %>%
260 | dplyr::arrange(!!!qs1) %>%
261 | dplyr::group_by(!!!qs2) %>%
262 | dplyr::mutate(
263 | .axis_from = dplyr::lag(.data$.axis),
264 | .axis_to = .data$.axis,
265 | .x = (.data$.axis_from + .data$.axis_to)/2,
266 | .y = (.data$m + dplyr::lag(.data$m))/2,
267 | .slope = (.data$m - dplyr::lag(.data$m)) / (.data$.axis_to - .data$.axis_from - cw)
268 | ) %>%
269 | dplyr::ungroup() %>%
270 | dplyr::filter(!is.na(.data$.axis_from)) %>%
271 | dplyr::select(dplyr::one_of(names(d), ".axis_from", ".axis_to", ".x", ".y", ".slope")) %>%
272 | as.data.frame(stringsAsFactors=FALSE)
273 |
274 | invisible(rval)
275 | }
276 |
277 |
--------------------------------------------------------------------------------
/R/alluvial_ts.R:
--------------------------------------------------------------------------------
1 | #' Alluvial diagram for multiple time series data
2 | #'
3 | #' This is a variant of alluvial diagram suitable for multiple
4 | #' (cross-sectional) time series. It also works with continuous variables equivalent to time
5 | #'
6 | #' @param dat data.frame of time-series (or suitable equivalent continuously disaggregated data), with 3 columns (in order: category, time-variable, value) with <= 1 row for each category-time combination
7 | #' @param wave numeric, curve wavyness defined in terms of x axis data range - i.e. bezier point offset. Experiment to get this right
8 | #' @param ygap numeric, vertical distance between polygons - a multiple of 10\% of the mean data value
9 | #' @param col colour, value or vector of length matching the number of unique categories. Individual colours of vector are mapped to categories in alpha-numeric order
10 | #' @param lwd numeric, value or vector of length matching the number of unique categories for polygon stroke line width. Individual values of vector are mapped to categories in alpha-numeric order
11 | #' @param alpha numeric, [0,1] polygon fill transparency
12 | #' @param plotdir character, string ('up', 'down' or 'centred') giving the vertical alignment of polygon stacks
13 | #' @param rankup logical, rank polygons on time axes upward by magnitude (largest to smallest) or not
14 | #' @param lab.cex numeric, category label font size
15 | #' @param lab.col colour, of category label
16 | #' @param xmargin numeric [0,1], proportional space for category labels
17 | #' @param axis.col colour, of axes
18 | #' @param axis.cex numeric, font size of x-axis break labels
19 | #' @param title character, plot title
20 | #' @param title.cex numeric, plot title font size
21 | #' @param grid logical, plot vertical axes
22 | #' @param grid.col colour, of grid axes
23 | #' @param grid.lwd numeric, line width of grid axes
24 | #' @param leg.mode logical, draw y-axis scale legend inside largest data point (TRUE default) or alternatively with custom position/value (FALSE)
25 | #' @param leg.x,leg.y numeric [0,1], x/y positions of legend if leg.mode = FALSE
26 | #' @param leg.cex numeric, legend text size
27 | #' @param leg.col colour, of legend lines and text
28 | #' @param leg.lty numeric, code for legend line type
29 | #' @param leg.lwd numeric, legend line width
30 | #' @param leg.max numeric, legend scale line width
31 | #' @param xlab,ylab character, x-axis / y-axis titles
32 | #' @param xlab.pos,ylab.pos numeric, perpendicular offset for axis titles
33 | #' @param ... arguments to pass to polygon()
34 | #'
35 | #' @importFrom grDevices extendrange rainbow
36 | #' @importFrom graphics mtext plot.new plot.window abline lines
37 | #' @importFrom stats aggregate approx
38 | #'
39 | #' @export
40 | #'
41 | #' @example man-roxygen/alluvial_ts.R
42 |
43 |
44 | alluvial_ts <- function(dat, wave = NA, ygap = 1, col = NA, alpha = NA, plotdir = 'up', rankup = FALSE,
45 | lab.cex = 1, lab.col = 'black', xmargin = .1, axis.col = 'black', title = NA,
46 | title.cex = 1, axis.cex = 1, grid = FALSE, grid.col = 'grey80', grid.lwd = 1,
47 | leg.mode = TRUE, leg.x = .1, leg.y = .9, leg.cex = 1, leg.col = 'black', leg.lty = NA,
48 | leg.lwd = NA, leg.max = NA, xlab = NA, ylab = NA, xlab.pos = 2, ylab.pos = 1, lwd = 1, ...){
49 |
50 | orig.names <- names(dat)
51 | names(dat) <- c('item', 'time', 'val')
52 | if(is.numeric(dat$item)) dat$item <- as.character(dat$item)
53 | if(is.ordered(dat$time) | is.factor(dat$time)) {
54 | axis.labs <- levels(dat$time)
55 | dat$time <- as.numeric(dat$time)
56 | } else if(is.numeric(dat$time)) axis.labs <- sort(unique(dat$time)) else {
57 | return("Error: time variable must be numeric, factor, or ordered")}
58 | times <- sort(unique(dat$time))
59 |
60 | dat <- dat[order(dat$item), ]
61 | datsum <- aggregate(val ~ item, dat, mean)
62 | plotorder <- order(datsum$val, decreasing = TRUE) # smallest last (on top)
63 | maxval <- pretty(max(dat$val))[2] # legend max
64 |
65 | # colours
66 | n <- length(unique(dat$item))
67 | if(all(is.na(col))) col <- rainbow(n)
68 | if(length(col) == 1) col <- rep(col, n)
69 | col <- rgb(t(col2rgb(col)), maxColorValue = 255) # ensure hex
70 | if(!length(col) == n) return("Error: 'col' length must equal the number of unique data elements")
71 | if(!is.na(alpha)) {
72 | col[nchar(col)>7] <- substr(col[nchar(col)>7], 1, 7)
73 | col <- paste0(col, substr(rgb(0, 0, alpha), 6, 7))
74 | }
75 |
76 | # calc vertical gap between items
77 | ymean <- mean(dat$val)
78 | ygap <- ymean * .1 * ygap
79 |
80 | # if not specified (but it really should be)
81 | if(is.na(wave)) wave <- .5 * (rev(times)[1] - times[1])/length(times)
82 |
83 | plot.y.max <- 0 # vertical plot scaling
84 |
85 | # prepare main data object
86 | d <- list()
87 | for(i in unique(dat$item)) d[[i]] <- list()
88 |
89 | # loop through periods
90 | for(i in 1:length(times)){
91 |
92 | # 3 time variables, NA if they fall outside the data period
93 | if(i>1) t1 <- times[i-1] else t1 <- NA # prev period
94 | t2 <- times[i] # this period
95 | if(i plot.y.max) plot.y.max <- y.sum
103 |
104 | # loop through items
105 | if(plotdir == 'centred') y <- -y.sum/2 else y <- 0 # vertical scaler
106 |
107 | for(j in 1:nrow(dat.t)){
108 | # work up/down y axes to calculate spline positions
109 | y0 <- y + ygap
110 | y1 <- y0 + dat.t$val[j]
111 | y <- y1
112 |
113 | # calculate left and right x-axis splines
114 | if(!is.na(t1)) spline.x <- c(t2 - wave, t2) else spline.x <- numeric(0)
115 | if(!is.na(t3)) spline.x <- c(spline.x, t2, t2 + wave)
116 |
117 | # update d
118 | item <- as.character(dat.t$item[j])
119 | d[[item]]$x <- c(d[[item]]$x, spline.x)
120 | d[[item]]$y0 <- c(d[[item]]$y0, rep(y0, length(spline.x)))
121 | d[[item]]$y1 <- c(d[[item]]$y1, rep(y1, length(spline.x)))
122 | } # end items loop
123 | } # end period loop
124 |
125 | # function to ensure vertex arrays are same length, as xspline output can vary
126 | resize <- function(v, n = 500){ d<-data.frame(x = 1:length(v), y = v);
127 | approx(d, xout = seq(1, length(v), length.out = n))$y}
128 | plot.new() # required by xspline
129 |
130 | # calculate spline curves
131 | for(i in names(d)){
132 | curves <- list()
133 | # iterate through bottom/top sets of bezier points, get curves and resize
134 | for(j in c('y0', 'y1')) curves[[j]] <- lapply(xspline(d[[i]]$x, d[[i]][[j]], shape = 1, draw = FALSE), resize)
135 | # stitch top and bottom polylines together clockwise into a polygon
136 | d[[i]]$poly <- data.frame(x = c(curves[[1]]$x, rev(curves[[2]]$x)), y = c(curves[[1]]$y, rev(curves[[2]]$y)))
137 | }
138 |
139 | # label y positions
140 | labs.l <- data.frame(t(sapply(1:length(d), FUN = function(i)t(data.frame(lab = names(d[i]), lefty = mean(c(d[[i]]$y0[1], d[[i]]$y1[1]))) ))), stringsAsFactors = FALSE)
141 | labs.r <- data.frame(t(sapply(1:length(d), FUN = function(i)t(data.frame(lab = names(d[i]), lefty = mean(c(rev(d[[i]]$y0)[1], rev(d[[i]]$y1)[1])))) )), stringsAsFactors = FALSE)
142 | names(labs.l) <- names(labs.r) <- c('item', 'y')
143 | labs.l$y <- as.numeric(as.character(labs.l$y)); labs.r$y <- as.numeric(as.character(labs.r$y)) # to numeric
144 | labs.l$col <- col[match(labs.l$item, datsum$item)]
145 | labs.r$col <- col[match(labs.r$item, datsum$item)]
146 |
147 | # line widths
148 | if(any(lwd == 0)) return("Error: 'lwd' must be greater than zero")
149 | if(length(lwd) == 1) lwd <- rep(lwd, length(d))
150 | if(!length(lwd) == length(d)) return("Error: 'lwd' length must equal the number of unique data elements")
151 | lwd <- lwd[plotorder]
152 |
153 | # scale and orientation of axes
154 | if(plotdir == 'up') ylim <- c(0, plot.y.max) else {
155 | if(plotdir == 'down') ylim <- c(plot.y.max, 0) else {
156 | if(plotdir == 'centred') ylim <- c(-plot.y.max/2, plot.y.max/2) else {
157 | return("Incorrect specification for plotdir: please select 'up', 'down' or 'centred'")
158 | }}}
159 | xran <- range(dat$time)
160 | xlim <- extendrange(r = xran, f = xmargin/2)
161 |
162 | # plot order (biggest polygons first)
163 | d <- d[plotorder]
164 | col <- col[plotorder]
165 |
166 | # axis labels
167 | if(is.na(xlab)) xlab <- orig.names[2]
168 | if(is.na(ylab)) ylab <- orig.names[3]
169 |
170 | # plot
171 | plot.window(xlim = xlim, ylim = ylim)
172 | axis(1, times, cex.axis = axis.cex, labels = axis.labs, col = lab.col, col.axis = axis.col)
173 | if(is.na(title)) title <- paste('Alluvial plot of', orig.names[1], 'vs', orig.names[3], 'by', orig.names[2])
174 | title(title, cex.main = title.cex)
175 | mtext(xlab, side = 1, line = xlab.pos, col = lab.col); mtext(ylab, side = 2, line = ylab.pos, col = lab.col)
176 | if(grid) abline(v = times, col = grid.col, lwd = grid.lwd)
177 | for(i in 1:length(d)) polygon(d[[i]]$poly$x, d[[i]]$poly$y, col = col[i], lwd = lwd[i], ...)
178 | text(times[1], labs.l$y, labels = labs.l$item, col = labs.l$col, pos = 2, cex = lab.cex)
179 | text(times[length(times)], labs.r$y, labels = labs.r$item, adj = 0, col = labs.r$col, pos = 4, cex = lab.cex)
180 |
181 | # legend
182 | topval <- max(dat$val)
183 | topitem <- as.character(dat$item[match(topval, dat$val)])
184 | toptime <- dat$time[match(topval, dat$val)]
185 |
186 | if(leg.mode){ # legend plotted on maximum data point
187 | if(is.na(leg.lty)) leg.lty <- "dotted"; if(is.na(leg.lwd)) leg.lwd <- 2
188 | val_ind <- match(toptime, d[[topitem]]$x)
189 | leg_y0 <- d[[topitem]]$y0[val_ind]
190 | leg_y1 <- d[[topitem]]$y1[val_ind]
191 | leg_ym <- mean(c(leg_y0, leg_y1))
192 | lines(rep(toptime, 2), c(leg_y0, leg_ym-(topval*.08)), lwd = leg.lwd, lend = 'butt', col = leg.col, lty = leg.lty)
193 | lines(rep(toptime, 2), c(leg_y1, leg_ym+(topval*.08)), lwd = leg.lwd, lend = 'butt', col = leg.col, lty = leg.lty)
194 | text(toptime, leg_ym, labels = formatC(topval, format = "d", big.mark = ','), pos = NULL, cex = leg.cex, col = leg.col)
195 | } else { # legend plotted in custom position
196 | if(!is.na(leg.max)) maxval <- leg.max
197 | if(is.na(leg.lty)) leg.lty <- "solid"; if(is.na(leg.lwd)) leg.lwd <- 10
198 | leg_x <- (xlim[2]-xlim[1]) * leg.x + xlim[1]
199 | leg_y <- (ylim[2]-ylim[1]) * leg.y + ylim[1]
200 | diffs <- d[[topitem]]$y1 - d[[topitem]]$y0
201 | time <- d[[topitem]]$x[match(max(diffs), diffs)]
202 | lines(data.frame(x = c(leg_x, leg_x), y = c(leg_y, leg_y + maxval)), lwd = leg.lwd, lend = 'butt', col = leg.col, lty = leg.lty)
203 | max.lab <- formatC(maxval, format = "d", big.mark = ',')
204 | text(rep(leg_x, 2), c(leg_y, leg_y + maxval), labels = c('0', max.lab), cex = leg.cex, pos = 4, offset = 1, col = leg.col)
205 | }
206 | }
207 |
--------------------------------------------------------------------------------
/vignettes/alluvial.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Creating Alluvial Diagrams"
3 | author: "Michał Bojanowski"
4 | date: "`r Sys.Date()`"
5 | output:
6 | rmarkdown::html_vignette:
7 | toc: yes
8 | fig_caption: yes
9 | vignette: >
10 | %\VignetteIndexEntry{Creating Alluvial Diagrams}
11 | %\VignetteEngine{knitr::rmarkdown}
12 | %\VignetteEncoding{UTF-8}
13 | ---
14 |
15 |
16 | ```{r setup, cache=FALSE, include=FALSE}
17 | library(alluvial)
18 | library(dplyr)
19 |
20 | knitr::opts_chunk$set(
21 | fig.width=7,
22 | fig.height=4,
23 | cache=FALSE
24 | )
25 | ```
26 |
27 |
28 |
29 |
30 | # What is alluvial diagram?
31 |
32 | Alluvial diagram is a variant of a Parallel Coordinates Plot (PCP) but for categorical variables. Variables are assigned to vertical axes that are parallel. Values are represented with blocks on each axis. Observations are represented with *alluvia* (sing. "alluvium") spanning across all the axes.
33 |
34 | You create alluvial diagrams with function `alluvial()`. Let us use `Titanic` dataset as an example. As it is a `table`, we need to convert it to a data frame
35 |
36 | ```{r data}
37 | tit <- as.data.frame(Titanic, stringsAsFactors = FALSE)
38 | head(tit)
39 | ```
40 |
41 | and create the alluvial diagram:
42 |
43 | ```{r example}
44 | alluvial(
45 | tit[,1:4],
46 | freq=tit$Freq,
47 | col = ifelse(tit$Survived == "Yes", "orange", "grey"),
48 | border = ifelse(tit$Survived == "Yes", "orange", "grey"),
49 | hide = tit$Freq == 0,
50 | cex = 0.7
51 | )
52 | ```
53 |
54 | We have four variables:
55 |
56 | - `Class` on the ship the passanger occupied
57 | - `Sex` of the passenger
58 | - `Age` of the passenger
59 | - Whether the passenger `Survived`.
60 |
61 | Vertical sizes of the blocks are proportional to the frequency, and so are the widths of the alluvia. Alluvia represent all combinations of values of the variables in the dataset. By default the vertical order of the alluvia is determined by alphabetical ordering of the values on each variable lexicographically (last variable changes first) drawn from bottom to top. In this example, the color is determined by passengers' survival status, i.e. passenger who survived are represented with orange alluvia.
62 |
63 | Alluvial diagrams are very useful in reading various conditional and uncoditional distributions in a multivariate dataset. For example, we can see that:
64 |
65 | - Most of the Crew did not survived -- majority of the height of the Crew category is covered by grey alluvia.
66 | - Majortity of the Crew where adult men.
67 | - Almost all women from the 1st Class did survive.
68 | - The women who did not survive come mostly from 3rd class.
69 |
70 |
71 |
72 |
73 |
74 | # Simple use
75 |
76 | Minimal use requires supplying data frame(s) as first argument, and a vector of frequencies as the `freq` argument. By default all alluvia are drawn using mildly transparent gray.
77 |
78 | Two variables `Class` and `Survived`:
79 |
80 | ```{r alluvial_defaults_2}
81 | # Survival status and Class
82 | tit %>% group_by(Class, Survived) %>%
83 | summarise(n = sum(Freq)) -> tit2d
84 |
85 | alluvial(tit2d[,1:2], freq=tit2d$n)
86 | ```
87 |
88 | Three variables `Sex`, `Class`, and `Survived`:
89 |
90 | ```{r alluvial_defaults_3}
91 | # Survival status, Sex, and Class
92 | tit %>% group_by(Sex, Class, Survived) %>%
93 | summarise(n = sum(Freq)) -> tit3d
94 |
95 | alluvial(tit3d[,1:3], freq=tit3d$n)
96 | ```
97 |
98 |
99 |
100 |
101 | # Customizing
102 |
103 | There are several ways to customize alluvial diagrams with `alluvial()` the following sections illustrate probably most common usecases.
104 |
105 |
106 | ## Customizing colors
107 |
108 | Colors of the alluvia can be customized with `col`, `border` and `alpha` arguments. For example:
109 |
110 | ```{r colors}
111 | alluvial(
112 | tit3d[,1:3],
113 | freq=tit3d$n,
114 | col = ifelse( tit3d$Sex == "Female", "pink", "lightskyblue"),
115 | border = "grey",
116 | alpha = 0.7,
117 | blocks=FALSE
118 | )
119 | ```
120 |
121 |
122 |
123 |
124 |
125 | ## Hiding and reordering alluvia
126 |
127 | ### Hiding
128 |
129 | With `alluvial` sometimes it is desirable to omit plotting of some of the alluvia. This is most frequently the case with larger datasets in which there are a lot of combinations of values of the variables associated with very small frequencies, or even 0s. Alluvia can be hidden with argument `hide` expecting a logical vector of length equal to the number of rows in the data. Alluvia for which `hide` is `FALSE` are not plotted. For example, to hide alluvia with frequency less than 150:
130 |
131 | ```{r alluvial_hide}
132 | alluvial(tit2d[,1:2], freq=tit2d$n, hide=tit2d$n < 150)
133 | ```
134 |
135 | This skips drawing the alluvia corresponding to the following rows in `tit` data frame:
136 |
137 | ```{r}
138 | tit2d %>% select(Class, Survived, n) %>%
139 | filter(n < 150)
140 | ```
141 |
142 | You can see the gaps e.g. on the "Yes" and "No" category blocks on the `Survived` axis.
143 |
144 | If you would rather omit these rows from the plot alltogether (i.e. no gaps), you need to filter your data before it is used by `alluvial()`.
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 | ### Changing "layers"
155 |
156 | By default alluvia are plotted in sequence determined by the row order in the dataset. It determines which alluvia will be plotted "on top" of which others.
157 |
158 | Consider simple data:
159 |
160 | ```{r data_layer}
161 | d <- data.frame(
162 | x = c(1, 2, 3),
163 | y = c(3 ,2, 1),
164 | freq=c(1,1,1)
165 | )
166 | d
167 | ```
168 |
169 | As there are three rows, we will have three alluvia:
170 |
171 | ```{r layer_ex1, fig.width=3, fig.height=3, fig.show="hold"}
172 | alluvial(d[,1:2], freq=d$freq, col=1:3, alpha=1)
173 | # Reversing the order
174 | alluvial(d[ 3:1, 1:2 ], freq=d$freq, col=3:1, alpha=1)
175 | ```
176 |
177 | Note that to keep colors matched in the same way to the alluvia we had to reverse the `col` argument too. Instead of reordering the data and keeping track of the other arguments plotting order can be adjusted with `layer` argument:
178 |
179 | ```{r layer_ex2}
180 | alluvial(d[,1:2], freq=d$freq, col=1:3, alpha=1,
181 | layer=3:1)
182 | ```
183 |
184 | The value of `layer` is passed to `order` so it is possible to use logical vectors e.g. if you only want to put some of the flows on top. For example, for Titanic data to put all alluvia for all survivors on top we can:
185 |
186 | ```{r layer_ex3}
187 | alluvial(tit3d[,1:3], freq=tit3d$n,
188 | col = ifelse( tit3d$Survived == "Yes", "orange", "grey" ),
189 | alpha = 0.8,
190 | layer = tit3d$Survived == "No"
191 | )
192 | ```
193 |
194 | First layer is the one on top, second layer below the first and so on. Consequently, in the example above, `Survived == "No"` is ordered after `Survived == "Yes"` so the former is below the latter.
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 | ### Adjusting vertical order of categories
204 |
205 | By default `alluvial()` orders the values on each axis in an alphabetic order. This happens irrespectively of the ordering of observations in the plotted dataset. It is possible to override the default ordering by transforming the variables of interest into `factor`s with a custom ordering of levels.
206 |
207 | Consider the following example data:
208 |
209 | ```{r catorder_data}
210 | d <- data.frame(
211 | col = c("#A6CEE3", "#1F78B4", "#B2DF8A", "#33A02C", "#FB9A99",
212 | "#E31A1C", "#FDBF6F", "#FF7F00"), # from RColorBrewer Paired palette
213 | Temperature = rep(c("cool", "hot"), each=4),
214 | Luminance = rep(c("bright", "dark"), 4),
215 | Color = rep(c("blue", "green", "red", "orange"), each=2)
216 | ) %>%
217 | mutate(
218 | n = 1
219 | )
220 | d
221 | ```
222 |
223 | Plotting it with `alluvial()` with default settings will give:
224 |
225 | ```{r catorder_alluvial_defaults}
226 | alluvial(
227 | select(d, Temperature, Luminance, Color),
228 | freq=d$n,
229 | col = d$col,
230 | alpha=0.9
231 | )
232 | ```
233 |
234 | Let's change the order of categories of:
235 |
236 | - the `Color` axis such that it is (from the bottom): green, blue, orange, red.
237 | - the `Luminance` axis such that it is reversed.
238 |
239 | For each variable we create a factor with a vector of unique values of that variable sorted the way we want passed to `levels` argument of `factor()`:
240 |
241 | ```{r catorder_data_factor}
242 | d <- d %>%
243 | mutate(
244 | Color_f = factor(Color, levels=c("green", "blue", "red", "orange")),
245 | Luminance_f = factor(Luminance, levels=c("dark", "bright"))
246 | )
247 | ```
248 |
249 | ... and plot
250 |
251 | ```{r catorder_alluvial_factor}
252 | alluvial(
253 | select(d, Temperature, Luminance_f, Color_f),
254 | freq=d$n,
255 | col = d$col,
256 | alpha=0.9
257 | )
258 | ```
259 |
260 | Another version recognizing that in data `Color` is nested in `Temperature`. Different axis order gives clearer picture of the data structure.
261 |
262 | ```{r catorder_alluvial_factor2}
263 | alluvial(
264 | select(d, Temperature, Color_f, Luminance_f),
265 | freq=d$n,
266 | col = d$col,
267 | alpha=0.9
268 | )
269 | ```
270 |
271 |
272 |
273 | ### Adjusting vertical order of alluvia
274 |
275 | **This feature is experimental!**
276 |
277 | Usually the order of the variables (axes) is rather unimportant. However, having particular two variables next to each other facilitates analyzing dependency between those two variables. In alluvial diagrams the ordering of the variables determines the vertical plotting order of the alluvia. This vertical order, together with setting `blocks` to `FALSE`, can be used to turn category blocks into stacked barcharts.
278 |
279 | Consider two versions of subsets of the Titanic data that differ only in the order of variables.
280 |
281 | ```{r ordering_data}
282 | tit %>% group_by(Sex, Age, Survived) %>%
283 | summarise( n= sum(Freq)) -> x
284 |
285 | tit %>% group_by(Survived, Age, Sex) %>%
286 | summarise( n= sum(Freq)) -> y
287 | ```
288 |
289 | In `x` we have `r paste(names(x), collapse="-")` while in `y` we have
290 | `r paste(names(y), collapse="-")`.
291 |
292 | If we color the alluvia according to the first axis, the category blocks of Age and Survived become barcharts showing relative frequencies of Men and Women within categories of Age and Survived.
293 |
294 | ```{r ordering_x}
295 | alluvial(x[,1:3], freq=x$n,
296 | col = ifelse(x$Sex == "Male", "orange", "grey"),
297 | alpha = 0.8,
298 | blocks=FALSE
299 | )
300 | ```
301 |
302 | Now we can see for example that
303 |
304 | - There were a little bit of more girls than boys (category `Age == "Child"`)
305 | - Among surviors there were roughly the same number of Men and Women.
306 |
307 | Argument `ordering` can be used to fully customize the ordering of each alluvium on each axis without the need to reorder the axes themselves. This feature is experimental as you can easily break things. It expects a list of numeric vectors or `NULL`s one for each variable in the data:
308 |
309 | - Value `NULL` does not change the default order on the corresponding axis.
310 | - A numeric vector should have length equal to the number of rows in the data and is determines the vertical order of the alluvia on the corresponding axis.
311 |
312 | For example:
313 |
314 | ```{r ordering_y}
315 | alluvial(y[,1:3], freq=y$n,
316 | # col = RColorBrewer::brewer.pal(8, "Set1"),
317 | col = ifelse(y$Sex == "Male", "orange", "grey"),
318 | alpha = 0.8,
319 | blocks = FALSE,
320 | ordering = list(
321 | order(y$Survived, y$Sex == "Male"),
322 | order(y$Age, y$Sex == "Male"),
323 | NULL
324 | )
325 | )
326 | ```
327 |
328 | The list passed to `ordering` has has three elements corresponding to `Survived`, `Age`, and `Sex` respectively (that's the order of the variables in `y`). The elements of this list are
329 |
330 | 1. Call to `order` sorting the alluvia on the `Survived` axis. The alluvia need to be sorted according to `Survived` first (otherwise the categories "Yes" and "No" will be destroyed)
331 | and according to the `Sex` second.
332 | 2. Call to `order` sorting the alluvia on the `Age` axis. The alluvia need to be sorted according to `Age` first `Sex` second.
333 | 3. `NULL` leaves the default ordering on `Sex` axis.
334 |
335 | In the example below alluvia are colored by sex (red=Female, blue=Male) and survival status (bright=survived, dark=did not survive). Each category block is a stacked barchart showing relative freuquencies of man/women who did/did not survive. The alluvia are reordered on the last axis (Age) so that Sex categories are next each other (red together and blue together):
336 |
337 |
338 | ```{r}
339 | pal <- c("red4", "lightskyblue4", "red", "lightskyblue")
340 |
341 | tit %>%
342 | mutate(
343 | ss = paste(Survived, Sex),
344 | k = pal[ match(ss, sort(unique(ss))) ]
345 | ) -> tit
346 |
347 |
348 | alluvial(tit[,c(4,2,3)], freq=tit$Freq,
349 | hide = tit$Freq < 10,
350 | col = tit$k,
351 | border = tit$k,
352 | blocks=FALSE,
353 | ordering = list(
354 | NULL,
355 | NULL,
356 | order(tit$Age, tit$Sex )
357 |
358 | )
359 | )
360 | ```
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
371 | # Appendix
372 |
373 | ```{r session_info}
374 | sessionInfo()
375 | ```
376 |
377 |
--------------------------------------------------------------------------------