├── .gitattributes
├── GitAnimation.key
├── GitHubWorkflow.pdf
├── .gitignore
├── GitHubWorkflowPt2.pdf
├── images
    ├── ggplot2SO.png
    ├── messyvstidy1.png
    ├── messyvstidy2.png
    ├── messyvstidy3.png
    ├── messyvstidy4.png
    ├── messyvstidy5.png
    ├── messyvstidy6.png
    ├── messyvstidy7.png
    ├── messyvstidy8.png
    ├── messyvstidy9.png
    └── Inkedmessyvstidy8.jpg
├── BaseGraphicsCheatsheet.pdf
├── R
    ├── gather_files
    │   ├── figure-gfm
    │   │   └── unnamed-chunk-6-1.png
    │   └── figure-markdown_github
    │   │   └── unnamed-chunk-6-1.png
    ├── reorder_files
    │   ├── figure-gfm
    │   │   ├── unnamed-chunk-1-1.png
    │   │   ├── unnamed-chunk-1-2.png
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   ├── unnamed-chunk-13-1.png
    │   │   ├── unnamed-chunk-2-1.png
    │   │   ├── unnamed-chunk-2-2.png
    │   │   ├── unnamed-chunk-3-1.png
    │   │   ├── unnamed-chunk-4-1.png
    │   │   ├── unnamed-chunk-5-1.png
    │   │   ├── unnamed-chunk-6-1.png
    │   │   ├── unnamed-chunk-7-1.png
    │   │   ├── unnamed-chunk-8-1.png
    │   │   └── unnamed-chunk-9-1.png
    │   ├── figure-html
    │   │   ├── unnamed-chunk-2-1.png
    │   │   ├── unnamed-chunk-2-2.png
    │   │   ├── unnamed-chunk-3-1.png
    │   │   ├── unnamed-chunk-5-1.png
    │   │   ├── unnamed-chunk-6-1.png
    │   │   ├── unnamed-chunk-7-1.png
    │   │   ├── unnamed-chunk-8-1.png
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-12-1.png
    │   │   └── unnamed-chunk-13-1.png
    │   └── figure-markdown_github
    │   │   ├── unnamed-chunk-1-1.png
    │   │   ├── unnamed-chunk-10-1.png
    │   │   ├── unnamed-chunk-11-1.png
    │   │   ├── unnamed-chunk-2-1.png
    │   │   ├── unnamed-chunk-3-1.png
    │   │   ├── unnamed-chunk-4-1.png
    │   │   ├── unnamed-chunk-5-1.png
    │   │   ├── unnamed-chunk-6-1.png
    │   │   ├── unnamed-chunk-7-1.png
    │   │   ├── unnamed-chunk-8-1.png
    │   │   └── unnamed-chunk-9-1.png
    ├── pivot_longer_files
    │   └── figure-gfm
    │   │   ├── unnamed-chunk-5-1.png
    │   │   └── unnamed-chunk-6-1.png
    ├── mosaic_files
    │   └── figure-markdown_github
    │   │   ├── unnamed-chunk-2-1.png
    │   │   ├── unnamed-chunk-3-1.png
    │   │   ├── unnamed-chunk-4-1.png
    │   │   ├── unnamed-chunk-5-1.png
    │   │   ├── unnamed-chunk-6-1.png
    │   │   ├── unnamed-chunk-7-1.png
    │   │   ├── unnamed-chunk-8-1.png
    │   │   ├── unnamed-chunk-9-1.png
    │   │   ├── unnamed-chunk-10-1.png
    │   │   └── unnamed-chunk-11-1.png
    ├── topten.Rmd
    ├── basevsforcats.Rmd
    ├── mosaic.Rmd
    ├── mosaic.md
    ├── pivot_longer.Rmd
    ├── gather.Rmd
    ├── pivot_longer.md
    ├── reorder.Rmd
    ├── gather.md
    └── reorder.md
├── data
    ├── MusicIcecream.csv
    ├── Assets.csv
    └── countries2012.csv
├── codehelp.Rproj
├── README.md
└── gitworkflows.md


/.gitattributes:
--------------------------------------------------------------------------------
1 | *.html linguist-detectable=false
2 | *.Rmd linguist-language=R
3 | 


--------------------------------------------------------------------------------
/GitAnimation.key:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/GitAnimation.key


--------------------------------------------------------------------------------
/GitHubWorkflow.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/GitHubWorkflow.pdf


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | *cache*
6 | */*.html
7 | 


--------------------------------------------------------------------------------
/GitHubWorkflowPt2.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/GitHubWorkflowPt2.pdf


--------------------------------------------------------------------------------
/images/ggplot2SO.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/images/ggplot2SO.png


--------------------------------------------------------------------------------
/images/messyvstidy1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/images/messyvstidy1.png


--------------------------------------------------------------------------------
/images/messyvstidy2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/images/messyvstidy2.png


--------------------------------------------------------------------------------
/images/messyvstidy3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/images/messyvstidy3.png


--------------------------------------------------------------------------------
/images/messyvstidy4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/images/messyvstidy4.png


--------------------------------------------------------------------------------
/images/messyvstidy5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/images/messyvstidy5.png


--------------------------------------------------------------------------------
/images/messyvstidy6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/images/messyvstidy6.png


--------------------------------------------------------------------------------
/images/messyvstidy7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/images/messyvstidy7.png


--------------------------------------------------------------------------------
/images/messyvstidy8.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/images/messyvstidy8.png


--------------------------------------------------------------------------------
/images/messyvstidy9.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/images/messyvstidy9.png


--------------------------------------------------------------------------------
/BaseGraphicsCheatsheet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/BaseGraphicsCheatsheet.pdf


--------------------------------------------------------------------------------
/images/Inkedmessyvstidy8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/images/Inkedmessyvstidy8.jpg


--------------------------------------------------------------------------------
/R/gather_files/figure-gfm/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/gather_files/figure-gfm/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-1-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-1-2.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-2-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-2-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-2-2.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-gfm/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-gfm/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-html/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-html/unnamed-chunk-2-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-html/unnamed-chunk-2-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-html/unnamed-chunk-2-2.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-html/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-html/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-html/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-html/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-html/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-html/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-html/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-html/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-html/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-html/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-html/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-html/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-html/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-html/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-html/unnamed-chunk-12-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-html/unnamed-chunk-12-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-html/unnamed-chunk-13-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-html/unnamed-chunk-13-1.png


--------------------------------------------------------------------------------
/R/pivot_longer_files/figure-gfm/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/pivot_longer_files/figure-gfm/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/R/pivot_longer_files/figure-gfm/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/pivot_longer_files/figure-gfm/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/R/gather_files/figure-markdown_github/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/gather_files/figure-markdown_github/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/R/mosaic_files/figure-markdown_github/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/mosaic_files/figure-markdown_github/unnamed-chunk-2-1.png


--------------------------------------------------------------------------------
/R/mosaic_files/figure-markdown_github/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/mosaic_files/figure-markdown_github/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/R/mosaic_files/figure-markdown_github/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/mosaic_files/figure-markdown_github/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/R/mosaic_files/figure-markdown_github/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/mosaic_files/figure-markdown_github/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/R/mosaic_files/figure-markdown_github/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/mosaic_files/figure-markdown_github/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/R/mosaic_files/figure-markdown_github/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/mosaic_files/figure-markdown_github/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/R/mosaic_files/figure-markdown_github/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/mosaic_files/figure-markdown_github/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/R/mosaic_files/figure-markdown_github/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/mosaic_files/figure-markdown_github/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/R/mosaic_files/figure-markdown_github/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/mosaic_files/figure-markdown_github/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/R/mosaic_files/figure-markdown_github/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/mosaic_files/figure-markdown_github/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-markdown_github/unnamed-chunk-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-markdown_github/unnamed-chunk-1-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-markdown_github/unnamed-chunk-10-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-markdown_github/unnamed-chunk-10-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-markdown_github/unnamed-chunk-11-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-markdown_github/unnamed-chunk-11-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-markdown_github/unnamed-chunk-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-markdown_github/unnamed-chunk-2-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-markdown_github/unnamed-chunk-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-markdown_github/unnamed-chunk-3-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-markdown_github/unnamed-chunk-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-markdown_github/unnamed-chunk-4-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-markdown_github/unnamed-chunk-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-markdown_github/unnamed-chunk-5-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-markdown_github/unnamed-chunk-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-markdown_github/unnamed-chunk-6-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-markdown_github/unnamed-chunk-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-markdown_github/unnamed-chunk-7-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-markdown_github/unnamed-chunk-8-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-markdown_github/unnamed-chunk-8-1.png


--------------------------------------------------------------------------------
/R/reorder_files/figure-markdown_github/unnamed-chunk-9-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jtr13/codehelp/HEAD/R/reorder_files/figure-markdown_github/unnamed-chunk-9-1.png


--------------------------------------------------------------------------------
/data/MusicIcecream.csv:
--------------------------------------------------------------------------------
 1 | "Age","Favorite","Music","Freq"
 2 | "old","bubble gum","classical",1
 3 | "old","bubble gum","rock",1
 4 | "old","coffee","classical",3
 5 | "old","coffee","rock",1
 6 | "young","bubble gum","classical",2
 7 | "young","bubble gum","rock",5
 8 | "young","coffee","classical",1
 9 | "young","coffee","rock",0
10 | 


--------------------------------------------------------------------------------
/codehelp.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: Default
 4 | SaveWorkspace: Default
 5 | AlwaysSaveHistory: Default
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: knitr
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # codehelp
 2 | Code tutorials
 3 | 
 4 | [`tidyr::gather()` tutorial](R/gather.md)
 5 | 
 6 | [Reorder those bars tutorial](R/reorder.md)
 7 | 
 8 | [Drawing a mosaic plot with `vcd::mosaic()` tutorial](R/mosaic.md)
 9 | 
10 | [R Base Graphics Cheatsheet](BaseGraphicsCheatsheet.pdf)
11 | 
12 | [Git with Joyce, RLadies, 1/9/18, Part 1](GitHubWorkflow.pdf)
13 | 
14 | [Git with Joyce, RLadies, 1/9/18, Part 2](GitHubWorkflowPt2.pdf)
15 | 


--------------------------------------------------------------------------------
/data/Assets.csv:
--------------------------------------------------------------------------------
 1 | Name,Assets
 2 | DeVos,579783484
 3 | Ross,326224177
 4 | Cohn,252952172
 5 | Kushner,241037233
 6 | Tillerson,239488353
 7 | Cordish,197281047
 8 | Mnuchin,154137166
 9 | Liddell,75262171
10 | Lighthizer,18627275
11 | Bannon,11850014
12 | Chao,11269045
13 | Conway,11015017
14 | Perdue,11269045
15 | Rosen,9086316
16 | Coats,8770090
17 | Carson,8114059
18 | Price,8026265
19 | Mattis,3563022
20 | Spicer,3900059
21 | Mulvaney,3239057
22 | Sessions,2938056
23 | Eisenberg,2951034
24 | Wilson,2150150
25 | Zinke,1812011
26 | McGinley,1331036
27 | Perry,891027
28 | Hahn,1006009
29 | Priebus,604008
30 | Navarro,516013
31 | Acosta,431006
32 | Pruitt,210006
33 | Kelly,181005
34 | Bossert,140013
35 | Gorka,101010
36 | Pompeo,77021
37 | Haley,66003


--------------------------------------------------------------------------------
/R/topten.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "Top Ten Essential Tips for R Newbies"
 3 | author: "Joyce Robbins"
 4 | date: "2/15/2018"
 5 | output: github_document
 6 | ---
 7 | 
 8 | ```{r setup, include=FALSE}
 9 | knitr::opts_chunk$set(echo = TRUE, fig.height = 3.5,
10 |                       fig.width = 5, fig.align = 'center',
11 |                       warning = FALSE, message = FALSE,
12 |                       cache = TRUE)
13 | ```
14 | 
15 | 1. Tidyverse is not a package.
16 | 2. Chunk Output Inline vs. Chunk Output in Console (= Plots in Plot Window)
17 | 3. Size figures with fig.height and fig.width
18 | 4. Not all packages are on CRAN. Go-to today: devtools::install_github
19 | 5. ::
20 | 6. If data doesn't appear, use data()
21 | 7. Compiling to pdf / Word
22 | 8. Don't get fancy in rmarkdown
23 | 9. %>%
24 | 10. 
25 | 
26 | Bonus tip: Second monitor
27 | 


--------------------------------------------------------------------------------
/R/basevsforcats.Rmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | title: "basevsforcats"
 3 | author: "Joyce Robbins"
 4 | date: "2/8/2018"
 5 | output: github_document
 6 | ---
 7 | 
 8 | ```{r setup, include=FALSE}
 9 | knitr::opts_chunk$set(echo = TRUE)
10 | ```
11 | 
12 | ### `fct_inorder()`
13 | ```{r}
14 | library(forcats)
15 | mydf <- data.frame(Skill = c("Beginner", "Adv Beginner", "Intermediate", "Expert"),
16 |                Num = c(75, 60, 15, 25))
17 | 
18 | levels(fct_inorder(mydf$Skill)) == levels(factor(mydf$Skill, levels = mydf$Skill))
19 | ```
20 | 
21 | ### `fct_relevel()`
22 | 
23 | ```{r}
24 | MotherAge <-  c("15-19 years", "20-24 years", "25-29 years", 
25 |                 "30-34 years", "35-39 years", "40-44 years",
26 |                 "45-49 years", "50 years and over", "Under 15 years")
27 | 
28 | Num <- c(229.715, 850.509, 1152.311, 1094.693, 527.996, 111.848,
29 |          8.171, .754, 2.500)
30 | 
31 | Births2015 <- data.frame(MotherAge, Num)
32 | 
33 | levels(fct_relevel(Births2015$MotherAge, "Under 15 years")) == levels(relevel(Births2015$MotherAge, "Under 15 years"))
34 | ```
35 | 
36 | fct_inorder(x) = factor(x, levels = x)
37 | fct_relevel(x, "a") = relevel(x, "a") but fct_relevel has after= param and ability to handle more than one level
38 | fct_reorder(x, y) = reorder(x,y) (no diff)
39 | fct_infreq(x) = factor(x, levels = names(sort(summary(factor(x)), decreasing = TRUE)))
40 | 


--------------------------------------------------------------------------------
/gitworkflows.md:
--------------------------------------------------------------------------------
 1 | ## Git/GitHub Workflows for R Users (DRAFT)
 2 | 
 3 | Joyce Robbins
 4 | 12/7/18
 5 | 
 6 | Note: This is an attempt to outline beginner workflows in as succinct a manner as possible for reviewers who are familiar with Git/GitHub and can provide feedback / suggest improvements to the workflows themselves, rather than the how-to. As such, I have not provided step-by-step, tutorial style instructions. Eventually the ideas here will all be incorporated into a tutorial presentation, a draft of which is available here: [GitHubWorkflow.pdf](GitHubWorkflow.pdf).
 7 | 
 8 | Feedback welcome by issue, pull request, or email: jtr13@columbia.edu
 9 | 
10 | Thank you for your help!
11 | 
12 | Beginner workflows in increasing order of difficulty:
13 | 
14 | ### 1. Share work on GitHub (No git required)
15 | 
16 | Situation: You have files that you want to share.
17 | 
18 | Mantra: WORK, UPLOAD, REPEAT
19 | 
20 | Method:  
21 | 1. Create a repo on GitHub.  
22 | 2. Add files on GitHub via the "Add files via upload" button.
23 | 
24 | ### 2. Work on local master branch
25 | 
26 | Situation: You are the only contributor to your project. You need to be able to work locally and sync with GitHub.
27 | 
28 | Mantra: PULL, WORK, COMMIT, PUSH, REPEAT
29 | 
30 | Method:  
31 | 1. Create a repo on GitHub.  
32 | 2. Clone it while creating a new RStudio project.  
33 | 3. Begin with pulling, then work, commit, push.   Everything is done with RStudio buttons.  
34 | 
35 | ### 3. Work on local new branch (your project)
36 | 
37 | Situation: You are working on a project with other collaborators that resides on your GitHub repo. You have agreed that pull requests will not be merged by the author.
38 | 
39 | Mantra: PULL, BRANCH, WORK, COMMIT, PUSH, SUBMIT PULL REQUEST, DELETE BRANCH, REPEAT
40 | 
41 | Method:  
42 | 1. Same as above but work starts with a new branch (RStudio button)  
43 | 2. After pushing new work, a pull request is submitted on GitHub.  
44 | 3. Once the PR is merged, the remote branch is deleted on GitHub (button) and locally with `git branch -d <branch-name>`. Stop tracking deleted branch with `git fetch -p`.  
45 | 
46 | ### 4. Work on new local branch (someone else's project, you are a collaborator)  
47 | 
48 | *to be added*
49 | 
50 | ### 5. Work on new local branch (someone else's project, you are not a collaborator)  
51 | 
52 | *to be added*
53 | 
54 | 
55 | 
56 | 
57 | *Notes on fixing things*
58 | 
59 | Undo saved, uncommitted changes:
60 | `git checkout -- <file-name>`
61 | (discards changes in working directory)
62 | 
63 | Undo saved, committed changes:
64 | 
65 | https://stackoverflow.com/questions/927358/how-do-i-undo-the-most-recent-commits-in-git/34547846
66 | 
67 | 
68 | 


--------------------------------------------------------------------------------
/R/mosaic.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Drawing a mosaic plot with `vcd::mosaic()`"
  3 | author: "Joyce Robbins"
  4 | date: "2/15/2018"
  5 | output: github_document
  6 | ---
  7 | 
  8 | ```{r setup, include=FALSE}
  9 | knitr::opts_chunk$set(echo = TRUE, fig.height = 3.5,
 10 |                       fig.width = 5, fig.align = 'center',
 11 |                       warning = FALSE, message = FALSE,
 12 |                       cache = TRUE)
 13 | ```
 14 | 
 15 | ## The Data
 16 | 
 17 | ```{r}
 18 | df <- read.csv("../data/MusicIcecream.csv")
 19 | df
 20 | ```
 21 | 
 22 | ### Order of splits
 23 | 
 24 | It is best to draw mosaic plots incrementally: start with splitting on one variable and then add additional variables one at a time.  The full mosaic plot will have one split per variable.
 25 | 
 26 | Important: if your data is in a data frame (see above), **the count column must be called `Freq`**. (Other data structure options are tables, matrices -- for 2 variables -- or objects of class `structable` -- see `>?vcd::structable`.)
 27 | 
 28 | Also note that all of these plots are drawn with **`vcd::mosaic()`** *not `mosaicplot()`*.
 29 | 
 30 | Split on `Age` only:
 31 | 
 32 | ```{r}
 33 | library(vcd)
 34 | mosaic(~Age, df)
 35 | ```
 36 | 
 37 | Split on `Age`, then `Music`:
 38 | 
 39 | ```{r}
 40 | mosaic(Music ~ Age, df)
 41 | ```
 42 | 
 43 | Note that the *first* split is between "young" and "old", while the second set of splits divides each age group into "classical" and "rock".
 44 | 
 45 | Split on `Age`, then `Music`, then `Favorite`:
 46 | 
 47 | ```{r}
 48 | mosaic(Favorite ~ Age + Music, df)
 49 | ```
 50 | 
 51 | ### Direction of splits
 52 | 
 53 | Note that in the previous example, the direction of the splits is as follows:
 54 | 
 55 | 1. `Age` -- horizontal split
 56 | 
 57 | 2. `Music` -- vertical split
 58 | 
 59 | 3. `Favorite` -- horizontal split
 60 | 
 61 | This is the default direction pattern: alternating directions beginning with horizontal. Therefore we get the same plot with the following:
 62 | 
 63 | ```{r}
 64 | mosaic(Favorite ~ Age + Music, 
 65 |        direction = c("h", "v", "h"), df)
 66 | ```
 67 | 
 68 | The directions can be altered as desired.  For example, to create a doubledecker plot, make all splits vertical except the last one:
 69 | 
 70 | ```{r}
 71 | mosaic(Favorite ~ Age + Music,
 72 |        direction = c("v", "v", "h"), df)
 73 | ```
 74 | 
 75 | Note that the direction vector is in order of splits (`Age`, `Music`, `Favorite`), not in the order in which the variables appear in the formula, where the last variable to be split is listed first, before the "~".
 76 | 
 77 | ### Options
 78 | 
 79 | #### Fill color:
 80 | 
 81 | ```{r}
 82 | library(grid) # needed for gpar
 83 | mosaic(Favorite ~ Age + Music, 
 84 |        gp = gpar(fill = c("pink", "tan3")),
 85 |        df)
 86 | ```
 87 | 
 88 | Note that the fill colors are applied to the dependent variable factor levels ("coffee" and "bubble gum" in this case.) 
 89 | 
 90 | #### Rotate labels:
 91 | 
 92 | ```{r}
 93 | mosaic(Favorite ~ Age + Music, 
 94 |        labeling = labeling_border(rot_labels = c(45, -45, 0, 0)),
 95 |        df)
 96 | ```
 97 | 
 98 | The `rot_labels = ` vector sets the rotation in degrees on the four sides of the plot in this order: *top, right, bottom, left*. (Different from the typical base graphics order!) The default is `rot_labels = c(0, 90, 0, 90)`.
 99 | 
100 | #### Abbreviate labels:
101 | 
102 | ```{r}
103 | mosaic(Favorite ~ Age + Music, 
104 |        labeling = labeling_border(abbreviate_labs = c(3, 1, 6)), 
105 |        df)
106 | ```
107 | 
108 | Labels are abbreviated in the order of the splits (as for `direction = `). The abbreviation algorithm appears to return the specified number of characters after vowels are eliminated (if necessary).
109 | 
110 | For more formatting options, see `>?vcd::labeling_border`.
111 | 
112 | #### Remove spacing between cells
113 | 
114 | ```{r}
115 | mosaic(Favorite ~ Age + Music,
116 |        spacing = spacing_equal(sp = unit(0, "lines")),
117 |        df)
118 | ```
119 | 
120 | For more details, see `>?vcd::spacings`
121 | 
122 | #### Change border color (must also set fill(?))
123 | 
124 | ```{r}
125 | mosaic(Favorite ~ Age + Music,
126 |        gp = gpar(fill = c("lightblue", "blue"),
127 |                  col = "white"),
128 |        spacing = spacing_equal(sp = unit(0, "lines")),
129 |        df)
130 | ```
131 | 
132 | Please feel free to suggest changes by submitting a pull request or ask questions / offer comments by opening an issue.
133 | 


--------------------------------------------------------------------------------
/R/mosaic.md:
--------------------------------------------------------------------------------
  1 | Drawing a mosaic plot with `vcd::mosaic()`
  2 | ================
  3 | Joyce Robbins
  4 | 2/15/2018
  5 | 
  6 | The Data
  7 | --------
  8 | 
  9 | ``` r
 10 | df <- read.csv("../data/MusicIcecream.csv")
 11 | df
 12 | ```
 13 | 
 14 |     ##     Age   Favorite     Music Freq
 15 |     ## 1   old bubble gum classical    1
 16 |     ## 2   old bubble gum      rock    1
 17 |     ## 3   old     coffee classical    3
 18 |     ## 4   old     coffee      rock    1
 19 |     ## 5 young bubble gum classical    2
 20 |     ## 6 young bubble gum      rock    5
 21 |     ## 7 young     coffee classical    1
 22 |     ## 8 young     coffee      rock    0
 23 | 
 24 | ### Order of splits
 25 | 
 26 | It is best to draw mosaic plots incrementally: start with splitting on one variable and then add additional variables one at a time. The full mosaic plot will have one split per variable.
 27 | 
 28 | Important: if your data is in a data frame (see above), **the count column must be called `Freq`**. (Other data structure options are tables, matrices -- for 2 variables -- or objects of class `structable` -- see `>?vcd::structable`.)
 29 | 
 30 | Also note that all of these plots are drawn with **`vcd::mosaic()`** *not `mosaicplot()`*.
 31 | 
 32 | Split on `Age` only:
 33 | 
 34 | ``` r
 35 | library(vcd)
 36 | mosaic(~Age, df)
 37 | ```
 38 | 
 39 | <img src="mosaic_files/figure-markdown_github/unnamed-chunk-2-1.png" style="display: block; margin: auto;" />
 40 | 
 41 | Split on `Age`, then `Music`:
 42 | 
 43 | ``` r
 44 | mosaic(Music ~ Age, df)
 45 | ```
 46 | 
 47 | <img src="mosaic_files/figure-markdown_github/unnamed-chunk-3-1.png" style="display: block; margin: auto;" />
 48 | 
 49 | Note that the *first* split is between "young" and "old", while the second set of splits divides each age group into "classical" and "rock".
 50 | 
 51 | Split on `Age`, then `Music`, then `Favorite`:
 52 | 
 53 | ``` r
 54 | mosaic(Favorite ~ Age + Music, df)
 55 | ```
 56 | 
 57 | <img src="mosaic_files/figure-markdown_github/unnamed-chunk-4-1.png" style="display: block; margin: auto;" />
 58 | 
 59 | ### Direction of splits
 60 | 
 61 | Note that in the previous example, the direction of the splits is as follows:
 62 | 
 63 | 1.  `Age` -- horizontal split
 64 | 
 65 | 2.  `Music` -- vertical split
 66 | 
 67 | 3.  `Favorite` -- horizontal split
 68 | 
 69 | This is the default direction pattern: alternating directions beginning with horizontal. Therefore we get the same plot with the following:
 70 | 
 71 | ``` r
 72 | mosaic(Favorite ~ Age + Music, 
 73 |        direction = c("h", "v", "h"), df)
 74 | ```
 75 | 
 76 | <img src="mosaic_files/figure-markdown_github/unnamed-chunk-5-1.png" style="display: block; margin: auto;" />
 77 | 
 78 | The directions can be altered as desired. For example, to create a doubledecker plot, make all splits vertical except the last one:
 79 | 
 80 | ``` r
 81 | mosaic(Favorite ~ Age + Music,
 82 |        direction = c("v", "v", "h"), df)
 83 | ```
 84 | 
 85 | <img src="mosaic_files/figure-markdown_github/unnamed-chunk-6-1.png" style="display: block; margin: auto;" />
 86 | 
 87 | Note that the direction vector is in order of splits (`Age`, `Music`, `Favorite`), not in the order in which the variables appear in the formula, where the last variable to be split is listed first, before the "~".
 88 | 
 89 | ### Options
 90 | 
 91 | #### Fill color:
 92 | 
 93 | ``` r
 94 | library(grid) # needed for gpar
 95 | mosaic(Favorite ~ Age + Music, 
 96 |        gp = gpar(fill = c("lightblue", "blue")),
 97 |        df)
 98 | ```
 99 | 
100 | <img src="mosaic_files/figure-markdown_github/unnamed-chunk-7-1.png" style="display: block; margin: auto;" />
101 | 
102 | #### Rotate labels:
103 | 
104 | ``` r
105 | mosaic(Favorite ~ Age + Music, 
106 |        labeling = labeling_border(rot_labels = c(45, -45, 0, 0)),
107 |        df)
108 | ```
109 | 
110 | <img src="mosaic_files/figure-markdown_github/unnamed-chunk-8-1.png" style="display: block; margin: auto;" />
111 | 
112 | The `rot_labels =` vector sets the rotation in degrees on the four sides of the plot in this order: *top, right, bottom, left*. (Different from the typical base graphics order!) The default is `rot_labels = c(0, 90, 0, 90)`.
113 | 
114 | #### Abbreviate labels:
115 | 
116 | ``` r
117 | mosaic(Favorite ~ Age + Music, 
118 |        labeling = labeling_border(abbreviate_labs = c(3, 1, 6)), 
119 |        df)
120 | ```
121 | 
122 | <img src="mosaic_files/figure-markdown_github/unnamed-chunk-9-1.png" style="display: block; margin: auto;" />
123 | 
124 | Labels are abbreviated in the order of the splits (as for `direction =`). The abbreviation algorithm appears to return the specified number of characters after vowels are eliminated (if necessary).
125 | 
126 | For more formatting options, see `>?vcd::labeling_border`.
127 | 
128 | #### Remove spacing between cells
129 | 
130 | ``` r
131 | mosaic(Favorite ~ Age + Music,
132 |        spacing = spacing_equal(sp = unit(0, "lines")),
133 |        df)
134 | ```
135 | 
136 | <img src="mosaic_files/figure-markdown_github/unnamed-chunk-10-1.png" style="display: block; margin: auto;" />
137 | 
138 | For more details, see `>?vcd::spacings`
139 | 
140 | #### Change border color (must also set fill(?))
141 | 
142 | ``` r
143 | mosaic(Favorite ~ Age + Music,
144 |        gp = gpar(fill = c("lightblue", "blue"),
145 |                  col = "white"),
146 |        spacing = spacing_equal(sp = unit(0, "lines")),
147 |        df)
148 | ```
149 | 
150 | <img src="mosaic_files/figure-markdown_github/unnamed-chunk-11-1.png" style="display: block; margin: auto;" />
151 | 
152 | Please feel free to suggest changes by submitting a pull request or ask questions / offer comments by opening an issue.
153 | 


--------------------------------------------------------------------------------
/R/pivot_longer.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using `tidyr::pivot_longer()` to tidy data"
  3 | author: "Joyce Robbins"
  4 | output: 
  5 |   github_document:
  6 |     toc: true
  7 | ---
  8 | 
  9 | ```{r setup, include=FALSE}
 10 | knitr::opts_chunk$set(echo = FALSE, warning = FALSE,
 11 |                       message = FALSE)
 12 | ```
 13 | 
 14 | The [`tidyr::pivot_longer()`](https://tidyr.tidyverse.org/reference/pivot_longer.html) function offers substantial improvements over its predecessor `tidyr::gather()`. This tutorial updates a previous one I wrote on using [`gather()`](https://github.com/jtr13/codehelp/blob/master/R/gather.md). Most of the difficulties I observed with `gather()` have been eliminated in `pivot_longer()`, so this tutorial will be shorter than the one it replaces.
 15 | 
 16 | I also recommend reading the official [`pivot_longer()` vignette](https://tidyr.tidyverse.org/articles/pivot.html){target="_blank"}.
 17 | 
 18 | `tidyr::pivot_longer()` is used to convert data from messy to tidy, or wide to long format (shhh, you didn't hear me say those words.) ["Tidy Data"](https://www.jstatsoft.org/article/view/v059i10) by Hadley Wickham, is the seminal paper on the topic, and lays out the concepts of messy vs. tidy in detail.
 19 | 
 20 | ## Example 1: City vs. Highway
 21 | 
 22 | ### Observe
 23 | 
 24 | Before getting into the nitty-gritty of `pivot_longer()` let's study what our messy data looks like, and what it will look like when it's tidied:
 25 | 
 26 | ```{r}
 27 | library(tidyverse)
 28 | messydata <- tibble(id = c("car1", "car2", "car3"),
 29 |                     city = c(19, 20, 29),
 30 |                     hwy = c(24, 30, 35))
 31 | ```
 32 | 
 33 | 
 34 | ```{r}
 35 | tidydata <- messydata %>% gather(key = "roadtype", 
 36 |                                  value = "mpg", -id)
 37 | ```
 38 | 
 39 | ![](../images/messyvstidy1.png)
 40 | 
 41 | Study the differences carefully.  What did you observe?
 42 | <br>
 43 | <br>
 44 | <br>
 45 | Hopefully you noticed the following:
 46 | 
 47 | 1. The `id` column appears in both versions, though it's double the length in the tidy form.
 48 | 
 49 | 2. We have two new columns: `roadtype` and `mpg`.
 50 | 
 51 | 3. The `city` and `hwy` column names became the contents of the new `roadtype` (*name*) column.
 52 | 
 53 | 4. The *values* of the `city` and `hwy` columns became the *values* of the new `mpg` (*value*) column, each paired with its old column name in the `roadtype` column.
 54 | 
 55 | 
 56 | 
 57 | ![](../images/messyvstidy2.png)
 58 | 
 59 | 
 60 | ### Plan
 61 | 
 62 | Now here's the trick: the only parts we need to concern ourselves with are 1. and 2.: deciding which columns to keep unchanged, and what names to give the new columns, which are called the *name* and *value* columns.
 63 | 
 64 | The columns to keep are the ones that are already tidy. The ones to dump are the ones that aren't true variables but in fact are *levels* of another variable.  In this case, `city` and `hwy` should be levels of a new variable called `roadtype` (or something similar), according to the theory of tidy data. On the other hand, `id` should remain as is.
 65 | 
 66 | The bottom line: **keep `id` as is**
 67 | 
 68 | ![](../images/messyvstidy8.png)
 69 | 
 70 | The second (and final) step is to choose names for the new *name* and *value* columns.  We've already chosen `roadtype` for *name*; let's pick `mpg` for *value*.  You can also choose to stick with the defaults for the new *name* and *value* column names: `name` and `value` (surprise).  Using `name` and `value` may be helpful at first to keep track of which is which, though ultimately I find more descriptive names to be more useful.
 71 | 
 72 | The bottom line: **names_to = `roadtype`, values_to = `mpg`**
 73 | 
 74 | ![](../images/messyvstidy9.png)
 75 | 
 76 | 
 77 | ### Code
 78 | 
 79 | The code to carry out the transformation from messy to tidy is one call to `pivot_longer()`, with parameters that reflect our conclusions about the columns to keep and the names we've chosen:
 80 | 
 81 | ```{r, echo = TRUE}
 82 | tidydata <- messydata %>% 
 83 |   pivot_longer(cols = !id, names_to = "roadtype",  values_to = "mpg")
 84 | ```
 85 | 
 86 | The most important thing to note is that *each parameter corresponds to one of the columns in the tidy data set.*
 87 | 
 88 | Now let's look at them one at a time in more detail:
 89 | 
 90 | `!id` says "Don't touch my `id` column! Leave it as is!"
 91 | 
 92 | `names_to = "roadtype"` is an instruction to *create a new name column and call it "roadtype".*
 93 | 
 94 | `values_to = "mpg"` likewise is an instruction to *create a new value column and call it "mpg".*
 95 | 
 96 | Nothing else needs to be specified: every value in a column not marked "don't touch" will be moved to the *value* ("mpg") column, paired with its old column name (in this case "city" or "hwy") from the messy data set, which now appears in the *name* column.
 97 | 
 98 | Another option is to specify the columns to pivot, rather than the columns to keep:
 99 | 
100 | ```{r}
101 | messydata %>% 
102 |   pivot_longer(cols = c("city", "hwy"), names_to = "roadtype",  values_to = "mpg")
103 | ```
104 | 
105 | ### Common problem: missing *id* column
106 | 
107 | Note in our example that the `id` column is important for linking the city and highway miles per gallon.  For example, we might want to plot the data as follows, which requires knowing which car had which city and highway mpg:
108 | 
109 | ```{r, fig.height = 3, fig.width = 6}
110 | ggplot(tidydata, aes(mpg, id, color = roadtype)) + geom_point(size = 2) + theme_bw(18)
111 | ```
112 | 
113 | Often, however, there is no `id` column: it's not necessary since each row represents one item -- a car in this case. If we try to tidy messy data without an id, it looks like this:
114 | 
115 | ```{r}
116 | messydata %>% 
117 |   select("city", "hwy") %>% 
118 |   pivot_longer(cols = c("city", "hwy"), names_to = "roadtype", values_to = "mpg")
119 | ```
120 | 
121 | The problem is that we've lost the natural connection we had between city and highway for each car since the two values no longer reside in the same row.  The solution here is to move the row names -- or row numbers in this case -- to a column to be used as an id:
122 | 
123 | ```{r, echo = TRUE}
124 | messydata %>% 
125 |   select("city", "hwy") %>% 
126 |   rownames_to_column("id") %>% 
127 |   pivot_longer(!id, names_to = "roadtype", values_to = "mpg")
128 | ```
129 | 
130 | ## Example 2: `MASS::painters` dataset
131 | 
132 | Let's take the `painters` dataset from the **MASS** package:
133 | ```{r}
134 | library(MASS)
135 | head(painters)
136 | ```
137 | 
138 | ...and tidy it into four columns as such:
139 | 
140 | ```{r}
141 | library(MASS)
142 | tidypaint <- painters %>% 
143 |   rownames_to_column("Name") %>% 
144 |   pivot_longer(cols = !c("Name", "School"), names_to = "Skill", values_to = "Score")
145 | head(tidypaint)
146 | ```
147 | 
148 | This example is a little more complex than the previous one, since it's missing an *id* column, and there is more than one "keep" column. (You are encouraged to try this on your own before looking at the solution!)
149 | 
150 | ### Observe
151 | 
152 | ![](../images/messyvstidy6.png)
153 | 
154 | 
155 | 
156 | ### Plan
157 | 
158 | ![](../images/messyvstidy7.png)
159 | 
160 |  - Move rownames to a new column and call it `Name` (see the "Common problem: missing *id* column" section above to understand the need for this new column.)
161 | 
162 |  - Keep `Name` and `School` columns as is. ("Don't Touch!") 
163 | 
164 |  - Since the four columns `Composition`, `Drawing`, `Colour`, and `Expression` are really levels of another variable, they do not get the "keep as is" designation.  That means we are staging them to be dumped into a new *name* variable which we'll call `Skill`. In addition, the values contained in these columns will move to a new *value* column, which we'll call `Score`.
165 | 
166 | ### Code
167 | 
168 | ```{r, echo = TRUE}
169 | library(MASS)
170 | library(tidyverse)
171 | tidypaint <- painters %>% 
172 |   rownames_to_column("Name") %>% 
173 |   pivot_longer(cols = !c("Name", "School"), names_to = "Skill", values_to = "Score")
174 | ```
175 | 
176 | The instructions:
177 | 
178 | "Take the (messy) dataset `painters`. Move the contents of the rownames to a new column called `Name`. Tidy the data by creating a new *name* column called `Skill` and a new *value* column called `Score`. Do not touch or change the `Name` and `School` columns (other than to replicate as necessary)."
179 | 
180 | The result:
181 | 
182 | The four other column names will fill the new `Skill` column, and the values of those columns will fill the new `Score` column.
183 | 
184 | Thank you to [\@angelotrivelli](https://twitter.com/angelotrivelli) [\@dch4n](https://twitter.com/@dch4n) [\@jschoeley](https://twitter.com/jschoeley) [\@jspncr_](https://twitter.com/jspncr_) [\@kierisi](https://twitter.com/kierisi) [\@s_lump](https://twitter.com/s_lump) for providing feedback and helpful suggestions in response to [this Twitter post](https://twitter.com/jtrnyc/status/958845845385940993) about my [`gather()` tutorial](https://github.com/jtr13/codehelp/blob/master/R/gather.md) upon which this tutorial is based.
185 | 


--------------------------------------------------------------------------------
/data/countries2012.csv:
--------------------------------------------------------------------------------
1 | COUNTRY,CONTINENT,GDP,TFR,LIFEEXP,CHMORTAfghanistan,Asia,690.842629,5.272,59.67960976,99.5Albania,Europe,4247.485437,1.76,77.35046341,15.5Algeria,Africa,5583.61616,2.909,74.32409756,26.1Angola,Africa,5531.776299,6.251,51.464,172.2Antigua and Barbuda,North America,13525.61622,2.102,75.62180488,9.1Argentina,South America,14357.41159,2.347,75.8162439,13.8Armenia,Europe,3565.517575,1.581,74.45209756,16.3Australia,Oceania,67646.10385,1.921,82.04634146,4.3Austria,Europe,48324.25404,1.44,80.93658537,4Azerbaijan,Europe,7393.771877,2,70.62495122,35.7"Bahamas, The",North America,22112.60835,1.893,74.91446341,13.2Bahrain,Asia,23063.13229,2.095,76.40763415,7.3Bangladesh,Asia,858.9333626,2.245,70.86026829,44Barbados,North America,15317.139,1.788,75.17102439,14.1Belarus,Europe,6721.834908,1.62,71.96585366,5.3Belgium,Europe,44731.21948,1.79,80.38536585,4.3Belize,North America,4674.293377,2.643,69.90514634,18Benin,Africa,807.688451,4.927,59.12197561,107Bhutan,Asia,2452.151588,2.152,68.72290244,37.8Bolivia,South America,2645.290274,3.073,67.44546341,43.1Bosnia and Herzegovina,Europe,4415.923592,1.28,76.12017073,6.5Botswana,Africa,6935.593653,2.881,64.22273171,49.5Brazil,South America,11922.51306,1.812,73.83958537,16Brunei Darussalam,Asia,41807.65334,1.913,78.25258537,9.7Bulgaria,Europe,7333.355073,1.5,74.31463415,12Burkina Faso,Africa,673.0267834,5.693,57.87931707,101.4Burundi,Africa,244.1964862,6.123,55.78929268,91.2Cambodia,Asia,946.4766787,2.739,67.32887805,35.8Cameroon,Africa,1222.192142,4.859,54.5875122,97.4Canada,North America,52733.47369,1.61,81.23804878,5.3Cape Verde,Africa,3497.691141,2.376,72.82821951,26.6Central African Republic,Africa,469.6842871,4.451,49.10529268,142.1Chad,Africa,972.6793451,6.374,50.78139024,151.6Chile,South America,15253.33083,1.789,80.89485366,8.6China,Asia,6264.643878,1.663,75.1995122,13.4Colombia,South America,7885.061292,1.948,73.63078049,17.4Comoros,Africa,750.3146086,4.628,62.58297561,80.9"Congo, Dem. 
Rep.",Africa,390.7066035,6.199,57.85407317,108.5"Congo, Rep.",Africa,3191.164299,4.961,60.92390244,52.6Costa Rica,North America,9733.396931,1.866,79.05353659,10.1Cote d'Ivoire,Africa,1281.382865,5.121,50.86334146,102.5Croatia,Europe,13235.97757,1.51,76.92439024,4.9Cuba,North America,6448.155635,1.628,79.14160976,5.8Cyprus,Europe,28868.27382,1.464,79.76226829,3.2Czech Republic,Europe,19640.92866,1.45,78.07560976,3.8Denmark,Europe,57636.12531,1.73,80.05121951,3.8Djibouti,Africa,1586.780133,3.332,61.29597561,71.7Dominican Republic,North America,5967.000984,2.539,73.13531707,32.7Ecuador,South America,5702.168288,2.599,75.433,23.7"Egypt, Arab Rep.",Africa,3068.193883,3.306,70.72914634,26.8El Salvador,North America,3921.720395,1.991,72.23185366,18.6Equatorial Guinea,Africa,23278.23006,5.011,56.89026829,103.7Estonia,Europe,17490.99313,1.56,76.32682927,3.8Ethiopia,Africa,469.7923039,4.642,62.79353659,67.7Fiji,Oceania,4550.267095,2.615,69.74321951,23.5Finland,Europe,47415.55987,1.8,80.62682927,2.7France,Europe,40850.35237,2.01,81.96829268,4.3Gabon,Africa,10642.43225,4.01,63.28073171,57.7"Gambia, The",Africa,504.9890138,5.775,59.77814634,76.1Georgia,Europe,3528.731511,1.82,73.94487805,14.4Germany,Europe,44010.93139,1.38,80.89268293,4Ghana,Africa,1641.825922,4.238,60.97702439,69.2Greece,Europe,22146.91592,1.34,80.63414634,4.7Grenada,North America,7583.546304,2.194,73.00226829,12.7Guatemala,North America,3278.629083,3.317,71.24939024,32.1Guinea,Africa,487.3457142,5.175,57.63763415,104Guinea-Bissau,Africa,559.224752,4.972,54.5035122,105.2Guyana,South America,3759.377095,2.615,66.21726829,41.2Haiti,North America,766.8722334,3.169,62.03339024,75.2Honduras,North America,2395.073442,2.514,72.75502439,22.6Hungary,Europe,12819.71206,1.34,75.06341463,6.3Iceland,Europe,44258.84279,2.04,82.91707317,2.2India,Asia,1449.664875,2.51,67.28987805,54.5Indonesia,Asia,3700.523538,2.5,68.51956098,30.4"Iran, Islamic Rep.",Asia,7710.513314,1.742,74.79934146,17.5
Iraq,Asia,6650.228867,4.086,69.24192683,34.8Ireland,Europe,48976.92975,2.01,80.89512195,4Israel,Asia,32818.85838,3.05,81.70487805,4.3Italy,Europe,34844.49809,1.43,82.23902439,3.8Jamaica,North America,5445.894718,2.284,73.2824878,17.2Japan,Asia,46679.26543,1.41,83.09609756,3Jordan,Asia,4896.688447,3.314,73.74739024,19.6Kazakhstan,Asia,12120.30534,2.62,69.61,17.9Kenya,Africa,1184.923256,4.481,60.27278049,55.6Kiribati,Oceania,1641.197419,3.796,65.59607317,60.4"Korea, Rep.",Asia,24453.97191,1.297,81.21341463,3.8Kuwait,Asia,50903.9046,2.626,74.359,9.9Kyrgyz Republic,Asia,1177.974735,3.2,70.00243902,25.7Lao PDR,Asia,1445.86945,3.138,65.2485122,74Latvia,Europe,13775.26158,1.44,73.77804878,8.7Lebanon,Asia,9729.282193,1.498,79.84636585,9.3Lesotho,Africa,1158.804222,3.253,48.836,94.1Liberia,Africa,414.1851554,4.868,60.20436585,80.3Libya,Africa,13035.1922,2.543,71.64956098,15.1Lithuania,Europe,14342.52348,1.6,73.86341463,5.6Luxembourg,Europe,105447.0932,1.57,81.39268293,2.1"Macedonia, FYR",Europe,4709.511628,1.497,75.03126829,7.4Madagascar,Africa,444.9584938,4.528,64.24665854,55.5Malawi,Africa,270.0875313,5.318,60.05029268,77.3Malaysia,Asia,10834.65908,1.968,74.42331707,7.7Maldives,Asia,6529.978071,2.175,76.46234146,10.7Mali,Africa,641.7938201,6.396,57.095,127Malta,Europe,21176.30998,1.43,80.74634146,6.7Mauritania,Africa,1282.785101,4.721,62.56017073,92.7Mauritius,Africa,9113.640643,1.54,73.86341463,14.6Mexico,North America,9703.371017,2.3,76.35409756,15.3"Micronesia, Fed. 
Sts.",Oceania,3147.679202,3.347,68.85070732,38.2Moldova,Europe,2046.536787,1.462,68.69341463,16.7Mongolia,Asia,4377.23887,2.641,68.6135122,26.2Montenegro,Europe,6586.721279,1.676,74.64987805,5.7Morocco,Africa,2931.4002,2.545,73.36465854,30.7Mozambique,Africa,564.8124631,5.472,54.21212195,90.9Myanmar,Asia,1421.497351,2.28,65.42778049,55.3Namibia,Africa,5679.958215,3.586,63.88114634,49.9Nepal,Asia,685.4967586,2.381,68.82331707,40.9Netherlands,Europe,49474.70561,1.72,81.10487805,4.2Nicaragua,North America,1779.867088,2.34,74.21246341,24.4Niger,Africa,393.643423,7.642,60.07253659,109.6Nigeria,Africa,2739.852189,5.758,52.105,120.9Norway,Europe,101563.7027,1.85,81.45121951,3Oman,Asia,21533.8076,2.857,76.58956098,11.6Pakistan,Asia,1266.380758,3.744,65.71687805,87.8Panama,North America,10138.52113,2.484,77.23704878,18.7Papua New Guinea,Oceania,2151.210277,3.869,62.29990244,62.5Paraguay,South America,3858.036492,2.625,72.654,22.6Peru,South America,6388.845098,2.504,74.05765854,19.1Philippines,Asia,2604.655997,3.048,68.00707317,30.4Poland,Europe,13142.04599,1.3,76.79756098,5.3Portugal,Europe,20577.40264,1.28,80.37317073,3.8Qatar,Asia,94407.40692,2.059,78.22765854,8.6Romania,Europe,8577.289214,1.53,74.46341463,12.5Russian Federation,Asia,14078.83057,1.7,70.36585366,10.8Rwanda,Africa,667.4145823,4.143,62.79936585,52.1Samoa,Oceania,4257.060935,4.212,72.98004878,18.5Sao Tome and Principe,Africa,1488.048003,4.689,66.13390244,52.4Saudi Arabia,Asia,24883.18971,2.873,74.01602439,16Senegal,Africa,1019.27223,5.161,65.31887805,55.9Serbia,Europe,5659.380204,1.45,74.83658537,7.1Seychelles,Africa,12844.85853,2.4,74.22682927,14.2Sierra Leone,Africa,618.9472529,4.874,49.74909756,141.6Singapore,Asia,54577.13737,1.29,81.99512195,2.8Slovak Republic,Europe,17207.27921,1.34,76.1097561,7.9Slovenia,Europe,22477.59756,1.58,80.12439024,3Solomon Islands,Oceania,1866.707246,4.098,67.50658537,30.7South Africa,Africa,7592.157997,2.412,56.09831707,47.7South 
Sudan,Africa,944.2828116,5.197,54.727,102.8Spain,Europe,28647.83524,1.32,82.42682927,4.4Sri Lanka,Asia,3366.51036,2.346,74.06804878,10.4St. Lucia,North America,7248.234505,1.936,74.77763415,15.4St. Vincent and the Grenadines,North America,6337.770112,2.021,72.67185366,19.8Sudan,Africa,1662.287641,4.491,62.83219512,76.3Suriname,South America,9422.270994,2.411,70.80631707,23.4Swaziland,Africa,3988.667162,3.407,48.85063415,72.6Sweden,Europe,57134.07707,1.91,81.70487805,3Switzerland,Europe,83208.68654,1.52,82.69756098,4.3Tajikistan,Asia,962.4391249,3.525,69.16697561,49.3Tanzania,Africa,827.5288808,5.287,63.52090244,55.7Thailand,Asia,5917.917934,1.534,74.07190244,13.5Timor-Leste,Asia,1127.108215,5.3,67.02058537,58.7Togo,Africa,580.4950618,4.727,58.55385366,85.8Tonga,Oceania,4364.309244,3.815,72.488,17.5Trinidad and Tobago,North America,18322.3238,1.798,70.15139024,22.2Tunisia,Africa,4187.543531,2.2,73.99512195,15.8Turkey,Asia,10646.03553,2.06,74.86243902,16.5Turkmenistan,Asia,6797.721166,2.353,65.31158537,56.5Uganda,Africa,656.3980727,5.964,57.10019512,64.1Ukraine,Europe,3855.42128,1.531,70.94414634,10.6United Arab Emirates,Asia,41712.12421,1.82,77.02414634,7.8United Kingdom,Europe,41294.5148,1.92,80.90487805,4.8United States,North America,51456.65873,1.8805,78.74146341,7.1Uruguay,South America,15127.64415,2.048,76.68839024,11.6Uzbekistan,Asia,1719.036196,2.3,68.104,43.2Vanuatu,Oceania,3158.420974,3.419,71.40817073,28.6"Venezuela, RB",South America,12771.59504,2.417,73.92573171,16.1Vietnam,Asia,1755.265424,1.768,75.60668293,23.5West Bank and Gaza,Asia,2782.905026,4.076,73.01787805,22.8"Yemen, Rep.",Asia,1289.034078,4.416,63.32729268,48.4Zambia,Africa,1686.618024,5.511,58.36331707,74.4Zimbabwe,Africa,850.827694,4.016,53.64307317,78.5


--------------------------------------------------------------------------------
/R/gather.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using `tidyr::gather()` to tidy data"
  3 | author: "Joyce Robbins"
  4 | date: "originally published: 1/31/2018"
  5 | output: 
  6 |   github_document:
  7 |     toc: true
  8 | ---
  9 | 
 10 | ```{r setup, include=FALSE}
 11 | knitr::opts_chunk$set(echo = FALSE, warning = FALSE,
 12 |                       message = FALSE)
 13 | ```
 14 | 
 15 | **`tidyr::gather()` has been superseded by `tidyr::pivot_longer()`. Please see [this tutorial on `pivot_longer()`](https://github.com/jtr13/codehelp/blob/master/R/pivot_longer.md) instead.**
 16 | 
 17 | This tutorial focuses on one function: `tidyr::gather()`, which is used to convert data from messy to tidy.  I won't get into the distinction here -- I'm going to assume that you know the terms and just need help figuring out how to get from A to B. If, however, this is all new to you, I suggest reading ["Tidy Data"](https://www.jstatsoft.org/article/view/v059i10) by Hadley Wickham, the seminal paper on the topic, which lays out the concept in detail.
 18 | 
 19 | ## Example 1: City vs. Highway
 20 | 
 21 | ### Observe
 22 | 
 23 | Before getting into the nitty-gritty of `gather()` let's study what our messy data looks like, and what it will look like when it's tidied:
 24 | 
 25 | ```{r, echo = FALSE}
 26 | library(tidyverse)
 27 | messydata <- tibble(id = c("car1", "car2", "car3"),
 28 |                     city = c(19, 20, 29),
 29 |                     hwy = c(24, 30, 35))
 30 | ```
 31 | 
 32 | 
 33 | ```{r}
 34 | tidydata <- messydata %>% 
 35 |   gather(key = "roadtype", 
 36 |          value = "mpg", -id)
 37 | ```
 38 | 
 39 | ![](../images/messyvstidy1.png)
 40 | 
 41 | Study the differences carefully.  What did you observe?
 42 | <br>
 43 | <br>
 44 | <br>
 45 | Hopefully you noticed the following:
 46 | 
 47 | 1. The `id` column appears in both versions, though it's double the length in the tidy form.
 48 | 
 49 | 2. We have two new columns: `roadtype` and `mpg`.
 50 | 
 51 | 3. The `city` and `hwy` column names became the contents of the new `roadtype` (*key*) column.
 52 | 
 53 | 4. The *values* of the `city` and `hwy` columns became the *values* of the new `mpg` (*value*) column.
 54 | 
 55 | 
 56 | 
 57 | ![](../images/messyvstidy2.png)
 58 | 
 59 | 
 60 | ### Plan
 61 | 
 62 | Now here's the trick: the only parts we need to concern ourselves with are 1. and 2.: deciding which columns to keep unchanged, and what names to give the new columns, which are called the *key* and *value* columns.
 63 | 
 64 | The columns to keep are the ones that are already tidy. The ones to dump are the ones that aren't true variables but in fact are *levels* of another variable.  In this case, `city` and `hwy` should be levels of a new variable called `roadtype` (or something similar), according to the theory of tidy data. On the other hand, `id` should remain as is.
 65 | 
 66 | The bottom line: **keep `id` as is**
 67 | 
 68 | ![](../images/messyvstidy3.png)
 69 | 
 70 | The second (and final) step is to choose names for the new *key* and *value* columns.  We've already chosen `roadtype` for *key*; let's pick `mpg` for *value*.  You can also choose to stick with the defaults for the new *key* and *value* column names: `key` and `value` (surprise).  Using `key` and `value` may be helpful at first to keep track of which is which, though ultimately I find more descriptive names to be more useful.
 71 | 
 72 | The bottom line: **key = `roadtype`, value = `mpg`**
 73 | 
 74 | ![](../images/messyvstidy4.png)
 75 | 
 76 | 
 77 | ### Code
 78 | 
 79 | The code to carry out the transformation from messy to tidy is one call to `gather()`, with parameters that reflect our conclusions about the columns to keep and the names we've chosen:
 80 | 
 81 | ```{r, echo = TRUE}
 82 | tidydata <- messydata %>%   # start from the messy (wide) data
 83 |   gather(key = "roadtype",  value = "mpg", -id)  # new key and value columns; id is left as is
 84 |   
 85 | ```
 86 | 
 87 | The most important thing to note is that *each parameter corresponds to one of the columns in the tidy data set.*
 88 | 
 89 | Now let's look at them one at a time in more detail:
 90 | 
 91 | `key = "roadtype"` sounds like it's setting the *key* to a preexisting column called "roadtype", but that's not what's going on.  Rather, think of this as an instruction to *create a new key column and call it "roadtype".*
 92 | 
 93 | `value = "mpg"` likewise is an instruction to *create a new value column and call it "mpg".*
 94 | 
 95 | `-id` is the piece that says "Don't touch my `id` column! Leave it as is!"
 96 | 
 97 | Nothing else needs to be specified: every value in a column not marked "don't touch" will be moved to the *value* ("mpg") column, paired with its old column name (in this case "city" or "hwy") from the messy data set, which now appears in the *key* column.
 98 | 
 99 | ## Heads up
100 | 
101 | There are a few problems I've seen with the way in which people use `gather()`:
102 | 
103 | ### Not separating the messy data columns properly into "keep" and "dump".
104 | 
105 | If we had made this mistake in the example above, the result would be something like:
106 | 
107 | ```{r, echo = TRUE}
108 | messydata %>% 
109 |   gather(key = "roadtype", value = "mpg")  # mistake: without -id, the id column is gathered too
110 | ```
111 | 
112 | Yikes, not what we wanted.  Looking at the `mpg` column, we see that "car1", "car2", and "car3" don't belong.  The solution is to add the `-id` parameter -- remember, think: "don't touch `id`" -- so that it isn't "dumped" into the `key` column.
113 | 
114 | Note: it is possible to specify the columns to *dump* rather than the columns to *keep*:
115 | 
116 | ```{r, echo = TRUE}
117 | messydata %>% 
118 |   gather(key = "roadtype", value = "mpg", city, hwy)  # list the columns to dump instead of the ones to keep
119 | ```
120 | 
121 | I find this method less intuitive, but of course it's your choice.
122 | 
123 | ### Missing *id* column
124 | 
125 | Note in our example that the `id` column is important for linking the city and highway miles per gallon.  For example, we might want to plot the data as follows, which requires knowing which car had which city and highway mpg:
126 | 
127 | ```{r, fig.height = 3, fig.width = 6}
128 | # Dot plot: one row per car, with city and highway mpg distinguished by color.
129 | ggplot(data = tidydata, mapping = aes(x = mpg, y = id, color = roadtype)) + 
130 |   geom_point(size = 2) + theme_bw(base_size = 18)
131 | ```
132 | 
133 | Often, however, there is no `id` column: it's not necessary since each row represents one item -- a car in this case. If we try to tidy messy data without an id, it looks like this:
134 | 
135 | ```{r}
136 | # Drop the id column, then gather everything that is left.
137 | messy2 <- dplyr::select(messydata, "city", "hwy")
138 | tidy2 <- gather(messy2, key = "roadtype",
139 |                 value = "mpg")
140 | ```
141 | 
142 | ![](../images/messyvstidy5.png)
143 | 
144 | The problem is that we've lost the natural connection we had between city and highway for each car since the two values no longer reside in the same row.  The solution here is to move the row names -- or row numbers in this case -- to a column to be used as an id:
145 | 
146 | ```{r, echo = TRUE}
147 | messy2 %>% rownames_to_column("id") %>%   # row numbers become an explicit id column
148 |   gather(key, value, -id)  # keep id; use the default key/value column names
149 | ```
150 | 
151 | Note that we must specify the "don't touch" id column that we just created.  Also note that we used the default `key` and `value` column names rather than choose our own.  
152 | 
153 | ## Example 2: `MASS::painters` dataset
154 | 
155 | Let's take the `painters` dataset from the **MASS** package:
156 | ```{r}
157 | library("MASS")  # the painters dataset comes from MASS
158 | head(x = painters)
159 | ```
160 | 
161 | ...and tidy it into four columns as such:
162 | 
163 | ```{r}
164 | library("MASS")
165 | # Row names -> Name column, then gather every column except Name and School.
166 | tidypaint <- gather(rownames_to_column(painters, "Name"), key = "Skill", value = "Score", -Name, -School)
167 | head(x = tidypaint)
168 | ```
169 | 
170 | This example is a little more complex than the previous one, since it's missing an *id* column, and there is more than one "keep" column. (You are encouraged to try this on your own before looking at the solution!)
171 | 
172 | ### Observe
173 | 
174 | ![](../images/messyvstidy6.png)
175 | 
176 | 
177 | 
178 | ### Plan
179 | 
180 | ![](../images/messyvstidy7.png)
181 | 
182 |  - Move rownames to a new column and call it `Name` (see "missing `id` column" in the **Heads up** section above to understand the need for this new column.)
183 | 
184 |  - Keep `Name` and `School` columns as is. ("Don't Touch!") 
185 | 
186 |  - Since the four columns `Composition`, `Drawing`, `Colour`, and `Expression` are really levels of another variable, they do not get the "keep" as is designation.  That means we are staging them to be dumped into a new *key* variable which we'll call `Skill`. In addition, the values contained in these columns will move to a new *value* column, which we'll call `Score`.
187 | 
188 | ### Code
189 | 
190 | ```{r, echo = TRUE}
191 | library(MASS)
192 | library(tidyverse)
193 | tidypaint <- painters %>% 
194 |   rownames_to_column("Name") %>%   # move the row names into a new Name column
195 |   gather(key = "Skill", value = "Score", -Name, -School)  # Name and School are kept as is
196 | ```
197 | 
198 | The breakdown:
199 | 
200 | ![](../images/Inkedmessyvstidy8.jpg)
201 | 
202 | Finally, if the visuals aren't your style, here's a running commentary of the instructions:
203 | 
204 | "Take the (messy) dataset `painters`. Move the contents of the rownames to a new column called `Name`. Now let's start tidying by gathering multiple columns into *key-value* pairs. Do this by creating a new *key* column called `Skill`. While you're at it, create a new *value* column called `Score`. Absolutely do not touch or change the `Name` and `School` columns (other than to replicate as necessary). It goes without saying that the four other column names will fill the new `Skill` column, and the values of those columns will fill the new `Score` column, since it's understood from what I've said already! Much appreciated!"
205 | 
206 | Thank you to [\@angelotrivelli](https://twitter.com/angelotrivelli) [\@dch4n](https://twitter.com/@dch4n) [\@jschoeley](https://twitter.com/jschoeley) [\@jspncr_](https://twitter.com/jspncr_) [\@kierisi](https://twitter.com/kierisi) [\@s_lump](https://twitter.com/s_lump) for providing feedback and helpful suggestions in response to [this Twitter post.](https://twitter.com/jtrnyc/status/958845845385940993)
207 | 


--------------------------------------------------------------------------------
/R/pivot_longer.md:
--------------------------------------------------------------------------------
  1 | Using `tidyr::pivot_longer()` to tidy data
  2 | ================
  3 | Joyce Robbins
  4 | 
  5 |   - [Example 1: City vs. Highway](#example-1-city-vs.-highway)
  6 |       - [Observe](#observe)
  7 |       - [Plan](#plan)
  8 |       - [Code](#code)
  9 |       - [Common problem: missing *id*
 10 |         column](#common-problem-missing-id-column)
 11 |   - [Example 2: `MASS::painters`
 12 |     dataset](#example-2-masspainters-dataset)
 13 |       - [Observe](#observe-1)
 14 |       - [Plan](#plan-1)
 15 |       - [Code](#code-1)
 16 | 
 17 | The [`tidyr::pivot_longer()`](https://tidyr.tidyverse.org/reference/pivot_longer.html) function offers substantial improvements
 18 | over its predecessor `tidyr::gather()`. This tutorial updates a previous
 19 | one I wrote on using
 20 | [`gather()`](https://github.com/jtr13/codehelp/blob/master/R/gather.md).
 21 | Most of the difficulties I observed with `gather()` have been eliminated
 22 | in `pivot_longer()`, so this tutorial will be shorter than the one it
 23 | replaces.
 24 | 
 25 | I also recommend reading the official [`pivot_longer()`
 26 | vignette](https://tidyr.tidyverse.org/articles/pivot.html).
 27 | 
 28 | `tidyr::pivot_longer()` is used to convert data from messy to tidy, or
 29 | wide to long format (shhh, you didn’t hear me say those words.) [“Tidy
 30 | Data”](https://www.jstatsoft.org/article/view/v059i10) by Hadley
 31 | Wickham, is the seminal paper on the topic, and lays out the concepts of
 32 | messy vs. tidy in detail.
 33 | 
 34 | ## Example 1: City vs. Highway
 35 | 
 36 | ### Observe
 37 | 
 38 | Before getting into the nitty-gritty of `pivot_longer()` let’s study
 39 | what our messy data looks like, and what it will look like when it’s
 40 | tidied:
 41 | 
 42 | ![](../images/messyvstidy1.png)
 43 | 
 44 | Study the differences carefully. What did you observe? <br> <br> <br>
 45 | Hopefully you noticed the following:
 46 | 
 47 | 1.  The `id` column appears in both versions, though it’s double the
 48 |     length in the tidy form.
 49 | 
 50 | 2.  We have two new columns: `roadtype` and `mpg`.
 51 | 
 52 | 3.  The `city` and `hwy` column names became the contents of the new
 53 |     `roadtype` (*key*) column.
 54 | 
 55 | 4.  The *values* of the `city` and `hwy` columns became the *values* of
 56 |     the new `mpg` (*value*) column.
 57 | 
 58 | ![](../images/messyvstidy2.png)
 59 | 
 60 | ### Plan
 61 | 
 62 | Now here’s the trick: the only parts we need to concern ourselves with
 63 | are 1. and 2.: deciding which columns to keep unchanged, and what names
 64 | to give the new columns, which are called the *name* and *value*
 65 | columns.
 66 | 
 67 | The columns to keep are the ones that are already tidy. The ones to dump
 68 | are the ones that aren’t true variables but in fact are *levels* of
 69 | another variable. In this case, `city` and `hwy` should be levels of a
 70 | new variable called `roadtype` (or something similar), according to the
 71 | theory of tidy data. On the other hand, `id` should remain as is.
 72 | 
 73 | The bottom line: **keep `id` as is**
 74 | 
 75 | ![](../images/messyvstidy8.png)
 76 | 
 77 | The second (and final) step is to choose names for the new *name* and
 78 | *value* columns. We’ve already chosen `roadtype` for *name*; let’s pick
 79 | `mpg` for *value*. You can also choose to stick with the defaults for
 80 | the new *name* and *value* column names: `name` and `value` (surprise).
 81 | Using `name` and `value` may be helpful at first to keep track of which
 82 | is which, though ultimately I find more descriptive names to be more
 83 | useful.
 84 | 
 85 | The bottom line: **names\_to = `roadtype`, values\_to = `mpg`**
 86 | 
 87 | ![](../images/messyvstidy9.png)
 88 | 
 89 | ### Code
 90 | 
 91 | The code to carry out the transformation from messy to tidy is one call
 92 | to `pivot_longer()`, with parameters that reflect our conclusions about
 93 | the columns to keep and the names we’ve chosen:
 94 | 
 95 | ``` r
 96 | tidydata <- messydata %>% 
 97 |   pivot_longer(cols = !id, names_to = "roadtype",  values_to = "mpg")  # pivot every column except id
 98 | ```
 99 | 
100 | The most important thing to note is that *each parameter corresponds to
101 | one of the columns in the tidy data set.*
102 | 
103 | Now let’s look at them one at a time in more detail:
104 | 
105 | `!id` says “Don’t touch my `id` column\! Leave it as is\!”
106 | 
107 | `names_to = "roadtype"` is an instruction to *create a new name column
108 | and call it “roadtype”.*
109 | 
110 | `values_to = "mpg"` likewise is an instruction to *create a new value
111 | column and call it “mpg”.*
112 | 
113 | Nothing else needs to be specified: every value in a column not marked
114 | “don’t touch” will be moved to the *value* (“mpg”) column, paired with
115 | its old column name (in this case “city” or “hwy”) from the messy data
116 | set, which now appears in the *name* column.
117 | 
118 | Another option is to specify the columns to pivot, rather than the columns to
119 | keep:
120 | 
121 |     ## # A tibble: 6 x 3
122 |     ##   id    roadtype   mpg
123 |     ##   <chr> <chr>    <dbl>
124 |     ## 1 car1  city        19
125 |     ## 2 car1  hwy         24
126 |     ## 3 car2  city        20
127 |     ## 4 car2  hwy         30
128 |     ## 5 car3  city        29
129 |     ## 6 car3  hwy         35
130 | 
131 | ### Common problem: missing *id* column
132 | 
133 | Note in our example that the `id` column is important for linking the
134 | city and highway miles per gallon. For example, we might want to plot
135 | the data as follows, which requires knowing which car had which city and
136 | highway mpg:
137 | 
138 | ![](pivot_longer_files/figure-gfm/unnamed-chunk-5-1.png)<!-- -->
139 | 
140 | Often, however, there is no `id` column: it’s not necessary since each
141 | row represents one item – a car in this case. If we try to tidy messy
142 | data without an id, it looks like this:
143 | 
144 |     ## # A tibble: 6 x 2
145 |     ##   roadtype   mpg
146 |     ##   <chr>    <dbl>
147 |     ## 1 city        19
148 |     ## 2 hwy         24
149 |     ## 3 city        20
150 |     ## 4 hwy         30
151 |     ## 5 city        29
152 |     ## 6 hwy         35
153 | 
154 | The problem is that we’ve lost the natural connection we had between
155 | city and highway for each car since the two values no longer reside in
156 | the same row. The solution here is to move the row names – or row
157 | numbers in this case – to a column to be used as an id:
158 | 
159 | ``` r
160 | messydata %>% 
161 |   select("city", "hwy") %>%   # keep only city and hwy, i.e. data with no id column
162 |   rownames_to_column("id") %>%   # row numbers become the id
163 |   pivot_longer(!id, names_to = "roadtype", values_to = "mpg")
164 | ```
165 | 
166 |     ## # A tibble: 6 x 3
167 |     ##   id    roadtype   mpg
168 |     ##   <chr> <chr>    <dbl>
169 |     ## 1 1     city        19
170 |     ## 2 1     hwy         24
171 |     ## 3 2     city        20
172 |     ## 4 2     hwy         30
173 |     ## 5 3     city        29
174 |     ## 6 3     hwy         35
175 | 
176 | ## Example 2: `MASS::painters` dataset
177 | 
178 | Let’s take the `painters` dataset from the **MASS** package:
179 | 
180 |     ##               Composition Drawing Colour Expression School
181 |     ## Da Udine               10       8     16          3      A
182 |     ## Da Vinci               15      16      4         14      A
183 |     ## Del Piombo              8      13     16          7      A
184 |     ## Del Sarto              12      16      9          8      A
185 |     ## Fr. Penni               0      15      8          0      A
186 |     ## Guilio Romano          15      16      4         14      A
187 | 
188 | …and tidy it into four columns as such:
189 | 
190 |     ## # A tibble: 6 x 4
191 |     ##   Name     School Skill       Score
192 |     ##   <chr>    <fct>  <chr>       <int>
193 |     ## 1 Da Udine A      Composition    10
194 |     ## 2 Da Udine A      Drawing         8
195 |     ## 3 Da Udine A      Colour         16
196 |     ## 4 Da Udine A      Expression      3
197 |     ## 5 Da Vinci A      Composition    15
198 |     ## 6 Da Vinci A      Drawing        16
199 | 
200 | This example is a little more complex than the previous one, since it’s
201 | missing an *id* column, and there is more than one “keep” column. (You
202 | are encouraged to try this on your own before looking at the solution\!)
203 | 
204 | ### Observe
205 | 
206 | ![](../images/messyvstidy6.png)
207 | 
208 | ### Plan
209 | 
210 | ![](../images/messyvstidy7.png)
211 | 
212 |   - Move rownames to a new column and call it `Name` (see “Common
213 |     problem: missing *id* column” above to understand the need for
214 |     this new column.)
215 | 
216 |   - Keep `Name` and `School` columns as is. (“Don’t Touch\!”)
217 | 
218 |   - Since the four columns `Composition`, `Drawing`, `Colour`, and
219 |     `Expression` are really levels of another variable, they do not get
220 |     the “keep” as is designation. That means we are staging them to be
221 |     dumped into a new *name* variable which we’ll call `Skill`. In
222 |     addition, the values contained in these columns will move to a new
223 |     *value* column, which we’ll call `Score`.
224 | 
225 | ### Code
226 | 
227 | ``` r
228 | library(MASS)
229 | library(tidyverse)
230 | tidypaint <- painters %>% 
231 |   rownames_to_column("Name") %>%   # move the row names into a new Name column
232 |   pivot_longer(cols = !c("Name", "School"), names_to = "Skill", values_to = "Score")  # Name and School are kept as is
233 | ```
234 | 
235 | The instructions:
236 | 
237 | “Take the (messy) dataset `painters`. Move the contents of the rownames
238 | to a new column called `Name`. Tidy the data by creating a new *name* column
239 | called `Skill` and a new *value* column called `Score`. Do not touch or
240 | change the `Name` and `School` columns (other than to replicate as
241 | necessary).”
242 | 
243 | The result:
244 | 
245 | The four other column names will fill the new `Skill` column, and the
246 | values of those columns will fill the new `Score` column.
247 | 
248 | Thank you to [@angelotrivelli](https://twitter.com/angelotrivelli)
249 | [@dch4n](https://twitter.com/@dch4n)
250 | [@jschoeley](https://twitter.com/jschoeley)
251 | [@jspncr\_](https://twitter.com/jspncr_)
252 | [@kierisi](https://twitter.com/kierisi)
253 | [@s\_lump](https://twitter.com/s_lump) for providing feedback and
254 | helpful suggestions in response to [this Twitter
255 | post](https://twitter.com/jtrnyc/status/958845845385940993) about my
256 | [`gather()`
257 | tutorial](https://github.com/jtr13/codehelp/blob/master/R/gather.md)
258 | upon which this tutorial is based.
259 | 


--------------------------------------------------------------------------------
/R/reorder.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Reorder those bars, once and for all, with **forcats**"
  3 | author: "Joyce Robbins"
  4 | date: "2/7/2018"
  5 | output: html_document
  6 | ---
  7 | 
  8 | ```{r setup, include=FALSE}
  9 | # Global chunk defaults: show code, centered 5 x 3.5 figures,
 10 | # no warnings or messages, and cached chunk results.
 11 | knitr::opts_chunk$set(echo = TRUE, fig.width = 5, fig.height = 3.5, fig.align = 'center',
 12 |                       warning = FALSE, message = FALSE, cache = TRUE)
 13 | ```
 14 | <img src = "../images/ggplot2SO.png" width = "500"></img>
 15 | 
 16 | 
 17 | Since I happened to be preparing to teach, among other things, *how to reorder the bars in a bar chart* when I saw Claus Wilke's response, I figured this would be a good topic for a tutorial.
 18 | 
 19 | My theory is that this is a troublesome topic because several different problems are conflated into one question.  If, however, you first identify *how* you want the bars ordered, and *why* they are not ordered, the road will be less rocky. And with [**forcats**](http://forcats.tidyverse.org/) (now a core [**tidyverse**](https://www.tidyverse.org) package) functions -- namely, `fct_inorder()`, `fct_relevel()`, `fct_reorder()`, and `fct_infreq()` -- it will become second nature to get the order of the bars right. 
 20 | 
 21 | Before getting into reordering, it's important to understand R defaults, which happen to be the same for base R and **ggplot2**. For *factor* data, the default is for the bars to be plotted in the order of the factor levels. This applies to *ordered* and *non-ordered* factors--*all* types of factors have ordered levels. *Character* data, in contrast, is plotted in alphabetical order by default. Check what you have with `str()`.
 22 | 
 23 | ```{r}
 24 | library(tidyverse)
 25 | mycolor <- "#002448"
 26 | myfill <- "#7192E3"
 27 | 
 28 | # The same nine values stored twice: as character data and as a factor with explicit levels.
 29 | df <- tibble(chardata = c("cold", "warm", "hot", "hot", "warm", "warm", "cold", "cold", "cold"),
 30 |              factordata = factor(chardata, levels = c("cold", "warm", "hot")))
 31 | str(df)
 32 | ```
 33 | 
 34 | ```{r, echo = FALSE, fig.width = 3, fig.height = 2,  out.width='.2\\textwidth'}
 35 | 
 36 | # Bar chart of the character column.
 37 | ggplot(df, aes(x = chardata)) +
 38 |   geom_bar(fill = myfill, color = mycolor) +
 39 |   labs(title = "Default: alphabetical order", x = "Character variable") +
 40 |   theme_grey(base_size = 12)
 41 | 
 42 | # Bar chart of the factor column.
 43 | ggplot(df, aes(x = factordata)) +
 44 |   geom_bar(fill = myfill, color = mycolor) +
 45 |   labs(title = "Default: factor level order", x = "Factor variable") +
 46 |   theme_grey(base_size = 12)
 47 | 
 48 | ```
 49 | 
 50 | In the examples below, all of the reordering happens in the calls to `ggplot()`, which means that you don't have to alter your data at all to achieve the desired effect.  In fact, you don't even have to convert your character data to factor data before plotting. The **forcats** functions will perform the conversion for plotting purposes only. However, if you prefer to change the data itself, of course, you can.  See the U.S. births example below for an example on how to do so.
 51 | 
 52 | Now, there are two key rules to follow when deciding how to reorder bars in a bar chart:
 53 | 
 54 | ### 1. Bars should appear in their natural order, if they have one.
 55 | 
 56 | If the levels or categories have a natural order to them (a.k.a. ordinal data), they should be plotted in that order. However, unless specified otherwise, whether your categories are stored as factor or character data, they will appear in alphabetical order in a bar chart (unless you changed around the levels of the factors).
 57 | 
 58 | In this example, the levels have a natural order, from Beginner to Expert, that is not reflected in the plot:
 59 | 
 60 | ```{r}
 61 | # Four skill levels, listed from Beginner to Expert, with a count for each.
 62 | mydf <- tibble(Skill = c("Beginner", "Adv Beginner", "Intermediate", "Expert"), Num = c(75, 60, 15, 25))
 63 | 
 64 | ggplot(data = mydf, mapping = aes(x = Skill, y = Num)) + 
 65 |   geom_col(fill = myfill, color = mycolor) + theme_grey(base_size = 14)
 66 | ```
 67 | 
 68 | The simplest way to get the order right depends on the situation at hand:
 69 | 
 70 | #### (a) The row order is correct
 71 | 
 72 | In this case, we can simply indicate with `fct_inorder()` that we want the levels to be plotted in the order in which they appear in the data frame:
 73 | 
 74 | ```{r}
 75 | mydf  # print the data frame to check its row order
 76 | ```
 77 | 
 78 | Since the row order in the data frame is correct, it's a simple fix: 
 79 | 
 80 | ```{r}
 81 | ggplot(mydf, aes(fct_inorder(Skill), Num)) +   # levels follow the order in which they appear in the data
 82 |   geom_col(color = mycolor, fill = myfill) + theme_grey(14)
 83 | ```
 84 | 
 85 | 
 86 | #### (b) Only one category is out of order
 87 | 
 88 | Often there's just one level out of order.  In the case below it's "Under 15 years", which should be the first category in the chart, not the last:
 89 | 
 90 | ```{r}
 91 | # 2015 U.S. births (in thousands) by age group of mother
 92 | MotherAge <- c("15-19 years", "20-24 years", "25-29 years",
 93 |                "30-34 years", "35-39 years", "40-44 years",
 94 |                "45-49 years", "50 years and over", "Under 15 years")
 95 | 
 96 | Num <- c(229.715, 850.509, 1152.311, 1094.693, 527.996, 111.848,
 97 |          8.171, 0.754, 2.5)
 98 | 
 99 | Births2015 <- tibble(MotherAge = MotherAge, Num = Num)
100 | 
101 | ggplot(Births2015, aes(x = MotherAge, y = Num)) + 
102 |   geom_col(fill = myfill, color = mycolor) + 
103 |   labs(title = "United States Births, 2015", subtitle = "in thousands") +
104 |   scale_y_continuous(breaks = seq(from = 0, to = 1250, by = 250)) +
105 |   coord_flip() + theme_grey(base_size = 14)
106 | ```
107 | 
108 | We can use `fct_relevel()` to move it where it needs to go:
109 | 
110 | ```{r, fig.height = 4}
111 | ggplot(Births2015, aes(fct_relevel(MotherAge, "Under 15 years"), Num)) +   # move "Under 15 years" to the front of the levels
112 |   ggtitle("United States Births, 2015", subtitle = "in thousands") +
113 |   scale_y_continuous(breaks = seq(0, 1250, 250)) +
114 |   geom_col(color = mycolor, fill = myfill) + coord_flip() + theme_grey(14)
115 | ```
116 | 
117 | Although we can move the levels around without touching the original data, in this case, we probably do want to change the levels to the correct natural order and then plot, as follows:
118 | 
119 | ```{r, fig.height = 4}
120 | # Store the corrected level order in the data itself, then plot the column directly.
121 | Births2015 <- mutate(Births2015, MotherAge = fct_relevel(MotherAge, "Under 15 years"))
122 | 
123 | ggplot(Births2015, aes(x = MotherAge, y = Num)) + 
124 |   labs(title = "United States Births, 2015", subtitle = "in thousands") +
125 |   scale_y_continuous(breaks = seq(from = 0, to = 1250, by = 250)) +
126 |   geom_col(fill = myfill, color = mycolor) + coord_flip() + theme_grey(base_size = 14)
127 | ```
128 | 
129 | #### (c) Many categories are out of order
130 | 
131 | As long as the categories that are out of order all need to be moved to the same place, we can use the same technique:
132 | 
133 | ```{r}
134 | x <- factor(c("A", "B", "C", "move1", "D", "E", "move2", "F"))  # toy factor with two levels to relocate
135 | x  # print the factor and its current levels
136 | fct_relevel(x, "move1", "move2")   # move to the beginning (default)
137 | fct_relevel(x, "move1", "move2", after = 4) # move after the fourth item
138 | fct_relevel(x, "move1", "move2", after = Inf) # move to the end
139 | ```
140 | 
141 | However, if they're all in a big jumble, the only solution is to manually reorder all of the levels with `fct_relevel()`.
142 | 
143 | Some important notes:
144 | 
145 | * This problem has nothing to do with any other variable.  There is simply a mismatch between the levels of the factors and the natural order of the categories.
146 | 
147 | * Don't be tempted to use ordered factors even though your data has ordered levels. The levels are ordered for *all* factors.
148 | 
149 | ### 2. Otherwise, bars should be ordered by frequency count.
150 | 
151 | Ordering by frequency count is the recommended approach for nominal data, that is, categories that are not naturally ordered.
152 | 
153 | #### (a) Using `geom_col()` 
154 | 
155 | Once again, the default is for the bars to be ordered alphabetically, which is not what we want. (Since the bar chart is horizontal the categories are alphabetical from bottom to top.)
156 | 
157 | ```{r}
158 | # Weekend gross (millions of dollars) for five films; title typo "Snowman" corrected to "Showman".
159 | weekend_gross <- tibble(movie = c("Jumanji", "Maze Runner", "Winchester", "The Greatest Showman", "The Post"),
160 |                         gross = c(10.93, 10.475, 9.307, 7.696, 5.218))
161 | 
162 | ggplot(weekend_gross, aes(movie, gross)) + 
163 |   ggtitle("Weekend Box Office", subtitle = "Feb 2-4, 2018") + 
164 |   ylab("millions of dollars") +
165 |   geom_col(color = mycolor, fill = myfill) + coord_flip() + theme_grey(14)
166 | ```
167 | 
168 | This issue can be addressed within the call to `ggplot()` with `fct_reorder()`, also from **forcats**; we do not have to actually reorder the factor levels.
169 | 
170 | ```{r}
171 | # note the change in the first line:
172 | ggplot(weekend_gross, aes(fct_reorder(movie, gross), gross)) +    # order the movie levels by gross
173 |   ggtitle("Weekend Box Office", subtitle = "Feb 2-4, 2018") + 
174 |   ylab("millions of dollars") +
175 |   geom_col(color = mycolor, fill = myfill) + coord_flip() + theme_grey(14)
176 | ```
177 | 
178 | Notes:
179 | 
180 | * Although it appears that the bars are ordered from highest to lowest frequency count, in fact, they are ordered from lowest to highest, and plotted from the bottom up in a horizontal bar chart.  If you need to reverse the order, you can add a minus sign to the variable which determines the order: `fct_reorder(movie, -gross)` or use `fct_reorder(movie, gross) %>% fct_rev()`.
181 | 
182 | #### (b) Using `geom_bar()` -- data is unbinned
183 | 
184 | In this case, we can't order by another variable since we only have one variable: a list of categories:
185 | 
186 | ```{r}
187 | set.seed(2018)  # fix the random draw so the simulated responses (and the plots) are reproducible
188 | unbinned <- tibble(response = sample(c("yes", "no", "maybe"), 100, replace = TRUE, prob = c(.5, .15, .35)))
189 | 
190 | ggplot(unbinned, aes(response)) + geom_bar(color = mycolor, fill = myfill) +
191 |   theme_grey(14)
192 | ```
193 | 
194 | Again the bars are ordered alphabetically by default, not in order of frequency. The solution is our fourth **forcats** function, `fct_infreq()`:
195 | 
196 | ```{r}
197 | ggplot(unbinned, aes(fct_infreq(response))) + geom_bar(color = mycolor, fill = myfill) +  # levels ordered by frequency
198 |   theme_grey(14)
199 | ```
200 | 
201 | Note that `fct_infreq()` orders the levels in *decreasing* order of frequency, ideal for drawing bar charts (presumably not a coincidence).
202 | 
203 | *Many thanks to Emily Zabor ( [\@zabormetrics](https://twitter.com/zabormetrics)) for convincing me to try `forcats` despite my initial reluctance.*
204 | 
205 | For more on best practices for bar charts, see:
206 | 
207 | Antony Unwin, "Displaying Categorical Data," *Graphical Data Analysis with R* (CRC Press: 2015).
208 | 
209 | For more detail on **forcats** functions in general, see:
210 | 
211 | Jenny Bryan, ["Be the boss of your factors"](http://stat545.com/block029_factors.html)
212 | 
213 | Garrett Grolemund and Hadley Wickham, ["Factors" chapter](http://r4ds.had.co.nz/factors.html) in *R for Data Science*
214 | 
215 | 
216 | 
217 | 
218 | 
219 | 
220 | 
221 | 


--------------------------------------------------------------------------------
/R/gather.md:
--------------------------------------------------------------------------------
  1 | Using `tidyr::gather()` to tidy data
  2 | ================
  3 | Joyce Robbins
  4 | originally published: 1/31/2018
  5 | 
  6 |   - [Example 1: City vs. Highway](#example-1-city-vs.-highway)
  7 |       - [Observe](#observe)
  8 |       - [Plan](#plan)
  9 |       - [Code](#code)
 10 |   - [Heads up](#heads-up)
 11 |       - [Not separating the messy data columns properly into “keep” and
 12 |         “dump”.](#not-separating-the-messy-data-columns-properly-into-keep-and-dump.)
 13 |       - [Missing *id* column](#missing-id-column)
 14 |   - [Example 2: `MASS::painters`
 15 |     dataset](#example-2-masspainters-dataset)
 16 |       - [Observe](#observe-1)
 17 |       - [Plan](#plan-1)
 18 |       - [Code](#code-1)
 19 | 
 20 | **`tidyr::gather()` has been superseded by `tidyr::pivot_longer()`.
 21 | Please see [this tutorial on
 22 | `pivot_longer()`](https://github.com/jtr13/codehelp/blob/master/R/pivot_longer.md)
 23 | instead.**
 24 | 
 25 | This tutorial focuses on one function: `tidyr::gather()`, which is used
 26 | to convert data from messy to tidy. I won’t get into the distinction
 27 | here – I’m going to assume that you know the terms and just need help
 28 | figuring out how to get from A to B. If, however, this is all new to you, I
 29 | suggest reading [“Tidy
 30 | Data”](https://www.jstatsoft.org/article/view/v059i10) by Hadley
 31 | Wickham, the seminal paper on the topic, which lays out the concept in
 32 | detail.
 33 | 
 34 | ## Example 1: City vs. Highway
 35 | 
 36 | ### Observe
 37 | 
 38 | Before getting into the nitty-gritty of `gather()` let’s study what our
 39 | messy data looks like, and what it will look like when it’s tidied:
 40 | 
 41 | ![](../images/messyvstidy1.png)
 42 | 
 43 | Study the differences carefully. What did you observe? <br> <br> <br>
 44 | Hopefully you noticed the following:
 45 | 
 46 | 1.  The `id` column appears in both versions, though it’s double the
 47 |     length in the tidy form.
 48 | 
 49 | 2.  We have two new columns: `roadtype` and `mpg`.
 50 | 
 51 | 3.  The `city` and `hwy` column names became the contents of the new
 52 |     `roadtype` (*key*) column.
 53 | 
 54 | 4.  The *values* of the `city` and `hwy` columns became the *values* of
 55 |     the new `mpg` (*value*) column.
 56 | 
 57 | ![](../images/messyvstidy2.png)
 58 | 
 59 | ### Plan
 60 | 
 61 | Now here’s the trick: the only parts we need to concern ourselves with
 62 | are 1. and 2.: deciding which columns to keep unchanged, and what names
 63 | to give the new columns, which are called the *key* and *value* columns.
 64 | 
 65 | The columns to keep are the ones that are already tidy. The ones to dump
 66 | are the ones that aren’t true variables but in fact are *levels* of
 67 | another variable. In this case, `city` and `hwy` should be levels of a
 68 | new variable called `roadtype` (or something similar), according to the
 69 | theory of tidy data. On the other hand, `id` should remain as is.
 70 | 
 71 | The bottom line: **keep `id` as is**
 72 | 
 73 | ![](../images/messyvstidy3.png)
 74 | 
 75 | The second (and final) step is to choose names for the new *key* and
 76 | *value* columns. We’ve already chosen `roadtype` for *key*; let’s pick
 77 | `mpg` for *value*. You can also choose to stick with the defaults for
 78 | the new *key* and *value* column names: `key` and `value` (surprise).
 79 | Using `key` and `value` may be helpful at first to keep track of which
 80 | is which, though ultimately I find more descriptive names to be more
 81 | useful.
 82 | 
 83 | The bottom line: **key = `roadtype`, value = `mpg`**
 84 | 
 85 | ![](../images/messyvstidy4.png)
 86 | 
 87 | ### Code
 88 | 
 89 | The code to carry out the transformation from messy to tidy is one call
 90 | to `gather()`, with parameters that reflect our conclusions about the
 91 | columns to keep and the names we’ve chosen:
 92 | 
 93 | ``` r
 94 | tidydata <- messydata %>% 
 95 |   gather(key = "roadtype",  value = "mpg", -id)
 96 | ```
 97 | 
 98 | The most important thing to note is that *each parameter corresponds to
 99 | one of the columns in the tidy data set.*
100 | 
101 | Now let’s look at them one at a time in more detail:
102 | 
103 | `key = "roadtype"` sounds like it’s setting the *key* to a preexisting
104 | column called “roadtype”, but that’s not what’s going on. Rather, think
105 | of this as an instruction to *create a new key column and call it
106 | “roadtype”.*
107 | 
108 | `value = "mpg"` likewise is an instruction to *create a new value column
109 | and call it “mpg”.*
110 | 
111 | `-id` is the piece that says “Don’t touch my `id` column\! Leave it as
112 | is\!”
113 | 
114 | Nothing else needs to be specified: every value in a column not marked
115 | “don’t touch” will be moved to the *value* (“mpg”) column, paired with
116 | its old column name (in this case “city” or “hwy”) from the messy data
117 | set, which now appears in the *key* column.
118 | 
119 | ## Heads up
120 | 
121 | There are a few problems I’ve seen with the way in which people use
122 | `gather()`:
123 | 
124 | ### Not separating the messy data columns properly into “keep” and “dump”.
125 | 
126 | If we had made this mistake in the example above, the result would be
127 | something like:
128 | 
129 | ``` r
130 | messydata %>% 
131 |   gather(key = "roadtype", value = "mpg")
132 | ```
133 | 
134 |     ## # A tibble: 9 x 2
135 |     ##   roadtype mpg  
136 |     ##   <chr>    <chr>
137 |     ## 1 id       car1 
138 |     ## 2 id       car2 
139 |     ## 3 id       car3 
140 |     ## 4 city     19   
141 |     ## 5 city     20   
142 |     ## 6 city     29   
143 |     ## 7 hwy      24   
144 |     ## 8 hwy      30   
145 |     ## 9 hwy      35
146 | 
147 | Yikes, not what we wanted. Looking at the `mpg` column, we see that
148 | “car1”, “car2”, and “car3” don’t belong. The solution is to add the
149 | `-id` parameter – remember, think: “don’t touch `id`” – so that it isn’t
150 | “dumped” into the `key` column.
151 | 
152 | Note: it is possible to specify the columns to *dump* rather than the
153 | columns to *keep*:
154 | 
155 | ``` r
156 | messydata %>% 
157 |   gather(key = "roadtype", value = "mpg", city, hwy)
158 | ```
159 | 
160 |     ## # A tibble: 6 x 3
161 |     ##   id    roadtype   mpg
162 |     ##   <chr> <chr>    <dbl>
163 |     ## 1 car1  city        19
164 |     ## 2 car2  city        20
165 |     ## 3 car3  city        29
166 |     ## 4 car1  hwy         24
167 |     ## 5 car2  hwy         30
168 |     ## 6 car3  hwy         35
169 | 
170 | I find this method less intuitive, but of course it’s your choice.
171 | 
172 | ### Missing *id* column
173 | 
174 | Note in our example that the `id` column is important for linking the
175 | city and highway miles per gallon. For example, we might want to plot
176 | the data as follows, which requires knowing which car had which city and
177 | highway mpg:
178 | 
179 | ![](gather_files/figure-gfm/unnamed-chunk-6-1.png)<!-- -->
180 | 
181 | Often, however, there is no `id` column: it’s not necessary since each
182 | row represents one item – a car in this case. If we try to tidy messy
183 | data without an id, it looks like this:
184 | 
185 | ![](../images/messyvstidy5.png)
186 | 
187 | The problem is that we’ve lost the natural connection we had between
188 | city and highway for each car since the two values no longer reside in
189 | the same row. The solution here is to move the row names – or row
190 | numbers in this case – to a column to be used as an id:
191 | 
192 | ``` r
193 | messy2 %>% rownames_to_column("id") %>% 
194 |   gather(key, value, -id)
195 | ```
196 | 
197 |     ## # A tibble: 6 x 3
198 |     ##   id    key   value
199 |     ##   <chr> <chr> <dbl>
200 |     ## 1 1     city     19
201 |     ## 2 2     city     20
202 |     ## 3 3     city     29
203 |     ## 4 1     hwy      24
204 |     ## 5 2     hwy      30
205 |     ## 6 3     hwy      35
206 | 
207 | Note that we must specify the “don’t touch” id column that we just
208 | created. Also note that we used the default `key` and `value` column
209 | names rather than choose our own.
210 | 
211 | ## Example 2: `MASS::painters` dataset
212 | 
213 | Let’s take the `painters` dataset from the **MASS** package:
214 | 
215 |     ##               Composition Drawing Colour Expression School
216 |     ## Da Udine               10       8     16          3      A
217 |     ## Da Vinci               15      16      4         14      A
218 |     ## Del Piombo              8      13     16          7      A
219 |     ## Del Sarto              12      16      9          8      A
220 |     ## Fr. Penni               0      15      8          0      A
221 |     ## Guilio Romano          15      16      4         14      A
222 | 
223 | …and tidy it into four columns as such:
224 | 
225 |     ##            Name School       Skill Score
226 |     ## 1      Da Udine      A Composition    10
227 |     ## 2      Da Vinci      A Composition    15
228 |     ## 3    Del Piombo      A Composition     8
229 |     ## 4     Del Sarto      A Composition    12
230 |     ## 5     Fr. Penni      A Composition     0
231 |     ## 6 Guilio Romano      A Composition    15
232 | 
233 | This example is a little more complex than the previous one, since it’s
234 | missing an *id* column, and there is more than one “keep” column. (You
235 | are encouraged to try this on your own before looking at the solution\!)
236 | 
237 | ### Observe
238 | 
239 | ![](../images/messyvstidy6.png)
240 | 
241 | ### Plan
242 | 
243 | ![](../images/messyvstidy7.png)
244 | 
245 |   - Move rownames to a new column and call it `Name` (see “missing `id`
246 |     column” in the **Heads up** section above to understand the need for
247 |     this new column.)
248 | 
249 |   - Keep `Name` and `School` columns as is. (“Don’t Touch\!”)
250 | 
251 |   - Since the four columns `Composition`, `Drawing`, `Colour`, and
252 |     `Expression` are really levels of another variable, they do not get
253 |     the “keep as is” designation. That means we are staging them to be
254 |     dumped into a new *key* variable which we’ll call `Skill`. In
255 |     addition, the values contained in these columns will move to a new
256 |     *value* column, which we’ll call `Score`.
257 | 
258 | ### Code
259 | 
260 | ``` r
261 | library(MASS)
262 | library(tidyverse)
263 | tidypaint <- painters %>% 
264 |   rownames_to_column("Name") %>% 
265 |   gather(key = "Skill", value = "Score", -Name, -School)
266 | ```
267 | 
268 | The breakdown:
269 | 
270 | ![](../images/Inkedmessyvstidy8.jpg)
271 | 
272 | Finally, if the visuals aren’t your style, here’s a running commentary
273 | of the instructions:
274 | 
275 | “Take the (messy) dataset `painters`. Move the contents of the rownames
276 | to a new column called `Name`. Now let’s start tidying by gathering
277 | multiple columns into *key-value* pairs. Do this by creating a new *key*
278 | column called `Skill`. While you’re at it, create a new *value* column
279 | called `Score`. Absolutely do not touch or change the `Name` and
280 | `School` columns (other than to replicate as necessary). It goes without
281 | saying that the four other column names will fill the new `Skill`
282 | column, and the values of those columns will fill the new `Score`
283 | column, since it’s understood from what I’ve said already\! Much
284 | appreciated\!”
285 | 
286 | Thank you to [@angelotrivelli](https://twitter.com/angelotrivelli)
287 | [@dch4n](https://twitter.com/@dch4n)
288 | [@jschoeley](https://twitter.com/jschoeley)
289 | [@jspncr\_](https://twitter.com/jspncr_)
290 | [@kierisi](https://twitter.com/kierisi)
291 | [@s\_lump](https://twitter.com/s_lump) for providing feedback and
292 | helpful suggestions in response to [this Twitter
293 | post.](https://twitter.com/jtrnyc/status/958845845385940993)
294 | 


--------------------------------------------------------------------------------
/R/reorder.md:
--------------------------------------------------------------------------------
  1 | Reorder those bars, once and for all, with **forcats**
  2 | ================
  3 | Joyce Robbins
  4 | 2/7/2018
  5 | 
  6 | <img src = "../images/ggplot2SO.png" width = "500"></img>
  7 | 
  8 | Since I happened to be preparing to teach, among other things, *how to
  9 | reorder the bars in a bar chart* when I saw Claus Wilke’s response, I
 10 | figured this would be a good topic for a tutorial.
 11 | 
 12 | My theory is that this is a troublesome topic because several different
 13 | problems are conflated into one question. If, however, you first
 14 | identify *how* you want the bars ordered, and *why* they are not
 15 | ordered, the road will be less rocky. And with
 16 | [**forcats**](http://forcats.tidyverse.org/) (now a core
 17 | [**tidyverse**](https://www.tidyverse.org) package) functions – namely,
 18 | `fct_inorder()`, `fct_relevel()`, `fct_reorder()`, and `fct_infreq()` –
 19 | it will become second nature to get the order of the bars right.
 20 | 
 21 | Before getting into reordering, it’s important to understand R defaults,
 22 | which happen to be the same for base R and **ggplot2**. For *factor*
 23 | data, the default is for the bars to be plotted in the order of the
 24 | factor levels. This applies to *ordered* and *non-ordered* factors–*all*
 25 | types of factors have ordered levels. *Character* data, in contrast, is
 26 | plotted in alphabetical order by default. Check what you have with
 27 | `str()`.
 28 | 
 29 | ``` r
 30 | library(tidyverse)
 31 | mycolor <- "#002448"; myfill = "#7192E3"
 32 | 
 33 | df <- tibble(chardata = c("cold", "warm", "hot", "hot", "warm", "warm", "cold", "cold", "cold"), 
 34 |     factordata = factor(c("cold", "warm", "hot", "hot", "warm", "warm", "cold", "cold", "cold"), 
 35 |                                  levels = c("cold", "warm", "hot")))
 36 | 
 37 | head(df)
 38 | ```
 39 | 
 40 |     ## # A tibble: 6 x 2
 41 |     ##   chardata factordata
 42 |     ##   <chr>    <fct>     
 43 |     ## 1 cold     cold      
 44 |     ## 2 warm     warm      
 45 |     ## 3 hot      hot       
 46 |     ## 4 hot      hot       
 47 |     ## 5 warm     warm      
 48 |     ## 6 warm     warm
 49 | 
 50 | <img src="reorder_files/figure-gfm/unnamed-chunk-2-1.png" width=".4\textwidth" style="display: block; margin: auto;" /><img src="reorder_files/figure-gfm/unnamed-chunk-2-2.png" width=".4\textwidth" style="display: block; margin: auto;" />
 51 | 
 52 | In the examples below, all of the reordering happens in the calls to
 53 | `ggplot()`, which means that you don’t have to alter your data at all to
 54 | achieve the desired effect. In fact, you don’t even have to
 55 | convert your character data to factor data before plotting. The
 56 | **forcats** functions will perform the conversion for plotting purposes
 57 | only. However, if you prefer to change the data itself, of course, you
 58 | can. See the U.S. births example below for an example on how to do so.
 59 | 
 60 | Now, there are two key rules to follow when deciding how to reorder bars
 61 | in a bar chart:
 62 | 
 63 | ### 1\. Bars should appear in their natural order, if they have one.
 64 | 
 65 | If the levels or categories have a natural order to them (a.k.a. ordinal
 66 | data), they should be plotted in that order. However, unless specified
 67 | otherwise, whether your categories are stored as factor or character
 68 | data, they will appear in alphabetical order in a bar chart (unless you
 69 | changed around the levels of the factors).
 70 | 
 71 | In this example, the levels have a natural order, from Beginner to
 72 | Expert, that is not reflected in the
 73 | plot:
 74 | 
 75 | ``` r
 76 | mydf <- tibble(Skill = c("Beginner", "Adv Beginner", "Intermediate", "Expert"),
 77 |                Num = c(75, 60, 15, 25))
 78 | 
 79 | ggplot(mydf, aes(Skill, Num)) + 
 80 |   geom_col(color = mycolor, fill = myfill) + theme_grey(14)
 81 | ```
 82 | 
 83 | <img src="reorder_files/figure-gfm/unnamed-chunk-3-1.png" style="display: block; margin: auto;" />
 84 | 
 85 | The simplest way to get the order right depends on the situation at
 86 | hand:
 87 | 
 88 | #### (a) The row order is correct (data is binned*)
 89 | 
 90 | \* = has a count or frequency column, in this case, `Num`
 91 | 
 92 | In this case, we can simply indicate with `fct_inorder()` that we want
 93 | the levels to be plotted in the order in which they appear in the data
 94 | frame:
 95 | 
 96 | ``` r
 97 | mydf
 98 | ```
 99 | 
100 |     ## # A tibble: 4 x 2
101 |     ##   Skill          Num
102 |     ##   <chr>        <dbl>
103 |     ## 1 Beginner        75
104 |     ## 2 Adv Beginner    60
105 |     ## 3 Intermediate    15
106 |     ## 4 Expert          25
107 | 
108 | Since the row order in the data frame is correct, it’s a simple fix:
109 | 
110 | ``` r
111 | ggplot(mydf, aes(fct_inorder(Skill), Num)) + 
112 |   geom_col(color = mycolor, fill = myfill) + theme_grey(14)
113 | ```
114 | 
115 | <img src="reorder_files/figure-gfm/unnamed-chunk-5-1.png" style="display: block; margin: auto;" />
116 | 
117 | #### (b) Only one category is out of order (data is binned*)
118 | 
119 | \* = has a count or frequency column, in this case, `Num`
120 | 
121 | Often there’s just one level out of order. In the case below it’s “Under
122 | 15 years”, which should be the first category in the chart, not the
123 | last:
124 | 
125 | ``` r
126 | # 2015 U.S. Births
127 | MotherAge <-  c("15-19 years", "20-24 years", "25-29 years", 
128 |                 "30-34 years", "35-39 years", "40-44 years",
129 |                 "45-49 years", "50 years and over", "Under 15 years")
130 | 
131 | Num <- c(229.715, 850.509, 1152.311, 1094.693, 527.996, 111.848,
132 |             8.171, .754, 2.500)
133 | 
134 | Births2015 <- tibble(MotherAge, Num)
135 | 
136 | ggplot(Births2015, aes(MotherAge, Num)) + 
137 |   geom_col(color = mycolor, fill = myfill) + 
138 |   ggtitle("United States Births, 2015", subtitle = "in thousands") +
139 |   scale_y_continuous(breaks = seq(0, 1250, 250)) +
140 |   coord_flip() + theme_grey(14)
141 | ```
142 | 
143 | <img src="reorder_files/figure-gfm/unnamed-chunk-6-1.png" style="display: block; margin: auto;" />
144 | 
145 | We can use `fct_relevel()` to move it where it needs to
146 | go:
147 | 
148 | ``` r
149 | ggplot(Births2015, aes(fct_relevel(MotherAge, "Under 15 years"), Num)) + 
150 |   ggtitle("United States Births, 2015", subtitle = "in thousands") +
151 |   scale_y_continuous(breaks = seq(0, 1250, 250)) +
152 |   geom_col(color = mycolor, fill = myfill) + coord_flip() + theme_grey(14)
153 | ```
154 | 
155 | <img src="reorder_files/figure-gfm/unnamed-chunk-7-1.png" style="display: block; margin: auto;" />
156 | 
157 | Although we can move the levels around without touching the original
158 | data, in this case, we probably do want to change the levels to the
159 | correct natural order and then plot, as follows:
160 | 
161 | ``` r
162 | Births2015 <- Births2015 %>%
163 |   mutate(MotherAge = fct_relevel(MotherAge, "Under 15 years"))
164 | 
165 | ggplot(Births2015, aes(MotherAge, Num)) + 
166 |   ggtitle("United States Births, 2015", subtitle = "in thousands") +
167 |   scale_y_continuous(breaks = seq(0, 1250, 250)) +
168 |   geom_col(color = mycolor, fill = myfill) + coord_flip() + theme_grey(14)
169 | ```
170 | 
171 | <img src="reorder_files/figure-gfm/unnamed-chunk-8-1.png" style="display: block; margin: auto;" />
172 | 
173 | #### (c) Many categories are out of order
174 | 
175 | As long as the categories that are out of order all need to be moved to
176 | the same place, we can use the same technique:
177 | 
178 | ``` r
179 | x <- factor(c("A", "B", "C", "move1", "D", "E", "move2", "F"))
180 | x
181 | ```
182 | 
183 |     ## [1] A     B     C     move1 D     E     move2 F    
184 |     ## Levels: A B C D E F move1 move2
185 | 
186 | ``` r
187 | fct_relevel(x, "move1", "move2")   # move to the beginning (default)
188 | ```
189 | 
190 |     ## [1] A     B     C     move1 D     E     move2 F    
191 |     ## Levels: move1 move2 A B C D E F
192 | 
193 | ``` r
194 | fct_relevel(x, "move1", "move2", after = 4) # move after the fourth item
195 | ```
196 | 
197 |     ## [1] A     B     C     move1 D     E     move2 F    
198 |     ## Levels: A B C D move1 move2 E F
199 | 
200 | ``` r
201 | fct_relevel(x, "move1", "move2", after = Inf) # move to the end
202 | ```
203 | 
204 |     ## [1] A     B     C     move1 D     E     move2 F    
205 |     ## Levels: A B C D E F move1 move2
206 | 
207 | However, if they’re all in a big jumble, the only solution is to
208 | manually reorder all of the levels with `fct_relevel()`.
209 | 
210 | Some important notes:
211 | 
212 |   - This problem has nothing to do with any other variable. There is
213 |     simply a mismatch between the levels of the factors and the natural
214 |     order of the categories.
215 | 
216 |   - Don’t be tempted to use ordered factors even though your data has
217 |     ordered levels. The levels are ordered for *all* factors.
218 | 
219 | ### 2\. Otherwise, bars should be ordered by frequency count.
220 | 
221 | Ordering by frequency count is the recommended approach for nominal
222 | data, that is, categories that are not naturally ordered.
223 | 
224 | #### (a) Using `geom_col()` (binned data)
225 | 
226 | Once again, the default is for the bars to be ordered alphabetically,
227 | which is not what we want. (Since the bar chart is horizontal the
228 | categories are alphabetical from bottom to
229 | top.)
230 | 
231 | ``` r
232 | weekend_gross <- tibble(movie = c("Jumanji", "Maze Runner", "Winchester",
233 |                         "The Greatest Snowman", "The Post"),
234 |               gross = c(10.93, 10.475, 9.307, 7.696, 5.218))
235 | 
236 | ggplot(weekend_gross, aes(movie, gross)) + 
237 |   ggtitle("Weekend Box Office", subtitle = "Feb 2-4, 2018") + 
238 |   ylab("millions of dollars") +
239 |   geom_col(color = mycolor, fill = myfill) + coord_flip() + theme_grey(14)
240 | ```
241 | 
242 | <img src="reorder_files/figure-gfm/unnamed-chunk-10-1.png" style="display: block; margin: auto;" />
243 | 
244 | This issue can be addressed within the call to `ggplot()` with
245 | `fct_reorder()`, also from **forcats**; we do not have to actually
246 | reorder the factor levels.
247 | 
248 | ``` r
249 | # note the change in the first line:
250 | ggplot(weekend_gross, aes(fct_reorder(movie, gross), gross)) +  
251 |   ggtitle("Weekend Box Office", subtitle = "Feb 2-4, 2018") + 
252 |   ylab("millions of dollars") +
253 |   geom_col(color = mycolor, fill = myfill) + coord_flip() + theme_grey(14)
254 | ```
255 | 
256 | <img src="reorder_files/figure-gfm/unnamed-chunk-11-1.png" style="display: block; margin: auto;" />
257 | 
258 | Notes:
259 | 
260 |   - Although it appears that the bars are ordered from highest to lowest
261 |     frequency count, in fact, they are ordered from lowest to highest,
262 |     and plotted from the bottom up in a horizontal bar chart. If you
263 |     need to reverse the order, you can add a minus sign to the variable
264 |     which determines the order: `fct_reorder(movie, -gross)` or use
265 |     `fct_reorder(movie, gross) %>% fct_rev()`.
266 | 
267 | #### (b) Using `geom_bar()` – data is unbinned
268 | 
269 | In this case, we can’t order by another variable since we only have one
270 | variable: a list of categories:
271 | 
272 | ``` r
273 | unbinned <- tibble(response = sample(c("yes", "no", "maybe"), 100, 
274 |                                      replace = TRUE, prob = c(.5, .15, .35)))
275 | 
276 | ggplot(unbinned, aes(response)) + geom_bar(color = mycolor, fill = myfill) +
277 |   theme_grey(14)
278 | ```
279 | 
280 | <img src="reorder_files/figure-gfm/unnamed-chunk-12-1.png" style="display: block; margin: auto;" />
281 | 
282 | Again the bars are ordered alphabetically by default, not in order of
283 | frequency. The solution is our fourth **forcats** function,
284 | `fct_infreq()`:
285 | 
286 | ``` r
287 | ggplot(unbinned, aes(fct_infreq(response))) + geom_bar(color = mycolor, fill = myfill) +
288 |   theme_grey(14)
289 | ```
290 | 
291 | <img src="reorder_files/figure-gfm/unnamed-chunk-13-1.png" style="display: block; margin: auto;" />
292 | 
293 | Note that `fct_infreq()` orders the levels in *decreasing* order of
294 | frequency, ideal for drawing bar charts (presumably not a coincidence).
295 | 
296 | *Many thanks to Emily Zabor (
297 | [@zabormetrics](https://twitter.com/zabormetrics)) for convincing me to
298 | try `forcats` despite my initial reluctance.*
299 | 
300 | For more on best practices for bar charts, see:
301 | 
302 | Antony Unwin, “Displaying Categorical Data,” *Graphical Data Analysis
303 | with R* (CRC Press: 2015).
304 | 
305 | For more detail on **forcats** functions in general, see:
306 | 
307 | Jenny Bryan, [“Be the boss of your
308 | factors”](http://stat545.com/block029_factors.html)
309 | 
310 | Garrett Grolemund and Hadley Wickham, [“Factors”
311 | chapter](http://r4ds.had.co.nz/factors.html) in *R for Data Science*
312 | 


--------------------------------------------------------------------------------