├── .gitignore ├── LICENSE ├── NAMESPACE ├── data └── uscenpops.rda ├── man ├── figures │ ├── hex-uscenpops.png │ └── README-example-2-1.png └── uscenpops.Rd ├── .Rbuildignore ├── _pkgdown.yml ├── .travis.yml ├── uscenpops.Rproj ├── inst └── CITATION ├── DESCRIPTION ├── R └── data.R ├── LICENSE.md ├── README.Rmd └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2020 2 | COPYRIGHT HOLDER: Kieran Healy 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | -------------------------------------------------------------------------------- /data/uscenpops.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjhealy/uscenpops/HEAD/data/uscenpops.rda -------------------------------------------------------------------------------- /man/figures/hex-uscenpops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjhealy/uscenpops/HEAD/man/figures/hex-uscenpops.png -------------------------------------------------------------------------------- /man/figures/README-example-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjhealy/uscenpops/HEAD/man/figures/README-example-2-1.png -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^uscenpops\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | 
^LICENSE\.md$ 5 | ^\.travis\.yml$ 6 | ^_pkgdown\.yml$ 7 | ^docs$ 8 | ^pkgdown$ 9 | ^data-raw$ 10 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | destination: docs 2 | authors: 3 | Kieran Healy: 4 | href: http://kieranhealy.org 5 | template: 6 | params: 7 | bootswatch: cosmo 8 | reference: 9 | - title: "Datasets" 10 | desc: > 11 | Included datasets. 12 | contents: 13 | - uscenpops 14 | navbar: 15 | right: 16 | - icon: fa-github 17 | href: https://github.com/kjhealy/uscenpops 18 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # R for travis: see documentation at https://docs.travis-ci.com/user/languages/r 2 | 3 | language: R 4 | cache: packages 5 | 6 | warnings_are_errors: false 7 | 8 | addons: 9 | apt: 10 | packages: 11 | - libudunits2-dev 12 | 13 | before_cache: Rscript -e 'remotes::install_github("r-lib/pkgdown")' 14 | deploy: 15 | provider: script 16 | script: Rscript -e 'pkgdown::deploy_site_github(ssh_id = Sys.getenv("TRAVIS_DEPLOY_KEY", ""))' 17 | skip_cleanup: true 18 | -------------------------------------------------------------------------------- /uscenpops.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 18 | BuildType: Package 19 | PackageUseDevtools: Yes 20 | PackageInstallArgs: --no-multiarch --with-keep.source 21 | PackageRoxygenize: rd,collate,namespace 22 | 
-------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite the package 'uscenpops' in publications use:") 2 | 3 | bibentry( 4 | bibtype = "Manual", 5 | title = "uscenpops: US Census Counts", 6 | date = "2020", 7 | author = person(given = "Kieran", 8 | family = "Healy", 9 | role = c("aut", "cre"), 10 | email = "kjhealy@soc.duke.edu"), 11 | year = "2020", 12 | note = "R package version 0.1.0", 13 | url = "http://kjhealy.github.io/uscenpops", 14 | key = "uscenpops") 15 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: uscenpops 2 | Title: Decennial and Intercensal Estimates of US Population by Age and Sex 1900-2019 3 | Version: 0.0.0.9000 4 | Authors@R: 5 | person(given = "Kieran", 6 | family = "Healy", 7 | role = c("aut", "cre"), 8 | email = "kjhealy@gmail.com", 9 | comment = c(ORCID = "https://orcid.org/0000-0001-9114-981X")) 10 | Description: Decennial and Intercensal Estimates of US Population by Age and Sex 1900-2019 from the US Census Bureau. 11 | License: MIT + file LICENSE 12 | Encoding: UTF-8 13 | LazyData: true 14 | URL: https://github.com/kjhealy/uscenpops 15 | BugReports: https://github.com/kjhealy/uscenpops/issues 16 | RoxygenNote: 7.1.0 17 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' US Census and Intercensal Population Counts and Estimates 2 | #' 3 | #' US Census population estimates by year of age and sex, 1900-2019 4 | #' 5 | #' @format A tibble with 10,520 rows and 5 columns 6 | #' \describe{ 7 | #' \item{\code{year}}{Four-digit calendar year. 
Where multiple monthly or quarterly estimates were available in the original data, the July estimate was used for the yearly count.} 8 | #' \item{\code{age}}{Age in years. Top-coded at 75 between 1900 and 1939, at 85 between 1940 and 1979, and 100 thereafter.} 9 | #' \item{\code{pop}}{Total population count.} 10 | #' \item{\code{male}}{Male population count.} 11 | #' \item{\code{female}}{Female population count.} 12 | #' } 13 | #' 14 | #' @docType data 15 | #' @keywords datasets 16 | #' @name uscenpops 17 | #' @source United States Census Bureau 18 | 'uscenpops' 19 | -------------------------------------------------------------------------------- /man/uscenpops.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{uscenpops} 5 | \alias{uscenpops} 6 | \title{US Census and Intercensal Population Counts and Estimates} 7 | \format{ 8 | A tibble with 10,520 rows and 5 columns 9 | \describe{ 10 | \item{\code{year}}{Year in format. Where multiple monthly or quarterly estimates were available in the original data, the July estimate was used for the yearly count.} 11 | \item{\code{age}}{Age in years. 
Top-coded at 75 between 1900 and 1939, at 85 between 1940 and 1979, and 100 thereafter.} 12 | \item{\code{pop}}{Total population count.} 13 | \item{\code{male}}{Male population count.} 14 | \item{\code{female}}{Female population count.} 15 | } 16 | } 17 | \source{ 18 | United States Census Bureau 19 | } 20 | \usage{ 21 | uscenpops 22 | } 23 | \description{ 24 | US Census population estimates by year of age and sex, 1900-2019 25 | } 26 | \keyword{datasets} 27 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2020 Kieran Healy 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | 17 | # uscenpops 18 | 19 | 20 | 21 | [![Travis build status](https://travis-ci.org/kjhealy/uscenpops.svg?branch=master)](https://travis-ci.org/kjhealy/uscenpops) 22 | 23 | 24 | This package provides a single dataset of censal and intercensal population estimates for the United States by year of age and sex, for every year from 1900 to 2019. 25 | 26 | ## Installation 27 | 28 | `uscenpops` is a data package for use in teaching. 29 | 30 | ### Install direct from GitHub 31 | 32 | You can install the beta version of uscenpops from [GitHub](https://github.com/kjhealy/uscenpops) with: 33 | 34 | ```{r, eval = FALSE} 35 | devtools::install_github("kjhealy/uscenpops") 36 | ``` 37 | 38 | ### Installation using `drat` 39 | 40 | While using `install_github()` works just fine, it would be nicer to be able to just type `install.packages("uscenpops")` or `update.packages("uscenpops")` in the ordinary way. We can do this using Dirk Eddelbuettel's [drat](http://eddelbuettel.github.io/drat/DratForPackageUsers.html) package. Drat provides a convenient way to make R aware of package repositories other than CRAN. 
41 | 42 | First, install `drat`: 43 | 44 | ```{r drat, eval = FALSE} 45 | if (!require("drat")) { 46 | install.packages("drat") 47 | library("drat") 48 | } 49 | ``` 50 | 51 | Then use `drat` to tell R about the repository where `uscenpops` is hosted: 52 | 53 | ```{r drat-uscenpops} 54 | drat::addRepo("kjhealy") 55 | ``` 56 | 57 | You can now install `uscenpops`: 58 | 59 | ```{r uscenpops-drat, eval = FALSE} 60 | install.packages("uscenpops") 61 | ``` 62 | 63 | To ensure that the `uscenpops` repository is always available, you can add the following line to your `.Rprofile` or `.Rprofile.site` file: 64 | 65 | ```{r rprofile} 66 | drat::addRepo("kjhealy") 67 | ``` 68 | 69 | With that in place you'll be able to do `install.packages("uscenpops")` or `update.packages("uscenpops")` and have everything work as you'd expect. 70 | 71 | Note that the drat repository only contains data packages that are not on CRAN, so you will never be in danger of grabbing the wrong version of any other package. 72 | 73 | 74 | ## Loading the data 75 | 76 | The package works best with the [tidyverse](http://tidyverse.org/) libraries. 
77 | 78 | ```{r libs} 79 | library(tidyverse) 80 | ``` 81 | 82 | Load the data: 83 | 84 | ```{r load} 85 | library(uscenpops) 86 | ``` 87 | 88 | ```{r uscenpops} 89 | uscenpops 90 | ``` 91 | 92 | ## Example 93 | 94 | 95 | ```{r example-1} 96 | 97 | library(dplyr) 98 | library(ggplot2) 99 | 100 | pop_pyr <- uscenpops %>% select(year, age, male, female) %>% 101 | pivot_longer(male:female, names_to = "group", values_to = "count") %>% 102 | group_by(year, group) %>% 103 | mutate(total = sum(count), 104 | pct = (count/total)*100, 105 | base = 0) 106 | 107 | pop_pyr 108 | 109 | ``` 110 | 111 | ```{r example-2, fig.height = 10, fig.width = 10, dpi = 300} 112 | 113 | ## Axis labels 114 | mbreaks <- c("1M", "2M", "3M") 115 | 116 | ## colors 117 | pop_colors <- c("#E69F00", "#0072B2") 118 | 119 | ## In-plot year labels 120 | dat_text <- data.frame( 121 | label = c(seq(1900, 2015, 5), 2019), 122 | year = c(seq(1900, 2015, 5), 2019), 123 | age = rep(95, 25), 124 | count = rep(-2.75e6, 25) 125 | ) 126 | 127 | pop_pyr$count[pop_pyr$group == "male"] <- -pop_pyr$count[pop_pyr$group == "male"] 128 | 129 | p <- pop_pyr %>% 130 | filter(year %in% c(seq(1900, 2015, 5), 2019)) %>% 131 | ggplot(mapping = aes(x = age, ymin = base, 132 | ymax = count, fill = group)) 133 | 134 | p + geom_ribbon(alpha = 0.9, color = "black", size = 0.1) + 135 | geom_label(data = dat_text, 136 | mapping = aes(x = age, y = count, 137 | label = label), inherit.aes = FALSE, 138 | vjust = "inward", hjust = "inward", 139 | fontface = "bold", 140 | color = "gray40", 141 | fill = "gray95") + 142 | scale_y_continuous(labels = c(rev(mbreaks), "0", mbreaks), 143 | breaks = seq(-3e6, 3e6, 1e6), 144 | limits = c(-3e6, 3e6)) + 145 | scale_x_continuous(breaks = seq(10, 100, 10)) + 146 | scale_fill_manual(values = pop_colors, labels = c("Females", "Males")) + 147 | guides(fill = guide_legend(reverse = TRUE)) + 148 | labs(x = "Age", y = "Population in Millions", 149 | title = "Age Distribution of the U.S. 
Population, 1900-2019", 150 | subtitle = "Age is top-coded at 75 until 1939, at 85 until 1979, and at 100 since then", 151 | caption = "Kieran Healy / kieranhealy.org / Data: US Census Bureau.", 152 | fill = "") + 153 | theme(legend.position = "bottom", 154 | plot.title = element_text(size = rel(2), face = "bold"), 155 | strip.background = element_blank(), 156 | strip.text.x = element_blank()) + 157 | coord_flip() + 158 | facet_wrap(~ year, ncol = 5) 159 | 160 | 161 | 162 | ``` 163 | 164 | ## Source 165 | 166 | The data are sourced from the [US Census Bureau](http://census.gov), from the residential estimates available in various formats and spans on its website. In any year where multiple months were available, the July estimate was used. 167 | 168 | ## Similar Packages 169 | 170 | - Neal Grantham's [uspops](https://github.com/nsgrantham/uspops) contains _total_ annual population estimates from 1900 to 2018 as well as _state total_ annual estimates over the same period. 171 | 172 | 173 | ## Citation 174 | 175 | ```{r citation} 176 | citation("uscenpops") 177 | ``` 178 | 179 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | # uscenpops 7 | 8 | 9 | 10 | [![Travis build status](https://travis-ci.org/kjhealy/uscenpops.svg?branch=master)](https://travis-ci.org/kjhealy/uscenpops) 11 | 12 | 13 | This package provides a single dataset of censal and intercensal population estimates for the United States by year of age and sex, for every year from 1900 to 2019. 14 | 15 | ## Installation 16 | 17 | `uscenpops` is a data package for use in teaching. 
18 | 19 | ### Install direct from GitHub 20 | 21 | You can install the beta version of uscenpops from [GitHub](https://github.com/kjhealy/uscenpops) with: 22 | 23 | 24 | ```r 25 | devtools::install_github("kjhealy/uscenpops") 26 | ``` 27 | 28 | ### Installation using `drat` 29 | 30 | While using `install_github()` works just fine, it would be nicer to be able to just type `install.packages("uscenpops")` or `update.packages("uscenpops")` in the ordinary way. We can do this using Dirk Eddelbuettel's [drat](http://eddelbuettel.github.io/drat/DratForPackageUsers.html) package. Drat provides a convenient way to make R aware of package repositories other than CRAN. 31 | 32 | First, install `drat`: 33 | 34 | 35 | ```r 36 | if (!require("drat")) { 37 | install.packages("drat") 38 | library("drat") 39 | } 40 | ``` 41 | 42 | Then use `drat` to tell R about the repository where `uscenpops` is hosted: 43 | 44 | 45 | ```r 46 | drat::addRepo("kjhealy") 47 | ``` 48 | 49 | You can now install `uscenpops`: 50 | 51 | 52 | ```r 53 | install.packages("uscenpops") 54 | ``` 55 | 56 | To ensure that the `uscenpops` repository is always available, you can add the following line to your `.Rprofile` or `.Rprofile.site` file: 57 | 58 | 59 | ```r 60 | drat::addRepo("kjhealy") 61 | ``` 62 | 63 | With that in place you'll be able to do `install.packages("uscenpops")` or `update.packages("uscenpops")` and have everything work as you'd expect. 64 | 65 | Note that the drat repository only contains data packages that are not on CRAN, so you will never be in danger of grabbing the wrong version of any other package. 66 | 67 | 68 | ## Loading the data 69 | 70 | The package works best with the [tidyverse](http://tidyverse.org/) libraries. 
71 | 72 | 73 | ```r 74 | library(tidyverse) 75 | ``` 76 | 77 | Load the data: 78 | 79 | 80 | ```r 81 | library(uscenpops) 82 | ``` 83 | 84 | 85 | ```r 86 | uscenpops 87 | #> # A tibble: 10,520 x 5 88 | #> year age pop male female 89 | #> 90 | #> 1 1900 0 1811000 919000 892000 91 | #> 2 1900 1 1835000 928000 907000 92 | #> 3 1900 2 1846000 932000 914000 93 | #> 4 1900 3 1848000 932000 916000 94 | #> 5 1900 4 1841000 928000 913000 95 | #> 6 1900 5 1827000 921000 906000 96 | #> 7 1900 6 1806000 911000 895000 97 | #> 8 1900 7 1780000 899000 881000 98 | #> 9 1900 8 1750000 884000 866000 99 | #> 10 1900 9 1717000 868000 849000 100 | #> # … with 10,510 more rows 101 | ``` 102 | 103 | ## Example 104 | 105 | 106 | 107 | ```r 108 | 109 | library(dplyr) 110 | library(ggplot2) 111 | 112 | pop_pyr <- uscenpops %>% select(year, age, male, female) %>% 113 | pivot_longer(male:female, names_to = "group", values_to = "count") %>% 114 | group_by(year, group) %>% 115 | mutate(total = sum(count), 116 | pct = (count/total)*100, 117 | base = 0) 118 | 119 | pop_pyr 120 | #> # A tibble: 21,040 x 7 121 | #> # Groups: year, group [240] 122 | #> year age group count total pct base 123 | #> 124 | #> 1 1900 0 male 919000 38867000 2.36 0 125 | #> 2 1900 0 female 892000 37227000 2.40 0 126 | #> 3 1900 1 male 928000 38867000 2.39 0 127 | #> 4 1900 1 female 907000 37227000 2.44 0 128 | #> 5 1900 2 male 932000 38867000 2.40 0 129 | #> 6 1900 2 female 914000 37227000 2.46 0 130 | #> 7 1900 3 male 932000 38867000 2.40 0 131 | #> 8 1900 3 female 916000 37227000 2.46 0 132 | #> 9 1900 4 male 928000 38867000 2.39 0 133 | #> 10 1900 4 female 913000 37227000 2.45 0 134 | #> # … with 21,030 more rows 135 | ``` 136 | 137 | 138 | ```r 139 | 140 | ## Axis labels 141 | mbreaks <- c("1M", "2M", "3M") 142 | 143 | ## colors 144 | pop_colors <- c("#E69F00", "#0072B2") 145 | 146 | ## In-plot year labels 147 | dat_text <- data.frame( 148 | label = c(seq(1900, 2015, 5), 2019), 149 | year = c(seq(1900, 2015, 5), 2019), 
150 | age = rep(95, 25), 151 | count = rep(-2.75e6, 25) 152 | ) 153 | 154 | pop_pyr$count[pop_pyr$group == "male"] <- -pop_pyr$count[pop_pyr$group == "male"] 155 | 156 | p <- pop_pyr %>% 157 | filter(year %in% c(seq(1900, 2015, 5), 2019)) %>% 158 | ggplot(mapping = aes(x = age, ymin = base, 159 | ymax = count, fill = group)) 160 | 161 | p + geom_ribbon(alpha = 0.9, color = "black", size = 0.1) + 162 | geom_label(data = dat_text, 163 | mapping = aes(x = age, y = count, 164 | label = label), inherit.aes = FALSE, 165 | vjust = "inward", hjust = "inward", 166 | fontface = "bold", 167 | color = "gray40", 168 | fill = "gray95") + 169 | scale_y_continuous(labels = c(rev(mbreaks), "0", mbreaks), 170 | breaks = seq(-3e6, 3e6, 1e6), 171 | limits = c(-3e6, 3e6)) + 172 | scale_x_continuous(breaks = seq(10, 100, 10)) + 173 | scale_fill_manual(values = pop_colors, labels = c("Females", "Males")) + 174 | guides(fill = guide_legend(reverse = TRUE)) + 175 | labs(x = "Age", y = "Population in Millions", 176 | title = "Age Distribution of the U.S. Population, 1900-2019", 177 | subtitle = "Age is top-coded at 75 until 1939, at 85 until 1979, and at 100 since then", 178 | caption = "Kieran Healy / kieranhealy.org / Data: US Census Bureau.", 179 | fill = "") + 180 | theme(legend.position = "bottom", 181 | plot.title = element_text(size = rel(2), face = "bold"), 182 | strip.background = element_blank(), 183 | strip.text.x = element_blank()) + 184 | coord_flip() + 185 | facet_wrap(~ year, ncol = 5) 186 | ``` 187 | 188 | ![plot of chunk example-2](man/figures/README-example-2-1.png) 189 | 190 | ## Source 191 | 192 | The data are sourced from the [US Census Bureau](http://census.gov), from the residential estimates available in various formats and spans on its website. In any year where multiple months were available, the July estimate was used. 
193 | 194 | ## Similar Packages 195 | 196 | - Neal Grantham's [uspops](https://github.com/nsgrantham/uspops) contains _total_ annual population estimates from 1900 to 2018 as well as _state total_ annual estimates over the same period. 197 | 198 | 199 | ## Citation 200 | 201 | 202 | ```r 203 | citation("uscenpops") 204 | #> 205 | #> To cite the package 'uscenpops' in publications use: 206 | #> 207 | #> Healy K (2020). _uscenpops: US Census Counts_. R package version 0.1.0, <URL: 208 | #> http://kjhealy.github.io/uscenpops>. 209 | #> 210 | #> A BibTeX entry for LaTeX users is 211 | #> 212 | #> @Manual{uscenpops, 213 | #> title = {uscenpops: US Census Counts}, 214 | #> date = {2020}, 215 | #> author = {Kieran Healy}, 216 | #> year = {2020}, 217 | #> note = {R package version 0.1.0}, 218 | #> url = {http://kjhealy.github.io/uscenpops}, 219 | #> } 220 | ``` 221 | 222 | --------------------------------------------------------------------------------