├── .github ├── .gitignore └── workflows │ └── R-CMD-check.yaml ├── LICENSE ├── .gitattributes ├── NAMESPACE ├── .Rbuildignore ├── cpsR.Rproj ├── DESCRIPTION ├── .gitignore ├── NEWS.md ├── LICENSE.md ├── R ├── utils.R └── get_data.R ├── man ├── get_asec.Rd └── get_basic.Rd ├── README.Rmd └── README.md /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | YEAR: 2021-2023 2 | COPYRIGHT HOLDER: cpsR authors 3 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(get_asec) 4 | export(get_basic) 5 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^cpsR\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^LICENSE\.md$ 4 | ^\.github$ 5 | ^README\.Rmd$ 6 | ^\.lintr$ 7 | ^CRAN-RELEASE$ 8 | -------------------------------------------------------------------------------- /cpsR.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: knitr 13 | LaTeX: pdfLaTeX 14 | 15 | AutoAppendNewline: Yes 16 | StripTrailingWhitespace: Yes 17 | 
LineEndingConversion: Posix 18 | 19 | BuildType: Package 20 | PackageUseDevtools: Yes 21 | PackageInstallArgs: --no-multiarch --with-keep.source 22 | PackageRoxygenize: rd,collate,namespace 23 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: cpsR 2 | Title: Load CPS Microdata into R Using the 'Census Bureau Data' API 3 | Version: 1.0.0 4 | Authors@R: 5 | person(given = "Matt", 6 | family = "Saenz", 7 | role = c("aut", "cre"), 8 | email = "mattsaenz165@gmail.com") 9 | Description: Load Current Population Survey (CPS) microdata into R using the 10 | 'Census Bureau Data' API 11 | (), including basic 12 | monthly CPS and CPS ASEC microdata. 13 | URL: https://github.com/matt-saenz/cpsR 14 | BugReports: https://github.com/matt-saenz/cpsR/issues 15 | License: MIT + file LICENSE 16 | Encoding: UTF-8 17 | RoxygenNote: 7.2.3 18 | Imports: 19 | httr, 20 | jsonlite, 21 | tibble 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # History files 2 | .Rhistory 3 | .Rapp.history 4 | 5 | # Session Data files 6 | .RData 7 | 8 | # Example code in package build process 9 | *-Ex.R 10 | 11 | # Output files from R CMD build 12 | /*.tar.gz 13 | 14 | # Output files from R CMD check 15 | /*.Rcheck/ 16 | 17 | # RStudio files 18 | .Rproj.user/ 19 | 20 | # produced vignettes 21 | vignettes/*.html 22 | vignettes/*.pdf 23 | 24 | # OAuth2 token, see https://github.com/hadley/httr/releases/tag/v0.3 25 | .httr-oauth 26 | 27 | # knitr and R markdown default cache directories 28 | /*_cache/ 29 | /cache/ 30 | 31 | # Temporary files created by R markdown 32 | *.utf8.md 33 | *.knit.md 34 | 35 | # Shiny token, see https://shiny.rstudio.com/articles/shinyapps.html 36 | rsconnect/ 37 | .Rproj.user 38 | 39 | # Mac 40 | .DS_Store 41 | 
-------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # cpsR 1.0.0 2 | 3 | * `get_asec()` and `get_basic()` no longer set an upper limit on supported years. 4 | 5 | # cpsR 0.7.0 6 | 7 | * `get_asec()` now supports CPS ASEC microdata for 1992 to 2013. 8 | * `get_basic()` now supports basic monthly CPS microdata for 2023. 9 | 10 | # cpsR 0.6.0 11 | 12 | * `get_asec()` now supports CPS ASEC microdata for 2022. 13 | * `get_basic()` now supports basic monthly CPS microdata for 1989 to 1993. 14 | 15 | # cpsR 0.5.0 16 | 17 | * `get_basic()` now supports basic monthly CPS microdata for 2022. 18 | 19 | # cpsR 0.4.5 20 | 21 | * Documentation updates and improvements. 22 | 23 | # cpsR 0.4.4 24 | 25 | * Simplified examples based on feedback from CRAN. 26 | 27 | # cpsR 0.4.3 28 | 29 | * Tweaked `DESCRIPTION` based on feedback from CRAN. 30 | 31 | # cpsR 0.4.2 32 | 33 | * Added a `NEWS.md` file to track changes to the package. 34 | * Added examples to resolve CRAN warning. 35 | * Removed `LazyData` from `DESCRIPTION` to resolve CRAN note. 
36 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2021-2023 cpsR authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? 
# Scalar predicates used by the argument checks below. Both reject `NA` so
# that later comparisons (e.g. `year < min_year`, `key == ""`) can never
# evaluate to `NA` inside an `if ()`, which would raise the unhelpful
# "missing value where TRUE/FALSE needed" error instead of our own message.
is_number <- function(x) is.numeric(x) && length(x) == 1 && !is.na(x)
is_string <- function(x) is.character(x) && length(x) == 1 && !is.na(x)

# Negation of `%in%`: TRUE for each element of `x` that is not in `table`.
`%!in%` <- function(x, table) match(x, table, nomatch = 0) == 0


# Look up the Census API key in env var `CENSUS_API_KEY`.
#
# Returns the key as a string. Errors if the env var is unset or empty
# (`Sys.getenv()` returns "" in both cases).
get_key <- function() {
  key <- Sys.getenv("CENSUS_API_KEY")

  if (key == "") {
    stop(
      "Census API key not found, supply with `key` argument or env var `CENSUS_API_KEY`",
      call. = FALSE
    )
  }

  key
}


# Validate `key`: must be a single, non-missing, non-empty string.
check_key <- function(key) {
  if (!is_string(key) || key == "") {
    stop("`key` must be a non-empty string", call. = FALSE)
  }
}


# Validate `vars`: a character vector with at least one element, no missing
# or empty elements, only letters/digits/underscores, and no duplicates
# (compared case-insensitively, since names are upper-cased for the request
# and lower-cased in the returned data).
check_vars <- function(vars) {
  if (!is.character(vars)) {
    stop("`vars` must be a character vector", call. = FALSE)
  }

  # An empty vector, `NA`, or "" would otherwise yield a malformed `get=`
  # query (e.g. "", "NA", or "A,,B") that only fails once it hits the API
  if (length(vars) == 0 || anyNA(vars) || any(!nzchar(vars))) {
    stop(
      "`vars` must contain at least one variable name and no missing or empty elements",
      call. = FALSE
    )
  }

  if (any(grepl(pattern = "[^A-Za-z0-9_]", x = vars))) {
    stop(
      "Elements of `vars` must only contain letters, digits, and underscores",
      call. = FALSE
    )
  }

  if (any(duplicated(tolower(vars)))) {
    stop("`vars` must not contain any duplicate elements", call. = FALSE)
  }
}


# Validate `year`: a single, non-missing, finite whole number that is at
# least `min_year`. No upper limit is enforced.
check_year <- function(year, min_year) {
  if (!is_number(year)) {
    stop("`year` must be a number", call. = FALSE)
  }

  # `is.finite()` is tested first because `Inf %% 1` is `NaN`
  if (!is.finite(year) || year %% 1 != 0) {
    stop("`year` must be a whole number", call. = FALSE)
  }

  if (year < min_year) {
    stop(
      "Invalid `year`, years ", min_year, " and on are currently supported",
      call. = FALSE
    )
  }
}


# Validate `month`: a single number equal to one of 1, 2, ..., 12.
check_month <- function(month) {
  if (!is_number(month) || month %!in% 1:12) {
    stop("`month` must be a number ranging from 1 to 12", call. = FALSE)
  }
}
26 | Defaults to environment variable \code{CENSUS_API_KEY}. See the 27 | \href{https://github.com/matt-saenz/cpsR#census-api-key}{README} for info 28 | on how (and why) to set up env var \code{CENSUS_API_KEY}.} 29 | 30 | \item{show_url}{If \code{TRUE}, show the URL the request was sent to 31 | (with \code{key} suppressed). Defaults to \code{FALSE}.} 32 | 33 | \item{tibble}{If \code{TRUE} (default), return data as a 34 | \href{https://tibble.tidyverse.org}{tibble}. If \code{FALSE}, return data 35 | as a base data frame.} 36 | 37 | \item{convert}{If \code{TRUE} (default), run 38 | \code{\link[utils:type.convert]{type.convert()}} with \code{as.is = TRUE} 39 | on the data returned by the Census API. If \code{FALSE}, all columns in 40 | the returned data will be character vectors (exactly as returned by the 41 | Census API).} 42 | } 43 | \value{ 44 | A \href{https://tibble.tidyverse.org}{tibble} or base data frame. 45 | } 46 | \description{ 47 | \code{get_asec()} loads 48 | \href{https://www.census.gov/data/datasets/time-series/demo/cps/cps-asec.html}{CPS ASEC} 49 | microdata using the Census API. 50 | } 51 | \examples{ 52 | \dontrun{ 53 | asec21 <- get_asec(2021, vars = c("marsupwt", "spm_poor")) 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /man/get_basic.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_data.R 3 | \name{get_basic} 4 | \alias{get_basic} 5 | \title{Load basic monthly CPS microdata} 6 | \usage{ 7 | get_basic( 8 | year, 9 | month, 10 | vars, 11 | key = get_key(), 12 | show_url = FALSE, 13 | tibble = TRUE, 14 | convert = TRUE 15 | ) 16 | } 17 | \arguments{ 18 | \item{year}{Year of data to retrieve. 
Years 1989 and on are currently 19 | supported.} 20 | 21 | \item{month}{Month of data to retrieve (specified as a number).} 22 | 23 | \item{vars}{Character vector of variables to retrieve, where each vector 24 | element corresponds to the name of a single variable. Variable names can 25 | be given in uppercase or lowercase but are always made lowercase in the 26 | returned data.} 27 | 28 | \item{key}{\href{https://api.census.gov/data/key_signup.html}{Census API key}. 29 | Defaults to environment variable \code{CENSUS_API_KEY}. See the 30 | \href{https://github.com/matt-saenz/cpsR#census-api-key}{README} for info 31 | on how (and why) to set up env var \code{CENSUS_API_KEY}.} 32 | 33 | \item{show_url}{If \code{TRUE}, show the URL the request was sent to 34 | (with \code{key} suppressed). Defaults to \code{FALSE}.} 35 | 36 | \item{tibble}{If \code{TRUE} (default), return data as a 37 | \href{https://tibble.tidyverse.org}{tibble}. If \code{FALSE}, return data 38 | as a base data frame.} 39 | 40 | \item{convert}{If \code{TRUE} (default), run 41 | \code{\link[utils:type.convert]{type.convert()}} with \code{as.is = TRUE} 42 | on the data returned by the Census API. If \code{FALSE}, all columns in 43 | the returned data will be character vectors (exactly as returned by the 44 | Census API).} 45 | } 46 | \value{ 47 | A \href{https://tibble.tidyverse.org}{tibble} or base data frame. 48 | } 49 | \description{ 50 | \code{get_basic()} loads 51 | \href{https://www.census.gov/data/datasets/time-series/demo/cps/cps-basic.html}{basic monthly CPS} 52 | microdata using the Census API. 
53 | } 54 | \examples{ 55 | \dontrun{ 56 | sep21 <- get_basic( 57 | year = 2021, 58 | month = 9, 59 | vars = c("pwcmpwgt", "prpertyp", "prtage", "pemlr") 60 | ) 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # cpsR 17 | 18 | 19 | 20 | [![CRAN status](https://www.r-pkg.org/badges/version/cpsR)](https://CRAN.R-project.org/package=cpsR) 21 | [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) 22 | [![CRAN downloads](https://cranlogs.r-pkg.org/badges/grand-total/cpsR)](https://cran.r-project.org/package=cpsR) 23 | [![R-CMD-check](https://github.com/matt-saenz/cpsR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/matt-saenz/cpsR/actions/workflows/R-CMD-check.yaml) 24 | 25 | 26 | ## Overview 27 | 28 | Load [Current Population Survey (CPS)](https://www.census.gov/programs-surveys/cps/about.html) microdata into R using the Census Bureau Data API, including [basic monthly CPS](https://www.census.gov/data/datasets/time-series/demo/cps/cps-basic.html) and [CPS ASEC](https://www.census.gov/data/datasets/time-series/demo/cps/cps-asec.html) microdata. 29 | 30 | Note: This product uses the Census Bureau Data API but is not endorsed or certified by the Census Bureau. 31 | 32 | For a Python version of this package, check out [PyCPS](https://github.com/matt-saenz/PyCPS). 
33 | 34 | ## Installation 35 | 36 | To install cpsR, run the following code: 37 | 38 | ``` r 39 | install.packages("cpsR") 40 | ``` 41 | 42 | To install the development version of cpsR, run the following code: 43 | 44 | ``` r 45 | # install.packages("devtools") 46 | devtools::install_github("matt-saenz/cpsR") 47 | ``` 48 | 49 | ## Census API key 50 | 51 | In order to use cpsR functions, you must supply a [Census API key](https://api.census.gov/data/key_signup.html) in one of two ways: 52 | 53 | 1. Using the `key` argument (manually) 54 | 2. Using environment variable `CENSUS_API_KEY` (automatically) 55 | 56 | Using environment variable (or env var, for short) `CENSUS_API_KEY` is strongly recommended for two reasons: 57 | 58 | 1. Saves you from having to copy-paste your key around 59 | 2. Allows you to avoid including your key in scripts 60 | 61 | It is important to avoid including your key in scripts if you plan to share your code with others (like in the [example](#example) below) since you should keep your key secret. 62 | 63 | You can set up env var `CENSUS_API_KEY` in two steps: 64 | 65 | First, open your `.Renviron` file. 
You can do so by running: 66 | 67 | ``` r 68 | # install.packages("usethis") 69 | usethis::edit_r_environ() 70 | ``` 71 | 72 | Second, add your Census API key to your `.Renviron` file like so: 73 | 74 | ``` 75 | CENSUS_API_KEY='your_key_here' 76 | ``` 77 | 78 | This enables cpsR functions to automatically look up your key by running: 79 | 80 | ``` r 81 | Sys.getenv("CENSUS_API_KEY") 82 | ``` 83 | 84 | ## Example 85 | 86 | ```{r example, message=FALSE} 87 | library(cpsR) 88 | library(dplyr) 89 | library(purrr) 90 | 91 | 92 | # Simple use of the basic monthly CPS 93 | 94 | sep21 <- get_basic( 95 | year = 2021, 96 | month = 9, 97 | vars = c("prpertyp", "prtage", "pemlr", "pwcmpwgt") 98 | ) 99 | 100 | sep21 101 | 102 | sep21 %>% 103 | filter(prpertyp == 2 & prtage >= 16) %>% 104 | summarize( 105 | pop16plus = sum(pwcmpwgt), 106 | employed = sum(pwcmpwgt[pemlr %in% 1:2]) 107 | ) %>% 108 | mutate(epop_ratio = employed / pop16plus) 109 | 110 | 111 | # Pulling multiple years of CPS ASEC microdata 112 | 113 | asec <- map_dfr(2020:2021, get_asec, vars = c("h_year", "marsupwt")) 114 | 115 | count(asec, h_year, wt = marsupwt) 116 | ``` 117 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # cpsR 5 | 6 | 7 | 8 | [![CRAN 9 | status](https://www.r-pkg.org/badges/version/cpsR)](https://CRAN.R-project.org/package=cpsR) 10 | [![Project Status: Active – The project has reached a stable, usable 11 | state and is being actively 12 | developed.](https://www.repostatus.org/badges/latest/active.svg)](https://www.repostatus.org/#active) 13 | [![CRAN 14 | downloads](https://cranlogs.r-pkg.org/badges/grand-total/cpsR)](https://cran.r-project.org/package=cpsR) 15 | [![R-CMD-check](https://github.com/matt-saenz/cpsR/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/matt-saenz/cpsR/actions/workflows/R-CMD-check.yaml) 16 | 17 | 18 | ## 
Overview 19 | 20 | Load [Current Population Survey 21 | (CPS)](https://www.census.gov/programs-surveys/cps/about.html) microdata 22 | into R using the Census Bureau Data API, including [basic monthly 23 | CPS](https://www.census.gov/data/datasets/time-series/demo/cps/cps-basic.html) 24 | and [CPS 25 | ASEC](https://www.census.gov/data/datasets/time-series/demo/cps/cps-asec.html) 26 | microdata. 27 | 28 | Note: This product uses the Census Bureau Data API but is not endorsed 29 | or certified by the Census Bureau. 30 | 31 | For a Python version of this package, check out 32 | [PyCPS](https://github.com/matt-saenz/PyCPS). 33 | 34 | ## Installation 35 | 36 | To install cpsR, run the following code: 37 | 38 | ``` r 39 | install.packages("cpsR") 40 | ``` 41 | 42 | To install the development version of cpsR, run the following code: 43 | 44 | ``` r 45 | # install.packages("devtools") 46 | devtools::install_github("matt-saenz/cpsR") 47 | ``` 48 | 49 | ## Census API key 50 | 51 | In order to use cpsR functions, you must supply a [Census API 52 | key](https://api.census.gov/data/key_signup.html) in one of two ways: 53 | 54 | 1. Using the `key` argument (manually) 55 | 2. Using environment variable `CENSUS_API_KEY` (automatically) 56 | 57 | Using environment variable (or env var, for short) `CENSUS_API_KEY` is 58 | strongly recommended for two reasons: 59 | 60 | 1. Saves you from having to copy-paste your key around 61 | 2. Allows you to avoid including your key in scripts 62 | 63 | It is important to avoid including your key in scripts if you plan to 64 | share your code with others (like in the [example](#example) below) 65 | since you should keep your key secret. 66 | 67 | You can set up env var `CENSUS_API_KEY` in two steps: 68 | 69 | First, open your `.Renviron` file. 
You can do so by running: 70 | 71 | ``` r 72 | # install.packages("usethis") 73 | usethis::edit_r_environ() 74 | ``` 75 | 76 | Second, add your Census API key to your `.Renviron` file like so: 77 | 78 | CENSUS_API_KEY='your_key_here' 79 | 80 | This enables cpsR functions to automatically look up your key by 81 | running: 82 | 83 | ``` r 84 | Sys.getenv("CENSUS_API_KEY") 85 | ``` 86 | 87 | ## Example 88 | 89 | ``` r 90 | library(cpsR) 91 | library(dplyr) 92 | library(purrr) 93 | 94 | 95 | # Simple use of the basic monthly CPS 96 | 97 | sep21 <- get_basic( 98 | year = 2021, 99 | month = 9, 100 | vars = c("prpertyp", "prtage", "pemlr", "pwcmpwgt") 101 | ) 102 | 103 | sep21 104 | #> # A tibble: 103,858 × 4 105 | #> prpertyp prtage pemlr pwcmpwgt 106 | #> 107 | #> 1 2 80 5 1361. 108 | #> 2 2 85 5 1411. 109 | #> 3 2 80 5 4619. 110 | #> 4 2 80 5 4587. 111 | #> 5 2 42 1 3677. 112 | #> 6 2 42 1 3645. 113 | #> 7 1 9 -1 0 114 | #> 8 2 41 1 3652. 115 | #> 9 2 32 7 4117. 116 | #> 10 2 67 1 2479. 117 | #> # ℹ 103,848 more rows 118 | 119 | sep21 %>% 120 | filter(prpertyp == 2 & prtage >= 16) %>% 121 | summarize( 122 | pop16plus = sum(pwcmpwgt), 123 | employed = sum(pwcmpwgt[pemlr %in% 1:2]) 124 | ) %>% 125 | mutate(epop_ratio = employed / pop16plus) 126 | #> # A tibble: 1 × 3 127 | #> pop16plus employed epop_ratio 128 | #> 129 | #> 1 261765646. 154025931. 0.588 130 | 131 | 132 | # Pulling multiple years of CPS ASEC microdata 133 | 134 | asec <- map_dfr(2020:2021, get_asec, vars = c("h_year", "marsupwt")) 135 | 136 | count(asec, h_year, wt = marsupwt) 137 | #> # A tibble: 2 × 2 138 | #> h_year n 139 | #> 140 | #> 1 2020 325268182. 141 | #> 2 2021 326195440. 
#' Load CPS ASEC microdata
#'
#' \code{get_asec()} loads
#' \href{https://www.census.gov/data/datasets/time-series/demo/cps/cps-asec.html}{CPS ASEC}
#' microdata using the Census API.
#'
#' @param year Year of data to retrieve. Years 1992 and on are currently
#'   supported.
#' @param vars Character vector of variables to retrieve, where each vector
#'   element corresponds to the name of a single variable. Variable names can
#'   be given in uppercase or lowercase but are always made lowercase in the
#'   returned data.
#' @param key \href{https://api.census.gov/data/key_signup.html}{Census API key}.
#'   Defaults to environment variable \code{CENSUS_API_KEY}. See the
#'   \href{https://github.com/matt-saenz/cpsR#census-api-key}{README} for info
#'   on how (and why) to set up env var \code{CENSUS_API_KEY}.
#' @param show_url If \code{TRUE}, show the URL the request was sent to
#'   (with \code{key} suppressed). Defaults to \code{FALSE}.
#' @param tibble If \code{TRUE} (default), return data as a
#'   \href{https://tibble.tidyverse.org}{tibble}. If \code{FALSE}, return data
#'   as a base data frame.
#' @param convert If \code{TRUE} (default), run
#'   \code{\link[utils:type.convert]{type.convert()}} with \code{as.is = TRUE}
#'   on the data returned by the Census API. If \code{FALSE}, all columns in
#'   the returned data will be character vectors (exactly as returned by the
#'   Census API).
#' @return A \href{https://tibble.tidyverse.org}{tibble} or base data frame.
#' @examples
#' \dontrun{
#' asec21 <- get_asec(2021, vars = c("marsupwt", "spm_poor"))
#' }
#'
#' @export
get_asec <- function(year, vars, key = get_key(),
                     show_url = FALSE, tibble = TRUE, convert = TRUE) {
  check_key(key)

  check_year(year, min_year = 1992)

  month <- 3 # Month of CPS ASEC is always March

  url <- make_url("asec", year, month, vars, key)

  message("Getting CPS ASEC microdata for ", year)

  get_data(url, show_url, tibble, convert)
}


#' Load basic monthly CPS microdata
#'
#' \code{get_basic()} loads
#' \href{https://www.census.gov/data/datasets/time-series/demo/cps/cps-basic.html}{basic monthly CPS}
#' microdata using the Census API.
#'
#' @param year Year of data to retrieve. Years 1989 and on are currently
#'   supported.
#' @param month Month of data to retrieve (specified as a number).
#' @inherit get_asec params return
#' @examples
#' \dontrun{
#' sep21 <- get_basic(
#'   year = 2021,
#'   month = 9,
#'   vars = c("pwcmpwgt", "prpertyp", "prtage", "pemlr")
#' )
#' }
#'
#' @export
get_basic <- function(year, month, vars, key = get_key(),
                      show_url = FALSE, tibble = TRUE, convert = TRUE) {
  check_key(key)

  check_year(year, min_year = 1989)

  # `make_url()` validates `month`, so `month.name[month]` below is safe
  url <- make_url("basic", year, month, vars, key)

  message(
    "Getting basic monthly CPS microdata for ", month.name[month], " ", year
  )

  get_data(url, show_url, tibble, convert)
}


# Build the Census API request URL for one CPS dataset/year/month.
#
# `dataset` is the path segment after "cps" ("asec" or "basic" in this file).
# `month` and `vars` are validated here. The month is sent as a lowercase
# three-letter abbreviation and the variables as one upper-cased,
# comma-separated `get` query value. `key` is appended as the last query
# parameter, which is what lets `get_data()` strip it with a suffix match.
make_url <- function(dataset, year, month, vars, key) {
  check_month(month)
  check_vars(vars)

  month_abb <- tolower(month.abb[month])
  collapsed_vars <- toupper(paste(vars, collapse = ","))

  httr::modify_url(
    url = "https://api.census.gov",
    path = paste("data", year, "cps", dataset, month_abb, sep = "/"),
    query = list(get = collapsed_vars, key = key)
  )
}


# Send the request to `url` and turn the JSON response into a data frame.
#
# Errors if the status code is not 200, if the response is not JSON, or if
# the parsed JSON is not a character matrix. When `show_url` is TRUE, the URL
# is shown with everything from "&key=" onward removed.
get_data <- function(url, show_url, tibble, convert) {
  if (show_url) {
    message("URL: ", sub(pattern = "&key=.*", replacement = "", x = url))
  }

  ua <- httr::user_agent("https://github.com/matt-saenz/cpsR")
  resp <- httr::GET(url, ua)
  status_code <- httr::status_code(resp)

  if (status_code != 200) {
    status <- httr::http_status(resp)

    stop(
      "Census API request failed [", status_code, "]: ", status$reason,
      call. = FALSE
    )
  }

  if (httr::http_type(resp) != "application/json") {
    stop("Census API did not return JSON", call. = FALSE)
  }

  # Pass the encoding explicitly so httr never has to guess it (and never
  # emits its "No encoding supplied" message) when the charset is missing
  json <- httr::content(resp, as = "text", encoding = "UTF-8")
  mat <- jsonlite::fromJSON(json)

  if (!is.matrix(mat) || !is.character(mat)) {
    stop("Census API data not parsed as expected", call. = FALSE)
  }

  build_df(mat, tibble, convert)
}


# Convert the character matrix parsed from the API response to a data frame.
#
# The first row of `mat` holds the column names and the remaining rows hold
# the data. Column names are always made lowercase. If `convert` is TRUE,
# columns are passed through `utils::type.convert(as.is = TRUE)`; otherwise
# they stay character. If `tibble` is TRUE, the result is a tibble.
build_df <- function(mat, tibble, convert) {
  col_names <- mat[1, , drop = TRUE] # Character vector of column names
  cols <- mat[-1, , drop = FALSE] # Character matrix of columns

  # All columns are character vectors at this point
  df <- as.data.frame(cols, stringsAsFactors = FALSE)
  names(df) <- tolower(col_names)

  if (convert) {
    df <- utils::type.convert(df, as.is = TRUE)
  }

  if (tibble) {
    df <- tibble::as_tibble(df)
  }

  df
}