├── .Rbuildignore
├── .github
│   └── workflows
│       └── codeql.yml
├── .gitignore
├── DESCRIPTION
├── LICENSE
├── NAMESPACE
├── NEWS.md
├── R
│   ├── data.R
│   ├── expand_json.R
│   ├── hb_shrink.R
│   ├── helpers.R
│   ├── link_poi_naics.R
│   ├── patterns_lookup.R
│   ├── read_core.R
│   ├── read_distancing.R
│   ├── read_many_files.R
│   ├── read_patterns.R
│   ├── read_shop.R
│   ├── safegraph_api.R
│   ├── safegraph_aws.R
│   ├── sample_size_adjust.R
│   ├── scale_to_date.R
│   ├── utils-data-table.R
│   ├── utils-pipe.R
│   └── volume_over_time.R
├── README.md
├── SafeGraphR.Rproj
├── data
│   ├── canada_cd_pop.rda
│   ├── canada_cd_types.rda
│   ├── cbg_pop.rda
│   ├── county_pop.rda
│   ├── distancing.rda
│   ├── fips_to_names.rda
│   ├── naics_2.rda
│   ├── naics_4.rda
│   ├── naics_codes.rda
│   ├── norm.rda
│   ├── panel.rda
│   ├── pat_NY_NJ.rda
│   ├── pat_naics.rda
│   └── state_info.rda
├── docs
│   ├── 404.html
│   ├── LICENSE-text.html
│   ├── articles
│   │   ├── Automatic_Traffic-over-Time_Processing.html
│   │   ├── Automatic_Traffic-over-Time_Processing_files
│   │   │   ├── figure-html
│   │   │   │   └── unnamed-chunk-6-1.png
│   │   │   ├── header-attrs-2.6
│   │   │   │   └── header-attrs.js
│   │   │   └── header-attrs-2.9
│   │   │       └── header-attrs.js
│   │   ├── SafeGraphR.html
│   │   ├── SafeGraphR_files
│   │   │   ├── accessible-code-block-0.0.1
│   │   │   │   └── empty-anchor.js
│   │   │   ├── anchor-sections-1.0
│   │   │   │   ├── anchor-sections.css
│   │   │   │   └── anchor-sections.js
│   │   │   ├── header-attrs-2.5
│   │   │   │   └── header-attrs.js
│   │   │   ├── header-attrs-2.6
│   │   │   │   └── header-attrs.js
│   │   │   └── header-attrs-2.9
│   │   │       └── header-attrs.js
│   │   ├── distancing_vignette.html
│   │   ├── distancing_vignette_files
│   │   │   ├── accessible-code-block-0.0.1
│   │   │   │   └── empty-anchor.js
│   │   │   ├── anchor-sections-1.0
│   │   │   │   ├── anchor-sections.css
│   │   │   │   └── anchor-sections.js
│   │   │   ├── figure-html
│   │   │   │   └── unnamed-chunk-8-1.png
│   │   │   ├── header-attrs-2.5
│   │   │   │   └── header-attrs.js
│   │   │   ├── header-attrs-2.6
│   │   │   │   └── header-attrs.js
│   │   │   └── header-attrs-2.9
│   │   │       └── header-attrs.js
│   │   ├── index.html
│   │   ├── patterns_vignette.html
│   │   └── patterns_vignette_files
│   │       ├── accessible-code-block-0.0.1
│   │       │   └── empty-anchor.js
│   │       ├── anchor-sections-1.0
│   │       │   ├── anchor-sections.css
│   │       │   └── anchor-sections.js
│   │       ├── figure-html
│   │       │   ├── unnamed-chunk-21-1.png
│   │       │   ├── unnamed-chunk-22-1.png
│   │       │   └── unnamed-chunk-23-1.png
│   │       ├── header-attrs-2.5
│   │       │   └── header-attrs.js
│   │       ├── header-attrs-2.6
│   │       │   └── header-attrs.js
│   │       └── header-attrs-2.9
│   │           └── header-attrs.js
│   ├── authors.html
│   ├── bootstrap-toc.css
│   ├── bootstrap-toc.js
│   ├── docsearch.css
│   ├── docsearch.js
│   ├── index.html
│   ├── link.svg
│   ├── news
│   │   └── index.html
│   ├── pkgdown.css
│   ├── pkgdown.js
│   ├── pkgdown.yml
│   ├── reference
│   │   ├── Rplot001.png
│   │   ├── canada_cd_pop.html
│   │   ├── canada_cd_types.html
│   │   ├── cbg_pop.html
│   │   ├── county_pop.html
│   │   ├── distancing.html
│   │   ├── expand_cat_json.html
│   │   ├── expand_integer_json.html
│   │   ├── expand_open_hours.html
│   │   ├── find_date.html
│   │   ├── fips_from_cbg.html
│   │   ├── fips_to_names.html
│   │   ├── graph_template-1.png
│   │   ├── graph_template.html
│   │   ├── growth_over_time.html
│   │   ├── hb_shrink.html
│   │   ├── index.html
│   │   ├── link_poi_naics.html
│   │   ├── ma.html
│   │   ├── naics_2.html
│   │   ├── naics_4.html
│   │   ├── naics_codes.html
│   │   ├── norm.html
│   │   ├── panel.html
│   │   ├── pat_naics.html
│   │   ├── patterns_lookup.html
│   │   ├── pipe.html
│   │   ├── processing_template.html
│   │   ├── rbind_by_list_pos.html
│   │   ├── read_core.html
│   │   ├── read_distancing.html
│   │   ├── read_many_csvs.html
│   │   ├── read_many_patterns.html
│   │   ├── read_many_shop.html
│   │   ├── read_patterns.html
│   │   ├── read_shop.html
│   │   ├── safegraph_api.html
│   │   ├── safegraph_aws.html
│   │   ├── sample_size_adjust.html
│   │   ├── scale_to_date.html
│   │   ├── scale_yoy.html
│   │   └── state_info.html
│   └── sitemap.xml
├── man
│   ├── canada_cd_pop.Rd
│   ├── canada_cd_types.Rd
│   ├── cbg_pop.Rd
│   ├── county_pop.Rd
│   ├── distancing.Rd
│   ├── expand_cat_json.Rd
│   ├── expand_integer_json.Rd
│   ├── expand_open_hours.Rd
│   ├── find_date.Rd
│   ├── fips_from_cbg.Rd
│   ├── fips_to_names.Rd
│   ├── graph_template.Rd
│   ├── growth_over_time.Rd
│   ├── hb_shrink.Rd
│   ├── link_poi_naics.Rd
│   ├── ma.Rd
│   ├── naics_2.Rd
│   ├── naics_4.Rd
│   ├── naics_codes.Rd
│   ├── norm.Rd
│   ├── panel.Rd
│   ├── pat_naics.Rd
│   ├── patterns_lookup.Rd
│   ├── pipe.Rd
│   ├── processing_template.Rd
│   ├── rbind_by_list_pos.Rd
│   ├── read_core.Rd
│   ├── read_distancing.Rd
│   ├── read_many_csvs.Rd
│   ├── read_many_patterns.Rd
│   ├── read_many_shop.Rd
│   ├── read_patterns.Rd
│   ├── read_shop.Rd
│   ├── safegraph_api.Rd
│   ├── safegraph_aws.Rd
│   ├── sample_size_adjust.Rd
│   ├── scale_to_date.Rd
│   ├── scale_yoy.Rd
│   └── state_info.Rd
└── vignettes
    ├── .gitignore
    ├── Automatic_Traffic-over-Time_Processing.Rmd
    ├── SafeGraphR.Rmd
    ├── distancing_vignette.Rmd
    └── patterns_vignette.Rmd
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 |
14 | on:
15 | push:
16 | branches: [ "master" ]
17 | pull_request:
18 | # The branches below must be a subset of the branches above
19 | branches: [ "master" ]
20 | schedule:
21 | - cron: '41 12 * * 6'
22 |
23 | jobs:
24 | analyze:
25 | name: Analyze
26 | runs-on: ubuntu-latest
27 | permissions:
28 | actions: read
29 | contents: read
30 | security-events: write
31 |
32 | strategy:
33 | fail-fast: false
34 | matrix:
35 | language: [ ]
36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
37 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
38 |
39 | steps:
40 | - name: Checkout repository
41 | uses: actions/checkout@v3
42 |
43 | # Initializes the CodeQL tools for scanning.
44 | - name: Initialize CodeQL
45 | uses: github/codeql-action/init@v2
46 | with:
47 | languages: ${{ matrix.language }}
48 | # If you wish to specify custom queries, you can do so here or in a config file.
49 | # By default, queries listed here will override any specified in a config file.
50 | # Prefix the list here with "+" to use these queries and those in the config file.
51 |
52 | # For details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
53 | # queries: security-extended,security-and-quality
54 |
55 |
56 | # Autobuild attempts to build any compiled languages (C/C++, C#, Go, or Java).
57 | # If this step fails, then you should remove it and run the build manually (see below)
58 | - name: Autobuild
59 | uses: github/codeql-action/autobuild@v2
60 |
61 | # ℹ️ Command-line programs to run using the OS shell.
62 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
63 |
64 | # If the Autobuild fails above, remove it and uncomment the following three lines.
65 | # Modify them (or add more) to build your code; if your project needs a different build process, refer to the example below for guidance.
66 |
67 | # - run: |
68 | # echo "Run, Build Application using script"
69 | # ./location_of_script_within_repo/buildscript.sh
70 |
71 | - name: Perform CodeQL Analysis
72 | uses: github/codeql-action/analyze@v2
73 | with:
74 | category: "/language:${{matrix.language}}"
75 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | inst/doc
6 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: SafeGraphR
2 | Type: Package
3 | Title: Package for Processing and Analyzing SafeGraph Data
4 | Version: 0.5.2
5 | Authors@R: c(
6 | person(given = "Nick", family = "Huntington-Klein", role = c("aut","cre"),
7 | email = "nhuntington-klein@seattleu.edu",
8 | comment = c(ORCID = "0000-0002-7352-3991")))
9 | Description: This package is designed to make it easy to read SafeGraph files
10 | into R, and to perform basic preprocessing on SafeGraph data to ready it for analysis.
11 | License: Apache 2.0
12 | Encoding: UTF-8
13 | LazyData: true
14 | Depends:
15 | R (>= 3.4)
16 | Imports:
17 | lubridate,
18 | R.utils,
19 | magrittr,
20 | scales,
21 | purrr,
22 | stringr,
23 | jsonlite,
24 | utils,
25 | data.table,
26 | bit64,
27 | aws.s3,
28 | ghql
29 | Suggests:
30 | ggplot2,
31 | directlabels,
32 | ggrepel,
33 | paletteer,
34 | knitr,
35 | rmarkdown
36 | RoxygenNote: 7.1.2
37 | VignetteBuilder: knitr
38 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export("%>%")
4 | export(expand_cat_json)
5 | export(expand_integer_json)
6 | export(expand_open_hours)
7 | export(find_date)
8 | export(fips_from_cbg)
9 | export(graph_template)
10 | export(growth_over_time)
11 | export(hb_shrink)
12 | export(link_poi_naics)
13 | export(ma)
14 | export(patterns_lookup)
15 | export(processing_template)
16 | export(rbind_by_list_pos)
17 | export(read_core)
18 | export(read_distancing)
19 | export(read_many_csvs)
20 | export(read_many_patterns)
21 | export(read_many_shop)
22 | export(read_patterns)
23 | export(read_shop)
24 | export(safegraph_api)
25 | export(safegraph_aws)
26 | export(sample_size_adjust)
27 | export(scale_to_date)
28 | export(scale_yoy)
29 | import(data.table)
30 | importFrom(magrittr,"%>%")
31 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # SafeGraphR 0.3.0
2 |
3 | * Added a `NEWS.md` file to track changes to the package.
4 | * Updated package to work with AWS file structure as updated in December 2020.
5 | * Added the ability to download the most recent Core files in `read_core()`.
6 | * Added the ability to download the appropriate patterns files in `read_many_patterns`, and automated post-read re-aggregation.
7 | * Added (and integrated) the `patterns_lookup()` function, which shows the proper weekly files to look in for data on a particular date, and also can download those files.
8 | * Added the suite of "Volume Over Time" functions which can download, process, and graph `visits_by_day` weekly patterns data.
9 |
10 | # SafeGraphR 0.4.0
11 |
12 | * Switched reliance on `safegraph_place_id` to `placekey` since new files will no longer contain `safegraph_place_id`
13 | * Fixed an issue in `read_many_patterns()` where it would error if `filter` left zero observations in some subfiles
14 |
15 | # SafeGraphR 0.4.1
16 |
17 | * Added considerable support for Canadian data, including place names in `state_info`, `fips_to_names`, `canada_cd_pop`, and `canada_cd_types`.
18 | * Also changed `fips_to_cbg` to support Canada, and as a result the output is now character rather than numeric.
19 |
20 | # SafeGraphR 0.4.2
21 |
22 | * Updated buckets for `safegraph_aws()` (and `patterns_lookup` to match). NOTE THIS IS A BREAKING CHANGE. `safegraph_aws()` now only looks at the most updated versions of the data. Access to previous versions is discontinued, and you'll need to do it by hand using `aws.s3::s3sync`. There's no reason to use the old versions of the data anyway.
23 |
24 | # SafeGraphR 0.4.3
25 |
26 | * Changed data files to include leading zeroes for state and county FIPS
27 |
28 | # SafeGraphR 0.4.4
29 |
30 | * Moved filepaths for backfill data
31 |
32 | # SafeGraphR 0.5.0
33 |
34 | * Added `safegraph_api` and made `safegraph_aws` more flexible to not rely on the soon-shutting-down C19 bucket
35 |
36 | # SafeGraphR 0.5.2
37 |
38 | * Updated vignettes to reflect new usage, removed warning about "upcoming" changes.
39 |
--------------------------------------------------------------------------------
/R/hb_shrink.R:
--------------------------------------------------------------------------------
1 | #' Hierarchical Bayes Shrinkage
2 | #'
3 | #' This is a function that takes a "success" and a "total" variable (often something like "number of devices staying home" and "total number of devices") and shrinks the resulting proportions toward the overall proportion in the full data set, using hierarchical Bayes shrinkage methods for proportions.
4 | #'
5 | #' This is usually called by group, either with \code{dplyr::group_by} or with the \code{by} argument in a \code{data.table}, so that individual observations can be shrunk to the group level.
6 | #'
7 | #' @param success A numeric integer variable containing the number of successes.
8 | #' @param total A numeric integer variable containing the total sample size.
9 | #' @examples
10 | #' \dontrun{
11 | #' # The directory distdat is the folder we have downloaded the distancing data to from AWS.
12 | #' # Read and compile all distancing data from May 1 to May 7
13 | #' distancing <- read_distancing(
14 | #' start = lubridate::ymd('2020-05-01'),
15 | #' end = lubridate::ymd('2020-05-07'),
16 | #' dir = distdat
17 | #' )
18 | #'
19 | #' # Shrink county to state
20 | #' distancing <- distancing[,.(county_fips = county_fips,
21 | #' unshrunk_stay_home = completely_home_device_count/device_count,
22 | #' shrunk_stay_home = hb_shrink(completely_home_device_count, device_count)),
23 | #' by = .(state_fips, date)]
24 | #'
25 | #' }
26 | #' @export
27 |
28 | hb_shrink <- function(success,total) {
29 | expected_theta <- mean(success/total)
30 | var_theta <- var(success/total)
31 |
32 | # Get the beta dist alpha+beta
33 | alpha_plus_beta <- (expected_theta*(1-expected_theta)/var_theta) - 1
34 |
35 | # and separate them out
36 | alpha <- alpha_plus_beta*expected_theta
37 | beta <- alpha_plus_beta*(1-expected_theta)
38 |
39 | # Posteriors!
40 | posterior_alpha <- alpha + success
41 | posterior_beta <- beta + (total - success)
42 |
43 | # Finally, estimate the mean of the beta distribution
44 | return(posterior_alpha/(posterior_alpha+posterior_beta))
45 | }
46 |
--------------------------------------------------------------------------------
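A minimal sketch of what `hb_shrink()` does, using invented counts rather than real SafeGraph data: every group-level proportion is pulled toward the pooled mean, and groups with fewer observations are pulled hardest.

```r
library(SafeGraphR)

# Invented counts: devices staying home (success) out of devices observed (total)
success <- c(5, 50, 500, 20, 200)
total   <- c(10, 100, 1000, 100, 1000)

# Raw proportions: 0.5, 0.5, 0.5, 0.2, 0.2
success / total

# Shrunk proportions: all move toward the pooled mean (0.38),
# with the small-sample groups moving the most
hb_shrink(success, total)
```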
/R/link_poi_naics.R:
--------------------------------------------------------------------------------
1 | #' Use a Core Places file to Create a POI-NAICS crosswalk
2 | #'
3 | #' Feed this function the most recent Core Places file, and it will give you back a \code{data.table} with two columns: \code{safegraph_place_id} and \code{naics_code}. Saving this file is recommended. Then, provide this object to \code{read_shop} or \code{read_many_shop} so that you can use \code{'naics_code'} in the \code{by} argument.
4 | #'
5 | #' This function is DEPRECATED. It only works with the OLD Core files (and thus the continued usage of \code{safegraph_place_id} instead of \code{placekey}), and is superseded by the more flexible \code{read_core()} function.
6 | #'
7 | #' @param filename The filename of the \code{ZIP} Core Places file.
8 | #' @param dir The directory that the file is in.
9 | #' @param exdir Name of the directory to unzip to.
10 | #' @param cleanup Set to \code{TRUE} to delete all the unzipped files after being read in.
11 | #' @param silent Suppress timing messages.
12 | #' @examples
13 | #'
14 | #' \dontrun{
15 | #' # Core-USA-June2020-Release-CORE_POI-2020_05-2020-06-06.zip is a Core places file in the working directory
16 | #' poi_link <- link_poi_naics('Core-USA-June2020-Release-CORE_POI-2020_05-2020-06-06.zip')
17 | #' }
18 | #' @export
19 |
20 | link_poi_naics <- function(filename, dir = '.', exdir = dir, cleanup = FALSE, silent = FALSE) {
21 |
22 | # Where's our zip?
23 | if (!(stringr::str_sub(dir,nchar(dir)) == '/')) {
24 | dir <- paste0(dir,'/')
25 | }
26 | if (stringr::str_sub(exdir,nchar(exdir)) == '/') {
27 | exdir <- stringr::str_sub(exdir, 1, nchar(exdir)-1)
28 | }
29 |
30 | f <- paste0(dir,filename)
31 |
32 | # Get the list of files
33 | files_in_zip <- utils::unzip(f,list=TRUE)$Name
34 | # Only the .csv.gzs count
35 | files_in_zip <- files_in_zip[grep('\\.csv\\.gz',files_in_zip)]
36 | # And unzip
37 | utils::unzip(f, files = files_in_zip, exdir = exdir)
38 |
39 | files_in_zip %>%
40 | paste0(exdir,'/',.) %>%
41 | purrr::map(function(x) {
42 | if (!silent) {
43 | message(paste('Starting to read',x,'at',Sys.time()))
44 | }
45 | patterns <- data.table::fread(x, select = c('safegraph_place_id',
46 | 'naics_code'))
47 | patterns <- patterns[!is.na(naics_code)]
48 | if (cleanup) {
49 | file.remove(x)
50 | }
51 | return(patterns)
52 | }) %>%
53 | data.table::rbindlist() %>%
54 | unique() %>%
55 | return()
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/R/read_core.R:
--------------------------------------------------------------------------------
1 | #' Read SafeGraph Core
2 | #'
3 | #' Be aware that the files this is designed to work with are large and this function may take a while to execute. This function takes a folder of Core files and reads them in. The output is a \code{data.table}.
4 | #'
5 | #' AS OF SafeGraphR VERSION 0.3.0 THIS FUNCTION ONLY WORKS WITH NEW CORE FILE FORMATS. For old-format Core files, you can still use the less-flexible and otherwise deprecated \code{link_poi_naics()} function.
6 | #'
7 | #' @param dir The directory that the CORE files are in. If this folder contains multiple months of Core files, it will use the most recent (this only works if you are using the standard AWS file structure).
8 | #' @param filter A character string describing a logical statement for filtering the data, for example \code{filter = 'naics_code == 512131'} would give you only movie theater POIs. Will be used as an \code{i} argument in a \code{data.table}, see \code{help(data.table)}. Filtering here instead of afterwards can cut down on time and memory demands.
9 | #' @param select Character vector of variables to get from the file. Set to \code{NULL} to get all variables. If you plan to link the results to a patterns file, you will probably want to include \code{'placekey'} in this vector. Note that any variables mentioned in \code{filter} MUST be in \code{select} unless \code{select = NULL}.
10 | #' @param key A character string containing an AWS Access Key ID. If \code{key} and \code{secret} are both specified, \code{read_core} will download the most recent Core files and process them. This process assumes your system date is set correctly, and will only check this month's Core and last month's Core, since one of those should exist.
11 | #' @param secret A character string containing an AWS Secret Access Key.
12 | #' @param silent Suppress timing messages.
13 | #' @param ... Other arguments to be passed to \code{data.table::fread} when reading in the \code{CSV} files inside of the \code{ZIP}. For example, \code{nrows} to only read in a certain number of rows.
14 | #' @examples
15 | #'
16 | #' \dontrun{
17 | #' # Location of our CORE file
18 | #' # Note we probably don't have to specify 2020/10 if that's the most recent one
19 | #' dir <- '../SafeGraph/core_poi/2020/10/'
20 | #'
21 | #' # Let's only get POIs in California
22 | #' # and in the retail sector (NAICS codes 44-45)
23 | #' locations <- read_core(dir = dir,
24 | #' filter = 'region == "CA" & floor(naics_code/10000) %in% 44:45')
25 | #' }
26 | #' @export
27 |
28 | read_core <- function(dir = 'core_poi/',
29 | filter = NULL,
30 | select = NULL,
31 | key = NULL,
32 | secret = NULL,
33 | silent = FALSE,
34 | ...) {
35 |
36 | # Are we downloading?
37 | if (!is.null(key) & !is.null(secret)) {
38 | current_date <- lubridate::today()
39 |
40 | corefiles <- list.files(dir, pattern = '.csv.gz', recursive = TRUE)
41 | coredates <- NA
42 | if (length(corefiles) > 0) {
43 | coredates <- corefiles %>%
44 | purrr::map_chr(find_date) %>%
45 | lubridate::ymd() %>%
46 | max()
47 | }
48 | if (is.na(coredates)) {
49 | coredates <- lubridate::ymd('1970-01-01')
50 | }
51 |
52 |
53 |
54 | # If we have the current month, good to go
55 | if (!(lubridate::year(current_date) == lubridate::year(coredates) &
56 | lubridate::month(current_date) == lubridate::month(coredates))) {
57 |
58 | created_dir <- FALSE
59 | if (!dir.exists(paste0(dir,lubridate::year(current_date),'/',stringr::str_pad(lubridate::month(current_date),2,"left","0"),'/'))) {
60 | dir.create(paste0(dir,lubridate::year(current_date),'/',stringr::str_pad(lubridate::month(current_date),2,"left","0"),'/'),
61 | recursive = TRUE)
62 | created_dir <- TRUE
63 | }
64 |
65 | try(safegraph_aws(paste0(dir,lubridate::year(current_date),'/',stringr::str_pad(lubridate::month(current_date),2,"left","0"),'/'),
66 | 'core',
67 | prefix = paste0('core_poi/',lubridate::year(current_date),'/',stringr::str_pad(lubridate::month(current_date),2,"left","0"),'/'),
68 | key = key, secret = secret))
69 |
70 | # Did we just download anything? If not, try last month too
71 | if (length(list.files(dir, pattern = '.csv.gz', recursive = TRUE)) == length(corefiles)) {
72 | if (created_dir) {
73 | unlink(paste0(dir,lubridate::year(current_date),'/',stringr::str_pad(lubridate::month(current_date),2,"left","0"),'/'),
74 | recursive = TRUE)
75 | }
76 |
77 | current_date <- current_date - lubridate::months(1)
78 |
79 | try(safegraph_aws(paste0(dir,lubridate::year(current_date),'/',stringr::str_pad(lubridate::month(current_date),2,"left","0"),'/'),
80 | 'core',
81 | prefix = paste0('core_poi/',lubridate::year(current_date),'/',stringr::str_pad(lubridate::month(current_date),2,"left","0"),'/'),
82 | key = key, secret = secret))
83 | }
84 | }
85 | }
86 |
87 | # Check if we have multiple months of data
88 | corefiles <- list.files(dir, pattern = '.csv.gz', recursive = TRUE)
89 | if (sum(stringr::str_detect(corefiles, 'core_poi-part1.csv.gz')) > 1) {
90 | coredates <- corefiles %>%
91 | purrr::map_chr(find_date) %>%
92 | lubridate::ymd() %>%
93 | max()
94 | if (is.na(coredates)) {
95 | stop('More than one month of Core data detected in dir, but I can\'t figure out which set of files is most recent. Did you change the file structure or filenames from AWS?')
96 | }
97 |
98 | # Use only the recent month
99 | corefiles <- corefiles[stringr::str_detect(corefiles,
100 | paste0(lubridate::year(coredates),'/',stringr::str_pad(lubridate::month(coredates),2,'left','0'),'/',stringr::str_pad(lubridate::day(coredates),2,'left','0'))
101 | )]
102 | }
103 |
104 | # Now read
105 | corefiles %>%
106 | paste0(dir,.) %>%
107 | purrr::map(function(x) {
108 | if (!silent) {
109 | message(paste('Starting to read',x,'at',Sys.time()))
110 | }
111 | patterns <- data.table::fread(x, select = select, ...)
112 | if (!is.null(filter)) {
113 | patterns <- patterns[eval(parse(text=filter))]
114 | }
115 |
116 | return(patterns)
117 | }) %>%
118 | data.table::rbindlist() %>%
119 | unique() %>%
120 | return()
121 | }
122 |
123 |
--------------------------------------------------------------------------------
/R/read_distancing.R:
--------------------------------------------------------------------------------
1 | #' Read in Stay-at-Home Data
2 | #'
3 | #' Takes a folder of stay-at-home SafeGraph data structured as it comes from AWS (i.e. folders 2020/04/03 for April 3, 2020) and reads the files in.
4 | #'
5 | #' The stay-at-home data is no longer being updated as of April 19, 2021. This function should still work for the old data though.
6 | #'
7 | #' Note that after reading in data, if \code{gen_fips = TRUE}, state and county names can be merged in using \code{data(fips_to_names)}.
8 | #'
9 | #' @param start Date object with the starting date to read in stay-at-home data.
10 | #' @param end Ending date to read stay-at-home data to.
11 | #' @param dir The folder in which the "2020" (etc.) folder resides.
12 | #' @param gen_fips Set to \code{TRUE} to use the \code{origin_census_block_group} variable to generate \code{state_fips} and \code{county_fips} as numeric variables. This will also result in \code{origin_census_block_group} being converted to character.
13 | #' @param by After reading, collapse to this level by \code{sum}ming all the data. Usually \code{c('state_fips','county_fips')} with \code{gen_fips = TRUE}. Set to \code{NULL} to aggregate across all initial rows, or set to \code{FALSE} to not aggregate at all.
14 | #' @param filter A character string describing a logical statement for filtering the data, for example \code{filter = 'state_fips == 6'} would give you only data from California. Will be used as an \code{i} argument in a \code{data.table}, see \code{help(data.table)}. Filtering here instead of afterwards can cut down on time and memory demands.
15 | #' @param select Character vector of variables to get from the file. Set to \code{NULL} to get all variables.
16 | #' @param ... Other arguments to be passed to \code{data.table::fread} when reading in the file. For example, \code{nrows} to only read in a certain number of rows.
17 | #' @examples
18 | #'
19 | #' \dontrun{
20 | #'
21 | #' # The directory distdat is the folder we have downloaded the distancing data to from AWS.
22 | #' # Read and compile all distancing data from May 1 to May 7
23 | #' distancing <- read_distancing(
24 | #' start = lubridate::ymd('2020-05-01'),
25 | #' end = lubridate::ymd('2020-05-07'),
26 | #' dir = distdat
27 | #' )
28 | #'
29 | #' }
30 | #' @export
31 |
32 | read_distancing <- function(start,end,dir = '.',gen_fips = TRUE, by = c('state_fips','county_fips'), filter = NULL, select = c('origin_census_block_group',
33 | 'device_count',
34 | 'completely_home_device_count',
35 | 'part_time_work_behavior_devices',
36 | 'full_time_work_behavior_devices'), ...) {
37 |
38 |
39 | # Make sure defaults are desired
40 | if (getOption("distancing.warning", TRUE) &
41 | identical(select,c('origin_census_block_group',
42 | 'device_count',
43 | 'completely_home_device_count',
44 | 'part_time_work_behavior_devices',
45 | 'full_time_work_behavior_devices')) &
46 | identical(by, c('state_fips','county_fips'))) {
47 | message("Running read_distancing with default select and by - this will select only the device count variables, and aggregate to the county level. Change the select and by options if you don't want this. This message will be displayed only once per session.")
48 | options("distancing.warning" = FALSE)
49 | }
50 |
51 | # Make sure dir ends with /
52 | if (dir == '') {
53 | dir <- '.'
54 | }
55 | if (stringr::str_sub(dir,-1) != '/') {
56 | dir <- paste0(dir, '/')
57 | }
58 |
59 | # List of dates that I want
60 | dates <- start + lubridate::days(0:(end - start))
61 |
62 | # Read in dates one at a time, keep memory low if possible!
63 | for (r in dates) {
64 |
65 | # Where's the prize
66 | datechar <- as.character(lubridate::as_date(r))
67 | target <- paste0(dir,stringr::str_sub(datechar,1,4),
68 | '/',stringr::str_sub(datechar,6,7),
69 | '/',stringr::str_sub(datechar,9,10))
70 | target <- paste0(target,'/',list.files(target))
71 |
72 | print(target)
73 |
74 | # Read in only these columns
75 | if (is.null(select)) {
76 | dt <- data.table::fread(file = target,...)
77 | } else {
78 | dt <- data.table::fread(file = target,select = select,...)
79 | }
80 |
81 | # Convert CBG to string so we can easily extract state and county indicators
82 | if (gen_fips) {
83 | dt[,origin_census_block_group := as.character(origin_census_block_group)]
84 | dt[,c('state_fips','county_fips') := fips_from_cbg(origin_census_block_group)]
85 | }
86 |
87 | # Do filter after gen_fips so you can filter on fips
88 | if (!is.null(filter)) {
89 | dt <- dt[eval(parse(text=filter))]
90 | }
91 |
92 | # Collapse
93 | if (!is.logical(by)) {
94 | # Can keep only summable or by-variables
95 | dt <- subset(dt,select = (sapply(dt,is.numeric) |
96 | names(dt) %in% by))
97 | dt <- dt[,lapply(.SD, sum, na.rm=TRUE), by=by]
98 | }
99 |
100 | # Add the date column
101 | dt[,date := lubridate::as_date(r)]
102 |
103 | # Slap it all together
104 | if (r == dates[1]) {
105 | compiled_data <- dt
106 | } else {
107 | compiled_data <- rbind(dt,compiled_data)
108 | }
109 | }
110 |
111 | return(compiled_data)
112 | }
113 |
--------------------------------------------------------------------------------
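As the documentation above notes, once the data are read in with `gen_fips = TRUE`, state and county names can be merged in via `data(fips_to_names)`. A minimal sketch, assuming `fips_to_names` is keyed on the same `state_fips`/`county_fips` columns produced by the default `by` aggregation:

```r
library(SafeGraphR)

# distancing is the county-by-date data.table from read_distancing() above,
# read with the default gen_fips = TRUE and by = c('state_fips','county_fips')

# Merge in state and county names for labeling
data(fips_to_names)
distancing <- merge(distancing, fips_to_names,
                    by = c('state_fips', 'county_fips'),
                    all.x = TRUE)
```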
/R/read_shop.R:
--------------------------------------------------------------------------------
1 | #' Read a ZIP file with patterns and other data as it comes from the SafeGraph Shop
2 | #'
3 | #' This will open up a ZIP file from the SafeGraph shop and will read all of the data in, performing processing of the patterns files using \code{read_patterns}.
4 | #'
5 | #' The result will be a named list with each of the components of the data.
6 | #'
7 | #' @param filename The filename of the \code{.zip} file from the shop.
8 | #' @param dir The directory the file is in.
9 | #' @param keeplist Character vector of the files in the ZIP to read in. Use \code{'patterns'} to refer to the patterns files.
10 | #' @param exdir Name of the directory to unzip to.
11 | #' @param cleanup Set to \code{TRUE} to delete all the unzipped files after being read in.
12 | #' @param start_date An argument to be passed to \code{read_patterns} giving the first date present in the file, as a date object. When using \code{read_shop} this should usually be included, since the patterns file names in the shop files are not in a format \code{read_patterns} can pick up on automatically.
13 | #' @param by,fun,na.rm,filter,expand_int,expand_cat,expand_name,multi,naics_link,select,gen_fips,silent,... Other arguments to be passed to \code{read_patterns}, specified as in \code{help(read_patterns)}. Note that \code{gen_fips} is \code{FALSE} here by default, rather than \code{TRUE} as elsewhere, as files from the shop often do not contain the \code{poi_cbg} variable necessary to use it. Check which state indicator variables you have access to, perhaps \code{region}.
14 | #' @examples
15 | #'
16 | #' \dontrun{
17 | #' # In the working directory I have the file 'shop_file.zip' to read in
18 | #'
19 | #' mydata <- read_shop('shop_file.zip',
20 | #' # I only want some of the files
21 | #' keeplist = c('patterns','home_panel_summary.csv'),
22 | #' # For patterns, only keep these variables
23 | #' select = c('raw_visit_counts', 'region', 'bucketed_dwell_times', 'location_name'),
24 | #' # I want two aggregations of patterns - one of total visits by state ('region')
25 | #' # and another by location_name that has the dwell times for each brand
26 | #' multi = list(
27 | #' list(name = 'all',
28 | #' by = 'region'),
29 | #' list(name = 'location_dwells',
30 | #' by = 'location_name',
31 | #' expand_cat = 'bucketed_dwell_times',
32 | #' expand_name = 'bucketed_times')
33 | #' ),
34 | #' # Be sure to specify start_date for read_shop
35 | #' start_date = lubridate::ymd('2020-03-01'))
36 | #'
37 | #' # The result is a list with two items: patterns and home_panel_summary.csv.
38 | #' # patterns itself is a list with two data.tables inside - 'all' and 'location_dwells',
39 | #' # aggregated as given.
40 | #' }
41 | #'
42 | #' @export
43 |
44 | read_shop <- function(filename,dir = '.',keeplist = c('patterns','normalization_stats.csv','home_panel_summary.csv','visit_panel_summary.csv','brand_info.csv'),
45 | exdir = dir, cleanup = TRUE,
46 | by = NULL, fun = sum, na.rm = TRUE, filter = NULL,
47 | expand_int = NULL, expand_cat = NULL,
48 | expand_name = NULL, multi = NULL, naics_link = NULL,
49 | select=NULL, gen_fips = FALSE, silent = FALSE, start_date = NULL, ...) {
50 |
51 | # Where's our zip?
52 | if (stringr::str_sub(dir,nchar(dir)) == '/') {
53 | f <- paste0(dir,filename)
54 | } else {
55 | f <- paste(dir,filename,sep='/')
56 | }
57 | if (dir == '.') {
58 | f <- filename
59 | }
60 |
61 | # Get the list of files
62 | files_in_zip <- utils::unzip(f,list=TRUE)$Name
63 | # And unzip
64 | # If patterns isn't in the keeplist, only unzip the requested files
65 | # Otherwise just unzip everything; skipping the rest won't make a difference
66 | if ('patterns' %in% keeplist) {
67 | utils::unzip(f, exdir = exdir)
68 | } else {
69 | utils::unzip(f, files = keeplist, exdir = exdir)
70 | files_in_zip <- keeplist
71 | }
72 |
73 | # Edit the multi option to make gen_fips FALSE by default
74 | if (!is.null(multi)) {
75 | for (m in 1:length(multi)) {
76 | if (is.null(multi[[m]]$gen_fips)) {
77 | multi[[m]]$gen_fips <- FALSE
78 | }
79 | }
80 | }
81 |
82 | retDT <- list()
83 |
84 | for (k in keeplist) {
85 | if (k == 'patterns') {
86 | # Get our full list of patterns files
87 | patfiles <- files_in_zip[stringr::str_detect(files_in_zip,'patterns')]
88 |
89 | retDT[['patterns']] <- read_many_patterns(filelist = patfiles, dir = exdir, recursive = FALSE, by = by, fun = fun, na.rm = na.rm, filter = filter,
90 | expand_int = expand_int, expand_cat = expand_cat,
91 | expand_name = expand_name, multi = multi, naics_link = naics_link,
92 | select = select, gen_fips = gen_fips, start_date = start_date, silent = silent)
93 | } else {
94 | if (stringr::str_sub(exdir,nchar(exdir)) == '/') {
95 | target <- paste0(exdir,k)
96 | } else {
97 | target <- paste(exdir,k,sep='/')
98 | }
99 | retDT[[k]] <- data.table::fread(file=target)
100 | }
101 | }
102 |
103 | if (cleanup) {
104 | for (fiz in files_in_zip) {
105 | if (stringr::str_sub(exdir,nchar(exdir)) == '/') {
106 | file.remove(paste0(exdir,fiz))
107 | } else {
108 | file.remove(paste(exdir,fiz,sep='/'))
109 | }
110 | }
111 | }
112 |
113 | return(retDT)
114 | }
115 |
--------------------------------------------------------------------------------
/R/safegraph_aws.R:
--------------------------------------------------------------------------------
1 | #' Download SafeGraph data from AWS COVID Response
2 | #'
3 | #' This is a thin wrapper for \code{aws.s3::s3sync} that will aim you at the right directory to synchronize.
4 | #'
5 | #' NOTE THE BREAKING CHANGE WITH SafeGraphR 0.4.2: BUCKET NAMES ARE CHANGED AND ACCESS TO OUTDATED VERSIONS OF DATA IS REMOVED.
6 | #'
7 | #' This function doesn't add too much, but it does make the default behavior you probably want a bit easier. If you plan to specify the \code{aws.s3::s3sync} "bucket" option yourself, this function is largely useless.
8 | #'
9 | #' See catalog.safegraph.io for more description of the various buckets.
10 | #'
11 | #' @param path The local directory to synchronize.
12 | #' @param dataset The SafeGraph bucket to get from. Can be "weekly" (new method since July 2021), "weekly-backfill" (the new method for times before July 2021; note AS OF AUGUST 2021 this gives the same result as "weekly" but I've kept "weekly-backfill" here in case it switches back to being different later), "monthly" (method since July 2021; also contains backfill folders as \code{*_backfill/}), "neighborhood" (June 2021 and forward), "neighborhood-backfill" (May 2021 and previous), "distancing", "core", "core-canada", "geo-supplement", or, to get the baseline bucket, "none".
13 | #' @param bucket_only Instead of doing an \code{aws.s3::s3sync} call, just return the correct bucket as a string. Then you can use that to do your own \code{aws.s3::s3sync} call, or work with the AWS CLI.
14 | #' @param base_url The base URL to pull the data from.
15 | #' @param key A character string containing an AWS Access Key ID.
16 | #' @param secret A character string containing an AWS Secret Access Key.
17 | #' @param region A character string containing the AWS region.
18 | #' @param prefix Leading part of the objects in the bucket must have this prefix. For example, to download social distancing data only from 2020, set this to "2020/". Some of the backfill buckets can be tricky because folder structure also includes the release date. For example, for "weekly-backfill" if you want patterns data, you want "patterns_backfill/2021/07/15/15/" and THEN followed by the time period you want like "2021/". If you want backfill data from "monthly", for example patterns, it's "patterns_backfill/2021/07/15/16/", then followed by the year/month. The "neighborhood" buckets use "y=2021/m=06/" etc instead of "2021/06".
19 | #' @param prefix_is_dir If \code{FALSE}, the files matching \code{prefix} will be downloaded directly to \code{path}, which may not be desired behavior if \code{prefix} contains a directory (you probably want the directory structure to match!). Set to \code{TRUE} to, in effect, replace \code{path} with \code{paste0(path, prefix)} and so download files to the appropriate folder. Don't use if \code{prefix} also contains file characteristics like extension. This is \code{prefix_IS_dir}, not \code{prefix_CONTAINS_dir}.
20 | #' @param s3 The S3 server that stores the data.
21 | #' @param max_print Temporarily set \code{options(max.print)} to this value. This will massively speed up the function, as \code{aws.s3::s3sync} likes to print the full list of files on the server before moving on. The option will be returned to its original value afterwards. Set to \code{NULL} to not alter any options.
22 | #' @param ... Additional parameters to be sent to \code{aws.s3::s3sync} and from there on to \code{aws.s3::s3HTTP}. "direction" will be ignored.
23 | #' @examples
24 | #'
25 | #' \dontrun{
26 | #'
27 | #' # Download all the recent weekly-patterns files to the working directory
28 | #' safegraph_aws(dataset = 'weekly', key = 'MYINFO', secret = 'MYOTHERINFO')
29 | #'
30 | #' }
31 | #'
32 | #' @export
33 |
34 | safegraph_aws <- function(path = '.',
35 | dataset,
36 | bucket_only = FALSE,
37 | base_url = 's3.wasabisys.com',
38 | key, secret,
39 | region = '',
40 | prefix ='',
41 | prefix_is_dir = FALSE,
42 | s3 = 's3://sg-c19-response/',
43 | max_print = 1,
44 | ...) {
45 |
46 | warning('The safegraph C19 AWS server is to be shut down as of January 31, 2022. This function will still work for enterprise users with their own AWS access.')
47 | if (grepl('new',dataset)) {
48 | stop('As of SafeGraphR 0.4.2, the bucket names are changed and the "new" suffix is no longer required. See help(safegraph_aws).')
49 | }
50 |
51 | if (dataset == 'monthly') {
52 | buck <- 'monthly-patterns-2020-12/release-2021-07/'
53 | } else if (dataset == 'weekly') {
54 | buck <- 'weekly-patterns-delivery-2020-12/release-2021-07/weekly/'
55 | } else if (dataset == 'weekly-backfill') {
56 | buck <- 'weekly-patterns-delivery-2020-12/release-2021-07/weekly/'
57 | } else if (dataset == 'distancing') {
58 | buck <- 'social-distancing/v2/'
59 | } else if (dataset == 'geo-supplement') {
60 | buck <- 'geo-supplement/'
61 | } else if (dataset == 'core') {
62 | buck <- 'core-places-delivery/'
63 | } else if (dataset == 'core-canada') {
64 | buck <- 'core-places-canada/'
65 | } else if (dataset == 'neighborhood-backfill') {
66 | buck <- 'neighborhood-patterns/neighborhood-patterns/2021/07/07/release-2021-07-01/'
67 | } else if (dataset == 'neighborhood') {
68 | buck <- 'neighborhood-patterns/neighborhood-patterns/2021/07/27/release-2021-07-01/'
69 | } else if (dataset == 'none') { buck <- '' } else {
70 | warning('Custom bucket name being used.')
71 | buck <- dataset
72 | }
73 |
74 | if (bucket_only) {
75 | return(buck)
76 | }
77 |
78 | if (prefix_is_dir) {
79 | if (stringr::str_sub(path, -1) != '/') {
80 | path <- paste0(path, '/')
81 | }
82 |
83 | path <- paste0(path, prefix)
84 | }
85 |
86 | if (!is.null(max_print)) {
87 | mp <- options('max.print')$max.print
88 |
89 | options('max.print' = max_print)
90 |
91 | }
92 |
93 | if (!dir.exists(path)) {
94 | dir.create(path, recursive = TRUE)
95 | }
96 |
97 | aws.s3::s3sync(path = path, bucket = s3,
98 | prefix = paste0(buck,prefix),
99 | key = key, base_url = base_url, secret = secret,
100 | region = region, direction = 'download', ...)
101 |
102 | if (!is.null(max_print)) {
103 | options('max.print' = mp)
104 | }
105 |
106 | return(NULL)
107 | }
108 |
--------------------------------------------------------------------------------
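One use of `bucket_only = TRUE` is to grab the bucket path and run the sync yourself with full control over the `aws.s3::s3sync` options. A sketch, mirroring the call `safegraph_aws()` makes internally; the credentials and local folder are placeholders:

```r
library(SafeGraphR)

# Get the bucket prefix for the weekly patterns data without syncing anything
buck <- safegraph_aws(dataset = 'weekly', bucket_only = TRUE)

# Then run the sync yourself, e.g. restricting to 2021 files
aws.s3::s3sync(path = 'my_local_folder/',
               bucket = 's3://sg-c19-response/',
               prefix = paste0(buck, '2021/'),
               key = 'MYKEY', secret = 'MYSECRET',
               base_url = 's3.wasabisys.com', region = '',
               direction = 'download')
```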
/R/sample_size_adjust.R:
--------------------------------------------------------------------------------
1 | #' Adjust SafeGraph Data for Sampling Size Differences
2 | #'
3 | #' This function uses 2016 American Community Survey data to adjust SafeGraph counts for the portion of the population that is sampled. This function will return a \code{data.table} with columns for a geographic ID and the variable \code{adjust_factor}, which you can merge into your data and then multiply whatever count variables you like by \code{adjust_factor} to adjust them for sampling differences.
4 | #'
5 | #' @param data A \code{data.frame} (or \code{tibble} or \code{data.table}) containing (among other things potentially) geographic ID variables and a variable for the number of SafeGraph devices observed in that area. Often this is from a \code{home-panel-summary} file.
6 | #' @param from_id A character vector either giving the variable name of the census block group ID, or both the state FIPS and county FIPS variables (which must be numeric, and in state, then county order). Census block group must be specified if \code{from_level='cbg'}.
7 | #' @param sample_id A character variable giving the variable name of the variable in \code{data} that has the number of SafeGraph observations.
8 | #' @param from_level Either \code{'cbg'} or \code{'county'}, indicating the geographic level that is to be adjusted.
9 | #' @param to_level Either \code{'county'} or \code{'state'}, indicating the geographic level that the \code{from_level} components are to be adjusted to, for example \code{from_level='county'} and \code{to_level='state'} would give an adjustment factor for each county as though each county in the state was sampled at the same rate.
10 | #' @param by The data returned will be on the \code{from_level} level. Specify other variables here to have it instead be on the \code{from_level}-\code{by} level, perhaps a timecode. \code{by} should not split the \code{from_level} counts. If, for example, \code{by} is used to split a county in two geographic subcounties, then the population adjustment will not be correct.
11 | #' @param pop_data If a population data file other than \code{data(cbg_pop)} or \code{data(county_pop)} should be used, enter it here. Should be in the same format, and with the same variable names, as \code{cbg_pop} if \code{from_level='cbg'}, or the same as \code{county_pop} if \code{from_level='county'}.
12 | #' @examples
13 | #' \dontrun{
14 | #' # The current working directory has many home_panel_summary files
15 | #' # Do some futzing with the census_block_group variable to
16 | #' # Get it in the same format as how it is in cbg_pop
17 | #' home_panel <- read_many_csvs(colClasses= c(census_block_group='character'))
18 | #' home_panel[,census_block_group := as.character(as.numeric(census_block_group))]
19 | #'
20 | #' # Create the data set with the adjust_factor variable
21 | #' # This will adjust CBG populations to county ones, by default
22 | #' adj_factor <- sample_size_adjust(home_panel, by = 'date_range_start')
23 | #'
24 | #' # Now take some distancing data I have
25 | #' # (where census_block_group is stored as origin_census_block_group)
26 | #' data.table::setnames(adj_factor, 'census_block_group', 'origin_census_block_group')
27 | #' # and merge in the adjustment factor
28 | #' distancing <- merge(distancing, adj_factor, all.x = TRUE, by = 'origin_census_block_group')
29 | #' # And use that adjustment factor to adjust!
30 | #' distancing[,adj_device_count := device_count*adj_factor]
31 | #'
32 | #' }
33 | #' @export
34 |
35 | sample_size_adjust <- function(data,from_id = 'census_block_group',
36 | sample_id = 'number_devices_residing',
37 | from_level = 'cbg',
38 | to_level = 'county',
39 | by = NULL,
40 | pop_data = NULL) {
41 |
42 | if (!(from_level %in% c('cbg','county'))) {
43 | stop('from_level must be cbg or county.')
44 | }
45 | if (!(to_level %in% c('county','state'))) {
46 | stop('to_level must be county or state.')
47 | }
48 | if (length(from_id) > 1 & from_level == 'cbg') {
49 | stop('Only specify the census block group variable for from_id if from_level is cbg.')
50 | }
51 |
52 | if (from_level == 'cbg' & is.null(pop_data)) {
53 | data("cbg_pop", package = 'SafeGraphR')
54 | pop_data <- cbg_pop
55 | } else if (from_level == 'county' & is.null(pop_data)) {
56 | data("county_pop", package = 'SafeGraphR')
57 | pop_data <- county_pop
58 | }
59 |
60 | # See what proportion the from group is of the to group population
61 | if (to_level == 'county') {
62 | pop_data[,big_pop := sum(unweighted_pop, na.rm = TRUE),by=c('state_fips','county_fips')]
63 | } else if (to_level == 'state') {
64 | pop_data[,big_pop := sum(unweighted_pop, na.rm = TRUE),by='state_fips']
65 | }
66 |
67 | pop_data[,pop_prop := unweighted_pop/big_pop]
68 |
69 | # if we have CBG, get state and county FIPS
70 | data <- data.table::as.data.table(data)
71 | data <- subset(data, select = c(sample_id, from_id, by))
72 |
73 | # For ease of use
74 |
75 | data.table::setnames(data,sample_id,'sample_pop')
76 |
77 | # Create county and state FIPS if we don't have them
78 | if (length(from_id) == 1) {
79 | data[,c('state_fips','county_fips') := fips_from_cbg(eval(parse(text=from_id)))]
80 |
81 | # If we're from-county, collapse
82 | if (from_level == 'county') {
83 | data <- data[, .(sample_pop = sum(sample_pop)), by = c('state_fips','county_fips',
84 | by)]
85 | }
86 |
87 | # Line up names for cbg
88 | data.table::setnames(pop_data, 'poi_cbg', from_id)
89 |
90 | } else {
91 | # rename the population data state and county IDs to match the data
92 | data.table::setnames(pop_data,c('state_fips','county_fips'),from_id)
93 | }
94 |
95 | # Get the to-level of the sample
96 | data <- data[,.(top_sample = sum(sample_pop),
97 | sample_pop = sample_pop),by=c(from_id,by)]
98 |
99 | # Merge together
100 | data <- merge(data,pop_data, all.x = TRUE, by = from_id)
101 |
102 | # And create adjust_factor
103 | data[,adjust_factor := (unweighted_pop/big_pop)/(sample_pop/top_sample)]
104 |
105 | return(unique(subset(data,select=c(from_id,by,'adjust_factor'))))
106 | }
107 |
108 |
--------------------------------------------------------------------------------
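The `adjust_factor` computed at the end is just a ratio of shares, which a hand computation with invented numbers makes concrete:

```r
# Invented example: a county holds 10% of its state's population...
pop_share <- 20000 / 200000      # unweighted_pop / big_pop

# ...but contributes 20% of the state's observed devices
sample_share <- 2000 / 10000     # sample_pop / top_sample

# So the county's counts get scaled by 0.5 to undo the oversampling
adjust_factor <- pop_share / sample_share
adjust_factor
# [1] 0.5
```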
/R/utils-data-table.R:
--------------------------------------------------------------------------------
1 | # data.table is generally careful to minimize the scope for namespace
2 | # conflicts (i.e., functions with the same name as in other packages);
3 | # a more conservative approach using @importFrom should be careful to
4 | # import any needed data.table special symbols as well, e.g., if you
5 | # run DT[ , .N, by='grp'] in your package, you'll need to add
6 | # @importFrom data.table .N to prevent the NOTE from R CMD check.
7 | # See ?data.table::`special-symbols` for the list of such symbols
8 | # data.table defines; see the 'Importing data.table' vignette for more
9 | # advice (vignette('datatable-importing', 'data.table')).
10 | #
11 | #' @import data.table
12 | NULL
13 |
--------------------------------------------------------------------------------
/R/utils-pipe.R:
--------------------------------------------------------------------------------
1 | #' Pipe operator
2 | #'
3 | #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
4 | #'
5 | #' @name %>%
6 | #' @rdname pipe
7 | #' @keywords internal
8 | #' @export
9 | #' @importFrom magrittr %>%
10 | #' @usage lhs \%>\% rhs
11 | NULL
12 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SafeGraphR
2 |
3 | **SafeGraphR** is an R package designed to make it easy to read in and process data from [SafeGraph](https://www.safegraph.com). You may want to consult the [SafeGraph Community](https://www.safegraph.com/community), the [Awesome SafeGraph Data Science List](https://github.com/SafeGraphInc/awesome-safegraph-datascience), the [Normalization Best Practices](https://colab.research.google.com/drive/16BELpcum4TKoH-5wg8Xym_CGgIGgpu1I?usp=sharing), and especially the [SafeGraph Docs](https://docs.safegraph.com/).
4 |
5 | You can install **SafeGraphR** directly from GitHub.
6 |
7 | ```r
8 | # if necessary
9 | # install.packages('remotes')
10 | remotes::install_github('SafeGraphInc/SafeGraphR')
11 | ```
12 |
13 | The other pages on this site will walk you through how you can use **SafeGraphR** to work with the data.
14 |
15 |
16 | # Bugs and Help!
17 |
18 | **SafeGraphR** is currently in *beta*. All of its functions work, but of course there may be bugs remaining. The code has also not been checked with every possible combination of options that you could pick. Lastly, the SafeGraph data itself changes format on occasion, which may break some **SafeGraphR** functionality.
19 |
20 | If you run into an issue or bug in the code, please raise an Issue on the **SafeGraphR** Github [Issues page](https://github.com/SafeGraphInc/SafeGraphR/issues).
21 |
22 | If you're just having trouble getting things to work, you can find help at the [Placekey Community Slack Channel](https://placekey-community.slack.com/).
23 |
24 | Below is a list of what's in the package with a brief description.
25 |
26 | ## Data Reading Functions
27 |
28 | `read_core()`: Read in a Core Places file, which you can then merge with patterns or other data to add information about each location. There is also the older `link_poi_naics()` which does the same thing but can only be used to create a link between POIs and NAICS codes.
29 |
30 | `read_distancing()`: Given a list of dates, reads in and aggregates SafeGraph social-distancing v2 files.
31 |
32 | `read_many_csvs()`: Reads a bunch of CSVs in the same folder and row-binds them all together. Useful for stuff like normalization data.
33 |
34 | `read_many_patterns()` and `read_patterns()`: Reads a bunch of (or one, respectively) monthly or weekly patterns `.csv.gz` files all in the same folder, does appropriate processing, and row-binds the results together.
35 |
36 | `read_many_shop()` and `read_shop()`: Reads a bunch of (or one, respectively) `.zip` files in the format they come in from the shop and combines the data sets inside the zip across zip-files appropriately.
37 |
38 | `safegraph_aws()`: A thin wrapper for `aws.s3::s3sync()` that downloads data from the SafeGraph AWS buckets. As of January 31, 2022, this function will only be useful for enterprise customers with their own AWS access.
39 |
40 | `safegraph_api()`: A function you can use to access the [SafeGraph API](https://shop.safegraph.com/api), which allows you to easily pull data on a small number of SafeGraph POIs at a time.
41 |
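For instance, a typical reading session might combine a few of these. A sketch only: the folder names here are placeholders, and each function's help file has the full argument list.

```r
library(SafeGraphR)

# Row-bind all the normalization CSVs in one folder
norm <- read_many_csvs(dir = 'normalization_stats/')

# Read a folder of patterns files, keeping visit counts
# aggregated to the county level
patterns <- read_many_patterns(dir = 'weekly_patterns/',
                               select = c('raw_visit_counts', 'poi_cbg'),
                               by = c('state_fips', 'county_fips'))
```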
42 |
43 | ## Data Processing Functions
44 |
45 | `expand_cat_json()` and `expand_integer_json()`: Take SafeGraph data with a column of categorical (named) or numeric (unnamed) JSON data and expand that column, pivot the data to long format, and then aggregate to the desired level.
46 |
47 | `expand_open_hours()`: Expand the `open_hours` variable into something easy to use!
48 |
49 | `fips_from_cbg()`: Take a census block group identifier and extract the state and/or county FIPS codes.
50 |
51 | `rbind_by_list_pos()`: Take a list of lists of `data.table`s and row-binds them by their position in the sub-list. For example, `rbind_by_list_pos(list(A,B),list(C,D))` would return `list(rbind(A,C),rbind(B,D))`. Can be used after `read_` functions, which in some cases return a list of `data.table`s for each file they read.
52 |
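A small sketch of two of these in action (the CBG below is invented for illustration):

```r
library(SafeGraphR)

# 360610112021 is an invented CBG: state FIPS 36, county FIPS 061
fips_from_cbg('360610112021')
# Returns the state ('36') and county ('061') FIPS together

# Row-bind two lists of data.tables element-by-element
A <- data.table::data.table(x = 1); B <- data.table::data.table(y = 1)
C <- data.table::data.table(x = 2); D <- data.table::data.table(y = 2)
rbind_by_list_pos(list(A, B), list(C, D))
# list(rbind(A, C), rbind(B, D))
```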
53 | ## Final-Stages Processing Functions
54 |
55 | `hb_shrink()`: Perform hierarchical Bayesian shrinkage on the CBG-to-county or county-to-state level.
56 |
57 | `ma()`: Calculates a (by default) seven day moving average on pre-sorted data with no gaps.
58 |
59 | `sample_size_adjust()`: Adjusts data for differences in sampling rates across geographic locations.
60 |
61 | `scale_to_date()`: Adjusts data to be relative to a specific date.
62 |
63 | `scale_yoy()`: Adjusts data to be relative to the same date the previous year.
64 |
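For example, smoothing a daily series with `ma()` (a sketch: `visits_by_day` and its columns are placeholder names for your own pre-sorted, gap-free `data.table`):

```r
library(SafeGraphR)

# Default seven-day moving average of visits, computed within each county
visits_by_day[, visits_ma := ma(raw_visit_counts),
              by = c('state_fips', 'county_fips')]
```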
65 | ## Data Sets
66 |
67 | `cbg_pop`: Population data from the easy census file.
68 |
69 | `county_pop`: Population aggregated to the county level.
70 |
71 | `fips_to_names`: Data set linking state and county FIPS codes to state and county names, for merging in and labeling.
72 |
73 | `naics_codes`: Data set linking NAICS codes to NAICS code titles, for merging in and labeling (or just knowing what you're looking at).
74 |
--------------------------------------------------------------------------------
/SafeGraphR.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 |
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 |
--------------------------------------------------------------------------------
/data/canada_cd_pop.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/canada_cd_pop.rda
--------------------------------------------------------------------------------
/data/canada_cd_types.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/canada_cd_types.rda
--------------------------------------------------------------------------------
/data/cbg_pop.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/cbg_pop.rda
--------------------------------------------------------------------------------
/data/county_pop.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/county_pop.rda
--------------------------------------------------------------------------------
/data/distancing.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/distancing.rda
--------------------------------------------------------------------------------
/data/fips_to_names.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/fips_to_names.rda
--------------------------------------------------------------------------------
/data/naics_2.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/naics_2.rda
--------------------------------------------------------------------------------
/data/naics_4.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/naics_4.rda
--------------------------------------------------------------------------------
/data/naics_codes.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/naics_codes.rda
--------------------------------------------------------------------------------
/data/norm.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/norm.rda
--------------------------------------------------------------------------------
/data/panel.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/panel.rda
--------------------------------------------------------------------------------
/data/pat_NY_NJ.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/pat_NY_NJ.rda
--------------------------------------------------------------------------------
/data/pat_naics.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/pat_naics.rda
--------------------------------------------------------------------------------
/data/state_info.rda:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SafeGraphInc/SafeGraphR/78bb38a558037b36c6ee97ff35b865fda9265df5/data/state_info.rda
--------------------------------------------------------------------------------
/docs/sitemap.xml:
--------------------------------------------------------------------------------
(pkgdown sitemap; XML markup lost in extraction — the recoverable <loc> paths:)
/404.html
/articles/Automatic_Traffic-over-Time_Processing.html
/articles/distancing_vignette.html
/articles/index.html
/articles/patterns_vignette.html
/articles/SafeGraphR.html
/authors.html
/index.html
/LICENSE-text.html
/news/index.html
/reference/canada_cd_pop.html
/reference/canada_cd_types.html
/reference/cbg_pop.html
/reference/county_pop.html
/reference/distancing.html
/reference/expand_cat_json.html
/reference/expand_integer_json.html
/reference/expand_open_hours.html
/reference/find_date.html
/reference/fips_from_cbg.html
/reference/fips_to_names.html
/reference/graph_template.html
/reference/growth_over_time.html
/reference/hb_shrink.html
/reference/index.html
/reference/link_poi_naics.html
/reference/ma.html
/reference/naics_2.html
/reference/naics_4.html
/reference/naics_codes.html
/reference/norm.html
/reference/panel.html
/reference/patterns_lookup.html
/reference/pat_naics.html
/reference/pipe.html
/reference/processing_template.html
/reference/rbind_by_list_pos.html
/reference/read_core.html
/reference/read_distancing.html
/reference/read_many_csvs.html
/reference/read_many_patterns.html
/reference/read_many_shop.html
/reference/read_patterns.html
/reference/read_shop.html
/reference/safegraph_api.html
/reference/safegraph_aws.html
/reference/sample_size_adjust.html
/reference/scale_to_date.html
/reference/scale_yoy.html
/reference/state_info.html
--------------------------------------------------------------------------------
/man/canada_cd_pop.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{canada_cd_pop}
5 | \alias{canada_cd_pop}
6 | \title{Canadian Census District Populations}
7 | \format{
8 | A \code{data.table} with 293 rows and 3 variables:
9 | \describe{
10 | \item{unweighted_pop}{Population from the 2016 Canadian census.}
11 | \item{state_fips}{Province SGC code}
12 | \item{county_fips}{Census division code}
13 | }
14 | }
15 | \source{
16 | \url{https://www12.statcan.gc.ca/census-recensement/2016/dp-pd/hlt-fst/pd-pl/Tables/CompFile.cfm?Lang=Eng&T=701&OFT=FULLCSV}
17 | }
18 | \usage{
19 | canada_cd_pop
20 | }
21 | \description{
22 | Population by census district (with \code{state_fips} and \code{county_fips} identifiers to link with other data sets in the package - sorry for the naming, Canadians). The "unweighted" in the variable name \code{unweighted_pop} doesn't refer to anything specific in the Canadian census; it's there so you can easily \code{rbind} this with \code{county_pop}.
23 | }
24 | \details{
25 | This comes from the Canadian census directly instead of SafeGraph.
26 | }
27 | \keyword{datasets}
28 |
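
A minimal sketch of the stacking the description suggests (the \code{fill = TRUE} is just a guard in case the column sets ever drift):

    library(SafeGraphR)
    data(county_pop)
    data(canada_cd_pop)
    # One population table covering US counties and Canadian census divisions
    north_america_pop <- rbind(county_pop, canada_cd_pop, fill = TRUE)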
--------------------------------------------------------------------------------
/man/canada_cd_types.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{canada_cd_types}
5 | \alias{canada_cd_types}
6 | \title{Additional census division information for Canada}
7 | \format{
8 | An object of class \code{data.table} (inherits from \code{data.frame}) with 293 rows and 5 columns.
9 | }
10 | \usage{
11 | canada_cd_types
12 | }
13 | \description{
14 | A dataset that can be merged with \code{fips_to_names}, containing French names for locations as well as the type of each census division.
15 | }
16 | \details{
17 | \describe{
18 | \item{state_fips}{Canadian SGC code}
19 | \item{county_fips}{Canadian Census division}
20 | \item{countyname_french}{Census division name in French}
21 | \item{cd_type}{Census district type in English}
22 | \item{cd_type_french}{Census district type in French}
23 | }
24 | }
25 | \keyword{datasets}
26 |
--------------------------------------------------------------------------------
/man/cbg_pop.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{cbg_pop}
5 | \alias{cbg_pop}
6 | \title{Unweighted Population by Census Block Group}
7 | \format{
8 | A \code{data.table} with 220333 rows and 2 variables:
9 | \describe{
10 | \item{poi_cbg}{Census Block Group ID, named for easy merging with the patterns data.}
11 | \item{unweighted_pop}{Population from the 2016 American Community Survey (the "unweighted" part is outdated but kept for consistency with old code).}
12 | }
13 | }
14 | \source{
15 | \url{https://docs.safegraph.com/docs/open-census-data}
16 | }
17 | \usage{
18 | cbg_pop
19 | }
20 | \description{
21 | A dataset containing the unweighted population by Census Block Group from the Open Census file (US Only). Use with \code{fips_from_cbg} to get state and county FIPS codes from the CBG ID.
22 | }
23 | \keyword{datasets}
24 |
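
The pairing with \code{fips_from_cbg} mentioned above, as a one-liner (mirroring the \code{data.table} idiom from the \code{fips_from_cbg} help):

    library(SafeGraphR)
    data(cbg_pop)
    # fips_from_cbg() returns a list, so data.table assigns both columns at once
    cbg_pop[, c('state_fips', 'county_fips') := fips_from_cbg(poi_cbg)]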
--------------------------------------------------------------------------------
/man/county_pop.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{county_pop}
5 | \alias{county_pop}
6 | \title{Unweighted Population by County}
7 | \format{
8 | A \code{data.table} with 3220 rows and 3 variables:
9 | \describe{
10 | \item{unweighted_pop}{Population from the 2016 American Community Survey (the "unweighted" part is outdated but kept for consistency with old code).}
11 | \item{state_fips}{State FIPS code for the county}
12 | \item{county_fips}{County FIPS code for the county}
13 | }
14 | }
15 | \source{
16 | \url{https://docs.safegraph.com/docs/open-census-data}
17 | }
18 | \usage{
19 | county_pop
20 | }
21 | \description{
22 | A dataset containing the unweighted population by county from the Open Census file (US only). See \code{canada_cd_pop} for Canadian county population. Merge with \code{data(fips_to_names)} to name the states and counties.
23 | }
24 | \keyword{datasets}
25 |
--------------------------------------------------------------------------------
/man/distancing.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{distancing}
5 | \alias{distancing}
6 | \title{Example Stay-at-Home Data}
7 | \format{
8 | A \code{data.table} with 45158 rows and 7 variables:
9 | \describe{
10 | \item{date}{The date}
11 | \item{state_fips,county_fips}{The state and county identifiers}
12 | \item{device_count}{The total number of devices observed}
13 | \item{completely_home_device_count}{The total number of devices observed that did not leave their home location on this day}
14 | \item{part_time_work_behavior_devices,full_time_work_behavior_devices}{The total number of devices observed that appear to be engaging in part-time or full-time work behavior (experimental)}
15 | }
16 | }
17 | \source{
18 | SafeGraph
19 | }
20 | \usage{
21 | distancing
22 | }
23 | \description{
24 | Distancing data from June 1 to June 14, aggregated to the county level for the vignette. See the \href{https://docs.safegraph.com/docs}{SafeGraph Docs} for full documentation.
25 | }
26 | \keyword{datasets}
27 |
--------------------------------------------------------------------------------
/man/expand_cat_json.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/expand_json.R
3 | \name{expand_cat_json}
4 | \alias{expand_cat_json}
5 | \title{Expand and collapse a categorical JSON column}
6 | \usage{
7 | expand_cat_json(
8 | dt,
9 | expand,
10 | index = "index",
11 | by = NULL,
12 | fast = FALSE,
13 | fun = sum,
14 | na.rm = TRUE,
15 | set_key = TRUE
16 | )
17 | }
18 | \arguments{
19 | \item{dt}{data.table object (or something that can be coerced to data.table)}
20 |
21 | \item{expand}{String indicating the JSON column to be expanded.}
22 |
23 | \item{index}{String indicating the name of the new index column}
24 |
25 | \item{by}{Character vector indicating the variables to group by after expanding. Set to \code{NULL} to aggregate across all initial rows, or set to \code{FALSE} to not aggregate at all (this will also add an \code{initial_rowno} column showing the original row number).}
26 |
27 | \item{fast}{Assumes that all the JSON vectors have the exact same set of categories, and adds the values together rather than using whatever is in \code{fun}.}
28 |
29 | \item{fun}{Function that takes a vector and returns a single value to use when collapsing to the \code{by} level. Requires \code{fast = FALSE}.}
30 |
31 | \item{na.rm}{Ignore missing values of \code{expand}}
32 |
33 | \item{set_key}{Set the key of \code{dt} to \code{by}. Set to \code{FALSE} if you have already set the key or want it returned without key.}
34 | }
35 | \description{
36 | This function accepts a \code{data.table} along with a set of grouping variables and a character-format category-style JSON column (i.e. starts with curly brackets, not square).
37 | }
38 | \details{
39 | It expands that JSON column into long format, with one row per observation per value of the JSON column, and then collapses everything according to the set of grouping variables.
40 | }
41 | \examples{
42 |
43 | # Raw example data for expanding/collapsing
44 | patterns <- data.table::data.table(state_fips = c(1,1,2,2),
45 | cat_origin = c('{"a": "2", "b": "3"}',
46 | '{"a": "3", "b": "4"}',
47 | '{"a": "4", "b": "5"}',
48 | '{"a": "5", "b": "6"}'))
49 |
50 | expand_cat_json(patterns, 'cat_origin', by = 'state_fips')[]
51 | }
52 |
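
To round out the \code{by}/\code{fun} options described above, a short sketch reusing the same toy \code{patterns} table (expected shapes are inferred from the parameter descriptions):

    # Keep every original row (adds an initial_rowno column) instead of aggregating
    expand_cat_json(patterns, 'cat_origin', by = FALSE)[]

    # Collapse with a different function; fun is only used when fast = FALSE
    expand_cat_json(patterns, 'cat_origin', by = 'state_fips',
                    fun = mean, fast = FALSE)[]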
--------------------------------------------------------------------------------
/man/expand_integer_json.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/expand_json.R
3 | \name{expand_integer_json}
4 | \alias{expand_integer_json}
5 | \title{Expand and collapse an integer JSON column}
6 | \usage{
7 | expand_integer_json(
8 | dt,
9 | expand,
10 | index = "index",
11 | by = NULL,
12 | fast = TRUE,
13 | fun = sum,
14 | na.rm = TRUE,
15 | set_key = TRUE
16 | )
17 | }
18 | \arguments{
19 | \item{dt}{data.table object (or something that can be coerced to data.table)}
20 |
21 | \item{expand}{String indicating the JSON column to be expanded.}
22 |
23 | \item{index}{String indicating the name of the new index column}
24 |
25 | \item{by}{Character vector indicating the variables to group by after expanding. Set to \code{NULL} to aggregate across all initial rows, or set to \code{FALSE} to not aggregate at all (this will also add an \code{initial_rowno} column showing the original row number).}
26 |
27 | \item{fast}{Assumes that all the JSON vectors are of equal length, and adds the values together rather than using whatever is in \code{fun}.}
28 |
29 | \item{fun}{Function that takes a vector and returns a single value to use when collapsing to the \code{by} level. Requires \code{fast = FALSE}.}
30 |
31 | \item{na.rm}{Ignore missing values of \code{expand}}
32 |
33 | \item{set_key}{Set the key of \code{dt} to \code{by}. Set to \code{FALSE} if you have already set the key or want it returned without key.}
34 | }
35 | \description{
36 | This function accepts a \code{data.table} along with a set of grouping variables and a character-format integer-style JSON column (i.e. starts with square brackets, not curly).
37 | }
38 | \details{
39 | It expands that JSON column into long format, with one row per observation per value of the JSON column, and then collapses everything according to the set of grouping variables.
40 | }
41 | \examples{
42 |
43 | # Example data
44 | patterns <- data.table::data.table(state_fips = c(1,1,2,2),
45 | int_origin = c('[2,3]',
46 | '[3,4]',
47 | '[4,5]',
48 | '[5,6]'))
49 |
50 | expand_integer_json(patterns, 'int_origin', by = 'state_fips')[]
51 |
52 | }
53 |
--------------------------------------------------------------------------------
/man/expand_open_hours.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/helpers.R
3 | \name{expand_open_hours}
4 | \alias{expand_open_hours}
5 | \title{Expands the open_hours variable in the Core file}
6 | \usage{
7 | expand_open_hours(
8 | dt,
9 | format = c("wide", "long", "long-expand", "long_expand"),
10 | open_hours = "open_hours",
11 | colnames = NULL,
12 | drop_missing = FALSE,
13 | convert_hour = TRUE,
14 | days = c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday",
15 | "Sunday")
16 | )
17 | }
18 | \arguments{
19 | \item{dt}{A \code{data.table} containing the \code{open_hours} column (or an object that can be coerced to a \code{data.table}).}
20 |
21 | \item{format}{Can be \code{'wide'} (seven new \code{list}-columns, one for each day), \code{'long'} (turn each row into seven rows, then two new columns: one for day-of-week and one \code{list}-column with opening/closing times), or \code{'long-expand'}/\code{'long_expand'} (\code{'long'} but then also break the single list-column out into a set of numeric start/end columns). Note that for \code{'long-expand'}, many locations have more than one set of open/close hours per day, so there may be more than one pair of opening/closing columns.}
22 |
23 | \item{open_hours}{A character variable with the name of the \code{open_hours} column.}
24 |
25 | \item{colnames}{For \code{format = 'wide'}, the name stub for the column names, by default \code{'open_hours'} to get \code{'open_hoursSunday'}, \code{'open_hoursMonday'}, etc. For \code{format='long'}, a two-element vector (by default \code{c('weekday','open_hours')}) with the name of the column indicating the day, and the \code{list}-column with the open hours information in it. For \code{format = 'long-expand'}, a three-element vector with the weekday column, the name stub for "opening hour" and the name stub for "closing hour" (with numbers 1, 2, 3, etc. appended afterwards), by default \code{c('weekday','opens','closes')}.}
26 |
27 | \item{drop_missing}{Drop any rows with a missing \code{open_hours} observation.}
28 |
29 | \item{convert_hour}{Convert hour strings like \code{'15:30'} to numbers like \code{15.5}. This does slow down the function.}
30 |
31 | \item{days}{A character vector of the days to keep. Cutting down here can save some time/memory especially if you are not going \code{format = 'wide'}.}
32 | }
33 | \description{
34 | This function takes the \code{open_hours} variable in an already-read Core file (stored as a \code{data.table}) and expands it to seven \code{list}-type columns, where the elements of the list in each row are a set of vectors for opening/closing times, in military time format (1:30PM = 13.5). So an observation of \code{c(8,10,12,14)} would be a business that opens at 8, closes at 10, opens again at noon, and closes again at 2PM on that day. Options are available to produce long instead of wide expansions as well.
35 | }
36 | \details{
37 | Returns the same \code{data.table} but with the new columns/rows added. May change the order of the data.
38 | }
39 |
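
Since the help file above ships no examples, a hedged sketch; \code{core} stands in for a \code{data.table} from \code{read_core()} that still contains the raw \code{open_hours} strings:

    # Wide: adds seven list-columns, open_hoursMonday through open_hoursSunday
    core_wide <- expand_open_hours(core, format = 'wide')

    # Long-expand: numeric opens1/closes1 (etc.) columns, one row per POI-day;
    # restricting `days` saves time and memory
    core_weekend <- expand_open_hours(core, format = 'long-expand',
                                      days = c('Saturday', 'Sunday'))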
--------------------------------------------------------------------------------
/man/find_date.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read_patterns.R
3 | \name{find_date}
4 | \alias{find_date}
5 | \title{Find the date in a SafeGraph AWS-formatted filepath}
6 | \usage{
7 | find_date(s)
8 | }
9 | \arguments{
10 | \item{s}{The filepath to look for a date in.}
11 | }
12 | \description{
13 | Given a filepath \code{s}, this function will look for the last correctly-parsed date in that string. Because of how the SafeGraph AWS file structures are laid out, this is the date of those files; for example, \code{patterns_backfill/2020/12/14/21/2018/01/01} yields "2018/01/01".
14 | }
15 | \details{
16 | This function returns a string, not a date. You may want to send it to \code{as.Date()} or \code{lubridate::ymd}.
17 |
18 | For backfill data, the date returned will generally be the \code{start_date} for the files. However, for new data, you will want to do \code{as.Date(find_date(s)) - lubridate::days(9)} to get the \code{start_date}.
19 | }
20 | \examples{
21 |
22 | start_date <- find_date('patterns_backfill/2020/12/14/21/2018/01/01') \%>\% as.Date()
23 |
24 | }
25 |
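
Per the details above, the \code{start_date} for "new" (non-backfill) weekly files sits 9 days before the parsed date; a sketch, with \code{s} standing in for any filepath from the weekly patterns folder:

    # find_date() returns the release date for new weekly data,
    # so back up 9 days to recover the week's start_date
    start_date <- as.Date(find_date(s)) - lubridate::days(9)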
--------------------------------------------------------------------------------
/man/fips_from_cbg.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/helpers.R
3 | \name{fips_from_cbg}
4 | \alias{fips_from_cbg}
5 | \title{Pull state and county FIPS (US) or province and census division (CA) from CBG code}
6 | \usage{
7 | fips_from_cbg(cbg, return = "both")
8 | }
9 | \arguments{
10 | \item{cbg}{CBG code, in numeric or string form. To aid speed since this function is called millions of times, \code{cbg} is not checked to ensure it is a valid CBG identifier.}
11 |
12 | \item{return}{Set to 'state' to get back only state FIPS, 'county' for only county, or 'both' for a list of both (state then county).}
13 | }
14 | \description{
15 | This function takes a CBG code (as numeric or string) and returns the state and county FIPS codes associated with it.
16 | }
17 | \details{
18 | The syntax for this function was developed before the Canadian data was introduced, so it is definitely US-first, down to the function name, with Canadian additions tacked on. Sorry neighbors to the North. Canadian province ("state") and census division ("county") identifiers will be preceded with \code{"CA:"} as in the SafeGraph \code{cbg} variable.
19 |
20 | This function now returns character values rather than numeric, to account for the Canadian data.
21 |
22 | Why does this produce a list and not a vector? For \code{data.table} usage.
23 | }
24 | \examples{
25 |
26 | a_cbg <- '560610112022'
27 | fips_from_cbg(a_cbg)
28 |
29 | # Use with data.table!
30 | DT <- data.table::data.table(cbg = c('560610112022','10310112022'))
31 | DT[,c('state_fips','county_fips') := fips_from_cbg(cbg)]
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/man/fips_to_names.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{fips_to_names}
5 | \alias{fips_to_names}
6 | \title{State and county names by FIPS codes}
7 | \format{
8 | A \code{data.table} with 3142 rows and 5 variables:
9 | \describe{
10 | \item{state_fips}{State FIPS code / Canadian SGC code}
11 | \item{county_fips}{County FIPS code / Canadian Census division}
12 | \item{statename}{The full name of the state / province}
13 | \item{countyname}{The full English name of the county / census division, including "County" for US entries. Merge with \code{canada_cd_types} to get the equivalent division type for Canada and French names.}
14 | \item{iso_country_code}{Indicator for US or Canada}
15 | }
16 | }
17 | \source{
18 | US Census
19 | }
20 | \usage{
21 | fips_to_names
22 | }
23 | \description{
24 | A dataset that links state and county FIPS codes in the US (as character values) and province and census division codes (Canada) to the names of those states/provinces and counties/census divisions. This data predates the inclusion of Canada in SafeGraph, thus the US-centric naming.
25 | }
26 | \keyword{datasets}
27 |
--------------------------------------------------------------------------------
/man/graph_template.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/volume_over_time.R
3 | \name{graph_template}
4 | \alias{graph_template}
5 | \title{Produce a nice-looking graph of foot traffic growth over time}
6 | \usage{
7 | graph_template(
8 | dt,
9 | date = "date",
10 | growth = "growth",
11 | origin = 0,
12 | filter = NULL,
13 | by = NULL,
14 | x_title = "Date",
15 | y_title = "Foot Traffic Growth",
16 | title = ifelse(is.null(by), "SafeGraph: Foot Traffic Growth",
17 | paste0("SafeGraph: Foot Traffic Growth by ", paste(by, collapse = ", "))),
18 | caption = "7-day moving average applied.",
19 | subtitle = NULL,
20 | label = !is.null(by),
21 | hline = TRUE,
22 | expand_right = NULL,
23 | palette = "ggsci::category20_d3",
24 | manual_palette = NULL,
25 | skip_theming = FALSE,
26 | line_opts = list(size = 1),
27 | label_opts = list(size = 14/ggplot2::.pt, hjust = -0.2, vjust = 0.5, direction = "y"),
28 | hline_opts = list(size = 0.5, linetype = "dashed", color = "black")
29 | )
30 | }
31 | \arguments{
32 | \item{dt}{A \code{data.table} (or something that can be coerced to \code{data.table}). There must be one observation per \code{date} per \code{by} in this data.}
33 |
34 | \item{date}{Character variable indicating the date variable (x axis).}
35 |
36 | \item{growth}{Character variable indicating the growth variable (y axis).}
37 |
38 | \item{origin}{The value indicating no growth/initial value.}
39 |
40 | \item{filter}{A character variable describing a subset of the data to include, for example \code{filter = 'state_fips == 6'} to only include California.}
41 |
42 | \item{by}{A character variable of the variable name to assign to the \code{color} aesthetic in \code{ggplot2::geom_line()}. The values of this variable will also be sent to \code{ggrepel::geom_text_repel()}.}
43 |
44 | \item{x_title}{Axis title for x-axis.}
45 |
46 | \item{y_title}{Axis title for y-axis.}
47 |
48 | \item{title}{Graph title.}
49 |
50 | \item{caption}{Figure caption.}
51 |
52 | \item{subtitle}{Graph subtitle.}
53 |
54 | \item{label}{Should a text label be applied at the end of each line?}
55 |
56 | \item{hline}{Should a horizontal line at the \code{origin} value be included?}
57 |
58 | \item{expand_right}{Number of additional days to extend the x-axis by so as to fit the labels. Defaults to adding 33 percent more days so a quarter of the graph is reserved for labels.}
59 |
60 | \item{palette}{Discrete color palette from the \strong{paletteer} package to be sent to \code{paletteer::scale_color_paletteer_d()}. If you want something close to the default \strong{ggplot2} colors, use \code{'basetheme::default'}.}
61 |
62 | \item{manual_palette}{Manually-specified color palette to be sent to the \code{values} option of \code{ggplot2::scale_color_manual()}.}
63 |
64 | \item{skip_theming}{Don't apply the template theming, so you can apply your own.}
65 |
66 | \item{line_opts}{A named list of options to be sent to \code{ggplot2::geom_line()}.}
67 |
68 | \item{label_opts}{A named list of options to be sent to \code{ggrepel::geom_text_repel()}. Only relevant if \code{label = TRUE}.}
69 |
70 | \item{hline_opts}{A named list of options to be sent to \code{ggplot2::geom_hline()}, only relevant if \code{hline = TRUE}.}
71 | }
72 | \description{
73 | Produces a line graph with labels at the end of the lines, with theming designed for the purpose. Returns a \code{ggplot} object that can be further modified as normal. Requires that the \strong{ggplot2}, \strong{ggrepel}, and \strong{paletteer} packages be installed.
74 | }
75 | \examples{
76 |
77 | # Generally you'd be doing this with data that comes from read_many_patterns()
78 | # But here's an example using randomly generated data
79 |
80 | dt <- data.table::data.table(date = rep(lubridate::ymd('2020-01-01') + lubridate::days(0:300),2),
81 | state_fips = c(rep(6, 301), rep(7,301)),
82 | visits_by_day = rpois(602, lambda = 10))
83 |
84 | norm <- data.table::data.table(date = rep(lubridate::ymd('2020-01-01') + lubridate::days(0:300),2),
85 | state_fips = c(rep(6, 301), rep(7,301)),
86 | total_devices_seen = rpois(602, lambda = 10000))
87 |
88 | processed_data <- processing_template(dt, norm = norm, by = 'state_fips')
89 |
90 | p <- graph_template(processed_data, by = 'state_fips')
91 |
92 | p
93 |
94 | }
95 |
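
A follow-up sketch of light customization, reusing \code{processed_data} from the example above (the filter value is purely illustrative):

    p2 <- graph_template(processed_data,
                         filter = 'state_fips == 6',
                         title = 'SafeGraph: Foot Traffic Growth, One State',
                         label = FALSE)
    p2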
--------------------------------------------------------------------------------
/man/hb_shrink.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/hb_shrink.R
3 | \name{hb_shrink}
4 | \alias{hb_shrink}
5 | \title{Hierarchical Bayes Shrinkage}
6 | \usage{
7 | hb_shrink(success, total)
8 | }
9 | \arguments{
10 | \item{success}{A numeric integer variable containing the number of successes.}
11 |
12 | \item{total}{A numeric integer variable containing the total sample size.}
13 | }
14 | \description{
15 | This is a function that takes a "success" and a "total" variable (often something like "number of devices staying home" and "total number of devices") and shrinks each group's proportion toward the proportion in the full data set, using shrinkage methods for proportions.
16 | }
17 | \details{
18 | This is usually called by group, either with \code{dplyr::group_by} or with the \code{by} argument in a \code{data.table}, so that individual observations can be shrunk to the group level.
19 | }
20 | \examples{
21 | \dontrun{
22 | # distdat is the directory into which we downloaded the distancing data from AWS.
23 | # Read and compile all distancing data from May 1 to May 7
24 | distancing <- read_distancing(
25 | start = lubridate::ymd('2020-05-01'),
26 | end = lubridate::ymd('2020-05-07'),
27 | dir = distdat
28 | )
29 |
30 | # Shrink county to state
31 | distancing <- distancing[,.(county_fips = county_fips,
32 | unshrunk_stay_home = completely_home_device_count/device_count,
33 | shrunk_stay_home = hb_shrink(completely_home_device_count, device_count)),
34 | by = .(state_fips, date)]
35 |
36 | }
37 | }
38 |
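
Because the shipped example needs downloaded distancing data, here is a self-contained sketch with simulated counts (illustrative only, not SafeGraph data):

    set.seed(123)
    total   <- rpois(25, lambda = 500)         # total devices per group
    success <- rbinom(25, total, prob = 0.3)   # devices staying home

    # Shrunk estimates pull each raw proportion toward the pooled mean,
    # most strongly where total is small
    data.frame(raw = success / total,
               shrunk = hb_shrink(success, total))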
--------------------------------------------------------------------------------
/man/link_poi_naics.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/link_poi_naics.R
3 | \name{link_poi_naics}
4 | \alias{link_poi_naics}
5 | \title{Use a Core Places file to Create a POI-NAICS crosswalk}
6 | \usage{
7 | link_poi_naics(
8 | filename,
9 | dir = ".",
10 | exdir = dir,
11 | cleanup = FALSE,
12 | silent = FALSE
13 | )
14 | }
15 | \arguments{
16 | \item{filename}{The filename of the \code{ZIP} Core Places file.}
17 |
18 | \item{dir}{The directory that the file is in.}
19 |
20 | \item{exdir}{Name of the directory to unzip to.}
21 |
22 | \item{cleanup}{Set to \code{TRUE} to delete all the unzipped files after being read in.}
23 |
24 | \item{silent}{Suppress timing messages.}
25 | }
26 | \description{
27 | Feed this function the most recent Core Places file, and it will give you back a \code{data.table} with two columns: \code{safegraph_place_id} and \code{naics_code}. Saving this file is recommended. Then, provide this object to \code{read_shop} or \code{read_many_shop} so that you can use \code{'naics_code'} in the \code{by} argument.
28 | }
29 | \details{
30 | This function is DEPRECATED. It only works with the OLD Core files (and thus the continued usage of \code{safegraph_place_id} instead of \code{placekey}), and is superseded by the more flexible \code{read_core()} function.
31 | }
32 | \examples{
33 |
34 | \dontrun{
35 | # Core-USA-June2020-Release-CORE_POI-2020_05-2020-06-06.zip is a Core places file in the working directory
36 | poi_link <- link_poi_naics('Core-USA-June2020-Release-CORE_POI-2020_05-2020-06-06.zip')
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/man/ma.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/helpers.R
3 | \name{ma}
4 | \alias{ma}
5 | \title{Seven-Day Moving Average}
6 | \usage{
7 | ma(x, n = 7)
8 | }
9 | \arguments{
10 | \item{x}{The variable to calculate the moving average of.}
11 |
12 | \item{n}{The number of lags to cover in the moving average.}
13 | }
14 | \description{
15 | This function returns a (by default) seven-day moving average of the variable passed in. Make sure the data is pre-sorted by date, and grouped by the appropriate grouping. The data should have no gaps in time.
16 | }
17 | \examples{
18 |
19 | ma(1:9)
20 |
21 | }
22 |
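
A sketch of the usual grouped usage with \code{data.table}, assuming one row per date per group, sorted, with no gaps in time:

    library(data.table)
    dt <- data.table(date = rep(as.Date('2020-01-01') + 0:9, 2),
                     state_fips = rep(c(6, 36), each = 10),
                     visits = rpois(20, 100))

    # Apply the moving average within each state, sorted by date
    dt[order(date), visits_ma := ma(visits), by = state_fips]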
--------------------------------------------------------------------------------
/man/naics_2.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{naics_2}
5 | \alias{naics_2}
6 | \title{NAICS 2-Digit Code Titles}
7 | \format{
8 | A \code{data.table} with 24 rows and 2 variables:
9 | \describe{
10 | \item{naics_code}{The NAICS code}
11 | \item{naics_title}{The title of the NAICS code}
12 | }
13 | }
14 | \source{
15 | US Census NAICS page
16 | }
17 | \usage{
18 | naics_2
19 | }
20 | \description{
21 | A dataset that links two-digit NAICS codes to their descriptive titles using 2017 NAICS codes.
22 | }
23 | \details{
24 | Notice that some \code{naics_title} values are repeated because they cross several two-digit codes.
25 | }
26 | \keyword{datasets}
27 |
--------------------------------------------------------------------------------
/man/naics_4.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{naics_4}
5 | \alias{naics_4}
6 | \title{NAICS 4-Digit Code Titles}
7 | \format{
8 | A \code{data.table} with 311 rows and 2 variables:
9 | \describe{
10 | \item{naics_code}{The NAICS code}
11 | \item{naics_title}{The title of the NAICS code}
12 | }
13 | }
14 | \source{
15 | US Census NAICS page
16 | }
17 | \usage{
18 | naics_4
19 | }
20 | \description{
21 | A dataset that links four-digit NAICS codes to their descriptive titles using 2017 NAICS codes.
22 | }
23 | \keyword{datasets}
24 |
--------------------------------------------------------------------------------
/man/naics_codes.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{naics_codes}
5 | \alias{naics_codes}
6 | \title{NAICS Code Titles}
7 | \format{
8 | A \code{data.table} with 1069 rows and 2 variables:
9 | \describe{
10 | \item{naics_code}{The NAICS code}
11 | \item{naics_title}{The title of the NAICS code}
12 | }
13 | }
14 | \source{
15 | US Census NAICS page
16 | }
17 | \usage{
18 | naics_codes
19 | }
20 | \description{
21 | A dataset that links six-digit NAICS codes to their descriptive titles using 2017 NAICS codes.
22 | }
23 | \keyword{datasets}
24 |
--------------------------------------------------------------------------------
/man/norm.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{norm}
5 | \alias{norm}
6 | \title{Example Normalization Data}
7 | \format{
8 | A \code{data.table} with 7 rows and 7 variables:
9 | \describe{
10 | \item{date}{The date}
11 | \item{total_visits}{The total number of visits recorded in SafeGraph on that day}
12 | \item{total_devices_seen}{The total number of individual devices recorded in SafeGraph on that day}
13 | \item{total_home_visits}{Total devices with at least one visit to the home location that day}
14 | }
15 | }
16 | \source{
17 | SafeGraph
18 | }
19 | \usage{
20 | norm
21 | }
22 | \description{
23 | The normalization file from the July 1 weekly patterns pull, for the vignette. See the \href{https://docs.safegraph.com/docs}{SafeGraph Docs} for full documentation.
24 | }
25 | \keyword{datasets}
26 |
--------------------------------------------------------------------------------
/man/panel.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{panel}
5 | \alias{panel}
6 | \title{Example Panel Information Data}
7 | \format{
8 | A \code{data.table} with 1155 rows and 4 variables:
9 | \describe{
10 | \item{start_date}{The first date present in the patterns file}
11 | \item{state_fips,county_fips}{State and county identifiers}
12 | \item{number_devices_residing}{The total number of devices with home locations in those counties}
13 | }
14 | }
15 | \source{
16 | SafeGraph
17 | }
18 | \usage{
19 | panel
20 | }
21 | \description{
22 | The home_panel_summary file from the July 1 weekly patterns pull, processed. See the \href{https://docs.safegraph.com/docs}{SafeGraph Docs} for full documentation.
23 | }
24 | \keyword{datasets}
25 |
--------------------------------------------------------------------------------
/man/pat_naics.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{pat_naics}
5 | \alias{pat_naics}
6 | \title{Example Patterns Data Aggregated by NAICS Code}
7 | \format{
8 | A \code{data.table} with 9247 rows and 7 variables:
9 | \describe{
10 | \item{date}{The date}
11 | \item{start_date,day}{The first date present in the patterns file, and whether this observation is from that first date (1), the second (2), etc.}
12 | \item{state_fips,county_fips}{Originally would have been the state and county identifiers, but since things were aggregated to the NAICS level (rather than NAICS/state/county), they have been summed up and now mean nothing.}
13 | \item{naics_code}{Six-digit NAICS code}
14 | \item{visits_by_day}{The total number of visits to POIs of this NAICS code on this day}
15 | }
16 |
17 | A \code{data.table} with 2324 rows and 7 variables:
18 | \describe{
19 | \item{date}{The date}
20 | \item{start_date,day}{The first date present in the patterns file, and whether this observation is from that first date (1), the second (2), etc.}
21 | \item{state_fips,county_fips}{The state and county identifiers}
22 | \item{naics_code}{Originally this was the six-digit NAICS code of the associated POI. But since aggregation didn't preserve NAICS, this is nonsense}
23 | \item{visits_by_day}{The total number of visits to POIs in this county on this day}
24 | }
25 | }
26 | \source{
27 | SafeGraph
28 |
29 | SafeGraph
30 | }
31 | \usage{
32 | pat_naics
33 |
34 | pat_NY_NJ
35 | }
36 | \description{
37 | The patterns file from the July 1 weekly patterns pull, aggregated to the NAICS code level for the vignette. See the \href{https://docs.safegraph.com/docs}{SafeGraph Docs} for full documentation.
38 | 
39 | The patterns file from the July 1 weekly patterns pull, aggregated to the county level for the vignette, for New York and New Jersey only. See the \href{https://docs.safegraph.com/docs}{SafeGraph Docs} for full documentation.
40 | }
41 | \keyword{datasets}
42 |
--------------------------------------------------------------------------------
/man/patterns_lookup.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/patterns_lookup.R
3 | \name{patterns_lookup}
4 | \alias{patterns_lookup}
5 | \title{SafeGraph File Lookup}
6 | \usage{
7 | patterns_lookup(
8 | dates,
9 | dir = NULL,
10 | old_dir = NULL,
11 | new_dir = NULL,
12 | subfolder = "patterns",
13 | silent = FALSE,
14 | add_ma = 0,
15 | patterns_backfill_date = "2021/08/02/22/",
16 | old_date_split = lubridate::ymd("2021-07-11"),
17 | old_bucket = "weekly-backfill",
18 | new_bucket = "weekly",
19 | key = NULL,
20 | secret = NULL,
21 | list_files = FALSE,
22 | ...
23 | )
24 | }
25 | \arguments{
26 | \item{dates}{A vector of \code{Date} objects (perhaps taking a single \code{Date} object and adding \code{+lubridate::days(0:finish)}) to find the associated files for.}
27 |
28 | \item{dir}{If specified, will append \code{dir} to the start of the filepaths, to get full filepaths. If using both "old" (on or before \code{old_date_split}) and "new" (after \code{old_date_split}) dates, this will only work if both the "patterns_backfill" (old) and "patterns" (new) folders are in the same folder. Superseded by \code{old_dir} and \code{new_dir} for old and new files, respectively.}
29 |
30 | \item{old_dir}{If specified, will append \code{old_dir} to the start of the filepaths for all "old" (on or before \code{old_date_split}) files. This should be the folder that contains the \code{patterns_backfill} folder.}
31 |
32 | \item{new_dir}{If specified, will append \code{new_dir} to the start of the filepaths for all "new" (after \code{old_date_split}) files. This should be the folder that contains the \code{patterns} folder.}
33 |
34 | \item{subfolder}{Which folder in the AWS bucket to look at. Will append "_backfill" for backfill data. Usually this is "patterns", "normalization_stats", or "home_panel_summary".}
35 |
36 | \item{silent}{If specified, will omit the warning for using any dates after the package author last checked the consistency of the SafeGraph file structure.}
37 |
38 | \item{add_ma}{Also looks at the \code{add_ma} days before the dates listed in \code{dates}, so you can calculate an \code{add_ma}-day moving average. Or you could just change the \code{dates} argument yourself to allow this.}
39 |
40 | \item{patterns_backfill_date}{Character variable with the folder structure for the most recent \code{patterns_backfill} pull. i.e., the 2018, 2019, and 2020 folders containing backfill data in their subfolders should sit in the \code{paste0(old_dir,'/patterns_backfill/',patterns_backfill_date)} folder.}
41 |
42 | \item{old_date_split}{Date indicating the last day on which "old" data is present, before switching to the "new" data structure.}
43 |
44 | \item{old_bucket, new_bucket}{The \code{safegraph_aws()} \code{dataset} argument for the buckets containing the old and new data, respectively.}
45 |
46 | \item{key}{A character string containing an AWS Access Key ID. If \code{key} and \code{secret} are both specified, \code{patterns_lookup} will download all the files it finds.}
47 |
48 | \item{secret}{A character string containing an AWS Secret Access Key.}
49 |
50 | \item{list_files}{After creating folderpaths (and, possibly, downloading files), run each of them through \code{list.files(pattern = '.csv', recursive = TRUE, full.names = TRUE)} to get a usable list of files. This only works if all the files have already been downloaded.}
51 |
52 | \item{...}{Arguments to be passed to \code{safegraph_aws()}.}
53 | }
54 | \description{
55 | This function, given a date or range of dates, will return a character vector of folder paths in the weekly (new or backfill) data. You will need to run these through \code{list.files(pattern = '.csv.gz', full.names = TRUE)} after downloading files (or just set \code{list_files = TRUE}). This is done because the subfolder after this is based on the hour the data is released, which can't be predicted ahead of time for future weeks.
56 | }
57 | \examples{
58 |
59 | # We have already downloaded all of the AWS data into the working directory and just need to locate and load it
60 | # (if we also wanted to download, we could leave off list_files and pass this to safegraph_aws,
61 | # or add our key and secret here and it would download)
62 | filelist <- patterns_lookup(lubridate::ymd('2020-9-01') + lubridate::days(0:100),
63 | list_files = TRUE)
64 |
65 | dt <- read_many_patterns(filelist = filelist, by = 'brands', expand_int = 'visits_by_day')
66 |
67 | # Now let's get the normalization files
68 |
69 | normlist <- patterns_lookup(lubridate::ymd('2020-9-01') + lubridate::days(0:100),
70 | subfolder = 'normalization_stats',
71 | list_files = TRUE)
72 | norm <- read_many_csvs(filelist = normlist, makedate = TRUE)
73 |
74 | }
75 |
--------------------------------------------------------------------------------
/man/pipe.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/utils-pipe.R
3 | \name{\%>\%}
4 | \alias{\%>\%}
5 | \title{Pipe operator}
6 | \usage{
7 | lhs \%>\% rhs
8 | }
9 | \description{
10 | See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.
11 | }
12 | \keyword{internal}
13 |
--------------------------------------------------------------------------------
/man/processing_template.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/volume_over_time.R
3 | \name{processing_template}
4 | \alias{processing_template}
5 | \title{Perform basic processing and preparation of visits_by_day data}
6 | \usage{
7 | processing_template(
8 | dt,
9 | norm = NULL,
10 | by = NULL,
11 | date = "date",
12 | visits_by_day = "visits_by_day",
13 | origin = 0,
14 | filter = NULL,
15 | single_by = NULL,
16 | ma = 7,
17 | drop_ma = TRUE,
18 | first_date = NULL,
19 | silent = FALSE
20 | )
21 | }
22 | \arguments{
23 | \item{dt}{A \code{data.table} (or something that can be coerced to \code{data.table}).}
24 |
25 | \item{norm}{A \code{data.table} containing columns for \code{date}, any number of the elements of \code{by}, and a final column containing a normalization factor. The \code{visits_by_day} values will be divided by that normalization factor after merging. \code{growth_over_time} will generate this internally for you, but you can make a standard version of it easily by just using \code{read_many_csvs(makedate = TRUE)} to load in all of the files in the \code{normalization_stats} or \code{normalization_stats_backfill} folders from AWS, limiting it to just the all-state rows, and then passing in just the \code{date} and \code{total_devices_seen} columns. If \code{NULL}, applies no normalization (if your analysis covers a reasonably long time span, you want normalization).}
26 |
27 | \item{by}{A character vector of the variable names that indicate groups to calculate growth separately by.}
28 |
29 | \item{date}{Character variable indicating the date variable.}
30 |
31 | \item{visits_by_day}{Character variable indicating the variable containing the \code{visits_by_day} numbers.}
32 |
33 | \item{origin}{The value indicating no growth/initial value. The first date for each group will have this value. Usually 0 (for "0 percent growth") or 1 ("100 percent of initial value").}
34 |
35 | \item{filter}{A character variable describing a subset of the data to include, for example \code{filter = 'state_fips == 6'} to only include California.}
36 |
37 | \item{single_by}{A character variable for the name of a new variable that combines all the different variables in \code{by} into one variable, handy for passing to \code{graph_template()}.}
38 |
39 | \item{ma}{Number of days over which to take the moving average.}
40 |
41 | \item{drop_ma}{Drop observations for which \code{adj_visits} is missing because of the moving-average adjustment.}
42 |
43 | \item{first_date}{After implementing the moving-average, drop all values before this date and calculate growth starting from this date. If \code{NULL}, uses the first date that's not missing after the moving average.}
44 |
45 | \item{silent}{Omit the warning and detailed report that occurs for values of \code{dt} that find no match in \code{norm}, as well as the one if you try not to normalize at all.}
46 | }
47 | \description{
48 | This function takes data read in from SafeGraph patterns files that has had \code{expand_integer_json()} already applied to its \code{visits_by_day} variable (or used the \code{expand_int = 'visits_by_day'} option in \code{read_patterns()} or \code{read_many_patterns()}). It aggregates the data to the \code{date-by} level, normalizes according to the size of the sample, calculates a moving average, and also calculates growth since the \code{start_date} for each \code{by} category. The resulting \code{data.table}, with one row per \code{date} per combination of \code{by}, can be used for results and insight, or passed to \code{graph_template()} for a quick graph.
49 | }
50 | \details{
51 | The result is the same \code{data.table} that was passed in, with some modifications: the data will be aggregated (using \code{sum}) to the \code{date-by} level, with \code{visits_by_day} as the only other surviving column. Three new columns are added: the normalization variable (from \code{norm}, or just a variable \code{norm} equal to 1 if \code{norm = NULL}); \code{adj_visits}, which is \code{visits_by_day} adjusted for sample size and with a moving average applied; and \code{growth}, which tracks the percentage change relative to the earliest value of \code{adj_visits} that is not missing.
52 | }
53 | \examples{
54 |
55 | # Generally you'd be doing this with data that comes from read_many_patterns()
56 | # But here's an example using randomly generated data
57 |
58 | dt <- data.table::data.table(date = rep(lubridate::ymd('2020-01-01') + lubridate::days(0:300),2),
59 | state_fips = c(rep(6, 301), rep(7,301)),
60 | visits_by_day = rpois(602, lambda = 10))
61 |
62 | norm <- data.table::data.table(date = rep(lubridate::ymd('2020-01-01') + lubridate::days(0:300),2),
63 | state_fips = c(rep(6, 301), rep(7,301)),
64 | total_devices_seen = rpois(602, lambda = 10000))
65 |
66 | processed_data <- processing_template(dt, norm = norm, by = 'state_fips')
67 |
68 | }
69 |
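
A hedged sketch of building \code{norm} from real normalization files, per the parameter description above (the \code{region == 'ALL_STATES'} filter and the column names are assumptions about the weekly normalization_stats layout):

    # Read every normalization CSV downloaded from AWS, building a date column
    norm <- read_many_csvs(dir = 'normalization_stats/', makedate = TRUE)

    # Keep the all-state rows, then just the date and the sample-size column
    norm <- norm[region == 'ALL_STATES', .(date, total_devices_seen)]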
--------------------------------------------------------------------------------
/man/rbind_by_list_pos.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/helpers.R
3 | \name{rbind_by_list_pos}
4 | \alias{rbind_by_list_pos}
5 | \title{Row-binds data.tables in a list of lists}
6 | \usage{
7 | rbind_by_list_pos(dtl, ignore_names = FALSE)
8 | }
9 | \arguments{
10 | \item{dtl}{List of lists of \code{data.table}s.}
11 |
12 | \item{ignore_names}{If the list is named, match objects across lists only by their position in the list and not by their names.}
13 | }
14 | \description{
15 | This function takes a list of lists of \code{data.table}s (or anything that \code{data.table::rbind} accepts, like \code{data.frame}s), and then row-binds them by position or name. For example, if passed \code{list(list(first=A,second=B),list(first=C,second=D))}, you would get back \code{list(first=rbind(A,C),second=rbind(B,D))}.
16 | }
17 | \examples{
18 |
19 | list_of_lists <- list(
20 | list(data.frame(a = 1), data.frame(a = 2), data.frame(a = 3)),
21 | list(data.frame(a = 4), data.frame(a = 5), data.frame(a = 6))
22 | )
23 | rbind_by_list_pos(list_of_lists)
24 |
25 | list_of_named_lists <- list(
26 | list(A = data.frame(a = 1), B = data.frame(a = 2), C = data.frame(a = 3)),
27 | list(C = data.frame(a = 4), A = data.frame(a = 5), B = data.frame(a = 6))
28 | )
29 | rbind_by_list_pos(list_of_named_lists)
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/man/read_core.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read_core.R
3 | \name{read_core}
4 | \alias{read_core}
5 | \title{Read SafeGraph Core}
6 | \usage{
7 | read_core(
8 | dir = "core_poi/",
9 | filter = NULL,
10 | select = NULL,
11 | key = NULL,
12 | secret = NULL,
13 | silent = FALSE,
14 | ...
15 | )
16 | }
17 | \arguments{
18 | \item{dir}{The directory that the CORE files are in. If this folder contains multiple months of Core files, it will use the most recent (this only works if you are using the standard AWS file structure).}
19 |
20 | \item{filter}{A character string describing a logical statement for filtering the data, for example \code{filter = 'naics_code == 512131'} would give you only movie theater POIs. Will be used as an \code{i} argument in a \code{data.table}, see \code{help(data.table)}. Filtering here instead of afterwards can cut down on time and memory demands.}
21 |
22 | \item{select}{Character vector of variables to get from the file. Set to \code{NULL} to get all variables. If you plan to link the results to a patterns file, you will probably want to include \code{'placekey'} in this vector. Note that any variables mentioned in \code{filter} MUST be in \code{select} unless \code{select = NULL}.}
23 |
24 | \item{key}{A character string containing an AWS Access Key ID. If \code{key} and \code{secret} are both specified, \code{read_core} will download the most recent Core files and process them. This process assumes your system date is set correctly, and will only check this month's Core and last month's Core, since one of those should exist.}
25 |
26 | \item{secret}{A character string containing an AWS Secret Access Key.}
27 |
28 | \item{silent}{Suppress timing messages.}
29 |
30 | \item{...}{Other arguments to be passed to \code{data.table::fread} when reading in the \code{CSV} files inside of the \code{ZIP}. For example, \code{nrows} to only read in a certain number of rows.}
31 | }
32 | \description{
33 | Be aware that the files this is designed to work with are large and this function may take a while to execute. This function takes a folder of Core files and reads them in. The output is a \code{data.table}.
34 | }
35 | \details{
36 | AS OF SafeGraphR VERSION 0.3.0 THIS FUNCTION ONLY WORKS WITH NEW CORE FILE FORMATS. For old-format Core files, you can still use the less-flexible and otherwise deprecated \code{link_poi_naics()} function.
37 | }
38 | \examples{
39 |
40 | \dontrun{
41 | # Location of our CORE file
42 | # Note we probably don't have to specify 2020/10 if that's the most recent one
43 | dir <- '../SafeGraph/core_poi/2020/10/'
44 |
45 | # Let's only get retail POIs in California
46 | # And
47 | locations <- read_core(dir = dir,
48 | filter = 'region == "CA" & floor(naics_code/10000) \%in\% 44:45')
49 | }
50 | }
51 |
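
Following the \code{select} advice above, a sketch that keeps only what a later patterns merge needs (these are standard Core column names, but verify them against your release):

    locations <- read_core(dir = '../SafeGraph/core_poi/',
                           select = c('placekey', 'naics_code', 'region'))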
--------------------------------------------------------------------------------
/man/read_distancing.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read_distancing.R
3 | \name{read_distancing}
4 | \alias{read_distancing}
5 | \title{Read in Stay-at-Home Data}
6 | \usage{
7 | read_distancing(
8 | start,
9 | end,
10 | dir = ".",
11 | gen_fips = TRUE,
12 | by = c("state_fips", "county_fips"),
13 | filter = NULL,
14 | select = c("origin_census_block_group", "device_count",
15 | "completely_home_device_count", "part_time_work_behavior_devices",
16 | "full_time_work_behavior_devices"),
17 | ...
18 | )
19 | }
20 | \arguments{
21 | \item{start}{Date object with the starting date to read in stay-at-home data.}
22 |
23 | \item{end}{Ending date to read stay-at-home data to.}
24 |
25 | \item{dir}{The folder in which the "2020" (etc.) folder resides.}
26 |
27 | \item{gen_fips}{Set to \code{TRUE} to use the \code{origin_census_block_group} variable to generate \code{state_fips} and \code{county_fips} as numeric variables. This will also result in \code{origin_census_block_group} being converted to character.}
28 |
29 | \item{by}{After reading, collapse to this level by \code{sum}ming all the data. Usually \code{c('state_fips','county_fips')} with \code{gen_fips = TRUE}. Set to \code{NULL} to aggregate across all initial rows, or set to \code{FALSE} to not aggregate at all.}
30 |
31 | \item{filter}{A character string describing a logical statement for filtering the data, for example \code{filter = 'state_fips == 6'} would give you only data from California. Will be used as an \code{i} argument in a \code{data.table}, see \code{help(data.table)}. Filtering here instead of afterwards can cut down on time and memory demands.}
32 |
33 | \item{select}{Character vector of variables to get from the file. Set to \code{NULL} to get all variables.}
34 |
35 | \item{...}{Other arguments to be passed to \code{data.table::fread} when reading in the file. For example, \code{nrows} to only read in a certain number of rows.}
36 | }
37 | \description{
38 | Takes a folder of stay-at-home SafeGraph data structured the way it comes from AWS (i.e. folders like 2020/04/03 for April 3, 2020) and reads the files in.
39 | }
40 | \details{
41 | The stay-at-home data is no longer being updated as of April 19, 2021. This function should still work for the old data though.
42 |
43 | Note that after reading in data, if \code{gen_fips = TRUE}, state and county names can be merged in using \code{data(fips_to_names)}.
44 | }
45 | \examples{
46 |
47 | \dontrun{
48 |
49 | # distdat is the directory into which we downloaded the distancing data from AWS.
50 | # Read and compile all distancing data from May 1 to May 7
51 | distancing <- read_distancing(
52 | start = lubridate::ymd('2020-05-01'),
53 | end = lubridate::ymd('2020-05-07'),
54 | dir = distdat
55 | )
56 |
57 | }
58 | }
59 |
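
And a sketch of the merge suggested in the details (note that \code{fips_to_names} stores its codes as character, so you may need to align key types before merging):

    data(fips_to_names)
    # If read_distancing returned numeric codes, convert one side first
    distancing <- merge(distancing, fips_to_names,
                        by = c('state_fips', 'county_fips'), all.x = TRUE)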
--------------------------------------------------------------------------------
/man/read_many_csvs.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read_many_files.R
3 | \name{read_many_csvs}
4 | \alias{read_many_csvs}
5 | \title{Read and row-bind many CSVs}
6 | \usage{
7 | read_many_csvs(
8 | dir = ".",
9 | recursive = TRUE,
10 | filelist = NULL,
11 | makedate = FALSE,
12 | ...
13 | )
14 | }
15 | \arguments{
16 | \item{dir}{Name of the directory the files are in.}
17 |
18 | \item{recursive}{Search in all subdirectories as well.}
19 |
20 | \item{filelist}{Optionally, specify only a subset of the files to read in (entries can contain paths).}
21 |
22 | \item{makedate}{Use \code{year}, \code{month}, and \code{day} columns in the data to create a \code{date} variable. Works with normalization files.}
23 |
24 | \item{...}{Other arguments to pass to \code{data.table::fread}.}
25 | }
26 | \description{
27 | This accepts a directory. It will load every \code{csv} or \code{csv.gz} in that folder and attempt to row-bind them together. You can alternately specify a list of files if you don't want everything in the folder. This is designed for use with the normalization and home-summary files as downloaded from AWS.
28 | }
29 | \examples{
30 | \dontrun{
31 |
32 | # The current working directory contains all the normalization .csv files
33 | normalization <- read_many_csvs(makedate = TRUE)
34 |
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/man/read_many_patterns.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read_many_files.R
3 | \name{read_many_patterns}
4 | \alias{read_many_patterns}
5 | \title{Read and row-bind many patterns files}
6 | \usage{
7 | read_many_patterns(
8 | dir = ".",
9 | recursive = TRUE,
10 | filelist = NULL,
11 | start_date = NULL,
12 | post_by = !is.null(by),
13 | by = NULL,
14 | fun = sum,
15 | na.rm = TRUE,
16 | filter = NULL,
17 | expand_int = NULL,
18 | expand_cat = NULL,
19 | expand_name = NULL,
20 | multi = NULL,
21 | naics_link = NULL,
22 | select = NULL,
23 | gen_fips = TRUE,
24 | silent = FALSE,
25 | ...
26 | )
27 | }
28 | \arguments{
29 | \item{dir}{Name of the directory the files are in.}
30 |
31 | \item{recursive}{Search in all subdirectories as well, as needed for the post-June-24-2020 format of the AWS downloads. There is currently no way to include only a subset of these subdirectory files; perhaps run \code{list.files(recursive = TRUE)} yourself and pass a subset of the results to the \code{filelist} option.}
32 |
33 | \item{filelist}{A vector of filenames to read in, OR a named list of options to send to \code{patterns_lookup()}. This list must include \code{dates} for the dates of data you want, and \code{list_files} will be set to \code{TRUE}. If you like, add \code{key} and \code{secret} to this list to also download the files you need.}
34 |
35 | \item{start_date}{A vector of dates, one per file, giving the first date present in each file, to be passed to \code{read_patterns} as date objects. Unlike in \code{read_patterns}, this value will be added to the data as a variable called \code{start_date} so you can use it in \code{post_by}.}
36 |
37 | \item{post_by}{After reading in all the files, re-perform aggregation to this level. Use a character vector of variable names (or a list of vectors if using \code{multi}). Or just set to \code{TRUE} to have \code{post_by = by} plus, if present, \code{expand_name} or \code{'date'}. Set to \code{FALSE} to skip re-aggregation. Including \code{'start_date'} in both \code{by} and \code{post_by} is a good idea if you aren't using an approach that creates a \code{date} variable. By default this is \code{TRUE} unless \code{by = NULL} (if \code{by = NULL} in a \code{multi} option, it will still be \code{TRUE} by default for that).}
38 |
39 | \item{by, fun, na.rm, filter, expand_int, expand_cat, expand_name, multi, naics_link, select, gen_fips, silent, ...}{Arguments to be passed to \code{read_patterns}, specified as in \code{help(read_patterns)}.}
40 | }
41 | \description{
42 | This accepts a directory. It will use \code{read_patterns} to load every \code{csv.gz} in that folder, assuming they are all patterns files. It will then row-bind together each of the produced processed files. Finally, if \code{post_by} is specified, it will re-perform the aggregation, handy for new-format patterns files that split the same week's data across multiple files.
43 | }
44 | \details{
45 | Note that after reading in data, if \code{gen_fips = TRUE}, state and county names can be merged in using \code{data(fips_to_names)}.
46 | }
47 | \examples{
48 | \dontrun{
49 | # Our current working directory is full of .csv.gz files!
50 | # Too many... we will probably run out of memory if we try to read them all in at once, so let's chunk it
51 | files <- list.files(pattern = '.gz', recursive = TRUE)
52 | patterns <- read_many_patterns(filelist = files[1:10],
53 | # We only need these variables (and poi_cbg which is auto-added with gen_fips = TRUE)
54 | select = c('brands','visits_by_day'),
55 | # We want two formatted files to come out. The first aggregates to the state-brand-day level, getting visits by day
56 | multi = list(list(name = 'by_brands', by = c('state_fips','brands'), expand_int = 'visits_by_day'),
57 | # The second aggregates to the state-county-day level but only for Colorado and Connecticut (see the filter)
58 | list(name = 'co_and_ct', by = c('state_fips','county_fips'), filter = 'state_fips \%in\% 8:9', expand_int = 'visits_by_day')))
59 | patterns_brands <- patterns[[1]]
60 | patterns_co_and_ct <- patterns[[2]]
61 |
62 | # Alternately, find the files we need for the seven days starting December 7, 2020,
63 | # read them all in (and if we'd given key and secret too, download them first),
64 | # and then aggregate to the state-date level
65 | dt <- read_many_patterns(filelist = list(dates = lubridate::ymd("2020-12-07") + lubridate::days(0:6)),
66 | by = "state_fips", expand_int = 'visits_by_day',
67 | select = 'visits_by_day')
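
# A sketch of the same call with hypothetical AWS credentials added to
# filelist, so the needed files are downloaded first (these are placeholders)
dt <- read_many_patterns(filelist = list(dates = lubridate::ymd("2020-12-07") + lubridate::days(0:6),
                                         key = 'MY_AWS_KEY', secret = 'MY_AWS_SECRET'),
                         by = "state_fips", expand_int = 'visits_by_day',
                         select = 'visits_by_day')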
68 |
69 |
70 | # don't forget that if you want weekly data but AREN'T using visits_by_day
71 | # (for example if you're using visitor_home_cbgs)
72 | # you want start_date in your by option, as in the second list in multi here
73 | dt <- read_many_patterns(filelist = list(dates = lubridate::ymd("2020-12-07") + lubridate::days(0:6)),
74 | select = c('visits_by_day','visitor_home_cbgs'),
75 | multi = list(list(name = 'visits',by = 'state_fips',
76 | expand_int = 'visits_by_day',filter = 'state_fips == 6'),
77 | list(name = 'cbg',by = c('start_date','state_fips'),
78 | expand_cat = 'visitor_home_cbgs', filter = 'state_fips == 6')))
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/man/read_many_shop.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read_many_files.R
3 | \name{read_many_shop}
4 | \alias{read_many_shop}
5 | \title{Read and row-bind many files from the SafeGraph Shop}
6 | \usage{
7 | read_many_shop(
8 | dir = ".",
9 | recursive = FALSE,
10 | filelist = NULL,
11 | start_date = NULL,
12 | keeplist = c("patterns", "normalization_stats.csv", "home_panel_summary.csv",
13 | "visit_panel_summary.csv", "brand_info.csv"),
14 | exdir = dir,
15 | cleanup = TRUE,
16 | by = NULL,
17 | fun = sum,
18 | na.rm = TRUE,
19 | filter = NULL,
20 | expand_int = NULL,
21 | expand_cat = NULL,
22 | expand_name = NULL,
23 | multi = NULL,
24 | naics_link = NULL,
25 | select = NULL,
26 | gen_fips = FALSE,
27 | silent = FALSE,
28 | ...
29 | )
30 | }
31 | \arguments{
32 | \item{dir}{Name of the directory the files are in.}
33 |
34 | \item{recursive}{Look for files in all subdirectories as well.}
35 |
36 | \item{filelist}{Optionally specify only a subset of the filenames to read in.}
37 |
38 | \item{start_date}{A vector of dates giving the first date present in each zip file, to be passed to \code{read_patterns} as date objects. When using \code{read_many_shop} this \strong{really} should be included, since the patterns file names in the shop files are not in a format \code{read_patterns} can pick up on automatically. If left unspecified, will produce an error. To truly go ahead unspecified, set this to \code{FALSE}.}
39 |
40 | \item{keeplist, exdir, cleanup}{Arguments to be passed to \code{read_shop}, specified as in \code{help(read_shop)}.}
41 |
42 | \item{by, fun, na.rm, filter, expand_int, expand_cat, expand_name, multi, naics_link, select, gen_fips, silent, ...}{Other arguments to be passed to \code{read_patterns}, specified as in \code{help(read_patterns)}.}
43 | }
44 | \description{
45 | This accepts a directory. It will use \code{read_shop} to load every \code{zip} in that folder, assuming they are all files downloaded from the SafeGraph Shop. It will then row-bind together each of the subfiles, so you'll get a list where one entry is all the normalization data row-bound together, another is all the patterns files, and so on.
46 |
47 | Note that after reading in data, if \code{gen_fips = TRUE}, state and county names can be merged in using \code{data(fips_to_names)}.
48 | }
49 | \examples{
50 |
51 | \dontrun{
52 | # In the working directory we have two shop ZIP files, one for March and one for April.
53 | mydata <- read_many_shop(# I only want some of the sub-files
54 | keeplist = c('patterns','home_panel_summary.csv'),
55 | # For patterns, only keep these variables
56 | select = c('raw_visit_counts', 'region', 'bucketed_dwell_times', 'location_name'),
57 | # I want two aggregations of patterns - one of total visits by state ('region')
58 | # and another by location_name that has the dwell times for each brand
59 | multi = list(
60 | list(name = 'all',
61 | by = 'region'),
62 | list(name = 'location_dwells',
63 | by = 'location_name',
64 | expand_cat = 'bucketed_dwell_times',
65 | expand_name = 'bucketed_times')
66 | ),
67 | # Be sure to specify start_date for read_shop
68 | start_date = c(lubridate::ymd('2020-03-01'),lubridate::ymd('2020-04-01')))
69 |
70 | # The result is a list with two items: patterns and home_panel_summary.csv.
71 | # patterns itself is a list with two data.tables inside - 'all' and 'location_dwells',
72 | # aggregated as given.
73 |
74 | }
75 |
76 | }
77 |
--------------------------------------------------------------------------------
/man/read_patterns.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read_patterns.R
3 | \name{read_patterns}
4 | \alias{read_patterns}
5 | \title{Read SafeGraph Patterns}
6 | \usage{
7 | read_patterns(
8 | filename,
9 | dir = ".",
10 | by = NULL,
11 | fun = function(x) sum(x, na.rm = TRUE),
12 | na.rm = TRUE,
13 | filter = NULL,
14 | expand_int = NULL,
15 | expand_cat = NULL,
16 | expand_name = NULL,
17 | multi = NULL,
18 | naics_link = NULL,
19 | select = NULL,
20 | gen_fips = TRUE,
21 | start_date = NULL,
22 | silent = FALSE,
23 | ...
24 | )
25 | }
26 | \arguments{
27 | \item{filename}{The filename of the \code{.csv.gz} file or the path to the file. Note that if \code{start_date} is not specified, \code{read_patterns} will attempt to get the start date from the first ten characters of the path. In "new format" filepaths ("2020/01/09/core-patterns-part-1.csv.gz"), nine days will be subtracted from the date found.}
28 |
29 | \item{dir}{The directory in which the file sits.}
30 |
31 | \item{by}{A character vector giving the variable names of the level to be collapsed to using \code{fun}. The resulting data will have X rows per unique combination of \code{by}, where X is 1 if no expand variables are specified, or the length of the expand variable if specified. Set to \code{NULL} to aggregate across all initial rows, or set to \code{FALSE} to not aggregate at all (this will also add an \code{initial_rowno} column showing the original row number). You can also avoid aggregating by doing \code{by = 'placekey'}, which might play more nicely with some of the other features.}
32 |
33 | \item{fun}{Function to use to aggregate the expanded variable to the \code{by} level.}
34 |
35 | \item{na.rm}{Whether to remove any missing values of the expanded variable before aggregating. Does not remove missing values of the \code{by} variables. May not be necessary if \code{fun} handles \code{NA}s on its own.}
36 |
37 | \item{filter}{A character string describing a logical statement for filtering the data, for example \code{filter = 'state_fips == 6'} would give you only data from California. Will be used as an \code{i} argument in a \code{data.table}, see \code{help(data.table)}. Filtering here instead of afterwards can cut down on time and memory demands.}
38 |
39 | \item{expand_int}{A character string giving the name of a JSON variable in integer format ([1,2,3,...]) to be expanded into rows. Takes precedence over \code{expand_cat} if both are specified.}
40 |
41 | \item{expand_cat}{A JSON variable in categorical format ({A: 2, B: 3, etc.}) to be expanded into rows. Ignored if \code{expand_int} is specified.}
42 |
43 | \item{expand_name}{The name of the new variable to be created with the category index for the expanded variable.}
44 |
45 | \item{multi}{A list of lists, for the purposes of creating a list of multiple processed files. This will vastly speed up processing over doing each of them one at a time. Each named list has the entry \code{name} as well as any of the options \code{by, fun, filter, expand_int, expand_cat, expand_name} as specified above. If specified, will override other entries of \code{by}, etc.}
46 |
47 | \item{naics_link}{A \code{data.table}, possibly produced by \code{link_poi_naics}, that links \code{placekey} and \code{naics_code}. This will allow you to include \code{'naics_code'} in the \code{by} argument (see the sketch at the end of the examples). Technically you could have stuff other than \code{naics_code} in here and use that in \code{by} too; I won't stop ya.}
48 |
49 | \item{select}{Character vector of variables to get from the file. Set to \code{NULL} to get all variables. \strong{Specifying select is very much recommended, and will speed up the function a lot.}}
50 |
51 | \item{gen_fips}{Set to \code{TRUE} to use the \code{poi_cbg} variable to generate \code{state_fips} and \code{county_fips} variables. This will also result in \code{poi_cbg} being converted to character.}
52 |
53 | \item{start_date}{The first date in the file, as a date object. If omitted, will assume that the filename begins YYYY-MM-DD.}
54 |
55 | \item{silent}{Set to \code{TRUE} to suppress the timecode message.}
56 |
57 | \item{...}{Other arguments to be passed to \code{data.table::fread} when reading in the file. For example, \code{nrows} to only read in a certain number of rows.}
58 | }
59 | \description{
60 | Be aware that the files this is designed to work with are large, so this function may take a while to execute. It takes a single \code{.csv.gz} SafeGraph patterns file and reads it in. The output is a \code{data.table} (or a list of them if \code{multi} is specified) containing the data from \code{filename}, collapsed and expanded in different ways.
61 | }
62 | \details{
63 | Note that after reading in data, if \code{gen_fips = TRUE}, state and county names can be merged in using \code{data(fips_to_names)}.
64 | }
65 | \examples{
66 |
67 | \dontrun{
68 | # 'patterns-part-1.csv.gz' is a weekly patterns file in the main-file folder, which is the working directory
69 | patterns <- read_patterns('patterns-part-1.csv.gz',
70 | # We only need these variables (and poi_cbg which is auto-added with gen_fips = TRUE)
71 | select = c('brands','visits_by_day'),
72 | # We want two formatted files to come out. The first aggregates to the state-brand-day level, getting visits by day
73 | multi = list(list(name = 'by_brands', by = c('state_fips','brands'), expand_int = 'visits_by_day'),
74 | # The second aggregates to the state-county-day level but only for Colorado and Connecticut (see the filter)
75 | list(name = 'co_and_ct', by = c('state_fips','county_fips'), filter = 'state_fips \%in\% 8:9', expand_int = 'visits_by_day')))
76 | patterns_brands <- patterns[[1]]
77 | patterns_co_and_ct <- patterns[[2]]
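
# A sketch, assuming naics is a placekey/naics_code data.table such as
# link_poi_naics() produces; naics_link lets you put 'naics_code' in by
patterns_by_industry <- read_patterns('patterns-part-1.csv.gz',
                                      naics_link = naics,
                                      by = c('state_fips', 'naics_code'),
                                      expand_int = 'visits_by_day')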
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
/man/read_shop.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/read_shop.R
3 | \name{read_shop}
4 | \alias{read_shop}
5 | \title{Read a ZIP file with patterns and other data as it comes from the SafeGraph Shop}
6 | \usage{
7 | read_shop(
8 | filename,
9 | dir = ".",
10 | keeplist = c("patterns", "normalization_stats.csv", "home_panel_summary.csv",
11 | "visit_panel_summary.csv", "brand_info.csv"),
12 | exdir = dir,
13 | cleanup = TRUE,
14 | by = NULL,
15 | fun = sum,
16 | na.rm = TRUE,
17 | filter = NULL,
18 | expand_int = NULL,
19 | expand_cat = NULL,
20 | expand_name = NULL,
21 | multi = NULL,
22 | naics_link = NULL,
23 | select = NULL,
24 | gen_fips = FALSE,
25 | silent = FALSE,
26 | start_date = NULL,
27 | ...
28 | )
29 | }
30 | \arguments{
31 | \item{filename}{The filename of the \code{.zip} file from the shop.}
32 |
33 | \item{dir}{The directory the file is in.}
34 |
35 | \item{keeplist}{Character vector of the files in the ZIP to read in. Use \code{'patterns'} to refer to the patterns files.}
36 |
37 | \item{exdir}{Name of the directory to unzip to.}
38 |
39 | \item{cleanup}{Set to \code{TRUE} to delete all the unzipped files after being read in.}
40 |
41 | \item{by, fun, na.rm, filter, expand_int, expand_cat, expand_name, multi, naics_link, select, gen_fips, silent, ...}{Other arguments to be passed to \code{read_patterns}, specified as in \code{help(read_patterns)}. Note that \code{gen_fips} is \code{FALSE} here by default, rather than \code{TRUE} as elsewhere, as files from the shop often do not contain the \code{poi_cbg} variable necessary to use it. Check which state indicator variables you have access to, perhaps \code{region}.}
42 |
43 | \item{start_date}{An argument to be passed to \code{read_patterns} giving the first date present in the file, as a date object. When using \code{read_shop} this should usually be included, since the patterns file names in the shop files are not in a format \code{read_patterns} can pick up on automatically.}
44 | }
45 | \description{
46 | This will open up a ZIP file from the SafeGraph shop and will read all of the data in, performing processing of the patterns files using \code{read_patterns}.
47 | }
48 | \details{
49 | The result will be a named list with each of the components of the data.
50 | }
51 | \examples{
52 |
53 | \dontrun{
54 | # In the working directory I have the file 'shop_file.zip' to read in
55 |
56 | mydata <- read_shop('shop_file.zip',
57 | # I only want some of the files
58 | keeplist = c('patterns','home_panel_summary.csv'),
59 | # For patterns, only keep these variables
60 | select = c('raw_visit_counts', 'region', 'bucketed_dwell_times', 'location_name'),
61 | # I want two aggregations of patterns - one of total visits by state ('region')
62 | # and another by location_name that has the dwell times for each brand
63 | multi = list(
64 | list(name = 'all',
65 | by = 'region'),
66 | list(name = 'location_dwells',
67 | by = 'location_name',
68 | expand_cat = 'bucketed_dwell_times',
69 | expand_name = 'bucketed_times')
70 | ),
71 | # Be sure to specify start_date for read_shop
72 | start_date = lubridate::ymd('2020-03-01'))
73 |
74 | # The result is a list with two items: patterns and home_panel_summary.csv.
75 | # patterns itself is a list with two data.tables inside - 'all' and 'location_dwells',
76 | # aggregated as given.
77 | }
78 |
79 | }
80 |
--------------------------------------------------------------------------------
/man/safegraph_api.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/safegraph_api.R
3 | \name{safegraph_api}
4 | \alias{safegraph_api}
5 | \title{Download SafeGraph data from the API}
6 | \usage{
7 | safegraph_api(
8 | key,
9 | placekeys = NULL,
10 | location = NULL,
11 | search = NULL,
12 | first = 10,
13 | after = NULL,
14 | dataset = "core",
15 | date = NULL,
16 | select = NULL,
17 | batch = FALSE,
18 | display_call = FALSE,
19 | ...
20 | )
21 | }
22 | \arguments{
23 | \item{key}{A character string containing an API Access Key. See \url{https://docs.safegraph.com/reference/access-and-authentication} to get one.}
24 |
25 | \item{placekeys}{A character vector of Placekeys to look up data with. If this is more than 20 Placekeys long, batching will be performed automatically as long as \code{batch = TRUE}. Cannot be longer than 1000 entries. Exactly one of \code{placekeys}, \code{location}, or \code{search} must be specified.}
26 |
27 | \item{location}{A named vector of location data, or one-row \code{data.frame} with appropriately-named columns, that specifies a single place of interest to look up data for. Available location variable names, and the different combinations that uniquely identify a single location, are available at \url{https://docs.safegraph.com/reference/lookup-name-address}. Exactly one of \code{placekeys}, \code{location}, or \code{search} must be specified.}
28 |
29 | \item{search}{A named list of filter settings that specifies a set of filter criteria for the SafeGraph POIs. Data will be returned for the first \code{first} matches.}
30 |
31 | \item{first}{If using \code{search}, return only the first \code{first} matches found. If set to any number above \code{20}, batching will be performed automatically if \code{batch = TRUE}. Will not accept a value above \code{1000}.}
32 |
33 | \item{after}{If using \code{search}, skip the first \code{after} matches before returning the next \code{first} matches.}
34 |
35 | \item{dataset}{The SafeGraph response dataset(s) to get data from. Can be \code{'core'} for the SafeGraph Core data, \code{'geometry'} for geometry files, or \code{'weekly_patterns'} or \code{'monthly_patterns'} for weekly/monthly patterns data. Weekly patterns data will be the week of your choosing; monthly patterns data will always be the most recent month. Or, if using the \code{location} or \code{search} options, set to \code{'placekey'} to return only the Placekeys and not actual data (note you'll get the Placekeys anyway with all the other options). Defaults to \code{'core'}. See \url{https://docs.safegraph.com/reference/safegraph-response-datasets} for more information.}
36 |
37 | \item{date}{If \code{dataset = 'weekly_patterns'}, this option is required. A string in \code{'YYYY-MM-DD'} format specifying the week of data you want. Can be any day in that week. The \code{start_date} and \code{end_date} variant is currently not supported.}
38 |
39 | \item{select}{A character vector with the names of the variables you want returned. Defaults to all variables in the dataset. For the list of variables in each \code{dataset}, see the "Response Objects" section on \url{https://docs.safegraph.com/reference/safegraph-response-datasets}. For variables like \code{brands}, which has sub-variables \code{brand_id} and \code{brand_name}, putting \code{brands} will get all the sub-variables, or you can just get the sub-variables by themselves.}
40 |
41 | \item{batch}{Set to \code{TRUE} to allow for batching of results if there are more than 20 POIs being returned. Batching may be quite slow if there are a lot of matches! See the rate limiting in the Placekey API docs. Also note the 1000-per-minute rate limit, so if you decide to run multiple of your own large \code{safegraph_api} calls you may want to space them out.}
42 |
43 | \item{display_call}{Set to \code{TRUE} to print out the API call.}
44 |
45 | \item{...}{Currently unused}
46 | }
47 | \description{
48 | THIS FUNCTION IS NOT YET FULLY OPERATIONAL AND WILL ONLY WORK FOR PLACEKEY CALLS
49 | }
50 | \details{
51 | This function will allow you to make API requests of the SafeGraph API. See the documentation for the Places API here: \url{https://docs.safegraph.com/docs/places-api}.
52 | }
53 | \examples{
54 |
55 | \dontrun{
56 |
57 | # You can look up data for individual placekeys
58 | mydat = safegraph_api('MY API KEY', placekeys = "222-223@5x4-4b6-mff", select = 'open_hours')
59 | # Or a vector of them
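# (the placekeys below are placeholders, not real locations)
mydat = safegraph_api('MY API KEY',
                      placekeys = c("222-223@5x4-4b6-mff", "223-222@5x4-4b6-mgg"),
                      select = 'open_hours')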
60 |
61 |
62 | # For specific addresses
63 | address = list('location_name' = "Taco Bell",'street_address' = "710 3rd St", 'city'="San Francisco",'region' = "CA", 'iso_country_code' = "US")
64 | mydat = safegraph_api('MY API KEY', location= address, select = 'open_hours')
65 |
66 | # Or for (a subset of) POIs that match a search
67 | search <- list('city' = 'San Francisco', 'brand' = 'Starbucks')
68 | mydat = safegraph_api('MY API KEY', search = search, select = 'raw_visit_counts',
69 | dataset = 'weekly_patterns', date = '2021-01-01')
70 | }
71 |
72 | }
73 |
--------------------------------------------------------------------------------
/man/safegraph_aws.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/safegraph_aws.R
3 | \name{safegraph_aws}
4 | \alias{safegraph_aws}
5 | \title{Download SafeGraph data from AWS COVID Response}
6 | \usage{
7 | safegraph_aws(
8 | path = ".",
9 | dataset,
10 | bucket_only = FALSE,
11 | base_url = "s3.wasabisys.com",
12 | key,
13 | secret,
14 | region = "",
15 | prefix = "",
16 | prefix_is_dir = FALSE,
17 | s3 = "s3://sg-c19-response/",
18 | max_print = 1,
19 | ...
20 | )
21 | }
22 | \arguments{
23 | \item{path}{The local directory to synchronize.}
24 |
25 | \item{dataset}{The SafeGraph bucket to get from. Can be "weekly" (new method since July 2021), "weekly-backfill" (the new method for times before July 2021; note AS OF AUGUST 2021 this gives the same result as "weekly" but I've kept "weekly-backfill" here in case it switches back to being different later), "monthly" (method since July 2021; also contains backfill folders as \code{*_backfill/}), "neighborhood" (June 2021 and forward), "neighborhood-backfill" (May 2021 and previous), "distancing", "core", "core-canada", "geo-supplement", or, to get the baseline bucket, "none".}
26 |
27 | \item{bucket_only}{Instead of doing an \code{aws.s3::s3sync} call, just return the correct bucket as a string. Then you can use that to do your own \code{aws.s3::s3sync} call, or work with the AWS CLI.}
28 |
29 | \item{base_url}{The base URL to pull the data from.}
30 |
31 | \item{key}{A character string containing an AWS Access Key ID.}
32 |
33 | \item{secret}{A character string containing an AWS Secret Access Key.}
34 |
35 | \item{region}{A character string containing the AWS region.}
36 |
37 | \item{prefix}{Leading part of the objects in the bucket must have this prefix. For example, to download social distancing data only from 2020, set this to "2020/". Some of the backfill buckets can be tricky because folder structure also includes the release date. For example, for "weekly-backfill" if you want patterns data, you want "patterns_backfill/2021/07/15/15/" and THEN followed by the time period you want like "2021/". If you want backfill data from "monthly", for example patterns, it's "patterns_backfill/2021/07/15/16/", then followed by the year/month. The "neighborhood" buckets use "y=2021/m=06/" etc instead of "2021/06".}
38 |
39 | \item{prefix_is_dir}{If \code{FALSE}, the files matching \code{prefix} will be downloaded directly to \code{path}, which may not be desired behavior if \code{prefix} contains a directory (you probably want the directory structure to match!). Set to \code{TRUE} to, in effect, replace \code{path} with \code{paste0(path, prefix)} and so download files to the appropriate folder. Don't use if \code{prefix} also contains file characteristics like extension. This is \code{prefix_IS_dir}, not \code{prefix_CONTAINS_dir}.}
40 |
41 | \item{s3}{The S3 server that stores the data.}
42 |
43 | \item{max_print}{Temporarily set \code{options(max.print)} to this value. This will massively speed up the function, as \code{aws.s3::s3sync} likes to print the full list of files on the server before moving on. The option will be returned to its original value afterwards. Set to \code{NULL} to not alter any options.}
44 |
45 | \item{...}{Additional parameters to be sent to \code{aws.s3::s3sync} and from there on to \code{aws.s3::s3HTTP}. "direction" will be ignored.}
46 | }
47 | \description{
48 | This is a thin wrapper for \code{aws.s3::s3sync} that will aim you at the right directory to synchronize.
49 | }
50 | \details{
51 | NOTE THE BREAKING CHANGE WITH SafeGraphR 0.4.2: BUCKET NAMES ARE CHANGED AND ACCESS TO OUTDATED VERSIONS OF DATA IS REMOVED.
52 |
53 | This function doesn't add too much, but it does make the default behavior you probably want a bit easier. If you plan to specify the \code{aws.s3::s3sync} "bucket" option yourself, this function is largely useless.
54 |
55 | See \url{https://catalog.safegraph.io} for more description of the various buckets.
56 | }
57 | \examples{
58 |
59 | \dontrun{
60 |
61 | # Download all the recent weekly-patterns files to the working directory
62 | safegraph_aws(dataset = 'weekly', key = 'MYINFO', secret = 'MYOTHERINFO')
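
# A sketch, assuming the 2020/ prefix layout described above: download only
# the 2020 social-distancing files, mirroring the folder structure locally
safegraph_aws(dataset = 'distancing', key = 'MYINFO', secret = 'MYOTHERINFO',
              prefix = '2020/', prefix_is_dir = TRUE)

# Or just get the right bucket name and run your own aws.s3::s3sync call
bucket <- safegraph_aws(dataset = 'weekly', bucket_only = TRUE)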
63 |
64 | }
65 |
66 | }
67 |
--------------------------------------------------------------------------------
/man/sample_size_adjust.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/sample_size_adjust.R
3 | \name{sample_size_adjust}
4 | \alias{sample_size_adjust}
5 | \title{Adjust SafeGraph Data for Sampling Size Differences}
6 | \usage{
7 | sample_size_adjust(
8 | data,
9 | from_id = "census_block_group",
10 | sample_id = "number_devices_residing",
11 | from_level = "cbg",
12 | to_level = "county",
13 | by = NULL,
14 | pop_data = NULL
15 | )
16 | }
17 | \arguments{
18 | \item{data}{A \code{data.frame} (or \code{tibble} or \code{data.table}) containing (potentially among other things) geographic ID variables and a variable for the number of SafeGraph devices observed in that area. Often this is from a \code{home-panel-summary} file.}
19 |
20 | \item{from_id}{A character vector either giving the variable name of the census block group ID, or both the state FIPS and county FIPS variables (which must be numeric, and in state, then county order). Census block group must be specified if \code{from_level='cbg'}.}
21 |
22 | \item{sample_id}{A character variable giving the variable name of the variable in \code{data} that has the number of SafeGraph observations.}
23 |
24 | \item{from_level}{Either \code{'cbg'} or \code{'county'}, indicating the geographic level that is to be adjusted.}
25 |
26 | \item{to_level}{Either \code{'county'} or \code{'state'}, indicating the geographic level that the \code{from_level} components are to be adjusted to. For example, \code{from_level='county'} and \code{to_level='state'} would give an adjustment factor for each county as though each county in the state was sampled at the same rate.}
27 |
28 | \item{by}{The data returned will be on the \code{from_level} level. Specify other variables here to have it instead be on the \code{from_level}-\code{by} level, perhaps a timecode. \code{by} should not split the \code{from_level} counts. If, for example, \code{by} is used to split a county into two geographic subcounties, then the population adjustment will not be correct.}
29 |
30 | \item{pop_data}{If a population data file other than \code{data(cbg_pop)} or \code{data(county_pop)} should be used, enter it here. Should be in the same format, and with the same variable names, as \code{cbg_pop} if \code{from_level='cbg'}, or the same as \code{county_pop} if \code{from_level='county'}.}
31 | }
32 | \description{
33 | This function uses 2016 American Community Survey data to adjust SafeGraph counts for the portion of the population that is sampled. This function will return a \code{data.table} with columns for a geographic ID and the variable \code{adjust_factor}, which you can merge into your data and then multiply whatever count variables you like by \code{adjust_factor} to adjust them for sampling differences.
34 | }
35 | \examples{
36 | \dontrun{
37 | # The current working directory has many home_panel_summary files
38 | # Do some futzing with the census_block_group variable to
39 | # get it in the same format as it is in cbg_pop
40 | home_panel <- read_many_csvs(colClasses= c(census_block_group='character'))
41 | home_panel[,census_block_group := as.character(as.numeric(census_block_group))]
42 |
43 | # Create the data set with the adjust_factor variable
44 | # This will adjust CBG populations to county ones, by default
45 | adj_factor <- sample_size_adjust(home_panel, by = 'date_range_start')
46 |
47 | # Now take some distancing data I have
48 | # (where census_block_group is stored as origin_census_block_group)
49 | data.table::setnames(adj_factor, 'census_block_group', 'origin_census_block_group')
50 | # and merge in the adjustment factor
51 | distancing <- merge(distancing, adj_factor, all.x = TRUE, by = 'origin_census_block_group')
52 | # And use that adjustment factor to adjust!
53 | distancing[,adj_device_count := device_count*adj_factor]
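
# A sketch of a county-to-state adjustment instead, assuming a hypothetical
# county_panel with numeric state_fips and county_fips columns (in that order)
adj_county <- sample_size_adjust(county_panel,
                                 from_id = c('state_fips', 'county_fips'),
                                 from_level = 'county', to_level = 'state')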
54 |
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/man/scale_to_date.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/scale_to_date.R
3 | \name{scale_to_date}
4 | \alias{scale_to_date}
5 | \title{Scale data relative to its value on a date}
6 | \usage{
7 | scale_to_date(
8 | data,
9 | adj_vars,
10 | date,
11 | date_var = "date",
12 | by = NULL,
13 | growth = TRUE,
14 | format_percent = FALSE,
15 | accuracy = 0.1
16 | )
17 | }
18 | \arguments{
19 | \item{data}{Any type of data set that can be coerced to a \code{data.table}. Note that a \code{data.table} will be returned.}
20 |
21 | \item{adj_vars}{Character vector of the variable names you'd like adjusted to be relative to the date.}
22 |
23 | \item{date}{The date you'd like everything relative to, as a date object.}
24 |
25 | \item{date_var}{The name of the date variable, as a string.}
26 |
27 | \item{by}{Character vector of the variable names you'd like the operation to be performed by. There should only be one observation for which \code{date_var == date} within each combination of the \code{by} variables, or else your results will be arbitrary.}
28 |
29 | \item{growth}{Set to \code{TRUE} to get \code{new/old - 1} (i.e. a percentage growth). Set to \code{FALSE} to get \code{new/old} (i.e. a relative value).}
30 |
31 | \item{format_percent}{Set to \code{TRUE} to get back a formatted percentage, i.e. "50\%", instead of a number.}
32 |
33 | \item{accuracy}{If \code{format_percent = TRUE}, the number of digits after the decimal place to round to, as in \code{scales::percent}.}
34 | }
35 | \description{
36 | Pick a date and provide some variables. Those variables will be adjusted to be relative to their value on that date. Usually used to calculate foot traffic growth relative to a certain date.
37 | }
38 | \examples{
39 |
40 | # Create some data to scale relative to
41 | patterns <- data.table::data.table(date = c(lubridate::ymd('2020-01-15'),
42 | lubridate::ymd('2020-01-16'),
43 | lubridate::ymd('2020-01-17')),
44 | visits_by_day = c(1,2,3))
45 |
46 | # Make everything relative to January 15!
47 | scale_to_date(patterns, 'visits_by_day', lubridate::ymd('2020-01-15'))[]
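
# A sketch with groups (state_fips is hypothetical here): with one row per
# state per date, you could scale within each state and return relative
# values rather than growth rates:
# scale_to_date(patterns, 'visits_by_day', lubridate::ymd('2020-01-15'),
#               by = 'state_fips', growth = FALSE)[]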
48 | }
49 |
--------------------------------------------------------------------------------
/man/scale_yoy.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/scale_to_date.R
3 | \name{scale_yoy}
4 | \alias{scale_yoy}
5 | \title{Calculate year-on-year change}
6 | \usage{
7 | scale_yoy(
8 | data,
9 | yoy_vars,
10 | date_var = "date",
11 | leap_year_fillin = TRUE,
12 | by = NULL,
13 | growth = TRUE,
14 | format_percent = FALSE,
15 | accuracy = 0.1
16 | )
17 | }
18 | \arguments{
19 | \item{data}{Any type of data set that can be coerced to a \code{data.table}.}
20 |
21 | \item{yoy_vars}{String vector of the variable names you want to calculate year-on-year change for.}
22 |
23 | \item{date_var}{The name of the date variable, as a string. Must be formatted as Date objects.}
24 |
25 | \item{leap_year_fillin}{If the date is Feb. 29, the previous year will not have a Feb. 29. Set to \code{TRUE} to fill in by linear interpolation. If set to \code{TRUE}, returned data will be sorted by \code{by} and \code{date_var}.}
26 |
27 | \item{by}{Character vector of the variable names you'd like the operation to be performed by. There should only be one observation per date per combination of \code{by}.}
28 |
29 | \item{growth}{Set to \code{TRUE} to get \code{new/old - 1} (i.e. a percentage growth). Set to \code{FALSE} to get \code{new/old} (i.e. a relative value).}
30 |
31 | \item{format_percent}{Set to \code{TRUE} to get back a formatted percentage, i.e. "50\%", instead of a number.}
32 |
33 | \item{accuracy}{If \code{format_percent = TRUE}, the number of digits after the decimal place to round to, as in \code{scales::percent}.}
34 | }
35 | \description{
36 | This takes a data set with a date variable and calculates year-on-year changes for a set of variables of your choice. Returns a \code{data.table}.
37 | }
38 | \details{
39 | This will add new variables using \code{yoy_vars}, adding \code{lag} and \code{YOY} variants.
40 | }
41 | \examples{
42 |
43 | # Create some fake data to do year-on-year calculations with
44 | patterns <- data.table::data.table(date = c(lubridate::ymd('2019-01-15'),
45 | lubridate::ymd('2019-01-16'),
46 | lubridate::ymd('2020-01-15'),
47 | lubridate::ymd('2020-01-16')),
48 | visits_by_day = c(1,2,3,4))
49 |
50 | # And scale relative to the year before!
51 | scale_yoy(patterns, 'visits_by_day')[]
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/man/state_info.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/data.R
3 | \docType{data}
4 | \name{state_info}
5 | \alias{state_info}
6 | \title{State Information}
7 | \format{
8 | A \code{data.table} with one row per state or province and 5 variables:
9 | \describe{
10 | \item{statename}{The full name of the state / province}
11 | \item{CensusRegion}{The broad Census regions}
12 | \item{region}{The state's two-digit abbreviation / the province's international alpha code}
13 | \item{state_fips}{State FIPS code / Canadian SGC code}
14 | \item{iso_country_code}{Indicator for US or Canada}
15 | }
16 | }
17 | \source{
18 | US Census
19 | }
20 | \usage{
21 | state_info
22 | }
23 | \description{
24 | A dataset that links state (and Washington DC) names, FIPS codes, two-letter abbreviations (called "region" because this is what it is called in SafeGraph files that use it), and Census regions. Can be merged with \code{fips_to_names} using \code{state_fips} and \code{statename}.
25 | }
26 | \details{
27 | This also includes Canadian data on provinces.
28 |
29 | Note that this is a data set purely of Canadian provinces, US \emph{states}, and DC. Some SafeGraph files contain information on \code{region} values of \code{GU} (Guam), \code{PR} (Puerto Rico), etc., but those will be lost if merging with \code{state_info}.
30 | }
31 | \keyword{datasets}
32 |
--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 |
--------------------------------------------------------------------------------
/vignettes/SafeGraphR.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "SafeGraphR"
3 | output: rmarkdown::html_vignette
4 | vignette: >
5 | %\VignetteIndexEntry{SafeGraphR}
6 | %\VignetteEngine{knitr::rmarkdown}
7 | %\VignetteEncoding{UTF-8}
8 | ---
9 |
10 | ```{r, include = FALSE}
11 | knitr::opts_chunk$set(
12 | collapse = TRUE,
13 | comment = "#>"
14 | )
15 | ```
16 |
17 | **SafeGraphR** is an R package designed to make it easy to read in and process data from [SafeGraph](https://safegraph.com), including data that comes through the SafeGraph COVID-19 consortium or the [catalog](https://catalog.safegraph.io). You may want to consult the [Quick Start Guide](https://docs.google.com/document/d/1Xx-nzOX1qF3WfOpg4D8aemwFrrAkQaJuT0-1-CbgxQs/edit), the [Awesome SafeGraph Data Science List](https://github.com/SafeGraphInc/awesome-safegraph-datascience), the [Normalization Best Practices](https://colab.research.google.com/drive/16BELpcum4TKoH-5wg8Xym_CGgIGgpu1I?usp=sharing), and especially the [SafeGraph Docs](https://docs.safegraph.com).
18 |
19 | You can install **SafeGraphR** directly from GitHub.
20 |
21 | ```{r, eval = FALSE}
22 | # if necessary
23 | # install.packages('remotes')
24 | remotes::install_github('SafeGraphInc/SafeGraphR')
25 | ```
26 |
27 | The other pages on this site will walk you through how you can use **SafeGraphR** to work with the data.
28 |
29 | ---
30 |
31 | # Bugs and Help!
32 |
33 | **SafeGraphR** is currently in *beta*. All of its functions work, but of course there may be bugs remaining. The code has also not been checked with every possible combination of options that you could pick. Lastly, the SafeGraph data itself changes format on occasion, which may break some **SafeGraphR** functionality.
34 |
35 | If you run into an issue or bug in the code, please raise an Issue on the **SafeGraphR** Github [Issues page](https://github.com/SafeGraphInc/SafeGraphR/issues).
36 |
37 | If you're just having trouble getting things to work, you can find help at the [SafeGraph COVID Consortium Slack Channel](https://safegraphcovid19.slack.com/) in the *r-troubleshooting* room.
38 |
--------------------------------------------------------------------------------