├── cleanup ├── .gitignore ├── R ├── sysdata.rda ├── data.R ├── zzz.R ├── bikedata-package.R ├── distmat.R ├── bikedata-files.R └── utils.R ├── cleanup.win ├── data ├── lo_stns.rda └── bike_test_data.rda ├── tests ├── testthat.R ├── testthat │ ├── test-daily-trips.R │ ├── test-stations.R │ ├── test-db-stats.R │ └── test-store-data.R └── make_test_data.Rmd ├── vignettes ├── la_map.png ├── la_map_simple.png └── makefile ├── inst ├── db │ └── testdb.sqlite └── CITATION ├── docs ├── articles │ ├── la_map.png │ ├── la_map_simple.png │ ├── bikedata_files │ │ └── figure-html │ │ │ └── plot-la-1.png │ ├── makefile │ └── index.html ├── pkgdown.yml ├── link.svg ├── docsearch.js ├── jquery.sticky-kit.min.js ├── pkgdown.js ├── pkgdown.css ├── CONDUCT.html ├── reference │ ├── bike_test_data.html │ ├── bike_cities.html │ ├── bike_match_matrices.html │ ├── bike_rm_test_data.html │ ├── bike_write_test_data.html │ ├── bike_distmat.html │ └── bike_rm_db.html ├── authors.html └── paper.html ├── src ├── Makevars ├── common.h ├── sqlite3db-utils.h ├── utils.h ├── sqlite3db-setup.h ├── read-station-files.h ├── bikedata_init.c ├── read-city-files.h ├── sqlite3db-add-data.h ├── sqlite3db-utils.cpp ├── RcppExports.cpp └── sqlite3db-setup.cpp ├── bikedata.Rproj ├── makefile ├── .Rbuildignore ├── man ├── bike_cities.Rd ├── lo_stns.Rd ├── bike_test_data.Rd ├── bike_stored_files.Rd ├── bike_rm_test_data.Rd ├── index_bikedata_db.Rd ├── bike_rm_db.Rd ├── bike_write_test_data.Rd ├── bike_stations.Rd ├── bike_match_matrices.Rd ├── bike_latest_files.Rd ├── bike_summary_stats.Rd ├── bike_datelimits.Rd ├── bike_distmat.Rd ├── bike_db_totals.Rd ├── bike_demographic_data.Rd ├── dl_bikedata.Rd ├── bikedata.Rd ├── bike_daily_trips.Rd ├── store_bikedata.Rd └── bike_tripmat.Rd ├── .hooks └── description ├── NAMESPACE ├── cran-comments.md ├── .github └── workflows │ ├── test-coverage.yaml │ ├── R-CMD-check.yaml │ └── extra-os.yaml ├── CONTRIBUTING.md ├── .pre-commit-config.yaml ├── paper.md ├── DESCRIPTION 
├── paper.bib ├── NEWS.md └── data-raw └── sysdata.Rmd /cleanup: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm -f src/*.o src/vendor/sqlite3/*.o src/*.so 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | aaa.Rmd 2 | .Rproj.user 3 | 4 | # vim files 5 | .*.un~ 6 | .*.swp 7 | -------------------------------------------------------------------------------- /R/sysdata.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/bikedata/HEAD/R/sysdata.rda -------------------------------------------------------------------------------- /cleanup.win: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | rm -f src/*.o src/vendor/sqlite3/*.o src/*.so 4 | -------------------------------------------------------------------------------- /data/lo_stns.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/bikedata/HEAD/data/lo_stns.rda -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(bikedata) 3 | 4 | test_check("bikedata") 5 | -------------------------------------------------------------------------------- /vignettes/la_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/bikedata/HEAD/vignettes/la_map.png -------------------------------------------------------------------------------- /inst/db/testdb.sqlite: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ropensci/bikedata/HEAD/inst/db/testdb.sqlite -------------------------------------------------------------------------------- /data/bike_test_data.rda: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/bikedata/HEAD/data/bike_test_data.rda -------------------------------------------------------------------------------- /docs/articles/la_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/bikedata/HEAD/docs/articles/la_map.png -------------------------------------------------------------------------------- /vignettes/la_map_simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/bikedata/HEAD/vignettes/la_map_simple.png -------------------------------------------------------------------------------- /docs/articles/la_map_simple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/bikedata/HEAD/docs/articles/la_map_simple.png -------------------------------------------------------------------------------- /docs/pkgdown.yml: -------------------------------------------------------------------------------- 1 | pandoc: 2.3.1 2 | pkgdown: 1.1.0 3 | pkgdown_sha: ~ 4 | articles: 5 | bikedata: bikedata.html 6 | 7 | -------------------------------------------------------------------------------- /src/Makevars: -------------------------------------------------------------------------------- 1 | PKG_CPPFLAGS=-I. 
-DRSQLITE_USE_BUNDLED_SQLITE 2 | 3 | PKG_LIBS = vendor/sqlite3/sqlite3.o 4 | 5 | $(SHLIB): $(PKG_LIBS) 6 | -------------------------------------------------------------------------------- /docs/articles/bikedata_files/figure-html/plot-la-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/bikedata/HEAD/docs/articles/bikedata_files/figure-html/plot-la-1.png -------------------------------------------------------------------------------- /bikedata.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: No 4 | SaveWorkspace: No 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | Encoding: UTF-8 9 | 10 | BuildType: Package 11 | PackageUseDevtools: Yes 12 | PackageInstallArgs: --no-multiarch --with-keep.source 13 | PackageRoxygenize: rd,collate,namespace,vignette 14 | -------------------------------------------------------------------------------- /docs/articles/makefile: -------------------------------------------------------------------------------- 1 | LFILE = bikedata 2 | 3 | all: knith open 4 | 5 | knith: $(LFILE).Rmd 6 | echo "rmarkdown::render('$(LFILE).Rmd',output_file='$(LFILE).html')" | R --no-save -q 7 | 8 | knitr: $(LFILE).Rmd 9 | echo "rmarkdown::render('$(LFILE).Rmd',rmarkdown::md_document(variant='markdown_github'))" | R --no-save -q 10 | 11 | open: $(LFILE).html 12 | xdg-open $(LFILE).html & 13 | 14 | clean: 15 | rm -rf *.html *.png 16 | -------------------------------------------------------------------------------- /makefile: -------------------------------------------------------------------------------- 1 | LFILE = README 2 | 3 | all: knith open 4 | 5 | knith: $(LFILE).Rmd 6 | echo "rmarkdown::render('$(LFILE).Rmd',output_file='$(LFILE).html')" | R --no-save -q 7 | 8 | knitr: $(LFILE).Rmd 9 | echo "rmarkdown::render('$(LFILE).Rmd',rmarkdown::md_document(variant='markdown_github'))" | 
R --no-save -q 10 | 11 | open: $(LFILE).html 12 | xdg-open $(LFILE).html & 13 | 14 | clean: 15 | rm -rf *.html *.png README_cache 16 | -------------------------------------------------------------------------------- /vignettes/makefile: -------------------------------------------------------------------------------- 1 | LFILE = bikedata 2 | 3 | all: knith open 4 | 5 | knith: $(LFILE).Rmd 6 | echo "rmarkdown::render('$(LFILE).Rmd',output_file='$(LFILE).html')" | R --no-save -q 7 | 8 | knitr: $(LFILE).Rmd 9 | echo "rmarkdown::render('$(LFILE).Rmd',rmarkdown::md_document(variant='markdown_github'))" | R --no-save -q 10 | 11 | open: $(LFILE).html 12 | xdg-open $(LFILE).html & 13 | 14 | clean: 15 | rm -rf *.html *.png 16 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^README\.Rmd$ 2 | ^\.gitignore$ 3 | ^\.travis\.yml$ 4 | ^appveyor\.yml$ 5 | ^_pkgdown\.yml$ 6 | ^makefile$ 7 | ^vignettes/makefile$ 8 | ^.*\.Rproj$ 9 | ^aaa\.Rmd$ 10 | ^script\.R$ 11 | ^docs$ 12 | ^data-raw$ 13 | ^data/nomenclatura* 14 | ^paper\.md$ 15 | ^paper\.bib$ 16 | ^\.Rproj\.user$ 17 | ^codemeta\.json$ 18 | ^src/vendor/sqlite3/sqlite3.o$ 19 | ^CODE_OF_CONDUCT.md$ 20 | ^CONTRIBUTING.md$ 21 | ^cran-comments.md$ 22 | ^\.github$ 23 | ^\.pre-commit-config\.yaml$ 24 | ^\.hooks$ 25 | -------------------------------------------------------------------------------- /man/bike_cities.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/utils.R 3 | \name{bike_cities} 4 | \alias{bike_cities} 5 | \title{List of cities currently included in bikedata} 6 | \usage{ 7 | bike_cities() 8 | } 9 | \value{ 10 | A \code{data.frame} of cities, abbreviations, and names of bike 11 | systems currently able to be accessed. 
12 | } 13 | \description{ 14 | List of cities currently included in bikedata 15 | } 16 | \examples{ 17 | bike_cities () 18 | } 19 | -------------------------------------------------------------------------------- /.hooks/description: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | # Pre-commit hook: abort unless DESCRIPTION is changed and its Version line is staged. 3 | s <- gert::git_status() 4 | chk <- ("DESCRIPTION" %in% s$file && 5 | any (s$status [s$file == "DESCRIPTION"] %in% 6 | c ("modified", "new"))) # any(): file may have both staged and unstaged rows 7 | if (!chk) 8 | stop ("DESCRIPTION has not been updated") 9 | 10 | f <- file.path (rprojroot::find_root("DESCRIPTION"), "DESCRIPTION") 11 | x <- system2 ("git", args = c ("diff", "--cached", "-U0", f), stdout = TRUE) 12 | if (!any (grepl ("^\\+Version", x))) 13 | stop ("Version number in DESCRIPTION has not been incremented") 14 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(bike_cities) 4 | export(bike_daily_trips) 5 | export(bike_datelimits) 6 | export(bike_db_totals) 7 | export(bike_demographic_data) 8 | export(bike_distmat) 9 | export(bike_latest_files) 10 | export(bike_match_matrices) 11 | export(bike_rm_db) 12 | export(bike_rm_test_data) 13 | export(bike_stations) 14 | export(bike_stored_files) 15 | export(bike_summary_stats) 16 | export(bike_tripmat) 17 | export(bike_write_test_data) 18 | export(dl_bikedata) 19 | export(download_bikedata) 20 | export(index_bikedata_db) 21 | export(store_bikedata) 22 | importFrom(Rcpp,evalCpp) 23 | importFrom(magrittr,"%>%") 24 | useDynLib(bikedata, .registration = TRUE) 25 | -------------------------------------------------------------------------------- /man/lo_stns.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit 
documentation in R/data.R 3 | \docType{data} 4 | \name{lo_stns} 5 | \alias{lo_stns} 6 | \title{Docking stations for London, U.K.} 7 | \format{ 8 | A \code{data.frame} of the four columns described above. 9 | } 10 | \usage{ 11 | lo_stns 12 | } 13 | \description{ 14 | A \code{data.frame} of station id values, names, and geographic coordinates 15 | for 786 stations for London, U.K. These stations are generally (and by 16 | default) downloaded automatically to ensure they are always up to date, but 17 | such downloading can be disabled in the \code{store_bikedata()} function by 18 | setting \code{latest_lo_stns = FALSE}. 19 | } 20 | \keyword{datasets} 21 | -------------------------------------------------------------------------------- /src/common.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include // tolower 9 | #include // count 10 | 11 | #include 12 | 13 | 14 | // Stores the header data structure for a given city and file type, as directly 15 | // read in data from R/sysdata.rda, as generated by the data-raw/sysdata.Rmd 16 | // script 17 | struct HeaderStruct { 18 | unsigned int nvalues; 19 | bool data_has_stations, terminal_quote; 20 | std::vector quoted; 21 | std::vector position_file2db; 22 | }; 23 | 24 | // total number of fields in the trip table of database 25 | const unsigned int num_db_fields = 15; 26 | -------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | # CRAN notes for bikedata_0.2.5 submission 2 | 3 | This package was previously, "Archived again on 2020-02-12 as check issues were not corrected on re-submission. UBSAN reports integer overflow, valgrind reports use of uninitialized values." My recent resubmission still manifest the integer overflow problem which this submission now rectifies. 
The problem arose through my overlooking an inline conversion to as part of a variable. I have confirmed with g++ UBSAN that the present submission fixes the issue. Please accept my apologies for any inconvenience. 4 | 5 | This submission has also been tested on: 6 | 7 | # Test environments 8 | 9 | - CRAN win-builder: R-oldrelease, R-release, R-devel 10 | * Ubuntu 16.04 (on `travis-ci`): R-release, R-devel, R-oldrelease 11 | * OSX: R-release (on `travis-ci`) 12 | 13 | -------------------------------------------------------------------------------- /src/sqlite3db-utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /*************************************************************************** 3 | * Project: bikedata 4 | * File: sqlite3db-utils.h 5 | * Language: C++ 6 | * 7 | * Author: Mark Padgham 8 | * E-Mail: mark.padgham@email.com 9 | * 10 | * Description: Utility functions for interaction with sqlite3 database. 11 | * 12 | * Compiler Options: -std=c++11 13 | ***************************************************************************/ 14 | 15 | #include "common.h" 16 | #include "utils.h" 17 | #include "vendor/sqlite3/sqlite3.h" 18 | 19 | #define BUFFER_SIZE 512 20 | 21 | namespace db_utils { 22 | 23 | int get_max_trip_id (sqlite3 * dbcon); 24 | int get_max_stn_id (sqlite3 * dbcon); 25 | int get_stn_table_size (sqlite3 * dbcon); 26 | 27 | } // end namespace db_utils 28 | -------------------------------------------------------------------------------- /docs/link.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 8 | 12 | 13 | -------------------------------------------------------------------------------- /src/utils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | // [[Rcpp::depends(BH)]] 4 | #include 5 | 6 | namespace utils { 7 | 8 | char *strtokm(char *str, const char *delim); 9 | std::string 
str_token (std::string * line, const char * delim); 10 | void rm_dos_end (char *str); 11 | bool strfound (const std::string str, const std::string target); 12 | 13 | std::string convert_datetime (std::string str); 14 | std::string convert_datetime_dmy (std::string str); 15 | bool date_is_standard (const std::string ymd); 16 | bool time_is_standard (const std::string hms); 17 | std::string convert_date (std::string ymd); 18 | std::string convert_date_dmy (std::string ymd); 19 | std::string convert_time (std::string hms); 20 | void zero_pad (std::string &t); 21 | 22 | long int timediff (std::string t1, std::string t2); 23 | long int daynum (int y, int m, int d); 24 | 25 | } // end namespace utils 26 | -------------------------------------------------------------------------------- /inst/CITATION: -------------------------------------------------------------------------------- 1 | citHeader("To cite bikedata in publications use:") 2 | 3 | citEntry(entry = "Article", 4 | title = "bikedata", 5 | author = personList(as.person("Mark Padgham"), 6 | as.person("Richard Ellison")), 7 | journal = "The Journal of Open Source Software", 8 | year = "2017", 9 | volume = "2", 10 | number = "20", 11 | month = "Dec", 12 | publisher = "The Open Journal", 13 | url = "https://doi.org/10.21105/joss.00471", 14 | doi = "10.21105/joss.00471", 15 | 16 | textVersion = 17 | paste("Mark Padgham, Richard Ellison (2017).", 18 | "bikedata", 19 | "Journal of Open Source Software, 2(20).", 20 | "URL https://doi.org/10.21105/joss.00471") 21 | ) 22 | -------------------------------------------------------------------------------- /man/bike_test_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data.R 3 | \docType{data} 4 | \name{bike_test_data} 5 | \alias{bike_test_data} 6 | \title{Test data for all 6 cities} 7 | \format{ 8 | A list of one data frame for each of the five cities 
of (bo, dc, la, 9 | lo, ny), plus two more for chicago stations and trips (ch_st, ch_tr). Each of 10 | these (except "ch_st") contains 200 representative trips. 11 | } 12 | \usage{ 13 | bike_test_data 14 | } 15 | \description{ 16 | A data set containing for each of the six cities a \code{data.frame} object 17 | of 200 trips. 18 | } 19 | \note{ 20 | These data are only used to convert to \code{.zip}-compressed files 21 | using \code{bike_write_test_data()}. These \code{.zip} files can be 22 | subsequently read into an SQLite3 database using \code{store_bikedata}. 23 | } 24 | \keyword{datasets} 25 | -------------------------------------------------------------------------------- /.github/workflows/test-coverage.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | pull_request: 6 | branches: 7 | - main 8 | 9 | name: test-coverage 10 | 11 | jobs: 12 | 13 | test-coverage: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | env: 18 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 19 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 20 | 21 | steps: 22 | 23 | - uses: actions/checkout@v4 24 | 25 | - uses: r-lib/actions/setup-r@v2 26 | with: 27 | use-public-rspm: true 28 | 29 | - uses: r-lib/actions/setup-r-dependencies@v2 30 | with: 31 | extra-packages: any::covr 32 | needs: coverage 33 | 34 | - name: Test coverage 35 | run: covr::codecov(line_exclusions = list('R/distmat.R','src/vendor/sqlite3/sqlite3.c','src/sqlite3db-add-data.cpp'=c(398:420))) 36 | shell: Rscript {0} 37 | -------------------------------------------------------------------------------- /man/bike_stored_files.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/database-stats.R 3 | \name{bike_stored_files} 4 | \alias{bike_stored_files} 5 | \title{Get names of files read into database} 6 | \usage{ 7 | bike_stored_files(bikedb, city) 8 
| } 9 | \arguments{ 10 | \item{bikedb}{A string containing the path to the SQLite3 database.} 11 | 12 | \item{city}{Optional city for which filenames are to be obtained} 13 | } 14 | \description{ 15 | Get names of files read into database 16 | } 17 | \examples{ 18 | \dontrun{ 19 | data_dir <- tempdir () 20 | bike_write_test_data (data_dir = data_dir) 21 | bikedb <- file.path (data_dir, 'testdb') 22 | store_bikedata (data_dir = data_dir, bikedb = bikedb) 23 | files <- bike_stored_files (bikedb = bikedb) 24 | # returns a tibble with names of all stored files 25 | 26 | bike_rm_test_data (data_dir = data_dir) 27 | bike_rm_db (bikedb) 28 | # don't forget to remove real data! 29 | # file.remove (list.files ('.', pattern = '.zip')) 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/sqlite3db-setup.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /*************************************************************************** 3 | * Project: bikedata 4 | * File: sqlite3db-setup.h 5 | * Language: C++ 6 | * 7 | * Author: Mark Padgham 8 | * E-Mail: mark.padgham@email.com 9 | * 10 | * Description: Routines to construct sqlite3 database and associated 11 | * indexes. 
Routines to store and add data are in 'sqlite3db-add-data.h' 12 | * 13 | * Compiler Options: -std=c++11 14 | ***************************************************************************/ 15 | 16 | #include 17 | 18 | // [[Rcpp::depends(BH)]] 19 | #include 20 | #include "common.h" 21 | #include "utils.h" 22 | #include "sqlite3db-add-data.h" 23 | #include "vendor/sqlite3/sqlite3.h" 24 | 25 | int rcpp_create_sqlite3_db (const char * bikedb); 26 | int rcpp_create_db_indexes (const char* bikedb, Rcpp::CharacterVector tables, 27 | Rcpp::CharacterVector cols, bool reindex); 28 | int rcpp_create_city_index (const char* bikedb, bool reindex); 29 | -------------------------------------------------------------------------------- /man/bike_rm_test_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write-test-data.R 3 | \name{bike_rm_test_data} 4 | \alias{bike_rm_test_data} 5 | \title{Removes test data written with 'bike_write_test_data()'} 6 | \usage{ 7 | bike_rm_test_data(data_dir = tempdir()) 8 | } 9 | \arguments{ 10 | \item{data_dir}{Directory in which data were extracted.} 11 | } 12 | \value{ 13 | Number of files successfully removed, which should equal six. 14 | } 15 | \description{ 16 | The function \code{bike_write_test_data()} writes several small 17 | zip-compressed files to disk. The default location is \code{tempdir()}, in 18 | which case these files will be automatically removed on termination of 19 | current R session. If, however, any other value for \code{data_dir} is passed 20 | to \code{bike_write_test_data()}, then the resultant files ought be deleted 21 | by calling this function. 
22 | } 23 | \examples{ 24 | \dontrun{ 25 | bike_write_test_data () 26 | list.files (tempdir ()) 27 | bike_rm_test_data () 28 | 29 | bike_write_test_data (data_dir = getwd ()) 30 | list.files () 31 | bike_rm_test_data (data_dir = getwd ()) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /man/index_bikedata_db.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/store-bikedata.R 3 | \name{index_bikedata_db} 4 | \alias{index_bikedata_db} 5 | \title{Add indexes to database created with store_bikedata} 6 | \usage{ 7 | index_bikedata_db(bikedb) 8 | } 9 | \arguments{ 10 | \item{bikedb}{The SQLite3 database containing the bikedata.} 11 | } 12 | \description{ 13 | Add indexes to database created with store_bikedata 14 | } 15 | \examples{ 16 | \dontrun{ 17 | data_dir <- tempdir () 18 | bike_write_test_data (data_dir = data_dir) 19 | # or download some real data! 20 | # dl_bikedata (city = "la", data_dir = data_dir) 21 | bikedb <- file.path (data_dir, "testdb") 22 | store_bikedata (data_dir = data_dir, bikedb = bikedb) 23 | # create database indexes for quicker access: 24 | index_bikedata_db (bikedb = bikedb) 25 | 26 | trips <- bike_tripmat (bikedb = bikedb, city = "LA") # trip matrix 27 | stations <- bike_stations (bikedb = bikedb) # station data 28 | 29 | bike_rm_test_data (data_dir = data_dir) 30 | bike_rm_db (bikedb) 31 | # don't forget to remove real data! 
32 | # file.remove (list.files (data_dir, pattern = ".zip")) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/read-station-files.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /*************************************************************************** 3 | * Project: bikedata 4 | * File: read-station-files.h 5 | * Language: C++ 6 | * 7 | * Author: Mark Padgham 8 | * E-Mail: mark.padgham@email.com 9 | * 10 | * Description: Routines to read and store data on bike docking stations in 11 | * the stations table of the SQLite3 database. 12 | * 13 | * Compiler Options: -std=c++11 14 | ***************************************************************************/ 15 | 16 | #include 17 | 18 | #include "sqlite3db-utils.h" 19 | 20 | namespace stns { 21 | int import_to_station_table (sqlite3 * dbcon, 22 | std::map stationqry); 23 | 24 | std::map get_bo_stn_table (sqlite3 * dbcon); 25 | std::map get_dc_stn_table (sqlite3 * dbcon); 26 | std::unordered_set get_stn_ids (sqlite3 * dbcon, std::string ci); 27 | 28 | } // end namespace stns 29 | 30 | int rcpp_import_stn_df (const char * bikedb, Rcpp::DataFrame stn_data, 31 | std::string city); 32 | -------------------------------------------------------------------------------- /man/bike_rm_db.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/store-bikedata.R 3 | \name{bike_rm_db} 4 | \alias{bike_rm_db} 5 | \title{Remove SQLite3 database generated with 'store_bikedata()'} 6 | \usage{ 7 | bike_rm_db(bikedb) 8 | } 9 | \arguments{ 10 | \item{bikedb}{The SQLite3 database containing the bikedata.} 11 | } 12 | \value{ 13 | TRUE if \code{bikedb} successfully removed; otherwise FALSE 14 | } 15 | \description{ 16 | If no directory is specified in the \code{bikedb} argument passed to 17 | \code{store_bikedata}, the 
database is created in \code{tempdir()}. This 18 | function provides a convenient way to remove the database in such cases by 19 | simply passing the name. 20 | } 21 | \examples{ 22 | \dontrun{ 23 | data_dir <- tempdir () 24 | bike_write_test_data (data_dir = data_dir) 25 | # or download some real data! 26 | # dl_bikedata (city = "la", data_dir = data_dir) 27 | bikedb <- file.path (data_dir, "testdb") 28 | store_bikedata (data_dir = data_dir, bikedb = bikedb) 29 | 30 | bike_rm_test_data (data_dir = data_dir) 31 | bike_rm_db (bikedb) 32 | # don't forget to remove real data! 33 | # file.remove (list.files (data_dir, pattern = ".zip")) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /R/data.R: -------------------------------------------------------------------------------- 1 | #' Test data for all 6 cities 2 | #' 3 | #' A data set containing for each of the six cities a \code{data.frame} object 4 | #' of 200 trips. 5 | #' 6 | #' @docType data 7 | #' 8 | #' @format A list of one data frame for each of the five cities of (bo, dc, la, 9 | #' lo, ny), plus two more for chicago stations and trips (ch_st, ch_tr). Each of 10 | #' these (except "ch_st") contains 200 representative trips. 11 | #' 12 | #' @note These data are only used to convert to \code{.zip}-compressed files 13 | #' using \code{bike_write_test_data()}. These \code{.zip} files can be 14 | #' subsequently read into an SQLite3 database using \code{store_bikedata}. 15 | "bike_test_data" 16 | 17 | #' Docking stations for London, U.K. 18 | #' 19 | #' A \code{data.frame} of station id values, names, and geographic coordinates 20 | #' for 786 stations for London, U.K. These stations are generally (and by 21 | #' default) downloaded automatically to ensure they are always up to date, but 22 | #' such downloading can be disabled in the \code{store_bikedata()} function by 23 | #' setting \code{latest_lo_stns = FALSE}. 
24 | #' 25 | #' @docType data 26 | #' 27 | #' @format A \code{data.frame} of the four columns described above. 28 | "lo_stns" 29 | -------------------------------------------------------------------------------- /man/bike_write_test_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/write-test-data.R 3 | \name{bike_write_test_data} 4 | \alias{bike_write_test_data} 5 | \title{Writes test data bundled with package to zip files} 6 | \usage{ 7 | bike_write_test_data(data_dir = tempdir()) 8 | } 9 | \arguments{ 10 | \item{data_dir}{Directory in which data are to be extracted. Defaults to 11 | \code{tempdir()}. If any other directory is specified, files ought to be 12 | removed with \code{bike_rm_test_data()}.} 13 | } 14 | \description{ 15 | Writes very small test files to disk that can be used to test the package. 16 | The entire package works by reading zip-compressed data files provided by the 17 | various hire bicycle systems. This function generates some equivalent data 18 | that can be read into an \code{SQLite} database by the 19 | \code{store_bikedata()} function, so that all other package functionality can 20 | then be tested from the resultant database. This function is also used in the 21 | examples of all other functions. 
22 | } 23 | \examples{ 24 | \dontrun{ 25 | bike_write_test_data () 26 | list.files (tempdir ()) 27 | bike_rm_test_data () 28 | 29 | bike_write_test_data (data_dir = '.') 30 | list.files () 31 | bike_rm_test_data (data_dir = '.') 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /man/bike_stations.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/stations.R 3 | \name{bike_stations} 4 | \alias{bike_stations} 5 | \title{Extract station matrix from SQLite3 database} 6 | \usage{ 7 | bike_stations(bikedb, city) 8 | } 9 | \arguments{ 10 | \item{bikedb}{A string containing the path to the SQLite3 database. 11 | If no directory specified, it is presumed to be in \code{tempdir()}.} 12 | 13 | \item{city}{Optional city (or vector of cities) for which stations are to be 14 | extracted} 15 | } 16 | \value{ 17 | Matrix containing data for each station 18 | } 19 | \description{ 20 | Extract station matrix from SQLite3 database 21 | } 22 | \examples{ 23 | \dontrun{ 24 | data_dir <- tempdir () 25 | bike_write_test_data (data_dir = data_dir) 26 | # or download some real data! 27 | # dl_bikedata (city = 'la', data_dir = data_dir) 28 | bikedb <- file.path (data_dir, 'testdb') 29 | store_bikedata (data_dir = data_dir, bikedb = bikedb) 30 | # create database indexes for quicker access: 31 | index_bikedata_db (bikedb = bikedb) 32 | 33 | stations <- bike_stations (bikedb) 34 | head (stations) 35 | 36 | bike_rm_test_data (data_dir = data_dir) 37 | bike_rm_db (bikedb) 38 | # don't forget to remove real data! 
39 | # file.remove (list.files (data_dir, pattern = '.zip')) 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /man/bike_match_matrices.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distmat.R 3 | \name{bike_match_matrices} 4 | \alias{bike_match_matrices} 5 | \title{Match rows and columns of distance and trip matrices} 6 | \usage{ 7 | bike_match_matrices(mat1, mat2) 8 | } 9 | \arguments{ 10 | \item{mat1}{A wide- or long-form trip or distance matrix returned from 11 | \code{\link{bike_tripmat}} or \code{\link{bike_distmat}}.} 12 | 13 | \item{mat2}{The corresponding distance or trip matrix.} 14 | } 15 | \value{ 16 | A list of the same matrices with matching start and end stations, and 17 | in the same order passed to the routine (that is, \code{mat1} then 18 | \code{mat2}). Each kind of matrix will be identified and named accordingly as 19 | either "trip" or "dist". Matrices are returned in same format (long or wide) 20 | as submitted. 21 | } 22 | \description{ 23 | Match rows and columns of distance and trip matrices 24 | } 25 | \note{ 26 | Distance matrices returned from \code{bike_distmat} use all stations 27 | listed for a given system, while trip matrices extracted with 28 | \link{bike_tripmat} will often have fewer stations because operational 29 | station numbers commonly vary over time. This function reconciles the two 30 | matrices through matching all row and column names (or just station IDs for 31 | long-form matrices), enabling them to be directly compared. 
32 | } 33 | -------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | pull_request: 6 | branches: 7 | - main 8 | 9 | name: R-CMD-check 10 | 11 | jobs: 12 | 13 | R-CMD-check: 14 | 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macOS-latest, r: 'release'} 24 | #- {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | - {os: ubuntu-latest, r: 'release'} 27 | - {os: ubuntu-latest, r: 'oldrel-1'} 28 | 29 | env: 30 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 31 | R_KEEP_PKG_SOURCE: yes 32 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 33 | 34 | steps: 35 | 36 | - uses: actions/checkout@v4 37 | 38 | - uses: r-lib/actions/setup-pandoc@v2 39 | 40 | - uses: r-lib/actions/setup-r@v2 41 | with: 42 | r-version: ${{ matrix.config.r }} 43 | http-user-agent: ${{ matrix.config.http-user-agent }} 44 | use-public-rspm: true 45 | 46 | - uses: r-lib/actions/setup-r-dependencies@v2 47 | with: 48 | extra-packages: any::rcmdcheck 49 | needs: check 50 | 51 | - uses: r-lib/actions/check-r-package@v2 52 | -------------------------------------------------------------------------------- /.github/workflows/extra-os.yaml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | pull_request: 6 | branches: 7 | - main 8 | 9 | name: extra-OS-check 10 | 11 | jobs: 12 | 13 | extra-OS-check: 14 | 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | #- {os: macOS-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | #- {os: 
ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | #- {os: ubuntu-latest, r: 'release'} 27 | #- {os: ubuntu-latest, r: 'oldrel-1'} 28 | 29 | env: 30 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 31 | R_KEEP_PKG_SOURCE: yes 32 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 33 | 34 | steps: 35 | 36 | - uses: actions/checkout@v4 37 | 38 | - uses: r-lib/actions/setup-pandoc@v2 39 | 40 | - uses: r-lib/actions/setup-r@v2 41 | with: 42 | r-version: ${{ matrix.config.r }} 43 | http-user-agent: ${{ matrix.config.http-user-agent }} 44 | use-public-rspm: true 45 | 46 | - uses: r-lib/actions/setup-r-dependencies@v2 47 | with: 48 | extra-packages: any::rcmdcheck 49 | needs: check 50 | 51 | - uses: r-lib/actions/check-r-package@v2 52 | -------------------------------------------------------------------------------- /src/bikedata_init.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // for NULL 4 | #include 5 | 6 | /* FIXME: 7 | Check these declarations against the C/Fortran source code. 
8 | */ 9 | 10 | /* .Call calls */ 11 | extern SEXP _bikedata_rcpp_create_city_index(SEXP, SEXP); 12 | extern SEXP _bikedata_rcpp_create_db_indexes(SEXP, SEXP, SEXP, SEXP); 13 | extern SEXP _bikedata_rcpp_create_sqlite3_db(SEXP); 14 | extern SEXP _bikedata_rcpp_import_stn_df(SEXP, SEXP, SEXP); 15 | extern SEXP _bikedata_rcpp_import_to_file_table(SEXP, SEXP, SEXP, SEXP); 16 | extern SEXP _bikedata_rcpp_import_to_trip_table(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); 17 | 18 | static const R_CallMethodDef CallEntries[] = { 19 | {"_bikedata_rcpp_create_city_index", (DL_FUNC) &_bikedata_rcpp_create_city_index, 2}, 20 | {"_bikedata_rcpp_create_db_indexes", (DL_FUNC) &_bikedata_rcpp_create_db_indexes, 4}, 21 | {"_bikedata_rcpp_create_sqlite3_db", (DL_FUNC) &_bikedata_rcpp_create_sqlite3_db, 1}, 22 | {"_bikedata_rcpp_import_stn_df", (DL_FUNC) &_bikedata_rcpp_import_stn_df, 3}, 23 | {"_bikedata_rcpp_import_to_file_table", (DL_FUNC) &_bikedata_rcpp_import_to_file_table, 4}, 24 | {"_bikedata_rcpp_import_to_trip_table", (DL_FUNC) &_bikedata_rcpp_import_to_trip_table, 6}, 25 | {NULL, NULL, 0} 26 | }; 27 | 28 | void R_init_bikedata(DllInfo *dll) 29 | { 30 | R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); 31 | R_useDynamicSymbols(dll, FALSE); 32 | } 33 | -------------------------------------------------------------------------------- /man/bike_latest_files.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/database-stats.R 3 | \name{bike_latest_files} 4 | \alias{bike_latest_files} 5 | \title{Check whether files in database are the latest published files} 6 | \usage{ 7 | bike_latest_files(bikedb) 8 | } 9 | \arguments{ 10 | \item{bikedb}{A string containing the path to the SQLite3 database. 
11 | If no directory specified, it is presumed to be in \code{tempdir()}.} 12 | } 13 | \value{ 14 | A named vector of binary values: TRUE if files in \code{bikedb} are 15 | the latest versions; otherwise FALSE, in which case \code{store_bikedata} 16 | could be run to update the database. 17 | } 18 | \description{ 19 | Check whether files in database are the latest published files 20 | } 21 | \examples{ 22 | \dontrun{ 23 | data_dir <- tempdir () 24 | bike_write_test_data (data_dir = data_dir) 25 | # or download some real data! 26 | # dl_bikedata (city = 'la', data_dir = data_dir) 27 | # Remove one London file that triggers an API call which may fail tests: 28 | file.remove (file.path (tempdir(), 29 | "01aJourneyDataExtract10Jan16-23Jan16.csv")) 30 | bikedb <- file.path (data_dir, 'testdb') 31 | store_bikedata (data_dir = data_dir, bikedb = bikedb) 32 | # bike_latest_files (bikedb) 33 | # All false because test data are not current, but would pass with real data 34 | 35 | bike_rm_test_data (data_dir = data_dir) 36 | bike_rm_db (bikedb) 37 | # don't forget to remove real data! 38 | # file.remove (list.files (data_dir, pattern = '.zip')) 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /man/bike_summary_stats.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/database-stats.R 3 | \name{bike_summary_stats} 4 | \alias{bike_summary_stats} 5 | \title{Extract summary statistics of database} 6 | \usage{ 7 | bike_summary_stats(bikedb) 8 | } 9 | \arguments{ 10 | \item{bikedb}{A string containing the path to the SQLite3 database.
11 | If no directory specified, it is presumed to be in \code{tempdir()}.} 12 | } 13 | \value{ 14 | A \code{data.frame} containing numbers of trips and stations along 15 | with times and dates of first and last trips for each city in database and a 16 | final column indicating whether the files match the latest published 17 | versions. 18 | } 19 | \description{ 20 | Extract summary statistics of database 21 | } 22 | \examples{ 23 | \dontrun{ 24 | data_dir <- tempdir () 25 | bike_write_test_data (data_dir = data_dir) 26 | # dl_bikedata (city = "la", data_dir = data_dir) # or some real data! 27 | # Remove one London file that triggers an API call which may fail tests: 28 | file.remove (file.path (tempdir(), 29 | "01aJourneyDataExtract10Jan16-23Jan16.csv")) 30 | bikedb <- file.path (data_dir, "testdb") 31 | store_bikedata (data_dir = data_dir, bikedb = bikedb) 32 | # create database indexes for quicker access: 33 | index_bikedata_db (bikedb = bikedb) 34 | 35 | bike_summary_stats ("testdb") 36 | 37 | bike_rm_test_data (data_dir = data_dir) 38 | bike_rm_db (bikedb) 39 | # don't forget to remove real data! 40 | # file.remove (list.files (".", pattern = ".zip")) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/read-city-files.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * Project: bikedata 3 | * File: read-city-files 4 | * Language: C++ 5 | * 6 | * Author: Mark Padgham 7 | * E-Mail: mark.padgham@email.com 8 | * 9 | * Description: Routines to read single lines of the data files for 10 | * different cities. 
11 | * 12 | * Compiler Options: -std=c++11 13 | ***************************************************************************/ 14 | #pragma once 15 | 16 | #include "common.h" 17 | #include "utils.h" 18 | #include "vendor/sqlite3/sqlite3.h" 19 | 20 | namespace city { 21 | 22 | unsigned int read_one_line_generic (sqlite3_stmt * stmt, char * line, 23 | std::map * stationqry, 24 | const std::string city, const HeaderStruct &headers, 25 | std::map &stn_map); 26 | unsigned int read_one_line_london (sqlite3_stmt * stmt, char * line); 27 | unsigned int read_one_line_nabsa (sqlite3_stmt * stmt, char * line, 28 | std::map * stationqry, 29 | std::string city); 30 | 31 | std::string convert_usertype (std::string ut); 32 | std::string convert_gender (std::string g); 33 | 34 | std::string convert_bo_stn_name (std::string &station_name, 35 | std::map &stn_map); 36 | std::string convert_dc_stn_name (std::string &station_name, bool id, 37 | std::map &stn_map); 38 | 39 | } // end namespace city 40 | -------------------------------------------------------------------------------- /man/bike_datelimits.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/database-stats.R 3 | \name{bike_datelimits} 4 | \alias{bike_datelimits} 5 | \title{Extract date-time limits from trip database} 6 | \usage{ 7 | bike_datelimits(bikedb, city) 8 | } 9 | \arguments{ 10 | \item{bikedb}{A string containing the path to the SQLite3 database. 
11 | If no directory specified, it is presumed to be in \code{tempdir()}.} 12 | 13 | \item{city}{If given, date limits are calculated only for trips in 14 | that city.} 15 | } 16 | \value{ 17 | A vector of 2 elements giving the date-time of the first and last 18 | trips 19 | } 20 | \description{ 21 | Extract date-time limits from trip database 22 | } 23 | \examples{ 24 | \dontrun{ 25 | data_dir <- tempdir () 26 | bike_write_test_data (data_dir = data_dir) 27 | # dl_bikedata (city = 'la', data_dir = data_dir) # or some real data! 28 | # Remove one London file that triggers an API call which may fail tests: 29 | file.remove (file.path (tempdir(), 30 | "01aJourneyDataExtract10Jan16-23Jan16.csv")) 31 | bikedb <- file.path (data_dir, 'testdb') 32 | store_bikedata (data_dir = data_dir, bikedb = bikedb) 33 | # create database indexes for quicker access: 34 | index_bikedata_db (bikedb = bikedb) 35 | 36 | bike_datelimits ('testdb') # overall limits for all cities 37 | bike_datelimits ('testdb', city = 'NYC') 38 | bike_datelimits ('testdb', city = 'los angeles') 39 | 40 | bike_rm_test_data (data_dir = data_dir) 41 | bike_rm_db (bikedb) 42 | # don't forget to remove real data! 43 | # file.remove (list.files ('.', pattern = '.zip')) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/sqlite3db-add-data.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | /*************************************************************************** 3 | * Project: bikedata 4 | * File: sqlite3db-add-data.h 5 | * Language: C++ 6 | * 7 | * Author: Mark Padgham 8 | * E-Mail: mark.padgham@email.com 9 | * 10 | * Description: Routines to store and add data to sqlite3 database. 11 | * Routines to construct sqlite3 database and associated 12 | * indexes are in 'sqlite3db-add-data.cpp'.
13 | * 14 | * Compiler Options: -std=c++11 15 | ***************************************************************************/ 16 | 17 | #include "common.h" 18 | #include "utils.h" 19 | #include "vendor/sqlite3/sqlite3.h" 20 | #include "sqlite3db-utils.h" 21 | #include "read-station-files.h" 22 | #include "read-city-files.h" 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | // [[Rcpp::depends(BH)]] 29 | #include 30 | 31 | int rcpp_import_to_trip_table (const char* bikedb, 32 | Rcpp::CharacterVector datafiles, std::string city, 33 | std::string header_file_name, bool data_has_stations, bool quiet); 34 | int rcpp_import_to_file_table (const char * bikedb, 35 | Rcpp::CharacterVector datafiles, std::string city, int nfiles); 36 | 37 | namespace db_add { 38 | 39 | HeaderStruct get_field_positions (const std::string fname, 40 | const std::string header_file_name, bool data_has_stations, 41 | const std::string city); 42 | void get_field_quotes (const std::string line, HeaderStruct &headers); 43 | void dump_headers (const HeaderStruct &headers); 44 | 45 | } // end namespace db_add 46 | -------------------------------------------------------------------------------- /man/bike_distmat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/distmat.R 3 | \name{bike_distmat} 4 | \alias{bike_distmat} 5 | \title{Extract station-to-station distance matrix} 6 | \usage{ 7 | bike_distmat(bikedb, city, expand = 0.5, long = FALSE, quiet = TRUE) 8 | } 9 | \arguments{ 10 | \item{bikedb}{A string containing the path to the SQLite3 database. 
11 | If no directory specified, it is presumed to be in \code{tempdir()}.} 12 | 13 | \item{city}{City for which tripmat is to be aggregated} 14 | 15 | \item{expand}{Distances are calculated by routing through the OpenStreetMap 16 | street network surrounding the bike stations, with the street network 17 | expanded by this amount to ensure all stations can be connected.} 18 | 19 | \item{long}{If FALSE, a square distance matrix of (num-stations, 20 | num_stations) is returned; if TRUE, a long-format matrix of (stn-from, 21 | stn-to, distance) is returned.} 22 | 23 | \item{quiet}{If FALSE, progress is displayed on screen} 24 | } 25 | \value{ 26 | If \code{long = FALSE}, a square matrix of distances between 27 | each station, otherwise a long-form \pkg{tibble} with three columns of 28 | (start_station_id, end_station_id, distance) 29 | } 30 | \description{ 31 | Extract station-to-station distance matrix 32 | } 33 | \note{ 34 | Distance matrices returned from \code{bike_distmat} use all stations 35 | listed for a given system, while trip matrices extracted with 36 | \link{bike_tripmat} will often have fewer stations because operational 37 | station numbers commonly vary over time. The two matrices may be reconciled 38 | with the \code{match_trips2dists} function, enabling them to be directly 39 | compared.
40 | } 41 | -------------------------------------------------------------------------------- /man/bike_db_totals.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/database-stats.R 3 | \name{bike_db_totals} 4 | \alias{bike_db_totals} 5 | \title{Count number of entries in sqlite3 database tables} 6 | \usage{ 7 | bike_db_totals(bikedb, trips = TRUE, city) 8 | } 9 | \arguments{ 10 | \item{bikedb}{A string containing the path to the SQLite3 database.} 11 | 12 | \item{trips}{If true, numbers of trips are counted; otherwise numbers of 13 | stations} 14 | 15 | \item{city}{Optional city for which numbers of trips are to be counted} 16 | } 17 | \description{ 18 | Count number of entries in sqlite3 database tables 19 | } 20 | \examples{ 21 | \dontrun{ 22 | data_dir <- tempdir () 23 | bike_write_test_data (data_dir = data_dir) 24 | bikedb <- file.path (data_dir, 'testdb') 25 | # latest_lo_stns is set to FALSE just to avoid download on CRAN; this should 26 | # normally remain at default value of TRUE: 27 | store_bikedata (data_dir = data_dir, bikedb = bikedb, latest_lo_stns = FALSE) 28 | # create database indexes for quicker access: 29 | index_bikedata_db (bikedb = bikedb) 30 | 31 | bike_db_totals (bikedb = bikedb, trips = TRUE) # total trips 32 | bike_db_totals (bikedb = bikedb, trips = TRUE, city = 'ch') 33 | bike_db_totals (bikedb = bikedb, trips = TRUE, city = 'ny') 34 | bike_db_totals (bikedb = bikedb, trips = FALSE) # total stations 35 | bike_db_totals (bikedb = bikedb, trips = FALSE, city = 'ch') 36 | bike_db_totals (bikedb = bikedb, trips = FALSE, city = 'ny') 37 | # numbers of stations can also be extracted with 38 | nrow (bike_stations (bikedb = bikedb)) 39 | nrow (bike_stations (bikedb = bikedb, city = 'ch')) 40 | 41 | bike_rm_test_data (data_dir = data_dir) 42 | bike_rm_db (bikedb) 43 | # don't forget to remove real data! 
44 | # file.remove (list.files ('.', pattern = '.zip')) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to bikedata 2 | 3 | ## Opening issues 4 | 5 | The easiest way to note any behavioural curiosities or to request any new 6 | features is by opening a [github issue](https://github.com/ropensci/bikedata/issues). 7 | 8 | 9 | ## Development guidelines 10 | 11 | If you'd like to contribute changes to `bikedata`, we use [the GitHub 12 | flow](https://guides.github.com/introduction/flow/index.html) for proposing, 13 | submitting, reviewing, and accepting changes. If you haven't done this before, 14 | there's a nice overview of git [here](http://r-pkgs.had.co.nz/git.html), as well 15 | as best practices for submitting pull requests 16 | [here](http://r-pkgs.had.co.nz/git.html#pr-make). 17 | 18 | The `bikedata` coding style diverges somewhat from [this commonly used R style 19 | guide](http://adv-r.had.co.nz/Style.html), primarily in the following two ways, 20 | both of which improve code readability: (1) All curly braces are vertically aligned: 21 | ```r 22 | this <- function () 23 | { 24 | x <- 1 25 | } 26 | ``` 27 | and **not** 28 | ```r 29 | this <- function(){ 30 | x <- 1 31 | } 32 | ``` 33 | and (2) Also highlighted in that code is the additional whitespace which 34 | permeates `bikedata` code. Words of text are separated by whitespace, and so 35 | code words should be too: 36 | ```r 37 | this <- function1 (function2 (x)) 38 | ``` 39 | and **not** 40 | ```r 41 | this <- function1(function2(x)) 42 | ``` 43 | with the natural result that one ends up writing 44 | ```r 45 | this <- function () 46 | ``` 47 | with a space between `function` and `()`. That's it. 48 | 49 | 50 | ## Code of Conduct 51 | 52 | We want to encourage a warm, welcoming, and safe environment for contributing to 53 | this project. 
See the [code of 54 | conduct](https://github.com/ropensci/bikedata/blob/master/CODE_OF_CONDUCT.md) 55 | for more information. 56 | -------------------------------------------------------------------------------- /man/bike_demographic_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/database-stats.R 3 | \name{bike_demographic_data} 4 | \alias{bike_demographic_data} 5 | \title{Static summary of which systems provide demographic data} 6 | \usage{ 7 | bike_demographic_data() 8 | } 9 | \value{ 10 | A \code{data.frame} detailing the kinds of demographic data provided 11 | by the different systems 12 | } 13 | \description{ 14 | Static summary of which systems provide demographic data 15 | } 16 | \examples{ 17 | bike_demographic_data () 18 | # Examples of filtering data by demographic parameters: 19 | \dontrun{ 20 | data_dir <- tempdir () 21 | bike_write_test_data (data_dir = data_dir) 22 | bikedb <- file.path (data_dir, "testdb") 23 | store_bikedata (data_dir = data_dir, bikedb = bikedb) 24 | # create database indexes for quicker access: 25 | index_bikedata_db (bikedb = bikedb) 26 | 27 | sum (bike_tripmat (bikedb = bikedb, city = "bo")) # 200 trips 28 | sum (bike_tripmat (bikedb = bikedb, city = "bo", birth_year = 1990)) # 9 29 | sum (bike_tripmat (bikedb = bikedb, city = "bo", gender = "f")) # 22 30 | sum (bike_tripmat (bikedb = bikedb, city = "bo", gender = 2)) # 22 31 | sum (bike_tripmat (bikedb = bikedb, city = "bo", gender = 1)) # = m; 68 32 | sum (bike_tripmat (bikedb = bikedb, city = "bo", gender = 0)) # = n; 9 33 | # Sum of gender-filtered trips is less than total because \code{gender = 0} 34 | # extracts all registered users with unspecified genders, while without 35 | # gender filtering extracts all trips for registered and non-registered 36 | # users. 
37 | 38 | # The following generates an error because Washington DC's Capital Bikeshare system 39 | # does not provide demographic data 40 | sum (bike_tripmat (bikedb = bikedb, city = "dc", birth_year = 1990)) 41 | bike_rm_test_data (data_dir = data_dir) 42 | bike_rm_db (bikedb) 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # All available hooks: https://pre-commit.com/hooks.html 2 | # R specific hooks: https://github.com/lorenzwalthert/precommit 3 | repos: 4 | - repo: https://github.com/lorenzwalthert/precommit 5 | rev: v0.2.2.9013 6 | hooks: 7 | #- id: style-files 8 | # args: [--style_pkg=styler, --style_fun=tidyverse_style] 9 | # codemeta must be above use-tidy-description when both are used 10 | - id: codemeta-description-updated 11 | - id: use-tidy-description 12 | - id: spell-check 13 | exclude: > 14 | (?x)^( 15 | data/.*| 16 | (.*/|)\.Rprofile| 17 | (.*/|)\.Renviron| 18 | (.*/|)\.gitignore| 19 | (.*/|)NAMESPACE| 20 | (.*/|)WORDLIST| 21 | (.*/|)\.travis.yml| 22 | (.*/|)appveyor.yml| 23 | (.*/|)\.Rbuildignore| 24 | (.*/|)\.pre-commit-.*| 25 | .*\.[rR]| 26 | .*\.Rproj| 27 | .*\.py| 28 | .*\.feather| 29 | .*\.rds| 30 | .*\.Rds| 31 | .*\.sh| 32 | .*\.RData 33 | )$ 34 | - id: readme-rmd-rendered 35 | - repo: https://github.com/pre-commit/pre-commit-hooks 36 | rev: v4.1.0 37 | hooks: 38 | - id: check-added-large-files 39 | args: ['--maxkb=200'] 40 | - id: end-of-file-fixer 41 | exclude: '\.Rd' 42 | - repo: local 43 | hooks: 44 | - id: forbid-to-commit 45 | name: Don't commit common R artifacts 46 | entry: Cannot commit .Rhistory, .RData, .Rds or .rds. 47 | language: fail 48 | files: '\.Rhistory|\.RData|\.Rds|\.rds$' 49 | - id: description version 50 | name: Version has been incremeneted in DESCRIPTION 51 | entry: .hooks/description 52 | language: script 53 | # `exclude: ` to allow committing specific files.
54 | -------------------------------------------------------------------------------- /tests/testthat/test-daily-trips.R: -------------------------------------------------------------------------------- 1 | context ("daily trips") 2 | 3 | require (testthat) 4 | 5 | bikedb <- system.file ("db", "testdb.sqlite", package = "bikedata") 6 | 7 | test_that ("no db arg", { 8 | expect_error (bike_daily_trips (), 9 | "Can't get daily trips if bikedb isn't provided") 10 | }) 11 | 12 | test_that ("db does not exist", { 13 | expect_error (bike_daily_trips (a), "object 'a' not found") 14 | expect_error (bike_daily_trips ("a"), "file a does not exist") 15 | expect_error (bike_daily_trips (a = "a"), "unused argument") 16 | expect_error (bike_daily_trips (bikedb = "a"), 17 | "file a does not exist") 18 | }) 19 | 20 | test_that ("no city", { 21 | expect_error (bike_daily_trips (bikedb), 22 | "bikedb contains multiple cities; please specify one") 23 | }) 24 | 25 | test_that ("daily trips", { 26 | nt <- bike_daily_trips (bikedb = bikedb, city = "ny") 27 | expect_equal (nrow (nt), 1) # only one day of trips 28 | expect_equal (nt$numtrips, 200) 29 | expect_is (nt$numtrips, "integer") 30 | nt <- bike_daily_trips (bikedb = bikedb, city = "ny", 31 | standardise = TRUE) 32 | expect_is (nt$numtrips, "numeric") 33 | 34 | expect_equal (bike_daily_trips (bikedb = bikedb, 35 | city = "ny")$numtrips, 200) 36 | expect_equal (bike_daily_trips (bikedb = bikedb, 37 | city = "ny", 38 | member = TRUE)$numtrips, 191) 39 | expect_equal (bike_daily_trips (bikedb = bikedb, city = "ny", 40 | gender = "f")$numtrips, 22) 41 | expect_equal (bike_daily_trips (bikedb = bikedb, city = "ny", 42 | station = "173", 43 | gender = 1)$numtrips, 1) 44 | }) 45 | -------------------------------------------------------------------------------- /R/zzz.R: -------------------------------------------------------------------------------- 1 | .onAttach <- function(libname, pkgname) { # nolint 2 | msg <- paste0 ("Data for London, 
U.K. powered by TfL Open Data:\n", 3 | " Contains OS data \u24B8 Crown copyright and ", 4 | "database rights 2016\n", 5 | "Data for New York City provided and owned by:\n", 6 | " NYC Bike Share, LLC and ", 7 | "Jersey City Bike Share, LLC (\"Bikeshare\")\n", 8 | " see https://www.citibikenyc.com/data-sharing-policy\n", 9 | "Data for Washington DC (Captialbikeshare), ", 10 | "Chiago (Divvybikes) and Boston (Hubway)\n", 11 | " provided and owned by Motivate International Inc.\n", 12 | " see https://www.capitalbikeshare.com/data-license-agreement\n", #nolint 13 | " and https://www.divvybikes.com/data-license-agreement\n", 14 | " and https://www.thehubway.com/data-license-agreement\n", 15 | "Nice Ride Minnesota license", 16 | " https://assets.niceridemn.com/data-license-agreement.html") # nolint 17 | packageStartupMessage (msg) 18 | } 19 | 20 | .onLoad <- function (libname, pkgname) { # nolint 21 | 22 | requireNamespace("utils", quietly = TRUE) 23 | # make data set names global to avoid CHECK notes 24 | utils::globalVariables ("sysdata") 25 | f <- file.path (tempdir (), "bikedata_headers.csv") 26 | # write.csv calls write.table, and the latter can then not be found on some 27 | # systems (including travis), even with requireNamespace. 
Safer to directly 28 | # and explicitly call the fns here: 29 | #utils::write.csv (sysdata$headers, file = f, row.names = FALSE) 30 | utils::write.table (sysdata$headers, file = f, row.names = FALSE, 31 | sep = ",") 32 | f <- file.path (tempdir (), "field_names.csv") 33 | #utils::write.csv (sysdata$field_names, file = f, 34 | # row.names = FALSE, quote = FALSE) 35 | utils::write.table (sysdata$field_names, file = f, row.names = FALSE, 36 | quote = FALSE, sep = ",") 37 | 38 | invisible () 39 | } 40 | -------------------------------------------------------------------------------- /man/dl_bikedata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dl-bikedata.R 3 | \name{dl_bikedata} 4 | \alias{dl_bikedata} 5 | \alias{download_bikedata} 6 | \title{Download hire bicycle data} 7 | \usage{ 8 | dl_bikedata(city, data_dir = tempdir(), dates = NULL, quiet = FALSE) 9 | 10 | download_bikedata(city, data_dir = tempdir(), dates = NULL, quiet = FALSE) 11 | } 12 | \arguments{ 13 | \item{city}{City for which to download bike data, or name of corresponding 14 | bike system (see Details below).} 15 | 16 | \item{data_dir}{Directory to which to download the files} 17 | 18 | \item{dates}{Character vector of dates to download data with dates formated 19 | as YYYYMM.} 20 | 21 | \item{quiet}{If FALSE, progress is displayed on screen} 22 | } 23 | \description{ 24 | Download data for subsequent storage via \link{store_bikedata}. 25 | } 26 | \note{ 27 | Only files that don't already exist in \code{data_dir} will be 28 | downloaded, and this function may thus be used to update a directory of files 29 | by downloading more recent files. If a particular file request fails, 30 | downloading will continue regardless. 
To ensure all files are downloaded, 31 | this function may need to be run several times until a message appears 32 | declaring that 'All data files already exist' 33 | } 34 | \section{Details}{ 35 | 36 | This function produces (generally) zip-compressed data in R's temporary 37 | directory. City names are not case sensitive, and must only be long enough to 38 | unambiguously designate the desired city. Names of corresponding bike systems 39 | can also be given. Currently possible cities (with minimal designations in 40 | parentheses) and names of bike hire systems are: 41 | \tabular{lr}{ 42 | Boston (bo)\tab Hubway\cr 43 | Chicago (ch)\tab Divvy Bikes\cr 44 | Washington, D.C. (dc)\tab Capital Bike Share\cr 45 | Los Angeles (la)\tab Metro Bike Share\cr 46 | London (lo)\tab Santander Cycles\cr 47 | Minnesota (mn)\tab NiceRide\cr 48 | New York City (ny)\tab Citibike\cr 49 | Philadelphia (ph)\tab Indego\cr 50 | San Francisco Bay Area (sf)\tab Ford GoBike\cr 51 | } 52 | 53 | Ensure you have a fast internet connection and at least 100 Mb space 54 | } 55 | 56 | \examples{ 57 | \dontrun{ 58 | dl_bikedata (city = 'New York City USA', dates = 201601:201613) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /tests/testthat/test-stations.R: -------------------------------------------------------------------------------- 1 | context ("stations") 2 | 3 | require (testthat) 4 | 5 | bikedb <- system.file ("db", "testdb.sqlite", package = "bikedata") 6 | 7 | test_that ("bike_stations function", { 8 | st <- bike_stations (bikedb) 9 | expect_equal (names (st), c ("id", "city", "stn_id", "name", 10 | "longitude", "latitude")) 11 | expect_true (nrow (st) == 2192) 12 | expect_equal (length (unique (st$city)), 6) 13 | expect_equal (nrow (st [st$city == "bo", ]), 93) 14 | expect_equal (nrow (st [st$city == "ch", ]), 581) 15 | expect_equal (nrow (st [st$city == "dc", ]), 456) 16 | expect_equal (nrow (st [st$city == "la", ]), 50) 17 | 
expect_equal (nrow (st [st$city == "lo", ]), 779) 18 | expect_equal (nrow (st [st$city == "ny", ]), 233) 19 | }) 20 | 21 | test_all <- (identical (Sys.getenv ("MPADGE_LOCAL"), "true") | 22 | identical (Sys.getenv ("GITHUB_WORKFLOW"), "test-coverage")) 23 | 24 | # extra tests for other cities 25 | test_that ("stations for extra cities", { 26 | if (test_all) { 27 | 28 | st <- NULL 29 | tryCatch (st <- bike_get_gu_stations (), 30 | warning = function (w) NULL, 31 | error = function (e) NULL) 32 | if (length (st) == 0) st <- NULL 33 | if (!is.null (st)) { 34 | 35 | expect_equal (ncol (st), 4) 36 | expect_true (nrow (st) > 200) # currently 243 37 | } 38 | 39 | st <- NULL 40 | data_dir <- tempdir () 41 | flists <- list (flist_csv_stns = NULL) 42 | tryCatch (st <- bike_get_bo_stations (flists, data_dir), 43 | warning = function (w) NULL, 44 | error = function (e) NULL) 45 | if (length (st) == 0) st <- NULL 46 | if (!is.null (st)) { 47 | 48 | expect_equal (ncol (st), 4) 49 | expect_true (nrow (st) > 200) # currently 300 50 | } 51 | } 52 | }) 53 | -------------------------------------------------------------------------------- /paper.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: bikedata 3 | tags: 4 | - public hire bicycle 5 | - open data 6 | - R 7 | authors: 8 | - name: Mark Padgham 9 | affiliation: 1 10 | orcid: 0000-0003-2172-5265 11 | - name: Richard Ellison 12 | affiliation: 2 13 | affiliations: 14 | - name: Department of Geoinformatics, University of Salzburg, Austria 15 | index: 1 16 | - name: Institute of Transport and Logistics Studies, The University of Sydney, Australia 17 | index: 2 18 | bibliography: paper.bib 19 | date: 28 Nov 2017 20 | --- 21 | 22 | # Summary 23 | 24 | The R package `bikedata` collates and facilitates access to arguably the world's 25 | largest open ongoing dataset on human mobility. 
All other comparable sources of 26 | data (such as public transit data, or mobile phone data) are either not publicly 27 | available, or have been released only at single distinct times for single 28 | distinct purposes. Many public hire bicycle systems in the U.S.A., along with 29 | Santander Cycles in London, U.K., issue ongoing releases of their usage data, 30 | providing a unique source of data for analysing, visualising, and understanding 31 | human movement and urban environments [@Austwick2013; @Borgnat2011; 32 | @Padgham2012]. Such data provide an invaluable resource for urban planners, 33 | geographers, social and health scientists and policy makers, data visualisation 34 | specialists, and data-affine users of the systems themselves. The `bikedata` 35 | package aims to provide unified access to usage statistics from all public hire 36 | bicycle systems which provide data. These currently include Santander Cycles 37 | in London, U.K., and from the U.S.A., citibike in New York City NY, Divvy in 38 | Chicago IL, Capital Bikeshare in Washington DC, Hubway in Boston MA, Metro in 39 | Los Angeles CA, and Indego in Philadelphia PA. Additional systems will be added 40 | on an ongoing basis. The package facilitates the three necessary steps of (1) 41 | downloading data; (2) storing data in a readily accessible form (in this case in 42 | a single SQLite3 database); (3) extracting aggregate statistics. The two 43 | primary aggregate statistics are matrices of numbers of trips between all pairs 44 | of stations, and daily time series. Both forms of aggregation may be extracted 45 | for specific dates, times, or demographic characteristics of cyclists.
46 | 47 | # References 48 | -------------------------------------------------------------------------------- /docs/docsearch.js: -------------------------------------------------------------------------------- 1 | $(function() { 2 | 3 | // register a handler to move the focus to the search bar 4 | // upon pressing shift + "/" (i.e. "?") 5 | $(document).on('keydown', function(e) { 6 | if (e.shiftKey && e.keyCode == 191) { 7 | e.preventDefault(); 8 | $("#search-input").focus(); 9 | } 10 | }); 11 | 12 | $(document).ready(function() { 13 | // do keyword highlighting 14 | /* modified from https://jsfiddle.net/julmot/bL6bb5oo/ */ 15 | var mark = function() { 16 | 17 | var referrer = document.URL ; 18 | var paramKey = "q" ; 19 | 20 | if (referrer.indexOf("?") !== -1) { 21 | var qs = referrer.substr(referrer.indexOf('?') + 1); 22 | var qs_noanchor = qs.split('#')[0]; 23 | var qsa = qs_noanchor.split('&'); 24 | var keyword = ""; 25 | 26 | for (var i = 0; i < qsa.length; i++) { 27 | var currentParam = qsa[i].split('='); 28 | 29 | if (currentParam.length !== 2) { 30 | continue; 31 | } 32 | 33 | if (currentParam[0] == paramKey) { 34 | keyword = decodeURIComponent(currentParam[1].replace(/\+/g, "%20")); 35 | } 36 | } 37 | 38 | if (keyword !== "") { 39 | $(".contents").unmark({ 40 | done: function() { 41 | $(".contents").mark(keyword); 42 | } 43 | }); 44 | } 45 | } 46 | }; 47 | 48 | mark(); 49 | }); 50 | }); 51 | 52 | /* Search term highlighting ------------------------------*/ 53 | 54 | function matchedWords(hit) { 55 | var words = []; 56 | 57 | var hierarchy = hit._highlightResult.hierarchy; 58 | // loop to fetch from lvl0, lvl1, etc. 
59 | for (var idx in hierarchy) { 60 | words = words.concat(hierarchy[idx].matchedWords); 61 | } 62 | 63 | var content = hit._highlightResult.content; 64 | if (content) { 65 | words = words.concat(content.matchedWords); 66 | } 67 | 68 | // return unique words 69 | var words_uniq = [...new Set(words)]; 70 | return words_uniq; 71 | } 72 | 73 | function updateHitURL(hit) { 74 | 75 | var words = matchedWords(hit); 76 | var url = ""; 77 | 78 | if (hit.anchor) { 79 | url = hit.url_without_anchor + '?q=' + escape(words.join(" ")) + '#' + hit.anchor; 80 | } else { 81 | url = hit.url + '?q=' + escape(words.join(" ")); 82 | } 83 | 84 | return url; 85 | } 86 | -------------------------------------------------------------------------------- /man/bikedata.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/bikedata-package.R 3 | \docType{package} 4 | \name{bikedata} 5 | \alias{bikedata} 6 | \title{Download and aggregate data from public bicycle hire systems} 7 | \description{ 8 | Download data from all public bicycle hire systems which provide open data, 9 | currently including 10 | \itemize{ 11 | \item Santander Cycles London, U.K. 12 | \item citibike New York City NY, U.S.A. 13 | \item Divvy Chicago IL, U.S.A. 14 | \item Capital BikeShare Washingon DC, U.S.A. 15 | \item Hubway Boston MA, U.S.A. 16 | \item Metro Los Angeles CA, U.S.A. 
17 | } 18 | } 19 | \section{Download and store data}{ 20 | 21 | \itemize{ 22 | \item \code{dl_bikedata} Download data for particular cities and dates 23 | \item \code{store_bikedata} Store data in \code{SQLite3} database 24 | } 25 | } 26 | 27 | \section{Sample data for testing package}{ 28 | 29 | \itemize{ 30 | \item \code{bike_test_data} Description of test data included with package 31 | \item \code{bike_write_test_data} Write test data to disk in form precisely 32 | reflecting data provided by all systems 33 | \item \code{bike_rm_test_data} Remove data written to disk with 34 | \code{bike_write_test_data} 35 | } 36 | } 37 | 38 | \section{Functions to aggregate trip data}{ 39 | 40 | \itemize{ 41 | \item \code{bike_daily_trips} Aggregate daily time series of total trips 42 | \item \code{bike_stations} Extract table detailing locations and names of 43 | bicycle docking stations 44 | \item \code{bike_tripmat} Extract aggregate counts of trips between all pairs 45 | of stations within a given city 46 | } 47 | } 48 | 49 | \section{Summary Statistics}{ 50 | 51 | \itemize{ 52 | \item \code{bike_summary_stats} Overall quantitative summary of database 53 | contents. All of the following functions provide individual aspects of this 54 | summary. 55 | \item \code{bike_db_totals} Count total numbers of trips or stations, either 56 | for entire database or a specified city. 57 | \item \code{bike_datelimits} Return dates of first and last trips, either for 58 | entire database or a specified city. 
59 | \item \code{bike_demographic_data} Simple table indicating which cities 60 | include demographic parameters with their data 61 | \item \code{bike_latest_files} Check whether files contained in database are 62 | latest published versions 63 | } 64 | } 65 | 66 | \author{ 67 | Mark Padgham 68 | } 69 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: bikedata 2 | Title: Download and Aggregate Data from Public Hire Bicycle Systems 3 | Version: 0.2.5.046 4 | Authors@R: c( 5 | person("Mark", "Padgham", , "mark.padgham@email.com", role = c("aut", "cre"), 6 | comment = c(ORCID = "https://orcid.org/0000-0003-2172-5265")), 7 | person("Richard", "Ellison", role = "aut"), 8 | person("Tom", "Buckley", role = "aut"), 9 | person("Ryszard", "Szymański", , "ryszard.szymanski@outlook.com", role = "ctb"), 10 | person("Bea", "Hernández", role = "rev", 11 | comment = "Bea reviewed the package for ropensci, see https://github.com/ropensci/onboarding/issues/116"), 12 | person("Elaine", "McVey", role = "rev", 13 | comment = "Elaine reviewed the package for ropensci, see https://github.com/ropensci/onboarding/issues/116"), 14 | person(, "SQLite Consortium", role = "ctb", 15 | comment = "Authors of included SQLite code") 16 | ) 17 | Description: Download and aggregate data from all public hire bicycle 18 | systems which provide open data, currently including 'Santander' 19 | Cycles in London, U.K.; from the U.S.A., 'Ford GoBike' in San 20 | Francisco CA, 'citibike' in New York City NY, 'Divvy' in Chicago IL, 21 | 'Capital Bikeshare' in Washington DC, 'Hubway' in Boston MA, 'Metro' 22 | in Los Angeles CA, 'Indego' in Philadelphia PA, and 'Nice Ride' in 23 | Minnesota; 'Bixi' from Montreal, Canada; and 'mibici' from 24 | Guadalajara, Mexico. 
25 | License: GPL-3 26 | URL: https://docs.ropensci.org/bikedata/, 27 | https://github.com/ropensci/bikedata 28 | BugReports: https://github.com/ropensci/bikedata/issues 29 | Depends: 30 | R (>= 3.0) 31 | Imports: 32 | brio, 33 | DBI, 34 | httr, 35 | lubridate, 36 | magrittr, 37 | methods, 38 | Rcpp, 39 | readxl, 40 | reshape2, 41 | RSQLite, 42 | tibble, 43 | xml2 44 | Suggests: 45 | dodgr, 46 | knitr, 47 | markdown, 48 | rmarkdown, 49 | testthat 50 | LinkingTo: 51 | BH, 52 | Rcpp 53 | VignetteBuilder: 54 | knitr 55 | Encoding: UTF-8 56 | LazyData: true 57 | NeedsCompilation: yes 58 | RoxygenNote: 7.1.2 59 | SystemRequirements: C++11 60 | X-schema.org-applicationCategory: Data Access 61 | X-schema.org-isPartOf: https://ropensci.org 62 | X-schema.org-keywords: bicycle-hire-systems, bike-hire-systems, bike-hire, 63 | bicycle-hire, database, bike-data 64 | -------------------------------------------------------------------------------- /paper.bib: -------------------------------------------------------------------------------- 1 | @Article{Austwick2013, 2 | Title = {The Structure of Spatial Networks and Communities in Bicycle Sharing Systems}, 3 | Author = {Austwick, Martin Zaltz AND O’Brien, Oliver AND Strano, Emanuele AND Viana, Matheus}, 4 | Journal = {PLoS ONE}, 5 | Year = {2013}, 6 | Month = {09}, 7 | Number = {9}, 8 | Pages = {e74685}, 9 | Volume = {8}, 10 | Doi = {10.1371/journal.pone.0074685} 11 | } 12 | 13 | @Article{Borgnat2011, 14 | Title = {Shared Bicycles in a City: A Signal Processing and Data Analysis Perspective}, 15 | Author = {P. Borgnat and C. Robardet and J.-B. Rouquier and E. Fleury and P. Abry and P. Flandrin}, 16 | Journal = {Advances in Complex Systems}, 17 | Year = {2011}, 18 | Pages = {415--438}, 19 | Volume = {14}, 20 | Doi = {10.1142/S0219525911002950} 21 | } 22 | 23 | @InCollection{Guenther2013, 24 | Title = {Journey Data Based Arrival Forecasting for Bicycle Hire Schemes}, 25 | Author = {Guenther, MarcelC. 
and Bradley, JeremyT.}, 26 | Booktitle = {Analytical and Stochastic Modeling Techniques and Applications}, 27 | Publisher = {Springer Berlin Heidelberg}, 28 | Year = {2013}, 29 | Editor = {Dudin, Alexander and De Turck, Koen}, 30 | Pages = {214-231}, 31 | Series = {Lecture Notes in Computer Science}, 32 | Volume = {7984}, 33 | Doi = {10.1007/978-3-642-39408-9_16}, 34 | ISBN = {978-3-642-39407-2} 35 | } 36 | 37 | 38 | @Article{Padgham2012, 39 | Title = {Human Movement Is Both Diffusive and Directed}, 40 | Author = {Padgham, Mark}, 41 | Journal = {PLoS ONE}, 42 | Year = {2012}, 43 | Month = {05}, 44 | Number = {5}, 45 | Pages = {e37754}, 46 | Volume = {7}, 47 | Doi = {10.1371/journal.pone.0037754} 48 | } 49 | 50 | -------------------------------------------------------------------------------- /R/bikedata-package.R: -------------------------------------------------------------------------------- 1 | #' Download and aggregate data from public bicycle hire systems 2 | #' 3 | #' Download data from all public bicycle hire systems which provide open data, 4 | #' currently including 5 | #' \itemize{ 6 | #' \item Santander Cycles London, U.K. 7 | #' \item citibike New York City NY, U.S.A. 8 | #' \item Divvy Chicago IL, U.S.A. 9 | #' \item Capital BikeShare Washington DC, U.S.A. 10 | #' \item Hubway Boston MA, U.S.A. 11 | #' \item Metro Los Angeles CA, U.S.A. 
12 | #' } 13 | #' 14 | #' @section Download and store data: 15 | #' \itemize{ 16 | #' \item \code{dl_bikedata} Download data for particular cities and dates 17 | #' \item \code{store_bikedata} Store data in \code{SQLite3} database 18 | #' } 19 | #' 20 | #' @section Sample data for testing package: 21 | #' \itemize{ 22 | #' \item \code{bike_test_data} Description of test data included with package 23 | #' \item \code{bike_write_test_data} Write test data to disk in form precisely 24 | #' reflecting data provided by all systems 25 | #' \item \code{bike_rm_test_data} Remove data written to disk with 26 | #' \code{bike_write_test_data} 27 | #' } 28 | #' 29 | #' @section Functions to aggregate trip data: 30 | #' \itemize{ 31 | #' \item \code{bike_daily_trips} Aggregate daily time series of total trips 32 | #' \item \code{bike_stations} Extract table detailing locations and names of 33 | #' bicycle docking stations 34 | #' \item \code{bike_tripmat} Extract aggregate counts of trips between all pairs 35 | #' of stations within a given city 36 | #' } 37 | #' 38 | #' @section Summary Statistics: 39 | #' \itemize{ 40 | #' \item \code{bike_summary_stats} Overall quantitative summary of database 41 | #' contents. All of the following functions provide individual aspects of this 42 | #' summary. 43 | #' \item \code{bike_db_totals} Count total numbers of trips or stations, either 44 | #' for entire database or a specified city. 45 | #' \item \code{bike_datelimits} Return dates of first and last trips, either for 46 | #' entire database or a specified city. 
47 | #' \item \code{bike_demographic_data} Simple table indicating which cities 48 | #' include demographic parameters with their data 49 | #' \item \code{bike_latest_files} Check whether files contained in database are 50 | #' latest published versions 51 | #' } 52 | #' 53 | #' @name bikedata 54 | #' @docType package 55 | #' @author Mark Padgham 56 | #' @importFrom magrittr %>% 57 | #' @importFrom Rcpp evalCpp 58 | #' @useDynLib bikedata, .registration = TRUE 59 | NULL 60 | -------------------------------------------------------------------------------- /man/bike_daily_trips.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/database-stats.R 3 | \name{bike_daily_trips} 4 | \alias{bike_daily_trips} 5 | \title{Extract daily trip counts for all stations} 6 | \usage{ 7 | bike_daily_trips( 8 | bikedb, 9 | city, 10 | station, 11 | member, 12 | birth_year, 13 | gender, 14 | standardise = FALSE 15 | ) 16 | } 17 | \arguments{ 18 | \item{bikedb}{A string containing the path to the SQLite3 database. 
19 | If no directory specified, it is presumed to be in \code{tempdir()}.} 20 | 21 | \item{city}{City for which trips are to be counted - mandatory if database 22 | contains data for more than one city} 23 | 24 | \item{station}{Optional argument specifying bike station for which trips are 25 | to be counted} 26 | 27 | \item{member}{If given, extract only trips by registered members 28 | (\code{member = 1} or \code{TRUE}) or not (\code{member = 0} or 29 | \code{FALSE}).} 30 | 31 | \item{birth_year}{If given, extract only trips by registered members whose 32 | declared birth years equal or lie within the specified value or values.} 33 | 34 | \item{gender}{If given, extract only records for trips by registered 35 | users declaring the specified genders (\code{f/m/.} or \code{2/1/0}).} 36 | 37 | \item{standardise}{If TRUE, daily trip counts are standardised to the 38 | relative numbers of bike stations in operation for each day, so daily trip 39 | counts are increased during (generally early) periods with relatively fewer 40 | stations, and decreased during (generally later) periods with more stations.} 41 | } 42 | \value{ 43 | A \code{data.frame} containing daily dates and total numbers of trips 44 | } 45 | \description{ 46 | Extract daily trip counts for all stations 47 | } 48 | \examples{ 49 | \dontrun{ 50 | bike_write_test_data () # by default in tempdir () 51 | # dl_bikedata (city = "la", data_dir = data_dir) # or some real data! 52 | store_bikedata (data_dir = tempdir (), bikedb = "testdb") 53 | # create database indexes for quicker access: 54 | index_bikedata_db (bikedb = "testdb") 55 | 56 | bike_daily_trips (bikedb = "testdb", city = "ny") 57 | bike_daily_trips (bikedb = "testdb", city = "ny", member = TRUE) 58 | bike_daily_trips (bikedb = "testdb", city = "ny", gender = "f") 59 | bike_daily_trips (bikedb = "testdb", city = "ny", station = "173", 60 | gender = 1) 61 | 62 | bike_rm_test_data () 63 | bike_rm_db ("testdb") 64 | # don't forget to remove real data! 
65 | # file.remove (list.files (".", pattern = ".zip")) 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /src/sqlite3db-utils.cpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * Project: bikedata 3 | * File: splite3db-utils.cpp 4 | * Language: C++ 5 | * 6 | * Author: Mark Padgham 7 | * E-Mail: mark.padgham@email.com 8 | * 9 | * Description: Utility functions for interaction with sqlite3 database. 10 | * 11 | * Compiler Options: -std=c++11 12 | ***************************************************************************/ 13 | 14 | #include "sqlite3db-utils.h" 15 | 16 | //' get_max_trip_id 17 | //' 18 | //' @param dbcon Active connection to sqlite3 database 19 | //' 20 | //' @return Maximal database primary ID of trips table 21 | //' 22 | //' @noRd 23 | int db_utils::get_max_trip_id (sqlite3 * dbcon) 24 | { 25 | sqlite3_stmt * stmt; 26 | char qry_id [BUFFER_SIZE] = "\0"; 27 | int rc = sprintf(qry_id, "SELECT MAX(id) FROM trips"); 28 | rc = sqlite3_prepare_v2(dbcon, qry_id, BUFFER_SIZE, &stmt, nullptr); 29 | rc = sqlite3_step (stmt); 30 | int max_trip_id = sqlite3_column_int (stmt, 0); 31 | sqlite3_finalize (stmt); 32 | (void) rc; // supress unused variable warning; 33 | 34 | return max_trip_id; 35 | } 36 | 37 | //' get_max_stn_id 38 | //' 39 | //' @param dbcon Active connection to sqlite3 database 40 | //' 41 | //' @return Maximal database primary ID of station table 42 | //' 43 | //' @noRd 44 | int db_utils::get_max_stn_id (sqlite3 * dbcon) 45 | { 46 | sqlite3_stmt * stmt; 47 | char qry_id [BUFFER_SIZE] = "\0"; 48 | sprintf(qry_id, "SELECT MAX(id) FROM stations"); 49 | int rc = sqlite3_prepare_v2(dbcon, qry_id, BUFFER_SIZE, &stmt, nullptr); 50 | rc = sqlite3_step (stmt); 51 | int max_stn_id = sqlite3_column_int (stmt, 0); 52 | sqlite3_finalize (stmt); 53 | (void) rc; // supress unused variable 
warning; 54 | 55 | return max_stn_id; 56 | } 57 | 58 | //' get_stn_table_size 59 | //' 60 | //' @param dbcon Active connection to sqlite3 database 61 | //' 62 | //' @return Number of stations in table 63 | //' 64 | //' @noRd 65 | int db_utils::get_stn_table_size (sqlite3 * dbcon) 66 | { 67 | sqlite3_stmt * stmt; 68 | char qry_id [BUFFER_SIZE] = "\0"; 69 | sprintf(qry_id, "SELECT COUNT(*) FROM stations"); 70 | int rc = sqlite3_prepare_v2(dbcon, qry_id, BUFFER_SIZE, &stmt, nullptr); 71 | rc = sqlite3_step (stmt); 72 | int num_stns = sqlite3_column_int (stmt, 0); 73 | sqlite3_finalize (stmt); 74 | (void) rc; // supress unused variable warning; 75 | 76 | return num_stns; 77 | } 78 | -------------------------------------------------------------------------------- /docs/jquery.sticky-kit.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | Sticky-kit v1.1.2 | WTFPL | Leaf Corcoran 2015 | http://leafo.net 3 | */ 4 | (function(){var b,f;b=this.jQuery||window.jQuery;f=b(window);b.fn.stick_in_parent=function(d){var A,w,J,n,B,K,p,q,k,E,t;null==d&&(d={});t=d.sticky_class;B=d.inner_scrolling;E=d.recalc_every;k=d.parent;q=d.offset_top;p=d.spacer;w=d.bottoming;null==q&&(q=0);null==k&&(k=void 0);null==B&&(B=!0);null==t&&(t="is_stuck");A=b(document);null==w&&(w=!0);J=function(a,d,n,C,F,u,r,G){var v,H,m,D,I,c,g,x,y,z,h,l;if(!a.data("sticky_kit")){a.data("sticky_kit",!0);I=A.height();g=a.parent();null!=k&&(g=g.closest(k)); 5 | if(!g.length)throw"failed to find stick parent";v=m=!1;(h=null!=p?p&&a.closest(p):b("
"))&&h.css("position",a.css("position"));x=function(){var c,f,e;if(!G&&(I=A.height(),c=parseInt(g.css("border-top-width"),10),f=parseInt(g.css("padding-top"),10),d=parseInt(g.css("padding-bottom"),10),n=g.offset().top+c+f,C=g.height(),m&&(v=m=!1,null==p&&(a.insertAfter(h),h.detach()),a.css({position:"",top:"",width:"",bottom:""}).removeClass(t),e=!0),F=a.offset().top-(parseInt(a.css("margin-top"),10)||0)-q, 6 | u=a.outerHeight(!0),r=a.css("float"),h&&h.css({width:a.outerWidth(!0),height:u,display:a.css("display"),"vertical-align":a.css("vertical-align"),"float":r}),e))return l()};x();if(u!==C)return D=void 0,c=q,z=E,l=function(){var b,l,e,k;if(!G&&(e=!1,null!=z&&(--z,0>=z&&(z=E,x(),e=!0)),e||A.height()===I||x(),e=f.scrollTop(),null!=D&&(l=e-D),D=e,m?(w&&(k=e+u+c>C+n,v&&!k&&(v=!1,a.css({position:"fixed",bottom:"",top:c}).trigger("sticky_kit:unbottom"))),eb&&!v&&(c-=l,c=Math.max(b-u,c),c=Math.min(q,c),m&&a.css({top:c+"px"})))):e>F&&(m=!0,b={position:"fixed",top:c},b.width="border-box"===a.css("box-sizing")?a.outerWidth()+"px":a.width()+"px",a.css(b).addClass(t),null==p&&(a.after(h),"left"!==r&&"right"!==r||h.append(a)),a.trigger("sticky_kit:stick")),m&&w&&(null==k&&(k=e+u+c>C+n),!v&&k)))return v=!0,"static"===g.css("position")&&g.css({position:"relative"}), 8 | a.css({position:"absolute",bottom:d,top:"auto"}).trigger("sticky_kit:bottom")},y=function(){x();return l()},H=function(){G=!0;f.off("touchmove",l);f.off("scroll",l);f.off("resize",y);b(document.body).off("sticky_kit:recalc",y);a.off("sticky_kit:detach",H);a.removeData("sticky_kit");a.css({position:"",bottom:"",top:"",width:""});g.position("position","");if(m)return null==p&&("left"!==r&&"right"!==r||a.insertAfter(h),h.remove()),a.removeClass(t)},f.on("touchmove",l),f.on("scroll",l),f.on("resize", 9 | y),b(document.body).on("sticky_kit:recalc",y),a.on("sticky_kit:detach",H),setTimeout(l,0)}};n=0;for(K=this.length;n= these. 
35 | # To ensure this is failsafe, tests for numbers of stations are simply 36 | # >= 93 + 581 + 456 + 5 + 233 + 700 = 2113 37 | 38 | if (test_all) { 39 | 40 | 41 | test_that ("write and store data", { 42 | bikedb <- file.path (tempdir (), "testdb") 43 | expect_silent (bike_write_test_data (data_dir = tempdir ())) 44 | expect_silent (n <- store_bikedata (data_dir = tempdir (), 45 | bikedb = bikedb, 46 | quiet = TRUE)) 47 | expect_silent (bike_rm_db (bikedb)) 48 | expect_message (n <- store_bikedata (data_dir = tempdir (), 49 | bikedb = bikedb, 50 | quiet = FALSE)) 51 | expect_true (file.exists (bikedb)) 52 | expect_silent (index_bikedata_db (bikedb = bikedb)) 53 | # some windows test machines do not allow file deletion, so 54 | # numbers of lines are incremented with each CRAN matrix 55 | # test. The following is therefore >= rather than just == 56 | #expect_equal (n, 1568) 57 | expect_true (n >= 1568) 58 | }) 59 | 60 | test_that ("stations from downloaded data", { 61 | bikedb <- file.path (tempdir (), "testdb") 62 | st <- bike_stations (bikedb) 63 | expect_true (nrow (st) >= 2000) 64 | }) 65 | 66 | # some windows machines also don"t clean all 13 files up, so this is 67 | # necessary: 68 | test_that ("remove data", { 69 | expect_true (bike_rm_test_data (data_dir = tempdir ()) >= 10) 70 | }) 71 | 72 | bikedb <- file.path (tempdir (), "testdb") 73 | chk <- tryCatch (file.remove (bikedb), 74 | warning = function (w) NULL, 75 | error = function (e) NULL) 76 | } 77 | -------------------------------------------------------------------------------- /docs/pkgdown.js: -------------------------------------------------------------------------------- 1 | /* http://gregfranko.com/blog/jquery-best-practices/ */ 2 | (function($) { 3 | $(function() { 4 | 5 | $("#sidebar") 6 | .stick_in_parent({offset_top: 40}) 7 | .on('sticky_kit:bottom', function(e) { 8 | $(this).parent().css('position', 'static'); 9 | }) 10 | .on('sticky_kit:unbottom', function(e) { 11 | 
$(this).parent().css('position', 'relative'); 12 | }); 13 | 14 | $('body').scrollspy({ 15 | target: '#sidebar', 16 | offset: 60 17 | }); 18 | 19 | $('[data-toggle="tooltip"]').tooltip(); 20 | 21 | var cur_path = paths(location.pathname); 22 | var links = $("#navbar ul li a"); 23 | var max_length = -1; 24 | var pos = -1; 25 | for (var i = 0; i < links.length; i++) { 26 | if (links[i].getAttribute("href") === "#") 27 | continue; 28 | var path = paths(links[i].pathname); 29 | 30 | var length = prefix_length(cur_path, path); 31 | if (length > max_length) { 32 | max_length = length; 33 | pos = i; 34 | } 35 | } 36 | 37 | // Add class to parent
  • , and enclosing
  • if in dropdown 38 | if (pos >= 0) { 39 | var menu_anchor = $(links[pos]); 40 | menu_anchor.parent().addClass("active"); 41 | menu_anchor.closest("li.dropdown").addClass("active"); 42 | } 43 | }); 44 | 45 | function paths(pathname) { 46 | var pieces = pathname.split("/"); 47 | pieces.shift(); // always starts with / 48 | 49 | var end = pieces[pieces.length - 1]; 50 | if (end === "index.html" || end === "") 51 | pieces.pop(); 52 | return(pieces); 53 | } 54 | 55 | function prefix_length(needle, haystack) { 56 | if (needle.length > haystack.length) 57 | return(0); 58 | 59 | // Special case for length-0 haystack, since for loop won't run 60 | if (haystack.length === 0) { 61 | return(needle.length === 0 ? 1 : 0); 62 | } 63 | 64 | for (var i = 0; i < haystack.length; i++) { 65 | if (needle[i] != haystack[i]) 66 | return(i); 67 | } 68 | 69 | return(haystack.length); 70 | } 71 | 72 | /* Clipboard --------------------------*/ 73 | 74 | function changeTooltipMessage(element, msg) { 75 | var tooltipOriginalTitle=element.getAttribute('data-original-title'); 76 | element.setAttribute('data-original-title', msg); 77 | $(element).tooltip('show'); 78 | element.setAttribute('data-original-title', tooltipOriginalTitle); 79 | } 80 | 81 | if(Clipboard.isSupported()) { 82 | $(document).ready(function() { 83 | var copyButton = ""; 84 | 85 | $(".examples, div.sourceCode").addClass("hasCopyButton"); 86 | 87 | // Insert copy buttons: 88 | $(copyButton).prependTo(".hasCopyButton"); 89 | 90 | // Initialize tooltips: 91 | $('.btn-copy-ex').tooltip({container: 'body'}); 92 | 93 | // Initialize clipboard: 94 | var clipboardBtnCopies = new Clipboard('[data-clipboard-copy]', { 95 | text: function(trigger) { 96 | return trigger.parentNode.textContent; 97 | } 98 | }); 99 | 100 | clipboardBtnCopies.on('success', function(e) { 101 | changeTooltipMessage(e.trigger, 'Copied!'); 102 | e.clearSelection(); 103 | }); 104 | 105 | clipboardBtnCopies.on('error', function() { 106 | 
changeTooltipMessage(e.trigger,'Press Ctrl+C or Command+C to copy'); 107 | }); 108 | }); 109 | } 110 | })(window.jQuery || window.$) 111 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | 0.2.5 2 | ================== 3 | Minor changes: 4 | - `store_bikedata()` now has additional parameter `latest_lo_stns` that should 5 | generally just be left at default value, but which can be set to `FALSE` for 6 | truly offline use. 7 | - Update bundled version of sqlite3 from 3.28 to 3.30 8 | - minor bug fixes 9 | 10 | 0.2.4 11 | ================== 12 | Back on CRAN after being removed due to dependency (dodgr) having been removed 13 | 14 | 0.2.3 15 | =================== 16 | Minor changes: 17 | - Fix dl_bikedata for Philadelphia 18 | - Improve robustness of tests 19 | 20 | 0.2.2 21 | =================== 22 | Minor changes: 23 | - add NEWS & README to CRAN description 24 | - Minor bug fixes 25 | 26 | 0.2.1 27 | =================== 28 | - New helper function `bike_cities` to directly list cities included in current 29 | package version 30 | 31 | Minor changes: 32 | - Bug fix for San Fran thanks to @tbdv (see issue #78) 33 | - Bug fix for LA (see issue #87) 34 | 35 | 0.2.0 36 | =================== 37 | - Major expansion to include new cities of San Francisco, Minneapolis/St Paul, 38 | Montreal Canada, and Guadalajara Mexico 39 | - Most code restructured to greatly ease the process of adding new cities (see 40 | github wiki for how to do this). 
41 | - New co-author: Tom Buckley 42 | 43 | Minor changes: 44 | - Bug fix for LA data which previously caused error due to invisible mac OSX 45 | system files being bundled with the distributed data 46 | - More accurate date processing for quarterly LA data 47 | 48 | 0.1.1 49 | =================== 50 | - Important bug in `dodgr` package rectified previously bug-ridden 51 | `bike_distmat()` calculations (thanks Joris Klingen). 52 | - Files for Washington DC Capital Bike Share system changed from quarterly to 53 | annual dumps 54 | - One rogue .xlsx file from London now processed and read properly (among all 55 | other well-behaved .csv files). 56 | - Update bundled sqlite3: 3.21 -> 3.22 57 | 58 | 59 | 0.1.0 60 | =================== 61 | - New function `bike_distmat()` calculates distance matrices between all pairs 62 | of stations as routed through street networks for each city. 63 | - Helper function `bike_match_matrices()` matches distance and trip matrices by 64 | start and end stations, so they can be directly compared in standard 65 | statistical routines. 66 | - North American Bike Share Association (NABSA) systems (currently LA and 67 | Philly) now distinguish member versus non-member based on whether usage is 68 | 30-day pass or "Walk-up". 69 | 70 | minor changes 71 | - `dl_bikedata()` function also aliased to `download_bikedata()`, so both do the 72 | same job. 73 | - Repeated runs of `store_bikedata()` on pre-existing databases sometimes 74 | re-added old data. This has now been fixed so only new data are added with 75 | each repeated call. 76 | - Dates for NABSA cities (LA and Philadelphia) are given in different formats, 77 | all of which are now appropriately handled. 78 | - Internally bundled sqlite3 upgraded v3.19 -> v3.21 79 | 80 | 81 | 0.0.4 82 | =================== 83 | - Database no longer automatically indexed; rather indexes must be actively 84 | generated with `index_bikedata_db()`. 
This makes multiple usages of 85 | `store_bikedata()` faster and easier. 86 | - `store_bikedata()` fixed so it only unzips files not already in database (it 87 | used to unzip them all) 88 | - Internal changes to improve consistency (mostly through using the DBI 89 | package). 90 | 91 | 92 | 0.0.3 93 | =================== 94 | - Minor changes only 95 | - More informative messages when data for specified dates not available 96 | 97 | 0.0.2 98 | =================== 99 | - No change to package functionality 100 | - Drop dplyr dependency after dplyr 0.7 upgrade 101 | 102 | 0.0.1 (31 May 2017) 103 | =================== 104 | - Initial CRAN release 105 | -------------------------------------------------------------------------------- /docs/articles/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Articles • bikedata 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
    50 |
    51 | 90 | 91 | 92 |
    93 | 94 |
    95 |
    96 | 99 | 100 |
    101 |

    The bikedata package

    102 |

    103 | 104 | 107 |
    108 |
    109 |
    110 | 111 |
    112 | 115 | 116 |
    117 |

    Site built with pkgdown.

    118 |
    119 | 120 |
    121 |
    122 | 123 | 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /src/RcppExports.cpp: -------------------------------------------------------------------------------- 1 | // Generated by using Rcpp::compileAttributes() -> do not edit by hand 2 | // Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393 3 | 4 | #include 5 | 6 | using namespace Rcpp; 7 | 8 | #ifdef RCPP_USE_GLOBAL_ROSTREAM 9 | Rcpp::Rostream& Rcpp::Rcout = Rcpp::Rcpp_cout_get(); 10 | Rcpp::Rostream& Rcpp::Rcerr = Rcpp::Rcpp_cerr_get(); 11 | #endif 12 | 13 | // rcpp_import_stn_df 14 | int rcpp_import_stn_df(const char * bikedb, Rcpp::DataFrame stn_data, std::string city); 15 | RcppExport SEXP _bikedata_rcpp_import_stn_df(SEXP bikedbSEXP, SEXP stn_dataSEXP, SEXP citySEXP) { 16 | BEGIN_RCPP 17 | Rcpp::RObject rcpp_result_gen; 18 | Rcpp::RNGScope rcpp_rngScope_gen; 19 | Rcpp::traits::input_parameter< const char * >::type bikedb(bikedbSEXP); 20 | Rcpp::traits::input_parameter< Rcpp::DataFrame >::type stn_data(stn_dataSEXP); 21 | Rcpp::traits::input_parameter< std::string >::type city(citySEXP); 22 | rcpp_result_gen = Rcpp::wrap(rcpp_import_stn_df(bikedb, stn_data, city)); 23 | return rcpp_result_gen; 24 | END_RCPP 25 | } 26 | // rcpp_import_to_trip_table 27 | int rcpp_import_to_trip_table(const char* bikedb, Rcpp::CharacterVector datafiles, std::string city, std::string header_file_name, bool data_has_stations, bool quiet); 28 | RcppExport SEXP _bikedata_rcpp_import_to_trip_table(SEXP bikedbSEXP, SEXP datafilesSEXP, SEXP citySEXP, SEXP header_file_nameSEXP, SEXP data_has_stationsSEXP, SEXP quietSEXP) { 29 | BEGIN_RCPP 30 | Rcpp::RObject rcpp_result_gen; 31 | Rcpp::RNGScope rcpp_rngScope_gen; 32 | Rcpp::traits::input_parameter< const char* >::type bikedb(bikedbSEXP); 33 | Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type datafiles(datafilesSEXP); 34 | Rcpp::traits::input_parameter< std::string >::type city(citySEXP); 35 | 
Rcpp::traits::input_parameter< std::string >::type header_file_name(header_file_nameSEXP); 36 | Rcpp::traits::input_parameter< bool >::type data_has_stations(data_has_stationsSEXP); 37 | Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP); 38 | rcpp_result_gen = Rcpp::wrap(rcpp_import_to_trip_table(bikedb, datafiles, city, header_file_name, data_has_stations, quiet)); 39 | return rcpp_result_gen; 40 | END_RCPP 41 | } 42 | // rcpp_import_to_file_table 43 | int rcpp_import_to_file_table(const char * bikedb, Rcpp::CharacterVector datafiles, std::string city, int nfiles); 44 | RcppExport SEXP _bikedata_rcpp_import_to_file_table(SEXP bikedbSEXP, SEXP datafilesSEXP, SEXP citySEXP, SEXP nfilesSEXP) { 45 | BEGIN_RCPP 46 | Rcpp::RObject rcpp_result_gen; 47 | Rcpp::RNGScope rcpp_rngScope_gen; 48 | Rcpp::traits::input_parameter< const char * >::type bikedb(bikedbSEXP); 49 | Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type datafiles(datafilesSEXP); 50 | Rcpp::traits::input_parameter< std::string >::type city(citySEXP); 51 | Rcpp::traits::input_parameter< int >::type nfiles(nfilesSEXP); 52 | rcpp_result_gen = Rcpp::wrap(rcpp_import_to_file_table(bikedb, datafiles, city, nfiles)); 53 | return rcpp_result_gen; 54 | END_RCPP 55 | } 56 | // rcpp_create_sqlite3_db 57 | int rcpp_create_sqlite3_db(const char * bikedb); 58 | RcppExport SEXP _bikedata_rcpp_create_sqlite3_db(SEXP bikedbSEXP) { 59 | BEGIN_RCPP 60 | Rcpp::RObject rcpp_result_gen; 61 | Rcpp::RNGScope rcpp_rngScope_gen; 62 | Rcpp::traits::input_parameter< const char * >::type bikedb(bikedbSEXP); 63 | rcpp_result_gen = Rcpp::wrap(rcpp_create_sqlite3_db(bikedb)); 64 | return rcpp_result_gen; 65 | END_RCPP 66 | } 67 | // rcpp_create_db_indexes 68 | int rcpp_create_db_indexes(const char* bikedb, Rcpp::CharacterVector tables, Rcpp::CharacterVector cols, bool reindex); 69 | RcppExport SEXP _bikedata_rcpp_create_db_indexes(SEXP bikedbSEXP, SEXP tablesSEXP, SEXP colsSEXP, SEXP reindexSEXP) { 70 | BEGIN_RCPP 71 | 
Rcpp::RObject rcpp_result_gen; 72 | Rcpp::RNGScope rcpp_rngScope_gen; 73 | Rcpp::traits::input_parameter< const char* >::type bikedb(bikedbSEXP); 74 | Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type tables(tablesSEXP); 75 | Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type cols(colsSEXP); 76 | Rcpp::traits::input_parameter< bool >::type reindex(reindexSEXP); 77 | rcpp_result_gen = Rcpp::wrap(rcpp_create_db_indexes(bikedb, tables, cols, reindex)); 78 | return rcpp_result_gen; 79 | END_RCPP 80 | } 81 | // rcpp_create_city_index 82 | int rcpp_create_city_index(const char* bikedb, bool reindex); 83 | RcppExport SEXP _bikedata_rcpp_create_city_index(SEXP bikedbSEXP, SEXP reindexSEXP) { 84 | BEGIN_RCPP 85 | Rcpp::RObject rcpp_result_gen; 86 | Rcpp::RNGScope rcpp_rngScope_gen; 87 | Rcpp::traits::input_parameter< const char* >::type bikedb(bikedbSEXP); 88 | Rcpp::traits::input_parameter< bool >::type reindex(reindexSEXP); 89 | rcpp_result_gen = Rcpp::wrap(rcpp_create_city_index(bikedb, reindex)); 90 | return rcpp_result_gen; 91 | END_RCPP 92 | } 93 | -------------------------------------------------------------------------------- /tests/make_test_data.Rmd: -------------------------------------------------------------------------------- 1 | # Script to assemble the `bike_test_data.rda` file 2 | 3 | ```{r} 4 | devtools::load_all (".", export_all = FALSE) 5 | library (magrittr) 6 | data_dir <- tempdir () 7 | nrows <- 200 # number of rows to read from each file 8 | 9 | names (bike_test_data) 10 | head (bike_test_data$la) 11 | 12 | # ----- DC ----- 13 | dl_bikedata (city = "dc", data_dir = data_dir, dates = 201701) 14 | f <- list.files (tempdir ()) 15 | f <- file.path (data_dir, f [grep ("capitalbikeshare", f)]) 16 | fi <- unzip (f, list = TRUE)$Name 17 | unzip (f, files = fi [1], exdir = data_dir, junkpaths = TRUE) 18 | dc <- read.csv (file.path (data_dir, fi [1]), header = TRUE, nrows = nrows) 19 | 20 | # ----- LO ----- 21 | dl_bikedata (city = "lo", 
data_dir = data_dir, dates = 201601) 22 | f <- list.files (tempdir ()) 23 | f <- file.path (data_dir, f [grep ("JourneyDataExtract", f)]) 24 | lo <- read.csv (f [1], header = TRUE, nrows = nrows) 25 | 26 | # ----- BO ----- 27 | # These 3 different time periods have 3 different formats 28 | dl_bikedata (city = "bo", data_dir = data_dir, dates = 2012) 29 | dl_bikedata (city = "bo", data_dir = data_dir, dates = 201701) 30 | dl_bikedata (city = "bo", data_dir = data_dir, dates = 201801) 31 | f <- list.files (tempdir ()) 32 | f <- file.path (data_dir, f [grep ("hubway", f)]) 33 | # grepping all at once doesn't put them in this order: 34 | f <- c (f [grep ("2012", f)], f [grep ("2017", f)], f [grep ("2018", f)]) 35 | bo12 <- read.csv (f [1], header = TRUE, nrows = nrows) 36 | fi <- unzip (f [2], list = TRUE)$Name 37 | unzip (f [2], files = fi, exdir = data_dir, junkpaths = TRUE) 38 | bo17 <- read.csv (file.path (data_dir, fi), header = TRUE, nrows = nrows) 39 | fi <- unzip (f [3], list = TRUE)$Name 40 | unzip (f [3], files = fi, exdir = data_dir, junkpaths = TRUE) 41 | bo18 <- read.csv (file.path (data_dir, fi), header = TRUE, nrows = nrows) 42 | # stations also need to be downloaded 43 | dl_files <- bikedata:::get_bike_files (city = 'bo') 44 | dl_files <- dl_files [which (grepl ('Stations', dl_files))] 45 | for (f in dl_files) 46 | { 47 | furl <- gsub (" ", "%20", f) 48 | f <- gsub (" ", "", f) 49 | destfile <- file.path (data_dir, basename(f)) 50 | resp <- httr::GET (furl, httr::write_disk (destfile, overwrite = TRUE)) 51 | } 52 | f <- list.files (tempdir ()) 53 | f <- file.path (data_dir, f [grep ("hubway_stations", f, ignore.case = TRUE)]) 54 | bo_st1 <- read.csv (f [1], header = TRUE) 55 | bo_st2 <- read.csv (f [2], header = TRUE) 56 | 57 | # ----- NY ----- 58 | dl_bikedata (city = "ny", data_dir = data_dir, dates = 201612) 59 | f <- list.files (tempdir ()) 60 | f <- file.path (data_dir, f [grep ("^201612-citibike", f)]) 61 | fi <- unzip (f, list = TRUE)$Name 62 | 
unzip (f, files = fi [1], exdir = data_dir, junkpaths = TRUE) 63 | ny <- read.csv (file.path (data_dir, fi [1]), header = TRUE, nrows = nrows) 64 | 65 | # ----- CH ----- 66 | dl_bikedata (city = "ch", data_dir = data_dir, dates = 201612) 67 | f <- list.files (tempdir ()) 68 | f <- file.path (data_dir, f [grep ("Divvy", f)]) 69 | fi <- unzip (f, list = TRUE)$Name 70 | fitr <- fi [grep ("Trips_2016_Q4", fi)] 71 | fist <- fi [grep ("Stations_2016_Q4", fi)] 72 | unzip (f, files = c (fitr, fist), exdir = data_dir, junkpaths = TRUE) 73 | ch_tr <- read.csv (file.path (data_dir, fitr), header = TRUE, nrows = nrows) 74 | ch_st <- read.csv (file.path (data_dir, fist), header = TRUE) 75 | 76 | # ----- LA ----- 77 | dl_bikedata (city = "la", data_dir = data_dir, dates = 201701) 78 | f <- list.files (tempdir ()) 79 | f <- file.path (data_dir, f [grep ("la_metro", f)]) 80 | fi <- unzip (f, list = TRUE)$Name 81 | unzip (f, files = fi, exdir = data_dir, junkpaths = TRUE) 82 | la <- read.csv (file.path (data_dir, fi), header = TRUE, nrows = nrows) 83 | 84 | # ----- MN ----- 85 | # data have to be pre-downloaded 86 | mn_dir <- "/data/data/bikes/mn" 87 | f <- list.files (mn_dir, full.names = TRUE) [3] # random file for 2012 88 | fi <- unzip (f, list = TRUE)$Name 89 | fitr <- fi [grepl ("trip", fi)] 90 | fist <- fi [grepl ("station", fi)] 91 | unzip (f, files = c (fitr, fist), exdir = data_dir, junkpaths = TRUE) 92 | mn_tr <- read.csv (file.path (data_dir, basename (fitr)), 93 | header = TRUE, nrows = nrows) 94 | mn_st <- read.csv (file.path (data_dir, basename (fist)), 95 | header = TRUE) 96 | 97 | bike_test_data <- list (dc = dc, 98 | lo = lo, 99 | bo12 = bo12, 100 | bo17 = bo17, 101 | bo18 = bo18, 102 | bo_st1 = bo_st1, 103 | bo_st2 = bo_st2, 104 | ny = ny, 105 | ch_tr = ch_tr, 106 | ch_st = ch_st, 107 | la = la, 108 | mn_tr = mn_tr, 109 | mn_st = mn_st) 110 | save (bike_test_data, file = "./data/bike_test_data.rda", compress = "xz") 111 | ``` 112 | 
-------------------------------------------------------------------------------- /docs/pkgdown.css: -------------------------------------------------------------------------------- 1 | /* Sticky footer */ 2 | 3 | /** 4 | * Basic idea: https://philipwalton.github.io/solved-by-flexbox/demos/sticky-footer/ 5 | * Details: https://github.com/philipwalton/solved-by-flexbox/blob/master/assets/css/components/site.css 6 | * 7 | * .Site -> body > .container 8 | * .Site-content -> body > .container .row 9 | * .footer -> footer 10 | * 11 | * Key idea seems to be to ensure that .container and __all its parents__ 12 | * have height set to 100% 13 | * 14 | */ 15 | 16 | html, body { 17 | height: 100%; 18 | } 19 | 20 | body > .container { 21 | display: flex; 22 | height: 100%; 23 | flex-direction: column; 24 | 25 | padding-top: 60px; 26 | } 27 | 28 | body > .container .row { 29 | flex: 1 0 auto; 30 | } 31 | 32 | footer { 33 | margin-top: 45px; 34 | padding: 35px 0 36px; 35 | border-top: 1px solid #e5e5e5; 36 | color: #666; 37 | display: flex; 38 | flex-shrink: 0; 39 | } 40 | footer p { 41 | margin-bottom: 0; 42 | } 43 | footer div { 44 | flex: 1; 45 | } 46 | footer .pkgdown { 47 | text-align: right; 48 | } 49 | footer p { 50 | margin-bottom: 0; 51 | } 52 | 53 | img.icon { 54 | float: right; 55 | } 56 | 57 | img { 58 | max-width: 100%; 59 | } 60 | 61 | /* Typographic tweaking ---------------------------------*/ 62 | 63 | .contents h1.page-header { 64 | margin-top: calc(-60px + 1em); 65 | } 66 | 67 | /* Section anchors ---------------------------------*/ 68 | 69 | a.anchor { 70 | margin-left: -30px; 71 | display:inline-block; 72 | width: 30px; 73 | height: 30px; 74 | visibility: hidden; 75 | 76 | background-image: url(./link.svg); 77 | background-repeat: no-repeat; 78 | background-size: 20px 20px; 79 | background-position: center center; 80 | } 81 | 82 | .hasAnchor:hover a.anchor { 83 | visibility: visible; 84 | } 85 | 86 | @media (max-width: 767px) { 87 | .hasAnchor:hover a.anchor { 
88 | visibility: hidden; 89 | } 90 | } 91 | 92 | 93 | /* Fixes for fixed navbar --------------------------*/ 94 | 95 | .contents h1, .contents h2, .contents h3, .contents h4 { 96 | padding-top: 60px; 97 | margin-top: -40px; 98 | } 99 | 100 | /* Static header placement on mobile devices */ 101 | @media (max-width: 767px) { 102 | .navbar-fixed-top { 103 | position: absolute; 104 | } 105 | .navbar { 106 | padding: 0; 107 | } 108 | } 109 | 110 | 111 | /* Sidebar --------------------------*/ 112 | 113 | #sidebar { 114 | margin-top: 30px; 115 | } 116 | #sidebar h2 { 117 | font-size: 1.5em; 118 | margin-top: 1em; 119 | } 120 | 121 | #sidebar h2:first-child { 122 | margin-top: 0; 123 | } 124 | 125 | #sidebar .list-unstyled li { 126 | margin-bottom: 0.5em; 127 | } 128 | 129 | .orcid { 130 | height: 16px; 131 | vertical-align: middle; 132 | } 133 | 134 | /* Reference index & topics ----------------------------------------------- */ 135 | 136 | .ref-index th {font-weight: normal;} 137 | 138 | .ref-index td {vertical-align: top;} 139 | .ref-index .alias {width: 40%;} 140 | .ref-index .title {width: 60%;} 141 | 142 | .ref-index .alias {width: 40%;} 143 | .ref-index .title {width: 60%;} 144 | 145 | .ref-arguments th {text-align: right; padding-right: 10px;} 146 | .ref-arguments th, .ref-arguments td {vertical-align: top;} 147 | .ref-arguments .name {width: 20%;} 148 | .ref-arguments .desc {width: 80%;} 149 | 150 | /* Nice scrolling for wide elements --------------------------------------- */ 151 | 152 | table { 153 | display: block; 154 | overflow: auto; 155 | } 156 | 157 | /* Syntax highlighting ---------------------------------------------------- */ 158 | 159 | pre { 160 | word-wrap: normal; 161 | word-break: normal; 162 | border: 1px solid #eee; 163 | } 164 | 165 | pre, code { 166 | background-color: #f8f8f8; 167 | color: #333; 168 | } 169 | 170 | pre code { 171 | overflow: auto; 172 | word-wrap: normal; 173 | white-space: pre; 174 | } 175 | 176 | pre .img { 177 | margin: 5px 
0; 178 | } 179 | 180 | pre .img img { 181 | background-color: #fff; 182 | display: block; 183 | height: auto; 184 | } 185 | 186 | code a, pre a { 187 | color: #375f84; 188 | } 189 | 190 | a.sourceLine:hover { 191 | text-decoration: none; 192 | } 193 | 194 | .fl {color: #1514b5;} 195 | .fu {color: #000000;} /* function */ 196 | .ch,.st {color: #036a07;} /* string */ 197 | .kw {color: #264D66;} /* keyword */ 198 | .co {color: #888888;} /* comment */ 199 | 200 | .message { color: black; font-weight: bolder;} 201 | .error { color: orange; font-weight: bolder;} 202 | .warning { color: #6A0366; font-weight: bolder;} 203 | 204 | /* Clipboard --------------------------*/ 205 | 206 | .hasCopyButton { 207 | position: relative; 208 | } 209 | 210 | .btn-copy-ex { 211 | position: absolute; 212 | right: 0; 213 | top: 0; 214 | visibility: hidden; 215 | } 216 | 217 | .hasCopyButton:hover button.btn-copy-ex { 218 | visibility: visible; 219 | } 220 | 221 | /* mark.js ----------------------------*/ 222 | 223 | mark { 224 | background-color: rgba(255, 255, 51, 0.5); 225 | border-bottom: 2px solid rgba(255, 153, 51, 0.3); 226 | padding: 1px; 227 | } 228 | 229 | /* vertical spacing after htmlwidgets */ 230 | .html-widget { 231 | margin-bottom: 10px; 232 | } 233 | -------------------------------------------------------------------------------- /man/bike_tripmat.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/tripmat.R 3 | \name{bike_tripmat} 4 | \alias{bike_tripmat} 5 | \title{Extract station-to-station trip matrix or data.frame from SQLite3 database} 6 | \usage{ 7 | bike_tripmat( 8 | bikedb, 9 | city, 10 | start_date, 11 | end_date, 12 | start_time, 13 | end_time, 14 | weekday, 15 | member, 16 | birth_year, 17 | gender, 18 | standardise = FALSE, 19 | long = FALSE, 20 | quiet = FALSE 21 | ) 22 | } 23 | \arguments{ 24 | \item{bikedb}{A string containing the path to 
the SQLite3 database. 25 | If no directory specified, it is presumed to be in \code{tempdir()}.} 26 | 27 | \item{city}{City for which tripmat is to be aggregated} 28 | 29 | \item{start_date}{If given (as year, month, day) , extract only those records 30 | from and including this date} 31 | 32 | \item{end_date}{If given (as year, month, day), extract only those records to 33 | and including this date} 34 | 35 | \item{start_time}{If given, extract only those records starting from and 36 | including this time of each day} 37 | 38 | \item{end_time}{If given, extract only those records ending at and including 39 | this time of each day} 40 | 41 | \item{weekday}{If given, extract only those records including the nominated 42 | weekdays. This can be a vector of numeric, starting with Sunday=1, or 43 | unambiguous characters, so "sa" and "tu" for Saturday and Tuesday.} 44 | 45 | \item{member}{If given, extract only trips by registered members 46 | (\code{member = 1} or \code{TRUE}) or not (\code{member = 0} or 47 | \code{FALSE}).} 48 | 49 | \item{birth_year}{If given, extract only trips by registered members whose 50 | declared birth years equal or lie within the specified value or values.} 51 | 52 | \item{gender}{If given, extract only records for trips by registered 53 | users declaring the specified genders (\code{f/m/.} or \code{2/1/0}).} 54 | 55 | \item{standardise}{If TRUE, numbers of trips are standardised to the 56 | operating durations of each stations, so trip numbers are increased for 57 | stations that have only operated a short time, and vice versa.} 58 | 59 | \item{long}{If FALSE, a square tripmat of (num-stations, num_stations) is 60 | returned; if TRUE, a long-format matrix of (stn-from, stn-to, ntrips) is 61 | returned.} 62 | 63 | \item{quiet}{If FALSE, progress is displayed on screen} 64 | } 65 | \value{ 66 | If \code{long = FALSE}, a square matrix of numbers of trips between 67 | each station, otherwise a long-form \pkg{tibble} with three columns of of 
68 | (\code{start_station_id, end_station_id, numtrips}). 69 | } 70 | \description{ 71 | Extract station-to-station trip matrix or data.frame from SQLite3 database 72 | } 73 | \note{ 74 | The \code{city} parameter should be given for databases containing data 75 | from multiple cities, otherwise most of the resultant trip matrix is likely 76 | to be empty. Both dates and times may be given either in numeric or 77 | character format, with arbitrary (or no) delimiters between fields. Single 78 | numeric times are interpreted as hours, with 24 interpreted as day's end at 79 | 23:59:59. 80 | 81 | If \code{standardise = TRUE}, the trip matrix will have the same number 82 | of trips, but they will be re-distributed as described, with more recent 83 | stations having more trips than older stations. Trip number are also 84 | non-integer in this case, whereas they are always integer-valued for 85 | \code{standardise = FALSE}. 86 | } 87 | \examples{ 88 | \dontrun{ 89 | data_dir <- tempdir () 90 | bike_write_test_data (data_dir = data_dir) 91 | # or download some real data! 
92 | # dl_bikedata (city = "la", data_dir = data_dir) 93 | bikedb <- file.path (data_dir, "testdb") 94 | store_bikedata (data_dir = data_dir, bikedb = bikedb) 95 | # create database indexes for quicker access: 96 | index_bikedata_db (bikedb = bikedb) 97 | 98 | 99 | tm <- bike_tripmat (bikedb = bikedb, city = "ny") # full trip matrix 100 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", 101 | start_date = 20161201, end_date = 20161201) 102 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", start_time = 1) 103 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", start_time = "01:00") 104 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", end_time = "01:00") 105 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", 106 | start_date = 20161201, start_time = 1) 107 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", start_date = 20161201, 108 | end_date = 20161201, start_time = 1, end_time = 2) 109 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", weekday = 5) 110 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", 111 | weekday = c("f", "sa", "th")) 112 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", 113 | weekday = c("f", "th", "sa")) 114 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", member = 1) 115 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", birth_year = 1976) 116 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", birth_year = 1976:1990) 117 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", gender = "f") 118 | tm <- bike_tripmat (bikedb = bikedb, city = "ny", 119 | gender = "m", birth_year = 1976:1990) 120 | 121 | bike_rm_test_data (data_dir = data_dir) 122 | bike_rm_db (bikedb) 123 | # don't forget to remove real data! 
124 | # file.remove (list.files (data_dir, pattern = ".zip")) 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /data-raw/sysdata.Rmd: --------------------------------------------------------------------------------
# The sysdata.rda object

## DC station locations

The function `R/stations.R/bike_get_dc_stations` has code to extract and process
DC stations. The data can be obtained from
http://opendata.dc.gov/datasets/capital-bike-share-locations/, using
Download->Spreadsheet. The code is reproduced here
```{r}
stations_dc <- read.csv ("Capital_Bike_Share_Locations.csv")
names (stations_dc) <- tolower (names (stations_dc))
# station names are taken from the address column, with apostrophes removed
name <- noquote (gsub ("'", "", stations_dc$address)) #nolint
name <- trimws (name, which = 'right') # trim terminal white space
stations_dc <- data.frame (id = stations_dc$terminal_number,
                           name = name,
                           lon = stations_dc$longitude,
                           lat = stations_dc$latitude,
                           stringsAsFactors = FALSE)
```

## Bike Header Field Names

The fields stored in the `bikedata` database are:

| number | field                   |
| ----   | ----------------------- |
| 1      | duration                |
| 2      | start_time              |
| 3      | end_time                |
| 4      | start_station_id        |
| 5      | start_station_name      |
| 6      | start_station_latitude  |
| 7      | start_station_longitude |
| 8      | end_station_id          |
| 9      | end_station_name        |
| 10     | end_station_latitude    |
| 11     | end_station_longitude   |
| 12     | bike_id                 |
| 13     | user_type               |
| 14     | birth_year              |
| 15     | gender                  |

Each file has at least some of these fields, but different systems naturally use
different nomenclatures. The `field_names` structure maps different system
names for these fields onto the above names. All names are converted to lower
case and all white space and underscores removed, so entries here should be all
lower case with no white space.

Old DC files had "Duration (ms)", but no longer do.
LA has "passholder_type", which can be "Flex Pass" = annual, or "Monthly Pass"
PH has "passholder_type", which can be "IndegoFlex" or "Indego30"

Note that an extra city column is needed because LA has "start_station" and
"end_station" for the ID columns, while MN has these for the station name
columns.

```{r}
# Canonical field names, in the order of the table above. The position of each
# name in this vector is the value stored in `field_names$index`.
fields <- c ("duration", "starttime", "endtime", "startstationid",
             "startstationname", "startstationlatitude",
             "startstationlongitude", "endstationid", "endstationname",
             "endstationlatitude", "endstationlongitude", "bikeid",
             "usertype", "birthyear", "gender")

# One vector per canonical field, named identically to the entry in `fields`
# (they are looked up below with `get ()`), listing every header variation
# known to map onto that field.
duration <- c ("duration", "tripduration", "totalduration", "durationsec",
               "durationseconds", "totalduration(ms)")

starttime <- c ("starttime", "startdate", "iniciodelviaje")
endtime <- c ("endtime", "enddate", "stoptime", "findelviaje")

startstationid <- c ("startstationid", "startstationnumber", "fromstationid",
                     "startterminal", "startstation", "startstationcode",
                     "origenid")
startstationname <- c ("startstationname", "fromstationname", "startstation")
startstationlatitude <- c ("startstationlatitude", "startlat")
startstationlongitude <- c ("startstationlongitude", "startlon")

endstationid <- c ("endstationid", "endstationnumber", "tostationid",
                   "endstation", "endterminal", "endstationcode",
                   "destinoid")
endstationname <- c ("endstationname", "tostationname", "endstation")
endstationlatitude <- c ("endstationlatitude", "endlat")
endstationlongitude <- c ("endstationlongitude", "endlon")

bikeid <- c ("bikeid", "bikenumber", "bike#")
usertype <- c ("usertype", "membertype", "type", "subscribertype",
               "subscriptiontype", "accounttype", "passholdertype",
               "ismember", "usuarioid")
birthyear <- c ("birthyear", "birthday", "memberbirthyear",
                "edad","anodenacimento")
gender <- c ("gender", "membergender", "genero")

# Stack (field, variation) pairs for all fields into one data.frame
field_names <- data.frame (matrix (nrow = 0, ncol = 2))
for (f in fields)
{
    field_names <- rbind (field_names,
                          cbind (rep (f, length (get (f))), get (f)))
}
names (field_names) <- c ("field", "variation")

# `index` is the position of each field within `fields`. This was previously
# derived by relabelling factor levels, which only works when `field` is a
# factor; from R 4.0 the column is character and that approach yields NA for
# every row. `match ()` gives the same numbers for both column types.
field_names$index <- as.numeric (match (field_names$field, fields))

# "startstation" / "endstation" are ambiguous: they are station IDs for LA but
# station names for MN, so these variations are restricted to a single city.
field_names$city <- "all"
field_names$city [field_names$field == "startstationid" &
                  field_names$variation == "startstation"] <- "la"
field_names$city [field_names$field == "endstationname" &
                  field_names$variation == "endstation"] <- "mn"
field_names$city [field_names$field == "startstationname" &
                  field_names$variation == "startstation"] <- "mn"
field_names$city [field_names$field == "endstationid" &
                  field_names$variation == "endstation"] <- "la"
```

And this then saves the corresponding `data.frame` to the package data:
```{r}
data_dir <- file.path (here::here (), "R")
f <- file.path (data_dir, "sysdata.rda")
# load from the same project-rooted path that is written below, so the chunk
# does not depend on the current working directory
load (f)
stations_dc <- sysdata$stations_dc # comment out to refresh using above code
sysdata <- list (stations_dc = stations_dc, field_names = field_names)
save (sysdata, file = f, compress = "xz")
```

-------------------------------------------------------------------------------- /docs/CONDUCT.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Contributor Code of Conduct • bikedata 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35
| 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
    50 |
    51 | 90 | 91 | 92 |
    93 | 94 |
    95 |
    96 | 99 | 100 |
    101 | 102 |

    As contributors and maintainers of this project, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities.

    103 |

    We are committed to making participation in this project a harassment-free experience for everyone, regardless of level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, or religion.

    104 |

    Examples of unacceptable behavior by participants include the use of sexual language or imagery, derogatory comments or personal attacks, trolling, public or private harassment, insults, or other unprofessional conduct.

    105 |

    Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct. Project maintainers who do not follow the Code of Conduct may be removed from the project team.

    106 |

    Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by opening an issue or contacting one or more of the project maintainers.

    107 |

    This Code of Conduct is adapted from the Contributor Covenant (http://contributor-covenant.org), version 1.0.0, available at http://contributor-covenant.org/version/1/0/0/

    108 |
    109 | 110 |
    111 | 112 |
    113 | 114 | 115 |
    116 | 119 | 120 |
    121 |

    Site built with pkgdown.

    122 |
    123 | 124 |
    125 |
    126 | 127 | 128 | 129 | 130 | 131 | 132 | -------------------------------------------------------------------------------- /docs/reference/bike_test_data.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Test data for all 6 cities — bike_test_data • bikedata 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 48 | 49 | 50 | 51 | 52 | 53 |
    54 |
    55 | 94 | 95 | 96 |
    97 | 98 |
    99 |
    100 | 105 | 106 |
    107 | 108 |

    A data set containing for each of the six cities a data.frame object 109 | of 200 trips.

    110 | 111 |
    112 | 113 |
    bike_test_data
    114 | 115 |

    Format

    116 | 117 |

    A list of one data frame for each of the five cities of (bo, dc, la, 118 | lo, ny), plus two more for chicago stations and trips (ch_st, ch_tr). Each of 119 | these (except 'ch_st') contains 200 representative trips.

    120 | 121 |

    Note

    122 | 123 |

    These data are only used to convert to .zip-compressed files 124 | using bike_write_test_data(). These .zip files can be 125 | subsequently read into an SQLite3 database using store_bikedata.

    126 | 127 | 128 |
    129 | 139 |
    140 | 141 |
    142 | 145 | 146 |
    147 |

    Site built with pkgdown.

    148 |
    149 | 150 |
    151 |
    152 | 153 | 154 | 155 | 156 | 157 | 158 | -------------------------------------------------------------------------------- /docs/authors.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Citation and Authors • bikedata 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
    50 |
    51 | 90 | 91 | 92 |
    93 | 94 |
    95 |
    96 | 99 | 100 |

    Padgham M, Ellison R (2017). 101 | “bikedata.” 102 | The Journal of Open Source Software, 2(20). 103 | doi: 10.21105/joss.00471, https://doi.org/10.21105/joss.00471. 104 |

    105 |
    @Article{,
    106 |   title = {bikedata},
    107 |   author = {Mark Padgham and Richard Ellison},
    108 |   journal = {The Journal of Open Source Software},
    109 |   year = {2017},
    110 |   volume = {2},
    111 |   number = {20},
    112 |   month = {Dec},
    113 |   publisher = {The Open Journal},
    114 |   url = {https://doi.org/10.21105/joss.00471},
    115 |   doi = {10.21105/joss.00471},
    116 | }
    117 | 120 | 121 |
      122 |
    • 123 |

      Mark Padgham. Author, maintainer. 124 |

      125 |
    • 126 |
    • 127 |

      Richard Ellison. Author. 128 |

      129 |
    • 130 |
    • 131 |

      Tom Buckley. Author. 132 |

      133 |
    • 134 |
    • 135 |

      Bea Hernández. Reviewer. 136 |
      Bea reviewed the package for ropensci, see https://github.com/ropensci/onboarding/issues/116

      137 |
    • 138 |
    • 139 |

      Elaine McVey. Reviewer. 140 |
      Elaine reviewed the package for ropensci, see https://github.com/ropensci/onboarding/issues/116

      141 |
    • 142 |
    • 143 |

      SQLite Consortium. Contributor. 144 |
      Authors of included SQLite code

      145 |
    • 146 |
    147 | 148 |
    149 | 150 |
    151 | 152 | 153 |
    154 | 157 | 158 |
    159 |

    Site built with pkgdown.

    160 |
    161 | 162 |
    163 |
    164 | 165 | 166 | 167 | 168 | 169 | 170 | -------------------------------------------------------------------------------- /R/distmat.R: -------------------------------------------------------------------------------- 1 | #' Extract station-to-station distance matrix 2 | #' 3 | #' @param bikedb A string containing the path to the SQLite3 database. 4 | #' If no directory specified, it is presumed to be in \code{tempdir()}. 5 | #' @param city City for which tripmat is to be aggregated 6 | #' @param expand Distances are calculated by routing through the OpenStreetMap 7 | #' street network surrounding the bike stations, with the street network 8 | #' expanded by this amount to ensure all stations can be connected. 9 | #' @param long If FALSE, a square distance matrix of (num-stations, 10 | #' num_stations) is returned; if TRUE, a long-format matrix of (stn-from, 11 | #' stn-to, distance) is returned. 12 | #' @param quiet If FALSE, progress is displayed on screen 13 | #' 14 | #' @return If \code{long = FALSE}, a square matrix of numbers of trips between 15 | #' each station, otherwise a long-form \pkg{tibble} with three columns of of 16 | #' (start_station_id, end_station_id, distance) 17 | #' 18 | #' @note Distance matrices returned from \code{bike_distamat} use all stations 19 | #' listed for a given system, while trip matrices extracted with 20 | #' \link{bike_tripmat} will often have fewer stations because operational 21 | #' station numbers commonly vary over time. The two matrices may be reconciled 22 | #' with the \code{match_trips2dists} function, enabling then to be directly 23 | #' compared. 
24 | #' 25 | #' @export 26 | bike_distmat <- function (bikedb, city, expand = 0.5, 27 | long = FALSE, quiet = TRUE) { 28 | 29 | if (missing (bikedb)) 30 | stop ("Can't get trip matrix if bikedb isn't provided") 31 | 32 | requireNamespace ("dodgr") 33 | 34 | bikedb <- check_db_arg (bikedb = bikedb) 35 | city <- check_city_arg (bikedb = bikedb, city = city) 36 | stns <- bike_stations (bikedb = bikedb, city = city) 37 | cols <- c ("longitude", "latitude", "stn_id") 38 | xy <- stns [, which (names (stns) %in% cols)] %>% 39 | remove_xy_outliers () 40 | stn_id <- xy$stn_id # names for matrix 41 | xy <- xy [, which (names (xy) %in% cols [1:2])] # remove ID 42 | dmat <- dodgr::dodgr_dists (from = xy, to = xy, quiet = quiet) 43 | rownames (dmat) <- colnames (dmat) <- stn_id 44 | 45 | if (long) { 46 | 47 | dmat <- reshape2::melt (dmat, 48 | id.vars = c (rownames (dmat), colnames (dmat))) 49 | colnames (dmat) <- c ("start_station_id", "end_station_id", "distance") 50 | dmat <- tibble::as_tibble (dmat) 51 | } else { 52 | 53 | attr (dmat, "variable") <- "distance" # used in match_matrices 54 | } 55 | 56 | return (dmat) 57 | } 58 | 59 | #' Some systems like Boston have outliers, presunably due to something like 60 | #' humans mistyping a digit. These completely muck up distmat extraction, so are 61 | #' removed here. 62 | #' @noRd 63 | remove_xy_outliers <- function (xy) { 64 | 65 | xmn <- mean (xy$longitude) 66 | ymn <- mean (xy$latitude) 67 | d <- sqrt ((xy$longitude - xmn) ^ 2 + (xy$latitude - ymn) ^ 2) 68 | dsd <- stats::sd (c (-d, d)) 69 | if (any (d > (10 * dsd))) { 70 | 71 | indx <- which (d < (10 * dsd)) 72 | xy <- xy [indx, ] 73 | } 74 | return (xy) 75 | } 76 | 77 | 78 | #' Match rows and columns of distance and trip matrices 79 | #' 80 | #' @param mat1 A wide- or long-form trip or distance matrix returned from 81 | #' \code{\link{bike_tripmat}} or \code{\link{bike_distmat}}. 82 | #' @param mat2 The corresponding distance or trip matrix. 
83 | #' 84 | #' @return A list of the same matrices with matching start and end stations, and 85 | #' in the same order passed to the routine (that is, \code{mat1} then 86 | #' \code{mat2}). Each kind of matrix will be identified and named accordingly as 87 | #' either "trip" or "dist". Matrices are returned in same format (long or wide) 88 | #' as submitted. 89 | #' 90 | #' @note Distance matrices returned from \code{bike_distamat} use all stations 91 | #' listed for a given system, while trip matrices extracted with 92 | #' \link{bike_tripmat} will often have fewer stations because operational 93 | #' station numbers commonly vary over time. This function reconciles the two 94 | #' matrices through matching all row and column names (or just station IDs for 95 | #' long-form matrices), enabling then to be directly compared. 96 | #' 97 | #' @export 98 | bike_match_matrices <- function (mat1, mat2) { 99 | 100 | # convert both to wide form first 101 | long <- FALSE 102 | if (!nrow (mat1) == ncol (mat1)) { 103 | 104 | mat1 <- long2wide (mat1) 105 | if (nrow (mat2) == ncol (mat2)) 106 | message ("One matrix is long-form, the other is wide; ", 107 | "will return both matrices in wide form") 108 | else 109 | long <- TRUE 110 | } 111 | if (!nrow (mat2) == ncol (mat2)) 112 | mat2 <- long2wide (mat2) 113 | 114 | nms <- intersect (rownames (mat1), rownames (mat2)) 115 | mat1 <- match_one_mat (mat1, nms, long = long) 116 | mat2 <- match_one_mat (mat2, nms, long = long) 117 | 118 | ret <- list (mat1, mat2) 119 | names (ret) <- c (is_trip_or_dist (mat1), is_trip_or_dist (mat2)) 120 | return (ret) 121 | } 122 | 123 | #' match one trip or distance matrix to the \code{nms} common to both 124 | #' The \code{long} param determines the return form, not the input form. 
#' @noRd
match_one_mat <- function (mat, nms, long = FALSE) {

    variable <- attr (mat, "variable")
    indx <- match (nms, rownames (mat))
    # drop = FALSE keeps a matrix (with dimnames) even when only one station
    # is common to both inputs
    mat <- mat [indx, indx, drop = FALSE]
    # subsetting discards custom attributes, so put the label back
    if (!is.null (variable))
        attr (mat, "variable") <- variable

    if (long)
        mat <- bike_wide2long (mat) %>% tibble::as_tibble ()

    return (mat)
}

#' Determine whether matrix is trip or distance matrix
#'
#' Wide (square) matrices are identified by their "variable" attribute, and
#' long-form ones by the presence of a "distance" column. Anything not
#' identified as a distance matrix - including a wide matrix which carries no
#' "variable" attribute - is reported as "trip".
#' @noRd
is_trip_or_dist <- function (mat) {

    variable <- NULL
    if (nrow (mat) == ncol (mat)) {

        variable <- attr (mat, "variable") # NULL when the attribute is absent
    } else if ("distance" %in% names (mat)) {

        variable <- "distance"
    }

    # identical() copes with a NULL attribute, where `==` would give a
    # zero-length condition and an error
    if (identical (variable, "distance"))
        variable <- "dist"
    else
        variable <- "trip"

    return (variable)
}
-------------------------------------------------------------------------------- /docs/reference/bike_cities.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | List of cities currently included in bikedata — bike_cities • bikedata 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
    53 |
    54 | 93 | 94 | 95 |
    96 | 97 |
    98 |
    99 | 104 | 105 |
    106 | 107 |

    List of cities currently included in bikedata

    108 | 109 |
    110 | 111 |
    bike_cities()
    112 | 113 |

    Value

    114 | 115 |

    A data.frame of cities, abbreviations, and names of bike 116 | systems currently able to be accessed.

    117 | 118 | 119 |

    Examples

    120 |
    bike_cities ()
    #> city city_name bike_system 121 | #> 1 bo Boston Hubway 122 | #> 2 ch Chicago Divvy 123 | #> 3 dc Washington DC CapitalBikeShare 124 | #> 4 gu Guadalajara mibici 125 | #> 5 la Los Angeles Metro 126 | #> 6 lo London Santander 127 | #> 7 mo Montreal Bixi 128 | #> 8 mn Minneapolis NiceRide 129 | #> 9 ny New York Citibike 130 | #> 10 ph Philadelphia Indego 131 | #> 11 sf Bay Area FordGoBike
    132 |
    133 | 143 |
    144 | 145 |
    146 | 149 | 150 |
    151 |

    Site built with pkgdown.

    152 |
    153 | 154 |
    155 |
    156 | 157 | 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /docs/paper.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Summary • bikedata 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 44 | 45 | 46 | 47 | 48 | 49 |
    50 |
    51 | 90 | 91 | 92 |
    93 | 94 |
    95 |
    96 | 99 | 100 | 101 |
    102 |

    title: bikedata tags: - public hire bicycle - open data - R authors: - name: Mark Padgham affiliation: 1 orcid: 0000-0003-2172-5265 - name: Richard Ellison affiliation: 2 affiliations: - name: Department of Geoinformatics, University of Salzburg, Austria index: 1 - name: Institute of Transport and Logistics Studies, The University of Sydney, Australia index: 2 bibliography: paper.bib date: 28 Nov 2017 —

    103 |
    104 | 105 |

    The R package bikedata collates and facilitates access to arguably the world’s largest open ongoing dataset on human mobility. All other comparable sources of data (such as public transit data, or mobile phone data) are either not publicly available, or have been released only at single distinct times for single distinct purposes. Many public hire bicycle systems in the U.S.A., along with Santander Cycles in London, U.K., issue ongoing releases of their usage data, providing a unique source of data for analysing, visualising, and understanding human movement and urban environments [@Austwick2013; @Borgnat2011; @Padgham2012]. Such data provide an invaluable resource for urban planners, geographers, social and health scientists and policy makers, data visualisation specialists, and data-affine users of the systems themselves. The bikedata package aims to provide unified access to usage statistics from all public hire bicycle systems which provide data. These currently include Santander Cycles in London, U.K., and from the U.S.A., citibike in New York City NY, Divvy in Chicago IL, Capital Bikeshare in Washington DC, Hubway in Boston MA, Metro in Los Angeles CA, and Indego in Philadelphia PA. Additional systems will be added on an ongoing basis. The package facilitates the three necessary steps of (1) downloading data; (2) storing data in a readily accessible form (in this case in a single SQLite3 database); (3) extracting aggregate statistics. The two primary aggregate statistics are matrices of numbers of trips between all pairs of stations, and daily time series. Both forms of aggregation may be extracted for specific dates, times, or demographic characteristics of cyclists.

    106 |
    107 |
    108 |

    109 | References

    110 |
    111 | 112 | 113 |
    114 | 115 |
    116 | 117 | 118 |
    119 | 122 | 123 |
    124 |

    Site built with pkgdown.

    125 |
    126 | 127 |
    128 |
    129 | 130 | 131 | 132 | 133 | 134 | 135 | -------------------------------------------------------------------------------- /R/bikedata-files.R: --------------------------------------------------------------------------------
#' get_aws_bike_files
#'
#' Returns list of URLs for each trip data file from nominated Amazon Web
#' Services system
#'
#' @param bucket Name of the AWS bucket in which data are stored
#'
#' @return List of URLs used to download data
#'
#' @note buckets which work straight from AWS are
#' c ("tripdata", "capitalbikeshare-data", "hubway-data") for the cities
#' c ("ny", "dc", "bo"), respectively.
#'
#' @noRd
get_aws_bike_files <- function (bucket) {

    host <- "https://s3.amazonaws.com"
    aws_url <- sprintf ("https://%s.s3.amazonaws.com", bucket)

    doc <- httr::content (httr::GET (aws_url), encoding = "UTF-8")
    nodes <- xml2::xml_children (doc)
    # NOTE: xml2::xml_find_all (doc, ".//Key") should work here but doesn't, so
    # this manually does what that would do: take the text between the
    # <Key> and </Key> tags of every node which mentions a zip or csv file
    files <- lapply (nodes, function (i)
                     if (grepl ("zip|csv", i))
                         strsplit (strsplit (as.character (i),
                                             "<Key>") [[1]] [2],
                                   "</Key>") [[1]] [1])
    files <- unlist (files)

    # nyc citibike data has a redundant file as first item; negative indexing
    # is safe for listings with fewer than two entries, unlike 2:length (files)
    if (bucket == "tripdata")
        files <- files [-1]

    paste0 (host, "/", bucket, "/", files)
}


#' get_london_bike_files
#'
#' Returns list of URLs for each trip data file from London's Santander Cycles
#' system
#'
#' @return List of URLs used to download data
#'
#' @noRd
get_london_bike_files <- function () {

    # First get list of base file names from AWS:
    aws_url <- "https://s3-eu-west-1.amazonaws.com/cycling.data.tfl.gov.uk/"
    doc <- httr::content (httr::GET (aws_url), encoding = "UTF-8")
    nodes <- xml2::xml_children (doc)
    # base names of all listed files whose node text matches `type`
    getflist <- function (nodes, type = "zip") {

        f <- lapply (nodes, function (i) if (grepl (type, i))
                     strsplit (strsplit (as.character (i), "<Key>") [[1]] [2],
                               "</Key>") [[1]] [1])
        f <- unlist (f)
        # basename () rejects NULL, which is what an empty listing gives
        if (is.null (f))
            return (character (0))
        basename (f)
    }
    flist_zip <- getflist (nodes, type = "zip")
    flist_zip <- flist_zip [which (grepl ("usage", flist_zip))]
    flist_csv <- getflist (nodes, type = "csv")
    flist_xlsx <- getflist (nodes, type = "xlsx")

    # Then convert to tfl.gov.uk filenames
    addr_base <- "http://cycling.data.tfl.gov.uk/usage-stats/"
    paste0 (addr_base, sort (c (flist_zip, flist_csv, flist_xlsx)))
}



#' get_nabsa_files
#'
#' Get list of URL for trip data from North American Bike Share Association
#' systems (currently LA and Philly).
#'
#' @param city Either "ph" or "la"; anything else is an error.
#'
#' @return Character vector of links to zip files listed on the system's data
#' page, excluding those with "station" in their name.
#'
#' @noRd
get_nabsa_files <- function (city) {

    if (city == "ph")
        the_url <- "https://www.rideindego.com/about/data/"
    else if (city == "la")
        the_url <- "https://bikeshare.metro.net/about/data/"
    else
        stop ("nabsa cities must be ph or la")

    doc <- httr::content (httr::GET (the_url), encoding = "UTF-8",
                          as = "parsed")
    hrefs <- xml2::xml_attr (xml2::xml_find_all (doc, ".//a"), "href")
    hrefs <- hrefs [which (grepl ("\\.zip", hrefs) &
                           !grepl ("[Ss]tation", hrefs))]

    if (city == "la") {

        # LA links are relative to the data page; gsub is vectorised, so no
        # per-element loop is needed to make them absolute
        the_url_sh <- "https://bikeshare.metro.net/"
        hrefs <- gsub ("../../", the_url_sh, hrefs, fixed = TRUE)
    }

    return (hrefs)
}


#' get_montreal_bike_files
#'
#' Returns list of URLs for each trip data file from Montreal's Bixi system
#'
#' @return List of URLs used to download data
#'
#' @noRd
get_montreal_bike_files <- function () {

    host <- "https://montreal.bixi.com/en/open-data"
    . <- NULL # suppress R CMD check note #nolint
    nodes <- httr::content (httr::GET (host), encoding = "UTF-8") %>%
        xml2::xml_find_all (".//div")
    # only the div holding the history of data releases is of interest
    nodes <- nodes [which (xml2::xml_attr (nodes, "class") ==
                           "container open-data-history")]
    hrefs <- xml2::xml_find_all (nodes, ".//a") %>%
        xml2::xml_attr ("href")
    unique (hrefs)
}

#' get_guadala_bike_files
#'
#' Returns list of URLs for each trip data file from Guadalajara's mibici system
#'
#' @return List of URLs used to download data
#'
#' @noRd
get_guadala_bike_files <- function () {

    host_base <- "https://www.mibici.net"
    host <- paste0 (host_base, "/en/open-data/")
    . <- NULL # suppress R CMD check note #nolint
    nodes <- httr::content (httr::GET (host), encoding = "UTF-8") %>%
        xml2::xml_find_all (".//div")
    nodes <- nodes [which (xml2::xml_attr (nodes, "class") ==
                           "unit one-quarter")]
    hrefs <- xml2::xml_find_all (nodes, ".//a") %>%
        xml2::xml_attr ("href")
    # links are site-relative; keep only those which mention "datos"
    hrefs <- paste0 (host_base, hrefs [grepl ("datos", hrefs)])
    unique (hrefs)
}

#' get_bike_files
#'
#' Returns list of URLs for each trip data file from nominated system
#'
#' @param city The city for which data are to be obtained
#'
#' @return List of URLs used to download data, or \code{NULL} (with a warning)
#' for "mn", the data for which must be downloaded manually. An unrecognised
#' city is an error.
#'
#' @noRd
get_bike_files <- function (city) {

    aws_cities <- c ("ny", "dc", "bo", "sf", "ch")
    buckets <- c ("tripdata", "capitalbikeshare-data",
                  "hubway-data", "fordgobike-data", "divvy-data")
    nabsa_cities <- c ("la", "ph")

    # initialised so that branches which find no files still return a value
    files <- NULL

    if (city %in% aws_cities) {

        bucket <- buckets [match (city, aws_cities)]
        files <- get_aws_bike_files (bucket)
    } else if (city %in% nabsa_cities)
        files <- get_nabsa_files (city = city)
    else if (city == "gu")
        files <- get_guadala_bike_files ()
    else if (city == "lo")
        files <- get_london_bike_files ()
    else if (city == "mn")
        warning ("Data for the Nice Ride MN system must be downloaded ",
                 "manually from\nhttps://www.niceridemn.com/system-data/, and ",
                 "loaded using store_bikedata")
    else if (city == "mo")
        files <- get_montreal_bike_files ()
    else
        stop ("no data files are known for city [", city, "]")

    return (files)
}
-------------------------------------------------------------------------------- /src/sqlite3db-setup.cpp: -------------------------------------------------------------------------------- 1 | /*************************************************************************** 2 | * Project: bikedata 3 | * File: splite3db-admin.cpp 4 | * Language: C++ 5 | * 6 | * Author: Mark Padgham 7 | * E-Mail: mark.padgham@email.com 8 | * 9 | * Description: Routines to construct sqlite3 database and associated 10 | * indexes. Routines to store and add data are in 'sqlite3db-add-data.h' 11 | * 12 | * Compiler Options: -std=c++11 13 | ***************************************************************************/ 14 | 15 | #include "sqlite3db-setup.h" 16 | 17 | //' rcpp_create_sqlite3_db 18 | //' 19 | //' Initial creation of SQLite3 database 20 | //' 21 | //' @param bikedb A string containing the path to the Sqlite3 database to 22 | //' be created.
23 | //' 24 | //' @return integer result code 25 | //' 26 | //' @noRd 27 | // [[Rcpp::export]] 28 | int rcpp_create_sqlite3_db (const char * bikedb) 29 | { 30 | sqlite3 *dbcon; 31 | char *zErrMsg = nullptr; 32 | int rc; 33 | 34 | rc = sqlite3_open_v2(bikedb, &dbcon, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE, nullptr); 35 | if (rc != SQLITE_OK) 36 | throw std::runtime_error ("Can't establish sqlite3 connection"); 37 | 38 | rc = sqlite3_exec(dbcon, "SELECT InitSpatialMetadata(1);", nullptr, nullptr, &zErrMsg); 39 | sqlite3_free (zErrMsg); 40 | 41 | // NOTE: Database structure is ordered according to the order of the NYC 42 | // citibike system, so each line of data from that city can be injected 43 | // straight into the db. All other cities require re-ordering of data to 44 | // this citibike sequence prior to injection into db. 45 | 46 | std::string createqry = "CREATE TABLE trips (" 47 | "id integer primary key," 48 | "city text," 49 | "trip_duration numeric," 50 | "start_time timestamp without time zone," 51 | "stop_time timestamp without time zone," 52 | "start_station_id text," 53 | "end_station_id text," 54 | "bike_id text," 55 | "user_type text," 56 | "birth_year text," 57 | "gender text" 58 | ");" 59 | "CREATE TABLE stations (" 60 | " id integer primary key," 61 | " city text," 62 | " stn_id text," 63 | " name text," 64 | " longitude text," 65 | " latitude text," 66 | " UNIQUE (stn_id, name)" 67 | ");" 68 | "CREATE TABLE datafiles (" 69 | " id integer primary key," 70 | " city text," 71 | " name text" 72 | ");"; 73 | 74 | const char *sql = createqry.c_str (); 75 | rc = sqlite3_exec(dbcon, sql, nullptr, nullptr, &zErrMsg); 76 | sqlite3_free (zErrMsg); 77 | 78 | rc = sqlite3_close_v2(dbcon); 79 | if (rc != SQLITE_OK) 80 | throw std::runtime_error ("Unable to close sqlite database"); 81 | 82 | return rc; 83 | } 84 | 85 | 86 | //' rcpp_create_db_indexes 87 | //' 88 | //' Creates the specified indexes in the database to speed up queries. 
Note 89 | //' that for the full dataset this may take some time. 90 | //' 91 | //' @param bikedb A string containing the path to the sqlite3 database to use. 92 | //' @param tables A vector with the tables for which to create indexes. This 93 | //' vector should be the same length as the cols vector. 94 | //' @param cols A vector with the fields for which to create indexes. 95 | //' @param reindex If false, indexes are created, otherwise they are simply 96 | //' reindexed. 97 | //' 98 | //' @return integer result code 99 | //' 100 | //' @noRd 101 | // [[Rcpp::export]] 102 | int rcpp_create_db_indexes (const char* bikedb, Rcpp::CharacterVector tables, 103 | Rcpp::CharacterVector cols, bool reindex) 104 | { 105 | sqlite3 *dbcon; 106 | char *zErrMsg = nullptr; 107 | int rc; 108 | 109 | rc = sqlite3_open_v2(bikedb, &dbcon, SQLITE_OPEN_READWRITE, nullptr); 110 | if (rc != SQLITE_OK) 111 | throw std::runtime_error ("Can't establish sqlite3 connection"); 112 | 113 | for (int i = 0; i < cols.length(); ++i) 114 | { 115 | Rcpp::checkUserInterrupt (); 116 | std::string idxname = "idx_" + tables[i] + "_" + 117 | (std::string)cols[i]; 118 | boost::replace_all(idxname, "(", "_"); 119 | boost::replace_all(idxname, ")", "_"); 120 | boost::replace_all(idxname, " ", "_"); 121 | 122 | std::string idxqry; 123 | if (reindex) 124 | idxqry = "REINDEX " + idxname; 125 | else 126 | idxqry = "CREATE INDEX " + idxname + " ON " + 127 | (char *)(tables [i]) + "(" + (char *)(cols [i]) + ")"; 128 | 129 | rc = sqlite3_exec(dbcon, idxqry.c_str(), nullptr, nullptr, &zErrMsg); 130 | if (rc != SQLITE_OK) 131 | { 132 | std::string errMsg = "Unable to execute index query: " + 133 | idxqry; 134 | throw std::runtime_error (errMsg); 135 | } 136 | } 137 | 138 | rc = sqlite3_close_v2(dbcon); 139 | if (rc != SQLITE_OK) 140 | throw std::runtime_error ("Unable to close sqlite database"); 141 | sqlite3_free (zErrMsg); 142 | 143 | return(rc); 144 | } 145 | 146 | //' rcpp_create_city_index 147 | //' 148 | //' 
Creates city index in the database. This function is *always* run, while the 149 | //' 'create_db_indexes' function is optionally run. 150 | //' 151 | //' @param bikedb A string containing the path to the sqlite3 database to use. 152 | //' @param reindex If false, indexes are created, otherwise they are simply 153 | //' reindexed. 154 | //' 155 | //' @return integer result code 156 | //' 157 | //' @noRd 158 | // [[Rcpp::export]] 159 | int rcpp_create_city_index (const char* bikedb, bool reindex) 160 | { 161 | sqlite3 *dbcon; 162 | char *zErrMsg = nullptr; 163 | int rc; 164 | 165 | rc = sqlite3_open_v2(bikedb, &dbcon, SQLITE_OPEN_READWRITE, nullptr); 166 | if (rc != SQLITE_OK) 167 | throw std::runtime_error ("Can't establish sqlite3 connection"); 168 | 169 | std::string idxname = "idx_trips_city"; 170 | std::string idxqry; 171 | if (reindex) 172 | idxqry = "REINDEX " + idxname; 173 | else 174 | idxqry = "CREATE INDEX " + idxname + " ON trips(city)"; 175 | 176 | rc = sqlite3_exec(dbcon, idxqry.c_str(), nullptr, nullptr, &zErrMsg); 177 | 178 | if (rc != SQLITE_OK) 179 | { 180 | std::string errMsg = "Unable to execute index query: " + idxqry; 181 | throw std::runtime_error (errMsg); 182 | } 183 | 184 | rc = sqlite3_close_v2(dbcon); 185 | if (rc != SQLITE_OK) 186 | throw std::runtime_error ("Unable to close sqlite database"); 187 | sqlite3_free (zErrMsg); 188 | 189 | return(rc); 190 | } 191 | -------------------------------------------------------------------------------- /R/utils.R: -------------------------------------------------------------------------------- 1 | #' List of cities currently included in bikedata 2 | #' 3 | #' @return A \code{data.frame} of cities, abbreviations, and names of bike 4 | #' systems currently able to be accessed. 
#'
#' @export
#'
#' @examples
#' bike_cities ()
bike_cities <- function () {

    # same table as the demographic data, minus the demographic column
    dat <- bike_demographic_data ()
    dat$demographic_data <- NULL
    return (dat)
}


#' Convert city names to two-letter prefixes
#'
#' @param city Name of one or more cities or corresponding bicycle hire systems
#'
#' @return A two letter prefix matching (bo, ch, dc, gu, la, lo, mn, mo, ny, ph,
#' sf), with one element for each element of \code{city}. Any name which is
#' not recognised is an error.
#'
#' @noRd
convert_city_names <- function (city) {

    city <- tolower (gsub (" ", "", city))

    # "santander" (london) and "san francisco" share their first two letters,
    # so are resolved on four. Done element-wise so that one such name does
    # not overwrite the other elements of a vector of cities.
    first4 <- substring (city, 1, 4)
    city [which (first4 == "sant")] <- "lo"
    city [which (first4 == "sanf")] <- "sf"

    # london and los angeles can only be told apart on three letters
    city <- substring (gsub ("[[:punct:]]", "", city), 1, 3)
    city [which (city == "lon")] <- "lo"
    city [which (city == "los")] <- "la"
    city <- substring (city, 1, 2)

    city_names <- c ("ny", "ne", "ci", # nyc citibike
                     "bo", "hu", # boston hubway
                     "ch", "di", # chicago divvy bike
                     "wa", "dc", "ca", # washington dc capital bike share
                     "la", "me", # LA metro
                     "lo", "sa", # london santander
                     "ph", "in", # philly indego
                     "mn", "mi", # minneapolis/st.paul nice ride
                     "fo", "go", "sf", # ford gobike san fran
                     "mo", "bi", # montreal bixi
                     "gu") # guadalajara mibici
    city_code <- c ("ny", "ny", "ny", "bo", "bo", "ch", "ch",
                    "dc", "dc", "dc", "la", "la", "lo", "lo", "ph", "ph",
                    "mn", "mn", "sf", "sf", "sf", "mo", "mo", "gu")
    # duplicates.ok so that the same city may be given more than once
    city <- city_code [pmatch (city, city_names, duplicates.ok = TRUE)]

    if (any (is.na (city)))
        stop ("city not recognised")

    return (city)
}

#' check city arg
#'
#' @param bikedb Name of database holding bike trip data
#' @param city Name of city as passed to functions such as \code{bike_tripmat},
#' \code{bike_stations}, or \code{bike_distmat}
#' @return Standardised version of \code{city} parameter
#' @noRd
check_city_arg <- function (bikedb, city) {

    db_cities <- bike_cities_in_db (bikedb)
    if (missing (city)) {

        # the city may only be left out when the database holds just one
        if (length (db_cities) > 1) {

            stop ("bikedb contains multiple cities; please specify one. ",
                  "cities in current database are [",
                  paste (db_cities, collapse = " "), "]")
        } else
            city <- db_cities [1]
    } else {

        # convert_city_names stops on unrecognised names, so never gives NA
        city <- convert_city_names (city)
        absent <- city [!city %in% db_cities]
        if (length (absent) > 0)
            stop ("city ", paste (absent, collapse = " "),
                  " not represented in database")
    }
    return (city)
}

#' Perform checks for name, existence, and structure of bikedb
#'
#' @param bikedb A string containing the path to the SQLite3 database.
#' If no directory specified, it is presumed to be in \code{tempdir()}.
#'
#' @return Potentially modified string containing full path
#'
#' @noRd
check_db_arg <- function (bikedb) {

    # bikedb may name a character variable in the calling frame which holds
    # the actual path. Restricting to character mode means a file name which
    # happens to match a function (such as "c") is not replaced by it.
    if (exists (bikedb, envir = parent.frame (), mode = "character"))
        bikedb <- get (bikedb, envir = parent.frame (), mode = "character")

    bikedb <- expand_home (bikedb)

    # Note that dirname (bikedb) == '.' can not be used because that prevents
    # bikedb = "./bikedb", so grepl must be used instead.
    # NOTE(review): the second pattern is kept exactly as found; confirm
    # against the upstream source what it is intended to match.
    if (!grepl ("/", bikedb) | !grepl ("*//*", bikedb))
        bikedb <- file.path (tempdir (), bikedb)

    if (!file.exists (bikedb))
        stop ("file ", basename (bikedb), " does not exist")

    db <- DBI::dbConnect(RSQLite::SQLite(), bikedb, create = FALSE)
    # disconnect even if the query fails (e.g. file is not an SQLite database)
    on.exit (DBI::dbDisconnect(db), add = TRUE)
    qry <- "SELECT name FROM sqlite_master WHERE type = \"table\""
    tbls <- DBI::dbGetQuery(db, qry) [, 1]
    if (!identical (tbls, c ("trips", "stations", "datafiles")))
        stop ("bikedb does not appear to be a bikedata database")

    return (bikedb)
}

# expand unix-style tilde for home directory. Only a leading tilde is expanded:
# one anywhere else is part of the name, as in Windows short paths such as
# "C:/PROGRA~1".
expand_home <- function (x) {

    if (startsWith (x, "~"))
        x <- paste0 (Sys.getenv ("HOME"), substring (x, 2))
    return (x)
}

# check whether data_dir exists and add option to create if not
# no code coverage coz it's interactive
check_data_dir <- function (x) { # nocov start

    if (!file.exists (x)) {

        message ("directory ", x, " does not exist")
        inp <- readline ("Should it be created (y/n)? ") %>%
            tolower ()
        if (substring (inp, 1, 1) == "y") {

            # recursive creation makes any missing parent directories too,
            # and leaves absolute paths absolute
            dir.create (x, recursive = TRUE)
        } else {

            stop ("Okay, stopping now")
        }
    }
    invisible (x)
} # nocov end

# header files are parsed using sysdata.rda, which is written on load to the
# following file, subsequently read directly within the C++ routines
header_file_name <- function () {

    file.path (tempdir (), "field_names.csv")
}

# TRUE for those cities listed below as having station data, FALSE for all
# other known cities, and logical (0) for a city which is not known at all
data_has_stations <- function (city) {

    cities <- bike_demographic_data ()$city
    ret <- rep (FALSE, length (cities))
    cities_with_station_data <- c ("ny", "la", "ph", "sf")
    ret [cities %in% cities_with_station_data] <- TRUE
    return (ret [which (cities == city)])
}
-------------------------------------------------------------------------------- /docs/reference/bike_match_matrices.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Match rows and columns of distance and trip matrices — bike_match_matrices • bikedata 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
    53 |
    54 | 93 | 94 | 95 |
    96 | 97 |
    98 |
    99 | 104 | 105 |
    106 | 107 |

    Match rows and columns of distance and trip matrices

    108 | 109 |
    110 | 111 |
    bike_match_matrices(mat1, mat2)
    112 | 113 |

    Arguments

    114 | 115 | 116 | 117 | 118 | 120 | 121 | 122 | 123 | 124 | 125 |
    mat1

    A wide- or long-form trip or distance matrix returned from 119 | bike_tripmat or bike_distmat.

    mat2

    The corresponding distance or trip matrix.

    126 | 127 |

    Value

    128 | 129 |

    A list of the same matrices with matching start and end stations, and 130 | in the same order passed to the routine (that is, mat1 then 131 | mat2). Each kind of matrix will be identified and named accordingly as 132 | either "trip" or "dist". Matrices are returned in same format (long or wide) 133 | as submitted.

    134 | 135 |

    Note

    136 | 137 |

    Distance matrices returned from bike_distmat use all stations 138 | listed for a given system, while trip matrices extracted with 139 | bike_tripmat will often have fewer stations because operational 140 | station numbers commonly vary over time. This function reconciles the two 141 | matrices through matching all row and column names (or just station IDs for 142 | long-form matrices), enabling them to be directly compared.

    143 | 144 | 145 |
    146 | 157 |
    158 | 159 |
    160 | 163 | 164 |
    165 |

    Site built with pkgdown.

    166 |
    167 | 168 |
    169 |
    170 | 171 | 172 | 173 | 174 | 175 | 176 | -------------------------------------------------------------------------------- /docs/reference/bike_rm_test_data.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Removes test data written with 'bike_write_test_data()' — bike_rm_test_data • bikedata 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 52 | 53 | 54 | 55 | 56 | 57 |
    58 |
    59 | 98 | 99 | 100 |
    101 | 102 |
    103 |
    104 | 109 | 110 |
    111 | 112 |

    The function bike_write_test_data() writes several small 113 | zip-compressed files to disk. The default location is tempdir(), in 114 | which case these files will be automatically removed on termination of 115 | the current R session. If, however, any other value for data_dir is passed 116 | to bike_write_test_data(), then the resultant files ought to be deleted 117 | by calling this function.

    118 | 119 |
    120 | 121 |
    bike_rm_test_data(data_dir = tempdir())
    122 | 123 |

    Arguments

    124 | 125 | 126 | 127 | 128 | 129 | 130 |
    data_dir

    Directory in which data were extracted.

    131 | 132 |

    Value

    133 | 134 |

    Number of files successfully removed, which should equal six.

    135 | 136 | 137 |

    Examples

    138 |
    # NOT RUN {
    139 | bike_write_test_data ()
    140 | list.files (tempdir ())
    141 | bike_rm_test_data ()
    142 | 
    143 | bike_write_test_data (data_dir = getwd ())
    144 | list.files ()
    145 | bike_rm_test_data (data_dir = getwd ())
    146 | # }
    147 |
    148 | 159 |
    160 | 161 |
    162 | 165 | 166 |
    167 |

    Site built with pkgdown.

    168 |
    169 | 170 |
    171 |
    172 | 173 | 174 | 175 | 176 | 177 | 178 | -------------------------------------------------------------------------------- /docs/reference/bike_write_test_data.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Writes test data bundled with package to zip files — bike_write_test_data • bikedata 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 53 | 54 | 55 | 56 | 57 | 58 |
    59 |
    60 | 99 | 100 | 101 |
    102 | 103 |
    104 |
    105 | 110 | 111 |
    112 | 113 |

    Writes very small test files to disk that can be used to test the package. 114 | The entire package works by reading zip-compressed data files provided by the 115 | various hire bicycle systems. This function generates some equivalent data 116 | that can be read into an SQLite database by the 117 | store_bikedata() function, so that all other package functionality can 118 | then be tested from the resultant database. This function is also used in the 119 | examples of all other functions.

    120 | 121 |
    122 | 123 |
    bike_write_test_data(data_dir = tempdir())
    124 | 125 |

    Arguments

    126 | 127 | 128 | 129 | 130 | 133 | 134 |
    data_dir

    Directory in which data are to be extracted. Defaults to 131 | tempdir(). If any other directory is specified, files ought to be 132 | removed with bike_rm_test_data().

    135 | 136 | 137 |

    Examples

    138 |
    # NOT RUN {
    139 | bike_write_test_data ()
    140 | list.files (tempdir ())
    141 | bike_rm_test_data ()
    142 | 
    143 | bike_write_test_data (data_dir = '.')
    144 | list.files ()
    145 | bike_rm_test_data (data_dir = '.')
    146 | # }
    147 |
    148 | 157 |
    158 | 159 |
    160 | 163 | 164 |
    165 |

    Site built with pkgdown.

    166 |
    167 | 168 |
    169 |
    170 | 171 | 172 | 173 | 174 | 175 | 176 | -------------------------------------------------------------------------------- /docs/reference/bike_distmat.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Extract station-to-station distance matrix — bike_distmat • bikedata 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 47 | 48 | 49 | 50 | 51 | 52 |
    53 |
    54 | 93 | 94 | 95 |
    96 | 97 |
    98 |
    99 | 104 | 105 |
    106 | 107 |

    Extract station-to-station distance matrix

    108 | 109 |
    110 | 111 |
    bike_distmat(bikedb, city, expand = 0.5, long = FALSE, quiet = TRUE)
    112 | 113 |

    Arguments

    114 | 115 | 116 | 117 | 118 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 130 | 131 | 132 | 133 | 136 | 137 | 138 | 139 | 140 | 141 |
    bikedb

    A string containing the path to the SQLite3 database. 119 | If no directory specified, it is presumed to be in tempdir().

    city

    City for which the distance matrix is to be calculated

    expand

    Distances are calculated by routing through the OpenStreetMap 128 | street network surrounding the bike stations, with the street network 129 | expanded by this amount to ensure all stations can be connected.

    long

    If FALSE, a square distance matrix of (num-stations, 134 | num_stations) is returned; if TRUE, a long-format matrix of (stn-from, 135 | stn-to, distance) is returned.

    quiet

    If FALSE, progress is displayed on screen

    142 | 143 |

    Value

    144 | 145 |

    If long = FALSE, a square matrix of distances between 146 | each pair of stations, otherwise a long-form tibble with three columns of 147 | (start_station_id, end_station_id, distance)

    148 | 149 |

    Note

    150 | 151 |

    Distance matrices returned from bike_distmat use all stations 152 | listed for a given system, while trip matrices extracted with 153 | bike_tripmat will often have fewer stations because operational 154 | station numbers commonly vary over time. The two matrices may be reconciled 155 | with the bike_match_matrices function, enabling them to be directly 156 | compared.

    157 | 158 | 159 |
    160 | 171 |
    172 | 173 |
    174 | 177 | 178 |
    179 |

    Site built with pkgdown.

    180 |
    181 | 182 |
    183 |
    184 | 185 | 186 | 187 | 188 | 189 | 190 | -------------------------------------------------------------------------------- /docs/reference/bike_rm_db.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Remove SQLite3 database generated with 'store_bikedata()' — bike_rm_db • bikedata 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 50 | 51 | 52 | 53 | 54 | 55 |
    56 |
    57 | 96 | 97 | 98 |
    99 | 100 |
    101 |
    102 | 107 | 108 |
    109 | 110 |

    If no directory is specified in the bikedb argument passed to 111 | store_bikedata, the database is created in tempdir(). This 112 | function provides a convenient way to remove the database in such cases by 113 | simply passing the name.

    114 | 115 |
    116 | 117 |
    bike_rm_db(bikedb)
    118 | 119 |

    Arguments

    120 | 121 | 122 | 123 | 124 | 125 | 126 |
    bikedb

    The SQLite3 database containing the bikedata.

    127 | 128 |

    Value

    129 | 130 |

    TRUE if bikedb successfully removed; otherwise FALSE

    131 | 132 | 133 |

    Examples

    134 |
    # NOT RUN {
    135 | data_dir <- tempdir ()
    136 | bike_write_test_data (data_dir = data_dir)
    137 | # or download some real data!
    138 | # dl_bikedata (city = 'la', data_dir = data_dir)
    139 | bikedb <- file.path (data_dir, 'testdb')
    140 | store_bikedata (data_dir = data_dir, bikedb = bikedb)
    141 | 
    142 | bike_rm_test_data (data_dir = data_dir)
    143 | bike_rm_db (bikedb)
    144 | # don't forget to remove real data!
    145 | # file.remove (list.files (data_dir, pattern = '.zip'))
    146 | # }
    147 |
    148 | 159 |
    160 | 161 | 171 |
    172 | 173 | 174 | 175 | 176 | 177 | 178 | --------------------------------------------------------------------------------