├── .Rbuildignore ├── .github ├── .gitignore └── workflows │ ├── R-CMD-check.yaml │ └── rostemplate-gh-pages.yaml ├── .gitignore ├── CITATION.cff ├── DESCRIPTION ├── LICENSE ├── LICENSE.md ├── NAMESPACE ├── NEWS.md ├── R ├── available-data.R ├── cite.R ├── codebook.R ├── connect.R ├── convert.R ├── data-dir.R ├── dev-tools.R ├── disconnect.R ├── download_data.R ├── duckdb-helpers.R ├── folders.R ├── get-zones.R ├── get.R ├── global-params.R ├── internal-utils.R ├── onload.R ├── quick-get.R └── spanishoddata-package.R ├── README.md ├── README.qmd ├── _pkgdown.yml ├── codemeta.json ├── cran-comments.md ├── inst ├── CITATION ├── extdata │ ├── data_links_v1_2024-08-07.xml.gz │ ├── data_links_v2_2024-08-07.xml.gz │ ├── muni_v2_ref.rds │ ├── sql-queries │ │ ├── province_names_enum.sql │ │ ├── v1-nt-distritos-clean-csv-view-en.sql │ │ ├── v1-nt-distritos-clean-csv-view-es.sql │ │ ├── v1-nt-distritos-raw-csv-view.sql │ │ ├── v1-nt-enum-ntrips.sql │ │ ├── v1-nt-municipios-clean-csv-view-en.sql │ │ ├── v1-nt-municipios-clean-csv-view-es.sql │ │ ├── v1-nt-municipios-raw-csv-view.sql │ │ ├── v1-od-distritos-clean-csv-view-en.sql │ │ ├── v1-od-distritos-clean-csv-view-es.sql │ │ ├── v1-od-distritos-raw-csv-view.sql │ │ ├── v1-od-enum-activity-en.sql │ │ ├── v1-od-enum-activity-es.sql │ │ ├── v1-od-enum-distance.sql │ │ ├── v1-od-municipios-clean-csv-view-en.sql │ │ ├── v1-od-municipios-clean-csv-view-es.sql │ │ ├── v1-od-municipios-raw-csv-view.sql │ │ ├── v2-nt-distritos-clean-csv-view-en.sql │ │ ├── v2-nt-distritos-clean-csv-view-es.sql │ │ ├── v2-nt-distritos-raw-csv-view.sql │ │ ├── v2-nt-enum-age.sql │ │ ├── v2-nt-enum-ntrips.sql │ │ ├── v2-nt-enum-sex-en.sql │ │ ├── v2-nt-enum-sex-es.sql │ │ ├── v2-nt-gau-clean-csv-view-en.sql │ │ ├── v2-nt-gau-clean-csv-view-es.sql │ │ ├── v2-nt-gau-raw-csv-view.sql │ │ ├── v2-nt-municipios-clean-csv-view-en.sql │ │ ├── v2-nt-municipios-clean-csv-view-es.sql │ │ ├── v2-nt-municipios-raw-csv-view.sql │ │ ├── 
v2-od-distritos-clean-csv-view-en.sql │ │ ├── v2-od-distritos-clean-csv-view-es.sql │ │ ├── v2-od-distritos-raw-csv-view.sql │ │ ├── v2-od-enum-activity-en.sql │ │ ├── v2-od-enum-activity-es.sql │ │ ├── v2-od-enum-age.sql │ │ ├── v2-od-enum-distance.sql │ │ ├── v2-od-enum-income.sql │ │ ├── v2-od-enum-sex-en.sql │ │ ├── v2-od-enum-sex-es.sql │ │ ├── v2-od-gau-clean-csv-view-en.sql │ │ ├── v2-od-gau-clean-csv-view-es.sql │ │ ├── v2-od-gau-raw-csv-view.sql │ │ ├── v2-od-municipios-clean-csv-view-en.sql │ │ ├── v2-od-municipios-clean-csv-view-es.sql │ │ ├── v2-od-municipios-raw-csv-view.sql │ │ ├── v2-os-distritos-clean-csv-view-en.sql │ │ ├── v2-os-distritos-clean-csv-view-es.sql │ │ ├── v2-os-distritos-raw-csv-view.sql │ │ ├── v2-os-gau-clean-csv-view-en.sql │ │ ├── v2-os-gau-clean-csv-view-es.sql │ │ ├── v2-os-gau-raw-csv-view.sql │ │ ├── v2-os-municipios-clean-csv-view-en.sql │ │ ├── v2-os-municipios-clean-csv-view-es.sql │ │ └── v2-os-municipios-raw-csv-view.sql │ ├── url_file_sizes_v1.txt.gz │ └── url_file_sizes_v2.txt.gz ├── schemaorg.json └── vignette-include │ ├── csv-date-filter-note.qmd │ ├── install-package.qmd │ ├── missing-dates-outages.qmd │ ├── overall-approach.qmd │ └── setup-data-directory.qmd ├── man ├── figures │ ├── README-desire-lines-1.png │ ├── README-distritos-1.png │ ├── README-salamanca-plot-1.png │ ├── README-salamanca-zones-1.png │ ├── README-trips-per-hour-1.png │ ├── card.png │ ├── flowmapblue-animated.png │ ├── flowmapblue-standard-01.png │ ├── flowmapblue-standard-02.png │ ├── flowmapblue-standard-time.png │ ├── flows_plot_all_districts.png │ ├── flows_plot_barcelona.png │ ├── lifecycle-deprecated.svg │ ├── lifecycle-experimental.svg │ ├── lifecycle-stable.svg │ ├── lifecycle-superseded.svg │ ├── logo-before-hex.png │ ├── logo.png │ ├── package-functions-overview.svg │ ├── zones_barcelona_fua_plot.png │ └── zones_barcelona_plot.png ├── global_quiet_param.Rd ├── spanishoddata-package.Rd ├── spod_available_data.Rd ├── 
spod_available_data_v1.Rd ├── spod_available_data_v2.Rd ├── spod_available_ram.Rd ├── spod_cite.Rd ├── spod_clean_zones_v1.Rd ├── spod_clean_zones_v2.Rd ├── spod_codebook.Rd ├── spod_connect.Rd ├── spod_convert.Rd ├── spod_convert_dates_to_ranges.Rd ├── spod_dates_argument_to_dates_seq.Rd ├── spod_disconnect.Rd ├── spod_download.Rd ├── spod_download_zones_v1.Rd ├── spod_duckdb_create_province_enum.Rd ├── spod_duckdb_filter_by_dates.Rd ├── spod_duckdb_limit_resources.Rd ├── spod_duckdb_number_of_trips.Rd ├── spod_duckdb_od.Rd ├── spod_duckdb_overnight_stays.Rd ├── spod_duckdb_set_temp.Rd ├── spod_expand_dates_from_regex.Rd ├── spod_files_sizes.Rd ├── spod_get.Rd ├── spod_get_data_dir.Rd ├── spod_get_file_size_from_url.Rd ├── spod_get_hmac_secret.Rd ├── spod_get_latest_v1_file_list.Rd ├── spod_get_latest_v2_file_list.Rd ├── spod_get_temp_dir.Rd ├── spod_get_valid_dates.Rd ├── spod_get_zones.Rd ├── spod_get_zones_v1.Rd ├── spod_get_zones_v2.Rd ├── spod_graphql_valid_dates.Rd ├── spod_infer_data_v_from_dates.Rd ├── spod_is_data_version_overlaps.Rd ├── spod_match_data_type.Rd ├── spod_match_data_type_for_local_folders.Rd ├── spod_quick_get_od.Rd ├── spod_quick_get_zones.Rd ├── spod_read_sql.Rd ├── spod_request_length.Rd ├── spod_set_data_dir.Rd ├── spod_sql_where_dates.Rd ├── spod_subfolder_clean_data_cache.Rd ├── spod_subfolder_metadata_cache.Rd ├── spod_subfolder_raw_data_cache.Rd ├── spod_unique_separated_ids.Rd └── spod_zone_names_en2es.Rd ├── pkgdown ├── assets │ ├── codebooks │ │ ├── 20241024_validaciones_estudios_basicos_bigdata_v1.0_en.pdf │ │ ├── README_-_formato_ficheros_movilidad_MITMA_20201228_en.pdf │ │ ├── a3_informe_metodologico_estudio_movilidad_mitms_v8_en.pdf │ │ └── mitma_-_estudio_movilidad_covid-19_informe_metodologico_v3_en.pdf │ └── media │ │ ├── barcelona-time.gif │ │ ├── flowmapblue-animated.mp4 │ │ ├── flowmapblue-standard-time.mp4 │ │ ├── flowmapblue-standard.mp4 │ │ └── spain-folding-flows.gif └── favicon │ ├── apple-touch-icon-120x120.png │ 
├── apple-touch-icon-152x152.png │ ├── apple-touch-icon-180x180.png │ ├── apple-touch-icon-60x60.png │ ├── apple-touch-icon-76x76.png │ ├── apple-touch-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon-96x96.png │ ├── favicon.ico │ ├── favicon.svg │ ├── site.webmanifest │ ├── web-app-manifest-192x192.png │ └── web-app-manifest-512x512.png ├── spanishoddata.Rproj ├── tests ├── testthat.R └── testthat │ ├── test-internal_utils.R │ └── test-quick-get.R ├── tools ├── generate_package_logo.R └── meta-data-update-and-submission.R └── vignettes ├── .gitignore ├── convert.qmd ├── disaggregation.qmd ├── flowmaps-interactive.qmd ├── flowmaps-static.qmd ├── media ├── disaggregated.png ├── duckdb-parquet-csv-speed-mean-hourly-v1.svg ├── flows_plot.svg ├── flows_plot_barcelona.svg ├── mermaid-that-does-not-render.mermaid └── osm.png ├── quick-get.qmd ├── references.bib ├── v1-2020-2021-mitma-data-codebook.qmd └── v2-2022-onwards-mitma-data-codebook.qmd /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^data-raw$ 2 | ^.*\.csv$ 3 | ^.*\.csv\.gz$ 4 | README.qmd 5 | .db$ 6 | ^draft-code$ 7 | ^_pkgdown\.yml$ 8 | ^docs$ 9 | ^pkgdown$ 10 | ^\.github$ 11 | ^LICENSE\.md$ 12 | ^.*\.Rproj$ 13 | ^\.Rproj\.user$ 14 | ^private$ 15 | ^doc$ 16 | ^Meta$ 17 | ^vignettes/*_files$ 18 | ^CITATION\.cff$ 19 | ^codemeta\.json$ 20 | ^vignettes/.quarto$ 21 | ^vignettes/webmedia$ 22 | \.mp4$ 23 | \.gif$ 24 | ^tools$ 25 | ^_pkgdown\.yaml$ 26 | ^vignettes/disaggregation\.qmd$ 27 | ^vignettes/flowmaps-interactive\.qmd$ 28 | ^vignettes/flowmaps-static\.qmd$ 29 | ^man/figures/card\.png$ 30 | ^man/figures/.*flowmapblue.* 31 | ^cran-comments\.md$ 32 | ^CRAN-SUBMISSION$ 33 | -------------------------------------------------------------------------------- /.github/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | R-version 3 | depends.Rds 4 | 
-------------------------------------------------------------------------------- /.github/workflows/R-CMD-check.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | 9 | name: R-CMD-check 10 | 11 | permissions: read-all 12 | 13 | jobs: 14 | R-CMD-check: 15 | runs-on: ${{ matrix.config.os }} 16 | 17 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 18 | 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | config: 23 | - {os: macos-latest, r: 'release'} 24 | - {os: windows-latest, r: 'release'} 25 | - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} 26 | - {os: ubuntu-latest, r: 'release'} 27 | - {os: ubuntu-latest, r: 'oldrel-1'} 28 | 29 | env: 30 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 31 | R_KEEP_PKG_SOURCE: yes 32 | 33 | steps: 34 | - uses: actions/checkout@v4 35 | 36 | - uses: r-lib/actions/setup-pandoc@v2 37 | 38 | - uses: r-lib/actions/setup-r@v2 39 | with: 40 | r-version: ${{ matrix.config.r }} 41 | http-user-agent: ${{ matrix.config.http-user-agent }} 42 | use-public-rspm: true 43 | 44 | - uses: r-lib/actions/setup-r-dependencies@v2 45 | with: 46 | extra-packages: any::rcmdcheck 47 | needs: check 48 | 49 | - uses: r-lib/actions/check-r-package@v2 50 | with: 51 | upload-snapshots: true 52 | build_args: 'c("--no-manual", "--compact-vignettes=gs+qpdf")' 53 | -------------------------------------------------------------------------------- /.github/workflows/rostemplate-gh-pages.yaml: -------------------------------------------------------------------------------- 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples 2 | # Need help debugging build failures? 
Start at https://github.com/r-lib/actions#where-to-find-help 3 | on: 4 | push: 5 | branches: [main, master] 6 | tags: ['*'] 7 | release: 8 | types: [published] 9 | 10 | name: rostemplate-gh-pages 11 | permissions: write-all 12 | 13 | jobs: 14 | rostemplate-gh-pages: 15 | runs-on: ubuntu-latest 16 | env: 17 | GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} 18 | steps: 19 | - uses: actions/checkout@v4 20 | 21 | - uses: r-lib/actions/setup-pandoc@v2 22 | 23 | - uses: r-lib/actions/setup-r@v2 24 | with: 25 | use-public-rspm: true 26 | 27 | - uses: r-lib/actions/setup-r-dependencies@v2 28 | with: 29 | extra-packages: 30 | local::. 31 | any::pkgdown 32 | ropenspain/rostemplate 33 | needs: website 34 | 35 | - name: Deploy package 36 | run: | 37 | git config --local user.name "github-actions[bot]" 38 | git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com" 39 | Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE, clean = TRUE)' 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore all gz files 2 | *.gz 3 | 4 | # Exceptions for gz files in inst/extdata 5 | !inst/extdata/*.gz 6 | 7 | movilidad.duckdb 8 | .Rhistory 9 | zonificacion_distritos* 10 | *.duckdb 11 | docs 12 | private 13 | 14 | /.quarto/ 15 | .Rproj.user 16 | inst/doc 17 | .Renviron 18 | /doc/ 19 | /Meta/ 20 | vignettes/.quarto 21 | 22 | # macOS artifacts 23 | .DS_Store 24 | ._.DS_Store 25 | **/.DS_Store 26 | **/._.DS_Store 27 | .Rprofile 28 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: spanishoddata 2 | Title: Get Spanish Origin-Destination Data 3 | Version: 0.1.1.9000 4 | Authors@R: c( 5 | person("Egor", "Kotov", , "kotov.egor@gmail.com", role = c("aut", "cre"), 6 | comment = c(ORCID = 
"0000-0001-6690-5345")), 7 | person("Robin", "Lovelace", , "rob00x@gmail.com", role = "aut", 8 | comment = c(ORCID = "0000-0001-5679-6536")), 9 | person("Eugeni", "Vidal-Tortosa", role = "ctb", 10 | comment = c(ORCID = "0000-0001-5199-4103")) 11 | ) 12 | Description: Gain seamless access to origin-destination (OD) data from the 13 | Spanish Ministry of Transport, hosted at 14 | . 15 | This package simplifies the management of these large datasets by 16 | providing tools to download zone boundaries, handle associated 17 | origin-destination data, and process it efficiently with the 'duckdb' 18 | database interface. Local caching minimizes repeated downloads, 19 | streamlining workflows for researchers and analysts. Extensive 20 | documentation is available at 21 | , offering 22 | guides on creating static and dynamic mobility flow visualizations and 23 | transforming large datasets into analysis-ready formats. 24 | License: MIT + file LICENSE 25 | URL: https://rOpenSpain.github.io/spanishoddata/, 26 | https://github.com/rOpenSpain/spanishoddata 27 | BugReports: https://github.com/rOpenSpain/spanishoddata/issues 28 | Depends: 29 | R (>= 4.1.0) 30 | Imports: 31 | checkmate, 32 | curl (>= 5.0.0), 33 | DBI, 34 | digest, 35 | dplyr, 36 | duckdb (>= 0.5.0), 37 | fs, 38 | glue, 39 | here, 40 | httr2, 41 | jsonlite, 42 | lifecycle, 43 | lubridate, 44 | memoise, 45 | memuse, 46 | openssl, 47 | parallelly, 48 | purrr, 49 | readr, 50 | rlang, 51 | sf, 52 | stats, 53 | stringr, 54 | tibble, 55 | xml2 56 | Suggests: 57 | flowmapblue, 58 | flowmapper (>= 0.1.2), 59 | furrr, 60 | future, 61 | hexSticker, 62 | mapSpain, 63 | quarto, 64 | remotes, 65 | scales, 66 | testthat (>= 3.0.0), 67 | tidyverse 68 | VignetteBuilder: 69 | quarto 70 | Config/Needs/website: rmarkdown 71 | Config/testthat/edition: 3 72 | Encoding: UTF-8 73 | Roxygen: list(markdown = TRUE) 74 | RoxygenNote: 7.3.2 75 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | YEAR: 2024 2 | COPYRIGHT HOLDER: spanishoddata authors 3 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2024 spanishoddata authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
 22 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(spod_available_data) 4 | export(spod_cite) 5 | export(spod_codebook) 6 | export(spod_connect) 7 | export(spod_convert) 8 | export(spod_disconnect) 9 | export(spod_download) 10 | export(spod_get) 11 | export(spod_get_data_dir) 12 | export(spod_get_valid_dates) 13 | export(spod_get_zones) 14 | export(spod_quick_get_od) 15 | export(spod_quick_get_zones) 16 | export(spod_set_data_dir) 17 | importFrom(lifecycle,deprecated) 18 | importFrom(memoise,memoise) 19 | importFrom(rlang,.data) 20 | importFrom(stats,median) 21 | importFrom(utils,URLencode) 22 | importFrom(utils,vignette) 23 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # spanishoddata (development version) 2 | 3 | ## New features 4 | 5 | * `spod_quick_get_zones()` is a new function to quickly get municipality geometries to match with the data retrieved with `spod_quick_get_od()` [#163](https://github.com/rOpenSpain/spanishoddata/pull/163). Requests to get geometries are cached in memory for the duration of the current R session using the `memoise` package. 6 | 7 | ## Bug fixes 8 | 9 | * `spod_quick_get_od()` is working again. We fixed it to work with the updated API of the Spanish Ministry of Transport (PR [#163](https://github.com/rOpenSpain/spanishoddata/pull/163), issue [#162](https://github.com/rOpenSpain/spanishoddata/issues/162)). It will remain experimental, as the API may change in the future. 
 10 | 11 | * `spod_convert()` can now accept `overwrite = 'update'` with `save_format = 'parquet'` ([#161](https://github.com/rOpenSpain/spanishoddata/pull/161)). Previously it failed because of an incorrect check that only allowed `TRUE` or `FALSE` ([#160](https://github.com/rOpenSpain/spanishoddata/issues/160)). 12 | 13 | # spanishoddata 0.1.1 14 | 15 | ## New features 16 | 17 | * `spod_cite()` function to easily cite the package and the data ([#134](https://github.com/rOpenSpain/spanishoddata/pull/134)) 18 | 19 | ## Breaking changes 20 | 21 | * `time_slot` column is superseded by the `hour` column in the output of `spod_get()` and `spod_convert()`. `time_slot` is deprecated: it is still present in the tables, but will be removed at the end of 2025, so going forward please use the new `hour` column. Otherwise it is exactly the same as before; this is just a name change. (#132) 22 | 23 | ## Other changes 24 | 25 | * `spod_quick_get()` does not rely on metadata download anymore and can be used without setting the data directory with `spod_set_data_dir()` (and therefore does not cause a warning if the data directory is not set). 26 | 27 | * `hour` (ex-`time_slot`) column is now right next to the date column in the output of `spod_get()` and `spod_convert()` (#) 28 | 29 | * maximum available CPU cores check is now turned off to improve compatibility when running the package from within a container in high performance computing environments (see [#130](https://github.com/rOpenSpain/spanishoddata/issues/130) and [#140](https://github.com/rOpenSpain/spanishoddata/pull/140) for details) 30 | 31 | * minor documentation improvements and updates 32 | 33 | * minor bug fixes 34 | 35 | # spanishoddata 0.1.0 36 | 37 | * Initial CRAN submission. 

# --------------------------------------------------------------------------------
# /R/cite.R:
# --------------------------------------------------------------------------------
#' Cite the package and the data
#'
#' Looks up the citation entries returned by `utils::citation("spanishoddata")`
#' by their keys and prints the requested ones to the console in the requested
#' output format(s).
#'
#' @param what Character vector specifying what to cite.
#'   Can include "package", "data", "methodology_v1", "methodology_v2", or "all".
#'   Default is "all".
#' @param format Character vector specifying output format(s).
#'   Can include "text", "markdown", "bibtex", or "all".
#'   Default is "all".
#' @return Nothing (`NULL`, invisibly). Prints citation in plain text, markdown, BibTeX, or all formats at once to console.
#' @export
#'
#' @examples
#' # Cite everything in all formats
#' \dontrun{
#' spod_cite()
#' }
#'
#' # Cite just the package in BibTeX format
#' \dontrun{
#' spod_cite(what = "package", format = "bibtex")
#' }
#'
#' # Cite both methodologies in plain text
#' \dontrun{
#' spod_cite(what = c("methodology_v1", "methodology_v2"), format = "text")
#' }
spod_cite <- function(
  what = "all",
  format = "all"
) {
  # 1. Define valid inputs
  valid_what <- c("all", "package", "data", "methodology_v1", "methodology_v2")
  valid_format <- c("all", "text", "markdown", "bibtex")

  # 2. Use checkmate to validate arguments
  checkmate::assertCharacter(what, any.missing = FALSE, min.len = 1)
  checkmate::assertCharacter(format, any.missing = FALSE, min.len = 1)
  checkmate::assertSubset(what, choices = valid_what)
  checkmate::assertSubset(format, choices = valid_format)

  # 3. Expand "all": keep the user's explicit choices first (in their order),
  # append every other valid choice, then drop duplicates and the literal "all".
  expand_all <- function(requested, choices) {
    if ("all" %in% requested) {
      requested <- c(requested, choices)
    }
    setdiff(unique(requested), "all")
  }
  what <- expand_all(what, valid_what)
  format <- expand_all(format, valid_format)

  # 4. Get the citation object
  cit <- utils::citation("spanishoddata")

  # 5. Function to get citation by key (NULL when no entry has that key)
  get_citation_by_key <- function(key) {
    idx <- which(sapply(cit, function(x) x$key == key))
    if (length(idx) > 0) return(cit[idx])
    return(NULL)
  }

  # 6. Map what options to citation keys
  citation_keys <- list(
    package = "r-spanishoddata",
    data = "mitms_mobility_web",
    methodology_v1 = "mitma_methodology_2020_v3",
    methodology_v2 = "mitms_methodology_2022_v8"
  )

  # 7. Collect the requested citations, skipping keys that are not present
  # in the citation object
  citations_to_show <- list()
  for (w in what) {
    cit_entry <- get_citation_by_key(citation_keys[[w]])
    if (!is.null(cit_entry)) {
      citations_to_show[[w]] <- cit_entry
    }
  }

  # Nothing matched: none of the requested keys exist in the citation object
  if (length(citations_to_show) == 0) {
    message("No valid citations found for the requested 'what'.")
    return(invisible(NULL))
  }

  # 8. Helper functions for formatting output.
  # `base::format()` is spelled out because the `format` argument of this
  # function is a character vector with the same name.

  # Plain text
  format_text <- function(citation) {
    text <- base::format(citation, style = "text")
    # remove asterisks used for emphasis in the default text
    text <- gsub("\\*([^*]*)\\*", "\\1", text)
    # remove underscores used for emphasis, but only when the underscores are
    # not glued to letters/digits: this leaves URLs and file names such as
    # `some_file_name_v3.pdf` intact instead of silently deleting their
    # underscores
    text <- gsub(
      "(?<![[:alnum:]])_([^_]+)_(?![[:alnum:]])",
      "\\1",
      text,
      perl = TRUE
    )
    # Clean up URLs (remove angle brackets)
    text <- gsub("<(http[^>]*)>", "\\1", text)
    paste(text, collapse = "\n")
  }

  # Markdown
  format_markdown <- function(citation) {
    text <- base::format(citation, style = "text")
    # minimal transformation to markdown: italicize text within asterisks
    text <- gsub("\\*([^*]*)\\*", "_\\1_", text)
    # remove angle brackets around URLs
    text <- gsub("<(http[^>]*)>", "\\1", text)
    paste(text, collapse = "\n")
  }

  # 9. Print the citations in requested formats
  for (f in format) {
    if (f == "text") {
      cat("\nPlain text citations:\n---------------------\n")
      for (cit_item in citations_to_show) {
        cat(format_text(cit_item), "\n\n")
      }
    } else if (f == "markdown") {
      cat("\nMarkdown citations:\n-------------------\n")
      for (cit_item in citations_to_show) {
        cat(format_markdown(cit_item), "\n\n")
      }
    } else if (f == "bibtex") {
      cat("\nBibTeX citations:\n-----------------\n")
      for (cit_item in citations_to_show) {
        print(utils::toBibtex(cit_item))
        cat("\n")
      }
    }
  }

  invisible(NULL)
}

# --------------------------------------------------------------------------------
# /R/codebook.R:
# --------------------------------------------------------------------------------
#' View codebooks for v1 and v2 open mobility data
#'
#' @description
#'
#' `r lifecycle::badge("stable")`
#'
#' Opens relevant vignette with a codebook for v1 (2020-2021) and v2 (2022 onwards) data or provides a link to a webpage if the vignette is missing.
#'
#'
#' @param ver An `integer` or `numeric` value of length 1. The version of the data. Defaults to 1. Can be `1` for v1 (2020-2021) data and `2` for v2 (2022 onwards) data.
#' @return The vignette object returned by `utils::vignette()` if the vignette is installed. If the vignette is missing, prints a message with a link to a webpage with the codebook and returns `NULL` invisibly.
#' @importFrom utils vignette
#' @export
#' @examples
#'
#' # View codebook for v1 (2020-2021) data
#' spod_codebook(ver = 1)
#'
#' # View codebook for v2 (2022 onwards) data
#' spod_codebook(ver = 2)
#'
spod_codebook <- function(ver = 1) {
  # Validate input. `len = 1` (not `max.len = 1`) so that a zero-length `ver`
  # is rejected here instead of breaking the `if ()` condition below.
  checkmate::assertIntegerish(ver, len = 1)
  if (!ver %in% c(1, 2)) {
    stop("Invalid version number. Must be 1 (for v1 2020-2021 data) or 2 (for v2 2022 onwards).")
  }

  # The vignette topic doubles as the file name of the online article
  topic <- if (ver == 1) {
    "v1-2020-2021-mitma-data-codebook"
  } else {
    "v2-2022-onwards-mitma-data-codebook"
  }

  codebook <- vignette(
    topic = topic,
    package = "spanishoddata"
  )
  if (inherits(codebook, what = "vignette")) {
    return(codebook)
  }

  # Vignette not available locally: point the user to the website instead
  message(
    "For some reason the codebook was not installed with the package. Please refer to the online version at: https://ropenspain.github.io/spanishoddata/articles/",
    topic,
    ".html"
  )
  invisible(NULL)
}

# --------------------------------------------------------------------------------
# /R/data-dir.R:
# --------------------------------------------------------------------------------
#' Set the data directory
#'
#' @description
#'
#' `r lifecycle::badge("stable")`
#'
#' This function sets the data directory in the environment variable SPANISH_OD_DATA_DIR, so that all other functions in the package can access the data. It also creates the directory if it doesn't exist.
#'
#' @param data_dir The data directory to set. A single, non-missing `character` string.
#' @inheritParams global_quiet_param
#' @return `TRUE`, invisibly. If quiet is `FALSE`, prints a message with the path and confirmation that the path exists. Throws an error if the directory cannot be created or written to.
#' @export
#' @examples
#' spod_set_data_dir(tempdir())
#'
spod_set_data_dir <- function(
  data_dir,
  quiet = FALSE
){
  # `any.missing = FALSE`: an NA path must not end up in the environment variable
  checkmate::assert_character(
    data_dir,
    len = 1,
    any.missing = FALSE,
    null.ok = FALSE
  )
  checkmate::assert_flag(quiet)

  data_dir_abs_path <- fs::path_abs(data_dir)

  tryCatch({
    # Check if the directory exists; if not, attempt to create it
    if (!dir.exists(data_dir_abs_path)) {
      if (quiet == FALSE) {
        message("Data directory ", data_dir_abs_path, " does not exist. Attempting to create it.")
      }
      fs::dir_create(data_dir_abs_path, recurse = TRUE)
    }
    data_dir_real_path <- fs::path_real(data_dir_abs_path)

    # Check for write permissions by creating and removing a probe file.
    # `file.create()` signals failure through its return value, so test it
    # explicitly rather than relying on the later delete to fail.
    test_file <- fs::path(data_dir_real_path, ".test_write")
    if (!isTRUE(file.create(test_file, showWarnings = FALSE))) {
      stop("Could not create a test file in '", data_dir_real_path, "'.")
    }
    fs::file_delete(test_file)
    if (quiet == FALSE) {
      message("Data directory is writeable.")
    }

    # Set the environment variable
    Sys.setenv(SPANISH_OD_DATA_DIR = data_dir_real_path)

    if (quiet == FALSE) {
      message("Data directory successfully set to: ", data_dir_real_path)
    }
  }, error = function(e) {
    message("Error: Unable to create or access the directory at '", data_dir_abs_path, "'.")
    message("This may be due to write access restrictions or system permissions issues.")
    message("Please verify that you have write permissions for the specified path and try again.")
    stop(e) # Re-throw the original error after printing the guidance above
  })

  return(invisible(TRUE))
}

#' Get the data directory
#'
#' @description
#'
#' `r lifecycle::badge("stable")`
#'
#' This function retrieves the data directory from the environment variable SPANISH_OD_DATA_DIR.
#' If the environment variable is not set, it returns the temporary directory.
#' The directory is created if it does not exist yet.
#' @inheritParams global_quiet_param
#' @return A `character` vector of length 1 containing the path to the data directory where the package will download and convert the data.
#' @export
#' @examples
#' spod_set_data_dir(tempdir())
#' spod_get_data_dir()
#'
spod_get_data_dir <- function(quiet = FALSE) {
  checkmate::assert_flag(quiet)

  data_dir_env <- Sys.getenv("SPANISH_OD_DATA_DIR")
  if (data_dir_env == "") {
    if (isFALSE(quiet)) warning("Warning: SPANISH_OD_DATA_DIR is not set. Using the temporary directory, which is not recommended, as the data will be deleted when the session ends.\n\n To set the data directory, use `Sys.setenv(SPANISH_OD_DATA_DIR = '/path/to/data')` or set SPANISH_OD_DATA_DIR permanently in the environment by editing the `.Renviron` file locally for current project with `usethis::edit_r_environ('project')` or `file.edit('.Renviron')` or globally for all projects with `usethis::edit_r_environ('user')` or `file.edit('~/.Renviron')`.")
    data_dir_env <- tempdir() # if not set, use the temp directory
  }

  # check if dir exists and create it if it doesn't
  data_dir_env_abs <- fs::path_abs(data_dir_env)
  if (!dir.exists(data_dir_env_abs)) {
    fs::dir_create(data_dir_env_abs, recurse = TRUE)
  }

  # resolve to the real path only after the directory is known to exist
  data_dir_env_real <- fs::path_real(data_dir_env_abs)
  return(data_dir_env_real)
}

# --------------------------------------------------------------------------------
# /R/dev-tools.R:
# --------------------------------------------------------------------------------
# This file is for internal functions that update some of the package internal packaged data. These functions are intended neither to be used by the user nor to be used in any of the package functions.

#' Get files sizes for remote files of v1 and v2 data and save them into a csv.gz file in the inst/extdata folder.
#' @param ver The version of the data (1 or 2). Can be both. Defaults to 2, as v1 data is not being updated since 2021.
#' @return Nothing. Only saves a csv.gz file with up to date file sizes in the inst/extdata folder.
#'
#' @keywords internal
#'
spod_files_sizes <- function(ver = 2) {
  # The returned path is not needed here; the call is kept because
  # `spod_get_data_dir()` warns when SPANISH_OD_DATA_DIR is unset and creates
  # the directory if it is missing.
  spod_get_data_dir()

  # Query the remote size (in MB) of every URL in parallel.
  # The caller's `future` plan is saved and restored on exit, including when
  # a request fails, instead of being left overwritten.
  fetch_remote_sizes <- function(urls) {
    previous_plan <- future::plan(future::multisession, workers = 6)
    on.exit(future::plan(previous_plan), add = TRUE)
    furrr::future_map_dbl(
      .x = urls,
      .f = ~ spod_get_file_size_from_url(x_url = .x),
      .progress = TRUE
    )
  }

  if (any(ver %in% 1)) {
    v1 <- spod_available_data(1)

    # takes about 1 minute
    v1$remote_file_size_mb <- fetch_remote_sizes(v1$target_url)

    v1_url_file_sizes <- v1[, c("target_url", "remote_file_size_mb")]
    readr::write_csv(
      x = v1_url_file_sizes,
      file = "inst/extdata/url_file_sizes_v1.txt.gz"
    )
  }

  if (any(ver %in% 2)) {
    v2 <- spod_available_data(2)
    if (all(v2$size_imputed == FALSE)) {
      stop("all file sizes are known")
    }
    v2_known_size <- v2[v2$size_imputed == FALSE, ]
    v2_unknown_size <- v2[v2$size_imputed == TRUE, ]

    # takes about 5 minutes on full data set, but less when only updating the
    # previously unknown files
    v2_unknown_size$remote_file_size_mb <- fetch_remote_sizes(
      v2_unknown_size$target_url
    )

    v2_combined <- dplyr::bind_rows(v2_known_size, v2_unknown_size)
    v2_url_file_sizes <- v2_combined[, c("target_url", "remote_file_size_mb")]
    readr::write_csv(
      x = v2_url_file_sizes,
      file = "inst/extdata/url_file_sizes_v2.txt.gz"
    )
  }

  invisible(NULL)
}


#' Get file size from URL
#'
#' Requests the HTTP headers for `x_url` and converts the reported
#' `Content-Length` from bytes to megabytes.
#'
#' @param x_url URL
#' @return File size in MB, or `NA_real_` if the response headers contain no
#'   `Content-Length` field.
#' @importFrom utils URLencode
#' @keywords internal
spod_get_file_size_from_url <- function(x_url){

  url <- utils::URLencode(x_url)
  headers <- curlGetHeaders(url)

  # Header names are case-insensitive (HTTP/2 servers send them in lower
  # case), so match without regard to case. Anchoring at the line start avoids
  # picking up other headers that merely mention "Content-Length".
  content_length_lines <- grep(
    "^content-length:",
    headers,
    ignore.case = TRUE,
    value = TRUE
  )
  if (length(content_length_lines) == 0) {
    return(NA_real_)
  }

  # More than one match is possible (presumably one per response when
  # redirects are followed); the last one belongs to the final response.
  content_length_line <- content_length_lines[length(content_length_lines)]
  content_length_value <- sub(
    "^content-length:\\s*(\\d+).*",
    "\\1",
    content_length_line,
    ignore.case = TRUE
  )

  # Convert bytes to MB (1 MB = 1024 * 1024 bytes)
  file_size_mb <- as.numeric(content_length_value) / (1024 * 1024)

  return(file_size_mb)
}

# --------------------------------------------------------------------------------
# /R/disconnect.R:
# --------------------------------------------------------------------------------
#' Safely disconnect from data and free memory
#'
#' @description
#'
#' `r lifecycle::badge("stable")`
#'
#' This function is to ensure that `DuckDB` connections to CSV.gz files (created via `spod_get()`), as well as to `DuckDB` files or folders of `parquet` files (created via `spod_convert()`) are closed properly to prevent conflicting connections. Essentially this is just a wrapper around `DBI::dbDisconnect()` that reaches out into the `.$src$con` object of the `tbl_duckdb_connection` connection object that is returned to the user via `spod_get()` and `spod_connect()`. After disconnecting the database, it also frees up memory by running `gc()`.
#' @param tbl_con A `tbl_duckdb_connection` connection object that you get from either `spod_get()` or `spod_connect()`.
#' @param free_mem A `logical`. Whether to free up memory by running `gc()`. Defaults to `TRUE`.
#' @return No return value, called for side effect of disconnecting from the database and freeing up memory.
11 | #' @export
12 | #' @examplesIf interactive()
13 | #' \donttest{
14 | #' # Set data dir for file downloads
15 | #' spod_set_data_dir(tempdir())
16 | #'
17 | #' # basic example
18 | #' # create a connection to the v1 data without converting
19 | #' # this creates a duckdb database connection to CSV files
20 | #' od_distr <- spod_get(
21 | #'  "od",
22 | #'  zones = "distr",
23 | #'  dates = c("2020-03-01", "2020-03-02")
24 | #' )
25 | #' # disconnect from the database connection
26 | #' spod_disconnect(od_distr)
27 | #'
28 | #' # Advanced example
29 | #' # download and convert data
30 | #' dates_1 <- c(start = "2020-02-17", end = "2020-02-19")
31 | #' db_2 <- spod_convert(
32 | #'  type = "od",
33 | #'  zones = "distr",
34 | #'  dates = dates_1,
35 | #'  overwrite = TRUE
36 | #' )
37 | #'
38 | #' # now connect to the converted data
39 | #' my_od_data_2 <- spod_connect(db_2)
40 | #'
41 | #' # disconnect from the database
42 | #' spod_disconnect(my_od_data_2)
43 | #' }
44 | #'
45 | spod_disconnect <- function(
46 |   tbl_con,
47 |   free_mem = TRUE
48 | ) {
49 |   # Validate inputs
50 |   checkmate::assert_class(tbl_con, "tbl_duckdb_connection")
51 |   checkmate::assert_flag(free_mem)
52 |
53 |   # the DuckDB connection is stored inside the lazy table object
54 |   duckdb_con <- tbl_con$src$con
55 |   DBI::dbDisconnect(duckdb_con, shutdown = TRUE)
56 |
57 |   # optionally trigger garbage collection once the connection is closed
58 |   if (free_mem) gc()
59 |
60 |   invisible(NULL)
61 | }
62 |
--------------------------------------------------------------------------------
/R/folders.R:
--------------------------------------------------------------------------------
1 | #' Get metadata cache subfolder name
2 | #'
3 | #' Change subfolder name in the code of this function for metadata cache here to apply globally, as all functions in the package should use this function to get the metadata cache path.
4 | #' @return A `character` string with the subfolder name for the metadata cache.
5 | #' @keywords internal
6 | spod_subfolder_metadata_cache <- function() {
7 |   "metadata_cache"
8 | }
9 |
10 | #' Get raw data cache subfolder name
11 | #'
12 | #' Single place where the raw data cache subfolder name is defined: change it in the code of this function to apply it globally, as all functions in the package should use this function to get the raw data cache path.
13 | #' @inheritParams spod_available_data
14 | #' @return A `character` string with the subfolder name for the raw data cache.
15 | #' @keywords internal
16 | spod_subfolder_raw_data_cache <- function(ver = 1) {
17 |   ver <- as.integer(ver)
18 |   # only data versions 1 and 2 exist
19 |   if (!(ver %in% c(1, 2))) stop("Invalid version number. Must be 1 or 2.")
20 |
21 |   paste0("raw_data_cache", "/v", ver, "/")
22 | }
23 |
24 | #' Get clean data subfolder name
25 | #'
26 | #' Single place where the clean data cache subfolder name is defined: change it in the code of this function to apply it globally, as all functions in the package should use this function to get the clean data cache path.
27 | #' @inheritParams spod_available_data
28 | #' @return A `character` string with the subfolder name for the clean data cache.
29 | #' @keywords internal
30 | spod_subfolder_clean_data_cache <- function(ver = 1) {
31 |   ver <- as.integer(ver)
32 |   # only data versions 1 and 2 exist
33 |   if (!(ver %in% c(1, 2))) stop("Invalid version number. Must be 1 or 2.")
34 |
35 |   paste0("clean_data", "/v", ver, "/")
36 | }
37 |
38 | #' Get temporary directory for DuckDB intermediate spilling
39 | #'
40 | #' @description
41 | #' Get the path to the temp folder for DuckDB for \href{https://duckdb.org/2024/07/09/memory-management.html#intermediate-spilling}{intermediate spilling} in case the set memory limit and/or physical memory of the computer is too low to perform the query. The folder is created if it does not exist yet.
42 | #' @param data_dir The directory where the data is stored. Defaults to the value returned by `spod_get_data_dir()`.
43 | #' @return A `character` string with the path to the temp folder for `DuckDB` for \href{https://duckdb.org/2024/07/09/memory-management.html#intermediate-spilling}{intermediate spilling}.
44 | #' @keywords internal
45 | spod_get_temp_dir <- function(
46 |   data_dir = spod_get_data_dir()
47 | ) {
48 |   duckdb_spill_dir <- fs::path(data_dir, "temp")
49 |   if (!dir.exists(duckdb_spill_dir)) fs::dir_create(duckdb_spill_dir)
50 |   duckdb_spill_dir
51 | }
52 |
--------------------------------------------------------------------------------
/R/global-params.R:
--------------------------------------------------------------------------------
1 | #' @title Global Quiet Parameter
2 | #'
3 | #' @description
4 | #' Documentation for the `quiet` parameter, used globally.
5 | #'
6 | #' @param quiet A `logical` value indicating whether to suppress messages. Default is `FALSE`.
7 | #' @return Nothing. This function is just a placeholder for global quiet parameter.
8 | #' @keywords internal
9 | global_quiet_param <- function(quiet = FALSE){
10 |   # intentionally empty: exists only so that the `quiet` parameter is documented in one place
11 | }
12 |
--------------------------------------------------------------------------------
/R/onload.R:
--------------------------------------------------------------------------------
1 | .onLoad <- function(libname, pkgname) {
2 |   # package defaults; an option that is already set in the session is left untouched
3 |   pkg_defaults <- list(
4 |     spanishoddata.graphql_api_endpoint = "https://mapas-movilidad.transportes.gob.es/api/graphql",
5 |     spanishoddata.user_agent = "spanishoddata R package, https://github.com/rOpenSpain/spanishoddata/"
6 |   )
7 |   not_yet_set <- setdiff(names(pkg_defaults), names(options()))
8 |   if (length(not_yet_set) > 0) options(pkg_defaults[not_yet_set])
9 |
10 |   invisible()
11 | }
12 |
--------------------------------------------------------------------------------
/R/spanishoddata-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 |
4 | ## usethis namespace: start
5 | #' 
@importFrom lifecycle deprecated 6 | ## usethis namespace: end 7 | NULL 8 | -------------------------------------------------------------------------------- /_pkgdown.yml: -------------------------------------------------------------------------------- 1 | url: https://rOpenSpain.github.io/spanishoddata/ 2 | template: 3 | bootstrap: 5 4 | package: rostemplate 5 | 6 | opengraph: 7 | image: 8 | src: man/figures/card.png 9 | 10 | authors: 11 | Egor Kotov: 12 | href: "https://www.ekotov.pro" 13 | Robin Lovelace: 14 | href: "https://www.robinlovelace.net/" 15 | 16 | articles: 17 | - title: Documentation 18 | navbar: ~ 19 | contents: 20 | - v1-2020-2021-mitma-data-codebook 21 | - v2-2022-onwards-mitma-data-codebook 22 | - quick-get 23 | - convert 24 | - disaggregation 25 | - flowmaps-static 26 | - flowmaps-interactive 27 | 28 | navbar: 29 | structure: 30 | left: [intro, reference, articles, tutorials, news] 31 | right: [search, github, lightswitch] 32 | 33 | reference: 34 | - title: "Analysing up to 1 week of data" 35 | desc: > 36 | Quickly download and analyse just a few days of mobility data 37 | contents: 38 | - spod_available_data 39 | - spod_get_zones 40 | - spod_get 41 | - spod_disconnect 42 | - title: "Analysing long time periods (months or even years)" 43 | desc: > 44 | Download data for longer periods, convert them into analysis ready format such as `DuckDB` or `Parquet` for out-of-memory analysis of this large data 45 | contents: 46 | - spod_available_data 47 | - spod_get_zones 48 | - spod_download 49 | - spod_convert 50 | - spod_connect 51 | - spod_disconnect 52 | - title: "Analysing up to 1 day of trips with no extra variables" 53 | desc: > 54 | Quickly get a single day of flows between municipalities (without hourly data or any other attributes) for 2022 and onwards 55 | contents: 56 | - spod_quick_get_od 57 | - spod_quick_get_zones 58 | - title: "Helper functions" 59 | contents: 60 | - spod_codebook 61 | - spod_available_data 62 | - spod_get_valid_dates 63 
| - spod_set_data_dir
64 | - spod_get_data_dir
65 | - spod_cite
66 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 |
2 | ## R CMD check results
3 |
4 | 0 errors | 0 warnings | 1 note
5 |
6 | * Introduced the dependency on R >= 4.1.0 to address the NOTE: "Missing dependency on R >= 4.1.0 because package code uses the pipe"
7 |
8 | * The links to www.ine.es and www.transportes.gob.es give false positives with 403, while in fact they work just fine in the web browser. It must be some bot protection of the Spanish websites.
9 |
--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
1 | c(
2 |   bibentry(
3 |     header = "To cite the `spanishoddata` R package use:",
4 |     key = "r-spanishoddata",
5 |     bibtype = "Manual",
6 |     title = "spanishoddata",
7 |     author = c(
8 |       person("Egor", "Kotov", , "kotov.egor@gmail.com", role = c("aut", "cre"),
9 |         comment = c(ORCID = "0000-0001-6690-5345")),
10 |       person("Robin", "Lovelace", , "rob00x@gmail.com", role = "aut",
11 |         comment = c(ORCID = "0000-0001-5679-6536")),
12 |       person("Eugeni", "Vidal-Tortosa", role = "ctb",
13 |         comment = c(ORCID = "0000-0001-5199-4103"))
14 |     ),
15 |     year = "2024",
16 |     url = "https://github.com/rOpenSpain/spanishoddata",
17 |     doi = "10.32614/CRAN.package.spanishoddata"
18 |   ),
19 |
20 |
21 |   bibentry(
22 |     header = "To cite the official website of the mobility study use:",
23 |     key = "mitms_mobility_web",
24 |     bibtype = "Misc",
25 |     title = "Estudio de la movilidad con Big Data (Study of mobility with Big Data)",
26 |     author = person("Ministerio de Transportes y Movilidad Sostenible (MITMS)"),
27 |     year = "2024",
28 |     url = "https://www.transportes.gob.es/ministerio/proyectos-singulares/estudio-de-movilidad-con-big-data"
29 |   ),
30 |
31 |   bibentry(
32 |     header = "To cite the methodology for 2022 and onwards data use:",
33 |     key = "mitms_methodology_2022_v8",
34 |     bibtype = "Manual",
35 |     title = "Estudio de movilidad de viajeros de ámbito nacional aplicando la tecnología Big Data. Informe metodológico (Study of National Traveler mobility Using Big Data Technology. Methodological Report)",
36 |     author = person("Ministerio de Transportes y Movilidad Sostenible (MITMS)"),
37 |     year = "2024",
38 |     url = "https://www.transportes.gob.es/recursos_mfom/paginabasica/recursos/a3_informe_metodologico_estudio_movilidad_mitms_v8.pdf"
39 |   ),
40 |
41 |   bibentry(
42 |     header = "To cite the methodology for 2020-2021 data use:",
43 |     key = "mitma_methodology_2020_v3",
44 |     bibtype = "Manual",
45 |     title = "Análisis de la movilidad en España con tecnología Big Data durante el estado de alarma para la gestión de la crisis del COVID-19 (Analysis of mobility in Spain with Big Data technology during the state of alarm for COVID-19 crisis management)",
46 |     author = person("Ministerio de Transportes, Movilidad y Agenda Urbana (MITMA)"),
47 |     year = "2021",
48 |     url = "https://cdn.mitma.gob.es/portal-web-drupal/covid-19/bigdata/mitma_-_estudio_movilidad_covid-19_informe_metodologico_v3.pdf"
49 |   )
50 | )
51 |
52 | citFooter("See package website for more details: https://ropenspain.github.io/spanishoddata/")
53 |
--------------------------------------------------------------------------------
/inst/extdata/data_links_v1_2024-08-07.xml.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/inst/extdata/data_links_v1_2024-08-07.xml.gz
--------------------------------------------------------------------------------
/inst/extdata/data_links_v2_2024-08-07.xml.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/inst/extdata/data_links_v2_2024-08-07.xml.gz
--------------------------------------------------------------------------------
/inst/extdata/muni_v2_ref.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/inst/extdata/muni_v2_ref.rds
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/province_names_enum.sql:
--------------------------------------------------------------------------------
1 | CREATE TYPE INE_PROV_NAME_ENUM AS ENUM (
2 |   'UNDEFINED', -- listed first so that the ENUM code of UNDEFINED is 0; thanks to that, the codes of the provinces below start at 1
3 |   /* The provinces below are listed in the order of their official INE codes, so that the ENUM code of each province matches its INE code, e.g. Alicante/Alacant has internal ENUM code 3 and its INE code is "03" */
4 |   'Araba/Álava',
5 |   'Albacete',
6 |   'Alicante/Alacant',
7 |   'Almería',
8 |   'Ávila',
9 |   'Badajoz',
10 |   'Balears, Illes',
11 |   'Barcelona',
12 |   'Burgos',
13 |   'Cáceres',
14 |   'Cádiz',
15 |   'Castellón/Castelló',
16 |   'Ciudad Real',
17 |   'Córdoba',
18 |   'Coruña, A',
19 |   'Cuenca',
20 |   'Girona',
21 |   'Granada',
22 |   'Guadalajara',
23 |   'Gipuzkoa',
24 |   'Huelva',
25 |   'Huesca',
26 |   'Jaén',
27 |   'León',
28 |   'Lleida',
29 |   'Rioja, La',
30 |   'Lugo',
31 |   'Madrid',
32 |   'Málaga',
33 |   'Murcia',
34 |   'Navarra',
35 |   'Ourense',
36 |   'Asturias',
37 |   'Palencia',
38 |   'Palmas, Las',
39 |   'Pontevedra',
40 |   'Salamanca',
41 |   'Santa Cruz de Tenerife',
42 |   'Cantabria',
43 |   'Segovia',
44 |   'Sevilla',
45 |   'Soria',
46 |   'Tarragona',
47 |   'Teruel',
48 |   'Toledo',
49 |   'Valencia/València',
50 |   'Valladolid',
51 |   'Bizkaia',
52 |   'Zamora',
53 |   'Zaragoza',
54 |   'Ceuta',
55 |   'Melilla'
56 | );
57 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-distritos-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
1 | CREATE VIEW nt_csv_clean AS SELECT -- view over nt_csv_raw with English column names
2 |   fecha AS date,
3 |   CAST (CASE distrito
4 |     WHEN 'externo' THEN 'external' -- translate the 'externo' zone label to English
5 |     ELSE distrito
6 |   END AS ZONES_ENUM)
7 |   AS id,
8 |   CAST(numero_viajes AS N_TRIPS_ENUM) AS n_trips,
9 |   personas AS n_persons,
10 |   CAST(year AS INTEGER) AS year, -- year, month and day are not among the CSV columns declared for nt_csv_raw; presumably they come from hive partitioning of the folder names (TODO confirm)
11 |   CAST(month AS INTEGER) AS month,
12 |   CAST(day AS INTEGER) AS day
13 | FROM nt_csv_raw;
14 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-distritos-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
1 | CREATE VIEW nt_csv_clean AS SELECT -- view over nt_csv_raw that keeps the original Spanish column names and values
2 |   fecha,
3 |   CAST (distrito AS ZONES_ENUM) AS distrito,
4 |   CAST(numero_viajes AS N_TRIPS_ENUM) AS numero_viajes,
5 |   personas,
6 |   CAST(year AS INTEGER) AS ano, -- year, month and day are not among the CSV columns declared for nt_csv_raw; presumably they come from hive partitioning of the folder names (TODO confirm)
7 |   CAST(month AS INTEGER) AS mes,
8 |   CAST(day AS INTEGER) AS dia
9 | FROM nt_csv_raw;
10 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-distritos-raw-csv-view.sql:
--------------------------------------------------------------------------------
1 | CREATE VIEW nt_csv_raw AS SELECT *
2 | /* csv_folder is a placeholder that must be replaced with a valid path before the query is run;
3 | in R use glue::glue() */
4 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE, -- reads all pipe-delimited csv.gz files found recursively under csv_folder
5 |   columns={{ -- braces are doubled here, presumably so that glue::glue() emits them literally (TODO confirm)
6 |     'fecha': 'DATE',
7 |     'distrito': 'VARCHAR',
8 |     'numero_viajes': 'VARCHAR',
9 |     'personas': 'DOUBLE'
10 |   }},
11 |   dateformat='%Y%m%d'); -- fecha is parsed from the YYYYMMDD format
12 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-enum-ntrips.sql:
--------------------------------------------------------------------------------
1 | CREATE TYPE N_TRIPS_ENUM AS ENUM ('0', '1', '2', '2+'); -- target type for the numero_viajes column in the nt_csv_clean views
2 | 
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-municipios-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
1 | -- Create the relationships view from the relaciones_distrito_mitma.csv
2 | CREATE VIEW relations_districts_municipalities AS
3 | SELECT DISTINCT -- one row per (distrito_mitma, municipio_mitma) pair, so that the join below cannot multiply rows and inflate SUM()
4 |   distrito_mitma,
5 |   municipio_mitma
6 | FROM
7 |   read_csv_auto('{relations_districts_municipalities}',
8 |     delim = '|',
9 |     columns={{
10 |       'distrito': 'VARCHAR',
11 |       'distrito_mitma': 'VARCHAR',
12 |       'municipio_mitma': 'VARCHAR'
13 |     }}
14 |   );
15 |
16 | -- Create the nt_csv_clean view with the necessary joins, recoding, and aggregation
17 | CREATE VIEW nt_csv_clean AS
18 | SELECT
19 |   d.fecha AS date,
20 |   CAST(CASE r.municipio_mitma
21 |     WHEN 'externo' THEN 'external'
22 |     ELSE r.municipio_mitma
23 |   END AS ZONES_ENUM) AS id,
24 |   CAST(d.numero_viajes AS N_TRIPS_ENUM) AS n_trips,
25 |   SUM(d.personas) AS n_persons,
26 |   CAST(d.year AS INTEGER) AS year,
27 |   CAST(d.month AS INTEGER) AS month,
28 |   CAST(d.day AS INTEGER) AS day
29 | FROM
30 |   nt_csv_raw d
31 | LEFT JOIN
32 |   relations_districts_municipalities r ON d.distrito = r.distrito_mitma
33 | GROUP BY
34 |   d.fecha,
35 |   r.municipio_mitma,
36 |   d.numero_viajes,
37 |   d.year,
38 |   d.month,
39 |   d.day;
40 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-municipios-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
1 | -- Create the relationships view from the relaciones_distrito_mitma.csv
2 | CREATE VIEW relations_districts_municipalities AS
3 | SELECT DISTINCT -- one row per (distrito_mitma, municipio_mitma) pair, so that the join below cannot multiply rows and inflate SUM()
4 |   distrito_mitma,
5 |   municipio_mitma
6 | FROM
7 |   read_csv_auto('{relations_districts_municipalities}',
8 |     delim = '|',
9 |     columns={{
10 |       'distrito': 'VARCHAR',
11 |       'distrito_mitma': 'VARCHAR',
12 |       'municipio_mitma': 'VARCHAR'
13 |     }}
14 |   );
15 |
16 | -- Create the nt_csv_clean view with the necessary joins, recoding, and aggregation
17 | CREATE VIEW nt_csv_clean AS
18 | SELECT
19 |   d.fecha AS fecha,
20 |   CAST(r.municipio_mitma AS ZONES_ENUM) AS municipio_mitma,
21 |   CAST(d.numero_viajes AS N_TRIPS_ENUM) AS numero_viajes,
22 |   SUM(d.personas) AS personas,
23 |   CAST(d.year AS INTEGER) AS ano,
24 |   CAST(d.month AS INTEGER) AS mes,
25 |   CAST(d.day AS INTEGER) AS dia
26 | FROM
27 |   nt_csv_raw d
28 | LEFT JOIN
29 |   relations_districts_municipalities r ON d.distrito = r.distrito_mitma
30 | GROUP BY
31 |   d.fecha,
32 |   r.municipio_mitma,
33 |   d.numero_viajes,
34 |   d.year,
35 |   d.month,
36 |   d.day;
37 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-municipios-raw-csv-view.sql:
--------------------------------------------------------------------------------
1 | CREATE VIEW nt_csv_raw AS SELECT *
2 | /* csv_folder needs to be replaced with a valid path
3 | in R use glue::glue() */
4 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE,
5 |   columns={{
6 |     'fecha': 'DATE',
7 |     'distrito': 'VARCHAR',
8 |     'numero_viajes': 'VARCHAR',
9 |     'personas': 'DOUBLE'
10 |   }},
11 |   dateformat='%Y%m%d');
12 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-distritos-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
1 | CREATE VIEW od_csv_clean AS SELECT
2 |   fecha AS date,
3 |   periodo AS hour,
4 |   CAST (CASE origen
5 |     WHEN 'externo' THEN 'external'
6 |     ELSE origen
7 |   END AS ZONES_ENUM)
8 |   AS id_origin,
9 |   CAST (CASE destino
10 |     WHEN 'externo' THEN 'external'
11 |     ELSE destino
12 |   END AS ZONES_ENUM)
13 |   AS id_destination,
14 |   CAST(distancia AS DISTANCE_ENUM) AS distance,
15 |   CAST(CASE actividad_origen
16 |     WHEN 'casa' THEN 'home'
17 |     WHEN 'otros' THEN 'other'
18 |     WHEN 'trabajo_estudio' THEN 'work_or_study'
19 |   END AS ACTIV_ENUM) AS activity_origin,
20 |   CAST(CASE actividad_destino
21 |     WHEN 'casa' THEN 'home'
22 |     WHEN 'otros' THEN 'other'
23 |     WHEN 'trabajo_estudio' THEN 'work_or_study'
24 |   END AS ACTIV_ENUM) AS activity_destination,
25 |   CAST(residencia AS INE_PROV_CODE_ENUM) AS residence_province_ine_code,
26 |   CAST (CASE residencia
27 |     WHEN '01' THEN 'Araba/Álava'
28 |     WHEN '02' THEN 'Albacete'
29 |     WHEN '03' THEN 'Alicante/Alacant'
30 |     WHEN '04' THEN 'Almería'
31 |     WHEN '05' THEN 'Ávila'
32 |     WHEN '06' THEN 'Badajoz'
33 |     WHEN '07' THEN 'Balears, Illes'
34 |     WHEN '08' THEN 'Barcelona'
35 |     WHEN '09' THEN 'Burgos'
36 |     WHEN '10' THEN 'Cáceres'
37 |     WHEN '11' THEN 'Cádiz'
38 |     WHEN '12' THEN 'Castellón/Castelló'
39 |     WHEN '13' THEN 'Ciudad Real'
40 |     WHEN '14' THEN 'Córdoba'
41 |     WHEN '15' THEN 'Coruña, A'
42 |     WHEN '16' THEN 'Cuenca'
43 |     WHEN '17' THEN 'Girona'
44 |     WHEN '18' THEN 'Granada'
45 |     WHEN '19' THEN 'Guadalajara'
46 |     WHEN '20' THEN 'Gipuzkoa'
47 |     WHEN '21' THEN 'Huelva'
48 |     WHEN '22' THEN 'Huesca'
49 |     WHEN '23' THEN 'Jaén'
50 |     WHEN '24' THEN 'León'
51 |     WHEN '25' THEN 'Lleida'
52 |     WHEN '26' THEN 'Rioja, La'
53 |     WHEN '27' THEN 'Lugo'
54 |     WHEN '28' THEN 'Madrid'
55 |     WHEN '29' THEN 'Málaga'
56 |     WHEN '30' THEN 'Murcia'
57 |     WHEN '31' THEN 'Navarra'
58 |     WHEN '32' THEN 'Ourense'
59 |     WHEN '33' THEN 'Asturias'
60 |     WHEN '34' THEN 'Palencia'
61 |     WHEN '35' THEN 'Palmas, Las'
62 |     WHEN '36' THEN 'Pontevedra'
63 |     WHEN '37' THEN 'Salamanca'
64 |     WHEN '38' THEN 'Santa Cruz de Tenerife'
65 |     WHEN '39' THEN 'Cantabria'
66 |     WHEN '40' THEN 'Segovia'
67 |     WHEN '41' THEN 'Sevilla'
68 |     WHEN '42' THEN 'Soria'
69 |     WHEN '43' THEN 'Tarragona'
70 |     WHEN '44' THEN 'Teruel'
71 |     WHEN '45' THEN 'Toledo'
72 |     WHEN '46' THEN 'Valencia/València'
73 |     WHEN '47' THEN 'Valladolid'
74 |     WHEN '48' THEN 'Bizkaia'
75 |     WHEN '49' THEN 'Zamora'
76 |     WHEN '50' THEN 'Zaragoza'
77 |     WHEN '51' THEN 'Ceuta'
78 |     WHEN '52' THEN 'Melilla'
79 |   END AS INE_PROV_NAME_ENUM) AS residence_province_name,
80 |   viajes AS n_trips,
81 |   viajes_km AS trips_total_length_km,
82 |   CAST(year AS INTEGER) AS year,
83 |   CAST(month AS INTEGER) AS month,
84 |   CAST(day AS INTEGER) AS day,
85 |   periodo AS time_slot -- NOTE(review): same source column as hour above; presumably kept as a second alias for backward compatibility - confirm
86 | FROM od_csv_raw;
87 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-distritos-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
1 | CREATE VIEW od_csv_clean AS SELECT
2 |   fecha,
3 |   periodo,
4 |   CAST(origen AS ZONES_ENUM) AS origen,
5 |   CAST(destino AS ZONES_ENUM) AS destino,
6 |   -- activity values are kept in Spanish: the es definition of ACTIV_ENUM holds 'casa', 'trabajo_estudio' and 'otros'
7 |   CAST(actividad_origen AS ACTIV_ENUM) AS actividad_origen,
8 |   CAST(actividad_destino AS ACTIV_ENUM) AS actividad_destino,
9 |   CAST(distancia AS DISTANCE_ENUM) AS distancia,
10 |   CAST(residencia AS INE_PROV_CODE_ENUM) AS residencia,
11 |   CAST (CASE residencia
12 |     WHEN '01' THEN 'Araba/Álava'
13 |     WHEN '02' THEN 'Albacete'
14 |     WHEN '03' THEN 'Alicante/Alacant'
15 |     WHEN '04' THEN 'Almería'
16 |     WHEN '05' THEN 'Ávila'
17 |     WHEN '06' THEN 'Badajoz'
18 |     WHEN '07' THEN 'Balears, Illes'
19 |     WHEN '08' THEN 'Barcelona'
20 |     WHEN '09' THEN 'Burgos'
21 |     WHEN '10' THEN 'Cáceres'
22 |     WHEN '11' THEN 'Cádiz'
23 |     WHEN '12' THEN 'Castellón/Castelló'
24 |     WHEN '13' THEN 'Ciudad Real'
25 |     WHEN '14' THEN 'Córdoba'
26 |     WHEN '15' THEN 'Coruña, A'
27 |     WHEN '16' THEN 'Cuenca'
28 |     WHEN '17' THEN 'Girona'
29 |     WHEN '18' THEN 'Granada'
30 |     WHEN '19' THEN 'Guadalajara'
31 |     WHEN '20' THEN 'Gipuzkoa'
32 |     WHEN '21' THEN 'Huelva'
33 |     WHEN '22' THEN 'Huesca'
34 |     WHEN '23' THEN 'Jaén'
35 |     WHEN '24' THEN 'León'
36 |     WHEN '25' THEN 'Lleida'
37 |     WHEN '26' THEN 'Rioja, La'
38 |     WHEN '27' THEN 'Lugo'
39 |     WHEN '28' THEN 'Madrid'
40 |     WHEN '29' THEN 'Málaga'
41 |     WHEN '30' THEN 'Murcia'
42 |     WHEN '31' THEN 'Navarra'
43 |     WHEN '32' THEN 'Ourense'
44 |     WHEN '33' THEN 'Asturias'
45 |     WHEN '34' THEN 'Palencia'
46 |     WHEN '35' THEN 'Palmas, Las'
47 |     WHEN '36' THEN 'Pontevedra'
48 |     WHEN '37' THEN 'Salamanca'
49 |     WHEN '38' THEN 'Santa Cruz de Tenerife'
50 |     WHEN '39' THEN 'Cantabria'
51 |     WHEN '40' THEN 'Segovia'
52 |     WHEN '41' THEN 'Sevilla'
53 |     WHEN '42' THEN 'Soria'
54 |     WHEN '43' THEN 'Tarragona'
55 |     WHEN '44' THEN 'Teruel'
56 |     WHEN '45' THEN 'Toledo'
57 |     WHEN '46' THEN 'Valencia/València'
58 |     WHEN '47' THEN 'Valladolid'
59 |     WHEN '48' THEN 'Bizkaia'
60 |     WHEN '49' THEN 'Zamora'
61 |     WHEN '50' THEN 'Zaragoza'
62 |     WHEN '51' THEN 'Ceuta'
63 |     WHEN '52' THEN 'Melilla'
64 |   END AS INE_PROV_NAME_ENUM) AS residencia_nombre,
65 |   viajes,
66 |   viajes_km,
67 |   CAST(year AS INTEGER) AS ano,
68 |   CAST(month AS INTEGER) AS mes,
69 |   CAST(day AS INTEGER) AS dia
70 | FROM od_csv_raw;
71 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-distritos-raw-csv-view.sql:
--------------------------------------------------------------------------------
1 | CREATE VIEW od_csv_raw AS SELECT *
2 | /* csv_folder needs to be replaced with a valid path
3 | in R use glue::glue() */
4 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE,
5 |   columns={{
6 |     'fecha': 'DATE',
7 |     'origen': 'VARCHAR',
8 |     'destino': 'VARCHAR',
9 |     'actividad_origen': 'VARCHAR',
10 |     'actividad_destino': 'VARCHAR',
11 |     'residencia': 'VARCHAR',
12 |     'edad': 'VARCHAR',
13 |     'periodo': 'INTEGER',
14 |     'distancia': 'VARCHAR',
15 |     'viajes': 'DOUBLE',
16 |     'viajes_km': 'DOUBLE'
17 |   }},
18 |   dateformat='%Y%m%d');
19 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-enum-activity-en.sql:
--------------------------------------------------------------------------------
1 | CREATE TYPE ACTIV_ENUM AS ENUM (
2 |   'home',
3 |   'work_or_study',
4 |   'other');
5 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-enum-activity-es.sql:
--------------------------------------------------------------------------------
1 | CREATE TYPE ACTIV_ENUM AS ENUM (
2 |   'casa',
3 |   'trabajo_estudio',
4 |   'otros');
5 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-enum-distance.sql:
--------------------------------------------------------------------------------
1 | CREATE TYPE DISTANCE_ENUM AS ENUM (
2 |   '0005-002',
3 |   '002-005',
4 |   '005-010',
5 |   '010-050',
6 |   '050-100',
7 |   '100+');
8 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-municipios-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
1 | -- Create the relationships view from the relaciones_distrito_mitma.csv
2 | CREATE VIEW relations_districts_municipalities AS
3 | SELECT DISTINCT -- one row per (distrito_mitma, municipio_mitma) pair, so that the joins below cannot multiply rows and inflate SUM()
4 |   distrito_mitma,
5 |   municipio_mitma
6 | FROM
7 |   read_csv_auto('{relations_districts_municipalities}',
8 |     delim = '|',
9 |     columns={{
10 |       'distrito': 'VARCHAR',
11 |       'distrito_mitma': 'VARCHAR',
12 |       'municipio_mitma': 'VARCHAR'
13 |     }}
14 |   );
15 |
16 | -- Create the od_csv_clean view with necessary joins, recoding, and aggregation
17 | CREATE VIEW od_csv_clean AS
18 | SELECT
19 |   d.fecha AS date,
20 |   CAST(m1.municipio_mitma AS ZONES_ENUM) AS id_origin, -- NOTE(review): unlike the other English views, 'externo' is not recoded to 'external' here - confirm this is intended
21 |   CAST(m2.municipio_mitma AS ZONES_ENUM) AS id_destination,
22 |   CAST(CASE d.actividad_origen
23 |     WHEN 'casa' THEN 'home'
24 |     WHEN 'otros' THEN 'other'
25 |     WHEN 'trabajo_estudio' THEN 'work_or_study'
26 |   END AS ACTIV_ENUM) AS activity_origin,
27 |   CAST(CASE d.actividad_destino
28 |     WHEN 'casa' THEN 'home'
29 |     WHEN 'otros' THEN 'other'
30 |     WHEN 'trabajo_estudio' THEN 'work_or_study'
31 |   END AS ACTIV_ENUM) AS activity_destination,
32 |   CAST(d.residencia AS INE_PROV_CODE_ENUM) AS residence_province_ine_code,
33 |   CAST(CASE d.residencia
34 |     WHEN '01' THEN 'Araba/Álava'
35 |     WHEN '02' THEN 'Albacete'
36 |     WHEN '03' THEN 'Alicante/Alacant'
37 |     WHEN '04' THEN 'Almería'
38 |     WHEN '05' THEN 'Ávila'
39 |     WHEN '06' THEN 'Badajoz'
40 |     WHEN '07' THEN 'Balears, Illes'
41 |     WHEN '08' THEN 'Barcelona'
42 |     WHEN '09' THEN 'Burgos'
43 |     WHEN '10' THEN 'Cáceres'
44 |     WHEN '11' THEN 'Cádiz'
45 |     WHEN '12' THEN 'Castellón/Castelló'
46 |     WHEN '13' THEN 'Ciudad Real'
47 |     WHEN '14' THEN 'Córdoba'
48 |     WHEN '15' THEN 'Coruña, A'
49 |     WHEN '16' THEN 'Cuenca'
50 |     WHEN '17' THEN 'Girona'
51 |     WHEN '18' THEN 'Granada'
52 |     WHEN '19' THEN 'Guadalajara'
53 |     WHEN '20' THEN 'Gipuzkoa'
54 |     WHEN '21' THEN 'Huelva'
55 |     WHEN '22' THEN 'Huesca'
56 |     WHEN '23' THEN 'Jaén'
57 |     WHEN '24' THEN 'León'
58 |     WHEN '25' THEN 'Lleida'
59 |     WHEN '26' THEN 'Rioja, La'
60 |     WHEN '27' THEN 'Lugo'
61 |     WHEN '28' THEN 'Madrid'
62 |     WHEN '29' THEN 'Málaga'
63 |     WHEN '30' THEN 'Murcia'
64 |     WHEN '31' THEN 'Navarra'
65 |     WHEN '32' THEN 'Ourense'
66 |     WHEN '33' THEN 'Asturias'
67 |     WHEN '34' THEN 'Palencia'
68 |     WHEN '35' THEN 'Palmas, Las'
69 |     WHEN '36' THEN 'Pontevedra'
70 |     WHEN '37' THEN 'Salamanca'
71 |     WHEN '38' THEN 'Santa Cruz de Tenerife'
72 |     WHEN '39' THEN 'Cantabria'
73 |     WHEN '40' THEN 'Segovia'
74 |     WHEN '41' THEN 'Sevilla'
75 |     WHEN '42' THEN 'Soria'
76 |     WHEN '43' THEN 'Tarragona'
77 |     WHEN '44' THEN 'Teruel'
78 |     WHEN '45' THEN 'Toledo'
79 |     WHEN '46' THEN 'Valencia/València'
80 |     WHEN '47' THEN 'Valladolid'
81 |     WHEN '48' THEN 'Bizkaia'
82 |     WHEN '49' THEN 'Zamora'
83 |     WHEN '50' THEN 'Zaragoza'
84 |     WHEN '51' THEN 'Ceuta'
85 |     WHEN '52' THEN 'Melilla'
86 |   END AS INE_PROV_NAME_ENUM) AS residence_province_name,
87 |   d.periodo AS hour,
88 |   CAST(d.distancia AS DISTANCE_ENUM) AS distance,
89 |   SUM(d.viajes) AS n_trips,
90 |   SUM(d.viajes_km) AS trips_total_length_km,
91 |   CAST(d.year AS INTEGER) AS year,
92 |   CAST(d.month AS INTEGER) AS month,
93 |   CAST(d.day AS INTEGER) AS day
94 | FROM
95 |   od_csv_raw d
96 | LEFT JOIN
97 |   relations_districts_municipalities m1 ON d.origen = m1.distrito_mitma
98 | LEFT JOIN
99 |   relations_districts_municipalities m2 ON d.destino = m2.distrito_mitma
100 | GROUP BY
101 |   d.fecha,
102 |   m1.municipio_mitma,
103 |   m2.municipio_mitma,
104 |   d.actividad_origen,
105 |   d.actividad_destino,
106 |   d.residencia,
107 |   d.periodo,
108 |   d.distancia,
109 |   d.year,
110 |   d.month,
111 |   d.day;
112 |
--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-municipios-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
1 | -- Create the relationships view from the relaciones_distrito_mitma.csv
2 | CREATE VIEW relations_districts_municipalities AS
3 | SELECT DISTINCT -- one row per (distrito_mitma, municipio_mitma) pair, so that the joins below cannot multiply rows and inflate SUM()
4 |   distrito_mitma,
5 |   municipio_mitma
6 | FROM
7 |   read_csv_auto('{relations_districts_municipalities}',
8 |     delim = '|',
9 |     columns={{
10 |       'distrito': 'VARCHAR',
11 |       'distrito_mitma': 'VARCHAR',
12 |       'municipio_mitma': 'VARCHAR'
13 |     }}
14 |   );
15 |
16 | -- Create the od_csv_clean view with necessary joins, recoding, and aggregation
17 | CREATE VIEW od_csv_clean AS
18 | SELECT
19 |   d.fecha AS fecha,
20 |   CAST(m1.municipio_mitma AS ZONES_ENUM) AS origen,
21 |   CAST(m2.municipio_mitma AS ZONES_ENUM) AS destino,
22 |   CAST(CASE d.actividad_origen
23 |     WHEN 'casa' THEN 'home'
24 |     WHEN 'otros' THEN 'other'
25 |     WHEN 'trabajo_estudio' THEN 'work_or_study'
26 |   END AS ACTIV_ENUM) AS actividad_origen,
27 |   CAST(CASE d.actividad_destino
28 |     WHEN 'casa' THEN 'home'
29 |     WHEN 'otros' THEN 'other'
30 |     WHEN 'trabajo_estudio' THEN 'work_or_study'
31 |   END AS ACTIV_ENUM) AS actividad_destino,
32 |   CAST(d.residencia AS INE_PROV_CODE_ENUM) AS residencia,
33 |   CAST(CASE d.residencia
34 |     WHEN '01' THEN 
'Araba/Álava' 35 | WHEN '02' THEN 'Albacete' 36 | WHEN '03' THEN 'Alicante/Alacant' 37 | WHEN '04' THEN 'Almería' 38 | WHEN '05' THEN 'Ávila' 39 | WHEN '06' THEN 'Badajoz' 40 | WHEN '07' THEN 'Balears, Illes' 41 | WHEN '08' THEN 'Barcelona' 42 | WHEN '09' THEN 'Burgos' 43 | WHEN '10' THEN 'Cáceres' 44 | WHEN '11' THEN 'Cádiz' 45 | WHEN '12' THEN 'Castellón/Castelló' 46 | WHEN '13' THEN 'Ciudad Real' 47 | WHEN '14' THEN 'Córdoba' 48 | WHEN '15' THEN 'Coruña, A' 49 | WHEN '16' THEN 'Cuenca' 50 | WHEN '17' THEN 'Girona' 51 | WHEN '18' THEN 'Granada' 52 | WHEN '19' THEN 'Guadalajara' 53 | WHEN '20' THEN 'Gipuzkoa' 54 | WHEN '21' THEN 'Huelva' 55 | WHEN '22' THEN 'Huesca' 56 | WHEN '23' THEN 'Jaén' 57 | WHEN '24' THEN 'León' 58 | WHEN '25' THEN 'Lleida' 59 | WHEN '26' THEN 'Rioja, La' 60 | WHEN '27' THEN 'Lugo' 61 | WHEN '28' THEN 'Madrid' 62 | WHEN '29' THEN 'Málaga' 63 | WHEN '30' THEN 'Murcia' 64 | WHEN '31' THEN 'Navarra' 65 | WHEN '32' THEN 'Ourense' 66 | WHEN '33' THEN 'Asturias' 67 | WHEN '34' THEN 'Palencia' 68 | WHEN '35' THEN 'Palmas, Las' 69 | WHEN '36' THEN 'Pontevedra' 70 | WHEN '37' THEN 'Salamanca' 71 | WHEN '38' THEN 'Santa Cruz de Tenerife' 72 | WHEN '39' THEN 'Cantabria' 73 | WHEN '40' THEN 'Segovia' 74 | WHEN '41' THEN 'Sevilla' 75 | WHEN '42' THEN 'Soria' 76 | WHEN '43' THEN 'Tarragona' 77 | WHEN '44' THEN 'Teruel' 78 | WHEN '45' THEN 'Toledo' 79 | WHEN '46' THEN 'Valencia/València' 80 | WHEN '47' THEN 'Valladolid' 81 | WHEN '48' THEN 'Bizkaia' 82 | WHEN '49' THEN 'Zamora' 83 | WHEN '50' THEN 'Zaragoza' 84 | WHEN '51' THEN 'Ceuta' 85 | WHEN '52' THEN 'Melilla' 86 | END AS INE_PROV_NAME_ENUM) AS residencia_nombre, 87 | d.periodo AS periodo, 88 | CAST(d.distancia AS DISTANCE_ENUM) AS distancia, 89 | SUM(d.viajes) AS viajes, 90 | SUM(d.viajes_km) AS viajes_km, 91 | CAST(d.year AS INTEGER) AS ano, 92 | CAST(d.month AS INTEGER) AS mes, 93 | CAST(d.day AS INTEGER) AS dia 94 | FROM 95 | od_csv_raw d 96 | LEFT JOIN 97 | relations_districts_municipalities m1 
ON d.origen = m1.distrito_mitma 98 | LEFT JOIN 99 | relations_districts_municipalities m2 ON d.destino = m2.distrito_mitma 100 | GROUP BY 101 | d.fecha, 102 | m1.municipio_mitma, 103 | m2.municipio_mitma, 104 | d.actividad_origen, 105 | d.actividad_destino, 106 | d.residencia, 107 | d.periodo, 108 | d.distancia, 109 | d.year, 110 | d.month, 111 | d.day; 112 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v1-od-municipios-raw-csv-view.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW od_csv_raw AS SELECT * 2 | /* csv_folder needs to be replaced with a valid path 3 | in R use glue::glue() 4 | # this file actually connects to raw data with districts, because of the bugs described in 5 | # http://www.ekotov.pro/mitma-data-issues/issues/011-v1-tpp-mismatch-zone-ids-in-table-and-spatial-data.html 6 | # http://www.ekotov.pro/mitma-data-issues/issues/012-v1-tpp-district-files-in-municipality-folders.html 7 | # the decision was to use district data and aggregate it to replicate municipal data */ 8 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE, 9 | columns={{ 10 | 'fecha': 'DATE', 11 | 'origen': 'VARCHAR', 12 | 'destino': 'VARCHAR', 13 | 'actividad_origen': 'VARCHAR', 14 | 'actividad_destino': 'VARCHAR', 15 | 'residencia': 'VARCHAR', 16 | 'edad': 'VARCHAR', 17 | 'periodo': 'INTEGER', 18 | 'distancia': 'VARCHAR', 19 | 'viajes': 'DOUBLE', 20 | 'viajes_km': 'DOUBLE' 21 | }}, 22 | dateformat='%Y%m%d'); 23 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-distritos-clean-csv-view-en.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW nt_csv_clean AS SELECT 2 | fecha AS date, 3 | CAST(zona_pernoctacion AS ZONES_ENUM) AS id, 4 | CAST(CASE edad 5 | WHEN 'NA' THEN NULL 6 | WHEN '0-25' THEN '0-25' 7 | WHEN
'25-45' THEN '25-45' 8 | WHEN '45-65' THEN '45-65' 9 | WHEN '65-100' THEN '65-100' 10 | ELSE NULL 11 | END AS AGE_ENUM) 12 | AS age, 13 | CAST(CASE sexo 14 | WHEN 'NA' THEN NULL 15 | WHEN 'mujer' THEN 'female' 16 | WHEN 'hombre' THEN 'male' 17 | END AS SEX_ENUM) 18 | AS sex, 19 | CAST(numero_viajes AS N_TRIPS_ENUM) AS n_trips, 20 | personas AS n_persons, 21 | CAST(year AS INTEGER) AS year, 22 | CAST(month AS INTEGER) AS month, 23 | CAST(day AS INTEGER) AS day 24 | FROM nt_csv_raw; 25 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-distritos-clean-csv-view-es.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW nt_csv_clean AS SELECT 2 | fecha, 3 | CAST(zona_pernoctacion AS ZONES_ENUM) AS zona_pernoctacion, 4 | CAST(CASE edad 5 | WHEN 'NA' THEN NULL 6 | WHEN '0-25' THEN '0-25' 7 | WHEN '25-45' THEN '25-45' 8 | WHEN '45-65' THEN '45-65' 9 | WHEN '65-100' THEN '65-100' 10 | ELSE NULL 11 | END AS AGE_ENUM) 12 | AS edad, 13 | CAST (CASE sexo 14 | WHEN 'NA' THEN NULL 15 | ELSE sexo 16 | END AS SEX_ENUM) 17 | AS sexo, 18 | CAST(numero_viajes AS N_TRIPS_ENUM) AS numero_viajes, 19 | personas, 20 | CAST(year AS INTEGER) AS ano, 21 | CAST(month AS INTEGER) AS mes, 22 | CAST(day AS INTEGER) AS dia 23 | FROM nt_csv_raw; 24 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-distritos-raw-csv-view.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW nt_csv_raw AS SELECT * 2 | /* csv_folder needs to be replaced with a valid path 3 | in R use glue::glue() */ 4 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE, 5 | columns={{ 6 | 'fecha': 'DATE', 7 | 'zona_pernoctacion': 'VARCHAR', 8 | 'edad': 'VARCHAR', 9 | "sexo": 'VARCHAR', 10 | 'numero_viajes': 'VARCHAR', 11 | 'personas': 'DOUBLE' 12 | }}, 13 | 
dateformat='%Y%m%d'); 14 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-enum-age.sql: -------------------------------------------------------------------------------- 1 | CREATE TYPE AGE_ENUM AS ENUM ( 2 | '0-25', 3 | '25-45', 4 | '45-65', 5 | '65-100'); 6 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-enum-ntrips.sql: -------------------------------------------------------------------------------- 1 | CREATE TYPE N_TRIPS_ENUM AS ENUM ('0', '1', '2', '2+'); 2 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-enum-sex-en.sql: -------------------------------------------------------------------------------- 1 | CREATE TYPE SEX_ENUM AS ENUM ( 2 | 'female', 3 | 'male'); 4 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-enum-sex-es.sql: -------------------------------------------------------------------------------- 1 | CREATE TYPE SEX_ENUM AS ENUM ( 2 | 'mujer', 3 | 'hombre'); 4 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-gau-clean-csv-view-en.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW nt_csv_clean AS SELECT 2 | fecha AS date, 3 | CAST(zona_pernoctacion AS ZONES_ENUM) AS id, 4 | CAST(CASE edad 5 | WHEN 'NA' THEN NULL 6 | WHEN '0-25' THEN '0-25' 7 | WHEN '25-45' THEN '25-45' 8 | WHEN '45-65' THEN '45-65' 9 | WHEN '65-100' THEN '65-100' 10 | ELSE NULL 11 | END AS AGE_ENUM) 12 | AS age, 13 | CAST(CASE sexo 14 | WHEN 'NA' THEN NULL 15 | WHEN 'mujer' THEN 'female' 16 | WHEN 'hombre' THEN 'male' 17 | END AS SEX_ENUM) 18 | AS sex, 19 | CAST(numero_viajes AS N_TRIPS_ENUM) AS n_trips, 20 | personas AS n_persons, 21 | CAST(year AS INTEGER) AS year, 22 | CAST(month AS INTEGER) AS month, 23 
| CAST(day AS INTEGER) AS day 24 | FROM nt_csv_raw; 25 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-gau-clean-csv-view-es.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW nt_csv_clean AS SELECT 2 | fecha, 3 | CAST(zona_pernoctacion AS ZONES_ENUM) AS zona_pernoctacion, 4 | CAST(CASE edad 5 | WHEN 'NA' THEN NULL 6 | WHEN '0-25' THEN '0-25' 7 | WHEN '25-45' THEN '25-45' 8 | WHEN '45-65' THEN '45-65' 9 | WHEN '65-100' THEN '65-100' 10 | ELSE NULL 11 | END AS AGE_ENUM) 12 | AS edad, 13 | CAST (CASE sexo 14 | WHEN 'NA' THEN NULL 15 | ELSE sexo 16 | END AS SEX_ENUM) 17 | AS sexo, 18 | CAST(numero_viajes AS N_TRIPS_ENUM) AS numero_viajes, 19 | personas, 20 | CAST(year AS INTEGER) AS ano, 21 | CAST(month AS INTEGER) AS mes, 22 | CAST(day AS INTEGER) AS dia 23 | FROM nt_csv_raw; 24 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-gau-raw-csv-view.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW nt_csv_raw AS SELECT * 2 | /* csv_folder needs to be replaced with a valid path 3 | in R use glue::glue() */ 4 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE, 5 | columns={{ 6 | 'fecha': 'DATE', 7 | 'zona_pernoctacion': 'VARCHAR', 8 | 'edad': 'VARCHAR', 9 | "sexo": 'VARCHAR', 10 | 'numero_viajes': 'VARCHAR', 11 | 'personas': 'DOUBLE' 12 | }}, 13 | dateformat='%Y%m%d'); 14 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-municipios-clean-csv-view-en.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW nt_csv_clean AS SELECT 2 | fecha AS date, 3 | CAST(zona_pernoctacion AS ZONES_ENUM) AS id, 4 | CAST(CASE edad 5 | WHEN 'NA' THEN NULL 6 | WHEN '0-25' THEN '0-25' 7 | WHEN '25-45' THEN 
'25-45' 8 | WHEN '45-65' THEN '45-65' 9 | WHEN '65-100' THEN '65-100' 10 | ELSE NULL 11 | END AS AGE_ENUM) 12 | AS age, 13 | CAST(CASE sexo 14 | WHEN 'NA' THEN NULL 15 | WHEN 'mujer' THEN 'female' 16 | WHEN 'hombre' THEN 'male' 17 | END AS SEX_ENUM) 18 | AS sex, 19 | CAST(numero_viajes AS N_TRIPS_ENUM) AS n_trips, 20 | personas AS n_persons, 21 | CAST(year AS INTEGER) AS year, 22 | CAST(month AS INTEGER) AS month, 23 | CAST(day AS INTEGER) AS day 24 | FROM nt_csv_raw; 25 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-municipios-clean-csv-view-es.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW nt_csv_clean AS SELECT 2 | fecha, 3 | CAST(zona_pernoctacion AS ZONES_ENUM) AS zona_pernoctacion, 4 | CAST(CASE edad 5 | WHEN 'NA' THEN NULL 6 | WHEN '0-25' THEN '0-25' 7 | WHEN '25-45' THEN '25-45' 8 | WHEN '45-65' THEN '45-65' 9 | WHEN '65-100' THEN '65-100' 10 | ELSE NULL 11 | END AS AGE_ENUM) 12 | AS edad, 13 | CAST (CASE sexo 14 | WHEN 'NA' THEN NULL 15 | ELSE sexo 16 | END AS SEX_ENUM) 17 | AS sexo, 18 | CAST(numero_viajes AS N_TRIPS_ENUM) AS numero_viajes, 19 | personas, 20 | CAST(year AS INTEGER) AS ano, 21 | CAST(month AS INTEGER) AS mes, 22 | CAST(day AS INTEGER) AS dia 23 | FROM nt_csv_raw; 24 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-nt-municipios-raw-csv-view.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW nt_csv_raw AS SELECT * 2 | /* csv_folder needs to be replaced with a valid path 3 | in R use glue::glue() */ 4 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE, 5 | columns={{ 6 | 'fecha': 'DATE', 7 | 'zona_pernoctacion': 'VARCHAR', 8 | 'edad': 'VARCHAR', 9 | "sexo": 'VARCHAR', 10 | 'numero_viajes': 'VARCHAR', 11 | 'personas': 'DOUBLE' 12 | }}, 13 | dateformat='%Y%m%d'); 14 | 
-------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-distritos-clean-csv-view-en.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW od_csv_clean AS SELECT 2 | fecha AS date, 3 | periodo AS hour, 4 | CAST (CASE origen 5 | WHEN 'externo' THEN 'external' 6 | ELSE origen 7 | END AS ZONES_ENUM) 8 | AS id_origin, 9 | CAST (CASE destino 10 | WHEN 'externo' THEN 'external' 11 | ELSE destino 12 | END AS ZONES_ENUM) 13 | AS id_destination, 14 | CAST (distancia AS DISTANCE_ENUM) AS distance, 15 | CAST (CASE actividad_origen 16 | WHEN 'casa' THEN 'home' 17 | WHEN 'frecuente' THEN 'frequent_activity' 18 | WHEN 'no_frecuente' THEN 'infrequent_activity' 19 | WHEN 'trabajo_estudio' THEN 'work_or_study' 20 | END AS ACTIV_ENUM) 21 | AS activity_origin, 22 | CAST (CASE actividad_destino 23 | WHEN 'casa' THEN 'home' 24 | WHEN 'frecuente' THEN 'frequent_activity' 25 | WHEN 'no_frecuente' THEN 'infrequent_activity' 26 | WHEN 'trabajo_estudio' THEN 'work_or_study' 27 | END AS ACTIV_ENUM) 28 | AS activity_destination, 29 | CASE estudio_origen_posible 30 | WHEN 'si' THEN TRUE 31 | WHEN 'no' THEN FALSE 32 | END AS study_possible_origin, 33 | CASE estudio_destino_posible 34 | WHEN 'si' THEN TRUE 35 | WHEN 'no' THEN FALSE 36 | END AS study_possible_destination, 37 | CAST(residencia AS INE_PROV_CODE_ENUM) AS residence_province_ine_code, 38 | CAST (CASE residencia 39 | WHEN '01' THEN 'Araba/Álava' 40 | WHEN '02' THEN 'Albacete' 41 | WHEN '03' THEN 'Alicante/Alacant' 42 | WHEN '04' THEN 'Almería' 43 | WHEN '05' THEN 'Ávila' 44 | WHEN '06' THEN 'Badajoz' 45 | WHEN '07' THEN 'Balears, Illes' 46 | WHEN '08' THEN 'Barcelona' 47 | WHEN '09' THEN 'Burgos' 48 | WHEN '10' THEN 'Cáceres' 49 | WHEN '11' THEN 'Cádiz' 50 | WHEN '12' THEN 'Castellón/Castelló' 51 | WHEN '13' THEN 'Ciudad Real' 52 | WHEN '14' THEN 'Córdoba' 53 | WHEN '15' THEN 'Coruña, A' 54 | WHEN '16' THEN 'Cuenca' 55 | WHEN '17' 
THEN 'Girona' 56 | WHEN '18' THEN 'Granada' 57 | WHEN '19' THEN 'Guadalajara' 58 | WHEN '20' THEN 'Gipuzkoa' 59 | WHEN '21' THEN 'Huelva' 60 | WHEN '22' THEN 'Huesca' 61 | WHEN '23' THEN 'Jaén' 62 | WHEN '24' THEN 'León' 63 | WHEN '25' THEN 'Lleida' 64 | WHEN '26' THEN 'Rioja, La' 65 | WHEN '27' THEN 'Lugo' 66 | WHEN '28' THEN 'Madrid' 67 | WHEN '29' THEN 'Málaga' 68 | WHEN '30' THEN 'Murcia' 69 | WHEN '31' THEN 'Navarra' 70 | WHEN '32' THEN 'Ourense' 71 | WHEN '33' THEN 'Asturias' 72 | WHEN '34' THEN 'Palencia' 73 | WHEN '35' THEN 'Palmas, Las' 74 | WHEN '36' THEN 'Pontevedra' 75 | WHEN '37' THEN 'Salamanca' 76 | WHEN '38' THEN 'Santa Cruz de Tenerife' 77 | WHEN '39' THEN 'Cantabria' 78 | WHEN '40' THEN 'Segovia' 79 | WHEN '41' THEN 'Sevilla' 80 | WHEN '42' THEN 'Soria' 81 | WHEN '43' THEN 'Tarragona' 82 | WHEN '44' THEN 'Teruel' 83 | WHEN '45' THEN 'Toledo' 84 | WHEN '46' THEN 'Valencia/València' 85 | WHEN '47' THEN 'Valladolid' 86 | WHEN '48' THEN 'Bizkaia' 87 | WHEN '49' THEN 'Zamora' 88 | WHEN '50' THEN 'Zaragoza' 89 | WHEN '51' THEN 'Ceuta' 90 | WHEN '52' THEN 'Melilla' 91 | END AS INE_PROV_NAME_ENUM) 92 | AS residence_province_name, 93 | CAST (renta AS INCOME_ENUM) AS income, 94 | CAST (CASE edad 95 | WHEN 'NA' THEN NULL 96 | WHEN '0-25' THEN '0-25' 97 | WHEN '25-45' THEN '25-45' 98 | WHEN '45-65' THEN '45-65' 99 | WHEN '65-100' THEN '65-100' 100 | ELSE NULL 101 | END AS AGE_ENUM) 102 | AS age, 103 | CAST (CASE sexo 104 | WHEN 'NA' THEN NULL 105 | WHEN 'mujer' THEN 'female' 106 | WHEN 'hombre' THEN 'male' 107 | END AS SEX_ENUM) 108 | AS sex, 109 | viajes AS n_trips, 110 | viajes_km AS trips_total_length_km, 111 | CAST(year AS INTEGER) AS year, 112 | CAST(month AS INTEGER) AS month, 113 | CAST(day AS INTEGER) AS day, 114 | periodo AS time_slot 115 | FROM od_csv_raw; 116 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-distritos-clean-csv-view-es.sql: 
-------------------------------------------------------------------------------- 1 | /* Spanish-language clean view over od_csv_raw: Spanish column names and Spanish category labels, typed with the *_ENUM types */ CREATE VIEW od_csv_clean AS SELECT 2 | fecha, 3 | periodo, 4 | CAST (origen AS ZONES_ENUM) AS origen, 5 | CAST (destino AS ZONES_ENUM) AS destino, 6 | CAST (distancia AS DISTANCE_ENUM) AS distancia, 7 | /* activity labels stay in Spanish so they match the Spanish ACTIV_ENUM ('casa', 'frecuente', 'no_frecuente', 'trabajo_estudio'); unlisted values become NULL */ CAST (CASE actividad_origen 8 | WHEN 'casa' THEN 'casa' 9 | WHEN 'frecuente' THEN 'frecuente' 10 | WHEN 'no_frecuente' THEN 'no_frecuente' 11 | WHEN 'trabajo_estudio' THEN 'trabajo_estudio' 12 | END AS ACTIV_ENUM) 13 | AS actividad_origen, 14 | CAST (CASE actividad_destino 15 | WHEN 'casa' THEN 'casa' 16 | WHEN 'frecuente' THEN 'frecuente' 17 | WHEN 'no_frecuente' THEN 'no_frecuente' 18 | WHEN 'trabajo_estudio' THEN 'trabajo_estudio' 19 | END AS ACTIV_ENUM) 20 | AS actividad_destino, 21 | CASE estudio_origen_posible 22 | WHEN 'si' THEN TRUE 23 | WHEN 'no' THEN FALSE 24 | END AS estudio_origen_posible, 25 | CASE estudio_destino_posible 26 | WHEN 'si' THEN TRUE 27 | WHEN 'no' THEN FALSE 28 | END AS estudio_destino_posible, 29 | CAST(residencia AS INE_PROV_CODE_ENUM) AS residencia, 30 | CAST (CASE residencia 31 | WHEN '01' THEN 'Araba/Álava' 32 | WHEN '02' THEN 'Albacete' 33 | WHEN '03' THEN 'Alicante/Alacant' 34 | WHEN '04' THEN 'Almería' 35 | WHEN '05' THEN 'Ávila' 36 | WHEN '06' THEN 'Badajoz' 37 | WHEN '07' THEN 'Balears, Illes' 38 | WHEN '08' THEN 'Barcelona' 39 | WHEN '09' THEN 'Burgos' 40 | WHEN '10' THEN 'Cáceres' 41 | WHEN '11' THEN 'Cádiz' 42 | WHEN '12' THEN 'Castellón/Castelló' 43 | WHEN '13' THEN 'Ciudad Real' 44 | WHEN '14' THEN 'Córdoba' 45 | WHEN '15' THEN 'Coruña, A' 46 | WHEN '16' THEN 'Cuenca' 47 | WHEN '17' THEN 'Girona' 48 | WHEN '18' THEN 'Granada' 49 | WHEN '19' THEN 'Guadalajara' 50 | WHEN '20' THEN 'Gipuzkoa' 51 | WHEN '21' THEN 'Huelva' 52 | WHEN '22' THEN 'Huesca' 53 | WHEN '23' THEN 'Jaén' 54 | WHEN '24' THEN 'León' 55 | WHEN '25' THEN 'Lleida' 56 | WHEN '26' THEN 'Rioja, La' 57 | WHEN '27' THEN 'Lugo' 58 | WHEN '28' THEN 'Madrid'
59 | WHEN '29' THEN 'Málaga' 60 | WHEN '30' THEN 'Murcia' 61 | WHEN '31' THEN 'Navarra' 62 | WHEN '32' THEN 'Ourense' 63 | WHEN '33' THEN 'Asturias' 64 | WHEN '34' THEN 'Palencia' 65 | WHEN '35' THEN 'Palmas, Las' 66 | WHEN '36' THEN 'Pontevedra' 67 | WHEN '37' THEN 'Salamanca' 68 | WHEN '38' THEN 'Santa Cruz de Tenerife' 69 | WHEN '39' THEN 'Cantabria' 70 | WHEN '40' THEN 'Segovia' 71 | WHEN '41' THEN 'Sevilla' 72 | WHEN '42' THEN 'Soria' 73 | WHEN '43' THEN 'Tarragona' 74 | WHEN '44' THEN 'Teruel' 75 | WHEN '45' THEN 'Toledo' 76 | WHEN '46' THEN 'Valencia/València' 77 | WHEN '47' THEN 'Valladolid' 78 | WHEN '48' THEN 'Bizkaia' 79 | WHEN '49' THEN 'Zamora' 80 | WHEN '50' THEN 'Zaragoza' 81 | WHEN '51' THEN 'Ceuta' 82 | WHEN '52' THEN 'Melilla' 83 | END AS INE_PROV_NAME_ENUM) 84 | AS residencia_nombre, 85 | CAST (renta AS INCOME_ENUM) AS renta, 86 | CAST (CASE edad 87 | WHEN 'NA' THEN NULL 88 | WHEN '0-25' THEN '0-25' 89 | WHEN '25-45' THEN '25-45' 90 | WHEN '45-65' THEN '45-65' 91 | WHEN '65-100' THEN '65-100' 92 | ELSE NULL 93 | END AS AGE_ENUM) 94 | AS edad, 95 | /* sex labels stay in Spanish so they match the Spanish SEX_ENUM ('mujer', 'hombre'); 'NA' and unlisted values become NULL */ CAST (CASE sexo 96 | WHEN 'NA' THEN NULL 97 | WHEN 'mujer' THEN 'mujer' 98 | WHEN 'hombre' THEN 'hombre' 99 | END AS SEX_ENUM) 100 | AS sexo, 101 | viajes, 102 | viajes_km, 103 | CAST(year AS INTEGER) AS ano, 104 | CAST(month AS INTEGER) AS mes, 105 | CAST(day AS INTEGER) AS dia 106 | FROM od_csv_raw; 107 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-distritos-raw-csv-view.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW od_csv_raw AS SELECT * 2 | /* csv_folder needs to be replaced with a valid path 3 | in R use glue::glue() */ 4 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', 5 | delim='|', 6 | header=TRUE, 7 | hive_partitioning=TRUE, 8 | columns={{ 9 | 'fecha': 'DATE', 10 | 'periodo': 'INTEGER', 11 | 'origen': 'VARCHAR', 12 | 'destino': 'VARCHAR', 13 | 'distancia': 'VARCHAR', 14 |
'actividad_origen': 'VARCHAR', 15 | 'actividad_destino': 'VARCHAR', 16 | 'estudio_origen_posible': 'VARCHAR', 17 | 'estudio_destino_posible': 'VARCHAR', 18 | 'residencia': 'VARCHAR', 19 | 'renta': 'VARCHAR', 20 | 'edad': 'VARCHAR', 21 | 'sexo': 'VARCHAR', 22 | 'viajes': 'DOUBLE', 23 | 'viajes_km': 'DOUBLE' 24 | }}, 25 | dateformat='%Y%m%d'); 26 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-enum-activity-en.sql: -------------------------------------------------------------------------------- 1 | CREATE TYPE ACTIV_ENUM AS ENUM ( 2 | 'home', 3 | 'frequent_activity', 4 | 'infrequent_activity', 5 | 'work_or_study'); 6 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-enum-activity-es.sql: -------------------------------------------------------------------------------- 1 | /* Spanish activity labels, one enum member per label; every member must be comma-separated, otherwise adjacent string literals are not parsed as separate members */ CREATE TYPE ACTIV_ENUM AS ENUM ( 2 | 'casa', 3 | 'frecuente', 4 | 'no_frecuente', 5 | 'trabajo_estudio'); 6 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-enum-age.sql: -------------------------------------------------------------------------------- 1 | CREATE TYPE AGE_ENUM AS ENUM ( 2 | '0-25', 3 | '25-45', 4 | '45-65', 5 | '65-100'); 6 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-enum-distance.sql: -------------------------------------------------------------------------------- 1 | CREATE TYPE DISTANCE_ENUM AS ENUM ( 2 | '0.5-2', 3 | '2-10', 4 | '10-50', 5 | '>50'); 6 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-enum-income.sql: -------------------------------------------------------------------------------- 1 | CREATE TYPE INCOME_ENUM AS ENUM ( 2 | '<10', 3 | '10-15', 4 | '>15'); 5 | --------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-enum-sex-en.sql: -------------------------------------------------------------------------------- 1 | CREATE TYPE SEX_ENUM AS ENUM ( 2 | 'female', 3 | 'male'); 4 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-enum-sex-es.sql: -------------------------------------------------------------------------------- 1 | CREATE TYPE SEX_ENUM AS ENUM ( 2 | 'mujer', 3 | 'hombre'); 4 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-gau-clean-csv-view-en.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW od_csv_clean AS SELECT 2 | fecha AS date, 3 | periodo AS hour, 4 | CAST (CASE origen 5 | WHEN 'externo' THEN 'external' 6 | ELSE origen 7 | END AS ZONES_ENUM) 8 | AS id_origin, 9 | CAST (CASE destino 10 | WHEN 'externo' THEN 'external' 11 | ELSE destino 12 | END AS ZONES_ENUM) 13 | AS id_destination, 14 | CAST (distancia AS DISTANCE_ENUM) AS distance, 15 | CAST (CASE actividad_origen 16 | WHEN 'casa' THEN 'home' 17 | WHEN 'frecuente' THEN 'frequent_activity' 18 | WHEN 'no_frecuente' THEN 'infrequent_activity' 19 | WHEN 'trabajo_estudio' THEN 'work_or_study' 20 | END AS ACTIV_ENUM) 21 | AS activity_origin, 22 | CAST (CASE actividad_destino 23 | WHEN 'casa' THEN 'home' 24 | WHEN 'frecuente' THEN 'frequent_activity' 25 | WHEN 'no_frecuente' THEN 'infrequent_activity' 26 | WHEN 'trabajo_estudio' THEN 'work_or_study' 27 | END AS ACTIV_ENUM) 28 | AS activity_destination, 29 | CASE estudio_origen_posible 30 | WHEN 'si' THEN TRUE 31 | WHEN 'no' THEN FALSE 32 | END AS study_possible_origin, 33 | CASE estudio_destino_posible 34 | WHEN 'si' THEN TRUE 35 | WHEN 'no' THEN FALSE 36 | END AS study_possible_destination, 37 | CAST(residencia AS INE_PROV_CODE_ENUM) AS residence_province_ine_code, 38 | CAST (CASE residencia 39 | WHEN '01' THEN 'Araba/Álava' 40 | WHEN '02' THEN 
'Albacete' 41 | WHEN '03' THEN 'Alicante/Alacant' 42 | WHEN '04' THEN 'Almería' 43 | WHEN '05' THEN 'Ávila' 44 | WHEN '06' THEN 'Badajoz' 45 | WHEN '07' THEN 'Balears, Illes' 46 | WHEN '08' THEN 'Barcelona' 47 | WHEN '09' THEN 'Burgos' 48 | WHEN '10' THEN 'Cáceres' 49 | WHEN '11' THEN 'Cádiz' 50 | WHEN '12' THEN 'Castellón/Castelló' 51 | WHEN '13' THEN 'Ciudad Real' 52 | WHEN '14' THEN 'Córdoba' 53 | WHEN '15' THEN 'Coruña, A' 54 | WHEN '16' THEN 'Cuenca' 55 | WHEN '17' THEN 'Girona' 56 | WHEN '18' THEN 'Granada' 57 | WHEN '19' THEN 'Guadalajara' 58 | WHEN '20' THEN 'Gipuzkoa' 59 | WHEN '21' THEN 'Huelva' 60 | WHEN '22' THEN 'Huesca' 61 | WHEN '23' THEN 'Jaén' 62 | WHEN '24' THEN 'León' 63 | WHEN '25' THEN 'Lleida' 64 | WHEN '26' THEN 'Rioja, La' 65 | WHEN '27' THEN 'Lugo' 66 | WHEN '28' THEN 'Madrid' 67 | WHEN '29' THEN 'Málaga' 68 | WHEN '30' THEN 'Murcia' 69 | WHEN '31' THEN 'Navarra' 70 | WHEN '32' THEN 'Ourense' 71 | WHEN '33' THEN 'Asturias' 72 | WHEN '34' THEN 'Palencia' 73 | WHEN '35' THEN 'Palmas, Las' 74 | WHEN '36' THEN 'Pontevedra' 75 | WHEN '37' THEN 'Salamanca' 76 | WHEN '38' THEN 'Santa Cruz de Tenerife' 77 | WHEN '39' THEN 'Cantabria' 78 | WHEN '40' THEN 'Segovia' 79 | WHEN '41' THEN 'Sevilla' 80 | WHEN '42' THEN 'Soria' 81 | WHEN '43' THEN 'Tarragona' 82 | WHEN '44' THEN 'Teruel' 83 | WHEN '45' THEN 'Toledo' 84 | WHEN '46' THEN 'Valencia/València' 85 | WHEN '47' THEN 'Valladolid' 86 | WHEN '48' THEN 'Bizkaia' 87 | WHEN '49' THEN 'Zamora' 88 | WHEN '50' THEN 'Zaragoza' 89 | WHEN '51' THEN 'Ceuta' 90 | WHEN '52' THEN 'Melilla' 91 | END AS INE_PROV_NAME_ENUM) 92 | AS residence_province_name, 93 | CAST (renta AS INCOME_ENUM) AS income, 94 | CAST (CASE edad 95 | WHEN 'NA' THEN NULL 96 | WHEN '0-25' THEN '0-25' 97 | WHEN '25-45' THEN '25-45' 98 | WHEN '45-65' THEN '45-65' 99 | WHEN '65-100' THEN '65-100' 100 | ELSE NULL 101 | END AS AGE_ENUM) 102 | AS age, 103 | CAST (CASE sexo 104 | WHEN 'NA' THEN NULL 105 | WHEN 'mujer' THEN 'female' 106 | WHEN 
'hombre' THEN 'male' 107 | END AS SEX_ENUM) 108 | AS sex, 109 | viajes AS n_trips, 110 | viajes_km AS trips_total_length_km, 111 | CAST(year AS INTEGER) AS year, 112 | CAST(month AS INTEGER) AS month, 113 | CAST(day AS INTEGER) AS day 114 | FROM od_csv_raw; 115 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-gau-clean-csv-view-es.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW od_csv_clean AS SELECT 2 | fecha, 3 | periodo, 4 | CAST (origen AS ZONES_ENUM) AS origen, 5 | CAST (destino AS ZONES_ENUM) AS destino, 6 | CAST (distancia AS DISTANCE_ENUM) AS distancia, 7 | CAST (actividad_origen AS ACTIV_ENUM) AS actividad_origen, 8 | CAST (actividad_destino AS ACTIV_ENUM) AS actividad_destino, 9 | CASE estudio_origen_posible 10 | WHEN 'si' THEN TRUE 11 | WHEN 'no' THEN FALSE 12 | END AS estudio_origen_posible, 13 | CASE estudio_destino_posible 14 | WHEN 'si' THEN TRUE 15 | WHEN 'no' THEN FALSE 16 | END AS estudio_destino_posible, 17 | CAST(residencia AS INE_PROV_CODE_ENUM) AS residencia, 18 | CAST (CASE residencia 19 | WHEN '01' THEN 'Araba/Álava' 20 | WHEN '02' THEN 'Albacete' 21 | WHEN '03' THEN 'Alicante/Alacant' 22 | WHEN '04' THEN 'Almería' 23 | WHEN '05' THEN 'Ávila' 24 | WHEN '06' THEN 'Badajoz' 25 | WHEN '07' THEN 'Balears, Illes' 26 | WHEN '08' THEN 'Barcelona' 27 | WHEN '09' THEN 'Burgos' 28 | WHEN '10' THEN 'Cáceres' 29 | WHEN '11' THEN 'Cádiz' 30 | WHEN '12' THEN 'Castellón/Castelló' 31 | WHEN '13' THEN 'Ciudad Real' 32 | WHEN '14' THEN 'Córdoba' 33 | WHEN '15' THEN 'Coruña, A' 34 | WHEN '16' THEN 'Cuenca' 35 | WHEN '17' THEN 'Girona' 36 | WHEN '18' THEN 'Granada' 37 | WHEN '19' THEN 'Guadalajara' 38 | WHEN '20' THEN 'Gipuzkoa' 39 | WHEN '21' THEN 'Huelva' 40 | WHEN '22' THEN 'Huesca' 41 | WHEN '23' THEN 'Jaén' 42 | WHEN '24' THEN 'León' 43 | WHEN '25' THEN 'Lleida' 44 | WHEN '26' THEN 'Rioja, La' 45 | WHEN '27' THEN 'Lugo' 46 | WHEN '28' 
THEN 'Madrid' 47 | WHEN '29' THEN 'Málaga' 48 | WHEN '30' THEN 'Murcia' 49 | WHEN '31' THEN 'Navarra' 50 | WHEN '32' THEN 'Ourense' 51 | WHEN '33' THEN 'Asturias' 52 | WHEN '34' THEN 'Palencia' 53 | WHEN '35' THEN 'Palmas, Las' 54 | WHEN '36' THEN 'Pontevedra' 55 | WHEN '37' THEN 'Salamanca' 56 | WHEN '38' THEN 'Santa Cruz de Tenerife' 57 | WHEN '39' THEN 'Cantabria' 58 | WHEN '40' THEN 'Segovia' 59 | WHEN '41' THEN 'Sevilla' 60 | WHEN '42' THEN 'Soria' 61 | WHEN '43' THEN 'Tarragona' 62 | WHEN '44' THEN 'Teruel' 63 | WHEN '45' THEN 'Toledo' 64 | WHEN '46' THEN 'Valencia/València' 65 | WHEN '47' THEN 'Valladolid' 66 | WHEN '48' THEN 'Bizkaia' 67 | WHEN '49' THEN 'Zamora' 68 | WHEN '50' THEN 'Zaragoza' 69 | WHEN '51' THEN 'Ceuta' 70 | WHEN '52' THEN 'Melilla' 71 | END AS INE_PROV_NAME_ENUM) 72 | AS residencia_nombre, 73 | CAST (renta AS INCOME_ENUM) AS renta, 74 | CAST (CASE edad 75 | WHEN 'NA' THEN NULL 76 | WHEN '0-25' THEN '0-25' 77 | WHEN '25-45' THEN '25-45' 78 | WHEN '45-65' THEN '45-65' 79 | WHEN '65-100' THEN '65-100' 80 | ELSE NULL 81 | END AS AGE_ENUM) 82 | AS edad, 83 | CAST (CASE sexo 84 | WHEN 'NA' THEN NULL 85 | ELSE sexo 86 | END AS SEX_ENUM) 87 | AS sexo, 88 | viajes, 89 | viajes_km, 90 | CAST(year AS INTEGER) AS ano, 91 | CAST(month AS INTEGER) AS mes, 92 | CAST(day AS INTEGER) AS dia 93 | FROM od_csv_raw; 94 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-gau-raw-csv-view.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW od_csv_raw AS SELECT * 2 | /* csv_folder needs to be replaced with a valid path 3 | in R use glue::glue() */ 4 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', 5 | delim='|', 6 | header=TRUE, 7 | hive_partitioning=TRUE, 8 | columns={{ 9 | 'fecha': 'DATE', 10 | 'periodo': 'INTEGER', 11 | 'origen': 'VARCHAR', 12 | 'destino': 'VARCHAR', 13 | 'distancia': 'VARCHAR', 14 | 'actividad_origen': 'VARCHAR', 15 | 
'actividad_destino': 'VARCHAR', 16 | 'estudio_origen_posible': 'VARCHAR', 17 | 'estudio_destino_posible': 'VARCHAR', 18 | 'residencia': 'VARCHAR', 19 | 'renta': 'VARCHAR', 20 | 'edad': 'VARCHAR', 21 | 'sexo': 'VARCHAR', 22 | 'viajes': 'DOUBLE', 23 | 'viajes_km': 'DOUBLE' 24 | }}, 25 | dateformat='%Y%m%d'); 26 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-municipios-clean-csv-view-en.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW od_csv_clean AS SELECT 2 | fecha AS date, 3 | periodo AS hour, 4 | CAST (CASE origen 5 | WHEN 'externo' THEN 'external' 6 | ELSE origen 7 | END AS ZONES_ENUM) 8 | AS id_origin, 9 | CAST (CASE destino 10 | WHEN 'externo' THEN 'external' 11 | ELSE destino 12 | END AS ZONES_ENUM) 13 | AS id_destination, 14 | CAST (distancia AS DISTANCE_ENUM) AS distance, 15 | CAST (CASE actividad_origen 16 | WHEN 'casa' THEN 'home' 17 | WHEN 'frecuente' THEN 'frequent_activity' 18 | WHEN 'no_frecuente' THEN 'infrequent_activity' 19 | WHEN 'trabajo_estudio' THEN 'work_or_study' 20 | END AS ACTIV_ENUM) 21 | AS activity_origin, 22 | CAST (CASE actividad_destino 23 | WHEN 'casa' THEN 'home' 24 | WHEN 'frecuente' THEN 'frequent_activity' 25 | WHEN 'no_frecuente' THEN 'infrequent_activity' 26 | WHEN 'trabajo_estudio' THEN 'work_or_study' 27 | END AS ACTIV_ENUM) 28 | AS activity_destination, 29 | CASE estudio_origen_posible 30 | WHEN 'si' THEN TRUE 31 | WHEN 'no' THEN FALSE 32 | END AS study_possible_origin, 33 | CASE estudio_destino_posible 34 | WHEN 'si' THEN TRUE 35 | WHEN 'no' THEN FALSE 36 | END AS study_possible_destination, 37 | CAST(residencia AS INE_PROV_CODE_ENUM) AS residence_province_ine_code, 38 | CAST (CASE residencia 39 | WHEN '01' THEN 'Araba/Álava' 40 | WHEN '02' THEN 'Albacete' 41 | WHEN '03' THEN 'Alicante/Alacant' 42 | WHEN '04' THEN 'Almería' 43 | WHEN '05' THEN 'Ávila' 44 | WHEN '06' THEN 'Badajoz' 45 | WHEN '07' THEN 
'Balears, Illes' 46 | WHEN '08' THEN 'Barcelona' 47 | WHEN '09' THEN 'Burgos' 48 | WHEN '10' THEN 'Cáceres' 49 | WHEN '11' THEN 'Cádiz' 50 | WHEN '12' THEN 'Castellón/Castelló' 51 | WHEN '13' THEN 'Ciudad Real' 52 | WHEN '14' THEN 'Córdoba' 53 | WHEN '15' THEN 'Coruña, A' 54 | WHEN '16' THEN 'Cuenca' 55 | WHEN '17' THEN 'Girona' 56 | WHEN '18' THEN 'Granada' 57 | WHEN '19' THEN 'Guadalajara' 58 | WHEN '20' THEN 'Gipuzkoa' 59 | WHEN '21' THEN 'Huelva' 60 | WHEN '22' THEN 'Huesca' 61 | WHEN '23' THEN 'Jaén' 62 | WHEN '24' THEN 'León' 63 | WHEN '25' THEN 'Lleida' 64 | WHEN '26' THEN 'Rioja, La' 65 | WHEN '27' THEN 'Lugo' 66 | WHEN '28' THEN 'Madrid' 67 | WHEN '29' THEN 'Málaga' 68 | WHEN '30' THEN 'Murcia' 69 | WHEN '31' THEN 'Navarra' 70 | WHEN '32' THEN 'Ourense' 71 | WHEN '33' THEN 'Asturias' 72 | WHEN '34' THEN 'Palencia' 73 | WHEN '35' THEN 'Palmas, Las' 74 | WHEN '36' THEN 'Pontevedra' 75 | WHEN '37' THEN 'Salamanca' 76 | WHEN '38' THEN 'Santa Cruz de Tenerife' 77 | WHEN '39' THEN 'Cantabria' 78 | WHEN '40' THEN 'Segovia' 79 | WHEN '41' THEN 'Sevilla' 80 | WHEN '42' THEN 'Soria' 81 | WHEN '43' THEN 'Tarragona' 82 | WHEN '44' THEN 'Teruel' 83 | WHEN '45' THEN 'Toledo' 84 | WHEN '46' THEN 'Valencia/València' 85 | WHEN '47' THEN 'Valladolid' 86 | WHEN '48' THEN 'Bizkaia' 87 | WHEN '49' THEN 'Zamora' 88 | WHEN '50' THEN 'Zaragoza' 89 | WHEN '51' THEN 'Ceuta' 90 | WHEN '52' THEN 'Melilla' 91 | END AS INE_PROV_NAME_ENUM) 92 | AS residence_province_name, 93 | CAST (renta AS INCOME_ENUM) AS income, 94 | CAST (CASE edad 95 | WHEN 'NA' THEN NULL 96 | WHEN '0-25' THEN '0-25' 97 | WHEN '25-45' THEN '25-45' 98 | WHEN '45-65' THEN '45-65' 99 | WHEN '65-100' THEN '65-100' 100 | ELSE NULL 101 | END AS AGE_ENUM) 102 | AS age, 103 | CAST (CASE sexo 104 | WHEN 'NA' THEN NULL 105 | WHEN 'mujer' THEN 'female' 106 | WHEN 'hombre' THEN 'male' 107 | END AS SEX_ENUM) 108 | AS sex, 109 | viajes AS n_trips, 110 | viajes_km AS trips_total_length_km, 111 | CAST(year AS INTEGER) AS year, 112 
| CAST(month AS INTEGER) AS month, 113 | CAST(day AS INTEGER) AS day 114 | FROM od_csv_raw; 115 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-municipios-clean-csv-view-es.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW od_csv_clean AS SELECT 2 | fecha, 3 | periodo, 4 | CAST (origen AS ZONES_ENUM) AS origen, 5 | CAST (destino AS ZONES_ENUM) AS destino, 6 | CAST (distancia AS DISTANCE_ENUM) AS distancia, 7 | CAST (actividad_origen AS ACTIV_ENUM) AS actividad_origen, 8 | CAST (actividad_destino AS ACTIV_ENUM) AS actividad_destino, 9 | CASE estudio_origen_posible 10 | WHEN 'si' THEN TRUE 11 | WHEN 'no' THEN FALSE 12 | END AS estudio_origen_posible, 13 | CASE estudio_destino_posible 14 | WHEN 'si' THEN TRUE 15 | WHEN 'no' THEN FALSE 16 | END AS estudio_destino_posible, 17 | CAST(residencia AS INE_PROV_CODE_ENUM) AS residencia, 18 | CAST (CASE residencia 19 | WHEN '01' THEN 'Araba/Álava' 20 | WHEN '02' THEN 'Albacete' 21 | WHEN '03' THEN 'Alicante/Alacant' 22 | WHEN '04' THEN 'Almería' 23 | WHEN '05' THEN 'Ávila' 24 | WHEN '06' THEN 'Badajoz' 25 | WHEN '07' THEN 'Balears, Illes' 26 | WHEN '08' THEN 'Barcelona' 27 | WHEN '09' THEN 'Burgos' 28 | WHEN '10' THEN 'Cáceres' 29 | WHEN '11' THEN 'Cádiz' 30 | WHEN '12' THEN 'Castellón/Castelló' 31 | WHEN '13' THEN 'Ciudad Real' 32 | WHEN '14' THEN 'Córdoba' 33 | WHEN '15' THEN 'Coruña, A' 34 | WHEN '16' THEN 'Cuenca' 35 | WHEN '17' THEN 'Girona' 36 | WHEN '18' THEN 'Granada' 37 | WHEN '19' THEN 'Guadalajara' 38 | WHEN '20' THEN 'Gipuzkoa' 39 | WHEN '21' THEN 'Huelva' 40 | WHEN '22' THEN 'Huesca' 41 | WHEN '23' THEN 'Jaén' 42 | WHEN '24' THEN 'León' 43 | WHEN '25' THEN 'Lleida' 44 | WHEN '26' THEN 'Rioja, La' 45 | WHEN '27' THEN 'Lugo' 46 | WHEN '28' THEN 'Madrid' 47 | WHEN '29' THEN 'Málaga' 48 | WHEN '30' THEN 'Murcia' 49 | WHEN '31' THEN 'Navarra' 50 | WHEN '32' THEN 'Ourense' 51 | WHEN '33' THEN 'Asturias' 
52 | WHEN '34' THEN 'Palencia' 53 | WHEN '35' THEN 'Palmas, Las' 54 | WHEN '36' THEN 'Pontevedra' 55 | WHEN '37' THEN 'Salamanca' 56 | WHEN '38' THEN 'Santa Cruz de Tenerife' 57 | WHEN '39' THEN 'Cantabria' 58 | WHEN '40' THEN 'Segovia' 59 | WHEN '41' THEN 'Sevilla' 60 | WHEN '42' THEN 'Soria' 61 | WHEN '43' THEN 'Tarragona' 62 | WHEN '44' THEN 'Teruel' 63 | WHEN '45' THEN 'Toledo' 64 | WHEN '46' THEN 'Valencia/València' 65 | WHEN '47' THEN 'Valladolid' 66 | WHEN '48' THEN 'Bizkaia' 67 | WHEN '49' THEN 'Zamora' 68 | WHEN '50' THEN 'Zaragoza' 69 | WHEN '51' THEN 'Ceuta' 70 | WHEN '52' THEN 'Melilla' 71 | END AS INE_PROV_NAME_ENUM) 72 | AS residencia_nombre, 73 | CAST (renta AS INCOME_ENUM) AS renta, 74 | CAST (CASE edad 75 | WHEN 'NA' THEN NULL 76 | WHEN '0-25' THEN '0-25' 77 | WHEN '25-45' THEN '25-45' 78 | WHEN '45-65' THEN '45-65' 79 | WHEN '65-100' THEN '65-100' 80 | ELSE NULL 81 | END AS AGE_ENUM) 82 | AS edad, 83 | CAST (CASE sexo 84 | WHEN 'NA' THEN NULL 85 | ELSE sexo 86 | END AS SEX_ENUM) 87 | AS sexo, 88 | viajes, 89 | viajes_km, 90 | CAST(year AS INTEGER) AS ano, 91 | CAST(month AS INTEGER) AS mes, 92 | CAST(day AS INTEGER) AS dia 93 | FROM od_csv_raw; 94 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-od-municipios-raw-csv-view.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW od_csv_raw AS SELECT * 2 | /* csv_folder needs to be replaced with a valid path 3 | in R use glue::glue() */ 4 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', 5 | delim='|', 6 | header=TRUE, 7 | hive_partitioning=TRUE, 8 | columns={{ 9 | 'fecha': 'DATE', 10 | 'periodo': 'INTEGER', 11 | 'origen': 'VARCHAR', 12 | 'destino': 'VARCHAR', 13 | 'distancia': 'VARCHAR', 14 | 'actividad_origen': 'VARCHAR', 15 | 'actividad_destino': 'VARCHAR', 16 | 'estudio_origen_posible': 'VARCHAR', 17 | 'estudio_destino_posible': 'VARCHAR', 18 | 'residencia': 'VARCHAR', 19 | 'renta': 
'VARCHAR', 20 | 'edad': 'VARCHAR', 21 | 'sexo': 'VARCHAR', 22 | 'viajes': 'DOUBLE', 23 | 'viajes_km': 'DOUBLE' 24 | }}, 25 | dateformat='%Y%m%d'); 26 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-os-distritos-clean-csv-view-en.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW os_csv_clean AS SELECT 2 | fecha AS date, 3 | CAST(zona_residencia AS RESID_ZONES_ENUM) AS id_residence, 4 | CAST(zona_pernoctacion AS OVERNIGHT_ZONES_ENUM) AS id_overnight_stay, 5 | personas AS n_persons, 6 | CAST(year AS INTEGER) AS year, 7 | CAST(month AS INTEGER) AS month, 8 | CAST(day AS INTEGER) AS day 9 | FROM os_csv_raw; 10 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-os-distritos-clean-csv-view-es.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW os_csv_clean AS SELECT 2 | fecha, 3 | CAST(zona_residencia AS RESID_ZONES_ENUM) AS zona_residencia, 4 | CAST(zona_pernoctacion AS OVERNIGHT_ZONES_ENUM) AS zona_pernoctacion, 5 | personas, 6 | CAST(year AS INTEGER) AS ano, 7 | CAST(month AS INTEGER) AS mes, 8 | CAST(day AS INTEGER) AS dia 9 | FROM os_csv_raw; 10 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-os-distritos-raw-csv-view.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW os_csv_raw AS SELECT * 2 | /* csv_folder needs to be replaced with a valid path 3 | in R use glue::glue() */ 4 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE, 5 | columns={{ 6 | 'fecha': 'DATE', 7 | 'zona_residencia': 'VARCHAR', 8 | 'zona_pernoctacion': 'VARCHAR', 9 | 'personas': 'DOUBLE' 10 | }}, 11 | dateformat='%Y%m%d'); 12 | -------------------------------------------------------------------------------- 
/inst/extdata/sql-queries/v2-os-gau-clean-csv-view-en.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW os_csv_clean AS SELECT /* English-named view over os_csv_raw: the Spanish source columns are renamed and the zone ids are cast to enum types that are not defined in this file (assumed to be created beforehand - TODO confirm) */ 2 | fecha AS date, 3 | CAST(zona_residencia AS RESID_ZONES_ENUM) AS id_residence, 4 | CAST(zona_pernoctacion AS OVERNIGHT_ZONES_ENUM) AS id_overnight_stay, 5 | personas AS n_persons, 6 | /* year, month and day are not in the raw column list; presumably they come from the hive-partitioned folder names - verify */ CAST(year AS INTEGER) AS year, 7 | CAST(month AS INTEGER) AS month, 8 | CAST(day AS INTEGER) AS day 9 | FROM os_csv_raw; 10 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-os-gau-clean-csv-view-es.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW os_csv_clean AS SELECT /* Spanish-named variant of the same view: source column names are kept, only the types are changed */ 2 | fecha, 3 | CAST(zona_residencia AS RESID_ZONES_ENUM) AS zona_residencia, 4 | CAST(zona_pernoctacion AS OVERNIGHT_ZONES_ENUM) AS zona_pernoctacion, 5 | personas, 6 | /* year, month and day are exposed under the Spanish names ano, mes and dia */ CAST(year AS INTEGER) AS ano, 7 | CAST(month AS INTEGER) AS mes, 8 | CAST(day AS INTEGER) AS dia 9 | FROM os_csv_raw; 10 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-os-gau-raw-csv-view.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW os_csv_raw AS SELECT * 2 | /* csv_folder needs to be replaced with a valid path 3 | in R use glue::glue() */ 4 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE, /* pipe-delimited files with a header row, matched by the glob under csv_folder */ 5 | columns={{ /* the doubled curly brackets are glue escapes and become single ones after templating */ 6 | 'fecha': 'DATE', 7 | 'zona_residencia': 'VARCHAR', 8 | 'zona_pernoctacion': 'VARCHAR', 9 | 'personas': 'DOUBLE' 10 | }}, 11 | /* fecha values are parsed with the YYYYMMDD pattern */ dateformat='%Y%m%d'); 12 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-os-municipios-clean-csv-view-en.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW os_csv_clean AS SELECT /* English-named view over os_csv_raw: the Spanish source columns are renamed and the zone ids are cast to enum types that are not defined in this file (assumed to be created beforehand - TODO confirm) */ 2 | fecha AS date, 3 | CAST(zona_residencia AS RESID_ZONES_ENUM) AS
id_residence, 4 | CAST(zona_pernoctacion AS OVERNIGHT_ZONES_ENUM) AS id_overnight_stay, 5 | personas AS n_persons, 6 | /* year, month and day are not in the raw column list; presumably they come from the hive-partitioned folder names - verify */ CAST(year AS INTEGER) AS year, 7 | CAST(month AS INTEGER) AS month, 8 | CAST(day AS INTEGER) AS day 9 | FROM os_csv_raw; 10 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-os-municipios-clean-csv-view-es.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW os_csv_clean AS SELECT /* Spanish-named variant of the same view: source column names are kept, only the types are changed */ 2 | fecha, 3 | CAST(zona_residencia AS RESID_ZONES_ENUM) AS zona_residencia, 4 | CAST(zona_pernoctacion AS OVERNIGHT_ZONES_ENUM) AS zona_pernoctacion, 5 | personas, 6 | /* year, month and day are exposed under the Spanish names ano, mes and dia */ CAST(year AS INTEGER) AS ano, 7 | CAST(month AS INTEGER) AS mes, 8 | CAST(day AS INTEGER) AS dia 9 | FROM os_csv_raw; 10 | -------------------------------------------------------------------------------- /inst/extdata/sql-queries/v2-os-municipios-raw-csv-view.sql: -------------------------------------------------------------------------------- 1 | CREATE VIEW os_csv_raw AS SELECT * 2 | /* csv_folder needs to be replaced with a valid path 3 | in R use glue::glue() */ 4 | FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE, /* pipe-delimited files with a header row, matched by the glob under csv_folder */ 5 | columns={{ /* the doubled curly brackets are glue escapes and become single ones after templating */ 6 | 'fecha': 'DATE', 7 | 'zona_residencia': 'VARCHAR', 8 | 'zona_pernoctacion': 'VARCHAR', 9 | 'personas': 'DOUBLE' 10 | }}, 11 | /* fecha values are parsed with the YYYYMMDD pattern */ dateformat='%Y%m%d'); 12 | -------------------------------------------------------------------------------- /inst/extdata/url_file_sizes_v1.txt.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/inst/extdata/url_file_sizes_v1.txt.gz -------------------------------------------------------------------------------- /inst/extdata/url_file_sizes_v2.txt.gz: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/inst/extdata/url_file_sizes_v2.txt.gz -------------------------------------------------------------------------------- /inst/schemaorg.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": "https://schema.org", 3 | "@graph": [ 4 | { 5 | "type": "SoftwareSourceCode", 6 | "author": [ 7 | { 8 | "id": "https://orcid.org/0000-0001-6690-5345", 9 | "type": "Person", 10 | "email": "kotov.egor@gmail.com", 11 | "familyName": "Kotov", 12 | "givenName": "Egor" 13 | }, 14 | { 15 | "id": "https://orcid.org/0000-0001-5679-6536", 16 | "type": "Person", 17 | "email": "rob00x@gmail.com", 18 | "familyName": "Lovelace", 19 | "givenName": "Robin" 20 | } 21 | ], 22 | "codeRepository": "https://github.com/rOpenSpain/spanishoddata", 23 | "contributor": { 24 | "id": "https://orcid.org/0000-0001-5199-4103", 25 | "type": "Person", 26 | "familyName": "Vidal-Tortosa", 27 | "givenName": "Eugeni" 28 | }, 29 | "description": "Gain seamless access to origin-destination (OD) data from the Spanish Ministry of Transport, hosted at <https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/opendata-movilidad>. This package simplifies the management of these large datasets by providing tools to download zone boundaries, handle associated origin-destination data, and process it efficiently with the 'duckdb' database interface. Local caching minimizes repeated downloads, streamlining workflows for researchers and analysts. 
Extensive documentation is available at <https://ropenspain.github.io/spanishoddata/index.html>, offering guides on creating static and dynamic mobility flow visualizations and transforming large datasets into analysis-ready formats.", 30 | "license": "https://spdx.org/licenses/MIT", 31 | "name": "spanishoddata: Get Spanish Origin-Destination Data", 32 | "programmingLanguage": { 33 | "type": "ComputerLanguage", 34 | "name": "R", 35 | "url": "https://r-project.org" 36 | }, 37 | "provider": { 38 | "id": "https://cran.r-project.org", 39 | "type": "Organization", 40 | "name": "Comprehensive R Archive Network (CRAN)", 41 | "url": "https://cran.r-project.org" 42 | }, 43 | "runtimePlatform": "R version 4.4.3 (2025-02-28)", 44 | "version": "0.1.1" 45 | }, 46 | { 47 | "type": "SoftwareSourceCode", 48 | "author": { 49 | "type": "Organization", 50 | "name": "Ministerio de Transportes y Movilidad Sostenible (MITMS)" 51 | }, 52 | "name": "Estudio de movilidad de viajeros de ámbito nacional aplicando la tecnología Big Data. Informe metodológico (Study of National Traveler mobility Using Big Data Technology. 
Methodological Report)" 53 | }, 54 | { 55 | "type": "SoftwareSourceCode", 56 | "author": { 57 | "type": "Organization", 58 | "name": "Ministerio de Transportes, Movilidad y Agenda Urbana (MITMA)" 59 | }, 60 | "name": "Análisis de la movilidad en España con tecnología Big Data durante el estado de alarma para la gestión de la crisis del COVID-19 (Analysis of mobility in Spain with Big Data technology during the state of alarm for COVID-19 crisis management)" 61 | }, 62 | { 63 | "id": "https://doi.org/10.32614/CRAN.package.spanishoddata", 64 | "type": "SoftwareSourceCode", 65 | "author": [ 66 | { 67 | "id": "https://orcid.org/0000-0001-6690-5345", 68 | "type": "Person", 69 | "email": "kotov.egor@gmail.com", 70 | "familyName": "Kotov", 71 | "givenName": "Egor" 72 | }, 73 | { 74 | "id": "https://orcid.org/0000-0001-5679-6536", 75 | "type": "Person", 76 | "email": "rob00x@gmail.com", 77 | "familyName": "Lovelace", 78 | "givenName": "Robin" 79 | } 80 | ], 81 | "name": "spanishoddata" 82 | } 83 | ] 84 | } 85 | -------------------------------------------------------------------------------- /inst/vignette-include/csv-date-filter-note.qmd: -------------------------------------------------------------------------------- 1 | 2 | ::: callout-note 3 | 4 | As long as you use a table connection object created with `spod_get()` function, it is much quicker to filter the dates by the `year`, `month` and `day` variables, rather than by the `date` variable. This is because the data for each day is in a separate CSV file located in folders that look like `year=2020/month=2/day=14`. So when filtering by the `date` field, R will have to scan all CSV files comparing the specified date with what is stored inside each CSV file. However, if you query by `year`, `month` and `day` variables, R only needs to check these against the path to each CSV file, which is much quicker. This caveat is only relevant as long as you use `spod_get()` . 
If you convert (see the [relevant vignette](convert.qmd)) the downloaded data to a format that is optimized for quick analysis, you can use whichever field you want; it should not affect the performance. 5 | 6 | ::: 7 | -------------------------------------------------------------------------------- /inst/vignette-include/install-package.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | execute: 3 | eval: false 4 | --- 5 | 6 | 7 | ## Install the package {#install-package} 8 | 9 | Install from CRAN: 10 | 11 | ```{r} 12 | install.packages("spanishoddata") 13 | ``` 14 | 15 | 16 | 17 |
Alternative installation and development 18 | 19 | You can also install the latest development version of the package from rOpenSpain R universe: 20 | 21 | ```{r} 22 | install.packages("spanishoddata", 23 | repos = c("https://ropenspain.r-universe.dev", 24 | "https://cloud.r-project.org")) 25 | ``` 26 | 27 | 28 | Alternative way to install the development version from GitHub: 29 | 30 | ```{r} 31 | if (!require("remotes")) install.packages("remotes") 32 | 33 | remotes::install_github("rOpenSpain/spanishoddata", 34 | force = TRUE, dependencies = TRUE) 35 | ``` 36 | 37 | **For Developers** 38 | 39 | To load the package locally, clone it and navigate to the root of the package in the terminal, e.g. with the following: 40 | 41 | ```bash 42 | gh repo clone rOpenSpain/spanishoddata 43 | code spanishoddata 44 | # with rstudio: 45 | rstudio spanishoddata/spanishoddata.Rproj 46 | ``` 47 | 48 | Then run the following command from the R console: 49 | 50 | ```{r} 51 | #| eval: false 52 | devtools::load_all() 53 | ``` 54 | 55 |
56 | -------------------------------------------------------------------------------- /inst/vignette-include/missing-dates-outages.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | execute: 3 | eval: false 4 | --- 5 | 6 | 7 | ::: callout-warning 8 | 9 | 10 | Due to mobile network outages, the data on certain dates is missing. Kindly keep this in mind when calculating mean monthly or weekly flows. 11 | 12 | \ 13 | 14 | Please check the [original data page](https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/opendata-movilidad){target="_blank"} for currently known missing dates. At the time of writing, the following dates are missing: 26, 27, 30, 31 October; 1, 2 and 3 November 2023; 4, 18, 19 April 2024, 10 and 11 November 2024. You can use `spod_get_valid_dates()` function to get all available dates. 15 | 16 | 17 | ::: 18 | -------------------------------------------------------------------------------- /inst/vignette-include/overall-approach.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | execute: 3 | eval: false 4 | --- 5 | 6 | 7 | ## Overall approach to accessing the data 8 | 9 | If you only need flows data aggregated by day at municipal level, you can use the `spod_quick_get_od()` function. This will download the data directly from the web API and let you analyse it in-memory. More on this in the [Quickly get daily data](https://ropenspain.github.io/spanishoddata/articles/quick-get.html) vignette. 10 | 11 | If you only want to analyse the data for a few days, you can use the `spod_get()` function. It will download the raw data in CSV format and let you analyse it in-memory. This is what we cover in the steps on this page. 
12 | 13 | If you need longer periods (several months or years), you should use the `spod_convert()` and `spod_connect()` functions, which will convert the data into a special format that is much faster for analysis; for this, see the [Download and convert OD datasets](https://ropenspain.github.io/spanishoddata/articles/convert.html) vignette. `spod_get_zones()` will give you spatial data with zones that can be matched with the origin-destination flows from the functions above using the zones' 'id's. Please see a simple example below, and also consult the package vignettes with detailed data description and instructions using `spod_codebook(ver = 1)` and `spod_codebook(ver = 2)`, or simply visit the package website at [https://ropenspain.github.io/spanishoddata/](https://ropenspain.github.io/spanishoddata/). @fig-overall-flow presents the overall approach to accessing the data in the `spanishoddata` package. 14 | -------------------------------------------------------------------------------- /inst/vignette-include/setup-data-directory.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | execute: 3 | eval: false 4 | --- 5 | 6 | 7 | ## Set the data directory {#set-data-folder} 8 | 9 | Choose where `{spanishoddata}` should download (and convert) the data by setting the data directory with the following command: 10 | 11 | ```{r} 12 | spod_set_data_dir(data_dir = "~/spanish_od_data") 13 | ``` 14 | 15 | The function above will also ensure that the directory is created and that you have sufficient permissions to write to it. 16 | 17 |
Setting data directory for advanced users 18 | 19 | You can also set the data directory with an environment variable: 20 | 21 | ```{r} 22 | Sys.setenv(SPANISH_OD_DATA_DIR = "~/spanish_od_data") 23 | ``` 24 | 25 | The package will create this directory if it does not exist on the first run of any function that downloads the data. 26 | 27 | To permanently set the directory for all projects, you can specify the data directory globally by setting the `SPANISH_OD_DATA_DIR` environment variable, e.g. with the following command: 28 | 29 | ```{r} 30 | #| eval: false 31 | usethis::edit_r_environ() 32 | # Then set the data directory globally, by typing this line in the file: 33 | ``` 34 | 35 | ``` 36 | SPANISH_OD_DATA_DIR = "~/spanish_od_data" 37 | ``` 38 | 39 | You can also set the data directory locally, just for the current project. Set the 'envar' in the working directory by editing `.Renviron` file in the root of the project: 40 | 41 | ```{r} 42 | #| eval: false 43 | file.edit(".Renviron") 44 | ``` 45 | 46 | 47 |
48 | -------------------------------------------------------------------------------- /man/figures/README-desire-lines-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/README-desire-lines-1.png -------------------------------------------------------------------------------- /man/figures/README-distritos-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/README-distritos-1.png -------------------------------------------------------------------------------- /man/figures/README-salamanca-plot-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/README-salamanca-plot-1.png -------------------------------------------------------------------------------- /man/figures/README-salamanca-zones-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/README-salamanca-zones-1.png -------------------------------------------------------------------------------- /man/figures/README-trips-per-hour-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/README-trips-per-hour-1.png -------------------------------------------------------------------------------- /man/figures/card.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/card.png -------------------------------------------------------------------------------- /man/figures/flowmapblue-animated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/flowmapblue-animated.png -------------------------------------------------------------------------------- /man/figures/flowmapblue-standard-01.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/flowmapblue-standard-01.png -------------------------------------------------------------------------------- /man/figures/flowmapblue-standard-02.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/flowmapblue-standard-02.png -------------------------------------------------------------------------------- /man/figures/flowmapblue-standard-time.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/flowmapblue-standard-time.png -------------------------------------------------------------------------------- /man/figures/flows_plot_all_districts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/flows_plot_all_districts.png -------------------------------------------------------------------------------- /man/figures/flows_plot_barcelona.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/flows_plot_barcelona.png -------------------------------------------------------------------------------- /man/figures/lifecycle-deprecated.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: deprecated 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | deprecated 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-experimental.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: experimental 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | experimental 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/lifecycle-stable.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: stable 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 19 | 20 | lifecycle 21 | 22 | 25 | 26 | stable 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /man/figures/lifecycle-superseded.svg: -------------------------------------------------------------------------------- 1 | 2 | lifecycle: superseded 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | lifecycle 18 | 19 | superseded 20 | 21 | 22 | -------------------------------------------------------------------------------- /man/figures/logo-before-hex.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/logo-before-hex.png 
-------------------------------------------------------------------------------- /man/figures/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/logo.png -------------------------------------------------------------------------------- /man/figures/zones_barcelona_fua_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/zones_barcelona_fua_plot.png -------------------------------------------------------------------------------- /man/figures/zones_barcelona_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/zones_barcelona_plot.png -------------------------------------------------------------------------------- /man/global_quiet_param.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/global-params.R 3 | \name{global_quiet_param} 4 | \alias{global_quiet_param} 5 | \title{Global Quiet Parameter} 6 | \usage{ 7 | global_quiet_param(quiet = FALSE) 8 | } 9 | \arguments{ 10 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.} 11 | } 12 | \value{ 13 | Nothing. This function is just a placeholder for global quiet parameter. 14 | } 15 | \description{ 16 | Documentation for the \code{quiet} parameter, used globally. 
17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spanishoddata-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/spanishoddata-package.R 3 | \docType{package} 4 | \name{spanishoddata-package} 5 | \alias{spanishoddata} 6 | \alias{spanishoddata-package} 7 | \title{spanishoddata: Get Spanish Origin-Destination Data} 8 | \description{ 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}} 10 | 11 | Gain seamless access to origin-destination (OD) data from the Spanish Ministry of Transport, hosted at \url{https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/opendata-movilidad}. This package simplifies the management of these large datasets by providing tools to download zone boundaries, handle associated origin-destination data, and process it efficiently with the 'duckdb' database interface. Local caching minimizes repeated downloads, streamlining workflows for researchers and analysts. Extensive documentation is available at \url{https://ropenspain.github.io/spanishoddata/index.html}, offering guides on creating static and dynamic mobility flow visualizations and transforming large datasets into analysis-ready formats. 
12 | } 13 | \seealso{ 14 | Useful links: 15 | \itemize{ 16 | \item \url{https://rOpenSpain.github.io/spanishoddata/} 17 | \item \url{https://github.com/rOpenSpain/spanishoddata} 18 | \item Report bugs at \url{https://github.com/rOpenSpain/spanishoddata/issues} 19 | } 20 | 21 | } 22 | \author{ 23 | \strong{Maintainer}: Egor Kotov \email{kotov.egor@gmail.com} (\href{https://orcid.org/0000-0001-6690-5345}{ORCID}) 24 | 25 | Authors: 26 | \itemize{ 27 | \item Robin Lovelace \email{rob00x@gmail.com} (\href{https://orcid.org/0000-0001-5679-6536}{ORCID}) 28 | } 29 | 30 | Other contributors: 31 | \itemize{ 32 | \item Eugeni Vidal-Tortosa (\href{https://orcid.org/0000-0001-5199-4103}{ORCID}) [contributor] 33 | } 34 | 35 | } 36 | \keyword{internal} 37 | -------------------------------------------------------------------------------- /man/spod_available_data.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/available-data.R 3 | \name{spod_available_data} 4 | \alias{spod_available_data} 5 | \title{Get available data list} 6 | \usage{ 7 | spod_available_data( 8 | ver = 2, 9 | check_local_files = FALSE, 10 | quiet = FALSE, 11 | data_dir = spod_get_data_dir() 12 | ) 13 | } 14 | \arguments{ 15 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.} 16 | 17 | \item{check_local_files}{Whether to check if the local files exist. Defaults to \code{FALSE}.} 18 | 19 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.} 20 | 21 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.} 22 | } 23 | \value{ 24 | A tibble with links, release dates of files in the data, dates of data coverage, local paths to files, and the download status. 25 | \describe{ 26 | \item{target_url}{\code{character}. 
The URL link to the data file.} 27 | \item{pub_ts}{\code{POSIXct}. The timestamp of when the file was published.} 28 | \item{file_extension}{\code{character}. The file extension of the data file (e.g., 'tar', 'gz').} 29 | \item{data_ym}{\code{Date}. The year and month of the data coverage, if available.} 30 | \item{data_ymd}{\code{Date}. The specific date of the data coverage, if available.} 31 | \item{local_path}{\code{character}. The local file path where the data is stored.} 32 | \item{downloaded}{\code{logical}. Indicator of whether the data file has been downloaded locally. This is only available if \code{check_local_files} is \code{TRUE}.} 33 | } 34 | } 35 | \description{ 36 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} 37 | 38 | Get a table with links to available data files for the specified data version. Optionally check (see arguments) if certain files have already been downloaded into the cache directory specified with SPANISH_OD_DATA_DIR environment variable (set by \link{spod_set_data_dir}) or a custom path specified with \code{data_dir} argument. 
39 | } 40 | \examples{ 41 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 42 | \donttest{ 43 | 44 | # Set data dir for file downloads 45 | spod_set_data_dir(tempdir()) 46 | 47 | # Get available data list for v1 (2020-2021) data 48 | spod_available_data(ver = 1) 49 | 50 | # Get available data list for v2 (2022 onwards) data 51 | spod_available_data(ver = 2) 52 | 53 | # Get available data list for v2 (2022 onwards) data 54 | # while also checking for local files that are already downloaded 55 | spod_available_data(ver = 2, check_local_files = TRUE) 56 | } 57 | \dontshow{\}) # examplesIf} 58 | } 59 | -------------------------------------------------------------------------------- /man/spod_available_data_v1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/available-data.R 3 | \name{spod_available_data_v1} 4 | \alias{spod_available_data_v1} 5 | \title{Get the available v1 data list} 6 | \usage{ 7 | spod_available_data_v1( 8 | data_dir = spod_get_data_dir(), 9 | check_local_files = FALSE, 10 | quiet = FALSE 11 | ) 12 | } 13 | \arguments{ 14 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.} 15 | 16 | \item{check_local_files}{Whether to check if the local files exist. Defaults to \code{FALSE}.} 17 | 18 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.} 19 | } 20 | \value{ 21 | A tibble with links, release dates of files in the data, dates of data coverage, local paths to files, and the download status. 22 | \describe{ 23 | \item{target_url}{\code{character}. The URL link to the data file.} 24 | \item{pub_ts}{\code{POSIXct}. The timestamp of when the file was published.} 25 | \item{file_extension}{\code{character}. 
The file extension of the data file (e.g., 'tar', 'gz').} 26 | \item{data_ym}{\code{Date}. The year and month of the data coverage, if available.} 27 | \item{data_ymd}{\code{Date}. The specific date of the data coverage, if available.} 28 | \item{local_path}{\code{character}. The local file path where the data is stored.} 29 | \item{downloaded}{\code{logical}. Indicator of whether the data file has been downloaded locally. This is only available if \code{check_local_files} is \code{TRUE}.} 30 | } 31 | } 32 | \description{ 33 | This function provides a table of the available data list of MITMA v1 (2020-2021), both remote and local. 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /man/spod_available_data_v2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/available-data.R 3 | \name{spod_available_data_v2} 4 | \alias{spod_available_data_v2} 5 | \title{Get the available v2 data list} 6 | \usage{ 7 | spod_available_data_v2( 8 | data_dir = spod_get_data_dir(), 9 | check_local_files = FALSE, 10 | quiet = FALSE 11 | ) 12 | } 13 | \arguments{ 14 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.} 15 | 16 | \item{check_local_files}{Whether to check if the local files exist. Defaults to \code{FALSE}.} 17 | 18 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.} 19 | } 20 | \value{ 21 | A tibble with links, release dates of files in the data, dates of data coverage, local paths to files, and the download status. 22 | \describe{ 23 | \item{target_url}{\code{character}. The URL link to the data file.} 24 | \item{pub_ts}{\code{POSIXct}. The timestamp of when the file was published.} 25 | \item{file_extension}{\code{character}.
The file extension of the data file (e.g., 'tar', 'gz').} 26 | \item{data_ym}{\code{Date}. The year and month of the data coverage, if available.} 27 | \item{data_ymd}{\code{Date}. The specific date of the data coverage, if available.} 28 | \item{local_path}{\code{character}. The local file path where the data is stored.} 29 | \item{downloaded}{\code{logical}. Indicator of whether the data file has been downloaded locally. This is only available if \code{check_local_files} is \code{TRUE}.} 30 | } 31 | } 32 | \description{ 33 | This function provides a table of the available data list of MITMA v2 (2022 onwards), both remote and local. 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /man/spod_available_ram.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_available_ram} 4 | \alias{spod_available_ram} 5 | \title{Get available RAM} 6 | \usage{ 7 | spod_available_ram() 8 | } 9 | \value{ 10 | A \code{numeric} amount of available RAM in GB. 11 | } 12 | \description{ 13 | Get available RAM 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/spod_cite.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/cite.R 3 | \name{spod_cite} 4 | \alias{spod_cite} 5 | \title{Cite the package and the data} 6 | \usage{ 7 | spod_cite(what = "all", format = "all") 8 | } 9 | \arguments{ 10 | \item{what}{Character vector specifying what to cite. 11 | Can include "package", "data", "methodology_v1", "methodology_v2", or "all". 12 | Default is "all".} 13 | 14 | \item{format}{Character vector specifying output format(s). 15 | Can include "text", "markdown", "bibtex", or "all".
16 | Default is "all".} 17 | } 18 | \value{ 19 | Nothing. Prints citation in plain text, markdown, BibTeX, or all formats at once to console. 20 | } 21 | \description{ 22 | Cite the package and the data 23 | } 24 | \examples{ 25 | # Cite everything in all formats 26 | \dontrun{ 27 | spod_cite() 28 | } 29 | 30 | # Cite just the package in BibTeX format 31 | \dontrun{ 32 | spod_cite(what = "package", format = "bibtex") 33 | } 34 | 35 | # Cite both methodologies in plain text 36 | \dontrun{ 37 | spod_cite(what = c("methodology_v1", "methodology_v2"), format = "text") 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /man/spod_clean_zones_v1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get-zones.R 3 | \name{spod_clean_zones_v1} 4 | \alias{spod_clean_zones_v1} 5 | \title{Fixes common issues in the zones data and cleans up variable names} 6 | \usage{ 7 | spod_clean_zones_v1(zones_path, zones) 8 | } 9 | \arguments{ 10 | \item{zones_path}{The path to the zones spatial data file.} 11 | 12 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}) for both data versions. Additionally, these can be \code{"large_urban_areas"} (or \code{"lua"}, or the original Spanish \code{"grandes_areas_urbanas"}, or \code{"gau"}) for v2 data (2022 onwards).} 13 | } 14 | \value{ 15 | A spatial object containing the cleaned zones data. 16 | } 17 | \description{ 18 | This function fixes any invalid geometries in the zones data and renames the "ID" column to "id".
19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/spod_clean_zones_v2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get-zones.R 3 | \name{spod_clean_zones_v2} 4 | \alias{spod_clean_zones_v2} 5 | \title{Fixes common issues in the zones data and cleans up variable names} 6 | \usage{ 7 | spod_clean_zones_v2(zones_path) 8 | } 9 | \arguments{ 10 | \item{zones_path}{The path to the zones spatial data file.} 11 | } 12 | \value{ 13 | A spatial \code{sf} object containing the cleaned zones data. 14 | } 15 | \description{ 16 | This function fixes any invalid geometries in the zones data and renames the "ID" column to "id". It also attaches the population counts and zone names provided in the csv files supplied by the original data provider. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_codebook.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/codebook.R 3 | \name{spod_codebook} 4 | \alias{spod_codebook} 5 | \title{View codebooks for v1 and v2 open mobility data} 6 | \usage{ 7 | spod_codebook(ver = 1) 8 | } 9 | \arguments{ 10 | \item{ver}{An \code{integer} or \code{numeric} value. The version of the data. Defaults to 1. Can be \code{1} for v1 (2020-2021) data and 2 for v2 (2022 onwards) data.} 11 | } 12 | \value{ 13 | Nothing, opens vignette if it is installed. If vignette is missing, prints a message with a link to a webpage with the codebook.
14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} 17 | 18 | Opens the relevant vignette with a codebook for v1 (2020-2021) and v2 (2022 onwards) data, or provides a link to a webpage if the vignette is missing. 19 | } 20 | \examples{ 21 | 22 | # View codebook for v1 (2020-2021) data 23 | spod_codebook(ver = 1) 24 | 25 | # View codebook for v2 (2022 onwards) data 26 | spod_codebook(ver = 2) 27 | 28 | } 29 | -------------------------------------------------------------------------------- /man/spod_connect.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/connect.R 3 | \name{spod_connect} 4 | \alias{spod_connect} 5 | \title{Connect to data converted to \code{DuckDB} or hive-style \code{parquet} files} 6 | \usage{ 7 | spod_connect( 8 | data_path, 9 | target_table_name = NULL, 10 | quiet = FALSE, 11 | max_mem_gb = max(4, spod_available_ram() - 4), 12 | max_n_cpu = max(1, parallelly::availableCores() - 1), 13 | temp_path = spod_get_temp_dir() 14 | ) 15 | } 16 | \arguments{ 17 | \item{data_path}{a path to the \code{DuckDB} database file with '.duckdb' extension, or a path to the folder with \code{parquet} files. Either one should have been created with the \link{spod_convert} function.} 18 | 19 | \item{target_table_name}{Default is \code{NULL}. When connecting to a folder of \code{parquet} files, this argument is ignored. When connecting to a \code{DuckDB} database, a \code{character} vector of length 1 with the table name to open from the database file. If not specified, it will be guessed from the \code{data_path} argument and from table names that are available in the database.
If you have not manually interfered with the database, this should be guessed automatically and you do not need to specify it.} 20 | 21 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.} 22 | 23 | \item{max_mem_gb}{The maximum memory to use in GB. Defaults to the available RAM minus 4 GB, but not less than 4 GB (\code{max(4, spod_available_ram() - 4)}). The 4 GB minimum should be enough for resaving the data to \code{DuckDB} from a folder of CSV.gz files while being small enough to fit in memory of most even old computers. For data analysis using the already converted data (in \code{DuckDB} or Parquet format) or with the raw CSV.gz data, it is recommended to increase it according to available resources.} 24 | 25 | \item{max_n_cpu}{The maximum number of threads to use. Defaults to the number of available cores minus 1.} 26 | 27 | \item{temp_path}{The path to the temp folder for DuckDB for \href{https://duckdb.org/2024/07/09/memory-management.html#intermediate-spilling}{intermediate spilling} in case the set memory limit and/or physical memory of the computer is too low to perform the query. By default this is set to the \code{temp} directory in the data folder defined by SPANISH_OD_DATA_DIR environment variable. Otherwise, for queries on folders of CSV files or parquet files, the temporary path would be set to the current R working directory, which probably is undesirable, as the current working directory can be on a slow storage, or storage that may have limited space, compared to the data folder.} 28 | } 29 | \value{ 30 | a \code{DuckDB} table connection object. 31 | } 32 | \description{ 33 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} 34 | 35 | This function allows the user to quickly connect to the data converted to DuckDB with the \link{spod_convert} function. This function simplifies the connection process.
The user is free to use the \code{DBI} and \code{DuckDB} packages to connect to the data manually, or to use the \code{arrow} package to connect to the \code{parquet} files folder. 36 | } 37 | \examples{ 38 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 39 | \donttest{ 40 | # Set data dir for file downloads 41 | spod_set_data_dir(tempdir()) 42 | 43 | # download and convert data 44 | dates_1 <- c(start = "2020-02-17", end = "2020-02-18") 45 | db_2 <- spod_convert( 46 | type = "number_of_trips", 47 | zones = "distr", 48 | dates = dates_1, 49 | overwrite = TRUE 50 | ) 51 | 52 | # now connect to the converted data 53 | my_od_data_2 <- spod_connect(db_2) 54 | 55 | # disconnect from the database 56 | spod_disconnect(my_od_data_2) 57 | } 58 | \dontshow{\}) # examplesIf} 59 | } 60 | -------------------------------------------------------------------------------- /man/spod_convert_dates_to_ranges.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_convert_dates_to_ranges} 4 | \alias{spod_convert_dates_to_ranges} 5 | \title{Convert dates to ranges} 6 | \usage{ 7 | spod_convert_dates_to_ranges(dates) 8 | } 9 | \arguments{ 10 | \item{dates}{A \code{character} vector of dates.} 11 | } 12 | \value{ 13 | A \code{character} vector of date ranges. 14 | } 15 | \description{ 16 | This internal helper function reduces a vector of dates to a vector of date ranges to shorten the warning and error messages that mention the valid date ranges. 
17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_dates_argument_to_dates_seq.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_dates_argument_to_dates_seq} 4 | \alias{spod_dates_argument_to_dates_seq} 5 | \title{Convert multiple formats of date arguments to a sequence of dates} 6 | \usage{ 7 | spod_dates_argument_to_dates_seq(dates) 8 | } 9 | \arguments{ 10 | \item{dates}{A \code{character} or \code{Date} vector of dates to process. Kindly keep in mind that v1 and v2 data follow different data collection methodologies and may not be directly comparable. Therefore, do not try to request data from both versions for the same date range. If you need to compare data from both versions, please refer to the respective codebooks and methodology documents. The v1 data covers the period from 2020-02-14 to 2021-05-09, and the v2 data covers the period from 2022-01-01 to the present until further notice. The true dates range is checked against the available data for each version on every function run. 11 | 12 | The possible values can be any of the following: 13 | \itemize{ 14 | \item For the \code{spod_get()} and \code{spod_convert()} functions, the \code{dates} can be set to "cached_v1" or "cached_v2" to request data from cached (already previously downloaded) v1 (2020-2021) or v2 (2022 onwards) data. In this case, the function will identify and use all data files that have been downloaded and cached locally, (e.g. using an explicit run of \code{spod_download()}, or any data requests made using the \code{spod_get()} or \code{spod_convert()} functions). 15 | \item A single date in ISO (YYYY-MM-DD) or YYYYMMDD format. \code{character} or \code{Date} object. 16 | \item A vector of dates in ISO (YYYY-MM-DD) or YYYYMMDD format.
\code{character} or \code{Date} object. Can be any non-consecutive sequence of dates. 17 | \item A date range 18 | \itemize{ 19 | \item either a \code{character} or \code{Date} object of length 2 with clearly named elements \code{start} and \code{end} in ISO (YYYY-MM-DD) or YYYYMMDD format. E.g. \code{c(start = "2020-02-15", end = "2020-02-17")}; 20 | \item or a \code{character} object of the form \code{YYYY-MM-DD_YYYY-MM-DD} or \code{YYYYMMDD_YYYYMMDD}. For example, \verb{2020-02-15_2020-02-17} or \verb{20200215_20200217}. 21 | } 22 | \item A regular expression to match dates in the format \code{YYYYMMDD}. \code{character} object. For example, \verb{^202002} will match all dates in February 2020. 23 | }} 24 | } 25 | \value{ 26 | A \code{character} vector of dates in ISO format (YYYY-MM-DD). 27 | } 28 | \description{ 29 | This function processes the date arguments provided to various functions in the package. It can handle single dates and arbitrary sequences (vectors) of dates in ISO (YYYY-MM-DD) and YYYYMMDD format. It can also handle date ranges in the format 'YYYY-MM-DD_YYYY-MM-DD' (or 'YYYYMMDD_YYYYMMDD'), date ranges as named vectors, and regular expressions to match dates in the format \code{YYYYMMDD}. 30 | } 31 | \keyword{internal} 32 | -------------------------------------------------------------------------------- /man/spod_disconnect.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/disconnect.R 3 | \name{spod_disconnect} 4 | \alias{spod_disconnect} 5 | \title{Safely disconnect from data and free memory} 6 | \usage{ 7 | spod_disconnect(tbl_con, free_mem = TRUE) 8 | } 9 | \arguments{ 10 | \item{tbl_con}{A \code{tbl_duckdb_connection} connection object that you get from either \code{spod_get()} or \code{spod_connect()}.} 11 | 12 | \item{free_mem}{A \code{logical}. Whether to free up memory by running \code{gc()}.
Defaults to \code{TRUE}.} 13 | } 14 | \value{ 15 | No return value, called for side effect of disconnecting from the database and freeing up memory. 16 | } 17 | \description{ 18 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} 19 | 20 | This function is to ensure that \code{DuckDB} connections to CSV.gz files (created via \code{spod_get()}), as well as to \code{DuckDB} files or folders of \code{parquet} files (created via \code{spod_convert()}) are closed properly to prevent conflicting connections. Essentially this is just a wrapper around \code{DBI::dbDisconnect()} that reaches out into the \code{.$src$con} object of the \code{tbl_duckdb_connection} connection object that is returned to the user via \code{spod_get()} and \code{spod_connect()}. After disconnecting the database, it also frees up memory by running \code{gc()}. 21 | } 22 | \examples{ 23 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 24 | \donttest{ 25 | # Set data dir for file downloads 26 | spod_set_data_dir(tempdir()) 27 | 28 | # basic example 29 | # create a connection to the v1 data without converting 30 | # this creates a duckdb database connection to CSV files 31 | od_distr <- spod_get( 32 | "od", 33 | zones = "distr", 34 | dates = c("2020-03-01", "2020-03-02") 35 | ) 36 | # disconnect from the database connection 37 | spod_disconnect(od_distr) 38 | 39 | # Advanced example 40 | # download and convert data 41 | dates_1 <- c(start = "2020-02-17", end = "2020-02-19") 42 | db_2 <- spod_convert( 43 | type = "od", 44 | zones = "distr", 45 | dates = dates_1, 46 | overwrite = TRUE 47 | ) 48 | 49 | # now connect to the converted data 50 | my_od_data_2 <- spod_connect(db_2) 51 | 52 | # disconnect from the database 53 | spod_disconnect(my_od_data_2) 54 | } 55 | \dontshow{\}) # examplesIf} 56 | } 57 |
-------------------------------------------------------------------------------- /man/spod_download_zones_v1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get-zones.R 3 | \name{spod_download_zones_v1} 4 | \alias{spod_download_zones_v1} 5 | \title{Downloads and extracts the raw v1 zones data} 6 | \usage{ 7 | spod_download_zones_v1( 8 | zones = c("districts", "dist", "distr", "distritos", "municipalities", "muni", 9 | "municip", "municipios"), 10 | data_dir = spod_get_data_dir(), 11 | quiet = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}).} 16 | 17 | \item{data_dir}{The directory where the data is stored.} 18 | 19 | \item{quiet}{Boolean flag to control the display of messages.} 20 | } 21 | \value{ 22 | A \code{character} string containing the path to the downloaded and extracted file. 23 | } 24 | \description{ 25 | This function ensures that the necessary v1 raw data for zones files are downloaded and extracted from the specified data directory. 
26 | } 27 | \keyword{internal} 28 | -------------------------------------------------------------------------------- /man/spod_duckdb_create_province_enum.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/duckdb-helpers.R 3 | \name{spod_duckdb_create_province_enum} 4 | \alias{spod_duckdb_create_province_enum} 5 | \title{Create province names ENUM in a duckdb connection} 6 | \usage{ 7 | spod_duckdb_create_province_enum(con) 8 | } 9 | \arguments{ 10 | \item{con}{A \code{duckdb} connection.} 11 | } 12 | \value{ 13 | A \code{duckdb} connection with \code{INE_PROV_NAME_ENUM} and \code{INE_PROV_CODE_ENUM} created. 14 | } 15 | \description{ 16 | Create province names ENUM in a duckdb connection 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_duckdb_filter_by_dates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/duckdb-helpers.R 3 | \name{spod_duckdb_filter_by_dates} 4 | \alias{spod_duckdb_filter_by_dates} 5 | \title{Filter a duckdb connection by dates} 6 | \usage{ 7 | spod_duckdb_filter_by_dates(con, source_view_name, new_view_name, dates) 8 | } 9 | \arguments{ 10 | \item{con}{A duckdb connection} 11 | 12 | \item{source_view_name}{The name of the source duckdb "view" (the virtual table, in the context of current package likely connected to a folder of CSV files)} 13 | 14 | \item{new_view_name}{The name of the new duckdb "view" (the virtual table, in the context of current package likely connected to a folder of CSV files).} 15 | 16 | \item{dates}{A \code{character} or \code{Date} vector of dates to process. Kindly keep in mind that v1 and v2 data follow different data collection methodologies and may not be directly comparable.
Therefore, do not try to request data from both versions for the same date range. If you need to compare data from both versions, please refer to the respective codebooks and methodology documents. The v1 data covers the period from 2020-02-14 to 2021-05-09, and the v2 data covers the period from 2022-01-01 to the present until further notice. The true dates range is checked against the available data for each version on every function run. 17 | 18 | The possible values can be any of the following: 19 | \itemize{ 20 | \item For the \code{spod_get()} and \code{spod_convert()} functions, the \code{dates} can be set to "cached_v1" or "cached_v2" to request data from cached (already previously downloaded) v1 (2020-2021) or v2 (2022 onwards) data. In this case, the function will identify and use all data files that have been downloaded and cached locally, (e.g. using an explicit run of \code{spod_download()}, or any data requests made using the \code{spod_get()} or \code{spod_convert()} functions). 21 | \item A single date in ISO (YYYY-MM-DD) or YYYYMMDD format. \code{character} or \code{Date} object. 22 | \item A vector of dates in ISO (YYYY-MM-DD) or YYYYMMDD format. \code{character} or \code{Date} object. Can be any non-consecutive sequence of dates. 23 | \item A date range 24 | \itemize{ 25 | \item either a \code{character} or \code{Date} object of length 2 with clearly named elements \code{start} and \code{end} in ISO (YYYY-MM-DD) or YYYYMMDD format. E.g. \code{c(start = "2020-02-15", end = "2020-02-17")}; 26 | \item or a \code{character} object of the form \code{YYYY-MM-DD_YYYY-MM-DD} or \code{YYYYMMDD_YYYYMMDD}. For example, \verb{2020-02-15_2020-02-17} or \verb{20200215_20200217}. 27 | } 28 | \item A regular expression to match dates in the format \code{YYYYMMDD}. \code{character} object. For example, \verb{^202002} will match all dates in February 2020. 29 | }} 30 | } 31 | \value{ 32 | A \code{duckdb} connection with original views and a new filtered view.
33 | } 34 | \description{ 35 | IMPORTANT: This function assumes that the table or view that is being filtered has separate \code{year}, \code{month} and \code{day} columns with integer values. This is done so that the filtering is faster on CSV files that are stored in a folder structure with hive-style \verb{/year=2020/month=2/day=14/}. 36 | } 37 | \keyword{internal} 38 | -------------------------------------------------------------------------------- /man/spod_duckdb_limit_resources.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/duckdb-helpers.R 3 | \name{spod_duckdb_limit_resources} 4 | \alias{spod_duckdb_limit_resources} 5 | \title{Set maximum memory and number of threads for a \code{DuckDB} connection} 6 | \usage{ 7 | spod_duckdb_limit_resources( 8 | con, 9 | max_mem_gb = max(4, spod_available_ram() - 4), 10 | max_n_cpu = max(1, parallelly::availableCores() - 1) 11 | ) 12 | } 13 | \arguments{ 14 | \item{con}{A \code{duckdb} connection} 15 | 16 | \item{max_mem_gb}{The maximum memory to use in GB. Defaults to the available RAM minus 4 GB, but not less than 4 GB (\code{max(4, spod_available_ram() - 4)}). The 4 GB minimum should be enough for resaving the data to \code{DuckDB} from a folder of CSV.gz files while being small enough to fit in memory of most even old computers. For data analysis using the already converted data (in \code{DuckDB} or Parquet format) or with the raw CSV.gz data, it is recommended to increase it according to available resources.} 17 | 18 | \item{max_n_cpu}{The maximum number of threads to use. Defaults to the number of available cores minus 1.} 19 | } 20 | \value{ 21 | A \code{duckdb} connection.
22 | } 23 | \description{ 24 | Set maximum memory and number of threads for a \code{DuckDB} connection 25 | } 26 | \keyword{internal} 27 | -------------------------------------------------------------------------------- /man/spod_duckdb_number_of_trips.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/duckdb-helpers.R 3 | \name{spod_duckdb_number_of_trips} 4 | \alias{spod_duckdb_number_of_trips} 5 | \title{Create a duckdb number of trips table} 6 | \usage{ 7 | spod_duckdb_number_of_trips( 8 | con = DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:", read_only = FALSE), 9 | zones = c("districts", "dist", "distr", "distritos", "municipalities", "muni", 10 | "municip", "municipios", "lua", "large_urban_areas", "gau", "grandes_areas_urbanas"), 11 | ver = NULL, 12 | data_dir = spod_get_data_dir() 13 | ) 14 | } 15 | \arguments{ 16 | \item{con}{A duckdb connection object. If not specified, a new in-memory connection will be created.} 17 | 18 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}) for both data versions. Additionally, these can be \code{"large_urban_areas"} (or \code{"lua"}, or the original Spanish \code{"grandes_areas_urbanas"}, or \code{"gau"}) for v2 data (2022 onwards).} 19 | 20 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.} 21 | 22 | \item{data_dir}{The directory where the data is stored.
Defaults to the value returned by \code{spod_get_data_dir()}.} 23 | } 24 | \value{ 25 | A \code{duckdb} connection object with 2 views: 26 | \itemize{ 27 | \item \code{od_csv_raw} - a raw table view of all cached CSV files with the origin-destination data that has been previously cached in $SPANISH_OD_DATA_DIR 28 | \item \code{od_csv_clean} - a cleaned-up table view of \code{od_csv_raw} with column names and values translated and mapped to English. This still includes all cached data. 29 | } 30 | } 31 | \description{ 32 | This function creates a duckdb connection to the number of trips data stored in a folder of CSV.gz files. 33 | } 34 | \keyword{internal} 35 | -------------------------------------------------------------------------------- /man/spod_duckdb_overnight_stays.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/duckdb-helpers.R 3 | \name{spod_duckdb_overnight_stays} 4 | \alias{spod_duckdb_overnight_stays} 5 | \title{Create a duckdb overnight stays table} 6 | \usage{ 7 | spod_duckdb_overnight_stays( 8 | con = DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:", read_only = FALSE), 9 | zones = c("districts", "dist", "distr", "distritos", "municipalities", "muni", 10 | "municip", "municipios", "lua", "large_urban_areas", "gau", "grandes_areas_urbanas"), 11 | ver = NULL, 12 | data_dir = spod_get_data_dir() 13 | ) 14 | } 15 | \arguments{ 16 | \item{con}{A duckdb connection object. If not specified, a new in-memory connection will be created.} 17 | 18 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}) for both data versions. 
Additionally, these can be \code{"large_urban_areas"} (or \code{"lua"}, or the original Spanish \code{"grandes_areas_urbanas"}, or \code{"gau"}) for v2 data (2022 onwards).} 19 | 20 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.} 21 | 22 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.} 23 | } 24 | \value{ 25 | A \code{duckdb} connection object with 2 views: 26 | \itemize{ 27 | \item \code{od_csv_raw} - a raw table view of all cached CSV files with the origin-destination data that has been previously cached in $SPANISH_OD_DATA_DIR 28 | \item \code{od_csv_clean} - a cleaned-up table view of \code{od_csv_raw} with column names and values translated and mapped to English. This still includes all cached data. 29 | } 30 | } 31 | \description{ 32 | This function creates a duckdb connection to the overnight stays data stored in a folder of CSV.gz files. 33 | } 34 | \keyword{internal} 35 | -------------------------------------------------------------------------------- /man/spod_duckdb_set_temp.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/duckdb-helpers.R 3 | \name{spod_duckdb_set_temp} 4 | \alias{spod_duckdb_set_temp} 5 | \title{Set temp file for DuckDB connection} 6 | \usage{ 7 | spod_duckdb_set_temp(con, temp_path = spod_get_temp_dir()) 8 | } 9 | \arguments{ 10 | \item{con}{A duckdb connection} 11 | 12 | \item{temp_path}{The path to the temp folder for DuckDB for \href{https://duckdb.org/2024/07/09/memory-management.html#intermediate-spilling}{intermediate spilling} in case the set memory limit and/or physical memory of the computer is too low to perform the query. By default this is set to the \code{temp} directory in the data folder defined by SPANISH_OD_DATA_DIR environment variable.
Otherwise, for queries on folders of CSV files or parquet files, the temporary path would be set to the current R working directory, which probably is undesirable, as the current working directory can be on a slow storage, or storage that may have limited space, compared to the data folder.} 13 | } 14 | \value{ 15 | A \code{duckdb} connection. 16 | } 17 | \description{ 18 | Set temp file for DuckDB connection 19 | } 20 | \keyword{internal} 21 | -------------------------------------------------------------------------------- /man/spod_expand_dates_from_regex.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_expand_dates_from_regex} 4 | \alias{spod_expand_dates_from_regex} 5 | \title{Function to expand dates from a regex} 6 | \usage{ 7 | spod_expand_dates_from_regex(date_regex) 8 | } 9 | \arguments{ 10 | \item{date_regex}{A regular expression to match dates in the format 'yyyymmdd'.} 11 | } 12 | \value{ 13 | A \code{character} vector of dates matching the regex. 14 | } 15 | \description{ 16 | This function generates a sequence of dates from a regular expression pattern based on the provided regular expression. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_files_sizes.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dev-tools.R 3 | \name{spod_files_sizes} 4 | \alias{spod_files_sizes} 5 | \title{Get files sizes for remote files of v1 and v2 data and save them into a csv.gz file in the inst/extdata folder.} 6 | \usage{ 7 | spod_files_sizes(ver = 2) 8 | } 9 | \arguments{ 10 | \item{ver}{The version of the data (1 or 2). Can be both. 
Defaults to 2, as v1 data is not being updated since 2021.} 11 | } 12 | \value{ 13 | Nothing. Only saves a csv.gz file with up to date file sizes in the inst/extdata folder. 14 | } 15 | \description{ 16 | Get files sizes for remote files of v1 and v2 data and save them into a csv.gz file in the inst/extdata folder. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_get_data_dir.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-dir.R 3 | \name{spod_get_data_dir} 4 | \alias{spod_get_data_dir} 5 | \title{Get the data directory} 6 | \usage{ 7 | spod_get_data_dir(quiet = FALSE) 8 | } 9 | \arguments{ 10 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.} 11 | } 12 | \value{ 13 | A \code{character} vector of length 1 containing the path to the data directory where the package will download and convert the data. 14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} 17 | 18 | This function retrieves the data directory from the environment variable SPANISH_OD_DATA_DIR. 19 | If the environment variable is not set, it returns the temporary directory. 
20 | } 21 | \examples{ 22 | spod_set_data_dir(tempdir()) 23 | spod_get_data_dir() 24 | 25 | } 26 | -------------------------------------------------------------------------------- /man/spod_get_file_size_from_url.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/dev-tools.R 3 | \name{spod_get_file_size_from_url} 4 | \alias{spod_get_file_size_from_url} 5 | \title{Get file size from URL} 6 | \usage{ 7 | spod_get_file_size_from_url(x_url) 8 | } 9 | \arguments{ 10 | \item{x_url}{URL} 11 | } 12 | \value{ 13 | File size in MB 14 | } 15 | \description{ 16 | Get file size from URL 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_get_hmac_secret.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quick-get.R 3 | \name{spod_get_hmac_secret} 4 | \alias{spod_get_hmac_secret} 5 | \title{Get the HMAC secret from the mapas-movilidad website} 6 | \usage{ 7 | spod_get_hmac_secret(base_url = "https://mapas-movilidad.transportes.gob.es") 8 | } 9 | \arguments{ 10 | \item{base_url}{The base URL of the mapas-movilidad website} 11 | } 12 | \value{ 13 | Character vector with the HMAC secret. 
14 | } 15 | \description{ 16 | Get the HMAC secret from the mapas-movilidad website 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_get_latest_v1_file_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/available-data.R 3 | \name{spod_get_latest_v1_file_list} 4 | \alias{spod_get_latest_v1_file_list} 5 | \title{Get latest file list from the XML for MITMA open mobility data v1 (2020-2021)} 6 | \usage{ 7 | spod_get_latest_v1_file_list( 8 | data_dir = spod_get_data_dir(), 9 | xml_url = "https://opendata-movilidad.mitma.es/RSS.xml" 10 | ) 11 | } 12 | \arguments{ 13 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.} 14 | 15 | \item{xml_url}{The URL of the XML file to download. Defaults to "https://opendata-movilidad.mitma.es/RSS.xml".} 16 | } 17 | \value{ 18 | The path to the downloaded XML file. 19 | } 20 | \description{ 21 | Get latest file list from the XML for MITMA open mobility data v1 (2020-2021) 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/spod_get_latest_v2_file_list.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/available-data.R 3 | \name{spod_get_latest_v2_file_list} 4 | \alias{spod_get_latest_v2_file_list} 5 | \title{Get latest file list from the XML for MITMA open mobility data v2 (2022 onwards)} 6 | \usage{ 7 | spod_get_latest_v2_file_list( 8 | data_dir = spod_get_data_dir(), 9 | xml_url = "https://movilidad-opendata.mitma.es/RSS.xml" 10 | ) 11 | } 12 | \arguments{ 13 | \item{data_dir}{The directory where the data is stored. 
Defaults to the value returned by \code{spod_get_data_dir()}.} 14 | 15 | \item{xml_url}{The URL of the XML file to download. Defaults to "https://movilidad-opendata.mitma.es/RSS.xml".} 16 | } 17 | \value{ 18 | The path to the downloaded XML file. 19 | } 20 | \description{ 21 | Get latest file list from the XML for MITMA open mobility data v2 (2022 onwards) 22 | } 23 | \keyword{internal} 24 | -------------------------------------------------------------------------------- /man/spod_get_temp_dir.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/folders.R 3 | \name{spod_get_temp_dir} 4 | \alias{spod_get_temp_dir} 5 | \title{Get temporary directory for DuckDB intermediate spilling} 6 | \usage{ 7 | spod_get_temp_dir(data_dir = spod_get_data_dir()) 8 | } 9 | \arguments{ 10 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.} 11 | } 12 | \value{ 13 | A \code{character} string with the path to the temp folder for \code{DuckDB} for \href{https://duckdb.org/2024/07/09/memory-management.html#intermediate-spilling}{intermediate spilling}. 14 | } 15 | \description{ 16 | Get the path to the temp folder for DuckDB for \href{https://duckdb.org/2024/07/09/memory-management.html#intermediate-spilling}{intermediate spilling} in case the set memory limit and/or physical memory of the computer is too low to perform the query. 
17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_get_valid_dates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_get_valid_dates} 4 | \alias{spod_get_valid_dates} 5 | \title{Get valid dates for the specified data version} 6 | \usage{ 7 | spod_get_valid_dates(ver = NULL) 8 | } 9 | \arguments{ 10 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.} 11 | } 12 | \value{ 13 | A vector of type \code{Date} with all possible valid dates for the specified data version (v1 for 2020-2021 and v2 for 2022 onwards). 14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} 17 | 18 | Get all metadata for requested data version and identify all dates available for download. 
19 | } 20 | \examples{ 21 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 22 | \donttest{ 23 | # Get all valid dates for v1 (2020-2021) data 24 | spod_get_valid_dates(ver = 1) 25 | 26 | # Get all valid dates for v2 (2022 onwards) data 27 | spod_get_valid_dates(ver = 2) 28 | } 29 | \dontshow{\}) # examplesIf} 30 | } 31 | -------------------------------------------------------------------------------- /man/spod_get_zones_v1.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get-zones.R 3 | \name{spod_get_zones_v1} 4 | \alias{spod_get_zones_v1} 5 | \title{Retrieves the zones for v1 data} 6 | \usage{ 7 | spod_get_zones_v1( 8 | zones = c("districts", "dist", "distr", "distritos", "municipalities", "muni", 9 | "municip", "municipios"), 10 | data_dir = spod_get_data_dir(), 11 | quiet = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}).} 16 | 17 | \item{data_dir}{The directory where the data is stored.} 18 | 19 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.} 20 | } 21 | \value{ 22 | An \code{sf} object (Simple Feature collection) with 2 fields: 23 | \describe{ 24 | \item{id}{A character vector containing the unique identifier for each zone, to be matched with identifiers in the tabular data.} 25 | \item{geometry}{A \code{MULTIPOLYGON} column containing the spatial geometry of each zone, stored as an sf object. 
26 | The geometry is projected in the ETRS89 / UTM zone 30N coordinate reference system (CRS), with XY dimensions.} 27 | } 28 | } 29 | \description{ 30 | This function retrieves the zones data from the specified data directory. 31 | It can retrieve either "distritos" or "municipios" zones data. 32 | } 33 | \keyword{internal} 34 | -------------------------------------------------------------------------------- /man/spod_get_zones_v2.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get-zones.R 3 | \name{spod_get_zones_v2} 4 | \alias{spod_get_zones_v2} 5 | \title{Retrieves the zones v2 data} 6 | \usage{ 7 | spod_get_zones_v2( 8 | zones = c("districts", "dist", "distr", "distritos", "municipalities", "muni", 9 | "municip", "municipios", "lua", "large_urban_areas", "gau", "grandes_areas_urbanas"), 10 | data_dir = spod_get_data_dir(), 11 | quiet = FALSE 12 | ) 13 | } 14 | \arguments{ 15 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}).} 16 | 17 | \item{data_dir}{The directory where the data is stored.} 18 | 19 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.} 20 | } 21 | \value{ 22 | An \code{sf} object (Simple Feature collection) with 4 fields: 23 | \describe{ 24 | \item{id}{A character vector containing the unique identifier for each zone, to be matched with identifiers in the tabular data.} 25 | \item{name}{A character vector with the name of the zone.} 26 | \item{population}{A numeric vector representing the population of each zone (as of 2022).} 27 | \item{geometry}{A \code{MULTIPOLYGON} column containing the spatial geometry of each zone, stored as an sf object. 
28 | The geometry is projected in the ETRS89 / UTM zone 30N coordinate reference system (CRS), with XY dimensions.} 29 | } 30 | } 31 | \description{ 32 | This function retrieves the zones data from the specified data directory. 33 | It can retrieve either "distritos" or "municipios" zones data. 34 | } 35 | \keyword{internal} 36 | -------------------------------------------------------------------------------- /man/spod_graphql_valid_dates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_graphql_valid_dates} 4 | \alias{spod_graphql_valid_dates} 5 | \title{Get valid dates from the GraphQL API} 6 | \usage{ 7 | spod_graphql_valid_dates() 8 | } 9 | \value{ 10 | A \code{Date} vector of dates that are valid to request data with \code{spod_quick_get_od()}. 11 | } 12 | \description{ 13 | Get valid dates from the GraphQL API 14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/spod_infer_data_v_from_dates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_infer_data_v_from_dates} 4 | \alias{spod_infer_data_v_from_dates} 5 | \title{Infer data version from dates} 6 | \usage{ 7 | spod_infer_data_v_from_dates(dates, ignore_missing_dates = FALSE) 8 | } 9 | \arguments{ 10 | \item{dates}{A \code{character} or \code{Date} vector of dates to process. Kindly keep in mind that v1 and v2 data follow different data collection methodologies and may not be directly comparable. Therefore, do not try to request data from both versions for the same date range. If you need to compare data from both versions, please refer to the respective codebooks and methodology documents. 
The v1 data covers the period from 2020-02-14 to 2021-05-09, and the v2 data covers the period from 2022-01-01 to the present until further notice. The true dates range is checked against the available data for each version on every function run. 11 | 12 | The possible values can be any of the following: 13 | \itemize{ 14 | \item For the \code{spod_get()} and \code{spod_convert()} functions, the \code{dates} can be set to "cached_v1" or "cached_v2" to request data from cached (already previously downloaded) v1 (2020-2021) or v2 (2022 onwards) data. In this case, the function will identify and use all data files that have been downloaded and cached locally (e.g. using an explicit run of \code{spod_download()}, or any data requests made using the \code{spod_get()} or \code{spod_convert()} functions). 15 | \item A single date in ISO (YYYY-MM-DD) or YYYYMMDD format. \code{character} or \code{Date} object. 16 | \item A vector of dates in ISO (YYYY-MM-DD) or YYYYMMDD format. \code{character} or \code{Date} object. Can be any non-consecutive sequence of dates. 17 | \item A date range 18 | \itemize{ 19 | \item either a \code{character} or \code{Date} object of length 2 with clearly named elements \code{start} and \code{end} in ISO (YYYY-MM-DD) or YYYYMMDD format. E.g. \code{c(start = "2020-02-15", end = "2020-02-17")}; 20 | \item or a \code{character} object of the form \code{YYYY-MM-DD_YYYY-MM-DD} or \code{YYYYMMDD_YYYYMMDD}. For example, \verb{2020-02-15_2020-02-17} or \verb{20200215_20200217}. 21 | } 22 | \item A regular expression to match dates in the format \code{YYYYMMDD}. \code{character} object. For example, \verb{^202002} will match all dates in February 2020. 23 | }} 24 | 25 | \item{ignore_missing_dates}{Logical. If \code{TRUE}, the function will not raise an error if some of the specified dates are missing. Any dates that are missing will be skipped, however the data for any valid dates will be acquired. 
Defaults to \code{FALSE}.} 26 | } 27 | \value{ 28 | An \code{integer} indicating the inferred data version. 29 | } 30 | \description{ 31 | Infer data version from dates 32 | } 33 | \keyword{internal} 34 | -------------------------------------------------------------------------------- /man/spod_is_data_version_overlaps.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_is_data_version_overlaps} 4 | \alias{spod_is_data_version_overlaps} 5 | \title{Check if specified dates span both data versions} 6 | \usage{ 7 | spod_is_data_version_overlaps(dates) 8 | } 9 | \arguments{ 10 | \item{dates}{A \code{Dates} vector of dates to check.} 11 | } 12 | \value{ 13 | \code{TRUE} if the dates span both data versions, \code{FALSE} otherwise. 14 | } 15 | \description{ 16 | This function checks if the specified dates or date ranges span both v1 and v2 data versions. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_match_data_type.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_match_data_type} 4 | \alias{spod_match_data_type} 5 | \title{Match data types for normalisation} 6 | \usage{ 7 | spod_match_data_type( 8 | type = c("od", "origin-destination", "viajes", "os", "overnight_stays", 9 | "pernoctaciones", "nt", "number_of_trips", "personas") 10 | ) 11 | } 12 | \arguments{ 13 | \item{type}{The type of data to match. Can be "od", "origin-destination", "os", "overnight_stays", or "nt", "number_of_trips".} 14 | } 15 | \value{ 16 | A \code{character} string with the folder name for the specified data type. Or \code{NULL} if the type is not recognized. 
17 | } 18 | \description{ 19 | Match data types for normalisation 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/spod_match_data_type_for_local_folders.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_match_data_type_for_local_folders} 4 | \alias{spod_match_data_type_for_local_folders} 5 | \title{Match data types to folders} 6 | \usage{ 7 | spod_match_data_type_for_local_folders( 8 | type = c("od", "origin-destination", "os", "overnight_stays", "nt", "number_of_trips"), 9 | ver = c(1, 2) 10 | ) 11 | } 12 | \arguments{ 13 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.} 14 | } 15 | \value{ 16 | A \code{character} string with the folder name for the specified data type. Or \code{NULL} if the data type is not recognized. 17 | } 18 | \description{ 19 | Match data types to folders 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /man/spod_quick_get_od.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quick-get.R 3 | \name{spod_quick_get_od} 4 | \alias{spod_quick_get_od} 5 | \title{Get daily trip counts per origin-destination municipality from 2022 onward} 6 | \usage{ 7 | spod_quick_get_od( 8 | date = NA, 9 | min_trips = 100, 10 | distances = c("500m-2km", "2-10km", "10-50km", "50+km"), 11 | id_origin = NA, 12 | id_destination = NA 13 | ) 14 | } 15 | \arguments{ 16 | \item{date}{A character or Date object specifying the date for which to retrieve the data. 
If date is a character, the date must be in "YYYY-MM-DD" or "YYYYMMDD" format.} 17 | 18 | \item{min_trips}{A numeric value specifying the minimum number of journeys per origin-destination pair to retrieve. Defaults to 100 to reduce the amount of data returned. Can be set to 0 to retrieve all data.} 19 | 20 | \item{distances}{A character vector specifying the distances to retrieve. Valid values are "500m-2km", "2-10km", "10-50km", and "50+km". Defaults to \code{c("500m-2km", "2-10km", "10-50km", "50+km")}. The resulting data will not have number of trips per category of distance. Therefore, if you want to retrieve the number of trips per distance category, you need to make 4 separate calls to this function or use \code{\link[=spod_get]{spod_get()}} instead to get the full data from source CSV files.} 21 | 22 | \item{id_origin}{A character vector specifying the origin municipalities to retrieve. If not provided, all origin municipalities will be included. Valid municipality IDs can be found in the dataset returned by \code{spod_get_zones(zones = "muni", ver = 2)}.} 23 | 24 | \item{id_destination}{A character vector specifying the target municipalities to retrieve. If not provided, all target municipalities will be included. Valid municipality IDs can be found in the dataset returned by \code{spod_get_zones(zones = "muni", ver = 2)}.} 25 | } 26 | \value{ 27 | A \code{tibble} containing the flows for the specified date, minimum number of journeys, distances and origin-destination pairs if specified. 
The columns are: 28 | \describe{ 29 | \item{date}{The date of the trips.} 30 | \item{id_origin}{The origin municipality ID.} 31 | \item{id_destination}{The target municipality ID.} 32 | \item{n_trips}{The number of trips between the origin and target municipality.} 33 | \item{trips_total_length_km}{The total length of trips in kilometers.} 34 | } 35 | } 36 | \description{ 37 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 38 | 39 | \strong{WARNING: this function may stop working at any time, as the API may change}. This function provides a quick way to get daily aggregated (no hourly data) trip counts per origin-destination municipality from v2 data (2022 onward). Compared to \code{\link[=spod_get]{spod_get()}}, which downloads large CSV files, this function downloads the data directly from the GraphQL API. An interactive web map with this data is available at \url{https://mapas-movilidad.transportes.gob.es/}. No data aggregation is performed on your computer (unlike in \code{\link[=spod_get]{spod_get()}}), so you do not need to worry about memory usage and do not have to use a powerful computer with multiple CPU cores just to get this simple data. Only about 1 MB of data is downloaded for a single day. The limitation of this function is that it can only retrieve data for a single day at a time and only with total number of trips and total km travelled. So it is not possible to get any of the extra variables available in the full dataset via \code{\link[=spod_get]{spod_get()}}. 
40 | } 41 | \examples{ 42 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 43 | \donttest{ 44 | od_1000 <- spod_quick_get_od( 45 | date = "2022-01-01", 46 | min_trips = 1000 47 | ) 48 | } 49 | \dontshow{\}) # examplesIf} 50 | } 51 | -------------------------------------------------------------------------------- /man/spod_quick_get_zones.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/quick-get.R 3 | \name{spod_quick_get_zones} 4 | \alias{spod_quick_get_zones} 5 | \title{Get the municipalities geometries} 6 | \usage{ 7 | spod_quick_get_zones(zones = "municipalities") 8 | } 9 | \arguments{ 10 | \item{zones}{A character string specifying the zones to retrieve. Valid values are "municipalities", "muni", "municip", and "municipios". Defaults to "municipalities".} 11 | } 12 | \value{ 13 | A \code{sf} object with the municipalities geometries to match with the data retrieved with \code{\link[=spod_quick_get_od]{spod_quick_get_od()}}. 14 | } 15 | \description{ 16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}} 17 | 18 | This function fetches the municipalities (for now this is the only option) geometries from the mapas-movilidad website and returns a \code{sf} object with the municipalities geometries. This is intended for use with the flows data retrieved by the \code{\link[=spod_quick_get_od]{spod_quick_get_od()}} function. An interactive web map with this data is available at \url{https://mapas-movilidad.transportes.gob.es/}. These municipality geometries only include Spanish municipalities (and not the NUTS3 regions in Portugal and France) and do not contain extra columns that you can get with the \code{\link[=spod_get_zones]{spod_get_zones()}} function. 
The function caches the retrieved geometries in memory of the current R session to reduce the number of requests to the mapas-movilidad website. 19 | } 20 | \examples{ 21 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf} 22 | \donttest{ 23 | municipalities_sf <- spod_quick_get_zones() 24 | } 25 | \dontshow{\}) # examplesIf} 26 | } 27 | -------------------------------------------------------------------------------- /man/spod_read_sql.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/duckdb-helpers.R 3 | \name{spod_read_sql} 4 | \alias{spod_read_sql} 5 | \title{Load an SQL query, glue it, dplyr::sql it} 6 | \usage{ 7 | spod_read_sql(sql_file_name) 8 | } 9 | \arguments{ 10 | \item{sql_file_name}{The name of the SQL file to load from the package installation directory.} 11 | } 12 | \value{ 13 | Text of the SQL query of class \code{sql}/\code{character}. 14 | } 15 | \description{ 16 | Load an SQL query from a specified file in package installation directory, glue::collapse it, glue::glue it in case of any variables that need to be replaced, and dplyr::sql it for additional safety. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_request_length.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_request_length} 4 | \alias{spod_request_length} 5 | \title{Get the length of the request payload} 6 | \usage{ 7 | spod_request_length(graphql_query) 8 | } 9 | \arguments{ 10 | \item{graphql_query}{Character. The GraphQL query.} 11 | } 12 | \value{ 13 | Numeric. The length of the request payload. 
14 | } 15 | \description{ 16 | Get the length of the request payload 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_set_data_dir.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/data-dir.R 3 | \name{spod_set_data_dir} 4 | \alias{spod_set_data_dir} 5 | \title{Set the data directory} 6 | \usage{ 7 | spod_set_data_dir(data_dir, quiet = FALSE) 8 | } 9 | \arguments{ 10 | \item{data_dir}{The data directory to set.} 11 | 12 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.} 13 | } 14 | \value{ 15 | Nothing. If quiet is \code{FALSE}, prints a message with the path and confirmation that the path exists. 16 | } 17 | \description{ 18 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}} 19 | 20 | This function sets the data directory in the environment variable SPANISH_OD_DATA_DIR, so that all other functions in the package can access the data. It also creates the directory if it doesn't exist. 21 | } 22 | \examples{ 23 | spod_set_data_dir(tempdir()) 24 | 25 | } 26 | -------------------------------------------------------------------------------- /man/spod_sql_where_dates.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/duckdb-helpers.R 3 | \name{spod_sql_where_dates} 4 | \alias{spod_sql_where_dates} 5 | \title{Generate a WHERE part of an SQL query from a sequence of dates} 6 | \usage{ 7 | spod_sql_where_dates(dates) 8 | } 9 | \arguments{ 10 | \item{dates}{A Dates vector of dates to process.} 11 | } 12 | \value{ 13 | A character vector of the SQL query. 
14 | } 15 | \description{ 16 | Generate a WHERE part of an SQL query from a sequence of dates 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_subfolder_clean_data_cache.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/folders.R 3 | \name{spod_subfolder_clean_data_cache} 4 | \alias{spod_subfolder_clean_data_cache} 5 | \title{Get clean data subfolder name} 6 | \usage{ 7 | spod_subfolder_clean_data_cache(ver = 1) 8 | } 9 | \arguments{ 10 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.} 11 | } 12 | \value{ 13 | A \code{character} string with the subfolder name for the clean data cache. 14 | } 15 | \description{ 16 | Change subfolder name in the code of this function for clean data cache here to apply globally, as all functions in the package should use this function to get the clean data cache path. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_subfolder_metadata_cache.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/folders.R 3 | \name{spod_subfolder_metadata_cache} 4 | \alias{spod_subfolder_metadata_cache} 5 | \title{Get metadata cache subfolder name} 6 | \usage{ 7 | spod_subfolder_metadata_cache() 8 | } 9 | \value{ 10 | A \code{character} string with the subfolder name for the metadata cache. 11 | } 12 | \description{ 13 | Change subfolder name in the code of this function for metadata cache here to apply globally, as all functions in the package should use this function to get the metadata cache path. 
14 | } 15 | \keyword{internal} 16 | -------------------------------------------------------------------------------- /man/spod_subfolder_raw_data_cache.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/folders.R 3 | \name{spod_subfolder_raw_data_cache} 4 | \alias{spod_subfolder_raw_data_cache} 5 | \title{Get raw data cache subfolder name} 6 | \usage{ 7 | spod_subfolder_raw_data_cache(ver = 1) 8 | } 9 | \arguments{ 10 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.} 11 | } 12 | \value{ 13 | A \code{character} string with the subfolder name for the raw data cache. 14 | } 15 | \description{ 16 | Change subfolder name in the code of this function for raw data cache here to apply globally, as all functions in the package should use this function to get the raw data cache path. 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_unique_separated_ids.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_unique_separated_ids} 4 | \alias{spod_unique_separated_ids} 5 | \title{Remove duplicate values in a semicolon-separated string} 6 | \usage{ 7 | spod_unique_separated_ids(column) 8 | } 9 | \arguments{ 10 | \item{column}{A \code{character} vector column in a data frame to remove duplicates from.} 11 | } 12 | \value{ 13 | A \code{character} vector with semicolon-separated unique IDs. 
14 | } 15 | \description{ 16 | Remove duplicate IDs in a semicolon-separated string in a selected column in a data frame 17 | } 18 | \keyword{internal} 19 | -------------------------------------------------------------------------------- /man/spod_zone_names_en2es.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/internal-utils.R 3 | \name{spod_zone_names_en2es} 4 | \alias{spod_zone_names_en2es} 5 | \title{Translate zone names from English to Spanish} 6 | \usage{ 7 | spod_zone_names_en2es( 8 | zones = c("districts", "dist", "distr", "distritos", "municipalities", "muni", 9 | "municip", "municipios", "lua", "large_urban_areas", "gau", "grandes_areas_urbanas") 10 | ) 11 | } 12 | \arguments{ 13 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}) for both data versions. Additionally, these can be \code{"large_urban_areas"} (or \code{"lua"}, or the original Spanish \code{"grandes_areas_urbanas"}, or \code{"gau"}) for v2 data (2022 onwards).} 14 | } 15 | \value{ 16 | A \code{character} string with the translated zone name. Or \code{NULL} if the zone name is not recognized.
17 | } 18 | \description{ 19 | Translate zone names from English to Spanish 20 | } 21 | \keyword{internal} 22 | -------------------------------------------------------------------------------- /pkgdown/assets/codebooks/20241024_validaciones_estudios_basicos_bigdata_v1.0_en.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/codebooks/20241024_validaciones_estudios_basicos_bigdata_v1.0_en.pdf -------------------------------------------------------------------------------- /pkgdown/assets/codebooks/README_-_formato_ficheros_movilidad_MITMA_20201228_en.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/codebooks/README_-_formato_ficheros_movilidad_MITMA_20201228_en.pdf -------------------------------------------------------------------------------- /pkgdown/assets/codebooks/a3_informe_metodologico_estudio_movilidad_mitms_v8_en.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/codebooks/a3_informe_metodologico_estudio_movilidad_mitms_v8_en.pdf -------------------------------------------------------------------------------- /pkgdown/assets/codebooks/mitma_-_estudio_movilidad_covid-19_informe_metodologico_v3_en.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/codebooks/mitma_-_estudio_movilidad_covid-19_informe_metodologico_v3_en.pdf -------------------------------------------------------------------------------- /pkgdown/assets/media/barcelona-time.gif: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/media/barcelona-time.gif -------------------------------------------------------------------------------- /pkgdown/assets/media/flowmapblue-animated.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/media/flowmapblue-animated.mp4 -------------------------------------------------------------------------------- /pkgdown/assets/media/flowmapblue-standard-time.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/media/flowmapblue-standard-time.mp4 -------------------------------------------------------------------------------- /pkgdown/assets/media/flowmapblue-standard.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/media/flowmapblue-standard.mp4 -------------------------------------------------------------------------------- /pkgdown/assets/media/spain-folding-flows.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/media/spain-folding-flows.gif -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-120x120.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/apple-touch-icon-120x120.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/apple-touch-icon-152x152.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/apple-touch-icon-180x180.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/apple-touch-icon-60x60.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/apple-touch-icon-76x76.png -------------------------------------------------------------------------------- /pkgdown/favicon/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/apple-touch-icon.png -------------------------------------------------------------------------------- 
/pkgdown/favicon/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/favicon-16x16.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/favicon-32x32.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/favicon-96x96.png -------------------------------------------------------------------------------- /pkgdown/favicon/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/favicon.ico -------------------------------------------------------------------------------- /pkgdown/favicon/site.webmanifest: -------------------------------------------------------------------------------- 1 | { 2 | "name": "", 3 | "short_name": "", 4 | "icons": [ 5 | { 6 | "src": "/web-app-manifest-192x192.png", 7 | "sizes": "192x192", 8 | "type": "image/png", 9 | "purpose": "maskable" 10 | }, 11 | { 12 | "src": "/web-app-manifest-512x512.png", 13 | "sizes": "512x512", 14 | "type": "image/png", 15 | "purpose": "maskable" 16 | } 17 | ], 18 | "theme_color": "#ffffff", 19 | "background_color": "#ffffff", 20 | "display": "standalone" 21 | } -------------------------------------------------------------------------------- /pkgdown/favicon/web-app-manifest-192x192.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/web-app-manifest-192x192.png -------------------------------------------------------------------------------- /pkgdown/favicon/web-app-manifest-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/web-app-manifest-512x512.png -------------------------------------------------------------------------------- /spanishoddata.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | ProjectId: 0eb7deaa-2778-4211-9274-917281de2007 3 | 4 | RestoreWorkspace: No 5 | SaveWorkspace: No 6 | AlwaysSaveHistory: Default 7 | 8 | EnableCodeIndexing: Yes 9 | UseSpacesForTab: Yes 10 | NumSpacesForTab: 2 11 | Encoding: UTF-8 12 | 13 | RnwWeave: Sweave 14 | LaTeX: pdfLaTeX 15 | 16 | AutoAppendNewline: Yes 17 | StripTrailingWhitespace: Yes 18 | LineEndingConversion: Posix 19 | 20 | BuildType: Package 21 | PackageUseDevtools: Yes 22 | PackageInstallArgs: --no-multiarch --with-keep.source 23 | PackageRoxygenize: rd,collate,namespace,vignette 24 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | # This file is part of the standard setup for testthat. 2 | # It is recommended that you do not modify it. 3 | # 4 | # Where should you do additional test configuration? 
5 | # Learn more about the roles of various files in: 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview 7 | # * https://testthat.r-lib.org/articles/special-files.html 8 | 9 | library(testthat) 10 | library(spanishoddata) 11 | 12 | test_check("spanishoddata") 13 | -------------------------------------------------------------------------------- /tests/testthat/test-internal_utils.R: -------------------------------------------------------------------------------- 1 | # Prepare the testing environment using bundled xml files to avoid downloading data from the internet 2 | 3 | extdata_path <- system.file("extdata", package = "spanishoddata") 4 | gz_files <- list.files(extdata_path, pattern = "(data_links_.*\\.xml\\.gz)|(url_file_sizes_v[1-2]\\.csv\\.gz)", full.names = TRUE) 5 | 6 | if (length(gz_files) == 0) stop("No gzipped XML files found.") 7 | 8 | # Create a temporary directory 9 | test_data_dir <- tempfile() 10 | dir.create(test_data_dir, recursive = TRUE) 11 | # Create metadata directory 12 | metadata_dir <- paste0(test_data_dir, "/", spod_subfolder_metadata_cache()) 13 | dir.create(metadata_dir, recursive = TRUE) 14 | 15 | current_date <- format(Sys.time(), format = "%Y-%m-%d", usetz = FALSE) 16 | 17 | # Copy and rename gzipped XML files to the temporary directory 18 | for (gz_file in gz_files) { 19 | if (grepl("v1", gz_file)) { 20 | file.copy(gz_file, file.path(metadata_dir, paste0("data_links_v1_", current_date, ".xml.gz"))) 21 | } else if (grepl("v2", gz_file)) { 22 | file.copy(gz_file, file.path(metadata_dir, paste0("data_links_v2_", current_date, ".xml.gz"))) 23 | } 24 | } 25 | 26 | # Set the environment variable to the test directory 27 | Sys.setenv(SPANISH_OD_DATA_DIR = test_data_dir) 28 | # Sys.getenv("SPANISH_OD_DATA_DIR") 29 | 30 | test_that("single ISO date input", { 31 | dates <- "2023-07-01" 32 | result <- spod_dates_argument_to_dates_seq(dates) 33 | expect_equal(result, as.Date("2023-07-01")) 34 | }) 35 | 36 | test_that("single 
YYYYMMDD date input", { 37 | dates <- "20230701" 38 | result <- spod_dates_argument_to_dates_seq(dates) 39 | expect_equal(result, as.Date("2023-07-01")) 40 | }) 41 | 42 | test_that("vector of ISO dates", { 43 | dates <- c("2023-07-01", "2023-07-03", "2023-07-05") 44 | result <- spod_dates_argument_to_dates_seq(dates) 45 | expect_equal(result, as.Date(c("2023-07-01", "2023-07-03", "2023-07-05"))) 46 | }) 47 | 48 | test_that("vector of YYYYMMDD dates", { 49 | dates <- c("20230701", "20230703", "20230705") 50 | result <- spod_dates_argument_to_dates_seq(dates) 51 | expect_equal(result, as.Date(c("2023-07-01", "2023-07-03", "2023-07-05"))) 52 | }) 53 | 54 | test_that("date range in ISO format", { 55 | dates <- "2023-07-01_2023-07-05" 56 | result <- spod_dates_argument_to_dates_seq(dates) 57 | expect_equal(result, seq.Date(from = as.Date("2023-07-01"), to = as.Date("2023-07-05"), by = "day")) 58 | }) 59 | 60 | test_that("date range in YYYYMMDD format", { 61 | dates <- "20230701_20230705" 62 | result <- spod_dates_argument_to_dates_seq(dates) 63 | expect_equal(result, seq.Date(from = as.Date("2023-07-01"), to = as.Date("2023-07-05"), by = "day")) 64 | }) 65 | 66 | test_that("named vector date range in ISO format", { 67 | dates <- c(start = "2023-07-01", end = "2023-07-05") 68 | result <- spod_dates_argument_to_dates_seq(dates) 69 | expect_equal(result, seq.Date(from = as.Date("2023-07-01"), to = as.Date("2023-07-05"), by = "day")) 70 | }) 71 | 72 | test_that("named vector date range in YYYYMMDD format", { 73 | dates <- c(start = "20230701", end = "20230705") 74 | result <- spod_dates_argument_to_dates_seq(dates) 75 | expect_equal(result, seq.Date(from = as.Date("2023-07-01"), to = as.Date("2023-07-05"), by = "day")) 76 | }) 77 | 78 | test_that("regex pattern matching dates", { 79 | dates <- "^202307" 80 | result <- spod_dates_argument_to_dates_seq(dates) 81 | expected_dates <- seq.Date(from = as.Date("2023-07-01"), to = as.Date("2023-07-31"), by = "day") 82 | 
expect_equal(result, expected_dates) 83 | }) 84 | 85 | test_that("invalid input type", { 86 | dates <- 20230701 87 | expect_error(spod_dates_argument_to_dates_seq(dates), "Invalid date input format. Please provide a character vector or Date object.") 88 | }) 89 | 90 | test_that("dates span both v1 and v2 data", { 91 | dates <- c("2021-05-09", "2022-01-01") 92 | expect_error( 93 | spod_dates_argument_to_dates_seq(dates), 94 | "Dates found in both v1 and v2 data." 95 | ) 96 | }) 97 | 98 | # clean up 99 | unlink(test_data_dir, recursive = TRUE) 100 | -------------------------------------------------------------------------------- /tests/testthat/test-quick-get.R: -------------------------------------------------------------------------------- 1 | # some tests are disabled as the API endpoint is not working because of the new restrictions see https://github.com/rOpenSpain/spanishoddata/issues/162 2 | test_that("spod_quick_get_od fails out of range dates", { 3 | skip_on_ci() 4 | skip_on_cran() 5 | expect_error( 6 | spod_quick_get_od( 7 | date = "2021-12-31", 8 | ), 9 | ".*Must be within valid range.*" 10 | ) 11 | }) 12 | 13 | test_that("spod_quick_get_od fails on invalid date format", { 14 | expect_error( 15 | spod_quick_get_od( 16 | date = "202212-31" 17 | ), 18 | ".*Invalid date format.*" 19 | ) 20 | }) 21 | 22 | test_that("spod_quick_get_od fails on incorrect distances", { 23 | expect_error( 24 | spod_quick_get_od( 25 | date = "2022-01-01", 26 | distances = c("invalid", "0-200") 27 | ), 28 | ".*Invalid distance value.*" 29 | ) 30 | }) 31 | 32 | test_that("spod_quick_get_od fails on negative min_trips", { 33 | expect_error( 34 | spod_quick_get_od( 35 | date = "2022-01-02", 36 | min_trips = -1 37 | ), 38 | ".*Assertion.*failed.*" 39 | ) 40 | }) 41 | 42 | test_that("spod_quick_get_od fails on invalid municipality IDs", { 43 | expect_error( 44 | spod_quick_get_od( 45 | date = "2022-01-03", 46 | id_origin = "invalid" 47 | ), 48 | ".*Invalid municipality ID.*" 49 | ) 50 | 
51 | expect_error( 52 | spod_quick_get_od( 53 | date = "2022-01-04", 54 | id_destination = "invalid" 55 | ), 56 | ".*Invalid municipality ID.*" 57 | ) 58 | 59 | expect_error( 60 | spod_quick_get_od( 61 | date = "2022-01-05", 62 | id_origin = "invalid", 63 | id_destination = "invalid" 64 | ), 65 | ".*Invalid municipality ID.*" 66 | ) 67 | }) 68 | -------------------------------------------------------------------------------- /tools/generate_package_logo.R: -------------------------------------------------------------------------------- 1 | library(mapSpain) 2 | library(hexSticker) 3 | library(spanishoddata) 4 | library(flowmapper) 5 | library(tidyverse) 6 | library(sf) 7 | 8 | od <- spod_get("od", zones = "distr", dates = "2022-04-06") 9 | districts <- spod_get_zones("distr", ver = 2) 10 | 11 | spain_for_vis <- esp_get_ccaa() 12 | spain_for_join <- esp_get_ccaa(moveCAN = FALSE) 13 | 14 | flows_by_district <- od |> 15 | group_by(id_origin, id_destination) |> 16 | summarise(n_trips = sum(n_trips, na.rm = TRUE), .groups = "drop") |> 17 | collect() |> 18 | arrange(desc(id_origin), id_destination, n_trips) 19 | 20 | 21 | district_centroids <- districts |> 22 | st_centroid() |> 23 | st_transform(crs = st_crs(spain_for_join)) 24 | 25 | ca_distr <- district_centroids |> 26 | st_join(spain_for_join) |> 27 | st_drop_geometry() |> 28 | filter(!is.na(ccaa.shortname.en)) |> 29 | select(id, ca_name = ccaa.shortname.en) 30 | 31 | flows_by_ca <- flows_by_district |> 32 | left_join(ca_distr |> 33 | rename(id_orig = ca_name), 34 | by = c("id_origin" = "id") 35 | ) |> 36 | left_join(ca_distr |> 37 | rename(id_dest = ca_name), 38 | by = c("id_destination" = "id") 39 | ) |> 40 | group_by(id_orig, id_dest) |> 41 | summarise(n_trips = sum(n_trips, na.rm = TRUE), 42 | .groups = "drop") |> 43 | rename(o = id_orig, d = id_dest, value = n_trips) 44 | 45 | spain_for_vis_coords <- spain_for_vis |> 46 | st_centroid() |> 47 | st_coordinates() |> 48 | as.data.frame() |> 49 | mutate(name = 
spain_for_vis$ccaa.shortname.en) |> 50 | rename(x = X, y = Y) 51 | 52 | # create base ggplot with boundaries removing any extra elements 53 | base_plot <- ggplot() + 54 | geom_sf(data = spain_for_vis, fill=NA, col = "grey30", linewidth = 0.05)+ 55 | theme_classic(base_size = 20) + 56 | labs(title = "", 57 | subtitle = "", fill = "", caption = "") + 58 | theme( 59 | axis.line = element_blank(), 60 | axis.text = element_blank(), 61 | axis.ticks = element_blank(), 62 | axis.title = element_blank(), 63 | panel.background = element_rect(fill='transparent'), 64 | plot.background = element_rect(fill='transparent', color=NA), 65 | panel.grid.major = element_blank(), 66 | panel.grid.minor = element_blank(), 67 | legend.background = element_rect(fill='transparent'), 68 | legend.box.background = element_rect(fill='transparent') 69 | ) 70 | 71 | # flows_by_ca_twoway_coords |> arrange(desc(flow_ab)) 72 | # add the flows 73 | flows_plot <- base_plot|> 74 | add_flowmap( 75 | od = flows_by_ca, 76 | nodes = spain_for_vis_coords, 77 | node_radius_factor = 1, 78 | edge_width_factor = 1, 79 | arrow_point_angle = 35, 80 | node_buffer_factor = 1.5, 81 | outline_col = "grey80", 82 | k_node = 10 # play around with this parameter to aggregate nodes and flows 83 | ) 84 | 85 | # customise colours and remove legend, as we need a clean image for the logo 86 | flows_plot <- flows_plot + 87 | guides(fill="none") + 88 | scale_fill_gradient(low="#FABB29", high = "#AB061F") 89 | 90 | 91 | # flows_plot 92 | 93 | sticker(flows_plot, 94 | 95 | # package name 96 | package= "spanishoddata", 97 | p_size=4, p_y = 1.6, 98 | p_color = "gray25", p_family="Roboto", 99 | 100 | # ggplot image size and position 101 | s_x=1.02, s_y=1.19, s_width=2.6, s_height=2.72, 102 | 103 | # white hex 104 | h_fill="#ffffff", h_color="grey", h_size=1.3, 105 | 106 | # url 107 | url = "github.com/rOpenSpain/spanishoddata", 108 | u_color= "gray25", 109 | u_family = "Roboto", 110 | u_size = 1.2, 111 | 112 | # save output name and 
resolution 113 | filename="./man/figures/logo.png", dpi=300 # 114 | ) 115 | -------------------------------------------------------------------------------- /tools/meta-data-update-and-submission.R: -------------------------------------------------------------------------------- 1 | # before release 2 | # usethis::use_version("patch") 3 | # usethis::use_version("minor") 4 | usethis::use_tidy_description() 5 | cffr::cff_write() 6 | codemetar::write_codemeta(write_minimeta = T) 7 | # urlchecker::url_check() 8 | # devtools::check(remote = TRUE, manual = TRUE) 9 | # devtools::check(cran = TRUE) 10 | # devtools::check_win_devel() 11 | # revdepcheck::revdep_check(num_workers = 4) 12 | 13 | # devtools::submit_cran() 14 | 15 | # usethis::use_github_release() 16 | # usethis::use_dev_version(push = TRUE) 17 | -------------------------------------------------------------------------------- /vignettes/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | *.R 3 | *_files 4 | 5 | /.quarto/ 6 | -------------------------------------------------------------------------------- /vignettes/disaggregation.qmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "OD data disaggregation" 3 | vignette: > 4 | %\VignetteIndexEntry{OD data disaggregation} 5 | %\VignetteEngine{quarto::html} 6 | %\VignetteEncoding{UTF-8} 7 | bibliography: references.bib 8 | number-sections: true 9 | format: 10 | html: 11 | toc: true 12 | toc-depth: 2 13 | code-overflow: wrap 14 | execute: 15 | eval: false 16 | --- 17 | 18 | 19 | ```{r, include = FALSE} 20 | knitr::opts_chunk$set( 21 | collapse = TRUE, 22 | comment = "#>" 23 | ) 24 | ``` 25 | 26 | 27 | ```{r setup} 28 | remotes::install_dev("tmap") 29 | library(spanishoddata) 30 | library(tidyverse) 31 | library(sf) 32 | library(tmap) 33 | ``` 34 | 35 | ## Introduction 36 | 37 | This vignette demonstrates origin-destination (OD) data disaggregation using the 
`{odjitter}` package. 38 | The package is an implementation of the method described in the paper "Jittering: A Computationally Efficient Method for Generating Realistic Route Networks from Origin-Destination Data" [@lovelace2022jittering] for adding value to OD data by disaggregating desire lines. 39 | This can be especially useful for transport planning purposes in which high levels of geographic resolution are required (see also the [`od2net`](https://od2net.org/){target="_blank"} for direct network generation from OD data). 40 | 41 | ## Data preparation 42 | 43 | We'll start by loading a week's worth of origin-destination data for the city of Salamanca, building on the example in the README (note: these chunks are not evaluated): 44 | 45 | ```{r} 46 | od_db <- spod_get( 47 | type = "od", 48 | zones = "distritos", 49 | dates = c(start = "2024-03-01", end = "2024-03-07") 50 | ) 51 | distritos <- spod_get_zones("distritos", ver = 2) 52 | distritos_wgs84 <- distritos |> 53 | sf::st_simplify(dTolerance = 200) |> 54 | sf::st_transform(4326) 55 | od_national_aggregated <- od_db |> 56 | group_by(id_origin, id_destination) |> 57 | summarise(Trips = sum(n_trips), .groups = "drop") |> 58 | filter(Trips > 500) |> 59 | collect() |> 60 | arrange(desc(Trips)) 61 | od_national_aggregated 62 | od_national_interzonal <- od_national_aggregated |> 63 | filter(id_origin != id_destination) 64 | salamanca_zones <- zonebuilder::zb_zone("Salamanca") 65 | distritos_salamanca <- distritos_wgs84[salamanca_zones, ] 66 | ids_salamanca <- distritos_salamanca$id 67 | od_salamanca <- od_national_interzonal |> 68 | filter(id_origin %in% ids_salamanca) |> 69 | filter(id_destination %in% ids_salamanca) |> 70 | arrange(Trips) 71 | od_salamanca_sf <- od::od_to_sf( 72 | od_salamanca, 73 | z = distritos_salamanca 74 | ) 75 | ``` 76 | 77 | 78 | ## Disaggregating desire lines 79 | 80 | For this you'll need some additional dependencies: 81 | 82 | ```{r} 83 | 
remotes::install_github("dabreegster/odjitter", subdir = "r") 84 | remotes::install_github("nptscot/osmactive") 85 | ``` 86 | 87 | We'll get the road network from OSM: 88 | 89 | ```{r} 90 | #| results: hide 91 | salamanca_boundary <- sf::st_union(distritos_salamanca) 92 | osm_full <- osmactive::get_travel_network(salamanca_boundary) 93 | ``` 94 | 95 | ```{r} 96 | #| label: osm 97 | osm <- osm_full[salamanca_boundary, ] 98 | drive_net <- osmactive::get_driving_network(osm) 99 | drive_net_major <- osmactive::get_driving_network_major(osm) 100 | cycle_net <- osmactive::get_cycling_network(osm) 101 | cycle_net <- osmactive::distance_to_road(cycle_net, drive_net_major) 102 | cycle_net <- osmactive::classify_cycle_infrastructure(cycle_net) 103 | map_net <- osmactive::plot_osm_tmap(cycle_net) 104 | map_net 105 | ``` 106 | 107 | ![](media/osm.png) 108 | 109 | We can use the road network to disaggregate the desire lines: 110 | 111 | ```{r} 112 | od_jittered <- odjitter::jitter( 113 | od_salamanca_sf, 114 | zones = distritos_salamanca, 115 | subpoints = drive_net, 116 | disaggregation_threshold = 1000, 117 | disaggregation_key = "Trips" 118 | ) 119 | ``` 120 | 121 | Let's plot the disaggregated desire lines: 122 | 123 | ```{r} 124 | #| label: disaggregated 125 | od_jittered |> 126 | arrange(Trips) |> 127 | ggplot() + 128 | geom_sf(aes(colour = Trips), size = 1) + 129 | scale_colour_viridis_c() + 130 | geom_sf(data = drive_net_major, colour = "black") + 131 | theme_void() 132 | ``` 133 | 134 | ![](media/disaggregated.png) 135 | 136 | The results show that you can add value to the OD data by disaggregating the desire lines with the `{odjitter}` package. 137 | This can be useful for understanding the spatial distribution of trips within a zone for transport planning. 138 | 139 | We have plotted the disaggregated desire lines on top of the major road network in Salamanca. 140 | A next step could be routing to help prioritise infrastructure improvements. 
141 | -------------------------------------------------------------------------------- /vignettes/media/disaggregated.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/vignettes/media/disaggregated.png -------------------------------------------------------------------------------- /vignettes/media/mermaid-that-does-not-render.mermaid: -------------------------------------------------------------------------------- 1 | flowchart TB 2 | M["For daily aggregated flows at municipal level (2022 onwards) 3 | get data from GraphQL API directly"] -->|"spod_quick_get_od( 4 | date = '2022-03-15')" 5 | | F["'tbl' object with 'id' for origins and destinations and trip counts"] 6 | 7 | A["Hourly flows and other data for a few days starting in 2020 8 | get data from raw CSV.gz files"] -->|"spod_get( 9 | type = 'origin-destination', 10 | zones = 'districts', 11 | dates = c(start = '2020-02-14', end = '2020-02-21') )" 12 | | F 13 | 14 | C["Analyse longer periods (several months) 15 | or even the whole dataset over several years"] 16 | -->|"spod_convert( 17 | type = 'origin-destination', 18 | zones = 'districts', 19 | dates = c(start = '2020-02-14', end = '2021-05-09') )"| D["path to converted data"] 20 | D -->|"spod_connect()" | F 21 | 22 | F -->|"dplyr functions: select(), filter(), mutate(), group_by(), summarise(), etc..."| G["dplyr::collect()"] 23 | G --> H["**flows_data** - data.frame / tibble"] --> R["left_join(**zones**, **flows_data**)"] --> XX["spatial data matched by 'id' with aggregated mobility flows"] 24 | 25 | X["spatial data with zones"] --> |"spod_get_zones( 26 | zones = 'districts', 27 | ver = 1 )"| Y["**zones** - polygons with zones in sf object 28 | with 'id' that match with origins and destinations"] --> R 29 | -------------------------------------------------------------------------------- /vignettes/media/osm.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/vignettes/media/osm.png -------------------------------------------------------------------------------- /vignettes/references.bib: -------------------------------------------------------------------------------- 1 | @article{lovelace_od_2024, 2 | title = {od: {Manipulate} and {Map} {Origin}-{Destination} {Data}}, 3 | url = {https://cran.r-project.org/package=od}, 4 | doi = {10.32614/CRAN.package.od}, 5 | language = {en}, 6 | author = {Lovelace, Robin and Morgan, Malcolm}, 7 | month = aug, 8 | year = {2024} 9 | } 10 | 11 | @Misc{mitms_mobility_web, 12 | title = {Estudio de la movilidad con Big Data (Study of mobility with Big Data)}, 13 | author = {{Ministerio de Transportes y Movilidad Sostenible MITMS}}, 14 | year = {2024}, 15 | url = {https://www.transportes.gob.es/ministerio/proyectos-singulares/estudio-de-movilidad-con-big-data}, 16 | } 17 | 18 | @Manual{mitma_methodology_2020_v3, 19 | title = {Análisis de la movilidad en España con tecnología Big Data durante el estado de alarma para la gestión de la crisis del COVID-19 (Analysis of mobility in Spain with Big Data technology during the state of alarm for COVID-19 crisis management)}, 20 | author = {{Ministerio de Transportes, Movilidad y Agenda Urbana (MITMA)}}, 21 | year = {2021}, 22 | url = {https://cdn.mitma.gob.es/portal-web-drupal/covid-19/bigdata/mitma_-_estudio_movilidad_covid-19_informe_metodologico_v3.pdf}, 23 | } 24 | 25 | @Manual{mitms_methodology_2022_v8, 26 | title = {Estudio de movilidad de viajeros de ámbito nacional aplicando la tecnología Big Data. Informe metodológico (Study of National Traveler mobility Using Big Data Technology. 
Methodological Report)}, 27 | author = {{Ministerio de Transportes y Movilidad Sostenible (MITMS)}}, 28 | year = {2024}, 29 | url = {https://www.transportes.gob.es/recursos_mfom/paginabasica/recursos/a3_informe_metodologico_estudio_movilidad_mitms_v8.pdf}, 30 | } 31 | 32 | 33 | @Manual{duckdb-r, 34 | title = {duckdb: DBI Package for the DuckDB Database Management System}, 35 | author = {Hannes Mühleisen and Mark Raasveldt}, 36 | year = {2024}, 37 | note = {R package version 1.0.0-2}, 38 | url = {https://CRAN.R-project.org/package=duckdb}, 39 | doi = {10.32614/CRAN.package.duckdb} 40 | } 41 | 42 | @Manual{od-r, 43 | title = {od: Manipulate and Map Origin-Destination Data}, 44 | author = {Robin Lovelace and Malcolm Morgan}, 45 | year = {2024}, 46 | note = {R package version 0.5.1}, 47 | url = {https://CRAN.R-project.org/package=od}, 48 | doi = {10.32614/CRAN.package.od} 49 | } 50 | 51 | @Manual{hexSticker-r, 52 | title = {hexSticker: Create Hexagon Sticker in R}, 53 | author = {Guangchuang Yu}, 54 | year = {2020}, 55 | note = {R package version 0.4.9}, 56 | url = {https://CRAN.R-project.org/package=hexSticker}, 57 | doi = {10.32614/CRAN.package.hexSticker} 58 | } 59 | 60 | @Manual{R-mapspain, 61 | title = {{mapSpain}: Administrative Boundaries of Spain}, 62 | year = {2024}, 63 | version = {0.9.2}, 64 | author = {Diego Hernangómez}, 65 | doi = {10.5281/zenodo.5366622}, 66 | url = {https://ropenspain.github.io/mapSpain/}, 67 | abstract = {Administrative Boundaries of Spain at several levels (Autonomous Communities, Provinces, Municipalities) based on the GISCO Eurostat database and CartoBase SIANE from Instituto Geografico Nacional . 
It also provides a leaflet plugin and the ability of downloading and processing static tiles.}, 68 | } 69 | 70 | @Manual{flowmapper-r, 71 | title = {flowmapper: Draw Flows (Migration, Goods, Money, Information) on 'ggplot2' 72 | Plots}, 73 | author = {Johannes Mast}, 74 | year = {2024}, 75 | note = {R package version 0.1.1, commit f8b7ab7942b4a14ffd5342935d2d45a7c7b3e5d2}, 76 | url = {https://github.com/JohMast/flowmapper}, 77 | doi = {10.32614/CRAN.package.flowmapper} 78 | } 79 | 80 | @Manual{flowmapblue_r, 81 | title = {Flowmap.blue widget for R}, 82 | author = {Ilya Boyandin}, 83 | year = {2024}, 84 | url = {https://github.com/FlowmapBlue/flowmapblue.R}, 85 | doi = {10.32614/CRAN.package.flowmapblue}, 86 | } 87 | 88 | @article{lovelace2022jittering, 89 | title={Jittering: A computationally efficient method for generating realistic route networks from origin-destination data}, 90 | author={Lovelace, Robin and F{\'e}lix, Rosa and Carlino, Dustin}, 91 | journal={Findings}, 92 | year={2022}, 93 | publisher={Findings Press} 94 | } 95 | --------------------------------------------------------------------------------