├── .Rbuildignore
├── .github
    ├── .gitignore
    └── workflows
    │   ├── R-CMD-check.yaml
    │   └── rostemplate-gh-pages.yaml
├── .gitignore
├── CITATION.cff
├── DESCRIPTION
├── LICENSE
├── LICENSE.md
├── NAMESPACE
├── NEWS.md
├── R
    ├── available-data.R
    ├── cite.R
    ├── codebook.R
    ├── connect.R
    ├── convert.R
    ├── data-dir.R
    ├── dev-tools.R
    ├── disconnect.R
    ├── download_data.R
    ├── duckdb-helpers.R
    ├── folders.R
    ├── get-zones.R
    ├── get.R
    ├── global-params.R
    ├── internal-utils.R
    ├── onload.R
    ├── quick-get.R
    └── spanishoddata-package.R
├── README.md
├── README.qmd
├── _pkgdown.yml
├── codemeta.json
├── cran-comments.md
├── inst
    ├── CITATION
    ├── extdata
    │   ├── data_links_v1_2024-08-07.xml.gz
    │   ├── data_links_v2_2024-08-07.xml.gz
    │   ├── muni_v2_ref.rds
    │   ├── sql-queries
    │   │   ├── province_names_enum.sql
    │   │   ├── v1-nt-distritos-clean-csv-view-en.sql
    │   │   ├── v1-nt-distritos-clean-csv-view-es.sql
    │   │   ├── v1-nt-distritos-raw-csv-view.sql
    │   │   ├── v1-nt-enum-ntrips.sql
    │   │   ├── v1-nt-municipios-clean-csv-view-en.sql
    │   │   ├── v1-nt-municipios-clean-csv-view-es.sql
    │   │   ├── v1-nt-municipios-raw-csv-view.sql
    │   │   ├── v1-od-distritos-clean-csv-view-en.sql
    │   │   ├── v1-od-distritos-clean-csv-view-es.sql
    │   │   ├── v1-od-distritos-raw-csv-view.sql
    │   │   ├── v1-od-enum-activity-en.sql
    │   │   ├── v1-od-enum-activity-es.sql
    │   │   ├── v1-od-enum-distance.sql
    │   │   ├── v1-od-municipios-clean-csv-view-en.sql
    │   │   ├── v1-od-municipios-clean-csv-view-es.sql
    │   │   ├── v1-od-municipios-raw-csv-view.sql
    │   │   ├── v2-nt-distritos-clean-csv-view-en.sql
    │   │   ├── v2-nt-distritos-clean-csv-view-es.sql
    │   │   ├── v2-nt-distritos-raw-csv-view.sql
    │   │   ├── v2-nt-enum-age.sql
    │   │   ├── v2-nt-enum-ntrips.sql
    │   │   ├── v2-nt-enum-sex-en.sql
    │   │   ├── v2-nt-enum-sex-es.sql
    │   │   ├── v2-nt-gau-clean-csv-view-en.sql
    │   │   ├── v2-nt-gau-clean-csv-view-es.sql
    │   │   ├── v2-nt-gau-raw-csv-view.sql
    │   │   ├── v2-nt-municipios-clean-csv-view-en.sql
    │   │   ├── v2-nt-municipios-clean-csv-view-es.sql
    │   │   ├── v2-nt-municipios-raw-csv-view.sql
    │   │   ├── v2-od-distritos-clean-csv-view-en.sql
    │   │   ├── v2-od-distritos-clean-csv-view-es.sql
    │   │   ├── v2-od-distritos-raw-csv-view.sql
    │   │   ├── v2-od-enum-activity-en.sql
    │   │   ├── v2-od-enum-activity-es.sql
    │   │   ├── v2-od-enum-age.sql
    │   │   ├── v2-od-enum-distance.sql
    │   │   ├── v2-od-enum-income.sql
    │   │   ├── v2-od-enum-sex-en.sql
    │   │   ├── v2-od-enum-sex-es.sql
    │   │   ├── v2-od-gau-clean-csv-view-en.sql
    │   │   ├── v2-od-gau-clean-csv-view-es.sql
    │   │   ├── v2-od-gau-raw-csv-view.sql
    │   │   ├── v2-od-municipios-clean-csv-view-en.sql
    │   │   ├── v2-od-municipios-clean-csv-view-es.sql
    │   │   ├── v2-od-municipios-raw-csv-view.sql
    │   │   ├── v2-os-distritos-clean-csv-view-en.sql
    │   │   ├── v2-os-distritos-clean-csv-view-es.sql
    │   │   ├── v2-os-distritos-raw-csv-view.sql
    │   │   ├── v2-os-gau-clean-csv-view-en.sql
    │   │   ├── v2-os-gau-clean-csv-view-es.sql
    │   │   ├── v2-os-gau-raw-csv-view.sql
    │   │   ├── v2-os-municipios-clean-csv-view-en.sql
    │   │   ├── v2-os-municipios-clean-csv-view-es.sql
    │   │   └── v2-os-municipios-raw-csv-view.sql
    │   ├── url_file_sizes_v1.txt.gz
    │   └── url_file_sizes_v2.txt.gz
    ├── schemaorg.json
    └── vignette-include
    │   ├── csv-date-filter-note.qmd
    │   ├── install-package.qmd
    │   ├── missing-dates-outages.qmd
    │   ├── overall-approach.qmd
    │   └── setup-data-directory.qmd
├── man
    ├── figures
    │   ├── README-desire-lines-1.png
    │   ├── README-distritos-1.png
    │   ├── README-salamanca-plot-1.png
    │   ├── README-salamanca-zones-1.png
    │   ├── README-trips-per-hour-1.png
    │   ├── card.png
    │   ├── flowmapblue-animated.png
    │   ├── flowmapblue-standard-01.png
    │   ├── flowmapblue-standard-02.png
    │   ├── flowmapblue-standard-time.png
    │   ├── flows_plot_all_districts.png
    │   ├── flows_plot_barcelona.png
    │   ├── lifecycle-deprecated.svg
    │   ├── lifecycle-experimental.svg
    │   ├── lifecycle-stable.svg
    │   ├── lifecycle-superseded.svg
    │   ├── logo-before-hex.png
    │   ├── logo.png
    │   ├── package-functions-overview.svg
    │   ├── zones_barcelona_fua_plot.png
    │   └── zones_barcelona_plot.png
    ├── global_quiet_param.Rd
    ├── spanishoddata-package.Rd
    ├── spod_available_data.Rd
    ├── spod_available_data_v1.Rd
    ├── spod_available_data_v2.Rd
    ├── spod_available_ram.Rd
    ├── spod_cite.Rd
    ├── spod_clean_zones_v1.Rd
    ├── spod_clean_zones_v2.Rd
    ├── spod_codebook.Rd
    ├── spod_connect.Rd
    ├── spod_convert.Rd
    ├── spod_convert_dates_to_ranges.Rd
    ├── spod_dates_argument_to_dates_seq.Rd
    ├── spod_disconnect.Rd
    ├── spod_download.Rd
    ├── spod_download_zones_v1.Rd
    ├── spod_duckdb_create_province_enum.Rd
    ├── spod_duckdb_filter_by_dates.Rd
    ├── spod_duckdb_limit_resources.Rd
    ├── spod_duckdb_number_of_trips.Rd
    ├── spod_duckdb_od.Rd
    ├── spod_duckdb_overnight_stays.Rd
    ├── spod_duckdb_set_temp.Rd
    ├── spod_expand_dates_from_regex.Rd
    ├── spod_files_sizes.Rd
    ├── spod_get.Rd
    ├── spod_get_data_dir.Rd
    ├── spod_get_file_size_from_url.Rd
    ├── spod_get_hmac_secret.Rd
    ├── spod_get_latest_v1_file_list.Rd
    ├── spod_get_latest_v2_file_list.Rd
    ├── spod_get_temp_dir.Rd
    ├── spod_get_valid_dates.Rd
    ├── spod_get_zones.Rd
    ├── spod_get_zones_v1.Rd
    ├── spod_get_zones_v2.Rd
    ├── spod_graphql_valid_dates.Rd
    ├── spod_infer_data_v_from_dates.Rd
    ├── spod_is_data_version_overlaps.Rd
    ├── spod_match_data_type.Rd
    ├── spod_match_data_type_for_local_folders.Rd
    ├── spod_quick_get_od.Rd
    ├── spod_quick_get_zones.Rd
    ├── spod_read_sql.Rd
    ├── spod_request_length.Rd
    ├── spod_set_data_dir.Rd
    ├── spod_sql_where_dates.Rd
    ├── spod_subfolder_clean_data_cache.Rd
    ├── spod_subfolder_metadata_cache.Rd
    ├── spod_subfolder_raw_data_cache.Rd
    ├── spod_unique_separated_ids.Rd
    └── spod_zone_names_en2es.Rd
├── pkgdown
    ├── assets
    │   ├── codebooks
    │   │   ├── 20241024_validaciones_estudios_basicos_bigdata_v1.0_en.pdf
    │   │   ├── README_-_formato_ficheros_movilidad_MITMA_20201228_en.pdf
    │   │   ├── a3_informe_metodologico_estudio_movilidad_mitms_v8_en.pdf
    │   │   └── mitma_-_estudio_movilidad_covid-19_informe_metodologico_v3_en.pdf
    │   └── media
    │   │   ├── barcelona-time.gif
    │   │   ├── flowmapblue-animated.mp4
    │   │   ├── flowmapblue-standard-time.mp4
    │   │   ├── flowmapblue-standard.mp4
    │   │   └── spain-folding-flows.gif
    └── favicon
    │   ├── apple-touch-icon-120x120.png
    │   ├── apple-touch-icon-152x152.png
    │   ├── apple-touch-icon-180x180.png
    │   ├── apple-touch-icon-60x60.png
    │   ├── apple-touch-icon-76x76.png
    │   ├── apple-touch-icon.png
    │   ├── favicon-16x16.png
    │   ├── favicon-32x32.png
    │   ├── favicon-96x96.png
    │   ├── favicon.ico
    │   ├── favicon.svg
    │   ├── site.webmanifest
    │   ├── web-app-manifest-192x192.png
    │   └── web-app-manifest-512x512.png
├── spanishoddata.Rproj
├── tests
    ├── testthat.R
    └── testthat
    │   ├── test-internal_utils.R
    │   └── test-quick-get.R
├── tools
    ├── generate_package_logo.R
    └── meta-data-update-and-submission.R
└── vignettes
    ├── .gitignore
    ├── convert.qmd
    ├── disaggregation.qmd
    ├── flowmaps-interactive.qmd
    ├── flowmaps-static.qmd
    ├── media
        ├── disaggregated.png
        ├── duckdb-parquet-csv-speed-mean-hourly-v1.svg
        ├── flows_plot.svg
        ├── flows_plot_barcelona.svg
        ├── mermaid-that-does-not-render.mermaid
        └── osm.png
    ├── quick-get.qmd
    ├── references.bib
    ├── v1-2020-2021-mitma-data-codebook.qmd
    └── v2-2022-onwards-mitma-data-codebook.qmd


/.Rbuildignore:
--------------------------------------------------------------------------------
 1 | ^data-raw$
 2 | ^.*\.csv$
 3 | ^.*\.csv\.gz$
 4 | README.qmd
 5 | .db$
 6 | ^draft-code$
 7 | ^_pkgdown\.yml$
 8 | ^docs$
 9 | ^pkgdown$
10 | ^\.github$
11 | ^LICENSE\.md$
12 | ^.*\.Rproj$
13 | ^\.Rproj\.user$
14 | ^private$
15 | ^doc$
16 | ^Meta$
17 | ^vignettes/*_files$
18 | ^CITATION\.cff$
19 | ^codemeta\.json$
20 | ^vignettes/.quarto$
21 | ^vignettes/webmedia$
22 | \.mp4$
23 | \.gif$
24 | ^tools$
25 | ^_pkgdown\.yaml$
26 | ^vignettes/disaggregation\.qmd$
27 | ^vignettes/flowmaps-interactive\.qmd$
28 | ^vignettes/flowmaps-static\.qmd$
29 | ^man/figures/card\.png$
30 | ^man/figures/.*flowmapblue.*
31 | ^cran-comments\.md$
32 | ^CRAN-SUBMISSION$
33 | 


--------------------------------------------------------------------------------
/.github/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | R-version
3 | depends.Rds
4 | 


--------------------------------------------------------------------------------
/.github/workflows/R-CMD-check.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/v2/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main]
 6 |   pull_request:
 7 |     branches: [main]
 8 | 
 9 | name: R-CMD-check
10 | 
11 | permissions: read-all
12 | 
13 | jobs:
14 |   R-CMD-check:
15 |     runs-on: ${{ matrix.config.os }}
16 | 
17 |     name: ${{ matrix.config.os }} (${{ matrix.config.r }})
18 | 
19 |     strategy:
20 |       fail-fast: false
21 |       matrix:
22 |         config:
23 |           - {os: macos-latest,   r: 'release'}
24 |           - {os: windows-latest, r: 'release'}
25 |           - {os: ubuntu-latest,   r: 'devel', http-user-agent: 'release'}
26 |           - {os: ubuntu-latest,   r: 'release'}
27 |           - {os: ubuntu-latest,   r: 'oldrel-1'}
28 | 
29 |     env:
30 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
31 |       R_KEEP_PKG_SOURCE: yes
32 | 
33 |     steps:
34 |       - uses: actions/checkout@v4
35 | 
36 |       - uses: r-lib/actions/setup-pandoc@v2
37 | 
38 |       - uses: r-lib/actions/setup-r@v2
39 |         with:
40 |           r-version: ${{ matrix.config.r }}
41 |           http-user-agent: ${{ matrix.config.http-user-agent }}
42 |           use-public-rspm: true
43 | 
44 |       - uses: r-lib/actions/setup-r-dependencies@v2
45 |         with:
46 |           extra-packages: any::rcmdcheck
47 |           needs: check
48 | 
49 |       - uses: r-lib/actions/check-r-package@v2
50 |         with:
51 |           upload-snapshots: true
52 |           build_args: 'c("--no-manual", "--compact-vignettes=gs+qpdf")'
53 | 


--------------------------------------------------------------------------------
/.github/workflows/rostemplate-gh-pages.yaml:
--------------------------------------------------------------------------------
 1 | # Workflow derived from https://github.com/r-lib/actions/tree/master/examples
 2 | # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help
 3 | on:
 4 |   push:
 5 |     branches: [main, master]
 6 |     tags: ['*']
 7 |   release:
 8 |     types: [published]
 9 | 
10 | name: rostemplate-gh-pages
11 | permissions: write-all
12 | 
13 | jobs:
14 |   rostemplate-gh-pages:
15 |     runs-on: ubuntu-latest
16 |     env:
17 |       GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
18 |     steps:
19 |       - uses: actions/checkout@v4
20 | 
21 |       - uses: r-lib/actions/setup-pandoc@v2
22 | 
23 |       - uses: r-lib/actions/setup-r@v2
24 |         with:
25 |           use-public-rspm: true
26 | 
27 |       - uses: r-lib/actions/setup-r-dependencies@v2
28 |         with:
29 |           extra-packages:
30 |             local::.
31 |             any::pkgdown
32 |             ropenspain/rostemplate
33 |           needs: website
34 | 
35 |       - name: Deploy package
36 |         run: |
37 |           git config --local user.name "github-actions[bot]"
38 |           git config --local user.email "41898282+github-actions[bot]@users.noreply.github.com"
39 |           Rscript -e 'pkgdown::deploy_to_branch(new_process = FALSE, clean = TRUE)'
40 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Ignore all gz files
 2 | *.gz
 3 | 
 4 | # Exceptions for gz files in inst/extdata
 5 | !inst/extdata/*.gz
 6 | 
 7 | movilidad.duckdb
 8 | .Rhistory
 9 | zonificacion_distritos*
10 | *.duckdb
11 | docs
12 | private
13 | 
14 | /.quarto/
15 | .Rproj.user
16 | inst/doc
17 | .Renviron
18 | /doc/
19 | /Meta/
20 | vignettes/.quarto
21 | 
22 | # macOS artifacts
23 | .DS_Store
24 | ._.DS_Store
25 | **/.DS_Store
26 | **/._.DS_Store
27 | .Rprofile
28 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: spanishoddata
 2 | Title: Get Spanish Origin-Destination Data
 3 | Version: 0.1.1.9000
 4 | Authors@R: c(
 5 |     person("Egor", "Kotov", , "kotov.egor@gmail.com", role = c("aut", "cre"),
 6 |            comment = c(ORCID = "0000-0001-6690-5345")),
 7 |     person("Robin", "Lovelace", , "rob00x@gmail.com", role = "aut",
 8 |            comment = c(ORCID = "0000-0001-5679-6536")),
 9 |     person("Eugeni", "Vidal-Tortosa", role = "ctb",
10 |            comment = c(ORCID = "0000-0001-5199-4103"))
11 |   )
12 | Description: Gain seamless access to origin-destination (OD) data from the
13 |     Spanish Ministry of Transport, hosted at
14 |     <https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/opendata-movilidad>.
15 |     This package simplifies the management of these large datasets by
16 |     providing tools to download zone boundaries, handle associated
17 |     origin-destination data, and process it efficiently with the 'duckdb'
18 |     database interface.  Local caching minimizes repeated downloads,
19 |     streamlining workflows for researchers and analysts. Extensive
20 |     documentation is available at
21 |     <https://ropenspain.github.io/spanishoddata/index.html>, offering
22 |     guides on creating static and dynamic mobility flow visualizations and
23 |     transforming large datasets into analysis-ready formats.
24 | License: MIT + file LICENSE
25 | URL: https://rOpenSpain.github.io/spanishoddata/,
26 |     https://github.com/rOpenSpain/spanishoddata
27 | BugReports: https://github.com/rOpenSpain/spanishoddata/issues
28 | Depends:
29 |     R (>= 4.1.0)
30 | Imports: 
31 |     checkmate,
32 |     curl (>= 5.0.0),
33 |     DBI,
34 |     digest,
35 |     dplyr,
36 |     duckdb (>= 0.5.0),
37 |     fs,
38 |     glue,
39 |     here,
40 |     httr2,
41 |     jsonlite,
42 |     lifecycle,
43 |     lubridate,
44 |     memoise,
45 |     memuse,
46 |     openssl,
47 |     parallelly,
48 |     purrr,
49 |     readr,
50 |     rlang,
51 |     sf,
52 |     stats,
53 |     stringr,
54 |     tibble,
55 |     xml2
56 | Suggests: 
57 |     flowmapblue,
58 |     flowmapper (>= 0.1.2),
59 |     furrr,
60 |     future,
61 |     hexSticker,
62 |     mapSpain,
63 |     quarto,
64 |     remotes,
65 |     scales,
66 |     testthat (>= 3.0.0),
67 |     tidyverse
68 | VignetteBuilder:
69 |     quarto
70 | Config/Needs/website: rmarkdown
71 | Config/testthat/edition: 3
72 | Encoding: UTF-8
73 | Roxygen: list(markdown = TRUE)
74 | RoxygenNote: 7.3.2
75 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | YEAR: 2024
2 | COPYRIGHT HOLDER: spanishoddata authors
3 | 


--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
 1 | # MIT License
 2 | 
 3 | Copyright (c) 2024 spanishoddata authors
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(spod_available_data)
 4 | export(spod_cite)
 5 | export(spod_codebook)
 6 | export(spod_connect)
 7 | export(spod_convert)
 8 | export(spod_disconnect)
 9 | export(spod_download)
10 | export(spod_get)
11 | export(spod_get_data_dir)
12 | export(spod_get_valid_dates)
13 | export(spod_get_zones)
14 | export(spod_quick_get_od)
15 | export(spod_quick_get_zones)
16 | export(spod_set_data_dir)
17 | importFrom(lifecycle,deprecated)
18 | importFrom(memoise,memoise)
19 | importFrom(rlang,.data)
20 | importFrom(stats,median)
21 | importFrom(utils,URLencode)
22 | importFrom(utils,vignette)
23 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | # spanishoddata (development version)
 2 | 
 3 | ## New features
 4 | 
  5 | * `spod_quick_get_zones()` is a new function to quickly get municipality geometries to match with the data retrieved with `spod_quick_get_od()` [#163](https://github.com/rOpenSpain/spanishoddata/pull/163). Requests to get geometries are cached in the memory of the current R session with the `memoise` package.
 6 | 
 7 | ## Bug fixes
 8 | 
 9 | * `spod_quick_get_od()` is working again. We fixed it to work with the updated API of the Spanish Ministry of Transport (PR [#163](https://github.com/rOpenSpain/spanishoddata/pull/163), issue [#162](https://github.com/rOpenSpain/spanishoddata/issues/162)). It will remain experimental, as the API may change in the future.
10 | 
 11 | * `spod_convert()` can now accept `overwrite = 'update'` with `save_format = 'parquet'` ([#161](https://github.com/rOpenSpain/spanishoddata/pull/161)). Previously it failed because of an incorrect check that only allowed `TRUE` or `FALSE` ([#160](https://github.com/rOpenSpain/spanishoddata/issues/160)).
12 | 
13 | # spanishoddata 0.1.1
14 | 
15 | ## New features
16 | 
17 | * `spod_cite()` function to easily cite the package and the data ([#134](https://github.com/rOpenSpain/spanishoddata/pull/134))
18 | 
19 | ## Breaking changes
20 | 
 21 | * `time_slot` column is superseded by the `hour` column in the output of `spod_get()` and `spod_convert()`. `time_slot` is deprecated. It is still present in the tables, but will be removed at the end of 2025, so going forward please use the new `hour` column. Otherwise it is exactly the same as before, this is just a name change. (#132)
22 | 
23 | ## Other changes
24 | 
25 | * `spod_quick_get()` does not rely on metadata download anymore and can be used without setting the data directory with `spod_set_data_dir()` (and therefore does not cause a warning if the data directory is not set).
26 | 
27 | * `hour` (ex-`time_slot`) column is now right next to the date column in the output of `spod_get()` and `spod_convert()` (#)
28 | 
29 | * maximum available CPU cores check is now turned off to improve compatibility when running the package from within a container in high performance computing environments (see [#130](https://github.com/rOpenSpain/spanishoddata/issues/130) and [#140](https://github.com/rOpenSpain/spanishoddata/pull/140) for details)
30 | 
31 | * minor documentation improvements and updates
32 | 
33 | * minor bug fixes
34 | 
35 | # spanishoddata 0.1.0
36 | 
37 | * Initial CRAN submission.
38 | 


--------------------------------------------------------------------------------
/R/cite.R:
--------------------------------------------------------------------------------
  1 | #' Cite the package and the data
  2 | #'
  3 | #' @param what Character vector specifying what to cite.
  4 | #'   Can include "package", "data", "methodology_v1", "methodology_v2", or "all".
  5 | #'   Default is "all".
  6 | #' @param format Character vector specifying output format(s).
  7 | #'   Can include "text", "markdown", "bibtex", or "all".
  8 | #'   Default is "all".
  9 | #' @return Nothing. Prints citation in plain text, markdown, BibTeX, or all formats at once to console.
 10 | #' @export
 11 | #'
 12 | #' @examples
 13 | #' # Cite everything in all formats
 14 | #' \dontrun{
 15 | #'  spod_cite()
 16 | #' }
 17 | #'
 18 | #' # Cite just the package in BibTeX format
 19 | #' \dontrun{
 20 | #'  spod_cite(what = "package", format = "bibtex")
 21 | #' }
 22 | #'
 23 | #' # Cite both methodologies in plain text
 24 | #' \dontrun{
 25 | #'  spod_cite(what = c("methodology_v1", "methodology_v2"), format = "text")
 26 | #' }
 27 | spod_cite <- function(
 28 |   what = "all",
 29 |   format = "all"
 30 | ) {
 31 |   # 1. Define valid inputs
 32 |   valid_what   <- c("all", "package", "data", "methodology_v1", "methodology_v2")
 33 |   valid_format <- c("all", "text", "markdown", "bibtex")
 34 |   
 35 |   # 2. Use checkmate to validate arguments
 36 |   checkmate::assertCharacter(what, any.missing = FALSE, min.len = 1)
 37 |   checkmate::assertCharacter(format, any.missing = FALSE, min.len = 1)
 38 |   checkmate::assertSubset(what, choices = valid_what)
 39 |   checkmate::assertSubset(format, choices = valid_format)
 40 |   
 41 |   # 3. Expand "all" options
 42 |   # If "all" is included in what, use all sources except the "all" string itself
 43 |   if ("all" %in% what) {
 44 |     what <- unique(c(what, valid_what[valid_what != "all"]))
 45 |   }
 46 |   # If "all" is included in format, use all formats except the "all" string itself
 47 |   if ("all" %in% format) {
 48 |     format <- unique(c(format, valid_format[valid_format != "all"]))
 49 |   }
 50 |   
 51 |   # Now remove the literal "all" from each to avoid confusion
 52 |   what   <- setdiff(what, "all")
 53 |   format <- setdiff(format, "all")
 54 |   
 55 |   # 4. Get the citation object
 56 |   cit <- utils::citation("spanishoddata")
 57 |   
 58 |   # 5. Function to get citation by key
 59 |   get_citation_by_key <- function(key) {
 60 |     idx <- which(sapply(cit, function(x) x$key == key))
 61 |     if (length(idx) > 0) return(cit[idx])
 62 |     return(NULL)
 63 |   }
 64 |   
 65 |   # 6. Map what options to citation keys
 66 |   citation_keys <- list(
 67 |     package        = "r-spanishoddata",
 68 |     data           = "mitms_mobility_web",
 69 |     methodology_v1 = "mitma_methodology_2020_v3",
 70 |     methodology_v2 = "mitms_methodology_2022_v8"
 71 |   )
 72 |   
 73 |   # 7. Collect the requested citations
 74 |   citations_to_show <- list()
 75 |   for (w in what) {
 76 |     key <- citation_keys[[w]]
 77 |     cit_entry <- get_citation_by_key(key)
 78 |     if (!is.null(cit_entry)) {
 79 |       citations_to_show[[w]] <- cit_entry
 80 |     }
 81 |   }
 82 |   
 83 |   # If nothing was found (e.g., user gave an empty vector)
 84 |   if (length(citations_to_show) == 0) {
 85 |     message("No valid citations found for the requested 'what'.")
 86 |     return(invisible(NULL))
 87 |   }
 88 |   
 89 |   # 8. Helper functions for formatting output
 90 |   
 91 |   # Plain text
 92 |   format_text <- function(citation) {
 93 |     text <- format(citation, style = "text")
 94 |     # remove asterisks or underscores used for emphasis in the default text
 95 |     text <- gsub("\\*([^*]*)\\*", "\\1", text)
 96 |     text <- gsub("_([^_]*)_", "\\1", text)
 97 |     # Clean up URLs (remove angle brackets)
 98 |     text <- gsub("<(http[^>]*)>", "\\1", text)
 99 |     paste(text, collapse = "\n")
100 |   }
101 |   
102 |   # Markdown
103 |   format_markdown <- function(citation) {
104 |     text <- format(citation, style = "text")
105 |     # minimal transformation to markdown: italicize text within asterisks
106 |     text <- gsub("\\*([^*]*)\\*", "_\\1_", text)
107 |     # remove angle brackets around URLs
108 |     text <- gsub("<(http[^>]*)>", "\\1", text)
109 |     paste(text, collapse = "\n")
110 |   }
111 |   
112 |   # 9. Print the citations in requested formats
113 |   for (f in format) {
114 |     if (f == "text") {
115 |       cat("\nPlain text citations:\n---------------------\n")
116 |       for (cit_item in citations_to_show) {
117 |         cat(format_text(cit_item), "\n\n")
118 |       }
119 |     } else if (f == "markdown") {
120 |       cat("\nMarkdown citations:\n-------------------\n")
121 |       for (cit_item in citations_to_show) {
122 |         cat(format_markdown(cit_item), "\n\n")
123 |       }
124 |     } else if (f == "bibtex") {
125 |       cat("\nBibTeX citations:\n-----------------\n")
126 |       for (cit_item in citations_to_show) {
127 |         print(utils::toBibtex(cit_item))
128 |         cat("\n")
129 |       }
130 |     }
131 |   }
132 |   
133 |   invisible(NULL)
134 | }
135 | 


--------------------------------------------------------------------------------
/R/codebook.R:
--------------------------------------------------------------------------------
 1 | #' View codebooks for v1 and v2 open mobility data
 2 | #' 
 3 | #' @description
 4 | #' 
 5 | #' `r lifecycle::badge("stable")`
 6 | #' 
 7 | #' Opens relevant vignette with a codebook for v1 (2020-2021) and v2 (2022 onwards) data or provide a webpage if vignette is missing.
 8 | #' 
 9 | #' 
10 | #' @param ver An `integer` or `numeric` value. The version of the data. Defaults to 1. Can be `1` for v1 (2020-2021) data and 2 for v2 (2022 onwards) data.
11 | #' @return Nothing, opens vignette if it is installed. If vignette is missing, prints a message with a link to a webpage with the codebook.
12 | #' @importFrom utils vignette
13 | #' @export
14 | #' @examples
15 | #' 
16 | #' # View codebook for v1 (2020-2021) data
17 | #' spod_codebook(ver = 1)
18 | #' 
19 | #' # View codebook for v2 (2022 onwards) data
20 | #' spod_codebook(ver = 2)
21 | #' 
22 | spod_codebook <- function(ver = 1) {
23 |   # Validate input
24 |   checkmate::assertIntegerish(ver, max.len = 1)
25 |   if (!ver %in% c(1, 2)) {
26 |     stop("Invalid version number. Must be 1 (for v1 2020-2021 data) or 2 (for v2 2022 onwards).")
27 |   }
28 | 
29 |   if (ver == 1){
30 |     help <- vignette(
31 |       topic = "v1-2020-2021-mitma-data-codebook",
32 |       package = "spanishoddata"
33 |     )
34 |     if( inherits(help, what = "vignette") ){
35 |       return(help)
36 |     } else {
37 |       message("For some reason the codebook was not installed with the package. Please refer to the online version at: https://ropenspain.github.io/spanishoddata/articles/v1-2020-2021-mitma-data-codebook.html")
38 |     }
39 |   } else if (ver == 2) {
40 |     help <- vignette(
41 |       topic = "v2-2022-onwards-mitma-data-codebook",
42 |       package = "spanishoddata"
43 |     ) 
44 |     if( inherits(help, what = "vignette") ){
45 |       return(help)
46 |     } else {
47 |       message("For some reason the codebook was not installed with the package. Please refer to the online version at: https://ropenspain.github.io/spanishoddata/articles/v2-2022-onwards-mitma-data-codebook.html")
48 |     }
49 |   }
50 | }
51 | 


--------------------------------------------------------------------------------
/R/data-dir.R:
--------------------------------------------------------------------------------
 1 | #' Set the data directory
 2 | #' 
 3 | #' @description
 4 | #' 
 5 | #' `r lifecycle::badge("stable")`
 6 | #' 
 7 | #' This function sets the data directory in the environment variable SPANISH_OD_DATA_DIR, so that all other functions in the package can access the data. It also creates the directory if it doesn't exist.
 8 | #' 
 9 | #' @param data_dir The data directory to set.
10 | #' @inheritParams global_quiet_param
11 | #' @return Nothing. If quiet is `FALSE`, prints a message with the path and confirmation that the path exists.
12 | #' @export
13 | #' @examples
14 | #' spod_set_data_dir(tempdir())
15 | #' 
16 | spod_set_data_dir <- function(
17 |   data_dir,
18 |   quiet = FALSE
19 | ){
20 |   checkmate::assert_character(data_dir, len = 1, null.ok = FALSE)
21 |   checkmate::assert_flag(quiet)
22 | 
23 |   data_dir_abs_path <- fs::path_abs(data_dir)
24 |   
25 |   tryCatch({
26 |     # Check if the directory exists; if not, attempt to create it
27 |     if (!dir.exists(data_dir_abs_path)) {
28 |       if(quiet == FALSE){
29 |         message("Data directory ", data_dir_abs_path, " does not exist. Attempting to create it.")
30 |       }
31 |       fs::dir_create(data_dir_abs_path, recurse = TRUE)
32 |     }
33 |     data_dir_real_path <- fs::path_real(data_dir_abs_path)
34 |     # Check for write permissions
35 |     test_file <- fs::path(data_dir_real_path, ".test_write")
36 |     file.create(test_file)
37 |     fs::file_delete(test_file)
38 |     if(quiet == FALSE){
39 |       message("Data directory is writeable.")
40 |     }
41 |     
42 |     # Set the environment variable
43 |     Sys.setenv(SPANISH_OD_DATA_DIR = data_dir_real_path)
44 |     
45 |     if(quiet == FALSE){
46 |       message("Data directory successfully set to: ", data_dir_real_path)
47 |     }
48 |   }, error = function(e) {
49 |     message("Error: Unable to create or access the directory at '", data_dir_abs_path, "'.")
50 |     message("This may be due to write access restrictions or system permissions issues.")
51 |     message("Please verify that you have write permissions for the specified path and try again.")
52 |     stop(e) # Re-throw the error for debugging purposes, if needed
53 |   })
54 |   
55 |   return(invisible(TRUE))
56 | }
57 | 
#' Get the data directory
#'
#' @description
#' 
#' `r lifecycle::badge("stable")`
#' 
#' Reads the data directory from the environment variable SPANISH_OD_DATA_DIR.
#' When the variable is empty or unset, the session's temporary directory is used instead
#' (with a warning unless `quiet` is `TRUE`). The directory is created if it does not exist yet.
#' @inheritParams global_quiet_param
#' @return A `character` vector of length 1 containing the path to the data directory where the package will download and convert the data.
#' @export
#' @examples
#' spod_set_data_dir(tempdir())
#' spod_get_data_dir()
#' 
spod_get_data_dir <- function(quiet = FALSE) {
  checkmate::assert_flag(quiet)

  configured_dir <- Sys.getenv("SPANISH_OD_DATA_DIR")

  # Fall back to the temporary directory when nothing is configured
  if (!nzchar(configured_dir)) {
    if (!quiet) {
      warning("Warning: SPANISH_OD_DATA_DIR is not set. Using the temporary directory, which is not recommended, as the data will be deleted when the session ends.\n\n To set the data directory, use `Sys.setenv(SPANISH_OD_DATA_DIR = '/path/to/data')` or set SPANISH_OD_DATA_DIR permanently in the environment by editing the `.Renviron` file locally for current project with `usethis::edit_r_environ('project')` or `file.edit('.Renviron')` or globally for all projects with `usethis::edit_r_environ('user')` or `file.edit('~/.Renviron')`.")
    }
    configured_dir <- tempdir()
  }

  # Make sure the directory is there before resolving its real path
  absolute_dir <- fs::path_abs(configured_dir)
  if (!dir.exists(absolute_dir)) {
    fs::dir_create(absolute_dir, recurse = TRUE)
  }

  fs::path_real(absolute_dir)
}
88 | 


--------------------------------------------------------------------------------
/R/dev-tools.R:
--------------------------------------------------------------------------------
# This file is for internal functions that update some of the data packaged with the package. These functions are intended neither for end users nor for use by any other package function.
 2 | 
 3 | #' Get files sizes for remote files of v1 and v2 data and save them into a csv.gz file in the inst/extdata folder.
 4 | #' @param ver The version of the data (1 or 2). Can be both. Defaults to 2, as v1 data is not being updated since 2021.
 5 | #' @return Nothing. Only saves a csv.gz file with up to date file sizes in the inst/extdata folder.
 6 | #' 
 7 | #' @keywords internal
 8 | #' 
 9 | spod_files_sizes <- function(ver = 2) {
10 |   data_dir <- spod_get_data_dir()
11 |   
12 |   if (any(ver %in% 1)){
13 |     v1 <- spod_available_data(1)
14 |     
15 |     # takes about 1 minute
16 |     future::plan(future::multisession, workers = 6)
17 |     v1$remote_file_size_mb <- furrr::future_map_dbl(
18 |       .x = v1$target_url,
19 |       .f = ~ spod_get_file_size_from_url(x_url = .x),
20 |       .progress = TRUE
21 |     )
22 |     future::plan(future::sequential)
23 | 
24 |     v1_url_file_sizes <- v1[, c("target_url", "remote_file_size_mb")]
25 |     readr::write_csv(
26 |       x = v1_url_file_sizes,
27 |       file = "inst/extdata/url_file_sizes_v1.txt.gz"
28 |     )
29 |   }
30 | 
31 |   if (any(ver %in% 2)){
32 |     v2 <- spod_available_data(2)
33 |     if(all(v2$size_imputed == FALSE)){
34 |       stop("all file sizes are known")
35 |     }
36 |     v2_known_size <- v2[v2$size_imputed == FALSE, ]
37 |     v2_unknown_size <- v2[v2$size_imputed == TRUE, ]
38 | 
39 |     # takes about 5 minutes on full data set, but less when only updating the previously uknown files
40 |     future::plan(future::multisession, workers = 6)
41 |     v2_unknown_size$remote_file_size_mb <- furrr::future_map_dbl(
42 |       .x = v2_unknown_size$target_url,
43 |       .f = ~ spod_get_file_size_from_url(x_url = .x),
44 |       .progress = TRUE
45 |     )
46 |     future::plan(future::sequential)
47 | 
48 |     v2_combined <- dplyr::bind_rows(v2_known_size, v2_unknown_size)
49 |     v2_url_file_sizes <- v2_combined[, c("target_url", "remote_file_size_mb")]
50 |     readr::write_csv(
51 |       x = v2_url_file_sizes,
52 |       file = "inst/extdata/url_file_sizes_v2.txt.gz"
53 |     )
54 |   }
55 |   
56 | }
57 | 
58 | 
#' Get file size from URL
#'
#' Requests only the response headers of `x_url` and reads the size from the
#' `Content-Length` header. Header names are matched case-insensitively,
#' because servers may send them in lower case (for example over HTTP/2).
#' When the request was redirected and several `Content-Length` headers are
#' present, the last one (belonging to the final response) is used.
#' @param x_url URL
#' @return File size in MB as a `numeric` of length 1, or `NA_real_` if the response has no `Content-Length` header.
#' @importFrom utils URLencode
#' @keywords internal
spod_get_file_size_from_url <- function(x_url){

  url <- utils::URLencode(x_url)
  headers <- curlGetHeaders(url)

  # One header per element; keep only the Content-Length ones
  length_pattern <- "^content-length:\\s*(\\d+).*$"
  length_lines <- grep(length_pattern, headers, ignore.case = TRUE, value = TRUE)

  # Always return exactly one value so that callers mapping over many URLs
  # (e.g. with `future_map_dbl()`) do not fail on a single odd response
  if (length(length_lines) == 0) {
    return(NA_real_)
  }

  # Drop the trailing line break, then extract the digits of the last match
  final_line <- trimws(length_lines[length(length_lines)])
  content_length_value <- sub(length_pattern, "\\1", final_line, ignore.case = TRUE)

  # Convert bytes to MB (1 MB = 1024 * 1024 bytes)
  file_size_mb <- as.numeric(content_length_value) / (1024 * 1024)

  return(file_size_mb)
}
76 | 


--------------------------------------------------------------------------------
/R/disconnect.R:
--------------------------------------------------------------------------------
 1 | #' Safely disconnect from data and free memory
 2 | #' 
 3 | #' @description
 4 | #' 
 5 | #' `r lifecycle::badge("stable")`
 6 | #' 
 7 | #' This function is to ensure that `DuckDB` connections to CSV.gz files (created via `spod_get()`), as well as to `DuckDB` files or folders of `parquet` files (created via `spod_convert()`) are closed properly to prevent conflicting connections. Essentially this is just a wrapper around `DBI::dbDisconnect()` that reaches out into the `.$src$con` object of the `tbl_duckdb_connection` connection object that is returned to the user via `spod_get()` and `spod_connect()`. After disonnecting the database, it also frees up memory by running `gc()`.
 8 | #' @param tbl_con A `tbl_duckdb_connection` connection object that you get from either `spod_get()` or `spod_connect()`.
 9 | #' @param free_mem A `logical`. Whether to free up memory by running `gc()`. Defaults to `TRUE`.
10 | #' @return No return value, called for side effect of disconnecting from the database and freeing up memory.
11 | #' @export
12 | #' @examplesIf interactive()
13 | #' \donttest{
14 | #' # Set data dir for file downloads
15 | #' spod_set_data_dir(tempdir())
16 | #' 
17 | #' # basic example
18 | #' # create a connection to the v1 data without converting
19 | #' # this creates a duckdb database connection to CSV files
20 | #' od_distr <- spod_get(
21 | #'  "od",
22 | #'  zones = "distr",
23 | #'  dates = c("2020-03-01", "2020-03-02")
24 | #' )
25 | #' # disconnect from the database connection
26 | #' spod_disconnect(od_distr)
27 | #' 
28 | #' # Advanced example
29 | #' # download and convert data
30 | #' dates_1 <- c(start = "2020-02-17", end = "2020-02-19")
31 | #' db_2 <- spod_convert(
32 | #'  type = "od",
33 | #'  zones = "distr",
34 | #'  dates = dates_1,
35 | #'  overwrite = TRUE
36 | #' )
37 | #' 
38 | #' # now connect to the converted data
39 | #' my_od_data_2 <- spod_connect(db_2)
40 | #' 
41 | #' # disconnect from the database
42 | #' spod_disconnect(my_od_data_2)
43 | #' }
44 | #' 
45 | spod_disconnect <- function(
46 |   tbl_con,
47 |   free_mem = TRUE
48 | ) {
49 |   # Validate imputs
50 |   checkmate::assert_class(tbl_con, "tbl_duckdb_connection")
51 |   checkmate::assert_flag(free_mem)
52 | 
53 |   DBI::dbDisconnect(tbl_con$src$con, shutdown = TRUE)
54 |   if (free_mem){
55 |     gc()
56 |   }
57 |   return(invisible(NULL))
58 | }
59 | 


--------------------------------------------------------------------------------
/R/folders.R:
--------------------------------------------------------------------------------
 1 | #' Get metadata cache subfolder name
 2 | #'
 3 | #' Change subfolder name in the code of this function for metadata cache here to apply globally, as all functions in the package should use this function to get the metadata cache path.
 4 | #' @return A `character` string with the subfolder name for the raw data cache.
 5 | #' @keywords internal
 6 | spod_subfolder_metadata_cache <- function() {
 7 |   "metadata_cache"
 8 | }
 9 | 
#' Get raw data cache subfolder name
#'
#' Single place where the raw data cache subfolder name is defined. Change the name here to apply it globally, as all functions in the package should use this function to get the raw data cache path.
#' @inheritParams spod_available_data
#' @return A `character` string with the subfolder name for the raw data cache, including the version subfolder and a trailing slash.
#' @keywords internal
spod_subfolder_raw_data_cache <- function(ver = 1) {
  ver <- as.integer(ver)
  version_supported <- ver %in% c(1, 2)
  if (!version_supported) {
    stop("Invalid version number. Must be 1 or 2.")
  }
  paste0("raw_data_cache", "/v", ver, "/")
}
24 | 
#' Get clean data subfolder name
#'
#' Single place where the clean data subfolder name is defined. Change the name here to apply it globally, as all functions in the package should use this function to get the clean data cache path.
#' @inheritParams spod_available_data
#' @return A `character` string with the subfolder name for the clean data cache, including the version subfolder and a trailing slash.
#' @keywords internal
spod_subfolder_clean_data_cache <- function(ver = 1) {
  ver <- as.integer(ver)
  version_supported <- ver %in% c(1, 2)
  if (!version_supported) {
    stop("Invalid version number. Must be 1 or 2.")
  }
  paste0("clean_data", "/v", ver, "/")
}
39 | 
#' Get temporary directory for DuckDB intermediate spilling
#'
#' @description
#' Get the path to the temp folder for DuckDB for \href{https://duckdb.org/2024/07/09/memory-management.html#intermediate-spilling}{intermediate spilling} in case the set memory limit and/or physical memory of the computer is too low to perform the query. The folder is a `temp` subfolder of `data_dir` and is created if it does not exist.
#' @param data_dir The directory where the data is stored. Defaults to the value returned by `spod_get_data_dir()`.
#' @return A `character` string with the path to the temp folder for `DuckDB` for \href{https://duckdb.org/2024/07/09/memory-management.html#intermediate-spilling}{intermediate spilling}.
#' @keywords internal
spod_get_temp_dir <- function(
  data_dir = spod_get_data_dir()
) {
  spill_dir <- fs::path(data_dir, "temp")
  spill_dir_missing <- !dir.exists(spill_dir)
  if (spill_dir_missing) fs::dir_create(spill_dir)
  spill_dir
}
56 | 


--------------------------------------------------------------------------------
/R/global-params.R:
--------------------------------------------------------------------------------
 1 | #' @title Global Quiet Parameter
 2 | #'
 3 | #' @description
 4 | #' Documentation for the `quiet` parameter, used globally.
 5 | #'
 6 | #' @param quiet A `logical` value indicating whether to suppress messages. Default is `FALSE`.
 7 | #' @return Nothing. This function is just a placeholder for global quiet parameter.
 8 | #' @keywords internal
 9 | global_quiet_param <- function(quiet = FALSE){
10 |   # this is just a placeholder for global quiet parameter
11 | }
12 | 


--------------------------------------------------------------------------------
/R/onload.R:
--------------------------------------------------------------------------------
 1 | .onLoad <- function(libname, pkgname) {
 2 |   op <- options()
 3 |   op.spanishoddata <- list(
 4 |     spanishoddata.graphql_api_endpoint = "https://mapas-movilidad.transportes.gob.es/api/graphql",
 5 |     spanishoddata.user_agent = "spanishoddata R package, https://github.com/rOpenSpain/spanishoddata/"
 6 |   )
 7 |   toset <- !(names(op.spanishoddata) %in% names(op))
 8 |   if (any(toset)) options(op.spanishoddata[toset])
 9 | 
10 |   invisible()
11 | }
12 | 


--------------------------------------------------------------------------------
/R/spanishoddata-package.R:
--------------------------------------------------------------------------------
1 | #' @keywords internal
2 | "_PACKAGE"
3 | 
4 | ## usethis namespace: start
5 | #' @importFrom lifecycle deprecated
6 | ## usethis namespace: end
7 | NULL
8 | 


--------------------------------------------------------------------------------
/_pkgdown.yml:
--------------------------------------------------------------------------------
# pkgdown configuration for the package documentation website.

# Public address of the site
url: https://rOpenSpain.github.io/spanishoddata/
# Site theme: Bootstrap 5 with the template taken from the rostemplate package
template:
  bootstrap: 5
  package: rostemplate

  # Image referenced by the Open Graph metadata of the pages
  opengraph:
    image:
      src: man/figures/card.png

# Personal websites linked from the author names
authors:
  Egor Kotov:
    href: "https://www.ekotov.pro"
  Robin Lovelace:
    href: "https://www.robinlovelace.net/"

# Articles grouped under a single "Documentation" section
articles:
- title: Documentation
  navbar: ~
  contents:
  - v1-2020-2021-mitma-data-codebook
  - v2-2022-onwards-mitma-data-codebook
  - quick-get
  - convert
  - disaggregation
  - flowmaps-static
  - flowmaps-interactive

# Order of the navigation bar components
navbar:
  structure:
    left:  [intro, reference, articles, tutorials, news]
    right: [search, github, lightswitch]

# Function reference index, grouped by use case; a function may be listed
# in more than one group
reference:
  - title: "Analysing up to 1 week of data"
    desc: >
      Quickly download and analyse just a few days of mobility data
    contents:
      - spod_available_data
      - spod_get_zones
      - spod_get
      - spod_disconnect
  - title: "Analysing long time periods (months or even years)"
    desc: >
      Download data for longer periods, convert them into analysis ready format such as `DuckDB` or `Parquet` for out-of-memory analysis of this large data
    contents:
      - spod_available_data
      - spod_get_zones
      - spod_download
      - spod_convert
      - spod_connect
      - spod_disconnect
  - title: "Analysing up to 1 day of trips with no extra variables"
    desc: >
      Quickly get a single day of flows between municipalities (without hourly data or any other attributes) for 2022 and onwards
    contents:
      - spod_quick_get_od
      - spod_quick_get_zones
  - title: "Helper functions"
    contents:
      - spod_codebook
      - spod_available_data
      - spod_get_valid_dates
      - spod_set_data_dir
      - spod_get_data_dir
      - spod_cite
66 | 


--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | 
2 | ## R CMD check results
3 | 
4 | 0 errors | 0 warnings | 1 note
5 | 
6 | * Introduced the dependency on R >= 4.1.0 to address the NOTE: "Missing dependency on R >= 4.1.0 because package code uses the pipe"
7 | 
8 | * The links to www.ine.es and www.transportes.gob.es give false positives with 403, while in fact they work just fine in the web browser. It must be some bot protection of the Spanish websites.
9 | 


--------------------------------------------------------------------------------
/inst/CITATION:
--------------------------------------------------------------------------------
 1 | c(
 2 | bibentry(
 3 |        header = "To cite the `spanishoddata` R package use:",
 4 |        key      = "r-spanishoddata",
 5 |        bibtype  = "Manual",
 6 |        title    = "spanishoddata",
 7 |        author   = c(
 8 |        person("Egor", "Kotov", , "kotov.egor@gmail.com", role = c("aut", "cre"),
 9 |               comment = c(ORCID = "0000-0001-6690-5345")),
10 |        person("Robin", "Lovelace", , "rob00x@gmail.com", role = "aut",
11 |               comment = c(ORCID = "0000-0001-5679-6536")),
12 |        person("Eugeni", "Vidal-Tortosa", role = "ctb",
13 |               comment = c(ORCID = "0000-0001-5199-4103"))
14 |        ),
15 |        year     = "2024",
16 |        url      = "https://github.com/rOpenSpain/spanishoddata",
17 |        doi      = "10.32614/CRAN.package.spanishoddata"
18 | ),
19 | 
20 | 
21 | bibentry(
22 |        header = "To cite the official website of the mobility study use:",
23 |        key      = "mitms_mobility_web",
24 |        bibtype  = "Misc",
25 |        title    = "Estudio de la movilidad con Big Data (Study of mobility with Big Data)",
26 |        author   = person("Ministerio de Transportes y Movilidad Sostenible (MITMS)"),
27 |        year     = "2024",
28 |        url      = "https://www.transportes.gob.es/ministerio/proyectos-singulares/estudio-de-movilidad-con-big-data",
29 | ),
30 | 
31 | bibentry(
32 |        header = "To cite the methodology for 2022 and onwards data use:",
33 |        key      = "mitms_methodology_2022_v8",
34 |        bibtype  = "Manual",
35 |        title    = "Estudio de movilidad de viajeros de ámbito nacional aplicando la tecnología Big Data. Informe metodológico (Study of National Traveler mobility Using Big Data Technology. Methodological Report)",
36 |        author   = person("Ministerio de Transportes y Movilidad Sostenible (MITMS)"),
37 |        year     = "2024",
38 |        url      = "https://www.transportes.gob.es/recursos_mfom/paginabasica/recursos/a3_informe_metodologico_estudio_movilidad_mitms_v8.pdf",
39 | ),
40 | 
41 | bibentry(
42 |        header = "To cite the methodology for 2020-2021 data use:",
43 |        key      = "mitma_methodology_2020_v3",
44 |        bibtype  = "Manual",
45 |        title    = "Análisis de la movilidad en España con tecnología Big Data durante el estado de alarma para la gestión de la crisis del COVID-19 (Analysis of mobility in Spain with Big Data technology during the state of alarm for COVID-19 crisis management)",
46 |        author   = person("Ministerio de Transportes, Movilidad y Agenda Urbana (MITMA)"),
47 |        year     = "2021",
48 |        url      = "https://cdn.mitma.gob.es/portal-web-drupal/covid-19/bigdata/mitma_-_estudio_movilidad_covid-19_informe_metodologico_v3.pdf",
49 | )
50 | )
51 | 
52 | citFooter("See package website for more details: https://ropenspain.github.io/spanishoddata/")
53 | 


--------------------------------------------------------------------------------
/inst/extdata/data_links_v1_2024-08-07.xml.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/inst/extdata/data_links_v1_2024-08-07.xml.gz


--------------------------------------------------------------------------------
/inst/extdata/data_links_v2_2024-08-07.xml.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/inst/extdata/data_links_v2_2024-08-07.xml.gz


--------------------------------------------------------------------------------
/inst/extdata/muni_v2_ref.rds:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/inst/extdata/muni_v2_ref.rds


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/province_names_enum.sql:
--------------------------------------------------------------------------------
-- ENUM type holding the province names.
-- The order of the values is significant and must not be changed: see the
-- comments inside the list.
CREATE TYPE INE_PROV_NAME_ENUM AS ENUM (
    'UNDEFINED', -- listed first so that the ENUM code is 0 for UNDEFINED and the provinces start at code 1
    /* The provinces below are listed in the order of their official INE codes, so that the internal ENUM code of each province matches its INE code, e.g. Alicante/Alacant gets internal ENUM code 3, and its INE code is "03" */
    'Araba/Álava',
    'Albacete',
    'Alicante/Alacant',
    'Almería',
    'Ávila',
    'Badajoz',
    'Balears, Illes',
    'Barcelona',
    'Burgos',
    'Cáceres',
    'Cádiz',
    'Castellón/Castelló',
    'Ciudad Real',
    'Córdoba',
    'Coruña, A',
    'Cuenca',
    'Girona',
    'Granada',
    'Guadalajara',
    'Gipuzkoa',
    'Huelva',
    'Huesca',
    'Jaén',
    'León',
    'Lleida',
    'Rioja, La',
    'Lugo',
    'Madrid',
    'Málaga',
    'Murcia',
    'Navarra',
    'Ourense',
    'Asturias',
    'Palencia',
    'Palmas, Las',
    'Pontevedra',
    'Salamanca',
    'Santa Cruz de Tenerife',
    'Cantabria',
    'Segovia',
    'Sevilla',
    'Soria',
    'Tarragona',
    'Teruel',
    'Toledo',
    'Valencia/València',
    'Valladolid',
    'Bizkaia',
    'Zamora',
    'Zaragoza',
    'Ceuta',
    'Melilla'
);
57 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-distritos-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW nt_csv_clean AS SELECT
 2 |     fecha AS date,
 3 |     CAST (CASE distrito
 4 |         WHEN 'externo' THEN 'external'
 5 |         ELSE distrito
 6 |         END AS ZONES_ENUM)
 7 |         AS id,
 8 |     CAST(numero_viajes AS N_TRIPS_ENUM) AS n_trips,
 9 |     personas AS n_persons,
10 |     CAST(year AS INTEGER) AS year,
11 |     CAST(month AS INTEGER) AS month,
12 |     CAST(day AS INTEGER) AS day
13 | FROM nt_csv_raw;
14 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-distritos-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW nt_csv_clean AS SELECT
 2 |     fecha,
 3 |     CAST (distrito AS ZONES_ENUM) AS distrito,
 4 |     CAST(numero_viajes AS N_TRIPS_ENUM) AS numero_viajes,
 5 |     personas,
 6 |     CAST(year AS INTEGER) AS ano,
 7 |     CAST(month AS INTEGER) AS mes,
 8 |     CAST(day AS INTEGER) AS dia
 9 | FROM nt_csv_raw;
10 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-distritos-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW nt_csv_raw AS SELECT *
 2 |     /* csv_folder needs to be replaced with a valid path
 3 |     in R use glue::glue() */
 4 |     FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE,
 5 |     columns={{
 6 |     'fecha': 'DATE',
 7 |     'distrito': 'VARCHAR',
 8 |     'numero_viajes': 'VARCHAR',
 9 |     'personas': 'DOUBLE'
10 |     }},
11 |     dateformat='%Y%m%d');
12 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-enum-ntrips.sql:
--------------------------------------------------------------------------------
/* Categories of the number of trips, as they appear in the numero_viajes column. */
CREATE TYPE N_TRIPS_ENUM AS ENUM (
    '0',
    '1',
    '2',
    '2+'
);
2 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-municipios-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
 1 | -- Create the relationships view from the relaciones_distrito_mitma.csv
 2 | CREATE VIEW relations_districts_municipalities AS 
 3 | SELECT 
 4 |     distrito_mitma, 
 5 |     municipio_mitma 
 6 | FROM 
 7 |     read_csv_auto('{relations_districts_municipalities}',
 8 |     delim = '|',
 9 |     columns={{
10 |         'distrito': 'VARCHAR',
11 |         'distrito_mitma': 'VARCHAR',
12 |         'municipio_mitma': 'VARCHAR'
13 |     }}
14 | );
15 | 
16 | -- Create the nt_csv_clean view with the necessary joins, recoding, and aggregation
17 | CREATE VIEW nt_csv_clean AS 
18 | SELECT
19 |     d.fecha AS date,
20 |     CAST(CASE r.municipio_mitma
21 |         WHEN 'externo' THEN 'external'
22 |         ELSE r.municipio_mitma
23 |         END AS ZONES_ENUM) AS id,
24 |     CAST(d.numero_viajes AS N_TRIPS_ENUM) AS n_trips,
25 |     SUM(d.personas) AS n_persons,
26 |     CAST(d.year AS INTEGER) AS year,
27 |     CAST(d.month AS INTEGER) AS month,
28 |     CAST(d.day AS INTEGER) AS day
29 | FROM 
30 |     nt_csv_raw d
31 | LEFT JOIN 
32 |     relations_districts_municipalities r ON d.distrito = r.distrito_mitma 
33 | GROUP BY 
34 |     d.fecha,
35 |     r.municipio_mitma,
36 |     d.numero_viajes,
37 |     d.year,
38 |     d.month,
39 |     d.day;
40 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-municipios-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
 1 | -- Create the relationships view from the relaciones_distrito_mitma.csv
 2 | CREATE VIEW relations_districts_municipalities AS 
 3 | SELECT 
 4 |     distrito_mitma, 
 5 |     municipio_mitma 
 6 | FROM 
 7 |     read_csv_auto('{relations_districts_municipalities}',
 8 |     delim = '|',
 9 |     columns={{
10 |         'distrito': 'VARCHAR',
11 |         'distrito_mitma': 'VARCHAR',
12 |         'municipio_mitma': 'VARCHAR'
13 |     }}
14 | );
15 | 
16 | -- Create the nt_csv_clean view with the necessary joins, recoding, and aggregation
17 | CREATE VIEW nt_csv_clean AS 
18 | SELECT
19 |     d.fecha AS fecha,
20 |     CAST(r.municipio_mitma AS ZONES_ENUM) AS municipio_mitma,
21 |     CAST(d.numero_viajes AS N_TRIPS_ENUM) AS numero_viajes,
22 |     SUM(d.personas) AS personas,
23 |     CAST(d.year AS INTEGER) AS ano,
24 |     CAST(d.month AS INTEGER) AS mes,
25 |     CAST(d.day AS INTEGER) AS dia
26 | FROM 
27 |     nt_csv_raw d
28 | LEFT JOIN 
29 |     relations_districts_municipalities r ON d.distrito = r.distrito_mitma 
30 | GROUP BY 
31 |     d.fecha,
32 |     r.municipio_mitma,
33 |     d.numero_viajes,
34 |     d.year,
35 |     d.month,
36 |     d.day;
37 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-nt-municipios-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW nt_csv_raw AS SELECT *
 2 |     /* csv_folder needs to be replaced with a valid path
 3 |     in R use glue::glue() */
 4 |     FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE,
 5 |     columns={{
 6 |     'fecha': 'DATE',
 7 |     'distrito': 'VARCHAR',
 8 |     'numero_viajes': 'VARCHAR',
 9 |     'personas': 'DOUBLE'
10 |     }},
11 |     dateformat='%Y%m%d');
12 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-distritos-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
/* Clean view of the origin-destination data at district level with English
   column names and English category labels, built on top of od_csv_raw. */
CREATE VIEW od_csv_clean AS SELECT
    fecha AS date,
    periodo AS hour,
    /* zone identifiers: the value 'externo' is recoded to 'external' */
    CAST (CASE origen
        WHEN 'externo' THEN 'external'
        ELSE origen
        END AS ZONES_ENUM)
        AS id_origin,
    CAST (CASE destino
        WHEN 'externo' THEN 'external'
        ELSE destino
        END AS ZONES_ENUM)
        AS id_destination,
    CAST(distancia AS DISTANCE_ENUM) AS distance,
    /* activities: Spanish labels are translated to English; there is no ELSE
       branch, so any other value becomes NULL */
    CAST(CASE actividad_origen
        WHEN 'casa' THEN 'home'
        WHEN 'otros' THEN 'other'
        WHEN 'trabajo_estudio' THEN 'work_or_study'
        END AS ACTIV_ENUM) AS activity_origin,
    CAST(CASE actividad_destino
        WHEN 'casa' THEN 'home'
        WHEN 'otros' THEN 'other'
        WHEN 'trabajo_estudio' THEN 'work_or_study'
        END AS ACTIV_ENUM) AS activity_destination,
    /* province of residence: the two-digit code is kept as is and also
       mapped to the province name; codes outside 01-52 give a NULL name */
    CAST(residencia AS INE_PROV_CODE_ENUM) AS residence_province_ine_code,
    CAST (CASE residencia
        WHEN '01' THEN 'Araba/Álava'
        WHEN '02' THEN 'Albacete'
        WHEN '03' THEN 'Alicante/Alacant'
        WHEN '04' THEN 'Almería'
        WHEN '05' THEN 'Ávila'
        WHEN '06' THEN 'Badajoz'
        WHEN '07' THEN 'Balears, Illes'
        WHEN '08' THEN 'Barcelona'
        WHEN '09' THEN 'Burgos'
        WHEN '10' THEN 'Cáceres'
        WHEN '11' THEN 'Cádiz'
        WHEN '12' THEN 'Castellón/Castelló'
        WHEN '13' THEN 'Ciudad Real'
        WHEN '14' THEN 'Córdoba'
        WHEN '15' THEN 'Coruña, A'
        WHEN '16' THEN 'Cuenca'
        WHEN '17' THEN 'Girona'
        WHEN '18' THEN 'Granada'
        WHEN '19' THEN 'Guadalajara'
        WHEN '20' THEN 'Gipuzkoa'
        WHEN '21' THEN 'Huelva'
        WHEN '22' THEN 'Huesca'
        WHEN '23' THEN 'Jaén'
        WHEN '24' THEN 'León'
        WHEN '25' THEN 'Lleida'
        WHEN '26' THEN 'Rioja, La'
        WHEN '27' THEN 'Lugo'
        WHEN '28' THEN 'Madrid'
        WHEN '29' THEN 'Málaga'
        WHEN '30' THEN 'Murcia'
        WHEN '31' THEN 'Navarra'
        WHEN '32' THEN 'Ourense'
        WHEN '33' THEN 'Asturias'
        WHEN '34' THEN 'Palencia'
        WHEN '35' THEN 'Palmas, Las'
        WHEN '36' THEN 'Pontevedra'
        WHEN '37' THEN 'Salamanca'
        WHEN '38' THEN 'Santa Cruz de Tenerife'
        WHEN '39' THEN 'Cantabria'
        WHEN '40' THEN 'Segovia'
        WHEN '41' THEN 'Sevilla'
        WHEN '42' THEN 'Soria'
        WHEN '43' THEN 'Tarragona'
        WHEN '44' THEN 'Teruel'
        WHEN '45' THEN 'Toledo'
        WHEN '46' THEN 'Valencia/València'
        WHEN '47' THEN 'Valladolid'
        WHEN '48' THEN 'Bizkaia'
        WHEN '49' THEN 'Zamora'
        WHEN '50' THEN 'Zaragoza'
        WHEN '51' THEN 'Ceuta'
        WHEN '52' THEN 'Melilla'
        END AS INE_PROV_NAME_ENUM) AS residence_province_name,
    viajes AS n_trips,
    viajes_km AS trips_total_length_km,
    CAST(year AS INTEGER) AS year,
    CAST(month AS INTEGER) AS month,
    CAST(day AS INTEGER) AS day,
    /* same source column as hour above, exposed under a second name */
    periodo AS time_slot
FROM od_csv_raw;
87 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-distritos-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW od_csv_clean AS SELECT
 2 |     fecha AS date,
 3 |     periodo,
 4 |     CAST(origen AS ZONES_ENUM) AS origen,
 5 |     CAST(destino AS ZONES_ENUM) AS destino,
 6 |     CAST(CASE actividad_origen
 7 |         WHEN 'casa' THEN 'home'
 8 |         WHEN 'otros' THEN 'other'
 9 |         WHEN 'trabajo_estudio' THEN 'work_or_study'
10 |         END AS ACTIV_ENUM) AS actividad_origen,
11 |     CAST(CASE actividad_destino
12 |         WHEN 'casa' THEN 'home'
13 |         WHEN 'otros' THEN 'other'
14 |         WHEN 'trabajo_estudio' THEN 'work_or_study'
15 |         END AS ACTIV_ENUM) AS actividad_destino,
16 |     CAST(distancia AS DISTANCE_ENUM) AS distancia,
17 |     CAST(residencia AS INE_PROV_CODE_ENUM) AS residencia,
18 |     CAST (CASE residencia
19 |         WHEN '01' THEN 'Araba/Álava'
20 |         WHEN '02' THEN 'Albacete'
21 |         WHEN '03' THEN 'Alicante/Alacant'
22 |         WHEN '04' THEN 'Almería'
23 |         WHEN '05' THEN 'Ávila'
24 |         WHEN '06' THEN 'Badajoz'
25 |         WHEN '07' THEN 'Balears, Illes'
26 |         WHEN '08' THEN 'Barcelona'
27 |         WHEN '09' THEN 'Burgos'
28 |         WHEN '10' THEN 'Cáceres'
29 |         WHEN '11' THEN 'Cádiz'
30 |         WHEN '12' THEN 'Castellón/Castelló'
31 |         WHEN '13' THEN 'Ciudad Real'
32 |         WHEN '14' THEN 'Córdoba'
33 |         WHEN '15' THEN 'Coruña, A'
34 |         WHEN '16' THEN 'Cuenca'
35 |         WHEN '17' THEN 'Girona'
36 |         WHEN '18' THEN 'Granada'
37 |         WHEN '19' THEN 'Guadalajara'
38 |         WHEN '20' THEN 'Gipuzkoa'
39 |         WHEN '21' THEN 'Huelva'
40 |         WHEN '22' THEN 'Huesca'
41 |         WHEN '23' THEN 'Jaén'
42 |         WHEN '24' THEN 'León'
43 |         WHEN '25' THEN 'Lleida'
44 |         WHEN '26' THEN 'Rioja, La'
45 |         WHEN '27' THEN 'Lugo'
46 |         WHEN '28' THEN 'Madrid'
47 |         WHEN '29' THEN 'Málaga'
48 |         WHEN '30' THEN 'Murcia'
49 |         WHEN '31' THEN 'Navarra'
50 |         WHEN '32' THEN 'Ourense'
51 |         WHEN '33' THEN 'Asturias'
52 |         WHEN '34' THEN 'Palencia'
53 |         WHEN '35' THEN 'Palmas, Las'
54 |         WHEN '36' THEN 'Pontevedra'
55 |         WHEN '37' THEN 'Salamanca'
56 |         WHEN '38' THEN 'Santa Cruz de Tenerife'
57 |         WHEN '39' THEN 'Cantabria'
58 |         WHEN '40' THEN 'Segovia'
59 |         WHEN '41' THEN 'Sevilla'
60 |         WHEN '42' THEN 'Soria'
61 |         WHEN '43' THEN 'Tarragona'
62 |         WHEN '44' THEN 'Teruel'
63 |         WHEN '45' THEN 'Toledo'
64 |         WHEN '46' THEN 'Valencia/València'
65 |         WHEN '47' THEN 'Valladolid'
66 |         WHEN '48' THEN 'Bizkaia'
67 |         WHEN '49' THEN 'Zamora'
68 |         WHEN '50' THEN 'Zaragoza'
69 |         WHEN '51' THEN 'Ceuta'
70 |         WHEN '52' THEN 'Melilla'
71 |         END AS INE_PROV_NAME_ENUM) AS residencia_nombre,
72 |     viajes,
73 |     viajes_km,
74 |     CAST(year AS INTEGER) AS ano,
75 |     CAST(month AS INTEGER) AS mes,
76 |     CAST(day AS INTEGER) AS dia
77 | FROM od_csv_raw;
78 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-distritos-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | /* Raw view over the gzipped CSV files found below csv_folder.
 2 |    The csv_folder placeholder needs to be replaced with a valid path,
 3 |    in R use glue::glue() */
 4 | CREATE VIEW od_csv_raw AS
 5 | SELECT *
 6 | FROM read_csv_auto(
 7 |     '{csv_folder}**/*.csv.gz',
 8 |     delim = '|',
 9 |     header = TRUE,
10 |     hive_partitioning = TRUE,
11 |     dateformat = '%Y%m%d',
12 |     columns = {{
13 |         'fecha': 'DATE',
14 |         'origen': 'VARCHAR',
15 |         'destino': 'VARCHAR',
16 |         'actividad_origen': 'VARCHAR',
17 |         'actividad_destino': 'VARCHAR',
18 |         'residencia': 'VARCHAR',
19 |         'edad': 'VARCHAR',
20 |         'periodo': 'INTEGER',
21 |         'distancia': 'VARCHAR',
22 |         'viajes': 'DOUBLE',
23 |         'viajes_km': 'DOUBLE'
24 |     }}
25 | );
26 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-enum-activity-en.sql:
--------------------------------------------------------------------------------
1 | /* English activity labels accepted by ACTIV_ENUM. */
2 | CREATE TYPE ACTIV_ENUM AS ENUM ('home', 'work_or_study', 'other');
3 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-enum-activity-es.sql:
--------------------------------------------------------------------------------
1 | /* Spanish activity labels accepted by ACTIV_ENUM. */
2 | CREATE TYPE ACTIV_ENUM AS ENUM ('casa', 'trabajo_estudio', 'otros');
3 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-enum-distance.sql:
--------------------------------------------------------------------------------
1 | /* Distance band labels accepted by DISTANCE_ENUM, in declaration order. */
2 | CREATE TYPE DISTANCE_ENUM AS ENUM (
3 |     '0005-002', '002-005', '005-010',
4 |     '010-050', '050-100', '100+'
5 | );
6 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-municipios-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
  1 | -- Lookup view pairing each distrito_mitma with its municipio_mitma,
  2 | -- read from the relations file (relaciones_distrito_mitma.csv)
  3 | CREATE VIEW relations_districts_municipalities AS
  4 | SELECT distrito_mitma, municipio_mitma
  5 | FROM read_csv_auto(
  6 |     '{relations_districts_municipalities}',
  7 |     delim = '|',
  8 |     columns = {{
  9 |         'distrito': 'VARCHAR',
 10 |         'distrito_mitma': 'VARCHAR',
 11 |         'municipio_mitma': 'VARCHAR'
 12 |     }}
 13 | );
 14 | 
 15 | -- Clean view: raw rows are joined to the lookup for origin and destination,
 16 | -- recoded to English labels and summed per group of the remaining columns
 17 | CREATE VIEW od_csv_clean AS
 18 | SELECT
 19 |     trips.fecha AS date,
 20 |     CAST(orig.municipio_mitma AS ZONES_ENUM) AS id_origin,
 21 |     CAST(dest.municipio_mitma AS ZONES_ENUM) AS id_destination,
 22 |     CAST(CASE trips.actividad_origen
 23 |         WHEN 'casa' THEN 'home'
 24 |         WHEN 'trabajo_estudio' THEN 'work_or_study'
 25 |         WHEN 'otros' THEN 'other'
 26 |         END AS ACTIV_ENUM) AS activity_origin,
 27 |     CAST(CASE trips.actividad_destino
 28 |         WHEN 'casa' THEN 'home'
 29 |         WHEN 'trabajo_estudio' THEN 'work_or_study'
 30 |         WHEN 'otros' THEN 'other'
 31 |         END AS ACTIV_ENUM) AS activity_destination,
 32 |     CAST(trips.residencia AS INE_PROV_CODE_ENUM) AS residence_province_ine_code,
 33 |     CAST(CASE trips.residencia
 34 |         WHEN '01' THEN 'Araba/Álava'
 35 |         WHEN '02' THEN 'Albacete'
 36 |         WHEN '03' THEN 'Alicante/Alacant'
 37 |         WHEN '04' THEN 'Almería'
 38 |         WHEN '05' THEN 'Ávila'
 39 |         WHEN '06' THEN 'Badajoz'
 40 |         WHEN '07' THEN 'Balears, Illes'
 41 |         WHEN '08' THEN 'Barcelona'
 42 |         WHEN '09' THEN 'Burgos'
 43 |         WHEN '10' THEN 'Cáceres'
 44 |         WHEN '11' THEN 'Cádiz'
 45 |         WHEN '12' THEN 'Castellón/Castelló'
 46 |         WHEN '13' THEN 'Ciudad Real'
 47 |         WHEN '14' THEN 'Córdoba'
 48 |         WHEN '15' THEN 'Coruña, A'
 49 |         WHEN '16' THEN 'Cuenca'
 50 |         WHEN '17' THEN 'Girona'
 51 |         WHEN '18' THEN 'Granada'
 52 |         WHEN '19' THEN 'Guadalajara'
 53 |         WHEN '20' THEN 'Gipuzkoa'
 54 |         WHEN '21' THEN 'Huelva'
 55 |         WHEN '22' THEN 'Huesca'
 56 |         WHEN '23' THEN 'Jaén'
 57 |         WHEN '24' THEN 'León'
 58 |         WHEN '25' THEN 'Lleida'
 59 |         WHEN '26' THEN 'Rioja, La'
 60 |         WHEN '27' THEN 'Lugo'
 61 |         WHEN '28' THEN 'Madrid'
 62 |         WHEN '29' THEN 'Málaga'
 63 |         WHEN '30' THEN 'Murcia'
 64 |         WHEN '31' THEN 'Navarra'
 65 |         WHEN '32' THEN 'Ourense'
 66 |         WHEN '33' THEN 'Asturias'
 67 |         WHEN '34' THEN 'Palencia'
 68 |         WHEN '35' THEN 'Palmas, Las'
 69 |         WHEN '36' THEN 'Pontevedra'
 70 |         WHEN '37' THEN 'Salamanca'
 71 |         WHEN '38' THEN 'Santa Cruz de Tenerife'
 72 |         WHEN '39' THEN 'Cantabria'
 73 |         WHEN '40' THEN 'Segovia'
 74 |         WHEN '41' THEN 'Sevilla'
 75 |         WHEN '42' THEN 'Soria'
 76 |         WHEN '43' THEN 'Tarragona'
 77 |         WHEN '44' THEN 'Teruel'
 78 |         WHEN '45' THEN 'Toledo'
 79 |         WHEN '46' THEN 'Valencia/València'
 80 |         WHEN '47' THEN 'Valladolid'
 81 |         WHEN '48' THEN 'Bizkaia'
 82 |         WHEN '49' THEN 'Zamora'
 83 |         WHEN '50' THEN 'Zaragoza'
 84 |         WHEN '51' THEN 'Ceuta'
 85 |         WHEN '52' THEN 'Melilla'
 86 |         END AS INE_PROV_NAME_ENUM) AS residence_province_name,
 87 |     trips.periodo AS hour,
 88 |     CAST(trips.distancia AS DISTANCE_ENUM) AS distance,
 89 |     SUM(trips.viajes) AS n_trips,
 90 |     SUM(trips.viajes_km) AS trips_total_length_km,
 91 |     CAST(trips.year AS INTEGER) AS year,
 92 |     CAST(trips.month AS INTEGER) AS month,
 93 |     CAST(trips.day AS INTEGER) AS day
 94 | FROM od_csv_raw AS trips
 95 | LEFT JOIN relations_districts_municipalities AS orig
 96 |     ON trips.origen = orig.distrito_mitma
 97 | LEFT JOIN relations_districts_municipalities AS dest
 98 |     ON trips.destino = dest.distrito_mitma
 99 | GROUP BY
100 |     trips.fecha,
101 |     orig.municipio_mitma,
102 |     dest.municipio_mitma,
103 |     trips.actividad_origen,
104 |     trips.actividad_destino,
105 |     trips.residencia,
106 |     trips.periodo,
107 |     trips.distancia,
108 |     trips.year,
109 |     trips.month,
110 |     trips.day;
111 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-municipios-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
  1 | -- Create the relationships view (distrito_mitma to municipio_mitma lookup) from the relaciones_distrito_mitma.csv
  2 | CREATE VIEW relations_districts_municipalities AS 
  3 | SELECT
  4 |     distrito_mitma, 
  5 |     municipio_mitma 
  6 | FROM 
  7 |     read_csv_auto('{relations_districts_municipalities}',
  8 |     delim = '|',
  9 |     columns={{
 10 |         'distrito': 'VARCHAR',
 11 |         'distrito_mitma': 'VARCHAR',
 12 |         'municipio_mitma': 'VARCHAR'
 13 |     }}
 14 | );
 15 | 
 16 | -- Create the od_csv_clean view (Spanish column names and labels) with necessary joins, recoding, and aggregation
 17 | CREATE VIEW od_csv_clean AS 
 18 | SELECT
 19 |     d.fecha AS fecha,
 20 |     CAST(m1.municipio_mitma AS ZONES_ENUM) AS origen,
 21 |     CAST(m2.municipio_mitma AS ZONES_ENUM) AS destino,
 22 |     CAST(CASE d.actividad_origen -- identity recode: keeps the Spanish labels of the es ACTIV_ENUM, anything else becomes NULL
 23 |         WHEN 'casa' THEN 'casa'
 24 |         WHEN 'otros' THEN 'otros'
 25 |         WHEN 'trabajo_estudio' THEN 'trabajo_estudio'
 26 |         END AS ACTIV_ENUM) AS actividad_origen,
 27 |     CAST(CASE d.actividad_destino -- same identity recode as for actividad_origen
 28 |         WHEN 'casa' THEN 'casa'
 29 |         WHEN 'otros' THEN 'otros'
 30 |         WHEN 'trabajo_estudio' THEN 'trabajo_estudio'
 31 |         END AS ACTIV_ENUM) AS actividad_destino,
 32 |     CAST(d.residencia AS INE_PROV_CODE_ENUM) AS residencia,
 33 |     CAST(CASE d.residencia -- two-digit code in residencia to province name
 34 |         WHEN '01' THEN 'Araba/Álava'
 35 |         WHEN '02' THEN 'Albacete'
 36 |         WHEN '03' THEN 'Alicante/Alacant'
 37 |         WHEN '04' THEN 'Almería'
 38 |         WHEN '05' THEN 'Ávila'
 39 |         WHEN '06' THEN 'Badajoz'
 40 |         WHEN '07' THEN 'Balears, Illes'
 41 |         WHEN '08' THEN 'Barcelona'
 42 |         WHEN '09' THEN 'Burgos'
 43 |         WHEN '10' THEN 'Cáceres'
 44 |         WHEN '11' THEN 'Cádiz'
 45 |         WHEN '12' THEN 'Castellón/Castelló'
 46 |         WHEN '13' THEN 'Ciudad Real'
 47 |         WHEN '14' THEN 'Córdoba'
 48 |         WHEN '15' THEN 'Coruña, A'
 49 |         WHEN '16' THEN 'Cuenca'
 50 |         WHEN '17' THEN 'Girona'
 51 |         WHEN '18' THEN 'Granada'
 52 |         WHEN '19' THEN 'Guadalajara'
 53 |         WHEN '20' THEN 'Gipuzkoa'
 54 |         WHEN '21' THEN 'Huelva'
 55 |         WHEN '22' THEN 'Huesca'
 56 |         WHEN '23' THEN 'Jaén'
 57 |         WHEN '24' THEN 'León'
 58 |         WHEN '25' THEN 'Lleida'
 59 |         WHEN '26' THEN 'Rioja, La'
 60 |         WHEN '27' THEN 'Lugo'
 61 |         WHEN '28' THEN 'Madrid'
 62 |         WHEN '29' THEN 'Málaga'
 63 |         WHEN '30' THEN 'Murcia'
 64 |         WHEN '31' THEN 'Navarra'
 65 |         WHEN '32' THEN 'Ourense'
 66 |         WHEN '33' THEN 'Asturias'
 67 |         WHEN '34' THEN 'Palencia'
 68 |         WHEN '35' THEN 'Palmas, Las'
 69 |         WHEN '36' THEN 'Pontevedra'
 70 |         WHEN '37' THEN 'Salamanca'
 71 |         WHEN '38' THEN 'Santa Cruz de Tenerife'
 72 |         WHEN '39' THEN 'Cantabria'
 73 |         WHEN '40' THEN 'Segovia'
 74 |         WHEN '41' THEN 'Sevilla'
 75 |         WHEN '42' THEN 'Soria'
 76 |         WHEN '43' THEN 'Tarragona'
 77 |         WHEN '44' THEN 'Teruel'
 78 |         WHEN '45' THEN 'Toledo'
 79 |         WHEN '46' THEN 'Valencia/València'
 80 |         WHEN '47' THEN 'Valladolid'
 81 |         WHEN '48' THEN 'Bizkaia'
 82 |         WHEN '49' THEN 'Zamora'
 83 |         WHEN '50' THEN 'Zaragoza'
 84 |         WHEN '51' THEN 'Ceuta'
 85 |         WHEN '52' THEN 'Melilla'
 86 |         END AS INE_PROV_NAME_ENUM) AS residencia_nombre,
 87 |     d.periodo AS periodo,
 88 |     CAST(d.distancia AS DISTANCE_ENUM) AS distancia,
 89 |     SUM(d.viajes) AS viajes, -- summed over the rows that fall into one group
 90 |     SUM(d.viajes_km) AS viajes_km,
 91 |     CAST(d.year AS INTEGER) AS ano,
 92 |     CAST(d.month AS INTEGER) AS mes,
 93 |     CAST(d.day AS INTEGER) AS dia
 94 | FROM 
 95 |     od_csv_raw d
 96 | LEFT JOIN 
 97 |     relations_districts_municipalities m1 ON d.origen = m1.distrito_mitma
 98 | LEFT JOIN 
 99 |     relations_districts_municipalities m2 ON d.destino = m2.distrito_mitma
100 | GROUP BY 
101 |     d.fecha, 
102 |     m1.municipio_mitma,
103 |     m2.municipio_mitma,
104 |     d.actividad_origen,
105 |     d.actividad_destino,
106 |     d.residencia,
107 |     d.periodo,
108 |     d.distancia,
109 |     d.year,
110 |     d.month,
111 |     d.day;
112 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v1-od-municipios-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW od_csv_raw AS SELECT *
 2 |     /* csv_folder needs to be replaced with a valid path,
 3 |     in R use glue::glue().
 4 |     # This file actually connects to the raw data with districts, because of the bugs described in
 5 |     # http://www.ekotov.pro/mitma-data-issues/issues/011-v1-tpp-mismatch-zone-ids-in-table-and-spatial-data.html
 6 |     # http://www.ekotov.pro/mitma-data-issues/issues/012-v1-tpp-district-files-in-municipality-folders.html
 7 |     # The decision was to use the district data and aggregate it to replicate the municipal data. */
 8 |     FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE,
 9 |     columns={{
10 |         'fecha': 'DATE',
11 |         'origen': 'VARCHAR',
12 |         'destino': 'VARCHAR',
13 |         'actividad_origen': 'VARCHAR',
14 |         'actividad_destino': 'VARCHAR',
15 |         'residencia': 'VARCHAR',
16 |         'edad': 'VARCHAR',
17 |         'periodo': 'INTEGER',
18 |         'distancia': 'VARCHAR',
19 |         'viajes': 'DOUBLE',
20 |         'viajes_km': 'DOUBLE'
21 |     }},
22 |     dateformat='%Y%m%d');
23 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-distritos-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
 1 | /* Clean view over nt_csv_raw with English column names and labels.
 2 |    edad values other than the four listed age bands, and sexo values other
 3 |    than mujer or hombre (for example the literal NA), become NULL. */
 4 | CREATE VIEW nt_csv_clean AS
 5 | SELECT
 6 |     fecha AS date,
 7 |     CAST(zona_pernoctacion AS ZONES_ENUM) AS id,
 8 |     CAST(
 9 |         CASE
10 |             WHEN edad IN ('0-25', '25-45', '45-65', '65-100') THEN edad
11 |         END AS AGE_ENUM
12 |     ) AS age,
13 |     CAST(
14 |         CASE
15 |             WHEN sexo = 'mujer' THEN 'female'
16 |             WHEN sexo = 'hombre' THEN 'male'
17 |         END AS SEX_ENUM
18 |     ) AS sex,
19 |     CAST(numero_viajes AS N_TRIPS_ENUM) AS n_trips,
20 |     personas AS n_persons,
21 |     CAST(year AS INTEGER) AS year,
22 |     CAST(month AS INTEGER) AS month,
23 |     CAST(day AS INTEGER) AS day
24 | FROM nt_csv_raw;
25 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-distritos-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
 1 | /* Clean view over nt_csv_raw keeping the Spanish column names and labels.
 2 |    edad values other than the four listed age bands, and the literal NA in
 3 |    sexo, become NULL. */
 4 | CREATE VIEW nt_csv_clean AS
 5 | SELECT
 6 |     fecha,
 7 |     CAST(zona_pernoctacion AS ZONES_ENUM) AS zona_pernoctacion,
 8 |     CAST(
 9 |         CASE
10 |             WHEN edad IN ('0-25', '25-45', '45-65', '65-100') THEN edad
11 |         END AS AGE_ENUM
12 |     ) AS edad,
13 |     CAST(
14 |         CASE WHEN sexo <> 'NA' THEN sexo END AS SEX_ENUM
15 |     ) AS sexo,
16 |     CAST(numero_viajes AS N_TRIPS_ENUM) AS numero_viajes,
17 |     personas,
18 |     CAST(year AS INTEGER) AS ano,
19 |     CAST(month AS INTEGER) AS mes,
20 |     CAST(day AS INTEGER) AS dia
21 | FROM nt_csv_raw;
22 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-distritos-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW nt_csv_raw AS SELECT *
 2 |     /* Raw view over the gzipped CSV files found below csv_folder.
 3 |     csv_folder needs to be replaced with a valid path, in R use glue::glue() */
 4 |     FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE,
 5 |     columns={{
 6 |     'fecha': 'DATE',
 7 |     'zona_pernoctacion': 'VARCHAR',
 8 |     'edad': 'VARCHAR',
 9 |     'sexo': 'VARCHAR',
10 |     'numero_viajes': 'VARCHAR',
11 |     'personas': 'DOUBLE'
12 |     }},
13 |     dateformat='%Y%m%d');
14 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-enum-age.sql:
--------------------------------------------------------------------------------
1 | /* Age band labels accepted by AGE_ENUM. */
2 | CREATE TYPE AGE_ENUM AS ENUM ('0-25', '25-45', '45-65', '65-100');
3 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-enum-ntrips.sql:
--------------------------------------------------------------------------------
1 | /* Labels accepted by N_TRIPS_ENUM, the type the nt clean views cast
2 |    numero_viajes to. */
3 | CREATE TYPE N_TRIPS_ENUM AS ENUM (
4 |     '0',
5 |     '1',
6 |     '2',
7 |     '2+'
8 | );
9 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-enum-sex-en.sql:
--------------------------------------------------------------------------------
1 | /* English sex labels accepted by SEX_ENUM. */
2 | CREATE TYPE SEX_ENUM AS ENUM ('female', 'male');
3 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-enum-sex-es.sql:
--------------------------------------------------------------------------------
1 | /* Spanish sex labels accepted by SEX_ENUM. */
2 | CREATE TYPE SEX_ENUM AS ENUM ('mujer', 'hombre');
3 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-gau-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
 1 | /* Clean view over nt_csv_raw with English column names and labels.
 2 |    edad values other than the four listed age bands, and sexo values other
 3 |    than mujer or hombre (for example the literal NA), become NULL. */
 4 | CREATE VIEW nt_csv_clean AS
 5 | SELECT
 6 |     fecha AS date,
 7 |     CAST(zona_pernoctacion AS ZONES_ENUM) AS id,
 8 |     CAST(
 9 |         CASE
10 |             WHEN edad IN ('0-25', '25-45', '45-65', '65-100') THEN edad
11 |         END AS AGE_ENUM
12 |     ) AS age,
13 |     CAST(
14 |         CASE
15 |             WHEN sexo = 'mujer' THEN 'female'
16 |             WHEN sexo = 'hombre' THEN 'male'
17 |         END AS SEX_ENUM
18 |     ) AS sex,
19 |     CAST(numero_viajes AS N_TRIPS_ENUM) AS n_trips,
20 |     personas AS n_persons,
21 |     CAST(year AS INTEGER) AS year,
22 |     CAST(month AS INTEGER) AS month,
23 |     CAST(day AS INTEGER) AS day
24 | FROM nt_csv_raw;
25 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-gau-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
 1 | /* Clean view over nt_csv_raw keeping the Spanish column names and labels.
 2 |    edad values other than the four listed age bands, and the literal NA in
 3 |    sexo, become NULL. */
 4 | CREATE VIEW nt_csv_clean AS
 5 | SELECT
 6 |     fecha,
 7 |     CAST(zona_pernoctacion AS ZONES_ENUM) AS zona_pernoctacion,
 8 |     CAST(
 9 |         CASE
10 |             WHEN edad IN ('0-25', '25-45', '45-65', '65-100') THEN edad
11 |         END AS AGE_ENUM
12 |     ) AS edad,
13 |     CAST(
14 |         CASE WHEN sexo <> 'NA' THEN sexo END AS SEX_ENUM
15 |     ) AS sexo,
16 |     CAST(numero_viajes AS N_TRIPS_ENUM) AS numero_viajes,
17 |     personas,
18 |     CAST(year AS INTEGER) AS ano,
19 |     CAST(month AS INTEGER) AS mes,
20 |     CAST(day AS INTEGER) AS dia
21 | FROM nt_csv_raw;
22 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-gau-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW nt_csv_raw AS SELECT *
 2 |     /* Raw view over the gzipped CSV files found below csv_folder.
 3 |     csv_folder needs to be replaced with a valid path, in R use glue::glue() */
 4 |     FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE,
 5 |     columns={{
 6 |     'fecha': 'DATE',
 7 |     'zona_pernoctacion': 'VARCHAR',
 8 |     'edad': 'VARCHAR',
 9 |     'sexo': 'VARCHAR',
10 |     'numero_viajes': 'VARCHAR',
11 |     'personas': 'DOUBLE'
12 |     }},
13 |     dateformat='%Y%m%d');
14 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-municipios-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
 1 | /* Clean view over nt_csv_raw with English column names and labels.
 2 |    edad values other than the four listed age bands, and sexo values other
 3 |    than mujer or hombre (for example the literal NA), become NULL. */
 4 | CREATE VIEW nt_csv_clean AS
 5 | SELECT
 6 |     fecha AS date,
 7 |     CAST(zona_pernoctacion AS ZONES_ENUM) AS id,
 8 |     CAST(
 9 |         CASE
10 |             WHEN edad IN ('0-25', '25-45', '45-65', '65-100') THEN edad
11 |         END AS AGE_ENUM
12 |     ) AS age,
13 |     CAST(
14 |         CASE
15 |             WHEN sexo = 'mujer' THEN 'female'
16 |             WHEN sexo = 'hombre' THEN 'male'
17 |         END AS SEX_ENUM
18 |     ) AS sex,
19 |     CAST(numero_viajes AS N_TRIPS_ENUM) AS n_trips,
20 |     personas AS n_persons,
21 |     CAST(year AS INTEGER) AS year,
22 |     CAST(month AS INTEGER) AS month,
23 |     CAST(day AS INTEGER) AS day
24 | FROM nt_csv_raw;
25 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-municipios-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
 1 | /* Clean view over nt_csv_raw keeping the Spanish column names and labels.
 2 |    edad values other than the four listed age bands, and the literal NA in
 3 |    sexo, become NULL. */
 4 | CREATE VIEW nt_csv_clean AS
 5 | SELECT
 6 |     fecha,
 7 |     CAST(zona_pernoctacion AS ZONES_ENUM) AS zona_pernoctacion,
 8 |     CAST(
 9 |         CASE
10 |             WHEN edad IN ('0-25', '25-45', '45-65', '65-100') THEN edad
11 |         END AS AGE_ENUM
12 |     ) AS edad,
13 |     CAST(
14 |         CASE WHEN sexo <> 'NA' THEN sexo END AS SEX_ENUM
15 |     ) AS sexo,
16 |     CAST(numero_viajes AS N_TRIPS_ENUM) AS numero_viajes,
17 |     personas,
18 |     CAST(year AS INTEGER) AS ano,
19 |     CAST(month AS INTEGER) AS mes,
20 |     CAST(day AS INTEGER) AS dia
21 | FROM nt_csv_raw;
22 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-nt-municipios-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW nt_csv_raw AS SELECT *
 2 |     /* Raw view over the gzipped CSV files found below csv_folder.
 3 |     csv_folder needs to be replaced with a valid path, in R use glue::glue() */
 4 |     FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE,
 5 |     columns={{
 6 |     'fecha': 'DATE',
 7 |     'zona_pernoctacion': 'VARCHAR',
 8 |     'edad': 'VARCHAR',
 9 |     'sexo': 'VARCHAR',
10 |     'numero_viajes': 'VARCHAR',
11 |     'personas': 'DOUBLE'
12 |     }},
13 |     dateformat='%Y%m%d');
14 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-distritos-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
  1 | /* Clean view over od_csv_raw with English column names and labels.
  2 |    Input values that no CASE branch below matches end up as NULL. */
  3 | CREATE VIEW od_csv_clean AS
  4 | SELECT
  5 |     fecha AS date,
  6 |     periodo AS hour,
  7 |     CAST(CASE WHEN origen = 'externo' THEN 'external' ELSE origen END AS ZONES_ENUM) AS id_origin,
  8 |     CAST(CASE WHEN destino = 'externo' THEN 'external' ELSE destino END AS ZONES_ENUM) AS id_destination,
  9 |     CAST(distancia AS DISTANCE_ENUM) AS distance,
 10 |     CAST(CASE actividad_origen
 11 |         WHEN 'casa' THEN 'home'
 12 |         WHEN 'trabajo_estudio' THEN 'work_or_study'
 13 |         WHEN 'frecuente' THEN 'frequent_activity'
 14 |         WHEN 'no_frecuente' THEN 'infrequent_activity'
 15 |         END AS ACTIV_ENUM) AS activity_origin,
 16 |     CAST(CASE actividad_destino
 17 |         WHEN 'casa' THEN 'home'
 18 |         WHEN 'trabajo_estudio' THEN 'work_or_study'
 19 |         WHEN 'frecuente' THEN 'frequent_activity'
 20 |         WHEN 'no_frecuente' THEN 'infrequent_activity'
 21 |         END AS ACTIV_ENUM) AS activity_destination,
 22 |     CASE WHEN estudio_origen_posible = 'si' THEN TRUE
 23 |          WHEN estudio_origen_posible = 'no' THEN FALSE
 24 |          END AS study_possible_origin,
 25 |     CASE WHEN estudio_destino_posible = 'si' THEN TRUE
 26 |          WHEN estudio_destino_posible = 'no' THEN FALSE
 27 |          END AS study_possible_destination,
 28 |     CAST(residencia AS INE_PROV_CODE_ENUM) AS residence_province_ine_code,
 29 |     CAST(CASE residencia
 30 |         WHEN '01' THEN 'Araba/Álava'
 31 |         WHEN '02' THEN 'Albacete'
 32 |         WHEN '03' THEN 'Alicante/Alacant'
 33 |         WHEN '04' THEN 'Almería'
 34 |         WHEN '05' THEN 'Ávila'
 35 |         WHEN '06' THEN 'Badajoz'
 36 |         WHEN '07' THEN 'Balears, Illes'
 37 |         WHEN '08' THEN 'Barcelona'
 38 |         WHEN '09' THEN 'Burgos'
 39 |         WHEN '10' THEN 'Cáceres'
 40 |         WHEN '11' THEN 'Cádiz'
 41 |         WHEN '12' THEN 'Castellón/Castelló'
 42 |         WHEN '13' THEN 'Ciudad Real'
 43 |         WHEN '14' THEN 'Córdoba'
 44 |         WHEN '15' THEN 'Coruña, A'
 45 |         WHEN '16' THEN 'Cuenca'
 46 |         WHEN '17' THEN 'Girona'
 47 |         WHEN '18' THEN 'Granada'
 48 |         WHEN '19' THEN 'Guadalajara'
 49 |         WHEN '20' THEN 'Gipuzkoa'
 50 |         WHEN '21' THEN 'Huelva'
 51 |         WHEN '22' THEN 'Huesca'
 52 |         WHEN '23' THEN 'Jaén'
 53 |         WHEN '24' THEN 'León'
 54 |         WHEN '25' THEN 'Lleida'
 55 |         WHEN '26' THEN 'Rioja, La'
 56 |         WHEN '27' THEN 'Lugo'
 57 |         WHEN '28' THEN 'Madrid'
 58 |         WHEN '29' THEN 'Málaga'
 59 |         WHEN '30' THEN 'Murcia'
 60 |         WHEN '31' THEN 'Navarra'
 61 |         WHEN '32' THEN 'Ourense'
 62 |         WHEN '33' THEN 'Asturias'
 63 |         WHEN '34' THEN 'Palencia'
 64 |         WHEN '35' THEN 'Palmas, Las'
 65 |         WHEN '36' THEN 'Pontevedra'
 66 |         WHEN '37' THEN 'Salamanca'
 67 |         WHEN '38' THEN 'Santa Cruz de Tenerife'
 68 |         WHEN '39' THEN 'Cantabria'
 69 |         WHEN '40' THEN 'Segovia'
 70 |         WHEN '41' THEN 'Sevilla'
 71 |         WHEN '42' THEN 'Soria'
 72 |         WHEN '43' THEN 'Tarragona'
 73 |         WHEN '44' THEN 'Teruel'
 74 |         WHEN '45' THEN 'Toledo'
 75 |         WHEN '46' THEN 'Valencia/València'
 76 |         WHEN '47' THEN 'Valladolid'
 77 |         WHEN '48' THEN 'Bizkaia'
 78 |         WHEN '49' THEN 'Zamora'
 79 |         WHEN '50' THEN 'Zaragoza'
 80 |         WHEN '51' THEN 'Ceuta'
 81 |         WHEN '52' THEN 'Melilla'
 82 |         END AS INE_PROV_NAME_ENUM) AS residence_province_name,
 83 |     CAST(renta AS INCOME_ENUM) AS income,
 84 |     CAST(CASE WHEN edad IN ('0-25', '25-45', '45-65', '65-100') THEN edad END AS AGE_ENUM) AS age,
 85 |     CAST(CASE WHEN sexo = 'mujer' THEN 'female'
 86 |               WHEN sexo = 'hombre' THEN 'male'
 87 |               END AS SEX_ENUM) AS sex,
 88 |     viajes AS n_trips,
 89 |     viajes_km AS trips_total_length_km,
 90 |     CAST(year AS INTEGER) AS year,
 91 |     CAST(month AS INTEGER) AS month,
 92 |     CAST(day AS INTEGER) AS day,
 93 |     periodo AS time_slot /* same source column as hour above */
 94 | FROM od_csv_raw;
 95 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-distritos-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
  1 | CREATE VIEW od_csv_clean AS SELECT /* clean view over od_csv_raw keeping Spanish column names and labels */
  2 |     fecha,
  3 |     periodo,
  4 |     CAST (origen AS ZONES_ENUM) AS origen,
  5 |     CAST (destino AS ZONES_ENUM) AS destino,
  6 |     CAST (distancia AS DISTANCE_ENUM) AS distancia,
  7 |     CAST (CASE actividad_origen /* identity recode: keeps the Spanish labels, anything else becomes NULL */
  8 |         WHEN 'casa' THEN 'casa'
  9 |         WHEN 'frecuente' THEN 'frecuente'
 10 |         WHEN 'no_frecuente' THEN 'no_frecuente'
 11 |         WHEN 'trabajo_estudio' THEN 'trabajo_estudio'
 12 |         END AS ACTIV_ENUM)
 13 |         AS actividad_origen,
 14 |     CAST (CASE actividad_destino /* same identity recode as for actividad_origen */
 15 |         WHEN 'casa' THEN 'casa'
 16 |         WHEN 'frecuente' THEN 'frecuente'
 17 |         WHEN 'no_frecuente' THEN 'no_frecuente'
 18 |         WHEN 'trabajo_estudio' THEN 'trabajo_estudio'
 19 |         END AS ACTIV_ENUM)
 20 |         AS actividad_destino,
 21 |     CASE estudio_origen_posible /* si and no become booleans, anything else NULL */
 22 |         WHEN 'si' THEN TRUE
 23 |         WHEN 'no' THEN FALSE
 24 |         END AS estudio_origen_posible,
 25 |     CASE estudio_destino_posible
 26 |         WHEN 'si' THEN TRUE
 27 |         WHEN 'no' THEN FALSE
 28 |         END AS estudio_destino_posible,
 29 |     CAST(residencia AS INE_PROV_CODE_ENUM) AS residencia,
 30 |     CAST (CASE residencia /* two-digit code in residencia to province name */
 31 |         WHEN '01' THEN 'Araba/Álava'
 32 |         WHEN '02' THEN 'Albacete'
 33 |         WHEN '03' THEN 'Alicante/Alacant'
 34 |         WHEN '04' THEN 'Almería'
 35 |         WHEN '05' THEN 'Ávila'
 36 |         WHEN '06' THEN 'Badajoz'
 37 |         WHEN '07' THEN 'Balears, Illes'
 38 |         WHEN '08' THEN 'Barcelona'
 39 |         WHEN '09' THEN 'Burgos'
 40 |         WHEN '10' THEN 'Cáceres'
 41 |         WHEN '11' THEN 'Cádiz'
 42 |         WHEN '12' THEN 'Castellón/Castelló'
 43 |         WHEN '13' THEN 'Ciudad Real'
 44 |         WHEN '14' THEN 'Córdoba'
 45 |         WHEN '15' THEN 'Coruña, A'
 46 |         WHEN '16' THEN 'Cuenca'
 47 |         WHEN '17' THEN 'Girona'
 48 |         WHEN '18' THEN 'Granada'
 49 |         WHEN '19' THEN 'Guadalajara'
 50 |         WHEN '20' THEN 'Gipuzkoa'
 51 |         WHEN '21' THEN 'Huelva'
 52 |         WHEN '22' THEN 'Huesca'
 53 |         WHEN '23' THEN 'Jaén'
 54 |         WHEN '24' THEN 'León'
 55 |         WHEN '25' THEN 'Lleida'
 56 |         WHEN '26' THEN 'Rioja, La'
 57 |         WHEN '27' THEN 'Lugo'
 58 |         WHEN '28' THEN 'Madrid'
 59 |         WHEN '29' THEN 'Málaga'
 60 |         WHEN '30' THEN 'Murcia'
 61 |         WHEN '31' THEN 'Navarra'
 62 |         WHEN '32' THEN 'Ourense'
 63 |         WHEN '33' THEN 'Asturias'
 64 |         WHEN '34' THEN 'Palencia'
 65 |         WHEN '35' THEN 'Palmas, Las'
 66 |         WHEN '36' THEN 'Pontevedra'
 67 |         WHEN '37' THEN 'Salamanca'
 68 |         WHEN '38' THEN 'Santa Cruz de Tenerife'
 69 |         WHEN '39' THEN 'Cantabria'
 70 |         WHEN '40' THEN 'Segovia'
 71 |         WHEN '41' THEN 'Sevilla'
 72 |         WHEN '42' THEN 'Soria'
 73 |         WHEN '43' THEN 'Tarragona'
 74 |         WHEN '44' THEN 'Teruel'
 75 |         WHEN '45' THEN 'Toledo'
 76 |         WHEN '46' THEN 'Valencia/València'
 77 |         WHEN '47' THEN 'Valladolid'
 78 |         WHEN '48' THEN 'Bizkaia'
 79 |         WHEN '49' THEN 'Zamora'
 80 |         WHEN '50' THEN 'Zaragoza'
 81 |         WHEN '51' THEN 'Ceuta'
 82 |         WHEN '52' THEN 'Melilla'
 83 |         END AS INE_PROV_NAME_ENUM)
 84 |         AS residencia_nombre,
 85 |     CAST (renta AS INCOME_ENUM) AS renta,
 86 |     CAST (CASE edad /* the literal NA and any unlisted value become NULL */
 87 |         WHEN 'NA' THEN NULL
 88 |         WHEN '0-25' THEN '0-25'
 89 |         WHEN '25-45' THEN '25-45'
 90 |         WHEN '45-65' THEN '45-65'
 91 |         WHEN '65-100' THEN '65-100'
 92 |         ELSE NULL
 93 |         END AS AGE_ENUM)
 94 |         AS edad,
 95 |     CAST (CASE sexo /* keeps the Spanish labels, the literal NA and anything else become NULL */
 96 |         WHEN 'NA' THEN NULL
 97 |         WHEN 'mujer' THEN 'mujer'
 98 |         WHEN 'hombre' THEN 'hombre'
 99 |         END AS SEX_ENUM)
100 |         AS sexo,
101 |     viajes,
102 |     viajes_km,
103 |     CAST(year AS INTEGER) AS ano,
104 |     CAST(month AS INTEGER) AS mes,
105 |     CAST(day AS INTEGER) AS dia
106 |     FROM od_csv_raw;
107 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-distritos-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | /* Raw view over the gzipped CSV files found below csv_folder.
 2 |    csv_folder needs to be replaced with a valid path, in R use glue::glue() */
 3 | CREATE VIEW od_csv_raw AS
 4 | SELECT *
 5 | FROM read_csv_auto(
 6 |     '{csv_folder}**/*.csv.gz',
 7 |     delim = '|', header = TRUE, hive_partitioning = TRUE, dateformat = '%Y%m%d',
 8 |     columns = {{
 9 |         'fecha': 'DATE',
10 |         'periodo': 'INTEGER',
11 |         'origen': 'VARCHAR',
12 |         'destino': 'VARCHAR',
13 |         'distancia': 'VARCHAR',
14 |         'actividad_origen': 'VARCHAR',
15 |         'actividad_destino': 'VARCHAR',
16 |         'estudio_origen_posible': 'VARCHAR',
17 |         'estudio_destino_posible': 'VARCHAR',
18 |         'residencia': 'VARCHAR',
19 |         'renta': 'VARCHAR',
20 |         'edad': 'VARCHAR',
21 |         'sexo': 'VARCHAR',
22 |         'viajes': 'DOUBLE',
23 |         'viajes_km': 'DOUBLE'
24 |     }}
25 | );
26 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-enum-activity-en.sql:
--------------------------------------------------------------------------------
1 | /* English activity labels for ACTIV_ENUM. */
2 | CREATE TYPE ACTIV_ENUM AS ENUM (
3 |     'home', 'frequent_activity',
4 |     'infrequent_activity', 'work_or_study'
5 | );
6 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-enum-activity-es.sql:
--------------------------------------------------------------------------------
1 | CREATE TYPE ACTIV_ENUM AS ENUM ( /* Spanish activity labels; the Spanish clean views cast the raw activity columns directly to this type */
2 |     'casa',
3 |     'frecuente', /* the comma is required: adjacent string literals split only by a newline are concatenated into a single value */
4 |     'no_frecuente',
5 |     'trabajo_estudio');
6 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-enum-age.sql:
--------------------------------------------------------------------------------
1 | /* Age-bracket labels for AGE_ENUM, lowest bracket first. */
2 | CREATE TYPE AGE_ENUM AS ENUM (
3 |     '0-25', '25-45',
4 |     '45-65', '65-100'
5 | );
6 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-enum-distance.sql:
--------------------------------------------------------------------------------
1 | /* Distance-band labels for DISTANCE_ENUM, shortest band first. */
2 | CREATE TYPE DISTANCE_ENUM AS ENUM (
3 |     '0.5-2', '2-10',
4 |     '10-50', '>50'
5 | );
6 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-enum-income.sql:
--------------------------------------------------------------------------------
1 | /* Income-bracket labels for INCOME_ENUM, lowest bracket first. */
2 | CREATE TYPE INCOME_ENUM AS ENUM (
3 |     '<10', '10-15', '>15'
4 | );
5 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-enum-sex-en.sql:
--------------------------------------------------------------------------------
1 | /* English sex labels for SEX_ENUM. */
2 | CREATE TYPE SEX_ENUM AS ENUM
3 |     ('female', 'male');
4 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-enum-sex-es.sql:
--------------------------------------------------------------------------------
1 | /* Spanish sex labels for SEX_ENUM. */
2 | CREATE TYPE SEX_ENUM AS ENUM
3 |     ('mujer', 'hombre');
4 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-gau-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
  1 | CREATE VIEW od_csv_clean AS SELECT /* typed view over od_csv_raw with English column names and English category labels */
  2 |     fecha AS date,
  3 |     periodo AS hour,
  4 |     CAST (CASE origen /* rename the special zone id externo; every other id passes through unchanged */
  5 |         WHEN 'externo' THEN 'external'
  6 |         ELSE origen
  7 |         END AS ZONES_ENUM) /* ZONES_ENUM is not created in this file and must already exist when the view is defined */
  8 |         AS id_origin,
  9 |     CAST (CASE destino /* same translation as for origen */
 10 |         WHEN 'externo' THEN 'external'
 11 |         ELSE destino
 12 |         END AS ZONES_ENUM)
 13 |         AS id_destination,
 14 |     CAST (distancia AS DISTANCE_ENUM) AS distance,
 15 |     CAST (CASE actividad_origen /* Spanish raw labels to English ACTIV_ENUM labels; no ELSE branch, so unmatched values become NULL */
 16 |         WHEN 'casa' THEN 'home'
 17 |         WHEN 'frecuente' THEN 'frequent_activity'
 18 |         WHEN 'no_frecuente' THEN 'infrequent_activity'
 19 |         WHEN 'trabajo_estudio' THEN 'work_or_study'
 20 |         END AS ACTIV_ENUM)
 21 |         AS activity_origin,
 22 |     CAST (CASE actividad_destino /* same mapping as for actividad_origen */
 23 |         WHEN 'casa' THEN 'home'
 24 |         WHEN 'frecuente' THEN 'frequent_activity'
 25 |         WHEN 'no_frecuente' THEN 'infrequent_activity'
 26 |         WHEN 'trabajo_estudio' THEN 'work_or_study'
 27 |         END AS ACTIV_ENUM)
 28 |         AS activity_destination,
 29 |     CASE estudio_origen_posible /* si/no flag to BOOLEAN; any other value yields NULL */
 30 |         WHEN 'si' THEN TRUE
 31 |         WHEN 'no' THEN FALSE
 32 |         END AS study_possible_origin,
 33 |     CASE estudio_destino_posible /* si/no flag to BOOLEAN; any other value yields NULL */
 34 |         WHEN 'si' THEN TRUE
 35 |         WHEN 'no' THEN FALSE
 36 |         END AS study_possible_destination,
 37 |     CAST(residencia AS INE_PROV_CODE_ENUM) AS residence_province_ine_code, /* INE_PROV_CODE_ENUM is defined outside this file */
 38 |     CAST (CASE residencia /* two-digit province code to province name; codes not listed below become NULL */
 39 |         WHEN '01' THEN 'Araba/Álava'
 40 |         WHEN '02' THEN 'Albacete'
 41 |         WHEN '03' THEN 'Alicante/Alacant'
 42 |         WHEN '04' THEN 'Almería'
 43 |         WHEN '05' THEN 'Ávila'
 44 |         WHEN '06' THEN 'Badajoz'
 45 |         WHEN '07' THEN 'Balears, Illes'
 46 |         WHEN '08' THEN 'Barcelona'
 47 |         WHEN '09' THEN 'Burgos'
 48 |         WHEN '10' THEN 'Cáceres'
 49 |         WHEN '11' THEN 'Cádiz'
 50 |         WHEN '12' THEN 'Castellón/Castelló'
 51 |         WHEN '13' THEN 'Ciudad Real'
 52 |         WHEN '14' THEN 'Córdoba'
 53 |         WHEN '15' THEN 'Coruña, A'
 54 |         WHEN '16' THEN 'Cuenca'
 55 |         WHEN '17' THEN 'Girona'
 56 |         WHEN '18' THEN 'Granada'
 57 |         WHEN '19' THEN 'Guadalajara'
 58 |         WHEN '20' THEN 'Gipuzkoa'
 59 |         WHEN '21' THEN 'Huelva'
 60 |         WHEN '22' THEN 'Huesca'
 61 |         WHEN '23' THEN 'Jaén'
 62 |         WHEN '24' THEN 'León'
 63 |         WHEN '25' THEN 'Lleida'
 64 |         WHEN '26' THEN 'Rioja, La'
 65 |         WHEN '27' THEN 'Lugo'
 66 |         WHEN '28' THEN 'Madrid'
 67 |         WHEN '29' THEN 'Málaga'
 68 |         WHEN '30' THEN 'Murcia'
 69 |         WHEN '31' THEN 'Navarra'
 70 |         WHEN '32' THEN 'Ourense'
 71 |         WHEN '33' THEN 'Asturias'
 72 |         WHEN '34' THEN 'Palencia'
 73 |         WHEN '35' THEN 'Palmas, Las'
 74 |         WHEN '36' THEN 'Pontevedra'
 75 |         WHEN '37' THEN 'Salamanca'
 76 |         WHEN '38' THEN 'Santa Cruz de Tenerife'
 77 |         WHEN '39' THEN 'Cantabria'
 78 |         WHEN '40' THEN 'Segovia'
 79 |         WHEN '41' THEN 'Sevilla'
 80 |         WHEN '42' THEN 'Soria'
 81 |         WHEN '43' THEN 'Tarragona'
 82 |         WHEN '44' THEN 'Teruel'
 83 |         WHEN '45' THEN 'Toledo'
 84 |         WHEN '46' THEN 'Valencia/València'
 85 |         WHEN '47' THEN 'Valladolid'
 86 |         WHEN '48' THEN 'Bizkaia'
 87 |         WHEN '49' THEN 'Zamora'
 88 |         WHEN '50' THEN 'Zaragoza'
 89 |         WHEN '51' THEN 'Ceuta'
 90 |         WHEN '52' THEN 'Melilla'
 91 |         END AS INE_PROV_NAME_ENUM) /* INE_PROV_NAME_ENUM is defined outside this file */
 92 |         AS residence_province_name,
 93 |     CAST (renta AS INCOME_ENUM) AS income,
 94 |     CAST (CASE edad /* the literal string NA and any unexpected value both become NULL; known brackets pass through */
 95 |         WHEN 'NA' THEN NULL
 96 |         WHEN '0-25' THEN '0-25'
 97 |         WHEN '25-45' THEN '25-45'
 98 |         WHEN '45-65' THEN '45-65'
 99 |         WHEN '65-100' THEN '65-100'
100 |         ELSE NULL
101 |         END AS AGE_ENUM)
102 |         AS age,
103 |     CAST (CASE sexo /* NA becomes NULL; Spanish labels are translated to the English SEX_ENUM labels */
104 |         WHEN 'NA' THEN NULL
105 |         WHEN 'mujer' THEN 'female'
106 |         WHEN 'hombre' THEN 'male'
107 |         END AS SEX_ENUM)
108 |         AS sex,
109 |     viajes AS n_trips,
110 |     viajes_km AS trips_total_length_km,
111 |     CAST(year AS INTEGER) AS year, /* NOTE(review): year, month and day are presumably partition columns supplied by od_csv_raw, confirm */
112 |     CAST(month AS INTEGER) AS month,
113 |     CAST(day AS INTEGER) AS day
114 |     FROM od_csv_raw;
115 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-gau-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW od_csv_clean AS SELECT /* typed view over od_csv_raw that keeps the Spanish column names and category labels */
 2 |     fecha,
 3 |     periodo,
 4 |     CAST (origen AS ZONES_ENUM) AS origen, /* raw ids are cast unchanged; ZONES_ENUM is not created in this file and must already exist */
 5 |     CAST (destino AS ZONES_ENUM) AS destino,
 6 |     CAST (distancia AS DISTANCE_ENUM) AS distancia,
 7 |     CAST (actividad_origen AS ACTIV_ENUM) AS actividad_origen, /* raw Spanish labels are cast directly, so ACTIV_ENUM must hold the Spanish values */
 8 |     CAST (actividad_destino AS ACTIV_ENUM) AS actividad_destino,
 9 |     CASE estudio_origen_posible /* si/no flag to BOOLEAN; any other value yields NULL */
10 |         WHEN 'si' THEN TRUE
11 |         WHEN 'no' THEN FALSE
12 |         END AS estudio_origen_posible,
13 |     CASE estudio_destino_posible /* si/no flag to BOOLEAN; any other value yields NULL */
14 |         WHEN 'si' THEN TRUE
15 |         WHEN 'no' THEN FALSE
16 |         END AS estudio_destino_posible,
17 |     CAST(residencia AS INE_PROV_CODE_ENUM) AS residencia, /* INE_PROV_CODE_ENUM is defined outside this file */
18 |     CAST (CASE residencia /* two-digit province code to province name; codes not listed below become NULL */
19 |         WHEN '01' THEN 'Araba/Álava'
20 |         WHEN '02' THEN 'Albacete'
21 |         WHEN '03' THEN 'Alicante/Alacant'
22 |         WHEN '04' THEN 'Almería'
23 |         WHEN '05' THEN 'Ávila'
24 |         WHEN '06' THEN 'Badajoz'
25 |         WHEN '07' THEN 'Balears, Illes'
26 |         WHEN '08' THEN 'Barcelona'
27 |         WHEN '09' THEN 'Burgos'
28 |         WHEN '10' THEN 'Cáceres'
29 |         WHEN '11' THEN 'Cádiz'
30 |         WHEN '12' THEN 'Castellón/Castelló'
31 |         WHEN '13' THEN 'Ciudad Real'
32 |         WHEN '14' THEN 'Córdoba'
33 |         WHEN '15' THEN 'Coruña, A'
34 |         WHEN '16' THEN 'Cuenca'
35 |         WHEN '17' THEN 'Girona'
36 |         WHEN '18' THEN 'Granada'
37 |         WHEN '19' THEN 'Guadalajara'
38 |         WHEN '20' THEN 'Gipuzkoa'
39 |         WHEN '21' THEN 'Huelva'
40 |         WHEN '22' THEN 'Huesca'
41 |         WHEN '23' THEN 'Jaén'
42 |         WHEN '24' THEN 'León'
43 |         WHEN '25' THEN 'Lleida'
44 |         WHEN '26' THEN 'Rioja, La'
45 |         WHEN '27' THEN 'Lugo'
46 |         WHEN '28' THEN 'Madrid'
47 |         WHEN '29' THEN 'Málaga'
48 |         WHEN '30' THEN 'Murcia'
49 |         WHEN '31' THEN 'Navarra'
50 |         WHEN '32' THEN 'Ourense'
51 |         WHEN '33' THEN 'Asturias'
52 |         WHEN '34' THEN 'Palencia'
53 |         WHEN '35' THEN 'Palmas, Las'
54 |         WHEN '36' THEN 'Pontevedra'
55 |         WHEN '37' THEN 'Salamanca'
56 |         WHEN '38' THEN 'Santa Cruz de Tenerife'
57 |         WHEN '39' THEN 'Cantabria'
58 |         WHEN '40' THEN 'Segovia'
59 |         WHEN '41' THEN 'Sevilla'
60 |         WHEN '42' THEN 'Soria'
61 |         WHEN '43' THEN 'Tarragona'
62 |         WHEN '44' THEN 'Teruel'
63 |         WHEN '45' THEN 'Toledo'
64 |         WHEN '46' THEN 'Valencia/València'
65 |         WHEN '47' THEN 'Valladolid'
66 |         WHEN '48' THEN 'Bizkaia'
67 |         WHEN '49' THEN 'Zamora'
68 |         WHEN '50' THEN 'Zaragoza'
69 |         WHEN '51' THEN 'Ceuta'
70 |         WHEN '52' THEN 'Melilla'
71 |         END AS INE_PROV_NAME_ENUM) /* INE_PROV_NAME_ENUM is defined outside this file */
72 |         AS residencia_nombre,
73 |     CAST (renta AS INCOME_ENUM) AS renta,
74 |     CAST (CASE edad /* the literal string NA and any unexpected value both become NULL; known brackets pass through */
75 |         WHEN 'NA' THEN NULL
76 |         WHEN '0-25' THEN '0-25'
77 |         WHEN '25-45' THEN '25-45'
78 |         WHEN '45-65' THEN '45-65'
79 |         WHEN '65-100' THEN '65-100'
80 |         ELSE NULL
81 |         END AS AGE_ENUM)
82 |         AS edad,
83 |     CAST (CASE sexo /* only the NA marker is nulled; other values are cast unchanged, so SEX_ENUM must hold the Spanish labels */
84 |         WHEN 'NA' THEN NULL
85 |         ELSE sexo
86 |         END AS SEX_ENUM)
87 |         AS sexo,
88 |     viajes,
89 |     viajes_km,
90 |     CAST(year AS INTEGER) AS ano, /* NOTE(review): year, month and day are presumably partition columns supplied by od_csv_raw, confirm */
91 |     CAST(month AS INTEGER) AS mes,
92 |     CAST(day AS INTEGER) AS dia
93 |     FROM od_csv_raw;
94 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-gau-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW od_csv_raw AS SELECT * /* view that exposes the origin-destination CSV files as one table */
 2 |     /* This file is a template: the csv_folder placeholder needs to be replaced with a valid path
 3 |     before the statement is executed; in R use glue::glue(). Doubled curly braces are the glue escape for literal ones. */
 4 |     FROM read_csv_auto('{csv_folder}**/*.csv.gz', /* recursive glob over every gzipped CSV below csv_folder */
 5 |         delim='|', /* fields are pipe-separated */
 6 |         header=TRUE,
 7 |         hive_partitioning=TRUE, /* key=value folder names become columns; the clean views in this folder read year, month and day */
 8 |         columns={{ /* explicit column types; categorical fields stay VARCHAR so the clean view can cast them to enums */
 9 |             'fecha': 'DATE',
10 |             'periodo': 'INTEGER',
11 |             'origen': 'VARCHAR',
12 |             'destino': 'VARCHAR',
13 |             'distancia': 'VARCHAR',
14 |             'actividad_origen': 'VARCHAR',
15 |             'actividad_destino': 'VARCHAR',
16 |             'estudio_origen_posible': 'VARCHAR',
17 |             'estudio_destino_posible': 'VARCHAR',
18 |             'residencia': 'VARCHAR',
19 |             'renta': 'VARCHAR',
20 |             'edad': 'VARCHAR',
21 |             'sexo': 'VARCHAR',
22 |             'viajes': 'DOUBLE',
23 |             'viajes_km': 'DOUBLE'
24 |     }},
25 |     dateformat='%Y%m%d'); /* fecha is stored as YYYYMMDD without separators */
26 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-municipios-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
  1 | CREATE VIEW od_csv_clean AS SELECT /* typed view over od_csv_raw with English column names and English category labels */
  2 |     fecha AS date,
  3 |     periodo AS hour,
  4 |     CAST (CASE origen /* rename the special zone id externo; every other id passes through unchanged */
  5 |         WHEN 'externo' THEN 'external'
  6 |         ELSE origen
  7 |         END AS ZONES_ENUM) /* ZONES_ENUM is not created in this file and must already exist when the view is defined */
  8 |         AS id_origin,
  9 |     CAST (CASE destino /* same translation as for origen */
 10 |         WHEN 'externo' THEN 'external'
 11 |         ELSE destino
 12 |         END AS ZONES_ENUM)
 13 |         AS id_destination,
 14 |     CAST (distancia AS DISTANCE_ENUM) AS distance,
 15 |     CAST (CASE actividad_origen /* Spanish raw labels to English ACTIV_ENUM labels; no ELSE branch, so unmatched values become NULL */
 16 |         WHEN 'casa' THEN 'home'
 17 |         WHEN 'frecuente' THEN 'frequent_activity'
 18 |         WHEN 'no_frecuente' THEN 'infrequent_activity'
 19 |         WHEN 'trabajo_estudio' THEN 'work_or_study'
 20 |         END AS ACTIV_ENUM)
 21 |         AS activity_origin,
 22 |     CAST (CASE actividad_destino /* same mapping as for actividad_origen */
 23 |         WHEN 'casa' THEN 'home'
 24 |         WHEN 'frecuente' THEN 'frequent_activity'
 25 |         WHEN 'no_frecuente' THEN 'infrequent_activity'
 26 |         WHEN 'trabajo_estudio' THEN 'work_or_study'
 27 |         END AS ACTIV_ENUM)
 28 |         AS activity_destination,
 29 |     CASE estudio_origen_posible /* si/no flag to BOOLEAN; any other value yields NULL */
 30 |         WHEN 'si' THEN TRUE
 31 |         WHEN 'no' THEN FALSE
 32 |         END AS study_possible_origin,
 33 |     CASE estudio_destino_posible /* si/no flag to BOOLEAN; any other value yields NULL */
 34 |         WHEN 'si' THEN TRUE
 35 |         WHEN 'no' THEN FALSE
 36 |         END AS study_possible_destination,
 37 |     CAST(residencia AS INE_PROV_CODE_ENUM) AS residence_province_ine_code, /* INE_PROV_CODE_ENUM is defined outside this file */
 38 |     CAST (CASE residencia /* two-digit province code to province name; codes not listed below become NULL */
 39 |         WHEN '01' THEN 'Araba/Álava'
 40 |         WHEN '02' THEN 'Albacete'
 41 |         WHEN '03' THEN 'Alicante/Alacant'
 42 |         WHEN '04' THEN 'Almería'
 43 |         WHEN '05' THEN 'Ávila'
 44 |         WHEN '06' THEN 'Badajoz'
 45 |         WHEN '07' THEN 'Balears, Illes'
 46 |         WHEN '08' THEN 'Barcelona'
 47 |         WHEN '09' THEN 'Burgos'
 48 |         WHEN '10' THEN 'Cáceres'
 49 |         WHEN '11' THEN 'Cádiz'
 50 |         WHEN '12' THEN 'Castellón/Castelló'
 51 |         WHEN '13' THEN 'Ciudad Real'
 52 |         WHEN '14' THEN 'Córdoba'
 53 |         WHEN '15' THEN 'Coruña, A'
 54 |         WHEN '16' THEN 'Cuenca'
 55 |         WHEN '17' THEN 'Girona'
 56 |         WHEN '18' THEN 'Granada'
 57 |         WHEN '19' THEN 'Guadalajara'
 58 |         WHEN '20' THEN 'Gipuzkoa'
 59 |         WHEN '21' THEN 'Huelva'
 60 |         WHEN '22' THEN 'Huesca'
 61 |         WHEN '23' THEN 'Jaén'
 62 |         WHEN '24' THEN 'León'
 63 |         WHEN '25' THEN 'Lleida'
 64 |         WHEN '26' THEN 'Rioja, La'
 65 |         WHEN '27' THEN 'Lugo'
 66 |         WHEN '28' THEN 'Madrid'
 67 |         WHEN '29' THEN 'Málaga'
 68 |         WHEN '30' THEN 'Murcia'
 69 |         WHEN '31' THEN 'Navarra'
 70 |         WHEN '32' THEN 'Ourense'
 71 |         WHEN '33' THEN 'Asturias'
 72 |         WHEN '34' THEN 'Palencia'
 73 |         WHEN '35' THEN 'Palmas, Las'
 74 |         WHEN '36' THEN 'Pontevedra'
 75 |         WHEN '37' THEN 'Salamanca'
 76 |         WHEN '38' THEN 'Santa Cruz de Tenerife'
 77 |         WHEN '39' THEN 'Cantabria'
 78 |         WHEN '40' THEN 'Segovia'
 79 |         WHEN '41' THEN 'Sevilla'
 80 |         WHEN '42' THEN 'Soria'
 81 |         WHEN '43' THEN 'Tarragona'
 82 |         WHEN '44' THEN 'Teruel'
 83 |         WHEN '45' THEN 'Toledo'
 84 |         WHEN '46' THEN 'Valencia/València'
 85 |         WHEN '47' THEN 'Valladolid'
 86 |         WHEN '48' THEN 'Bizkaia'
 87 |         WHEN '49' THEN 'Zamora'
 88 |         WHEN '50' THEN 'Zaragoza'
 89 |         WHEN '51' THEN 'Ceuta'
 90 |         WHEN '52' THEN 'Melilla'
 91 |         END AS INE_PROV_NAME_ENUM) /* INE_PROV_NAME_ENUM is defined outside this file */
 92 |         AS residence_province_name,
 93 |     CAST (renta AS INCOME_ENUM) AS income,
 94 |     CAST (CASE edad /* the literal string NA and any unexpected value both become NULL; known brackets pass through */
 95 |         WHEN 'NA' THEN NULL
 96 |         WHEN '0-25' THEN '0-25'
 97 |         WHEN '25-45' THEN '25-45'
 98 |         WHEN '45-65' THEN '45-65'
 99 |         WHEN '65-100' THEN '65-100'
100 |         ELSE NULL
101 |         END AS AGE_ENUM)
102 |         AS age,
103 |     CAST (CASE sexo /* NA becomes NULL; Spanish labels are translated to the English SEX_ENUM labels */
104 |         WHEN 'NA' THEN NULL
105 |         WHEN 'mujer' THEN 'female'
106 |         WHEN 'hombre' THEN 'male'
107 |         END AS SEX_ENUM)
108 |         AS sex,
109 |     viajes AS n_trips,
110 |     viajes_km AS trips_total_length_km,
111 |     CAST(year AS INTEGER) AS year, /* NOTE(review): year, month and day are presumably partition columns supplied by od_csv_raw, confirm */
112 |     CAST(month AS INTEGER) AS month,
113 |     CAST(day AS INTEGER) AS day
114 |     FROM od_csv_raw;
115 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-municipios-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW od_csv_clean AS SELECT /* typed view over od_csv_raw that keeps the Spanish column names and category labels */
 2 |     fecha,
 3 |     periodo,
 4 |     CAST (origen AS ZONES_ENUM) AS origen, /* raw ids are cast unchanged; ZONES_ENUM is not created in this file and must already exist */
 5 |     CAST (destino AS ZONES_ENUM) AS destino,
 6 |     CAST (distancia AS DISTANCE_ENUM) AS distancia,
 7 |     CAST (actividad_origen AS ACTIV_ENUM) AS actividad_origen, /* raw Spanish labels are cast directly, so ACTIV_ENUM must hold the Spanish values */
 8 |     CAST (actividad_destino AS ACTIV_ENUM) AS actividad_destino,
 9 |     CASE estudio_origen_posible /* si/no flag to BOOLEAN; any other value yields NULL */
10 |         WHEN 'si' THEN TRUE
11 |         WHEN 'no' THEN FALSE
12 |         END AS estudio_origen_posible,
13 |     CASE estudio_destino_posible /* si/no flag to BOOLEAN; any other value yields NULL */
14 |         WHEN 'si' THEN TRUE
15 |         WHEN 'no' THEN FALSE
16 |         END AS estudio_destino_posible,
17 |     CAST(residencia AS INE_PROV_CODE_ENUM) AS residencia, /* INE_PROV_CODE_ENUM is defined outside this file */
18 |     CAST (CASE residencia /* two-digit province code to province name; codes not listed below become NULL */
19 |         WHEN '01' THEN 'Araba/Álava'
20 |         WHEN '02' THEN 'Albacete'
21 |         WHEN '03' THEN 'Alicante/Alacant'
22 |         WHEN '04' THEN 'Almería'
23 |         WHEN '05' THEN 'Ávila'
24 |         WHEN '06' THEN 'Badajoz'
25 |         WHEN '07' THEN 'Balears, Illes'
26 |         WHEN '08' THEN 'Barcelona'
27 |         WHEN '09' THEN 'Burgos'
28 |         WHEN '10' THEN 'Cáceres'
29 |         WHEN '11' THEN 'Cádiz'
30 |         WHEN '12' THEN 'Castellón/Castelló'
31 |         WHEN '13' THEN 'Ciudad Real'
32 |         WHEN '14' THEN 'Córdoba'
33 |         WHEN '15' THEN 'Coruña, A'
34 |         WHEN '16' THEN 'Cuenca'
35 |         WHEN '17' THEN 'Girona'
36 |         WHEN '18' THEN 'Granada'
37 |         WHEN '19' THEN 'Guadalajara'
38 |         WHEN '20' THEN 'Gipuzkoa'
39 |         WHEN '21' THEN 'Huelva'
40 |         WHEN '22' THEN 'Huesca'
41 |         WHEN '23' THEN 'Jaén'
42 |         WHEN '24' THEN 'León'
43 |         WHEN '25' THEN 'Lleida'
44 |         WHEN '26' THEN 'Rioja, La'
45 |         WHEN '27' THEN 'Lugo'
46 |         WHEN '28' THEN 'Madrid'
47 |         WHEN '29' THEN 'Málaga'
48 |         WHEN '30' THEN 'Murcia'
49 |         WHEN '31' THEN 'Navarra'
50 |         WHEN '32' THEN 'Ourense'
51 |         WHEN '33' THEN 'Asturias'
52 |         WHEN '34' THEN 'Palencia'
53 |         WHEN '35' THEN 'Palmas, Las'
54 |         WHEN '36' THEN 'Pontevedra'
55 |         WHEN '37' THEN 'Salamanca'
56 |         WHEN '38' THEN 'Santa Cruz de Tenerife'
57 |         WHEN '39' THEN 'Cantabria'
58 |         WHEN '40' THEN 'Segovia'
59 |         WHEN '41' THEN 'Sevilla'
60 |         WHEN '42' THEN 'Soria'
61 |         WHEN '43' THEN 'Tarragona'
62 |         WHEN '44' THEN 'Teruel'
63 |         WHEN '45' THEN 'Toledo'
64 |         WHEN '46' THEN 'Valencia/València'
65 |         WHEN '47' THEN 'Valladolid'
66 |         WHEN '48' THEN 'Bizkaia'
67 |         WHEN '49' THEN 'Zamora'
68 |         WHEN '50' THEN 'Zaragoza'
69 |         WHEN '51' THEN 'Ceuta'
70 |         WHEN '52' THEN 'Melilla'
71 |         END AS INE_PROV_NAME_ENUM) /* INE_PROV_NAME_ENUM is defined outside this file */
72 |         AS residencia_nombre,
73 |     CAST (renta AS INCOME_ENUM) AS renta,
74 |     CAST (CASE edad /* the literal string NA and any unexpected value both become NULL; known brackets pass through */
75 |         WHEN 'NA' THEN NULL
76 |         WHEN '0-25' THEN '0-25'
77 |         WHEN '25-45' THEN '25-45'
78 |         WHEN '45-65' THEN '45-65'
79 |         WHEN '65-100' THEN '65-100'
80 |         ELSE NULL
81 |         END AS AGE_ENUM)
82 |         AS edad,
83 |     CAST (CASE sexo /* only the NA marker is nulled; other values are cast unchanged, so SEX_ENUM must hold the Spanish labels */
84 |         WHEN 'NA' THEN NULL
85 |         ELSE sexo
86 |         END AS SEX_ENUM)
87 |         AS sexo,
88 |     viajes,
89 |     viajes_km,
90 |     CAST(year AS INTEGER) AS ano, /* NOTE(review): year, month and day are presumably partition columns supplied by od_csv_raw, confirm */
91 |     CAST(month AS INTEGER) AS mes,
92 |     CAST(day AS INTEGER) AS dia
93 |     FROM od_csv_raw;
94 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-od-municipios-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW od_csv_raw AS SELECT * /* view that exposes the origin-destination CSV files as one table */
 2 |     /* This file is a template: the csv_folder placeholder needs to be replaced with a valid path
 3 |     before the statement is executed; in R use glue::glue(). Doubled curly braces are the glue escape for literal ones. */
 4 |     FROM read_csv_auto('{csv_folder}**/*.csv.gz', /* recursive glob over every gzipped CSV below csv_folder */
 5 |         delim='|', /* fields are pipe-separated */
 6 |         header=TRUE,
 7 |         hive_partitioning=TRUE, /* key=value folder names become columns; the clean views in this folder read year, month and day */
 8 |         columns={{ /* explicit column types; categorical fields stay VARCHAR so the clean view can cast them to enums */
 9 |             'fecha': 'DATE',
10 |             'periodo': 'INTEGER',
11 |             'origen': 'VARCHAR',
12 |             'destino': 'VARCHAR',
13 |             'distancia': 'VARCHAR',
14 |             'actividad_origen': 'VARCHAR',
15 |             'actividad_destino': 'VARCHAR',
16 |             'estudio_origen_posible': 'VARCHAR',
17 |             'estudio_destino_posible': 'VARCHAR',
18 |             'residencia': 'VARCHAR',
19 |             'renta': 'VARCHAR',
20 |             'edad': 'VARCHAR',
21 |             'sexo': 'VARCHAR',
22 |             'viajes': 'DOUBLE',
23 |             'viajes_km': 'DOUBLE'
24 |     }},
25 |     dateformat='%Y%m%d'); /* fecha is stored as YYYYMMDD without separators */
26 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-os-distritos-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW os_csv_clean AS SELECT /* typed view over os_csv_raw with English column names */
 2 |     fecha AS date,
 3 |     CAST(zona_residencia AS RESID_ZONES_ENUM) AS id_residence, /* RESID_ZONES_ENUM is not created in this file and must already exist */
 4 |     CAST(zona_pernoctacion AS OVERNIGHT_ZONES_ENUM) AS id_overnight_stay, /* OVERNIGHT_ZONES_ENUM is likewise defined elsewhere */
 5 |     personas AS n_persons,
 6 |     CAST(year AS INTEGER) AS year, /* NOTE(review): year, month and day are presumably partition columns supplied by os_csv_raw, confirm */
 7 |     CAST(month AS INTEGER) AS month,
 8 |     CAST(day AS INTEGER) AS day
 9 | FROM os_csv_raw;
10 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-os-distritos-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW os_csv_clean AS SELECT /* typed view over os_csv_raw that keeps the Spanish column names */
 2 |     fecha,
 3 |     CAST(zona_residencia AS RESID_ZONES_ENUM) AS zona_residencia, /* RESID_ZONES_ENUM is not created in this file and must already exist */
 4 |     CAST(zona_pernoctacion AS OVERNIGHT_ZONES_ENUM) AS zona_pernoctacion, /* OVERNIGHT_ZONES_ENUM is likewise defined elsewhere */
 5 |     personas,
 6 |     CAST(year AS INTEGER) AS ano, /* NOTE(review): year, month and day are presumably partition columns supplied by os_csv_raw, confirm */
 7 |     CAST(month AS INTEGER) AS mes,
 8 |     CAST(day AS INTEGER) AS dia
 9 | FROM os_csv_raw;
10 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-os-distritos-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW os_csv_raw AS SELECT * /* view that exposes the os CSV files (residence zone, overnight zone, person count) as one table */
 2 |     /* This file is a template: the csv_folder placeholder needs to be replaced with a valid path
 3 |     before the statement is executed; in R use glue::glue(). Doubled curly braces are the glue escape for literal ones. */
 4 |     FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE, /* recursive glob, pipe-separated, key=value folder names become columns */
 5 |     columns={{ /* explicit column types; zone ids stay VARCHAR here */
 6 |     'fecha': 'DATE',
 7 |     'zona_residencia': 'VARCHAR',
 8 |     'zona_pernoctacion': 'VARCHAR',
 9 |     'personas': 'DOUBLE'
10 |     }},
11 |     dateformat='%Y%m%d'); /* fecha is stored as YYYYMMDD without separators */
12 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-os-gau-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW os_csv_clean AS SELECT /* typed view over os_csv_raw with English column names */
 2 |     fecha AS date,
 3 |     CAST(zona_residencia AS RESID_ZONES_ENUM) AS id_residence, /* RESID_ZONES_ENUM is not created in this file and must already exist */
 4 |     CAST(zona_pernoctacion AS OVERNIGHT_ZONES_ENUM) AS id_overnight_stay, /* OVERNIGHT_ZONES_ENUM is likewise defined elsewhere */
 5 |     personas AS n_persons,
 6 |     CAST(year AS INTEGER) AS year, /* NOTE(review): year, month and day are presumably partition columns supplied by os_csv_raw, confirm */
 7 |     CAST(month AS INTEGER) AS month,
 8 |     CAST(day AS INTEGER) AS day
 9 | FROM os_csv_raw;
10 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-os-gau-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW os_csv_clean AS SELECT /* typed view over os_csv_raw that keeps the Spanish column names */
 2 |     fecha,
 3 |     CAST(zona_residencia AS RESID_ZONES_ENUM) AS zona_residencia, /* RESID_ZONES_ENUM is not created in this file and must already exist */
 4 |     CAST(zona_pernoctacion AS OVERNIGHT_ZONES_ENUM) AS zona_pernoctacion, /* OVERNIGHT_ZONES_ENUM is likewise defined elsewhere */
 5 |     personas,
 6 |     CAST(year AS INTEGER) AS ano, /* NOTE(review): year, month and day are presumably partition columns supplied by os_csv_raw, confirm */
 7 |     CAST(month AS INTEGER) AS mes,
 8 |     CAST(day AS INTEGER) AS dia
 9 | FROM os_csv_raw;
10 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-os-gau-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW os_csv_raw AS SELECT * /* view that exposes the os CSV files (residence zone, overnight zone, person count) as one table */
 2 |     /* This file is a template: the csv_folder placeholder needs to be replaced with a valid path
 3 |     before the statement is executed; in R use glue::glue(). Doubled curly braces are the glue escape for literal ones. */
 4 |     FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE, /* recursive glob, pipe-separated, key=value folder names become columns */
 5 |     columns={{ /* explicit column types; zone ids stay VARCHAR here */
 6 |     'fecha': 'DATE',
 7 |     'zona_residencia': 'VARCHAR',
 8 |     'zona_pernoctacion': 'VARCHAR',
 9 |     'personas': 'DOUBLE'
10 |     }},
11 |     dateformat='%Y%m%d'); /* fecha is stored as YYYYMMDD without separators */
12 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-os-municipios-clean-csv-view-en.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW os_csv_clean AS SELECT /* typed view over os_csv_raw with English column names */
 2 |     fecha AS date,
 3 |     CAST(zona_residencia AS RESID_ZONES_ENUM) AS id_residence, /* RESID_ZONES_ENUM is not created in this file and must already exist */
 4 |     CAST(zona_pernoctacion AS OVERNIGHT_ZONES_ENUM) AS id_overnight_stay, /* OVERNIGHT_ZONES_ENUM is likewise defined elsewhere */
 5 |     personas AS n_persons,
 6 |     CAST(year AS INTEGER) AS year, /* NOTE(review): year, month and day are presumably partition columns supplied by os_csv_raw, confirm */
 7 |     CAST(month AS INTEGER) AS month,
 8 |     CAST(day AS INTEGER) AS day
 9 | FROM os_csv_raw;
10 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-os-municipios-clean-csv-view-es.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW os_csv_clean AS SELECT /* typed view over os_csv_raw that keeps the Spanish column names */
 2 |     fecha,
 3 |     CAST(zona_residencia AS RESID_ZONES_ENUM) AS zona_residencia, /* RESID_ZONES_ENUM is not created in this file and must already exist */
 4 |     CAST(zona_pernoctacion AS OVERNIGHT_ZONES_ENUM) AS zona_pernoctacion, /* OVERNIGHT_ZONES_ENUM is likewise defined elsewhere */
 5 |     personas,
 6 |     CAST(year AS INTEGER) AS ano, /* NOTE(review): year, month and day are presumably partition columns supplied by os_csv_raw, confirm */
 7 |     CAST(month AS INTEGER) AS mes,
 8 |     CAST(day AS INTEGER) AS dia
 9 | FROM os_csv_raw;
10 | 


--------------------------------------------------------------------------------
/inst/extdata/sql-queries/v2-os-municipios-raw-csv-view.sql:
--------------------------------------------------------------------------------
 1 | CREATE VIEW os_csv_raw AS SELECT * /* view that exposes the os CSV files (residence zone, overnight zone, person count) as one table */
 2 |     /* This file is a template: the csv_folder placeholder needs to be replaced with a valid path
 3 |     before the statement is executed; in R use glue::glue(). Doubled curly braces are the glue escape for literal ones. */
 4 |     FROM read_csv_auto('{csv_folder}**/*.csv.gz', delim='|', header=TRUE, hive_partitioning=TRUE, /* recursive glob, pipe-separated, key=value folder names become columns */
 5 |     columns={{ /* explicit column types; zone ids stay VARCHAR here */
 6 |     'fecha': 'DATE',
 7 |     'zona_residencia': 'VARCHAR',
 8 |     'zona_pernoctacion': 'VARCHAR',
 9 |     'personas': 'DOUBLE'
10 |     }},
11 |     dateformat='%Y%m%d'); /* fecha is stored as YYYYMMDD without separators */
12 | 


--------------------------------------------------------------------------------
/inst/extdata/url_file_sizes_v1.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/inst/extdata/url_file_sizes_v1.txt.gz


--------------------------------------------------------------------------------
/inst/extdata/url_file_sizes_v2.txt.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/inst/extdata/url_file_sizes_v2.txt.gz


--------------------------------------------------------------------------------
/inst/schemaorg.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "@context": "https://schema.org",
 3 |   "@graph": [
 4 |     {
 5 |       "type": "SoftwareSourceCode",
 6 |       "author": [
 7 |         {
 8 |           "id": "https://orcid.org/0000-0001-6690-5345",
 9 |           "type": "Person",
10 |           "email": "kotov.egor@gmail.com",
11 |           "familyName": "Kotov",
12 |           "givenName": "Egor"
13 |         },
14 |         {
15 |           "id": "https://orcid.org/0000-0001-5679-6536",
16 |           "type": "Person",
17 |           "email": "rob00x@gmail.com",
18 |           "familyName": "Lovelace",
19 |           "givenName": "Robin"
20 |         }
21 |       ],
22 |       "codeRepository": "https://github.com/rOpenSpain/spanishoddata",
23 |       "contributor": {
24 |         "id": "https://orcid.org/0000-0001-5199-4103",
25 |         "type": "Person",
26 |         "familyName": "Vidal-Tortosa",
27 |         "givenName": "Eugeni"
28 |       },
29 |       "description": "Gain seamless access to origin-destination (OD) data from the Spanish Ministry of Transport, hosted at <https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/opendata-movilidad>. This package simplifies the management of these large datasets by providing tools to download zone boundaries, handle associated origin-destination data, and process it efficiently with the 'duckdb' database interface. Local caching minimizes repeated downloads, streamlining workflows for researchers and analysts. Extensive documentation is available at <https://ropenspain.github.io/spanishoddata/index.html>, offering guides on creating static and dynamic mobility flow visualizations and transforming large datasets into analysis-ready formats.",
30 |       "license": "https://spdx.org/licenses/MIT",
31 |       "name": "spanishoddata: Get Spanish Origin-Destination Data",
32 |       "programmingLanguage": {
33 |         "type": "ComputerLanguage",
34 |         "name": "R",
35 |         "url": "https://r-project.org"
36 |       },
37 |       "provider": {
38 |         "id": "https://cran.r-project.org",
39 |         "type": "Organization",
40 |         "name": "Comprehensive R Archive Network (CRAN)",
41 |         "url": "https://cran.r-project.org"
42 |       },
43 |       "runtimePlatform": "R version 4.4.3 (2025-02-28)",
44 |       "version": "0.1.1"
45 |     },
46 |     {
47 |       "type": "SoftwareSourceCode",
48 |       "author": {
49 |         "type": "Organization",
50 |         "name": "Ministerio de Transportes y Movilidad Sostenible (MITMS)"
51 |       },
52 |       "name": "Estudio de movilidad de viajeros de ámbito nacional aplicando la tecnología Big Data. Informe metodológico (Study of National Traveler mobility Using Big Data Technology. Methodological Report)"
53 |     },
54 |     {
55 |       "type": "SoftwareSourceCode",
56 |       "author": {
57 |         "type": "Organization",
58 |         "name": "Ministerio de Transportes, Movilidad y Agenda Urbana (MITMA)"
59 |       },
60 |       "name": "Análisis de la movilidad en España con tecnología Big Data durante el estado de alarma para la gestión de la crisis del COVID-19 (Analysis of mobility in Spain with Big Data technology during the state of alarm for COVID-19 crisis management)"
61 |     },
62 |     {
63 |       "id": "https://doi.org/10.32614/CRAN.package.spanishoddata",
64 |       "type": "SoftwareSourceCode",
65 |       "author": [
66 |         {
67 |           "id": "https://orcid.org/0000-0001-6690-5345",
68 |           "type": "Person",
69 |           "email": "kotov.egor@gmail.com",
70 |           "familyName": "Kotov",
71 |           "givenName": "Egor"
72 |         },
73 |         {
74 |           "id": "https://orcid.org/0000-0001-5679-6536",
75 |           "type": "Person",
76 |           "email": "rob00x@gmail.com",
77 |           "familyName": "Lovelace",
78 |           "givenName": "Robin"
79 |         }
80 |       ],
81 |       "name": "spanishoddata"
82 |     }
83 |   ]
84 | }
85 | 


--------------------------------------------------------------------------------
/inst/vignette-include/csv-date-filter-note.qmd:
--------------------------------------------------------------------------------
1 | 
2 | ::: callout-note
3 | 
4 | As long as you use a table connection object created with the `spod_get()` function, it is much quicker to filter the dates by the `year`, `month` and `day` variables, rather than by the `date` variable. This is because the data for each day is in a separate CSV file located in folders that look like `year=2020/month=2/day=14`. So when filtering by the `date` field, R will have to scan all CSV files comparing the specified date with what is stored inside each CSV file. However, if you query by `year`, `month` and `day` variables, R only needs to check these against the path to each CSV file, which is much quicker. This caveat is only relevant as long as you use `spod_get()`. If you convert (see the [relevant vignette](convert.qmd)) the downloaded data to a format that is optimized for quick analysis, you can use whichever field you want; it should not affect the performance.
5 | 
6 | :::
7 | 


--------------------------------------------------------------------------------
/inst/vignette-include/install-package.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | execute: 
 3 |   eval: false
 4 | ---
 5 | 
 6 | 
 7 | ## Install the package {#install-package}
 8 | 
 9 | Install from CRAN:
10 | 
11 | ```{r}
12 | install.packages("spanishoddata")
13 | ```
14 | 
15 | 
16 | 
17 | <details><summary>Alternative installation and development</summary>
18 | 
19 | You can also install the latest development version of the package from rOpenSpain R universe:
20 | 
21 | ```{r}
22 | install.packages("spanishoddata",
23 |   repos = c("https://ropenspain.r-universe.dev",
24 |     "https://cloud.r-project.org"))
25 | ```
26 | 
27 | 
28 | Alternative way to install the development version from GitHub:
29 | 
30 | ```{r}
31 | if (!require("remotes")) install.packages("remotes")
32 | 
33 | remotes::install_github("rOpenSpain/spanishoddata",
34 |   force = TRUE, dependencies = TRUE)
35 | ```
36 | 
37 | **For Developers**
38 | 
39 | To load the package locally, clone it and navigate to the root of the package in the terminal, e.g. with the following:
40 | 
41 | ```bash
42 | gh repo clone rOpenSpain/spanishoddata
43 | code spanishoddata
44 | # with rstudio:
45 | rstudio spanishoddata/spanishoddata.Rproj
46 | ```
47 | 
48 | Then run the following command from the R console:
49 | 
50 | ```{r}
51 | #| eval: false
52 | devtools::load_all()
53 | ```
54 | 
55 | </details>
56 | 


--------------------------------------------------------------------------------
/inst/vignette-include/missing-dates-outages.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | execute: 
 3 |   eval: false
 4 | ---
 5 | 
 6 | 
 7 | ::: callout-warning
 8 | 
 9 | 
10 | Due to mobile network outages, the data on certain dates is missing. Kindly keep this in mind when calculating mean monthly or weekly flows.
11 | 
12 | \
13 | 
14 | Please check the [original data page](https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/opendata-movilidad){target="_blank"} for currently known missing dates. At the time of writing, the following dates are missing: 26, 27, 30, 31 October; 1, 2 and 3 November 2023; 4, 18, 19 April 2024; 10 and 11 November 2024. You can use the `spod_get_valid_dates()` function to get all available dates.
15 | 
16 | 
17 | :::
18 | 


--------------------------------------------------------------------------------
/inst/vignette-include/overall-approach.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | execute: 
 3 |   eval: false
 4 | ---
 5 | 
 6 | 
 7 | ## Overall approach to accessing the data
 8 | 
 9 | If you only need flows data aggregated by day at municipal level, you can use the `spod_quick_get_od()` function. This will download the data directly from the web API and let you analyse it in-memory. More on this in the [Quickly get daily data](https://ropenspain.github.io/spanishoddata/articles/quick-get.html) vignette.
10 | 
11 | If you only want to analyse the data for a few days, you can use the `spod_get()` function. It will download the raw data in CSV format and let you analyse it in-memory. This is what we cover in the steps on this page.
12 | 
13 | If you need longer periods (several months or years), you should use the `spod_convert()` and `spod_connect()` functions, which will convert the data into a special format that is much faster for analysis; for this, see the [Download and convert OD datasets](https://ropenspain.github.io/spanishoddata/articles/convert.html) vignette. `spod_get_zones()` will give you spatial data with zones that can be matched with the origin-destination flows from the functions above using the zones' `id`s. Please see a simple example below, and also consult the codebook vignettes with detailed data descriptions and instructions, which you can open with `spod_codebook(ver = 1)` and `spod_codebook(ver = 2)`, or simply visit the package website at [https://ropenspain.github.io/spanishoddata/](https://ropenspain.github.io/spanishoddata/). @fig-overall-flow presents the overall approach to accessing the data in the `spanishoddata` package.
14 | 


--------------------------------------------------------------------------------
/inst/vignette-include/setup-data-directory.qmd:
--------------------------------------------------------------------------------
 1 | ---
 2 | execute: 
 3 |   eval: false
 4 | ---
 5 | 
 6 | 
 7 | ## Set the data directory {#set-data-folder}
 8 | 
 9 | Choose where `{spanishoddata}` should download (and convert) the data by setting the data directory with the following command:
10 | 
11 | ```{r}
12 | spod_set_data_dir(data_dir = "~/spanish_od_data")
13 | ```
14 | 
15 | The function above will also ensure that the directory is created and that you have sufficient permissions to write to it.
16 | 
17 | <details><summary>Setting data directory for advanced users</summary>
18 | 
19 | You can also set the data directory with an environment variable:
20 | 
21 | ```{r}
22 | Sys.setenv(SPANISH_OD_DATA_DIR = "~/spanish_od_data")
23 | ```
24 | 
25 | The package will create this directory if it does not exist on the first run of any function that downloads the data.
26 | 
27 | To permanently set the directory for all projects, you can specify the data directory globally by setting the `SPANISH_OD_DATA_DIR` environment variable, e.g. with the following command:
28 | 
29 | ```{r}
30 | #| eval: false
31 | usethis::edit_r_environ()
32 | # Then set the data directory globally, by typing this line in the file:
33 | ```
34 | 
35 | ```
36 | SPANISH_OD_DATA_DIR = "~/spanish_od_data"
37 | ```
38 | 
39 | You can also set the data directory locally, just for the current project. Set the environment variable in the working directory by editing the `.Renviron` file in the root of the project:
40 | 
41 | ```{r}
42 | #| eval: false
43 | file.edit(".Renviron")
44 | ```
45 | 
46 | 
47 | </details>
48 | 


--------------------------------------------------------------------------------
/man/figures/README-desire-lines-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/README-desire-lines-1.png


--------------------------------------------------------------------------------
/man/figures/README-distritos-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/README-distritos-1.png


--------------------------------------------------------------------------------
/man/figures/README-salamanca-plot-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/README-salamanca-plot-1.png


--------------------------------------------------------------------------------
/man/figures/README-salamanca-zones-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/README-salamanca-zones-1.png


--------------------------------------------------------------------------------
/man/figures/README-trips-per-hour-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/README-trips-per-hour-1.png


--------------------------------------------------------------------------------
/man/figures/card.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/card.png


--------------------------------------------------------------------------------
/man/figures/flowmapblue-animated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/flowmapblue-animated.png


--------------------------------------------------------------------------------
/man/figures/flowmapblue-standard-01.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/flowmapblue-standard-01.png


--------------------------------------------------------------------------------
/man/figures/flowmapblue-standard-02.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/flowmapblue-standard-02.png


--------------------------------------------------------------------------------
/man/figures/flowmapblue-standard-time.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/flowmapblue-standard-time.png


--------------------------------------------------------------------------------
/man/figures/flows_plot_all_districts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/flows_plot_all_districts.png


--------------------------------------------------------------------------------
/man/figures/flows_plot_barcelona.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/flows_plot_barcelona.png


--------------------------------------------------------------------------------
/man/figures/lifecycle-deprecated.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="126" height="20" role="img" aria-label="lifecycle: deprecated">
 2 |     <title>lifecycle: deprecated</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="126" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="71" height="20" fill="#fe7d37" />
13 |         <rect width="126" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text>
17 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">lifecycle</text>
18 |         <text aria-hidden="true" x="895" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="610">deprecated</text>
19 |         <text x="895" y="140" transform="scale(.1)" fill="#fff" textLength="610">deprecated</text>
20 |     </g>
21 | </svg>
22 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-experimental.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="138" height="20" role="img" aria-label="lifecycle: experimental">
 2 |     <title>lifecycle: experimental</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="138" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="83" height="20" fill="#fe7d37" />
13 |         <rect width="138" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text>
17 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">lifecycle</text>
18 |         <text aria-hidden="true" x="955" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="730">experimental</text>
19 |         <text x="955" y="140" transform="scale(.1)" fill="#fff" textLength="730">experimental</text>
20 |     </g>
21 | </svg>
22 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-stable.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="98" height="20" role="img" aria-label="lifecycle: stable">
 2 |     <title>lifecycle: stable</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="98" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="43" height="20" fill="#4c1" />
13 |         <rect width="98" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">
17 |       lifecycle
18 |     </text>
19 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">
20 |       lifecycle
21 |     </text>
22 |         <text aria-hidden="true" x="755" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="330">
23 |       stable
24 |     </text>
25 |         <text x="755" y="140" transform="scale(.1)" fill="#fff" textLength="330">
26 |       stable
27 |     </text>
28 |     </g>
29 | </svg>
30 | 


--------------------------------------------------------------------------------
/man/figures/lifecycle-superseded.svg:
--------------------------------------------------------------------------------
 1 | <svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" width="128" height="20" role="img" aria-label="lifecycle: superseded">
 2 |     <title>lifecycle: superseded</title>
 3 |     <linearGradient id="s" x2="0" y2="100%">
 4 |         <stop offset="0" stop-color="#bbb" stop-opacity=".1" />
 5 |         <stop offset="1" stop-opacity=".1" />
 6 |     </linearGradient>
 7 |     <clipPath id="r">
 8 |         <rect width="128" height="20" rx="3" fill="#fff" />
 9 |     </clipPath>
10 |     <g clip-path="url(#r)">
11 |         <rect width="55" height="20" fill="#555" />
12 |         <rect x="55" width="73" height="20" fill="#007ec6" />
13 |         <rect width="128" height="20" fill="url(#s)" />
14 |     </g>
15 |     <g fill="#fff" text-anchor="middle" font-family="Verdana,Geneva,DejaVu Sans,sans-serif" text-rendering="geometricPrecision" font-size="110">
16 |         <text aria-hidden="true" x="285" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="450">lifecycle</text>
17 |         <text x="285" y="140" transform="scale(.1)" fill="#fff" textLength="450">lifecycle</text>
18 |         <text aria-hidden="true" x="905" y="150" fill="#010101" fill-opacity=".3" transform="scale(.1)" textLength="630">superseded</text>
19 |         <text x="905" y="140" transform="scale(.1)" fill="#fff" textLength="630">superseded</text>
20 |     </g>
21 | </svg>
22 | 


--------------------------------------------------------------------------------
/man/figures/logo-before-hex.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/logo-before-hex.png


--------------------------------------------------------------------------------
/man/figures/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/logo.png


--------------------------------------------------------------------------------
/man/figures/zones_barcelona_fua_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/zones_barcelona_fua_plot.png


--------------------------------------------------------------------------------
/man/figures/zones_barcelona_plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/man/figures/zones_barcelona_plot.png


--------------------------------------------------------------------------------
/man/global_quiet_param.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/global-params.R
 3 | \name{global_quiet_param}
 4 | \alias{global_quiet_param}
 5 | \title{Global Quiet Parameter}
 6 | \usage{
 7 | global_quiet_param(quiet = FALSE)
 8 | }
 9 | \arguments{
10 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.}
11 | }
12 | \value{
13 | Nothing. This function is just a placeholder for global quiet parameter.
14 | }
15 | \description{
16 | Documentation for the \code{quiet} parameter, used globally.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spanishoddata-package.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/spanishoddata-package.R
 3 | \docType{package}
 4 | \name{spanishoddata-package}
 5 | \alias{spanishoddata}
 6 | \alias{spanishoddata-package}
 7 | \title{spanishoddata: Get Spanish Origin-Destination Data}
 8 | \description{
 9 | \if{html}{\figure{logo.png}{options: style='float: right' alt='logo' width='120'}}
10 | 
11 | Gain seamless access to origin-destination (OD) data from the Spanish Ministry of Transport, hosted at \url{https://www.transportes.gob.es/ministerio/proyectos-singulares/estudios-de-movilidad-con-big-data/opendata-movilidad}. This package simplifies the management of these large datasets by providing tools to download zone boundaries, handle associated origin-destination data, and process it efficiently with the 'duckdb' database interface. Local caching minimizes repeated downloads, streamlining workflows for researchers and analysts. Extensive documentation is available at \url{https://ropenspain.github.io/spanishoddata/index.html}, offering guides on creating static and dynamic mobility flow visualizations and transforming large datasets into analysis-ready formats.
12 | }
13 | \seealso{
14 | Useful links:
15 | \itemize{
16 |   \item \url{https://rOpenSpain.github.io/spanishoddata/}
17 |   \item \url{https://github.com/rOpenSpain/spanishoddata}
18 |   \item Report bugs at \url{https://github.com/rOpenSpain/spanishoddata/issues}
19 | }
20 | 
21 | }
22 | \author{
23 | \strong{Maintainer}: Egor Kotov \email{kotov.egor@gmail.com} (\href{https://orcid.org/0000-0001-6690-5345}{ORCID})
24 | 
25 | Authors:
26 | \itemize{
27 |   \item Robin Lovelace \email{rob00x@gmail.com} (\href{https://orcid.org/0000-0001-5679-6536}{ORCID})
28 | }
29 | 
30 | Other contributors:
31 | \itemize{
32 |   \item Eugeni Vidal-Tortosa (\href{https://orcid.org/0000-0001-5199-4103}{ORCID}) [contributor]
33 | }
34 | 
35 | }
36 | \keyword{internal}
37 | 


--------------------------------------------------------------------------------
/man/spod_available_data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/available-data.R
 3 | \name{spod_available_data}
 4 | \alias{spod_available_data}
 5 | \title{Get available data list}
 6 | \usage{
 7 | spod_available_data(
 8 |   ver = 2,
 9 |   check_local_files = FALSE,
10 |   quiet = FALSE,
11 |   data_dir = spod_get_data_dir()
12 | )
13 | }
14 | \arguments{
15 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.}
16 | 
17 | \item{check_local_files}{Whether to check if the local files exist. Defaults to \code{FALSE}.}
18 | 
19 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.}
20 | 
21 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.}
22 | }
23 | \value{
24 | A tibble with links, release dates of files in the data, dates of data coverage, local paths to files, and the download status.
25 | \describe{
26 | \item{target_url}{\code{character}. The URL link to the data file.}
27 | \item{pub_ts}{\code{POSIXct}. The timestamp of when the file was published.}
28 | \item{file_extension}{\code{character}. The file extension of the data file (e.g., 'tar', 'gz').}
29 | \item{data_ym}{\code{Date}. The year and month of the data coverage, if available.}
30 | \item{data_ymd}{\code{Date}. The specific date of the data coverage, if available.}
31 | \item{local_path}{\code{character}. The local file path where the data is stored.}
32 | \item{downloaded}{\code{logical}. Indicator of whether the data file has been downloaded locally. This is only available if \code{check_local_files} is \code{TRUE}.}
33 | }
34 | }
35 | \description{
36 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
37 | 
38 | Get a table with links to available data files for the specified data version. Optionally check (see arguments) if certain files have already been downloaded into the cache directory specified with the SPANISH_OD_DATA_DIR environment variable (set by \link{spod_set_data_dir}) or a custom path specified with the \code{data_dir} argument.
39 | }
40 | \examples{
41 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
42 | \donttest{
43 | 
44 | # Set data dir for file downloads
45 | spod_set_data_dir(tempdir())
46 | 
47 | # Get available data list for v1 (2020-2021) data
48 | spod_available_data(ver = 1)
49 | 
50 | # Get available data list for v2 (2022 onwards) data
51 | spod_available_data(ver = 2)
52 | 
53 | # Get available data list for v2 (2022 onwards) data
54 | # while also checking for local files that are already downloaded
55 | spod_available_data(ver = 2, check_local_files = TRUE)
56 | }
57 | \dontshow{\}) # examplesIf}
58 | }
59 | 


--------------------------------------------------------------------------------
/man/spod_available_data_v1.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/available-data.R
 3 | \name{spod_available_data_v1}
 4 | \alias{spod_available_data_v1}
 5 | \title{Get the available v1 data list}
 6 | \usage{
 7 | spod_available_data_v1(
 8 |   data_dir = spod_get_data_dir(),
 9 |   check_local_files = FALSE,
10 |   quiet = FALSE
11 | )
12 | }
13 | \arguments{
14 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.}
15 | 
16 | \item{check_local_files}{Whether to check if the local files exist. Defaults to \code{FALSE}.}
17 | 
18 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.}
19 | }
20 | \value{
21 | A tibble with links, release dates of files in the data, dates of data coverage, local paths to files, and the download status.
22 | \describe{
23 | \item{target_url}{\code{character}. The URL link to the data file.}
24 | \item{pub_ts}{\code{POSIXct}. The timestamp of when the file was published.}
25 | \item{file_extension}{\code{character}. The file extension of the data file (e.g., 'tar', 'gz').}
26 | \item{data_ym}{\code{Date}. The year and month of the data coverage, if available.}
27 | \item{data_ymd}{\code{Date}. The specific date of the data coverage, if available.}
28 | \item{local_path}{\code{character}. The local file path where the data is stored.}
29 | \item{downloaded}{\code{logical}. Indicator of whether the data file has been downloaded locally. This is only available if \code{check_local_files} is \code{TRUE}.}
30 | }
31 | }
32 | \description{
33 | This function provides a table of the available data list of MITMA v1 (2020-2021), both remote and local.
34 | }
35 | \keyword{internal}
36 | 


--------------------------------------------------------------------------------
/man/spod_available_data_v2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/available-data.R
 3 | \name{spod_available_data_v2}
 4 | \alias{spod_available_data_v2}
 5 | \title{Get the available v2 data list}
 6 | \usage{
 7 | spod_available_data_v2(
 8 |   data_dir = spod_get_data_dir(),
 9 |   check_local_files = FALSE,
10 |   quiet = FALSE
11 | )
12 | }
13 | \arguments{
14 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.}
15 | 
16 | \item{check_local_files}{Whether to check if the local files exist. Defaults to \code{FALSE}.}
17 | 
18 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.}
19 | }
20 | \value{
21 | A tibble with links, release dates of files in the data, dates of data coverage, local paths to files, and the download status.
22 | \describe{
23 | \item{target_url}{\code{character}. The URL link to the data file.}
24 | \item{pub_ts}{\code{POSIXct}. The timestamp of when the file was published.}
25 | \item{file_extension}{\code{character}. The file extension of the data file (e.g., 'tar', 'gz').}
26 | \item{data_ym}{\code{Date}. The year and month of the data coverage, if available.}
27 | \item{data_ymd}{\code{Date}. The specific date of the data coverage, if available.}
28 | \item{local_path}{\code{character}. The local file path where the data is stored.}
29 | \item{downloaded}{\code{logical}. Indicator of whether the data file has been downloaded locally. This is only available if \code{check_local_files} is \code{TRUE}.}
30 | }
31 | }
32 | \description{
33 | This function provides a table of the available data list of MITMA v2 (2022 onwards), both remote and local.
34 | }
35 | \keyword{internal}
36 | 


--------------------------------------------------------------------------------
/man/spod_available_ram.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_available_ram}
 4 | \alias{spod_available_ram}
 5 | \title{Get available RAM}
 6 | \usage{
 7 | spod_available_ram()
 8 | }
 9 | \value{
10 | A \code{numeric} amount of available RAM in GB.
11 | }
12 | \description{
13 | Get available RAM
14 | }
15 | \keyword{internal}
16 | 


--------------------------------------------------------------------------------
/man/spod_cite.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/cite.R
 3 | \name{spod_cite}
 4 | \alias{spod_cite}
 5 | \title{Cite the package and the data}
 6 | \usage{
 7 | spod_cite(what = "all", format = "all")
 8 | }
 9 | \arguments{
10 | \item{what}{Character vector specifying what to cite.
11 | Can include "package", "data", "methodology_v1", "methodology_v2", or "all".
12 | Default is "all".}
13 | 
14 | \item{format}{Character vector specifying output format(s).
15 | Can include "text", "markdown", "bibtex", or "all".
16 | Default is "all".}
17 | }
18 | \value{
19 | Nothing. Prints citation in plain text, markdown, BibTeX, or all formats at once to console.
20 | }
21 | \description{
22 | Cite the package and the data
23 | }
24 | \examples{
25 | # Cite everything in all formats
26 | \dontrun{
27 |  spod_cite()
28 | }
29 | 
30 | # Cite just the package in BibTeX format
31 | \dontrun{
32 |  spod_cite(what = "package", format = "bibtex")
33 | }
34 | 
35 | # Cite both methodologies in plain text
36 | \dontrun{
37 |  spod_cite(what = c("methodology_v1", "methodology_v2"), format = "text")
38 | }
39 | }
40 | 


--------------------------------------------------------------------------------
/man/spod_clean_zones_v1.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get-zones.R
 3 | \name{spod_clean_zones_v1}
 4 | \alias{spod_clean_zones_v1}
 5 | \title{Fixes common issues in the zones data and cleans up variable names}
 6 | \usage{
 7 | spod_clean_zones_v1(zones_path, zones)
 8 | }
 9 | \arguments{
10 | \item{zones_path}{The path to the zones spatial data file.}
11 | 
12 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}) for both data versions. Additionally, these can be \code{"large_urban_areas"} (or \code{"lua"}, or the original Spanish \code{"grandes_areas_urbanas"}, or \code{"gau"}) for v2 data (2022 onwards).}
13 | }
14 | \value{
15 | A spatial object containing the cleaned zones data.
16 | }
17 | \description{
18 | This function fixes any invalid geometries in the zones data and renames the "ID" column to "id".
19 | }
20 | \keyword{internal}
21 | 


--------------------------------------------------------------------------------
/man/spod_clean_zones_v2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get-zones.R
 3 | \name{spod_clean_zones_v2}
 4 | \alias{spod_clean_zones_v2}
 5 | \title{Fixes common issues in the zones data and cleans up variable names}
 6 | \usage{
 7 | spod_clean_zones_v2(zones_path)
 8 | }
 9 | \arguments{
10 | \item{zones_path}{The path to the zones spatial data file.}
11 | }
12 | \value{
 13 | A spatial \code{sf} object containing the cleaned zones data.
14 | }
15 | \description{
 16 | This function fixes any invalid geometries in the zones data and renames the "ID" column to "id". It also attaches the population counts and zone names provided in the csv files supplied by the original data provider.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_codebook.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/codebook.R
 3 | \name{spod_codebook}
 4 | \alias{spod_codebook}
 5 | \title{View codebooks for v1 and v2 open mobility data}
 6 | \usage{
 7 | spod_codebook(ver = 1)
 8 | }
 9 | \arguments{
 10 | \item{ver}{An \code{integer} or \code{numeric} value. The version of the data. Defaults to 1. Can be \code{1} for v1 (2020-2021) data and \code{2} for v2 (2022 onwards) data.}
11 | }
12 | \value{
13 | Nothing, opens vignette if it is installed. If vignette is missing, prints a message with a link to a webpage with the codebook.
14 | }
15 | \description{
16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
17 | 
 18 | Opens the relevant vignette with a codebook for v1 (2020-2021) and v2 (2022 onwards) data, or provides a link to a webpage if the vignette is missing.
19 | }
20 | \examples{
21 | 
22 | # View codebook for v1 (2020-2021) data
23 | spod_codebook(ver = 1)
24 | 
25 | # View codebook for v2 (2022 onwards) data
26 | spod_codebook(ver = 2)
27 | 
28 | }
29 | 


--------------------------------------------------------------------------------
/man/spod_connect.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/connect.R
 3 | \name{spod_connect}
 4 | \alias{spod_connect}
 5 | \title{Connect to data converted to \code{DuckDB} or hive-style \code{parquet} files}
 6 | \usage{
 7 | spod_connect(
 8 |   data_path,
 9 |   target_table_name = NULL,
10 |   quiet = FALSE,
11 |   max_mem_gb = max(4, spod_available_ram() - 4),
12 |   max_n_cpu = max(1, parallelly::availableCores() - 1),
13 |   temp_path = spod_get_temp_dir()
14 | )
15 | }
16 | \arguments{
 17 | \item{data_path}{a path to the \code{DuckDB} database file with '.duckdb' extension, or a path to the folder with \code{parquet} files. Either one should have been created with the \link{spod_convert} function.}
18 | 
19 | \item{target_table_name}{Default is \code{NULL}. When connecting to a folder of \code{parquet} files, this argument is ignored. When connecting to a \code{DuckDB} database, a \code{character} vector of length 1 with the table name to open from the database file. If not specified, it will be guessed from the \code{data_path} argument and from table names that are available in the database. If you have not manually interfered with the database, this should be guessed automatically and you do not need to specify it.}
20 | 
21 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.}
22 | 
 23 | \item{max_mem_gb}{The maximum memory to use in GB. Defaults to \code{max(4, spod_available_ram() - 4)}, so it is never lower than 4 GB. The 4 GB floor should be enough for resaving the data to \code{DuckDB} from a folder of CSV.gz files while being small enough to fit in memory of most even old computers. For data analysis using the already converted data (in \code{DuckDB} or Parquet format) or with the raw CSV.gz data, it is recommended to increase it according to available resources.}
24 | 
25 | \item{max_n_cpu}{The maximum number of threads to use. Defaults to the number of available cores minus 1.}
26 | 
27 | \item{temp_path}{The path to the temp folder for DuckDB for \href{https://duckdb.org/2024/07/09/memory-management.html#intermediate-spilling}{intermediate spilling} in case the set memory limit and/or physical memory of the computer is too low to perform the query. By default this is set to the \code{temp} directory in the data folder defined by SPANISH_OD_DATA_DIR environment variable. Otherwise, for queries on folders of CSV files or parquet files, the temporary path would be set to the current R working directory, which probably is undesirable, as the current working directory can be on a slow storage, or storage that may have limited space, compared to the data folder.}
28 | }
29 | \value{
30 | a \code{DuckDB} table connection object.
31 | }
32 | \description{
33 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
34 | 
35 | This function allows the user to quickly connect to the data converted to DuckDB with the \link{spod_convert} function. This function simplifies the connection process. The user is free to use the \code{DBI} and \code{DuckDB} packages to connect to the data manually, or to use the \code{arrow} package to connect to the \code{parquet} files folder.
36 | }
37 | \examples{
38 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
39 | \donttest{
40 | # Set data dir for file downloads
41 | spod_set_data_dir(tempdir())
42 | 
43 | # download and convert data
44 | dates_1 <- c(start = "2020-02-17", end = "2020-02-18")
45 | db_2 <- spod_convert(
46 |  type = "number_of_trips",
47 |  zones = "distr",
48 |  dates = dates_1,
49 |  overwrite = TRUE
50 | )
51 | 
52 | # now connect to the converted data
53 | my_od_data_2 <- spod_connect(db_2)
54 | 
55 | # disconnect from the database
56 | spod_disconnect(my_od_data_2)
57 | }
58 | \dontshow{\}) # examplesIf}
59 | }
60 | 


--------------------------------------------------------------------------------
/man/spod_convert_dates_to_ranges.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_convert_dates_to_ranges}
 4 | \alias{spod_convert_dates_to_ranges}
 5 | \title{Convert dates to ranges}
 6 | \usage{
 7 | spod_convert_dates_to_ranges(dates)
 8 | }
 9 | \arguments{
10 | \item{dates}{A \code{character} vector of dates.}
11 | }
12 | \value{
13 | A \code{character} vector of date ranges.
14 | }
15 | \description{
16 | This internal helper function reduces a vector of dates to a vector of date ranges to shorten the warning and error messages that mention the valid date ranges.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_dates_argument_to_dates_seq.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_dates_argument_to_dates_seq}
 4 | \alias{spod_dates_argument_to_dates_seq}
  5 | \title{Convert multiple formats of date arguments to a sequence of dates}
 6 | \usage{
 7 | spod_dates_argument_to_dates_seq(dates)
 8 | }
 9 | \arguments{
10 | \item{dates}{A \code{character} or \code{Date} vector of dates to process. Kindly keep in mind that v1 and v2 data follow different data collection methodologies and may not be directly comparable. Therefore, do not try to request data from both versions for the same date range. If you need to compare data from both versions, please refer to the respective codebooks and methodology documents. The v1 data covers the period from 2020-02-14 to 2021-05-09, and the v2 data covers the period from 2022-01-01 to the present until further notice. The true dates range is checked against the available data for each version on every function run.
11 | 
12 | The possible values can be any of the following:
13 | \itemize{
14 | \item For the \code{spod_get()} and \code{spod_convert()} functions, the \code{dates} can be set to "cached_v1" or "cached_v2" to request data from cached (already previously downloaded) v1 (2020-2021) or v2 (2022 onwards) data. In this case, the function will identify and use all data files that have been downloaded and cached locally, (e.g. using an explicit run of \code{spod_download()}, or any data requests made using the \code{spod_get()} or \code{spod_convert()} functions).
15 | \item A single date in ISO (YYYY-MM-DD) or YYYYMMDD format. \code{character} or \code{Date} object.
16 | \item A vector of dates in ISO (YYYY-MM-DD) or YYYYMMDD format. \code{character} or \code{Date} object. Can be any non-consecutive sequence of dates.
17 | \item A date range
18 | \itemize{
 19 | \item either a \code{character} or \code{Date} object of length 2 with clearly named elements \code{start} and \code{end} in ISO (YYYY-MM-DD) or YYYYMMDD format. E.g. \code{c(start = "2020-02-15", end = "2020-02-17")};
20 | \item or a \code{character} object of the form \code{YYYY-MM-DD_YYYY-MM-DD} or \code{YYYYMMDD_YYYYMMDD}. For example, \verb{2020-02-15_2020-02-17} or \verb{20200215_20200217}.
21 | }
22 | \item A regular expression to match dates in the format \code{YYYYMMDD}. \code{character} object. For example, \verb{^202002} will match all dates in February 2020.
23 | }}
24 | }
25 | \value{
26 | A \code{character} vector of dates in ISO format (YYYY-MM-DD).
27 | }
28 | \description{
 29 | This function processes the date arguments provided to various functions in the package. It can handle single dates and arbitrary sequences (vectors) of dates in ISO (YYYY-MM-DD) and YYYYMMDD format. It can also handle date ranges in the format 'YYYY-MM-DD_YYYY-MM-DD' (or 'YYYYMMDD_YYYYMMDD'), date ranges in named vectors, and regular expressions to match dates in the format \code{YYYYMMDD}.
30 | }
31 | \keyword{internal}
32 | 


--------------------------------------------------------------------------------
/man/spod_disconnect.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/disconnect.R
 3 | \name{spod_disconnect}
 4 | \alias{spod_disconnect}
 5 | \title{Safely disconnect from data and free memory}
 6 | \usage{
 7 | spod_disconnect(tbl_con, free_mem = TRUE)
 8 | }
 9 | \arguments{
10 | \item{tbl_con}{A \code{tbl_duckdb_connection} connection object that you get from either \code{spod_get()} or \code{spod_connect()}.}
11 | 
12 | \item{free_mem}{A \code{logical}. Whether to free up memory by running \code{gc()}. Defaults to \code{TRUE}.}
13 | }
14 | \value{
15 | No return value, called for side effect of disconnecting from the database and freeing up memory.
16 | }
17 | \description{
18 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
19 | 
 20 | This function is to ensure that \code{DuckDB} connections to CSV.gz files (created via \code{spod_get()}), as well as to \code{DuckDB} files or folders of \code{parquet} files (created via \code{spod_convert()}) are closed properly to prevent conflicting connections. Essentially this is just a wrapper around \code{DBI::dbDisconnect()} that reaches out into the \code{.$src$con} object of the \code{tbl_duckdb_connection} connection object that is returned to the user via \code{spod_get()} and \code{spod_connect()}. After disconnecting the database, it also frees up memory by running \code{gc()}.
21 | }
22 | \examples{
23 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
24 | \donttest{
25 | # Set data dir for file downloads
26 | spod_set_data_dir(tempdir())
27 | 
28 | # basic example
29 | # create a connection to the v1 data without converting
30 | # this creates a duckdb database connection to CSV files
31 | od_distr <- spod_get(
32 |  "od",
33 |  zones = "distr",
34 |  dates = c("2020-03-01", "2020-03-02")
35 | )
36 | # disconnect from the database connection
37 | spod_disconnect(od_distr)
38 | 
39 | # Advanced example
40 | # download and convert data
41 | dates_1 <- c(start = "2020-02-17", end = "2020-02-19")
42 | db_2 <- spod_convert(
43 |  type = "od",
44 |  zones = "distr",
45 |  dates = dates_1,
46 |  overwrite = TRUE
47 | )
48 | 
49 | # now connect to the converted data
50 | my_od_data_2 <- spod_connect(db_2)
51 | 
52 | # disconnect from the database
53 | spod_disconnect(my_od_data_2)
54 | }
55 | \dontshow{\}) # examplesIf}
56 | }
57 | 


--------------------------------------------------------------------------------
/man/spod_download_zones_v1.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get-zones.R
 3 | \name{spod_download_zones_v1}
 4 | \alias{spod_download_zones_v1}
 5 | \title{Downloads and extracts the raw v1 zones data}
 6 | \usage{
 7 | spod_download_zones_v1(
 8 |   zones = c("districts", "dist", "distr", "distritos", "municipalities", "muni",
 9 |     "municip", "municipios"),
10 |   data_dir = spod_get_data_dir(),
11 |   quiet = FALSE
12 | )
13 | }
14 | \arguments{
15 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}).}
16 | 
17 | \item{data_dir}{The directory where the data is stored.}
18 | 
19 | \item{quiet}{Boolean flag to control the display of messages.}
20 | }
21 | \value{
22 | A \code{character} string containing the path to the downloaded and extracted file.
23 | }
24 | \description{
25 | This function ensures that the necessary v1 raw data for zones files are downloaded and extracted from the specified data directory.
26 | }
27 | \keyword{internal}
28 | 


--------------------------------------------------------------------------------
/man/spod_duckdb_create_province_enum.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/duckdb-helpers.R
 3 | \name{spod_duckdb_create_province_enum}
 4 | \alias{spod_duckdb_create_province_enum}
 5 | \title{Create province names ENUM in a duckdb connection}
 6 | \usage{
 7 | spod_duckdb_create_province_enum(con)
 8 | }
 9 | \arguments{
10 | \item{con}{A \code{duckdb} connection.}
11 | }
12 | \value{
13 | A \code{duckdb} connection with \code{INE_PROV_NAME_ENUM} and \code{INE_PROV_CODE_ENUM} created.
14 | }
15 | \description{
16 | Create province names ENUM in a duckdb connection
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_duckdb_filter_by_dates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/duckdb-helpers.R
 3 | \name{spod_duckdb_filter_by_dates}
 4 | \alias{spod_duckdb_filter_by_dates}
  5 | \title{Filter a duckdb connection by dates}
 6 | \usage{
 7 | spod_duckdb_filter_by_dates(con, source_view_name, new_view_name, dates)
 8 | }
 9 | \arguments{
10 | \item{con}{A duckdb connection}
11 | 
12 | \item{source_view_name}{The name of the source duckdb "view" (the virtual table, in the context of current package likely connected to a folder of CSV files)}
13 | 
14 | \item{new_view_name}{The name of the new duckdb "view" (the virtual table, in the context of current package likely connected to a folder of CSV files).}
15 | 
16 | \item{dates}{A \code{character} or \code{Date} vector of dates to process. Kindly keep in mind that v1 and v2 data follow different data collection methodologies and may not be directly comparable. Therefore, do not try to request data from both versions for the same date range. If you need to compare data from both versions, please refer to the respective codebooks and methodology documents. The v1 data covers the period from 2020-02-14 to 2021-05-09, and the v2 data covers the period from 2022-01-01 to the present until further notice. The true dates range is checked against the available data for each version on every function run.
17 | 
18 | The possible values can be any of the following:
19 | \itemize{
20 | \item For the \code{spod_get()} and \code{spod_convert()} functions, the \code{dates} can be set to "cached_v1" or "cached_v2" to request data from cached (already previously downloaded) v1 (2020-2021) or v2 (2022 onwards) data. In this case, the function will identify and use all data files that have been downloaded and cached locally, (e.g. using an explicit run of \code{spod_download()}, or any data requests made using the \code{spod_get()} or \code{spod_convert()} functions).
21 | \item A single date in ISO (YYYY-MM-DD) or YYYYMMDD format. \code{character} or \code{Date} object.
22 | \item A vector of dates in ISO (YYYY-MM-DD) or YYYYMMDD format. \code{character} or \code{Date} object. Can be any non-consecutive sequence of dates.
23 | \item A date range
24 | \itemize{
 25 | \item either a \code{character} or \code{Date} object of length 2 with clearly named elements \code{start} and \code{end} in ISO (YYYY-MM-DD) or YYYYMMDD format. E.g. \code{c(start = "2020-02-15", end = "2020-02-17")};
26 | \item or a \code{character} object of the form \code{YYYY-MM-DD_YYYY-MM-DD} or \code{YYYYMMDD_YYYYMMDD}. For example, \verb{2020-02-15_2020-02-17} or \verb{20200215_20200217}.
27 | }
28 | \item A regular expression to match dates in the format \code{YYYYMMDD}. \code{character} object. For example, \verb{^202002} will match all dates in February 2020.
29 | }}
30 | }
31 | \value{
32 | A \code{duckdb} connection with original views and a new filtered view.
33 | }
34 | \description{
35 | IMPORTANT: This function assumes that the table or view that is being filtered has separate \code{year}, \code{month} and \code{day} columns with integer values. This is done so that the filtering is faster on CSV files that are stored in a folder structure with hive-style \verb{/year=2020/month=2/day=14/}.
36 | }
37 | \keyword{internal}
38 | 


--------------------------------------------------------------------------------
/man/spod_duckdb_limit_resources.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/duckdb-helpers.R
 3 | \name{spod_duckdb_limit_resources}
 4 | \alias{spod_duckdb_limit_resources}
 5 | \title{Set maximum memory and number of threads for a \code{DuckDB} connection}
 6 | \usage{
 7 | spod_duckdb_limit_resources(
 8 |   con,
 9 |   max_mem_gb = max(4, spod_available_ram() - 4),
10 |   max_n_cpu = max(1, parallelly::availableCores() - 1)
11 | )
12 | }
13 | \arguments{
14 | \item{con}{A \code{duckdb} connection}
15 | 
 16 | \item{max_mem_gb}{The maximum memory to use in GB. Defaults to \code{max(4, spod_available_ram() - 4)}, so it is never lower than 4 GB. The 4 GB floor should be enough for resaving the data to \code{DuckDB} from a folder of CSV.gz files while being small enough to fit in memory of most even old computers. For data analysis using the already converted data (in \code{DuckDB} or Parquet format) or with the raw CSV.gz data, it is recommended to increase it according to available resources.}
17 | 
18 | \item{max_n_cpu}{The maximum number of threads to use. Defaults to the number of available cores minus 1.}
19 | }
20 | \value{
21 | A \code{duckdb} connection.
22 | }
23 | \description{
24 | Set maximum memory and number of threads for a \code{DuckDB} connection
25 | }
26 | \keyword{internal}
27 | 


--------------------------------------------------------------------------------
/man/spod_duckdb_number_of_trips.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/duckdb-helpers.R
 3 | \name{spod_duckdb_number_of_trips}
 4 | \alias{spod_duckdb_number_of_trips}
 5 | \title{Create a duckdb number of trips table}
 6 | \usage{
 7 | spod_duckdb_number_of_trips(
 8 |   con = DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:", read_only = FALSE),
 9 |   zones = c("districts", "dist", "distr", "distritos", "municipalities", "muni",
10 |     "municip", "municipios", "lua", "large_urban_areas", "gau", "grandes_areas_urbanas"),
11 |   ver = NULL,
12 |   data_dir = spod_get_data_dir()
13 | )
14 | }
15 | \arguments{
16 | \item{con}{A duckdb connection object. If not specified, a new in-memory connection will be created.}
17 | 
 18 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}) for both data versions. Additionally, these can be \code{"large_urban_areas"} (or \code{"lua"}, or the original Spanish \code{"grandes_areas_urbanas"}, or \code{"gau"}) for v2 data (2022 onwards).}
19 | 
20 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.}
21 | 
22 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.}
23 | }
24 | \value{
25 | A \code{duckdb} connection object with 2 views:
26 | \itemize{
27 | \item \code{od_csv_raw} - a raw table view of all cached CSV files with the origin-destination data that has been previously cached in $SPANISH_OD_DATA_DIR
28 | \item \code{od_csv_clean} - a cleaned-up table view of \code{od_csv_raw} with column names and values translated and mapped to English. This still includes all cached data.
29 | }
30 | }
31 | \description{
32 | This function creates a duckdb connection to the number of trips data stored in a folder of CSV.gz files.
33 | }
34 | \keyword{internal}
35 | 


--------------------------------------------------------------------------------
/man/spod_duckdb_overnight_stays.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/duckdb-helpers.R
 3 | \name{spod_duckdb_overnight_stays}
 4 | \alias{spod_duckdb_overnight_stays}
 5 | \title{Create a duckdb overnight stays table}
 6 | \usage{
 7 | spod_duckdb_overnight_stays(
 8 |   con = DBI::dbConnect(duckdb::duckdb(), dbdir = ":memory:", read_only = FALSE),
 9 |   zones = c("districts", "dist", "distr", "distritos", "municipalities", "muni",
10 |     "municip", "municipios", "lua", "large_urban_areas", "gau", "grandes_areas_urbanas"),
11 |   ver = NULL,
12 |   data_dir = spod_get_data_dir()
13 | )
14 | }
15 | \arguments{
16 | \item{con}{A duckdb connection object. If not specified, a new in-memory connection will be created.}
17 | 
 18 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}) for both data versions. Additionally, these can be \code{"large_urban_areas"} (or \code{"lua"}, or the original Spanish \code{"grandes_areas_urbanas"}, or \code{"gau"}) for v2 data (2022 onwards).}
19 | 
20 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.}
21 | 
22 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.}
23 | }
24 | \value{
25 | A \code{duckdb} connection object with 2 views:
26 | \itemize{
27 | \item \code{od_csv_raw} - a raw table view of all cached CSV files with the origin-destination data that has been previously cached in $SPANISH_OD_DATA_DIR
28 | \item \code{od_csv_clean} - a cleaned-up table view of \code{od_csv_raw} with column names and values translated and mapped to English. This still includes all cached data.
29 | }
30 | }
31 | \description{
32 | This function creates a duckdb connection to the overnight stays data stored in a folder of CSV.gz files.
33 | }
34 | \keyword{internal}
35 | 


--------------------------------------------------------------------------------
/man/spod_duckdb_set_temp.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/duckdb-helpers.R
 3 | \name{spod_duckdb_set_temp}
 4 | \alias{spod_duckdb_set_temp}
 5 | \title{Set temp file for DuckDB connection}
 6 | \usage{
 7 | spod_duckdb_set_temp(con, temp_path = spod_get_temp_dir())
 8 | }
 9 | \arguments{
10 | \item{con}{A duckdb connection}
11 | 
12 | \item{temp_path}{The path to the temp folder for DuckDB for \href{https://duckdb.org/2024/07/09/memory-management.html#intermediate-spilling}{intermediate spilling} in case the set memory limit and/or physical memory of the computer is too low to perform the query. By default this is set to the \code{temp} directory in the data folder defined by SPANISH_OD_DATA_DIR environment variable. Otherwise, for queries on folders of CSV files or parquet files, the temporary path would be set to the current R working directory, which probably is undesirable, as the current working directory can be on a slow storage, or storage that may have limited space, compared to the data folder.}
13 | }
14 | \value{
15 | A \code{duckdb} connection.
16 | }
17 | \description{
18 | Set temp file for DuckDB connection
19 | }
20 | \keyword{internal}
21 | 


--------------------------------------------------------------------------------
/man/spod_expand_dates_from_regex.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_expand_dates_from_regex}
 4 | \alias{spod_expand_dates_from_regex}
 5 | \title{Function to expand dates from a regex}
 6 | \usage{
 7 | spod_expand_dates_from_regex(date_regex)
 8 | }
 9 | \arguments{
10 | \item{date_regex}{A regular expression to match dates in the format 'yyyymmdd'.}
11 | }
12 | \value{
13 | A \code{character} vector of dates matching the regex.
14 | }
15 | \description{
 16 | This function generates a sequence of dates that match the provided regular expression.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_files_sizes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dev-tools.R
 3 | \name{spod_files_sizes}
 4 | \alias{spod_files_sizes}
  5 | \title{Get file sizes for remote files of v1 and v2 data and save them into a csv.gz file in the inst/extdata folder.}
 6 | \usage{
 7 | spod_files_sizes(ver = 2)
 8 | }
 9 | \arguments{
10 | \item{ver}{The version of the data (1 or 2). Can be both. Defaults to 2, as v1 data is not being updated since 2021.}
11 | }
12 | \value{
13 | Nothing. Only saves a csv.gz file with up to date file sizes in the inst/extdata folder.
14 | }
15 | \description{
 16 | Get file sizes for remote files of v1 and v2 data and save them into a csv.gz file in the inst/extdata folder.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_get_data_dir.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data-dir.R
 3 | \name{spod_get_data_dir}
 4 | \alias{spod_get_data_dir}
 5 | \title{Get the data directory}
 6 | \usage{
 7 | spod_get_data_dir(quiet = FALSE)
 8 | }
 9 | \arguments{
10 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.}
11 | }
12 | \value{
13 | A \code{character} vector of length 1 containing the path to the data directory where the package will download and convert the data.
14 | }
15 | \description{
16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
17 | 
18 | This function retrieves the data directory from the environment variable SPANISH_OD_DATA_DIR.
19 | If the environment variable is not set, it returns the temporary directory.
20 | }
21 | \examples{
22 | spod_set_data_dir(tempdir())
23 | spod_get_data_dir()
24 | 
25 | }
26 | 


--------------------------------------------------------------------------------
/man/spod_get_file_size_from_url.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/dev-tools.R
 3 | \name{spod_get_file_size_from_url}
 4 | \alias{spod_get_file_size_from_url}
 5 | \title{Get file size from URL}
 6 | \usage{
 7 | spod_get_file_size_from_url(x_url)
 8 | }
 9 | \arguments{
10 | \item{x_url}{URL}
11 | }
12 | \value{
13 | File size in MB
14 | }
15 | \description{
16 | Get file size from URL
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_get_hmac_secret.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/quick-get.R
 3 | \name{spod_get_hmac_secret}
 4 | \alias{spod_get_hmac_secret}
 5 | \title{Get the HMAC secret from the mapas-movilidad website}
 6 | \usage{
 7 | spod_get_hmac_secret(base_url = "https://mapas-movilidad.transportes.gob.es")
 8 | }
 9 | \arguments{
10 | \item{base_url}{The base URL of the mapas-movilidad website}
11 | }
12 | \value{
13 | Character vector with the HMAC secret.
14 | }
15 | \description{
16 | Get the HMAC secret from the mapas-movilidad website
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_get_latest_v1_file_list.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/available-data.R
 3 | \name{spod_get_latest_v1_file_list}
 4 | \alias{spod_get_latest_v1_file_list}
 5 | \title{Get latest file list from the XML for MITMA open mobility data v1 (2020-2021)}
 6 | \usage{
 7 | spod_get_latest_v1_file_list(
 8 |   data_dir = spod_get_data_dir(),
 9 |   xml_url = "https://opendata-movilidad.mitma.es/RSS.xml"
10 | )
11 | }
12 | \arguments{
13 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.}
14 | 
15 | \item{xml_url}{The URL of the XML file to download. Defaults to "https://opendata-movilidad.mitma.es/RSS.xml".}
16 | }
17 | \value{
18 | The path to the downloaded XML file.
19 | }
20 | \description{
21 | Get latest file list from the XML for MITMA open mobility data v1 (2020-2021)
22 | }
23 | \keyword{internal}
24 | 


--------------------------------------------------------------------------------
/man/spod_get_latest_v2_file_list.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/available-data.R
 3 | \name{spod_get_latest_v2_file_list}
 4 | \alias{spod_get_latest_v2_file_list}
 5 | \title{Get latest file list from the XML for MITMA open mobility data v2 (2022 onwards)}
 6 | \usage{
 7 | spod_get_latest_v2_file_list(
 8 |   data_dir = spod_get_data_dir(),
 9 |   xml_url = "https://movilidad-opendata.mitma.es/RSS.xml"
10 | )
11 | }
12 | \arguments{
13 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.}
14 | 
15 | \item{xml_url}{The URL of the XML file to download. Defaults to "https://movilidad-opendata.mitma.es/RSS.xml".}
16 | }
17 | \value{
18 | The path to the downloaded XML file.
19 | }
20 | \description{
21 | Get latest file list from the XML for MITMA open mobility data v2 (2022 onwards)
22 | }
23 | \keyword{internal}
24 | 


--------------------------------------------------------------------------------
/man/spod_get_temp_dir.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/folders.R
 3 | \name{spod_get_temp_dir}
 4 | \alias{spod_get_temp_dir}
 5 | \title{Get temporary directory for DuckDB intermediate spilling}
 6 | \usage{
 7 | spod_get_temp_dir(data_dir = spod_get_data_dir())
 8 | }
 9 | \arguments{
10 | \item{data_dir}{The directory where the data is stored. Defaults to the value returned by \code{spod_get_data_dir()}.}
11 | }
12 | \value{
13 | A \code{character} string with the path to the temp folder for \code{DuckDB} for \href{https://duckdb.org/2024/07/09/memory-management.html#intermediate-spilling}{intermediate spilling}.
14 | }
15 | \description{
16 | Get the path to the temp folder for DuckDB for \href{https://duckdb.org/2024/07/09/memory-management.html#intermediate-spilling}{intermediate spilling} in case the set memory limit and/or physical memory of the computer is too low to perform the query.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_get_valid_dates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_get_valid_dates}
 4 | \alias{spod_get_valid_dates}
 5 | \title{Get valid dates for the specified data version}
 6 | \usage{
 7 | spod_get_valid_dates(ver = NULL)
 8 | }
 9 | \arguments{
10 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.}
11 | }
12 | \value{
13 | A vector of type \code{Date} with all possible valid dates for the specified data version (v1 for 2020-2021 and v2 for 2022 onwards).
14 | }
15 | \description{
16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
17 | 
18 | Get all metadata for requested data version and identify all dates available for download.
19 | }
20 | \examples{
21 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
22 | \donttest{
23 | # Get all valid dates for v1 (2020-2021) data
24 | spod_get_valid_dates(ver = 1)
25 | 
26 | # Get all valid dates for v2 (2022 onwards) data
27 | spod_get_valid_dates(ver = 2)
28 | }
29 | \dontshow{\}) # examplesIf}
30 | }
31 | 


--------------------------------------------------------------------------------
/man/spod_get_zones_v1.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get-zones.R
 3 | \name{spod_get_zones_v1}
 4 | \alias{spod_get_zones_v1}
 5 | \title{Retrieves the zones for v1 data}
 6 | \usage{
 7 | spod_get_zones_v1(
 8 |   zones = c("districts", "dist", "distr", "distritos", "municipalities", "muni",
 9 |     "municip", "municipios"),
10 |   data_dir = spod_get_data_dir(),
11 |   quiet = FALSE
12 | )
13 | }
14 | \arguments{
15 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}).}
16 | 
17 | \item{data_dir}{The directory where the data is stored.}
18 | 
19 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.}
20 | }
21 | \value{
22 | An \code{sf} object (Simple Feature collection) with 2 fields:
23 | \describe{
24 | \item{id}{A character vector containing the unique identifier for each zone, to be matched with identifiers in the tabular data.}
25 | \item{geometry}{A \code{MULTIPOLYGON} column containing the spatial geometry of each zone, stored as an sf object.
26 | The geometry is projected in the ETRS89 / UTM zone 30N coordinate reference system (CRS), with XY dimensions.}
27 | }
28 | }
29 | \description{
30 | This function retrieves the zones data from the specified data directory.
31 | It can retrieve either "distritos" or "municipios" zones data.
32 | }
33 | \keyword{internal}
34 | 


--------------------------------------------------------------------------------
/man/spod_get_zones_v2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/get-zones.R
 3 | \name{spod_get_zones_v2}
 4 | \alias{spod_get_zones_v2}
 5 | \title{Retrieves the zones for v2 data}
 6 | \usage{
 7 | spod_get_zones_v2(
 8 |   zones = c("districts", "dist", "distr", "distritos", "municipalities", "muni",
 9 |     "municip", "municipios", "lua", "large_urban_areas", "gau", "grandes_areas_urbanas"),
10 |   data_dir = spod_get_data_dir(),
11 |   quiet = FALSE
12 | )
13 | }
14 | \arguments{
15 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}), \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}), or \code{"large_urban_areas"} (or \code{"lua"}, or the original Spanish \code{"grandes_areas_urbanas"}, or \code{"gau"}).}
16 | 
17 | \item{data_dir}{The directory where the data is stored.}
18 | 
19 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.}
20 | }
21 | \value{
22 | An \code{sf} object (Simple Feature collection) with 4 fields:
23 | \describe{
24 | \item{id}{A character vector containing the unique identifier for each zone, to be matched with identifiers in the tabular data.}
25 | \item{name}{A character vector with the name of the zone.}
26 | \item{population}{A numeric vector representing the population of each zone (as of 2022).}
27 | \item{geometry}{A \code{MULTIPOLYGON} column containing the spatial geometry of each zone, stored as an sf object.
28 | The geometry is projected in the ETRS89 / UTM zone 30N coordinate reference system (CRS), with XY dimensions.}
29 | }
30 | }
31 | \description{
32 | This function retrieves the zones data from the specified data directory.
33 | It can retrieve either "distritos" or "municipios" zones data.
34 | }
35 | \keyword{internal}
36 | 


--------------------------------------------------------------------------------
/man/spod_graphql_valid_dates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_graphql_valid_dates}
 4 | \alias{spod_graphql_valid_dates}
 5 | \title{Get valid dates from the GraphQL API}
 6 | \usage{
 7 | spod_graphql_valid_dates()
 8 | }
 9 | \value{
10 | A \code{Date} vector of dates that are valid to request data with \code{spod_quick_get_od()}.
11 | }
12 | \description{
13 | Get valid dates from the GraphQL API
14 | }
15 | \keyword{internal}
16 | 


--------------------------------------------------------------------------------
/man/spod_infer_data_v_from_dates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_infer_data_v_from_dates}
 4 | \alias{spod_infer_data_v_from_dates}
 5 | \title{Infer data version from dates}
 6 | \usage{
 7 | spod_infer_data_v_from_dates(dates, ignore_missing_dates = FALSE)
 8 | }
 9 | \arguments{
10 | \item{dates}{A \code{character} or \code{Date} vector of dates to process. Kindly keep in mind that v1 and v2 data follow different data collection methodologies and may not be directly comparable. Therefore, do not try to request data from both versions for the same date range. If you need to compare data from both versions, please refer to the respective codebooks and methodology documents. The v1 data covers the period from 2020-02-14 to 2021-05-09, and the v2 data covers the period from 2022-01-01 to the present until further notice. The true dates range is checked against the available data for each version on every function run.
11 | 
12 | The possible values can be any of the following:
13 | \itemize{
14 | \item For the \code{spod_get()} and \code{spod_convert()} functions, the \code{dates} can be set to "cached_v1" or "cached_v2" to request data from cached (already previously downloaded) v1 (2020-2021) or v2 (2022 onwards) data. In this case, the function will identify and use all data files that have been downloaded and cached locally, (e.g. using an explicit run of \code{spod_download()}, or any data requests made using the \code{spod_get()} or \code{spod_convert()} functions).
15 | \item A single date in ISO (YYYY-MM-DD) or YYYYMMDD format. \code{character} or \code{Date} object.
16 | \item A vector of dates in ISO (YYYY-MM-DD) or YYYYMMDD format. \code{character} or \code{Date} object. Can be any non-consecutive sequence of dates.
17 | \item A date range
18 | \itemize{
19 | \item either a \code{character} or \code{Date} object of length 2 with clearly named elements \code{start} and \code{end} in ISO (YYYY-MM-DD) or YYYYMMDD format. E.g. \code{c(start = "2020-02-15", end = "2020-02-17")};
20 | \item or a \code{character} object of the form \code{YYYY-MM-DD_YYYY-MM-DD} or \code{YYYYMMDD_YYYYMMDD}. For example, \verb{2020-02-15_2020-02-17} or \verb{20200215_20200217}.
21 | }
22 | \item A regular expression to match dates in the format \code{YYYYMMDD}. \code{character} object. For example, \verb{^202002} will match all dates in February 2020.
23 | }}
24 | 
25 | \item{ignore_missing_dates}{Logical. If \code{TRUE}, the function will not raise an error if some of the specified dates are missing. Any dates that are missing will be skipped, however the data for any valid dates will be acquired. Defaults to \code{FALSE}.}
26 | }
27 | \value{
28 | An \code{integer} indicating the inferred data version.
29 | }
30 | \description{
31 | Infer data version from dates
32 | }
33 | \keyword{internal}
34 | 


--------------------------------------------------------------------------------
/man/spod_is_data_version_overlaps.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_is_data_version_overlaps}
 4 | \alias{spod_is_data_version_overlaps}
 5 | \title{Check if specified dates span both data versions}
 6 | \usage{
 7 | spod_is_data_version_overlaps(dates)
 8 | }
 9 | \arguments{
10 | \item{dates}{A \code{Date} vector of dates to check.}
11 | }
12 | \value{
13 | \code{TRUE} if the dates span both data versions, \code{FALSE} otherwise.
14 | }
15 | \description{
16 | This function checks if the specified dates or date ranges span both v1 and v2 data versions.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_match_data_type.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_match_data_type}
 4 | \alias{spod_match_data_type}
 5 | \title{Match data types for normalisation}
 6 | \usage{
 7 | spod_match_data_type(
 8 |   type = c("od", "origin-destination", "viajes", "os", "overnight_stays",
 9 |     "pernoctaciones", "nt", "number_of_trips", "personas")
10 | )
11 | }
12 | \arguments{
13 | \item{type}{The type of data to match. Can be "od", "origin-destination", "viajes", "os", "overnight_stays", "pernoctaciones", "nt", "number_of_trips", or "personas".}
14 | }
15 | \value{
16 | A \code{character} string with the folder name for the specified data type. Or \code{NULL} if the type is not recognized.
17 | }
18 | \description{
19 | Match data types for normalisation
20 | }
21 | \keyword{internal}
22 | 


--------------------------------------------------------------------------------
/man/spod_match_data_type_for_local_folders.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_match_data_type_for_local_folders}
 4 | \alias{spod_match_data_type_for_local_folders}
 5 | \title{Match data types to folders}
 6 | \usage{
 7 | spod_match_data_type_for_local_folders(
 8 |   type = c("od", "origin-destination", "os", "overnight_stays", "nt", "number_of_trips"),
 9 |   ver = c(1, 2)
10 | )
11 | }
12 | \arguments{
13 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.}
14 | }
15 | \value{
16 | A \code{character} string with the folder name for the specified data type. Or \code{NULL} if the data type is not recognized.
17 | }
18 | \description{
19 | Match data types to folders
20 | }
21 | \keyword{internal}
22 | 


--------------------------------------------------------------------------------
/man/spod_quick_get_od.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/quick-get.R
 3 | \name{spod_quick_get_od}
 4 | \alias{spod_quick_get_od}
 5 | \title{Get daily trip counts per origin-destination municipality from 2022 onward}
 6 | \usage{
 7 | spod_quick_get_od(
 8 |   date = NA,
 9 |   min_trips = 100,
10 |   distances = c("500m-2km", "2-10km", "10-50km", "50+km"),
11 |   id_origin = NA,
12 |   id_destination = NA
13 | )
14 | }
15 | \arguments{
16 | \item{date}{A character or Date object specifying the date for which to retrieve the data. If date is a character, the date must be in "YYYY-MM-DD" or "YYYYMMDD" format.}
17 | 
18 | \item{min_trips}{A numeric value specifying the minimum number of journeys per origin-destination pair to retrieve. Defaults to 100 to reduce the amount of data returned. Can be set to 0 to retrieve all data.}
19 | 
20 | \item{distances}{A character vector specifying the distances to retrieve. Valid values are "500m-2km", "2-10km", "10-50km", and "50+km". Defaults to \code{c("500m-2km", "2-10km", "10-50km", "50+km")}. The resulting data will not have number of trips per category of distance. Therefore, if you want to retrieve the number of trips per distance category, you need to make 4 separate calls to this function or use \code{\link[=spod_get]{spod_get()}} instead to get the full data from source CSV files.}
21 | 
22 | \item{id_origin}{A character vector specifying the origin municipalities to retrieve. If not provided, all origin municipalities will be included. Valid municipality IDs can be found in the dataset returned by \code{spod_get_zones(zones = "muni", ver = 2)}.}
23 | 
24 | \item{id_destination}{A character vector specifying the target municipalities to retrieve. If not provided, all target municipalities will be included. Valid municipality IDs can be found in the dataset returned by \code{spod_get_zones(zones = "muni", ver = 2)}.}
25 | }
26 | \value{
27 | A \code{tibble} containing the flows for the specified date, minimum number of journeys, distances and origin-destination pairs if specified. The columns are:
28 | \describe{
29 | \item{date}{The date of the trips.}
30 | \item{id_origin}{The origin municipality ID.}
31 | \item{id_destination}{The target municipality ID.}
32 | \item{n_trips}{The number of trips between the origin and target municipality.}
33 | \item{trips_total_length_km}{The total length of trips in kilometers.}
34 | }
35 | }
36 | \description{
37 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
38 | 
39 | \strong{WARNING: this function may stop working at any time, as the API may change}. This function provides a quick way to get daily aggregated (no hourly data) trip counts per origin-destination municipality from v2 data (2022 onward). Compared to \code{\link[=spod_get]{spod_get()}}, which downloads large CSV files, this function downloads the data directly from the GraphQL API. An interactive web map with this data is available at \url{https://mapas-movilidad.transportes.gob.es/}. No data aggregation is performed on your computer (unlike in \code{\link[=spod_get]{spod_get()}}), so you do not need to worry about memory usage and do not have to use a powerful computer with multiple CPU cores just to get this simple data. Only about 1 MB of data is downloaded for a single day. The limitation of this function is that it can only retrieve data for a single day at a time and only with total number of trips and total km travelled. So it is not possible to get any of the extra variables available in the full dataset via \code{\link[=spod_get]{spod_get()}}.
40 | }
41 | \examples{
42 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
43 | \donttest{
44 | od_1000 <- spod_quick_get_od(
45 |   date = "2022-01-01",
46 |   min_trips = 1000
47 | )
48 | }
49 | \dontshow{\}) # examplesIf}
50 | }
51 | 


--------------------------------------------------------------------------------
/man/spod_quick_get_zones.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/quick-get.R
 3 | \name{spod_quick_get_zones}
 4 | \alias{spod_quick_get_zones}
 5 | \title{Get the municipalities geometries}
 6 | \usage{
 7 | spod_quick_get_zones(zones = "municipalities")
 8 | }
 9 | \arguments{
10 | \item{zones}{A character string specifying the zones to retrieve. Valid values are "municipalities", "muni", "municip", and "municipios". Defaults to "municipalities".}
11 | }
12 | \value{
13 | A \code{sf} object with the municipalities geometries to match with the data retrieved with \code{\link[=spod_quick_get_od]{spod_quick_get_od()}}.
14 | }
15 | \description{
16 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}
17 | 
18 | This function fetches the municipalities (for now this is the only option) geometries from the mapas-movilidad website and returns an \code{sf} object with the municipalities geometries. This is intended for use with the flows data retrieved by the \code{\link[=spod_quick_get_od]{spod_quick_get_od()}} function. An interactive web map with this data is available at \url{https://mapas-movilidad.transportes.gob.es/}. These municipality geometries only include Spanish municipalities (and not the NUTS3 regions in Portugal and France) and do not contain extra columns that you can get with the \code{\link[=spod_get_zones]{spod_get_zones()}} function. The function caches the retrieved geometries in memory of the current R session to reduce the number of requests to the mapas-movilidad website.
19 | }
20 | \examples{
21 | \dontshow{if (interactive()) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
22 | \donttest{
23 | municipalities_sf <- spod_quick_get_zones()
24 | }
25 | \dontshow{\}) # examplesIf}
26 | }
27 | 


--------------------------------------------------------------------------------
/man/spod_read_sql.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/duckdb-helpers.R
 3 | \name{spod_read_sql}
 4 | \alias{spod_read_sql}
 5 | \title{Load an SQL query, glue it, dplyr::sql it}
 6 | \usage{
 7 | spod_read_sql(sql_file_name)
 8 | }
 9 | \arguments{
10 | \item{sql_file_name}{The name of the SQL file to load from the package installation directory.}
11 | }
12 | \value{
13 | Text of the SQL query of class \code{sql}/\code{character}.
14 | }
15 | \description{
16 | Load an SQL query from a specified file in package installation directory, glue::collapse it, glue::glue it in case of any variables that need to be replaced, and dplyr::sql it for additional safety.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_request_length.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_request_length}
 4 | \alias{spod_request_length}
 5 | \title{Get the length of the request payload}
 6 | \usage{
 7 | spod_request_length(graphql_query)
 8 | }
 9 | \arguments{
10 | \item{graphql_query}{Character. The GraphQL query.}
11 | }
12 | \value{
13 | Numeric. The length of the request payload.
14 | }
15 | \description{
16 | Get the length of the request payload
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_set_data_dir.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/data-dir.R
 3 | \name{spod_set_data_dir}
 4 | \alias{spod_set_data_dir}
 5 | \title{Set the data directory}
 6 | \usage{
 7 | spod_set_data_dir(data_dir, quiet = FALSE)
 8 | }
 9 | \arguments{
10 | \item{data_dir}{The data directory to set.}
11 | 
12 | \item{quiet}{A \code{logical} value indicating whether to suppress messages. Default is \code{FALSE}.}
13 | }
14 | \value{
15 | Nothing. If quiet is \code{FALSE}, prints a message with the path and confirmation that the path exists.
16 | }
17 | \description{
18 | \ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#stable}{\figure{lifecycle-stable.svg}{options: alt='[Stable]'}}}{\strong{[Stable]}}
19 | 
20 | This function sets the data directory in the environment variable SPANISH_OD_DATA_DIR, so that all other functions in the package can access the data. It also creates the directory if it doesn't exist.
21 | }
22 | \examples{
23 | spod_set_data_dir(tempdir())
24 | 
25 | }
26 | 


--------------------------------------------------------------------------------
/man/spod_sql_where_dates.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/duckdb-helpers.R
 3 | \name{spod_sql_where_dates}
 4 | \alias{spod_sql_where_dates}
 5 | \title{Generate a WHERE part of an SQL query from a sequence of dates}
 6 | \usage{
 7 | spod_sql_where_dates(dates)
 8 | }
 9 | \arguments{
10 | \item{dates}{A \code{Date} vector of dates to process.}
11 | }
12 | \value{
13 | A character vector of the SQL query.
14 | }
15 | \description{
16 | Generate a WHERE part of an SQL query from a sequence of dates
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_subfolder_clean_data_cache.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/folders.R
 3 | \name{spod_subfolder_clean_data_cache}
 4 | \alias{spod_subfolder_clean_data_cache}
 5 | \title{Get clean data subfolder name}
 6 | \usage{
 7 | spod_subfolder_clean_data_cache(ver = 1)
 8 | }
 9 | \arguments{
10 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.}
11 | }
12 | \value{
13 | A \code{character} string with the subfolder name for the clean data cache.
14 | }
15 | \description{
16 | Change subfolder name in the code of this function for clean data cache here to apply globally, as all functions in the package should use this function to get the clean data cache path.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_subfolder_metadata_cache.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/folders.R
 3 | \name{spod_subfolder_metadata_cache}
 4 | \alias{spod_subfolder_metadata_cache}
 5 | \title{Get metadata cache subfolder name}
 6 | \usage{
 7 | spod_subfolder_metadata_cache()
 8 | }
 9 | \value{
10 | A \code{character} string with the subfolder name for the metadata cache.
11 | }
12 | \description{
13 | Change subfolder name in the code of this function for metadata cache here to apply globally, as all functions in the package should use this function to get the metadata cache path.
14 | }
15 | \keyword{internal}
16 | 


--------------------------------------------------------------------------------
/man/spod_subfolder_raw_data_cache.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/folders.R
 3 | \name{spod_subfolder_raw_data_cache}
 4 | \alias{spod_subfolder_raw_data_cache}
 5 | \title{Get raw data cache subfolder name}
 6 | \usage{
 7 | spod_subfolder_raw_data_cache(ver = 1)
 8 | }
 9 | \arguments{
10 | \item{ver}{Integer. Can be 1 or 2. The version of the data to use. v1 spans 2020-2021, v2 covers 2022 and onwards.}
11 | }
12 | \value{
13 | A \code{character} string with the subfolder name for the raw data cache.
14 | }
15 | \description{
16 | Change subfolder name in the code of this function for raw data cache here to apply globally, as all functions in the package should use this function to get the raw data cache path.
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_unique_separated_ids.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_unique_separated_ids}
 4 | \alias{spod_unique_separated_ids}
 5 | \title{Remove duplicate values in a semicolon-separated string}
 6 | \usage{
 7 | spod_unique_separated_ids(column)
 8 | }
 9 | \arguments{
10 | \item{column}{A \code{character} vector column in a data frame to remove duplicates from.}
11 | }
12 | \value{
13 | A \code{character} vector with semicolon-separated unique IDs.
14 | }
15 | \description{
16 | Remove duplicate IDs in a semicolon-separated string in a selected column in a data frame
17 | }
18 | \keyword{internal}
19 | 


--------------------------------------------------------------------------------
/man/spod_zone_names_en2es.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/internal-utils.R
 3 | \name{spod_zone_names_en2es}
 4 | \alias{spod_zone_names_en2es}
 5 | \title{Translate zone names from English to Spanish}
 6 | \usage{
 7 | spod_zone_names_en2es(
 8 |   zones = c("districts", "dist", "distr", "distritos", "municipalities", "muni",
 9 |     "municip", "municipios", "lua", "large_urban_areas", "gau", "grandes_areas_urbanas")
10 | )
11 | }
12 | \arguments{
13 | \item{zones}{The zones for which to download the data. Can be \code{"districts"} (or \code{"dist"}, \code{"distr"}, or the original Spanish \code{"distritos"}) or \code{"municipalities"} (or \code{"muni"}, \code{"municip"}, or the original Spanish \code{"municipios"}) for both data versions. Additionally, these can be \code{"large_urban_areas"} (or \code{"lua"}, or the original Spanish \code{"grandes_areas_urbanas"}, or \code{"gau"}) for v2 data (2022 onwards).}
14 | }
15 | \value{
16 | A \code{character} string with the translated zone name. Or \code{NULL} if the zone name is not recognized.
17 | }
18 | \description{
19 | Translate zone names from English to Spanish
20 | }
21 | \keyword{internal}
22 | 


--------------------------------------------------------------------------------
/pkgdown/assets/codebooks/20241024_validaciones_estudios_basicos_bigdata_v1.0_en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/codebooks/20241024_validaciones_estudios_basicos_bigdata_v1.0_en.pdf


--------------------------------------------------------------------------------
/pkgdown/assets/codebooks/README_-_formato_ficheros_movilidad_MITMA_20201228_en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/codebooks/README_-_formato_ficheros_movilidad_MITMA_20201228_en.pdf


--------------------------------------------------------------------------------
/pkgdown/assets/codebooks/a3_informe_metodologico_estudio_movilidad_mitms_v8_en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/codebooks/a3_informe_metodologico_estudio_movilidad_mitms_v8_en.pdf


--------------------------------------------------------------------------------
/pkgdown/assets/codebooks/mitma_-_estudio_movilidad_covid-19_informe_metodologico_v3_en.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/codebooks/mitma_-_estudio_movilidad_covid-19_informe_metodologico_v3_en.pdf


--------------------------------------------------------------------------------
/pkgdown/assets/media/barcelona-time.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/media/barcelona-time.gif


--------------------------------------------------------------------------------
/pkgdown/assets/media/flowmapblue-animated.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/media/flowmapblue-animated.mp4


--------------------------------------------------------------------------------
/pkgdown/assets/media/flowmapblue-standard-time.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/media/flowmapblue-standard-time.mp4


--------------------------------------------------------------------------------
/pkgdown/assets/media/flowmapblue-standard.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/media/flowmapblue-standard.mp4


--------------------------------------------------------------------------------
/pkgdown/assets/media/spain-folding-flows.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/assets/media/spain-folding-flows.gif


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-120x120.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/apple-touch-icon-120x120.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-152x152.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/apple-touch-icon-152x152.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-180x180.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/apple-touch-icon-180x180.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-60x60.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/apple-touch-icon-60x60.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon-76x76.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/apple-touch-icon-76x76.png


--------------------------------------------------------------------------------
/pkgdown/favicon/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/apple-touch-icon.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/favicon-16x16.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/favicon-32x32.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon-96x96.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/favicon-96x96.png


--------------------------------------------------------------------------------
/pkgdown/favicon/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/favicon.ico


--------------------------------------------------------------------------------
/pkgdown/favicon/site.webmanifest:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "",
 3 |   "short_name": "",
 4 |   "icons": [
 5 |     {
 6 |       "src": "/web-app-manifest-192x192.png",
 7 |       "sizes": "192x192",
 8 |       "type": "image/png",
 9 |       "purpose": "maskable"
10 |     },
11 |     {
12 |       "src": "/web-app-manifest-512x512.png",
13 |       "sizes": "512x512",
14 |       "type": "image/png",
15 |       "purpose": "maskable"
16 |     }
17 |   ],
18 |   "theme_color": "#ffffff",
19 |   "background_color": "#ffffff",
20 |   "display": "standalone"
21 | }


--------------------------------------------------------------------------------
/pkgdown/favicon/web-app-manifest-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/web-app-manifest-192x192.png


--------------------------------------------------------------------------------
/pkgdown/favicon/web-app-manifest-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/pkgdown/favicon/web-app-manifest-512x512.png


--------------------------------------------------------------------------------
/spanishoddata.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | ProjectId: 0eb7deaa-2778-4211-9274-917281de2007
 3 | 
 4 | RestoreWorkspace: No
 5 | SaveWorkspace: No
 6 | AlwaysSaveHistory: Default
 7 | 
 8 | EnableCodeIndexing: Yes
 9 | UseSpacesForTab: Yes
10 | NumSpacesForTab: 2
11 | Encoding: UTF-8
12 | 
13 | RnwWeave: Sweave
14 | LaTeX: pdfLaTeX
15 | 
16 | AutoAppendNewline: Yes
17 | StripTrailingWhitespace: Yes
18 | LineEndingConversion: Posix
19 | 
20 | BuildType: Package
21 | PackageUseDevtools: Yes
22 | PackageInstallArgs: --no-multiarch --with-keep.source
23 | PackageRoxygenize: rd,collate,namespace,vignette
24 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
 1 | # This file is part of the standard setup for testthat.
 2 | # It is recommended that you do not modify it.
 3 | #
 4 | # Where should you do additional test configuration?
 5 | # Learn more about the roles of various files in:
 6 | # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview
 7 | # * https://testthat.r-lib.org/articles/special-files.html
 8 | 
 9 | library(testthat)
10 | library(spanishoddata)
11 | 
12 | test_check("spanishoddata")
13 | 


--------------------------------------------------------------------------------
/tests/testthat/test-internal_utils.R:
--------------------------------------------------------------------------------
  1 | # Prepare the testing environment using bundled xml files to avoid downloading data from the internet
  2 | 
  3 | extdata_path <- system.file("extdata", package = "spanishoddata")
  4 | gz_files <- list.files(extdata_path, pattern = "(data_links_.*\\.xml\\.gz)|(url_file_sizes_v[1-2]\\.csv\\.gz)", full.names = TRUE)
  5 | 
  6 | if (length(gz_files) == 0) stop("No gzipped XML files found.")
  7 | 
  8 | # Create a temporary directory
  9 | test_data_dir <- tempfile()
 10 | dir.create(test_data_dir, recursive = TRUE)
 11 | # Create metadata directory
 12 | metadata_dir <- paste0(test_data_dir, "/", spod_subfolder_metadata_cache())
 13 | dir.create(metadata_dir, recursive = TRUE)
 14 | 
 15 | current_date <- format(Sys.time(), format = "%Y-%m-%d", usetz = FALSE)
 16 | 
 17 | # Copy only the bundled data_links XML files, renamed with today's date; match on the file name, not the full path
 18 | for (gz_file in gz_files) {
 19 |   if (grepl("^data_links_v1_", basename(gz_file))) {
 20 |     file.copy(gz_file, file.path(metadata_dir, paste0("data_links_v1_", current_date, ".xml.gz")))
 21 |   } else if (grepl("^data_links_v2_", basename(gz_file))) {
 22 |     file.copy(gz_file, file.path(metadata_dir, paste0("data_links_v2_", current_date, ".xml.gz")))
 23 |   }
 24 | }
 25 | 
 26 | # Set the environment variable to the test directory
 27 | Sys.setenv(SPANISH_OD_DATA_DIR = test_data_dir)
 28 | # Sys.getenv("SPANISH_OD_DATA_DIR")
 29 | 
 30 | test_that("single ISO date input", {
 31 |   dates <- "2023-07-01"
 32 |   result <- spod_dates_argument_to_dates_seq(dates)
 33 |   expect_equal(result, as.Date("2023-07-01"))
 34 | })
 35 | 
 36 | test_that("single YYYYMMDD date input", {
 37 |   dates <- "20230701"
 38 |   result <- spod_dates_argument_to_dates_seq(dates)
 39 |   expect_equal(result, as.Date("2023-07-01"))
 40 | })
 41 | 
 42 | test_that("vector of ISO dates", {
 43 |   dates <- c("2023-07-01", "2023-07-03", "2023-07-05")
 44 |   result <- spod_dates_argument_to_dates_seq(dates)
 45 |   expect_equal(result, as.Date(c("2023-07-01", "2023-07-03", "2023-07-05")))
 46 | })
 47 | 
 48 | test_that("vector of YYYYMMDD dates", {
 49 |   dates <- c("20230701", "20230703", "20230705")
 50 |   result <- spod_dates_argument_to_dates_seq(dates)
 51 |   expect_equal(result, as.Date(c("2023-07-01", "2023-07-03", "2023-07-05")))
 52 | })
 53 | 
 54 | test_that("date range in ISO format", {
 55 |   dates <- "2023-07-01_2023-07-05"
 56 |   result <- spod_dates_argument_to_dates_seq(dates)
 57 |   expect_equal(result, seq.Date(from = as.Date("2023-07-01"), to = as.Date("2023-07-05"), by = "day"))
 58 | })
 59 | 
 60 | test_that("date range in YYYYMMDD format", {
 61 |   dates <- "20230701_20230705"
 62 |   result <- spod_dates_argument_to_dates_seq(dates)
 63 |   expect_equal(result, seq.Date(from = as.Date("2023-07-01"), to = as.Date("2023-07-05"), by = "day"))
 64 | })
 65 | 
 66 | test_that("named vector date range in ISO format", {
 67 |   dates <- c(start = "2023-07-01", end = "2023-07-05")
 68 |   result <- spod_dates_argument_to_dates_seq(dates)
 69 |   expect_equal(result, seq.Date(from = as.Date("2023-07-01"), to = as.Date("2023-07-05"), by = "day"))
 70 | })
 71 | 
 72 | test_that("named vector date range in YYYYMMDD format", {
 73 |   dates <- c(start = "20230701", end = "20230705")
 74 |   result <- spod_dates_argument_to_dates_seq(dates)
 75 |   expect_equal(result, seq.Date(from = as.Date("2023-07-01"), to = as.Date("2023-07-05"), by = "day"))
 76 | })
 77 | 
 78 | test_that("regex pattern matching dates", {
 79 |   dates <- "^202307"
 80 |   result <- spod_dates_argument_to_dates_seq(dates)
 81 |   expected_dates <- seq.Date(from = as.Date("2023-07-01"), to = as.Date("2023-07-31"), by = "day")
 82 |   expect_equal(result, expected_dates)
 83 | })
 84 | 
 85 | test_that("invalid input type", {
 86 |   dates <- 20230701
 87 |   expect_error(spod_dates_argument_to_dates_seq(dates), "Invalid date input format. Please provide a character vector or Date object.")
 88 | })
 89 | 
 90 | test_that("dates span both v1 and v2 data", {
 91 |   dates <- c("2021-05-09", "2022-01-01")
 92 |   expect_error(
 93 |     spod_dates_argument_to_dates_seq(dates),
 94 |     "Dates found in both v1 and v2 data."
 95 |   )
 96 | })
 97 | 
 98 | # clean up
 99 | unlink(test_data_dir, recursive = TRUE)
100 | 


--------------------------------------------------------------------------------
/tests/testthat/test-quick-get.R:
--------------------------------------------------------------------------------
 1 | # Some tests are disabled because the API endpoint is not working due to new restrictions; see https://github.com/rOpenSpain/spanishoddata/issues/162
 2 | test_that("spod_quick_get_od fails out of range dates", {
 3 |   skip_on_ci()
 4 |   skip_on_cran()
 5 |   expect_error(
 6 |     spod_quick_get_od(
 7 |       date = "2021-12-31"
 8 |     ),
 9 |     ".*Must be within valid range.*"
10 |   )
11 | })
12 | 
13 | test_that("spod_quick_get_od fails on invalid date format", {
14 |   expect_error(
15 |     spod_quick_get_od(
16 |       date = "202212-31"
17 |     ),
18 |     ".*Invalid date format.*"
19 |   )
20 | })
21 | 
22 | test_that("spod_quick_get_od fails on incorrect distances", {
23 |   expect_error(
24 |     spod_quick_get_od(
25 |       date = "2022-01-01",
26 |       distances = c("invalid", "0-200")
27 |     ),
28 |     ".*Invalid distance value.*"
29 |   )
30 | })
31 | 
32 | test_that("spod_quick_get_od fails on negative min_trips", {
33 |   expect_error(
34 |     spod_quick_get_od(
35 |       date = "2022-01-02",
36 |       min_trips = -1
37 |     ),
38 |     ".*Assertion.*failed.*"
39 |   )
40 | })
41 | 
42 | test_that("spod_quick_get_od fails on invalid municipality IDs", {
43 |   expect_error(
44 |     spod_quick_get_od(
45 |       date = "2022-01-03",
46 |       id_origin = "invalid"
47 |     ),
48 |     ".*Invalid municipality ID.*"
49 |   )
50 | 
51 |   expect_error(
52 |     spod_quick_get_od(
53 |       date = "2022-01-04",
54 |       id_destination = "invalid"
55 |     ),
56 |     ".*Invalid municipality ID.*"
57 |   )
58 | 
59 |   expect_error(
60 |     spod_quick_get_od(
61 |       date = "2022-01-05",
62 |       id_origin = "invalid",
63 |       id_destination = "invalid"
64 |     ),
65 |     ".*Invalid municipality ID.*"
66 |   )
67 | })
68 | 


--------------------------------------------------------------------------------
/tools/generate_package_logo.R:
--------------------------------------------------------------------------------
  1 | library(mapSpain)
  2 | library(hexSticker)
  3 | library(spanishoddata)
  4 | library(flowmapper)
  5 | library(tidyverse)
  6 | library(sf)
  7 | 
  8 | od <- spod_get("od", zones = "distr", dates = "2022-04-06")
  9 | districts <- spod_get_zones("distr", ver = 2)
 10 | 
 11 | spain_for_vis <- esp_get_ccaa()
 12 | spain_for_join <- esp_get_ccaa(moveCAN = FALSE)
 13 | 
 14 | flows_by_district <- od |>
 15 |   group_by(id_origin, id_destination) |> 
 16 |   summarise(n_trips = sum(n_trips, na.rm = TRUE), .groups = "drop") |> 
 17 |   collect() |> 
 18 |   arrange(desc(id_origin), id_destination, n_trips)
 19 | 
 20 | 
 21 | district_centroids <- districts |>
 22 |   st_centroid() |> 
 23 |   st_transform(crs = st_crs(spain_for_join))
 24 | 
 25 | ca_distr <- district_centroids |>
 26 |   st_join(spain_for_join) |> 
 27 |   st_drop_geometry() |>
 28 |   filter(!is.na(ccaa.shortname.en)) |> 
 29 |   select(id, ca_name = ccaa.shortname.en)
 30 | 
 31 | flows_by_ca <- flows_by_district |>
 32 |   left_join(ca_distr |>
 33 |     rename(id_orig = ca_name),
 34 |       by = c("id_origin" = "id")
 35 |     ) |> 
 36 |   left_join(ca_distr |>
 37 |     rename(id_dest = ca_name),
 38 |       by = c("id_destination" = "id")
 39 |     ) |> 
 40 |   group_by(id_orig, id_dest) |>
 41 |   summarise(n_trips = sum(n_trips, na.rm = TRUE),
 42 |     .groups = "drop") |> 
 43 |   rename(o = id_orig, d = id_dest, value = n_trips)
 44 | 
 45 | spain_for_vis_coords <- spain_for_vis |>
 46 |   st_centroid() |>
 47 |   st_coordinates() |>
 48 |   as.data.frame() |>
 49 |   mutate(name = spain_for_vis$ccaa.shortname.en) |>
 50 |   rename(x = X, y = Y)
 51 | 
 52 | # create base ggplot with boundaries removing any extra elements
 53 | base_plot <- ggplot() +
 54 |   geom_sf(data = spain_for_vis, fill=NA, col = "grey30", linewidth = 0.05)+
 55 |   theme_classic(base_size = 20) +
 56 |   labs(title = "",
 57 |     subtitle = "", fill = "", caption = "") +
 58 |   theme(
 59 |     axis.line = element_blank(),
 60 |     axis.text = element_blank(),
 61 |     axis.ticks = element_blank(),
 62 |     axis.title = element_blank(),
 63 |     panel.background = element_rect(fill='transparent'),
 64 |     plot.background = element_rect(fill='transparent', color=NA),
 65 |     panel.grid.major = element_blank(),
 66 |     panel.grid.minor = element_blank(),
 67 |     legend.background = element_rect(fill='transparent'),
 68 |     legend.box.background = element_rect(fill='transparent')
 69 |   )
 70 | 
 71 | # flows_by_ca_twoway_coords |> arrange(desc(flow_ab))
 72 | # add the flows
 73 | flows_plot <- base_plot|>
 74 |   add_flowmap(
 75 |     od = flows_by_ca,
 76 |     nodes = spain_for_vis_coords,
 77 |     node_radius_factor = 1,
 78 |     edge_width_factor = 1,
 79 |     arrow_point_angle = 35,
 80 |     node_buffer_factor = 1.5,
 81 |     outline_col = "grey80",
 82 |     k_node = 10 # play around with this parameter to aggregate nodes and flows
 83 |   )
 84 | 
 85 | # customise colours and remove legend, as we need a clean image for the logo
 86 | flows_plot <- flows_plot +
 87 |   guides(fill="none") +
 88 |   scale_fill_gradient(low="#FABB29", high = "#AB061F")
 89 | 
 90 | 
 91 | # flows_plot
 92 | 
 93 | sticker(flows_plot,
 94 | 
 95 |   # package name
 96 |   package= "spanishoddata", 
 97 |   p_size=4, p_y = 1.6,
 98 |   p_color = "gray25", p_family="Roboto",
 99 | 
100 |   # ggplot image size and position
101 |   s_x=1.02, s_y=1.19, s_width=2.6, s_height=2.72,
102 | 
103 |   # white hex
104 |   h_fill="#ffffff", h_color="grey", h_size=1.3,
105 | 
106 |   # url
107 |   url = "github.com/rOpenSpain/spanishoddata",
108 |   u_color= "gray25",
109 |   u_family = "Roboto",
110 |   u_size = 1.2,
111 | 
112 |   # save output name and resolution
113 |   filename="./man/figures/logo.png", dpi=300 #
114 | )
115 | 


--------------------------------------------------------------------------------
/tools/meta-data-update-and-submission.R:
--------------------------------------------------------------------------------
 1 | # before release
 2 | # usethis::use_version("patch")
 3 | # usethis::use_version("minor")
 4 | usethis::use_tidy_description()
 5 | cffr::cff_write()
 6 | codemetar::write_codemeta(write_minimeta = TRUE) # spell out TRUE: `T` is an ordinary variable that can be reassigned
 7 | # urlchecker::url_check()
 8 | # devtools::check(remote = TRUE, manual = TRUE)
 9 | # devtools::check(cran = TRUE)
10 | # devtools::check_win_devel()
11 | # revdepcheck::revdep_check(num_workers = 4)
12 | 
13 | # devtools::submit_cran()
14 | 
15 | # usethis::use_github_release()
16 | # usethis::use_dev_version(push = TRUE)
17 | 


--------------------------------------------------------------------------------
/vignettes/.gitignore:
--------------------------------------------------------------------------------
1 | *.html
2 | *.R
3 | *_files
4 | 
5 | /.quarto/
6 | 


--------------------------------------------------------------------------------
/vignettes/disaggregation.qmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "OD data disaggregation"
  3 | vignette: >
  4 |   %\VignetteIndexEntry{OD data disaggregation}
  5 |   %\VignetteEngine{quarto::html}
  6 |   %\VignetteEncoding{UTF-8}
  7 | bibliography: references.bib
  8 | number-sections: true
  9 | format:
 10 |   html:
 11 |     toc: true
 12 |     toc-depth: 2
 13 |     code-overflow: wrap
 14 | execute: 
 15 |   eval: false
 16 | ---
 17 | 
 18 | 
 19 | ```{r, include = FALSE}
 20 | knitr::opts_chunk$set(
 21 |   collapse = TRUE,
 22 |   comment = "#>"
 23 | )
 24 | ```
 25 | 
 26 | 
 27 | ```{r setup}
 28 | remotes::install_dev("tmap")
 29 | library(spanishoddata)
 30 | library(tidyverse)
 31 | library(sf)
 32 | library(tmap)
 33 | ```
 34 | 
 35 | ## Introduction
 36 | 
 37 | This vignette demonstrates origin-destination (OD) data disaggregation using the `{odjitter}` package.
 38 | The package is an implementation of the method described in the paper "Jittering: A Computationally Efficient Method for Generating Realistic Route Networks from Origin-Destination Data" [@lovelace2022jittering] for adding value to OD data by disaggregating desire lines.
 39 | This can be especially useful for transport planning purposes in which high levels of geographic resolution are required (see also [`od2net`](https://od2net.org/){target="_blank"} for direct network generation from OD data).
 40 | 
 41 | ## Data preparation
 42 | 
 43 | We'll start by loading a week's worth of origin-destination data for the city of Salamanca, building on the example in the README (note: these chunks are not evaluated):
 44 | 
 45 | ```{r}
 46 | od_db <- spod_get(
 47 |   type = "od",
 48 |   zones = "distritos",
 49 |   dates = c(start = "2024-03-01", end = "2024-03-07")
 50 | )
 51 | distritos <- spod_get_zones("distritos", ver = 2)
 52 | distritos_wgs84 <- distritos |>
 53 |   sf::st_simplify(dTolerance = 200) |>
 54 |   sf::st_transform(4326)
 55 | od_national_aggregated <- od_db |>
 56 |   group_by(id_origin, id_destination) |>
 57 |   summarise(Trips = sum(n_trips), .groups = "drop") |>
 58 |   filter(Trips > 500) |>
 59 |   collect() |>
 60 |   arrange(desc(Trips))
 61 | od_national_aggregated
 62 | od_national_interzonal <- od_national_aggregated |>
 63 |   filter(id_origin != id_destination)
 64 | salamanca_zones <- zonebuilder::zb_zone("Salamanca")
 65 | distritos_salamanca <- distritos_wgs84[salamanca_zones, ]
 66 | ids_salamanca <- distritos_salamanca$id
 67 | od_salamanca <- od_national_interzonal |>
 68 |   filter(id_origin %in% ids_salamanca) |>
 69 |   filter(id_destination %in% ids_salamanca) |>
 70 |   arrange(Trips)
 71 | od_salamanca_sf <- od::od_to_sf(
 72 |   od_salamanca,
 73 |   z = distritos_salamanca
 74 | )
 75 | ```
 76 | 
 77 | 
 78 | ## Disaggregating desire lines
 79 | 
 80 | For this you'll need some additional dependencies:
 81 | 
 82 | ```{r}
 83 | remotes::install_github("dabreegster/odjitter", subdir = "r")
 84 | remotes::install_github("nptscot/osmactive")
 85 | ```
 86 | 
 87 | We'll get the road network from OSM:
 88 | 
 89 | ```{r}
 90 | #| results: hide
 91 | salamanca_boundary <- sf::st_union(distritos_salamanca)
 92 | osm_full <- osmactive::get_travel_network(salamanca_boundary)
 93 | ```
 94 | 
 95 | ```{r}
 96 | #| label: osm
 97 | osm <- osm_full[salamanca_boundary, ]
 98 | drive_net <- osmactive::get_driving_network(osm)
 99 | drive_net_major <- osmactive::get_driving_network_major(osm)
100 | cycle_net <- osmactive::get_cycling_network(osm)
101 | cycle_net <- osmactive::distance_to_road(cycle_net, drive_net_major)
102 | cycle_net <- osmactive::classify_cycle_infrastructure(cycle_net)
103 | map_net <- osmactive::plot_osm_tmap(cycle_net)
104 | map_net
105 | ```
106 | 
107 | ![](media/osm.png)
108 | 
109 | We can use the road network to disaggregate the desire lines:
110 | 
111 | ```{r}
112 | od_jittered <- odjitter::jitter(
113 |   od_salamanca_sf,
114 |   zones = distritos_salamanca,
115 |   subpoints = drive_net,
116 |   disaggregation_threshold = 1000,
117 |   disaggregation_key = "Trips"
118 | )
119 | ```
120 | 
121 | Let's plot the disaggregated desire lines:
122 | 
123 | ```{r}
124 | #| label: disaggregated
125 | od_jittered |>
126 |   arrange(Trips) |>
127 |   ggplot() +
128 |   geom_sf(aes(colour = Trips), size = 1) +
129 |   scale_colour_viridis_c() +
130 |   geom_sf(data = drive_net_major, colour = "black") +
131 |   theme_void()
132 | ```
133 | 
134 | ![](media/disaggregated.png)
135 | 
136 | The results show that you can add value to the OD data by disaggregating the desire lines with the `{odjitter}` package.
137 | This can be useful for understanding the spatial distribution of trips within a zone for transport planning.
138 | 
139 | We have plotted the disaggregated desire lines on top of the major road network in Salamanca.
140 | A next step could be routing to help prioritise infrastructure improvements.
141 | 


--------------------------------------------------------------------------------
/vignettes/media/disaggregated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/vignettes/media/disaggregated.png


--------------------------------------------------------------------------------
/vignettes/media/mermaid-that-does-not-render.mermaid:
--------------------------------------------------------------------------------
 1 | flowchart TB
 2 |     M["For daily aggregated flows at municipal level (2022 onwards)
 3 |     get data from GraphQL API directly"] -->|"spod_quick_get_od(
 4 |     date = '2022-03-15')"
 5 |     | F["'tbl' object with 'id' for origins and destinations and trip counts"]
 6 |     
 7 |     A["Hourly flows and other data for a few days starting in 2020
 8 |     get data from raw CSV.gz files"] -->|"spod_get(
 9 |     type = 'origin-destination',
10 |     zones = 'districts',
11 |     dates = c(start = '2020-02-14', end = '2020-02-21') )"
12 |     | F
13 |     
14 |     C["Analyse longer periods (several months)
15 |     or even the whole dataset over several years"]
16 |     -->|"spod_convert(
17 | type = 'origin-destination',
18 |     zones = 'districts',
19 |     dates = c(start = '2020-02-14', end = '2021-05-09') )"| D["path to converted data"]
20 |     D -->|"spod_connect()" | F
21 |     
22 |     F -->|"dplyr functions: select(), filter(), mutate(), group_by(), summarise(), etc..."| G["dplyr::collect()"]
23 |     G --> H["**flows_data** - data.frame / tibble"] --> R["left_join(**zones**, **flows_data**)"] --> XX["spatial data matched by 'id' with aggregated mobility flows"]
24 | 
25 |     X["spatial data with zones"] --> |"spod_get_zones(
26 |     zones = 'districts',
27 |     ver = 1 )"| Y["**zones** - polygons with zones in sf object
28 |     with 'id' that match with origins and destinations"] --> R
29 | 


--------------------------------------------------------------------------------
/vignettes/media/osm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rOpenSpain/spanishoddata/d97f354c5e75f17a870ca15eef2f03d4756b0572/vignettes/media/osm.png


--------------------------------------------------------------------------------
/vignettes/references.bib:
--------------------------------------------------------------------------------
 1 | @Manual{lovelace_od_2024,
 2 | 	title = {od: {Manipulate} and {Map} {Origin}-{Destination} {Data}},
 3 | 	url = {https://cran.r-project.org/package=od},
 4 |   doi = {10.32614/CRAN.package.od},
 5 | 	language = {en},
 6 | 	author = {Lovelace, Robin and Morgan, Malcolm},
 7 | 	month = aug,
 8 | 	year = {2024}
 9 | }
10 | 
11 | @Misc{mitms_mobility_web,
12 |   title = {Estudio de la movilidad con Big Data (Study of mobility with Big Data)},
13 |   author = {{Ministerio de Transportes y Movilidad Sostenible MITMS}},
14 |   year = {2024},
15 |   url = {https://www.transportes.gob.es/ministerio/proyectos-singulares/estudio-de-movilidad-con-big-data},
16 | }
17 | 
18 | @Manual{mitma_methodology_2020_v3,
19 |   title = {Análisis de la movilidad en España con tecnología Big Data durante el estado de alarma para la gestión de la crisis del COVID-19 (Analysis of mobility in Spain with Big Data technology during the state of alarm for COVID-19 crisis management)},
20 |   author = {{Ministerio de Transportes, Movilidad y Agenda Urbana (MITMA)}},
21 |   year = {2021},
22 |   url = {https://cdn.mitma.gob.es/portal-web-drupal/covid-19/bigdata/mitma_-_estudio_movilidad_covid-19_informe_metodologico_v3.pdf},
23 | }
24 | 
25 | @Manual{mitms_methodology_2022_v8,
26 |     title = {Estudio de movilidad de viajeros de ámbito nacional aplicando la tecnología Big Data. Informe metodológico (Study of National Traveler mobility Using Big Data Technology. Methodological Report)},
27 |     author = {{Ministerio de Transportes y Movilidad Sostenible (MITMS)}},
28 |     year = {2024},
29 |     url = {https://www.transportes.gob.es/recursos_mfom/paginabasica/recursos/a3_informe_metodologico_estudio_movilidad_mitms_v8.pdf},
30 | }
31 | 
32 | 
33 | @Manual{duckdb-r,
34 |     title = {duckdb: DBI Package for the DuckDB Database Management System},
35 |     author = {Hannes Mühleisen and Mark Raasveldt},
36 |     year = {2024},
37 |     note = {R package version 1.0.0-2},
38 |     url = {https://CRAN.R-project.org/package=duckdb},
39 |     doi = {10.32614/CRAN.package.duckdb}
40 |   }
41 | 
42 | @Manual{od-r,
43 |   title = {od: Manipulate and Map Origin-Destination Data},
44 |   author = {Robin Lovelace and Malcolm Morgan},
45 |   year = {2024},
46 |   note = {R package version 0.5.1},
47 |   url = {https://CRAN.R-project.org/package=od},
48 |   doi = {10.32614/CRAN.package.od}
49 | }
50 | 
51 | @Manual{hexSticker-r,
52 |   title = {hexSticker: Create Hexagon Sticker in R},
53 |   author = {Guangchuang Yu},
54 |   year = {2020},
55 |   note = {R package version 0.4.9},
56 |   url = {https://CRAN.R-project.org/package=hexSticker},
57 |   doi = {10.32614/CRAN.package.hexSticker}
58 | }
59 | 
60 | @Manual{R-mapspain,
61 |   title = {{mapSpain}: Administrative Boundaries of Spain},
62 |   year = {2024},
63 |   version = {0.9.2},
64 |   author = {Diego Hernangómez},
65 |   doi = {10.5281/zenodo.5366622},
66 |   url = {https://ropenspain.github.io/mapSpain/},
67 |   abstract = {Administrative Boundaries of Spain at several levels (Autonomous Communities, Provinces, Municipalities) based on the GISCO Eurostat database <https://ec.europa.eu/eurostat/web/gisco> and CartoBase SIANE from Instituto Geografico Nacional <https://www.ign.es/>. It also provides a leaflet plugin and the ability of downloading and processing static tiles.},
68 | }
69 | 
70 | @Manual{flowmapper-r,
71 |   title = {flowmapper: Draw Flows (Migration, Goods, Money, Information) on 'ggplot2'
72 | Plots},
73 |   author = {Johannes Mast},
74 |   year = {2024},
75 |   note = {R package version 0.1.1, commit f8b7ab7942b4a14ffd5342935d2d45a7c7b3e5d2},
76 |   url = {https://github.com/JohMast/flowmapper},
77 |   doi = {10.32614/CRAN.package.flowmapper}
78 | }
79 | 
80 | @Manual{flowmapblue_r,
81 |   title = {Flowmap.blue widget for R},
82 |   author = {Ilya Boyandin},
83 |   year = {2024},
84 |   url = {https://github.com/FlowmapBlue/flowmapblue.R},
85 |   doi = {10.32614/CRAN.package.flowmapblue},
86 | }
87 | 
88 | @article{lovelace2022jittering,
89 |   title={Jittering: A computationally efficient method for generating realistic route networks from origin-destination data},
90 |   author={Lovelace, Robin and F{\'e}lix, Rosa and Carlino, Dustin},
91 |   journal={Findings},
92 |   year={2022},
93 |   publisher={Findings Press}
94 | }
95 | 


--------------------------------------------------------------------------------