├── .github ├── FUNDING.yml └── workflows │ └── check-full.yaml ├── tests ├── testthat.R └── testthat │ └── test-simple-checkout.R ├── inst ├── img │ ├── cl-logo.png │ └── edit-sql.png └── WORDLIST ├── data └── nivel_educacional_biobio.rda ├── NAMESPACE ├── .Rbuildignore ├── .gitignore ├── man ├── censo_desconectar.Rd ├── censo_tabla.Rd ├── censo_eliminar.Rd ├── censo_descargar.Rd ├── censo_conectar.Rd ├── censo2017-package.Rd └── nivel_educacional_biobio.Rd ├── codecov.yml ├── censo2017.Rproj ├── R ├── zzz.R ├── remove.R ├── utils.R ├── censo2017-package.R ├── connection-pane.R ├── download.R ├── connect.R └── schema.R ├── NEWS.md ├── data-raw ├── 01-create-tsv-shp-files.R ├── 02-create-local-db-duckdb.R ├── 00-create-local-db-sqlite.R └── 03-convertir-xml-a-tidy.R ├── DESCRIPTION ├── LICENSE.md ├── README.Rmd ├── README.md ├── vignettes └── censo2017.Rmd └── codemeta.json /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | custom: https://www.buymeacoffee.com/pacha 2 | -------------------------------------------------------------------------------- /tests/testthat.R: -------------------------------------------------------------------------------- 1 | library(testthat) 2 | library(censo2017) 3 | test_check("censo2017") 4 | -------------------------------------------------------------------------------- /inst/img/cl-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/censo2017/HEAD/inst/img/cl-logo.png -------------------------------------------------------------------------------- /inst/img/edit-sql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/censo2017/HEAD/inst/img/edit-sql.png -------------------------------------------------------------------------------- /data/nivel_educacional_biobio.rda: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ropensci/censo2017/HEAD/data/nivel_educacional_biobio.rda -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(censo_conectar) 4 | export(censo_descargar) 5 | export(censo_desconectar) 6 | export(censo_eliminar) 7 | export(censo_tabla) 8 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | .github 2 | ^censo2017\.Rproj$ 3 | ^\.Rproj\.user$ 4 | ^data-raw$ 5 | ^LICENSE\.md$ 6 | ^CODE_OF_CONDUCT\.md$ 7 | ^codemeta\.json$ 8 | codecov.yml 9 | vignettes 10 | ^CRAN-RELEASE$ 11 | ^cran-comments\.md$ 12 | README.md 13 | ^README\.Rmd$ 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | _bookdown_files 5 | data-raw/*.sqlite 6 | data-raw/*.sqlite.bz2 7 | data-raw/*.duckdb 8 | data-raw/*.duckdb.bz2 9 | data-raw/*.duckdb.wal 10 | data-raw/*.zip 11 | data-raw/*.tsv 12 | data-raw/*.shp 13 | data-raw/*.dbf 14 | data-raw/*.shx 15 | data-raw/*.prj 16 | data-raw/files-for-user-db 17 | inst/*.log 18 | inst/*.pdf 19 | -------------------------------------------------------------------------------- /man/censo_desconectar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/connect.R 3 | \name{censo_desconectar} 4 | \alias{censo_desconectar} 5 | \title{Desconecta la Base de Datos del Censo} 6 | \usage{ 7 | censo_desconectar() 8 | } 9 | \description{ 10 | Una funcion auxiliar para 
desconectarse de la base de datos. 11 | } 12 | \examples{ 13 | censo_desconectar() 14 | } 15 | -------------------------------------------------------------------------------- /codecov.yml: --------------------------------------------------------------------------------
comment: false

coverage:
  status:
    project:
      default:
        target: auto
        threshold: 1%
    patch:
      default:
        target: auto
        threshold: 1%

# The ignore list used to live under a second top-level `coverage:` key.
# Mapping keys must be unique, so one of the two `coverage:` blocks was being
# dropped by the parser. It is declared once here, as the top-level `ignore`
# key.
ignore:
  - "R/connect.R" # these internal functions are called from end-user functions
  - "R/utils.R"
  - "R/zzz.R"
-------------------------------------------------------------------------------- /censo2017.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | PackageRoxygenize: rd,collate,namespace,vignette 19 | -------------------------------------------------------------------------------- /inst/WORDLIST: -------------------------------------------------------------------------------- 1 | Acceso 2 | Censo 3 | Convertidor 4 | Datos 5 | De 6 | Esta 7 | Facil 8 | Grande 9 | INE 10 | Los 11 | Provee 12 | REDATAM 13 | acceso 14 | acompanian 15 | ademas 16 | asciificado 17 | castellano 18 | conveniente 19 | creado 20 | datos 21 | de 22 | del 23 | desde 24 | diferentes 25 | documentado 26 | el 27 | esta 28 | estos 29 | fueron 30 | funcione 31 | importados 32 | intencionalmente 33 | los 34 | mapas 35 | millones 36 | oficial 37 | paquete 38 | plataformas 39 | por 40 | problema 41 | proporcionan 42 | que 43 | registros 44 | se 45 | un 46 | usando 47 |
-------------------------------------------------------------------------------- /R/zzz.R: --------------------------------------------------------------------------------
# Package attach hook.
#
# Prints the package banner and, in interactive sessions, registers the
# connection pane (only when the RSTUDIO environment variable is "1" and the
# call stack does not look like a package check) and reports the status of the
# local database.
#
# Every banner line is sent with `startup = TRUE`, so msg() emits it through
# packageStartupMessage() and honours the `censo2017.quiet` option. Without
# that flag msg() falls back to message(), which
# suppressPackageStartupMessages() cannot silence.
#
# @param ... Arguments supplied by R when the package is attached; unused.
.onAttach <- function(...) {
  msg(cli::rule(crayon::bold("CENSO 2017")), startup = TRUE)
  msg(" ", startup = TRUE)
  msg(
    "La documentacion del paquete y ejemplos de uso se encuentran en https://pacha.dev/censo2017/.",
    startup = TRUE
  )
  msg(
    "Visita https://buymeacoffee.com/pacha si deseas donar para contribuir al desarrollo de este software.",
    startup = TRUE
  )
  msg(
    "Esta libreria necesita 3.5 GB libres para la crear la base de datos localmente. Una vez creada la base, esta ocupa 1.0 GB en disco.",
    startup = TRUE
  )
  msg(" ", startup = TRUE)

  if (interactive() && Sys.getenv("RSTUDIO") == "1" && !in_chk()) {
    censo_pane()
  }

  if (interactive()) {
    censo_status()
  }
}
-------------------------------------------------------------------------------- /man/censo_tabla.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/connect.R 3 | \name{censo_tabla} 4 | \alias{censo_tabla} 5 | \title{Tablas Completas de la Base de Datos del Censo} 6 | \usage{ 7 | censo_tabla(tabla) 8 | } 9 | \arguments{ 10 | \item{tabla}{Una cadena de texto indicando la tabla a extraer} 11 | } 12 | \value{ 13 | Un tibble 14 | } 15 | \description{ 16 | Devuelve una tabla completa de la base de datos. Para entregar datos 17 | filtrados previamente se debe usar \code{\link[=censo_conectar]{censo_conectar()}}.
18 | } 19 | \examples{ 20 | \dontrun{ censo_tabla("comunas") } 21 | } 22 | -------------------------------------------------------------------------------- /man/censo_eliminar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/remove.R 3 | \name{censo_eliminar} 4 | \alias{censo_eliminar} 5 | \title{Elimina la Base de Datos del Censo de tu Computador} 6 | \usage{ 7 | censo_eliminar(preguntar = TRUE) 8 | } 9 | \arguments{ 10 | \item{preguntar}{Si acaso se despliega un menu para confirmar la accion de 11 | borrar cualquier base del censo existente. Por defecto es verdadero.} 12 | } 13 | \description{ 14 | Elimina el directorio \code{censo2017} y todos sus contenidos, incluyendo versiones 15 | de la base de datos del Censo creadas con cualquier version de 'DuckDB'. 16 | } 17 | \examples{ 18 | \dontrun{ censo_eliminar() } 19 | } 20 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # Version 0.5 2 | 3 | - Adds all of rOpenSci's suggestion, which means great improvements such as 4 | better documentation, consistent syntax, does not depend on DuckDB version, etc. 5 | See https://github.com/ropensci/software-review/issues/414 for the full detail. 6 | 7 | # Version 0.4 8 | 9 | - Works with duckdb 0.3.4 10 | - Removes the databases used with older censo2017 versions 11 | 12 | # Version 0.3 13 | 14 | - Moves local database location according to CRAN request 15 | - Requires R 4.0 16 | - Uses DuckDB instead of SQLite 17 | 18 | # Version 0.2 19 | 20 | - Adds `vignettes/` to `.Rbuildignore`. 21 | - Complies with CRAN policies regarding Suggests. 22 | - Vignettes are now available from gh-pages. 
23 | -------------------------------------------------------------------------------- /tests/testthat/test-simple-checkout.R: --------------------------------------------------------------------------------
context("Download")

# Point the package at a dedicated throw-away sub-directory. The session temp
# directory itself must not be used: censo_eliminar() unlinks the whole
# CENSO2017_DIR recursively, and censo_descargar() downloads its zip file into
# tempdir(), so the archive would be deleted before it is extracted.
olddir <- Sys.getenv("CENSO2017_DIR", unset = NA)
Sys.setenv(CENSO2017_DIR = file.path(tempdir(), "censo2017-test"))

test_that("censo_tabla returns tbl_df", {
  skip_on_cran()

  censo_descargar()

  expect_is(censo_conectar(), "duckdb_connection")

  for (tabla in c("comunas", "regiones")) {
    expect_is(censo_tabla(tabla), "tbl_df")
  }

  # dplyr/dbplyr are optional (Suggests). Check for them without attaching
  # them, and with the scalar, short-circuiting `&&`.
  if (requireNamespace("dplyr", quietly = TRUE) &&
    requireNamespace("dbplyr", quietly = TRUE)) {
    for (tabla in c("comunas", "regiones")) {
      expect_is(dplyr::tbl(censo_conectar(), tabla), "tbl_lazy")
    }
  }

  censo_desconectar()

  censo_eliminar(preguntar = FALSE)

  expect_false(file.exists(censo_path()))
})

# Restore the caller's environment exactly: a variable that was unset before
# the test is unset again instead of being left defined as "".
if (is.na(olddir)) {
  Sys.unsetenv("CENSO2017_DIR")
} else {
  Sys.setenv(CENSO2017_DIR = olddir)
}
-------------------------------------------------------------------------------- /man/censo_descargar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/download.R 3 | \name{censo_descargar} 4 | \alias{censo_descargar} 5 | \title{Descarga la Base de Datos del Censo a tu Computador} 6 | \usage{ 7 | censo_descargar(ver = NULL) 8 | } 9 | \arguments{ 10 | \item{ver}{La version a descargar. Por defecto es la ultima version 11 | disponible en GitHub. Se pueden ver todas las versiones en 12 | \url{https://github.com/pachamaltese/censo2017/releases}.} 13 | } 14 | \description{ 15 | Este comando descarga la base de datos completa como un unico archivo zip que 16 | se descomprime para crear la base de datos local. Si no quieres descargar la 17 | base de datos en tu home, ejecuta usethis::edit_r_environ() para crear la 18 | variable de entorno CENSO2017_DIR con la ruta.
19 | } 20 | \examples{ 21 | \dontrun{ censo_descargar() } 22 | } 23 | -------------------------------------------------------------------------------- /R/remove.R: --------------------------------------------------------------------------------
#' Elimina la Base de Datos del Censo de tu Computador
#'
#' Elimina el directorio `censo2017` y todos sus contenidos, incluyendo versiones
#' de la base de datos del Censo creadas con cualquier version de 'DuckDB'.
#'
#' @param preguntar Si acaso se despliega un menu para confirmar la accion de
#' borrar cualquier base del censo existente. Por defecto es verdadero.
#' @return NULL
#' @export
#'
#' @examples
#' \dontrun{ censo_eliminar() }
censo_eliminar <- function(preguntar = TRUE) {
  if (preguntar) {
    answer <- utils::menu(
      c("De acuerdo", "Cancelar"),
      title = "Esto eliminara todas las bases del censo",
      graphics = FALSE
    )
    # utils::menu() returns 0 when the user leaves the menu without choosing.
    # Only an explicit "De acuerdo" (choice 1) may go on to delete anything;
    # testing for `== 2` alone would treat that exit as a confirmation.
    if (answer != 1) {
      return(invisible())
    }
  }

  # Close any open connection first so the database files can be removed.
  suppressWarnings(censo_desconectar())
  try(unlink(censo_path(), recursive = TRUE))
  update_censo_pane()
  invisible()
}
-------------------------------------------------------------------------------- /man/censo_conectar.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/connect.R 3 | \name{censo_conectar} 4 | \alias{censo_conectar} 5 | \title{Conexion a la Base de Datos del Censo} 6 | \usage{ 7 | censo_conectar(dir = censo_path()) 8 | } 9 | \arguments{ 10 | \item{dir}{La ubicacion de la base de datos en el disco. Por defecto es 11 | \code{censo2017} en la carpeta de datos del usuario de R o la variable de entorno 12 | \code{CENSO2017_DIR} si el usuario la especifica.} 13 | } 14 | \description{ 15 | Devuelve una conexion a la base de datos local.
Esto corresponde a una 16 | conexion a una base DuckDB compatible con DBI. A diferencia de 17 | \code{\link[=censo_tabla]{censo_tabla()}}, esta funcion es mas flexible y se puede usar con 18 | dbplyr para leer unicamente lo que se necesita o directamente con DBI para 19 | usar comandos SQL. 20 | } 21 | \examples{ 22 | \dontrun{ 23 | DBI::dbListTables(censo_conectar()) 24 | 25 | DBI::dbGetQuery( 26 | censo_conectar(), 27 | 'SELECT * FROM comunas WHERE provincia_ref_id = 1' 28 | ) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /R/utils.R: --------------------------------------------------------------------------------
# Emit a package message.
#
# @param ... Forwarded to text_col(), which accepts a single argument `x`.
# @param startup When TRUE the text is sent with packageStartupMessage(),
#   unless the option `censo2017.quiet` is TRUE, in which case nothing is
#   printed. When FALSE (default) the text is sent with message().
msg <- function(..., startup = FALSE) {
  if (startup) {
    if (!isTRUE(getOption("censo2017.quiet"))) {
      packageStartupMessage(text_col(...))
    }
  } else {
    message(text_col(...))
  }
}

# Colour `x` to match the RStudio theme.
#
# Returns `x` unchanged when RStudio is not available or does not provide
# `getThemeInfo`. Otherwise returns `x` in white for a dark theme and in
# black for any other theme.
text_col <- function(x) {
  # If RStudio not available, messages already printed in black
  if (!rstudioapi::isAvailable()) {
    return(x)
  }

  if (!rstudioapi::hasFun("getThemeInfo")) {
    return(x)
  }

  theme <- rstudioapi::getThemeInfo()

  if (isTRUE(theme$dark)) crayon::white(x) else crayon::black(x)
}

# TRUE when any call on the current call stack, once deparsed, contains the
# text "check".
in_chk <- function() {
  # vapply() always yields a character vector, whatever the stack depth.
  calls <- vapply(
    sys.calls(),
    function(a) paste(deparse(a), collapse = "\n"),
    character(1)
  )
  any(grepl("check", calls))
}

# Error handler that replaces a low-level table read failure with a
# user-facing message.
#
# @param e The condition that was caught; it is not used in the message.
#
# The table name is not passed in, so it is recovered from the call stack: the
# nearest calling frame that defines a variable named `tabla` provides it.
# NOTE(review): the previous lookup, `get("tabla", envir = 1)`, appears to
# read frame 1 only, which holds the table name only when the user-facing
# function is called directly from the top level -- confirm against the
# caller in R/connect.R.
#
# Always raises an error; never returns.
read_table_error <- function(e) {
  table_name <- NULL
  # Walk outwards from the caller, skipping this function's own frame.
  for (i in rev(seq_len(sys.nframe() - 1L))) {
    frame <- sys.frame(i)
    if (exists("tabla", envir = frame, inherits = FALSE)) {
      table_name <- get("tabla", envir = frame, inherits = FALSE)
      break
    }
  }
  if (is.null(table_name)) {
    table_name <- "(desconocida)"
  }

  # The exported download function is censo_descargar(); the message used to
  # point users to a function that does not exist.
  detail <- c(
    sprintf("No esta disponible la tabla %s.", table_name),
    "\nVerifica que escribiste el nombre correctamente y que instalaste los",
    "\ndatos con censo_descargar()."
  )
  stop(detail, call. = FALSE)
}
-------------------------------------------------------------------------------- /man/censo2017-package.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/censo2017-package.R 3 | \docType{package} 4 | \name{censo2017-package} 5 | \alias{censo2017} 6 | \alias{censo2017-package} 7 | \title{censo2017: Base de Datos de Facil Acceso del Censo 2017 de Chile 8 | (2017 Chilean Census Easy Access Database)} 9 | \description{ 10 | Provee un acceso conveniente a mas de 17 millones de registros 11 | de la base de datos del Censo 2017. Los datos fueron importados desde 12 | el DVD oficial del INE usando el Convertidor REDATAM creado por Pablo De 13 | Grande. Esta paquete esta documentado intencionalmente en castellano 14 | asciificado para que funcione sin problema en diferentes plataformas. 15 | (Provides convenient access to more than 17 million records from the 16 | Chilean Census 2017 database. The datasets were imported from the official 17 | DVD provided by the Chilean National Bureau of Statistics by using the 18 | REDATAM converter created by Pablo De Grande and in addition it includes the 19 | maps accompanying these datasets.)
20 | } 21 | \seealso{ 22 | Useful links: 23 | \itemize{ 24 | \item \url{https://docs.ropensci.org/censo2017/} 25 | \item Report bugs at \url{https://github.com/ropensci/censo2017/issues/} 26 | } 27 | 28 | } 29 | \author{ 30 | \strong{Maintainer}: Mauricio Vargas \email{mavargas11@uc.cl} (\href{https://orcid.org/0000-0003-1017-7574}{ORCID}) 31 | 32 | Other contributors: 33 | \itemize{ 34 | \item Juan Correa [contributor] 35 | \item Maria Paula Caldas (rOpenSci) [reviewer] 36 | \item Frans van Dunee (rOpenSci) [reviewer] 37 | \item Melina Vidoni (rOpenSci) [reviewer] 38 | \item Constanza Manriquez (revision independiente de las vinietas) [reviewer] 39 | \item Instituto Nacional de Estadisticas (INE) [data contributor] 40 | } 41 | 42 | } 43 | \keyword{internal} 44 | -------------------------------------------------------------------------------- /R/censo2017-package.R: -------------------------------------------------------------------------------- 1 | #' @keywords internal 2 | "_PACKAGE" 3 | 4 | #' Poblacion por Nivel Educacional en la Region del Bio Bio 5 | #' 6 | #' @name nivel_educacional_biobio 7 | #' @docType data 8 | #' @author Elaboracion propia con base en datos desagregados del Censo 9 | #' @format Un tibble con 860 observaciones en las siguientes 4 variables. 10 | #' \describe{ 11 | #' \item{\code{comuna}}{codigo de comuna (15 regiones)} 12 | #' \item{\code{nivel_educ}}{maximo nivel educacional alcanzado (ver la vinieta 13 | #' con los links a la descripcion de codigos)} 14 | #' \item{\code{cuenta}}{cantidad de personas censadas en la comuna} 15 | #' \item{\code{proporcion}}{porcentaje que representan las personas censadas en 16 | #' la comuna} 17 | #' } 18 | #' @description Proporciona la cuenta y porcentaje por comuna de las personas de 19 | #' la Region del Bio Bio de acuerdo al maximo nivel educacional que reportan 20 | #' (e.g. primaria, secundaria, universitaria, etc.) 
21 | #' @examples 22 | #' nivel_educacional_biobio 23 | #' 24 | #' \dontrun{ 25 | #' # replicar el resultado usando dplyr directamente con SQL 26 | #' # es ligeramente distinto a las vinietas que explican esta misma tabla 27 | #' nivel_educacional_biobio <- tbl(censo_conectar(), "zonas") %>% 28 | #' mutate( 29 | #' region = substr(as.character(geocodigo), 1, 2), 30 | #' comuna = substr(as.character(geocodigo), 1, 5) 31 | #' ) %>% 32 | #' filter(region == "08") %>% 33 | #' select(comuna, geocodigo, zonaloc_ref_id) %>% 34 | #' inner_join(select(tbl(censo_conectar(), "viviendas"), 35 | #' zonaloc_ref_id, vivienda_ref_id), by = "zonaloc_ref_id") %>% 36 | #' inner_join(select(tbl(censo_conectar(), "hogares"), 37 | #' vivienda_ref_id, hogar_ref_id), by = "vivienda_ref_id") %>% 38 | #' inner_join(select(tbl(censo_conectar(), "personas"), 39 | #' hogar_ref_id, nivel_educ = p15), by = "hogar_ref_id") %>% 40 | #' group_by(comuna, nivel_educ) %>% 41 | #' summarise(cuenta = n()) %>% 42 | #' group_by(comuna) %>% 43 | #' mutate(proporcion = cuenta * (1 / sum(cuenta))) %>% 44 | #' arrange(comuna, nivel_educ)} 45 | #' @keywords data 46 | NULL 47 | -------------------------------------------------------------------------------- /man/nivel_educacional_biobio.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/censo2017-package.R 3 | \docType{data} 4 | \name{nivel_educacional_biobio} 5 | \alias{nivel_educacional_biobio} 6 | \title{Poblacion por Nivel Educacional en la Region del Bio Bio} 7 | \format{ 8 | Un tibble con 860 observaciones en las siguientes 4 variables. 
9 | \describe{ 10 | \item{\code{comuna}}{codigo de comuna (15 regiones)} 11 | \item{\code{nivel_educ}}{maximo nivel educacional alcanzado (ver la vinieta 12 | con los links a la descripcion de codigos)} 13 | \item{\code{cuenta}}{cantidad de personas censadas en la comuna} 14 | \item{\code{proporcion}}{porcentaje que representan las personas censadas en 15 | la comuna} 16 | } 17 | } 18 | \description{ 19 | Proporciona la cuenta y porcentaje por comuna de las personas de 20 | la Region del Bio Bio de acuerdo al maximo nivel educacional que reportan 21 | (e.g. primaria, secundaria, universitaria, etc.) 22 | } 23 | \examples{ 24 | nivel_educacional_biobio 25 | 26 | \dontrun{ 27 | # replicar el resultado usando dplyr directamente con SQL 28 | # es ligeramente distinto a las vinietas que explican esta misma tabla 29 | nivel_educacional_biobio <- tbl(censo_conectar(), "zonas") \%>\% 30 | mutate( 31 | region = substr(as.character(geocodigo), 1, 2), 32 | comuna = substr(as.character(geocodigo), 1, 5) 33 | ) \%>\% 34 | filter(region == "08") \%>\% 35 | select(comuna, geocodigo, zonaloc_ref_id) \%>\% 36 | inner_join(select(tbl(censo_conectar(), "viviendas"), 37 | zonaloc_ref_id, vivienda_ref_id), by = "zonaloc_ref_id") \%>\% 38 | inner_join(select(tbl(censo_conectar(), "hogares"), 39 | vivienda_ref_id, hogar_ref_id), by = "vivienda_ref_id") \%>\% 40 | inner_join(select(tbl(censo_conectar(), "personas"), 41 | hogar_ref_id, nivel_educ = p15), by = "hogar_ref_id") \%>\% 42 | group_by(comuna, nivel_educ) \%>\% 43 | summarise(cuenta = n()) \%>\% 44 | group_by(comuna) \%>\% 45 | mutate(proporcion = cuenta * (1 / sum(cuenta))) \%>\% 46 | arrange(comuna, nivel_educ)} 47 | } 48 | \author{ 49 | Elaboracion propia con base en datos desagregados del Censo 50 | } 51 | \keyword{data} 52 | -------------------------------------------------------------------------------- /data-raw/01-create-tsv-shp-files.R: -------------------------------------------------------------------------------- 1 | # 
# packages ----

library(dplyr)
library(stringr)
library(DBI)
library(RSQLite)
library(data.table)
library(sf)
library(geojsonio)
library(xml2)

# paths ----

# SQLite database every table is read from.
db_path <- "data-raw/censo2017.sqlite"

# Directory the TSV/SHP files are written to. The test section below reads the
# files back from this same directory; it used to look in "data-raw/", where
# this script never writes.
out_dir <- "data-raw/files-for-user-db"

# helpers ----

# Read one table over a short-lived connection. `reader` is called as
# reader(con, tabla); the connection is closed even if the read fails.
read_from_db <- function(tabla, reader) {
  con <- dbConnect(SQLite(), db_path)
  on.exit(dbDisconnect(con), add = TRUE)
  reader(con, tabla)
}

# Stop unless both objects have the same number of rows and columns, then
# report both pairs. `d` and `d2` are c(nrow, ncol) vectors.
check_dims <- function(d, d2) {
  stopifnot(d[1] == d2[1])
  stopifnot(d[2] == d2[2])

  message(paste(paste0("r", d[1], " c", d[2]), "vs", paste0("r", d2[1], " c", d2[2])))
}

# connections ----

tablas <- read_from_db(NULL, function(con, tabla) dbListTables(con))
tablas_no_mapas <- grep("mapa", tablas, value = TRUE, invert = TRUE)
tablas_mapas <- grep("mapa", tablas, value = TRUE, invert = FALSE)

# export TSV ----

dir.create(out_dir, showWarnings = FALSE, recursive = TRUE)

for (tabla in tablas_no_mapas) {
  message(tabla)

  d <- read_from_db(tabla, dbReadTable)

  fwrite(d, file.path(out_dir, paste0(tabla, ".tsv")), sep = "\t")
  # Drop the table before collecting, so gc() can actually release it.
  rm(d)
  gc()
}

# export SHP ----

for (tabla in tablas_mapas) {
  message(tabla)

  d <- read_from_db(tabla, st_read)

  st_write(d, file.path(out_dir, paste0(tabla, ".shp")))
  rm(d)
  gc()
}

# test ----

for (tabla in tablas_no_mapas) {
  message(tabla)

  d <- read_from_db(tabla, dbReadTable)
  d <- c(nrow(d), ncol(d))

  d2 <- fread(file.path(out_dir, paste0(tabla, ".tsv")))
  d2 <- c(nrow(d2), ncol(d2))

  check_dims(d, d2)
}

for (tabla in tablas_mapas) {
  message(tabla)

  d <- read_from_db(tabla, st_read)
  d <- c(nrow(d), ncol(d))

  d2 <- st_read(file.path(out_dir, paste0(tabla, ".shp")))
  d2 <- c(nrow(d2), ncol(d2))

  check_dims(d, d2)
}
-------------------------------------------------------------------------------- /.github/workflows/check-full.yaml: -------------------------------------------------------------------------------- 1 | on: [push, pull_request] 2 | 3 | name: R-CMD-check 4 | 5 | jobs: 6 | R-CMD-check: 7 | runs-on: ${{ matrix.config.os }} 8 | 9 | name: ${{ matrix.config.os }} (${{ matrix.config.r }}) 10 | 11 | strategy: 12 | fail-fast: false 13 | matrix: 14 | config: 15 | - { os: windows-latest, r: '4.0', args: "--no-manual"} 16 | - { os: ubuntu-18.04, r: '4.0', cran: "https://demo.rstudiopm.com/all/__linux__/bionic/latest", args: "--no-manual" } 17 | 18 | env: 19 | R_REMOTES_NO_ERRORS_FROM_WARNINGS: true 20 | CRAN: ${{ matrix.config.cran }} 21 | 22 | steps: 23 | - uses: actions/checkout@v1 24 | 25 | - uses: r-lib/actions/setup-r@master 26 | with: 27 | r-version: ${{ matrix.config.r }} 28 | 29 | - uses: r-lib/actions/setup-pandoc@master 30 | 31 | - uses: r-lib/actions/setup-tinytex@master 32 | if: contains(matrix.config.args, 'no-manual') == false 33 | 34 | - name: Ensure that tex has all the correct packages 35 | if: contains(matrix.config.args, 'no-manual') == false 36 | run: | 37 | tlmgr install cm-super 38 | 39 | - name: Install system dependencies 40 | if: runner.os == 'Linux' 41 | run: | 42 | sudo apt-get update -y 43 | sudo apt-get -y install default-jdk libv8-dev libgdal-dev libgeos-dev libjq-dev libudunits2-dev 44 | 45 | - name: Cache R packages 46 | uses: actions/cache@v1 47 | with: 48 | path: ${{ env.R_LIBS_USER }} 49 | key: ${{ runner.os }}-r-${{ matrix.config.r }}-${{ hashFiles('DESCRIPTION') }} 50 | 51 | - name: Install R dependencies 52 | run: Rscript -e "install.packages(c('remotes', 'rcmdcheck', 'knitr'), type = 'source')" -e "remotes::install_deps(dependencies = TRUE);" 53 | 54 | - name: Check 55 | run: Rscript -e "rcmdcheck::rcmdcheck(args = '${{ matrix.config.args }}', error_on = 'warning', check_dir =
'check')" 56 | 57 | - name: Upload check results 58 | if: failure() 59 | uses: actions/upload-artifact@master 60 | with: 61 | name: ${{ runner.os }}-r${{ matrix.config.r }}-results 62 | path: check 63 | 64 | - name: Test coverage 65 | if: matrix.config.os == 'ubuntu-18.04' && matrix.config.r == '4.0' 66 | run: | 67 | Rscript -e 'remotes::install_github("r-lib/covr@gh-actions")' 68 | Rscript -e 'covr::codecov(token = "${{secrets.CODECOV_TOKEN}}")' 69 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: censo2017 2 | Title: Base de Datos de Facil Acceso del Censo 2017 de Chile 3 | (2017 Chilean Census Easy Access Database) 4 | Version: 0.6.1 5 | Authors@R: c( 6 | person(given = "Mauricio", 7 | family = "Vargas", 8 | role = c("aut", "cre"), 9 | email = "mavargas11@uc.cl", 10 | comment = c(ORCID = "0000-0003-1017-7574")), 11 | person(given = "Juan", 12 | family = "Correa", 13 | role = c("ctb")), 14 | person(given = "Maria Paula", 15 | family = "Caldas", 16 | role = c("rev"), 17 | comment = "rOpenSci"), 18 | person(given = "Frans", 19 | family = "van Dunné", 20 | role = c("rev"), 21 | comment = "rOpenSci"), 22 | person(given = "Melina", 23 | family = "Vidoni", 24 | role = c("rev"), 25 | comment = "rOpenSci"), 26 | person(given = "Constanza", 27 | family = "Manriquez", 28 | role = c("rev"), 29 | comment = "revision independiente de las vinietas"), 30 | person(family = "Instituto Nacional de Estadisticas (INE)", 31 | role = c("dtc") 32 | ) 33 | ) 34 | Description: Provee un acceso conveniente a mas de 17 millones de registros 35 | de la base de datos del Censo 2017. Los datos fueron importados desde 36 | el DVD oficial del INE usando el Convertidor REDATAM creado por Pablo De 37 | Grande. Esta paquete esta documentado intencionalmente en castellano 38 | asciificado para que funcione sin problema en diferentes plataformas. 
39 | (Provides convenient access to more than 17 million records from the 40 | Chilean Census 2017 database. The datasets were imported from the official 41 | DVD provided by the Chilean National Bureau of Statistics by using the 42 | REDATAM converter created by Pablo De Grande and in addition it includes the 43 | maps accompanying these datasets.) 44 | URL: https://docs.ropensci.org/censo2017/ 45 | BugReports: https://github.com/ropensci/censo2017/issues/ 46 | License: CC0 47 | Language: es 48 | Encoding: UTF-8 49 | LazyData: true 50 | Roxygen: list(markdown = TRUE) 51 | RoxygenNote: 7.1.1 52 | Imports: 53 | DBI, 54 | duckdb, 55 | httr, 56 | tibble, 57 | purrr, 58 | cli, 59 | crayon, 60 | rstudioapi, 61 | tools 62 | Suggests: 63 | testthat, 64 | covr, 65 | knitr, 66 | dplyr, 67 | dbplyr, 68 | ggplot2, 69 | chilemapas 70 | Depends: 71 | R (>= 4.0) 72 | -------------------------------------------------------------------------------- /R/connection-pane.R: --------------------------------------------------------------------------------
# Callback of the "SQL" pane action: opens a new SQL document pre-filled with
# a preview header bound to censo_conectar() and a sample query.
#
# Does nothing when rstudioapi is not installed or when
# rstudioapi::hasFun("documentNew") is FALSE. hasFun() is the same feature
# check R/utils.R uses before calling getThemeInfo().
sql_action <- function() {
  if (requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::hasFun("documentNew")) {
    contents <- paste(
      "-- !preview conn=censo2017::censo_conectar()",
      "",
      "SELECT * FROM comunas",
      "",
      sep = "\n"
    )

    rstudioapi::documentNew(
      text = contents, type = "sql",
      position = rstudioapi::document_position(2, 40),
      execute = FALSE
    )
  }
}

# Register the census database with the connection observer found in the
# `connectionObserver` option. Does nothing when that option is NULL or the
# session is not interactive.
#
# The callbacks passed to the observer list the tables, list the columns of
# one table (from a one-row query) and preview rows of a table.
censo_pane <- function() {
  observer <- getOption("connectionObserver")
  if (is.null(observer) || !interactive()) {
    return(invisible(NULL))
  }

  # Build "SELECT * FROM <table> LIMIT <n>". The table name is quoted as an
  # identifier instead of being pasted raw, and the limit is formatted as a
  # plain integer so a large value cannot be rendered as e.g. "1e+05".
  select_all <- function(con, table, limit) {
    paste(
      "SELECT * FROM", DBI::dbQuoteIdentifier(con, table),
      "LIMIT", format(as.integer(limit), scientific = FALSE)
    )
  }

  observer$connectionOpened(
    type = "Censo2017",
    host = "censo2017",
    displayName = "Tablas Censo 2017",
    icon = system.file("img", "cl-logo.png", package = "censo2017"),
    connectCode = "censo2017::censo_pane()",
    disconnect = censo2017::censo_desconectar,
    listObjectTypes = function() {
      list(
        table = list(contains = "data")
      )
    },
    listObjects = function(type = "datasets") {
      tbls <- DBI::dbListTables(censo_conectar())
      data.frame(
        name = tbls,
        type = rep("table", length(tbls)),
        stringsAsFactors = FALSE
      )
    },
    listColumns = function(table) {
      con <- censo_conectar()
      res <- DBI::dbGetQuery(con, select_all(con, table, 1L))
      data.frame(
        name = names(res),
        type = vapply(res, function(x) class(x)[1], character(1)),
        stringsAsFactors = FALSE
      )
    },
    previewObject = function(rowLimit, table) {
      con <- censo_conectar()
      DBI::dbGetQuery(con, select_all(con, table, rowLimit))
    },
    actions = list(
      Status = list(
        # NOTE(review): the repository tree lists only cl-logo.png and
        # edit-sql.png under inst/img, so this lookup presumably returns ""
        # -- confirm whether the icon file is missing.
        icon = system.file("img", "ropensci-logo.png", package = "censo2017"),
        callback = censo_status
      ),
      SQL = list(
        icon = system.file("img", "edit-sql.png", package = "censo2017"),
        callback = sql_action
      )
    ),
    connectionObject = censo_conectar()
  )
}

# Tell the connection observer, if one is set, that the census connection
# changed.
update_censo_pane <- function() {
  observer <- getOption("connectionObserver")
  if (!is.null(observer)) {
    observer$connectionUpdated("Censo2017", "censo2017", "")
  }
}
-------------------------------------------------------------------------------- /R/download.R: --------------------------------------------------------------------------------
#' Descarga la Base de Datos del Censo a tu Computador
#'
#' Este comando descarga la base de datos completa como un unico archivo zip que
#' se descomprime para crear la base de datos local. Si no quieres descargar la
#' base de datos en tu home, ejecuta usethis::edit_r_environ() para crear la
#' variable de entorno CENSO2017_DIR con la ruta.
#'
#' @param ver La version a descargar. Por defecto es la ultima version
#' disponible en GitHub. Se pueden ver todas las versiones en
#' <https://github.com/pachamaltese/censo2017/releases>.
#'
#' @return NULL
#' @export
#'
#' @examples
#' \dontrun{ censo_descargar() }
censo_descargar <- function(ver = NULL) {
  duckdb_version <- utils::packageVersion("duckdb")
  db_pattern <- paste0("v", gsub("\\.", "", duckdb_version), ".sql$")

  duckdb_current_files <- list.files(censo_path(), db_pattern,
    full.names = TRUE
  )

  # NOTE(review): the size threshold below is 5e9 bytes, while the startup
  # banner says the finished database takes about 1.0 GB. If that is accurate
  # this early return can never trigger -- confirm the intended threshold.
  if (length(duckdb_current_files) > 0 &&
    # avoid listing initial empty duckdb files
    all(file.size(duckdb_current_files) > 5000000000)) {
    msg("Ya existe una base del censo para tu version de DuckDB.")
    msg("Si realmente quieres descargar la base nuevamente, ejecuta censo_eliminar() y luego descarga.")
    return(invisible())
  }

  msg("Descargando la base de datos desde GitHub...")

  destdir <- tempdir()
  dir <- censo_path()

  suppressWarnings(try(dir.create(dir, recursive = TRUE)))

  zfile <- get_gh_release_file("ropensci/censo2017",
    tag_name = ver,
    dir = destdir
  )
  ver <- attr(zfile, "ver")

  suppressWarnings(try(censo_desconectar()))

  msg("Borrando las versiones antiguas de la base que pudiera haber...\n")
  censo_eliminar(preguntar = FALSE)
  # censo_eliminar() unlinks the whole data directory, so create it again
  # before anything is written there. This is harmless if the connection code
  # also creates it.
  dir.create(dir, recursive = TRUE, showWarnings = FALSE)

  msg("Descomprimiendo los archivos necesarios...")
  utils::unzip(zfile, overwrite = TRUE, exdir = destdir)
  unlink(zfile)

  # Match the extension only, not any file name that merely contains "tsv".
  finp_tsv <- list.files(destdir, full.names = TRUE, pattern = "\\.tsv$")

  invisible(create_schema())

  for (tsv_file in finp_tsv) {
    # The table name is the file name without directory and extension.
    tout <- sub("\\.tsv$", "", basename(tsv_file))

    msg(sprintf("Creando tabla %s ...", tout))

    con <- censo_conectar()

    # `finally` closes the connection even when COPY fails, so a bad file
    # cannot leave the database open.
    tryCatch(
      suppressMessages(
        DBI::dbExecute(
          con,
          paste0(
            "COPY ", tout, " FROM '",
            tsv_file,
            "' ( DELIMITER '\t', HEADER 1, NULL 'NA' )"
          )
        )
      ),
      finally = DBI::dbDisconnect(con, shutdown = TRUE)
    )

    unlink(tsv_file)
    invisible(gc())
  }

  # Record which DuckDB version built the database, and when.
  metadatos <- data.frame(
    version_duckdb = as.character(utils::packageVersion("duckdb")),
    fecha_modificacion = as.character(Sys.time())
  )

  con <- censo_conectar()
  tryCatch(
    suppressMessages(
      DBI::dbWriteTable(con, "metadatos", metadatos,
        append = TRUE, temporary = FALSE
      )
    ),
    finally = DBI::dbDisconnect(con, shutdown = TRUE)
  )

  update_censo_pane()
  censo_pane()
  censo_status()
}

#' Descarga los archivos tsv/shp desde GitHub
#' @noRd
# Downloads the first asset of a GitHub release into `dir`.
#
# repo      "owner/name" of the GitHub repository.
# tag_name  Release tag to fetch; NULL takes the first release the API lists.
# dir       Directory the asset is written to.
# overwrite Passed to httr::write_disk().
#
# Returns the normalised path of the downloaded file, with the release tag
# attached as attribute "ver". Stops when no matching release exists, when the
# release has no assets, or when an HTTP request fails.
get_gh_release_file <- function(repo, tag_name = NULL, dir = tempdir(),
                                overwrite = TRUE) {
  releases <- httr::GET(
    paste0("https://api.github.com/repos/", repo, "/releases")
  )
  httr::stop_for_status(releases, "buscando versiones")

  releases <- httr::content(releases)

  if (is.null(tag_name)) {
    # `releases[1]` on an empty list yields list(NULL), which would slip past
    # the emptiness check below; this form yields an empty list instead.
    release_obj <- releases[seq_len(min(1L, length(releases)))]
  } else {
    release_obj <- purrr::keep(releases, function(x) x$tag_name == tag_name)
  }

  if (length(release_obj) == 0) {
    stop(
      "No se encuenta una version disponible \"",
      tag_name, "\""
    )
  }

  release <- release_obj[[1]]

  if (isTRUE(release$prerelease)) {
    msg("Estos datos aun no se han validado.")
  }

  # Fail with a clear message instead of a subscript error when the release
  # has nothing attached.
  if (length(release$assets) == 0) {
    stop(
      "La version \"", release$tag_name,
      "\" no tiene archivos para descargar.",
      call. = FALSE
    )
  }

  asset <- release$assets[[1]]
  download_url <- asset$url
  filename <- basename(asset$browser_download_url)
  out_path <- normalizePath(file.path(dir, filename), mustWork = FALSE)
  response <- httr::GET(
    download_url,
    httr::accept("application/octet-stream"),
    httr::write_disk(path = out_path, overwrite = overwrite),
    httr::progress()
  )
  httr::stop_for_status(response, "downloading data")

  attr(out_path, "ver") <- release$tag_name
  out_path
}

-------------------------------------------------------------------------------- /R/connect.R:
--------------------------------------------------------------------------------
# Directory that holds the local database: the CENSO2017_DIR environment
# variable when it is set, otherwise the per-user data directory of the
# package. Backslashes are replaced by forward slashes.
censo_path <- function() {
  sys_censo_path <- Sys.getenv("CENSO2017_DIR")
  if (!nzchar(sys_censo_path)) {
    sys_censo_path <- tools::R_user_dir("censo2017")
  }
  gsub("\\\\", "/", sys_censo_path)
}

# Stops with an error when censo_status() reports FALSE for the local database.
censo_check_status <- function() {
  if (!censo_status(FALSE)) {
    # Built from pieces so the message carries no source-code indentation.
    stop(
      "La base de datos local del Censo 2017 esta vacia o daniada.\n",
      "Descargala con censo_descargar()."
    )
  }
}

#' Conexion a la Base de Datos del Censo
#'
#' Devuelve una conexion a la base de datos local. Esto corresponde a una
#' conexion a una base DuckDB compatible con DBI. A diferencia de
#' [censo2017::censo_tabla()], esta funcion es mas flexible y se puede usar con
#' dbplyr para leer unicamente lo que se necesita o directamente con DBI para
#' usar comandos SQL.
#'
#' @param dir La ubicacion de la base de datos en el disco. Por defecto es
#' `censo2017` en la carpeta de datos del usuario de R o la variable de entorno
#' `CENSO2017_DIR` si el usuario la especifica.
#'
#' @export
#'
#' @examples
#' \dontrun{
#'  DBI::dbListTables(censo_conectar())
#'
#'  DBI::dbGetQuery(
#'   censo_conectar(),
#'   'SELECT * FROM comunas WHERE provincia_ref_id = 1'
#'  )
#' }
censo_conectar <- function(dir = censo_path()) {
  # Reuse the cached connection while it is still valid. In that case `dir`
  # is not consulted.
  db <- mget("censo_conectar", envir = censo_cache, ifnotfound = NA)[[1]]
  if (inherits(db, "DBIConnection") && DBI::dbIsValid(db)) {
    return(db)
  }

  # The file name carries the installed DuckDB version with the dots removed.
  duckdb_version <- utils::packageVersion("duckdb")
  db_file <- paste0(
    dir, "/censo2017_duckdb_v", gsub("\\.", "", duckdb_version), ".sql"
  )

  try(dir.create(dir, showWarnings = FALSE, recursive = TRUE))

  drv <- duckdb::duckdb(db_file, read_only = FALSE)

  con <- tryCatch(
    DBI::dbConnect(drv),
    error = function(e) {
      # Match on the condition message only, not on the deparsed call.
      if (grepl("Failed to open database", conditionMessage(e), fixed = TRUE)) {
        stop(
          "La base de datos local del Censo esta siendo usada por otro proceso.\n",
          "Intenta cerrar otras sesiones de R o desconectar la base usando ",
          "censo_desconectar() en las demas sesiones.",
          call. = FALSE
        )
      }
      stop(e)
    }
  )

  # Cache the connection so later calls share it.
  assign("censo_conectar", con, envir = censo_cache)
  con
}

#' Tablas Completas de la Base de Datos del Censo
#'
#' Devuelve una tabla completa de la base de datos. Para entregar datos
#' filtrados previamente se debe usar [censo2017::censo_conectar()].
#'
#' @param tabla Una cadena de texto indicando la tabla a extraer
#' @return Un tibble
#' @export
#'
#' @examples
#' \dontrun{ censo_tabla("comunas") }
censo_tabla <- function(tabla) {
  # Read errors are handed to read_table_error().
  tryCatch(
    tibble::as_tibble(DBI::dbReadTable(censo_conectar(), tabla)),
    error = function(e) read_table_error(e)
  )
}

#' Desconecta la Base de Datos del Censo
#'
#' Una funcion auxiliar para desconectarse de la base de datos.
#'
#' @examples
#' censo_desconectar()
#' @export
#'
censo_desconectar <- function() {
  censo_disconnect_()
}

# Closes the connection cached in `environment` (if there is one) and tells
# the connection observer, when one is registered, that it was closed.
# Also registered below as the finalizer of the cache environment.
censo_disconnect_ <- function(environment = censo_cache) {
  # Look the connection up in the environment that was passed in.
  db <- mget("censo_conectar", envir = environment, ifnotfound = NA)[[1]]
  if (inherits(db, "DBIConnection")) {
    DBI::dbDisconnect(db, shutdown = TRUE)
  }
  observer <- getOption("connectionObserver")
  if (!is.null(observer)) {
    observer$connectionClosed("Censo2017", "censo2017")
  }
}

# Compares the tables found in the local database with the expected set.
# Returns TRUE/FALSE invisibly; prints a coloured status line when `msg` is
# TRUE.
censo_status <- function(msg = TRUE) {
  expected_tables <- sort(censo_tables())
  existing_tables <- sort(DBI::dbListTables(censo_conectar()))

  if (isTRUE(all.equal(expected_tables, existing_tables))) {
    status_msg <- crayon::green(paste(
      cli::symbol$tick,
      "La base de datos local del Censo 2017 esta OK."
    ))
    out <- TRUE
  } else {
    status_msg <- crayon::red(paste(
      cli::symbol$cross,
      "La base de datos local del Censo 2017 esta vacia, daniada o no es compatible con tu version de duckdb. Descargala con censo_descargar()."
    ))
    out <- FALSE
  }
  # `msg` is the logical argument here; the call `msg(...)` still resolves to
  # the function of the same name because R skips non-function bindings when
  # looking up a call.
  if (msg) msg(status_msg)
  invisible(out)
}

# Names of the tables a complete local database must contain.
censo_tables <- function() {
  c("comunas", "hogares", "personas", "provincias",
    "regiones", "viviendas", "zonas",
    "variables", "variables_codificacion", "metadatos")
}

# Environment that caches the open connection; the finalizer closes it when
# the environment is collected or the session exits.
censo_cache <- new.env()
reg.finalizer(censo_cache, censo_disconnect_, onexit = TRUE)

--------------------------------------------------------------------------------
/R/schema.R:
--------------------------------------------------------------------------------
#' Crea el esquema SQL
#' @noRd
create_schema <- function() {
  con <- censo_conectar()

  # One CREATE TABLE statement per table, in creation order. Each table is
  # dropped first if it exists.
  table_ddl <- c(
    comunas = "CREATE TABLE comunas (
      comuna_ref_id INTEGER NOT NULL,
      provincia_ref_id INTEGER NULL,
      idcomuna VARCHAR NULL,
      redcoden VARCHAR(5) NOT NULL,
      nom_comuna VARCHAR NULL)",
    hogares = "CREATE TABLE hogares (
      hogar_ref_id INTEGER NOT NULL,
      vivienda_ref_id INTEGER NULL,
      nhogar INTEGER NULL,
      tipo_hogar INTEGER NULL,
      ncu_yern_nuer INTEGER NULL,
      n_herm_cun INTEGER NULL,
      nuc_herm_cun INTEGER NULL,
      num_sueg_pad_abu INTEGER NULL,
      nuc_pad_sueg_abu INTEGER NULL,
      num_otros INTEGER NULL,
      nuc_otros INTEGER NULL,
      num_no_par INTEGER NULL,
      nuc_no_par INTEGER NULL,
      tot_nucleos INTEGER NULL)",
    personas = "CREATE TABLE personas (
      persona_ref_id DOUBLE NULL,
      hogar_ref_id INTEGER NULL,
      personan INTEGER NULL,
      p07 INTEGER NULL,
      p08 INTEGER NULL,
      p09 INTEGER NULL,
      p10 INTEGER NULL,
      p10comuna INTEGER NULL,
      p10pais INTEGER NULL,
      p10pais_grupo INTEGER NULL,
      p11 INTEGER NULL,
      p11comuna INTEGER NULL,
      p11pais INTEGER NULL,
      p11pais_grupo INTEGER NULL,
      p12 INTEGER NULL,
      p12comuna INTEGER NULL,
      p12pais INTEGER NULL,
      p12pais_grupo INTEGER NULL,
      p12a_llegada INTEGER NULL,
      p12a_tramo INTEGER NULL,
      p13 INTEGER NULL,
      p14 INTEGER NULL,
      p15 INTEGER NULL,
      p15a INTEGER NULL,
      p16 INTEGER NULL,
      p16a INTEGER NULL,
      p16a_otro INTEGER NULL,
      p16a_grupo INTEGER NULL,
      p17 INTEGER NULL,
      p18 VARCHAR NULL,
      p19 INTEGER NULL,
      p20 INTEGER NULL,
      p21m INTEGER NULL,
      p21a INTEGER NULL,
      escolaridad INTEGER NULL,
      rec_parentesco INTEGER NULL)",
    provincias = "CREATE TABLE provincias (
      provincia_ref_id INTEGER NULL,
      region_ref_id INTEGER NULL,
      idprovincia INTEGER NULL,
      redcoden VARCHAR(3) NOT NULL,
      nom_provincia VARCHAR NULL)",
    regiones = "CREATE TABLE regiones (
      region_ref_id INTEGER NOT NULL,
      censo_ref_id INTEGER NULL,
      idregion VARCHAR NULL,
      redcoden VARCHAR(2) NOT NULL,
      nom_region VARCHAR NULL)",
    viviendas = "CREATE TABLE viviendas (
      vivienda_ref_id INTEGER NOT NULL,
      zonaloc_ref_id INTEGER NULL,
      nviv INTEGER NULL,
      p01 INTEGER NULL,
      p02 INTEGER NULL,
      p03a INTEGER NULL,
      p03b INTEGER NULL,
      p03c INTEGER NULL,
      p04 INTEGER NULL,
      p05 INTEGER NULL,
      cant_hog INTEGER NULL,
      cant_per INTEGER NULL,
      ind_hacin DOUBLE NULL,
      ind_hacin_rec INTEGER NULL,
      ind_material INTEGER NULL)",
    metadatos = "CREATE TABLE metadatos (
      version_duckdb VARCHAR NOT NULL,
      fecha_modificacion VARCHAR NOT NULL)",
    variables = "CREATE TABLE variables (
      tabla VARCHAR NULL,
      variable VARCHAR NULL,
      descripcion VARCHAR NULL,
      tipo VARCHAR NULL,
      rango VARCHAR NULL)",
    variables_codificacion = "CREATE TABLE variables_codificacion (
      tabla VARCHAR NULL,
      variable VARCHAR NULL,
      valor INTEGER NULL,
      descripcion VARCHAR NULL)",
    zonas = "CREATE TABLE zonas (
      zonaloc_ref_id INTEGER NOT NULL,
      geocodigo VARCHAR NOT NULL,
      observacion VARCHAR NULL)"
  )

  index_ddl <- c(
    "CREATE UNIQUE INDEX hogares_hogar_ref_id ON hogares (hogar_ref_id)",
    "CREATE UNIQUE INDEX viviendas_vivienda_ref_id ON viviendas (vivienda_ref_id)",
    "CREATE UNIQUE INDEX zonas_zonaloc_ref_id ON zonas (zonaloc_ref_id)",
    "CREATE UNIQUE INDEX zonas_geocodigo ON zonas (geocodigo)"
  )

  # dbExecute() runs a statement and releases its result right away, so no
  # result set is left open. The connection is closed even if a statement
  # fails.
  tryCatch(
    {
      for (tbl in names(table_ddl)) {
        DBI::dbExecute(con, paste("DROP TABLE IF EXISTS", tbl))
        DBI::dbExecute(con, table_ddl[[tbl]])
      }
      for (stmt in index_ddl) {
        DBI::dbExecute(con, stmt)
      }
    },
    finally = DBI::dbDisconnect(con, shutdown = TRUE)
  )

  gc()
}

--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
## creative commons

# CC0 1.0 Universal

CREATIVE
COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED HEREUNDER. 6 | 7 | ### Statement of Purpose 8 | 9 | The laws of most jurisdictions throughout the world automatically confer exclusive Copyright and Related Rights (defined below) upon the creator and subsequent owner(s) (each and all, an "owner") of an original work of authorship and/or a database (each, a "Work"). 10 | 11 | Certain owners wish to permanently relinquish those rights to a Work for the purpose of contributing to a commons of creative, cultural and scientific works ("Commons") that the public can reliably and without fear of later claims of infringement build upon, modify, incorporate in other works, reuse and redistribute as freely as possible in any form whatsoever and for any purposes, including without limitation commercial purposes. These owners may contribute to the Commons to promote the ideal of a free culture and the further production of creative, cultural and scientific works, or to gain reputation or greater distribution for their Work in part through the use and efforts of others. 12 | 13 | For these and/or other purposes and motivations, and without any expectation of additional consideration or compensation, the person associating CC0 with a Work (the "Affirmer"), to the extent that he or she is an owner of Copyright and Related Rights in the Work, voluntarily elects to apply CC0 to the Work and publicly distribute the Work under its terms, with knowledge of his or her Copyright and Related Rights in the Work and the meaning and intended legal effect of CC0 on those rights. 14 | 15 | 1. 
__Copyright and Related Rights.__ A Work made available under CC0 may be protected by copyright and related or neighboring rights ("Copyright and Related Rights"). Copyright and Related Rights include, but are not limited to, the following: 16 | 17 | i. the right to reproduce, adapt, distribute, perform, display, communicate, and translate a Work; 18 | 19 | ii. moral rights retained by the original author(s) and/or performer(s); 20 | 21 | iii. publicity and privacy rights pertaining to a person's image or likeness depicted in a Work; 22 | 23 | iv. rights protecting against unfair competition in regards to a Work, subject to the limitations in paragraph 4(a), below; 24 | 25 | v. rights protecting the extraction, dissemination, use and reuse of data in a Work; 26 | 27 | vi. database rights (such as those arising under Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, and under any national implementation thereof, including any amended or successor version of such directive); and 28 | 29 | vii. other similar, equivalent or corresponding rights throughout the world based on applicable law or treaty, and any national implementations thereof. 30 | 31 | 2. __Waiver.__ To the greatest extent permitted by, but not in contravention of, applicable law, Affirmer hereby overtly, fully, permanently, irrevocably and unconditionally waives, abandons, and surrenders all of Affirmer's Copyright and Related Rights and associated claims and causes of action, whether now known or unknown (including existing as well as future claims and causes of action), in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "Waiver"). 
Affirmer makes the Waiver for the benefit of each member of the public at large and to the detriment of Affirmer's heirs and successors, fully intending that such Waiver shall not be subject to revocation, rescission, cancellation, termination, or any other legal or equitable action to disrupt the quiet enjoyment of the Work by the public as contemplated by Affirmer's express Statement of Purpose. 32 | 33 | 3. __Public License Fallback.__ Should any part of the Waiver for any reason be judged legally invalid or ineffective under applicable law, then the Waiver shall be preserved to the maximum extent permitted taking into account Affirmer's express Statement of Purpose. In addition, to the extent the Waiver is so judged Affirmer hereby grants to each affected person a royalty-free, non transferable, non sublicensable, non exclusive, irrevocable and unconditional license to exercise Affirmer's Copyright and Related Rights in the Work (i) in all territories worldwide, (ii) for the maximum duration provided by applicable law or treaty (including future time extensions), (iii) in any current or future medium and for any number of copies, and (iv) for any purpose whatsoever, including without limitation commercial, advertising or promotional purposes (the "License"). The License shall be deemed effective as of the date CC0 was applied by Affirmer to the Work. Should any part of the License for any reason be judged legally invalid or ineffective under applicable law, such partial invalidity or ineffectiveness shall not invalidate the remainder of the License, and in such case Affirmer hereby affirms that he or she will not (i) exercise any of his or her remaining Copyright and Related Rights in the Work or (ii) assert any associated claims and causes of action with respect to the Work, in either case contrary to Affirmer's express Statement of Purpose. 34 | 35 | 4. __Limitations and Disclaimers.__ 36 | 37 | a. 
No trademark or patent rights held by Affirmer are waived, abandoned, surrendered, licensed or otherwise affected by this document. 38 | 39 | b. Affirmer offers the Work as-is and makes no representations or warranties of any kind concerning the Work, express, implied, statutory or otherwise, including without limitation warranties of title, merchantability, fitness for a particular purpose, non infringement, or the absence of latent or other defects, accuracy, or the present or absence of errors, whether or not discoverable, all to the greatest extent permissible under applicable law. 40 | 41 | c. Affirmer disclaims responsibility for clearing rights of other persons that may apply to the Work or any use thereof, including without limitation any person's Copyright and Related Rights in the Work. Further, Affirmer disclaims responsibility for obtaining any necessary consents, permissions or other rights required for any use of the Work. 42 | 43 | d. Affirmer understands and acknowledges that Creative Commons is not a party to this document and has no duty or obligation with respect to this CC0 or use of the Work. 
44 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, include = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "man/figures/README-", 12 | out.width = "100%" 13 | ) 14 | ``` 15 | 16 | # Censo 2017 (Paquete R) 17 | 18 | 19 | [![Project Status: Active – The project has reached a stable, usable state and is being actively developed.](https://lifecycle.r-lib.org/articles/figures/lifecycle-stable.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable-1) 20 | [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#stable) 21 | [![GH-actions](https://github.com/ropensci/censo2017/workflows/R-CMD-check/badge.svg)](https://github.com/ropensci/censo2017/actions) 22 | [![codecov](https://codecov.io/gh/ropensci/censo2017/branch/main/graph/badge.svg?token=XI59cmGd15)](https://codecov.io/gh/ropensci/censo2017) 23 | [![CRAN status](https://www.r-pkg.org/badges/version/censo2017)](https://CRAN.R-project.org/package=censo2017) 24 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4277761.svg)](https://doi.org/10.5281/zenodo.4277761) 25 | [![Buy Me a Coffee](https://img.shields.io/badge/buymeacoffee-pacha-yellow)](https://www.buymeacoffee.com/pacha?via=github) 26 | [![Status at rOpenSci Software Peer Review](https://badges.ropensci.org/414_status.svg)](https://github.com/ropensci/software-review/issues/414) 27 | 28 | 29 | # Acerca de 30 | 31 | Provee un acceso conveniente a mas de 17 millones de registros de la base de datos del Censo 2017. Los datos fueron importados desde el DVD oficial del INE usando el [Convertidor REDATAM](https://github.com/discontinuos/redatam-converter/) creado por Pablo De Grande y ademas se proporcionan los mapas que acompanian a estos datos. 
Estos mismos datos en DVD posteriormente quedaron disponibles en las [Bases de Datos del INE](https://www.ine.cl/estadisticas/sociales/censos-de-poblacion-y-vivienda/poblacion-y-vivienda). 32 | 33 | Despues de la primera llamada a `library(censo2017)` se le pedira al usuario que descargue la base usando `censo_descargar()` y se puede modificar la ruta de descarga con la variable de entorno `CENSO2017_DIR`. La variable de entorno se puede crear con `usethis::edit_r_environ()`. 34 | 35 | La documentacion esta disponible en https://docs.ropensci.org/censo2017/. 36 | 37 | # Publico objetivo 38 | 39 | Estudiantes, academicos e investigadores que necesiten un acceso conveniente a datos censales directamente en R o RStudio. 40 | 41 | # Requerimientos de instalacion 42 | 43 | Este paquete necesita 3.5 GB libres para crear la base de datos localmente. 44 | 45 | # Instalacion 46 | 47 | Version estable 48 | ``` 49 | install.packages("censo2017") 50 | ``` 51 | 52 | Version de desarrollo 53 | ``` 54 | # install.packages("remotes") 55 | remotes::install_github("ropensci/censo2017") 56 | ``` 57 | 58 | # Valor agregado sobre los archivos SHP y REDATAM del INE 59 | 60 | Esta version de la base de datos del Censo 2017 presenta algunas diferencias respecto de la original que se obtiene en DVD y corresponde a una version DuckDB derivada a partir de los Microdatos del Censo 2017 en formato DVD. 61 | 62 | La modificacion sobre los archivos originales, que incluyen geometrias detalladas disponibles en [Cartografias Censo2017](https://github.com/ropensci/censo2017-cartografias), consistio en unir todos los archivos SHP regionales en una unica tabla por nivel (e.g. en lugar de proveer `R01_mapa_comunas`, ..., `R15_mapa_comunas` combine las 15 regiones en una unica tabla `mapa_comunas`). 63 | 64 | Los cambios concretos respecto de la base original son los siguientes: 65 | 66 | * Nombres de columna en formato "tidy" (e.g. `comuna_ref_id` en lugar de `COMUNA_REF_ID`). 
67 | * Agregue los nombres de las unidades geograficas (e.g. se incluye `nom_comuna` en la tabla `comunas` para facilitar los filtros). 68 | * Aniadi la variable `geocodigo` a la tabla de `zonas`. Esto facilita mucho las uniones con las tablas de mapas en SQL. 69 | * Tambien inclui las observaciones 16054 a 16060 en la variable `zonaloc_ref_id`. Esto se debio a que era necesario para crear una llave foranea desde la tabla `mapa_zonas` (ver repositorio [Cartografias Censo2017](https://github.com/ropensci/censo2017-cartografias)) y vincular el `geocodigo` (no todas las zonas del mapa estan presentes en los datos del Censo). 70 | 71 | Ademas de los datos del Censo, inclui la descripcion de las variables en formato tabla (y no en XML como se obtiene del DVD). La ventaja de esto es poder consultar rapidamente lo que significan los codigos de variables y su etiquetado, por ejemplo: 72 | ```{r message=FALSE, warning=FALSE} 73 | # con la bbdd instalada 74 | library(censo2017) 75 | library(dplyr) 76 | 77 | censo_tabla("variables") %>% 78 | filter(variable == "p01") 79 | 80 | censo_tabla("variables_codificacion") %>% 81 | filter(variable == "p01") 82 | ``` 83 | 84 | # Relacion de Censo 2017 con Chilemapas 85 | 86 | Todos los datos de estos repositorios contemplan 15 regiones pues los archivos del Censo se entregan de esta forma y este paquete esta 100% orientado a facilitar el acceso a datos. 87 | 88 | Por su parte, [chilemapas](https://CRAN.R-project.org/package=chilemapas) se centra unicamente en los mapas y tambien usa las cartografias del DVD del Censo para entregar mapas simplificados (de menor detalle y mas livianos). Chilemapas cuenta con una transformacion de codigos para dar cuenta de la creacion de la Region de Niuble. 89 | 90 | En resumen, censo2017 permite construir estadisticas demograficas y chilemapas ayuda a mostrarlas en un mapa usando ggplot2 (u otro paquete como tmap). 
91 | 92 | # Cita este trabajo 93 | 94 | Si usas `censo2017` en trabajos academicos u otro tipo de publicacion por favor usa la siguiente cita: 95 | 96 | ``` 97 | Mauricio Vargas (2020). censo2017: Base de Datos de Facil Acceso del Censo 98 | 2017 de Chile (2017 Chilean Census Easy Access Database). R package version 99 | 0.1. https://docs.ropensci.org/censo2017/ 100 | ``` 101 | 102 | Entrada para BibTeX: 103 | 104 | ``` 105 | @Manual{, 106 | title = {censo2017: Base de Datos de F\'acil Acceso del Censo 2017 de Chile 107 | (2017 Chilean Census Easy Access Database)}, 108 | author = {Mauricio Vargas}, 109 | year = {2020}, 110 | note = {R package version 0.1}, 111 | url = {https://docs.ropensci.org/censo2017/}, 112 | doi = {10.5281/zenodo.4277761} 113 | } 114 | ``` 115 | 116 | # Contribuciones 117 | 118 | Para contribuir a este proyecto debes estar de acuerdo con el [Codigo de Conducta de rOpenSci](https://ropensci.org/code-of-conduct/). Me es util contar con mas ejemplos, mejoras a las funciones y todo lo que ayude a la comunidad. Si tienes algo que aportar me puedes dejar un issue o pull request. 119 | 120 | # Agradecimientos 121 | 122 | Muchas gracias a Juan Correa por su asesoria como geografo experto. 123 | 124 | # Aportes 125 | 126 | Si quieres donar para aportar al desarrollo de este y mas paquetes Open Source, puedes hacerlo en [Buy Me a Coffee](https://www.buymeacoffee.com/pacha/). 
127 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Censo 2017 (Paquete R) 5 | 6 | 7 | 8 | [![Project Status: Active – The project has reached a stable, usable 9 | state and is being actively 10 | developed.](https://lifecycle.r-lib.org/articles/figures/lifecycle-stable.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable-1) 11 | [![Lifecycle: 12 | stable](https://img.shields.io/badge/lifecycle-maturing-blue.svg)](https://www.tidyverse.org/lifecycle/#stable) 13 | [![GH-actions](https://github.com/ropensci/censo2017/workflows/R-CMD-check/badge.svg)](https://github.com/ropensci/censo2017/actions) 14 | [![codecov](https://codecov.io/gh/ropensci/censo2017/branch/main/graph/badge.svg?token=XI59cmGd15)](https://codecov.io/gh/ropensci/censo2017) 15 | [![CRAN 16 | status](https://www.r-pkg.org/badges/version/censo2017)](https://CRAN.R-project.org/package=censo2017) 17 | [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.4277761.svg)](https://doi.org/10.5281/zenodo.4277761) 18 | [![Buy Me a 19 | Coffee](https://img.shields.io/badge/buymeacoffee-pacha-yellow)](https://www.buymeacoffee.com/pacha?via=github) 20 | [![Status at rOpenSci Software Peer 21 | Review](https://badges.ropensci.org/414_status.svg)](https://github.com/ropensci/software-review/issues/414) 22 | 23 | 24 | # Acerca de 25 | 26 | Provee un acceso conveniente a mas de 17 millones de registros de la 27 | base de datos del Censo 2017. Los datos fueron importados desde el DVD 28 | oficial del INE usando el [Convertidor 29 | REDATAM](https://github.com/discontinuos/redatam-converter/) creado por 30 | Pablo De Grande y ademas se proporcionan los mapas que acompanian a 31 | estos datos. 
Estos mismos datos en DVD posteriormente quedaron 32 | disponibles en las [Bases de Datos del 33 | INE](https://www.ine.cl/estadisticas/sociales/censos-de-poblacion-y-vivienda/poblacion-y-vivienda). 34 | 35 | Despues de la primera llamada a `library(censo2017)` se le pedira al 36 | usuario que descargue la base usando `censo_descargar()` y se puede 37 | modificar la ruta de descarga con la variable de entorno 38 | `CENSO2017_DIR`. La variable de entorno se puede crear con 39 | `usethis::edit_r_environ()`. 40 | 41 | La documentacion esta disponible en 42 | <https://docs.ropensci.org/censo2017/>. 43 | 44 | # Publico objetivo 45 | 46 | Estudiantes, academicos e investigadores que necesiten un acceso 47 | conveniente a datos censales directamente en R o RStudio. 48 | 49 | # Requerimientos de instalacion 50 | 51 | Este paquete necesita 3.5 GB libres para crear la base de datos 52 | localmente. Una vez creada la base, esta ocupa 1.0 GB en disco. 53 | 54 | # Instalacion 55 | 56 | Version estable 57 | 58 | install.packages("censo2017") 59 | 60 | Version de desarrollo 61 | 62 | # install.packages("remotes") 63 | remotes::install_github("ropensci/censo2017") 64 | 65 | # Valor agregado sobre los archivos SHP y REDATAM del INE 66 | 67 | Esta version de la base de datos del Censo 2017 presenta algunas 68 | diferencias respecto de la original que se obtiene en DVD y corresponde 69 | a una version DuckDB derivada a partir de los Microdatos del Censo 2017 70 | en formato DVD. 71 | 72 | La modificacion sobre los archivos originales, que incluyen geometrias 73 | detalladas disponibles en [Cartografias 74 | Censo2017](https://github.com/ropensci/censo2017-cartografias), 75 | consistio en unir todos los archivos SHP regionales en una unica tabla 76 | por nivel (e.g. en lugar de proveer `R01_mapa_comunas`, …, 77 | `R15_mapa_comunas` combine las 15 regiones en una unica tabla 78 | `mapa_comunas`). 
79 | 80 | Los cambios concretos respecto de la base original son los siguientes: 81 | 82 | - Nombres de columna en formato “tidy” (e.g. `comuna_ref_id` en lugar 83 | de `COMUNA_REF_ID`). 84 | - Agregue los nombres de las unidades geograficas (e.g. se incluye 85 | `nom_comuna` en la tabla `comunas` para facilitar los filtros). 86 | - Aniadi la variable `geocodigo` a la tabla de `zonas`. Esto facilita 87 | mucho las uniones con las tablas de mapas en SQL. 88 | - Tambien inclui las observaciones 16054 a 16060 en la variable 89 | `zonaloc_ref_id`. Esto se debio a que era necesario para crear una 90 | llave foranea desde la tabla `mapa_zonas` (ver repositorio 91 | [Cartografias 92 | Censo2017](https://github.com/ropensci/censo2017-cartografias)) y 93 | vincular el `geocodigo` (no todas las zonas del mapa estan presentes 94 | en los datos del Censo). 95 | 96 | Ademas de los datos del Censo, inclui la descripcion de las variables en 97 | formato tabla (y no en XML como se obtiene del DVD). La ventaja de esto 98 | es poder consultar rapidamente lo que significan los codigos de 99 | variables y su etiquetado, por ejemplo, como explico en la 100 | [historia del proyecto](https://ropensci.org/blog/2021/07/27/censo2017-es/). 101 | 102 | # Relacion de Censo 2017 con Chilemapas 103 | 104 | Todos los datos de estos repositorios contemplan 15 regiones pues los 105 | archivos del Censo se entregan de esta forma y este paquete esta 100% 106 | orientado a facilitar el acceso a datos. 107 | 108 | Por su parte, [chilemapas](https://CRAN.R-project.org/package=chilemapas) se 109 | centra unicamente en los mapas y tambien usa las cartografias del DVD 110 | del Censo para entregar mapas simplificados (de menor detalle y mas 111 | livianos). Chilemapas cuenta con una transformacion de codigos para dar 112 | cuenta de la creacion de la Region de Niuble. 
113 | 114 | En resumen, censo2017 permite construir estadisticas demograficas y 115 | chilemapas ayuda a mostrarlas en un mapa usando ggplot2 (u otro paquete 116 | como tmap). 117 | 118 | # Cita este trabajo 119 | 120 | Si usas `censo2017` en trabajos academicos u otro tipo de publicacion 121 | por favor usa la siguiente cita: 122 | 123 | Mauricio Vargas (2020). censo2017: Base de Datos de Facil Acceso del Censo 124 | 2017 de Chile (2017 Chilean Census Easy Access Database). R package version 125 | 0.1. https://docs.ropensci.org/censo2017/ 126 | 127 | Entrada para BibTeX: 128 | 129 | @Manual{, 130 | title = {censo2017: Base de Datos de F\'acil Acceso del Censo 2017 de Chile 131 | (2017 Chilean Census Easy Access Database)}, 132 | author = {Mauricio Vargas}, 133 | year = {2020}, 134 | note = {R package version 0.1}, 135 | url = {https://docs.ropensci.org/censo2017/}, 136 | doi = {10.5281/zenodo.4277761} 137 | } 138 | 139 | # Contribuciones 140 | 141 | Para contribuir a este proyecto debes estar de acuerdo con el [Codigo de 142 | Conducta de rOpenSci](https://ropensci.org/code-of-conduct/). Me es util 143 | contar con mas ejemplos, mejoras a las funciones y todo lo que ayude a 144 | la comunidad. Si tienes algo que aportar me puedes dejar un issue o pull 145 | request. 146 | 147 | # Agradecimientos 148 | 149 | Muchas gracias a Juan Correa por su asesoria como geografo experto. 150 | 151 | # Aportes 152 | 153 | Si quieres donar para aportar al desarrollo de este y mas paquetes Open 154 | Source, puedes hacerlo en [Buy Me a 155 | Coffee](https://www.buymeacoffee.com/pacha/). 156 | -------------------------------------------------------------------------------- /vignettes/censo2017.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Uso basico del paquete censo2017" 3 | author: "Mauricio Vargas S." 
4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Uso basico del paquete censo2017} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | %\VignetteEncoding{UTF-8} 10 | --- 11 | 12 | ```{r setup, include = FALSE} 13 | knitr::opts_chunk$set( 14 | cache = FALSE, 15 | collapse = TRUE, 16 | eval = TRUE, 17 | comment = "#>" 18 | ) 19 | ``` 20 | 21 | # Introduccion 22 | 23 | Este paquete se integra perfectamente con el tidyverse, con el que daremos un ejemplo muy basico para mostrar sus principales funciones. 24 | 25 | # Aproximacion de la poblacion con el grado de doctor en la Region del Bio Bio. 26 | 27 | Se procedera a obtener una aproximacion usando dplyr ya que puede haber personas que no sean de la comuna y aparezcan censadas en ella. Sin embargo, cabe aclarar que para esta ocasion no haremos el filtro que corrige esto. Nuestra idea es mantener el ejemplo lo mas simple posible. 28 | 29 | Primero que todo, se cargan los paquetes necesarios. 30 | 31 | * censo2017: Proporciona los datos censales para poder generar las tablas y graficos de este ejemplo. 32 | * dplyr: Facilita filtrar datos en una tabla, unir distintas tablas y en general todas las tareas de limpieza y transformacion de datos. 33 | * ggplot2: Nos permite graficar usando el concepto de "gramatica de graficos", es decir que podemos ir creando graficos incrementales y controlar los ejes, titulos y demas elementos por separado. 34 | * chilemapas: Nos entrega mapas terrestres con topologias simplificadas. Esto significa que este paquete, al contener poligonos que generan graficos, y no tablas, nos permite un uso mas sencillo para lo que queremos realizar, que es darle un complemento visual a la informacion cargada desde censo2017.
35 | 36 | ```{r, warning=FALSE, message=FALSE} 37 | library(censo2017) 38 | library(dplyr) 39 | library(ggplot2) 40 | library(chilemapas) 41 | ``` 42 | 43 | Hay que realizar algunos cruces de tablas, de manera de filtrar la region que nos interesa. 44 | 45 | Comenzamos con la tabla zonas: generamos la provincia a partir del geocodigo y luego filtro para unir hasta llegar a la tabla personas. Nos interesa utilizar la variable `p15`, cuya descripcion esta en la tabla `variables` y cuya codificacion aparece en la tabla `variables_codificacion`. 46 | 47 | ```{r, warning=FALSE, message=FALSE, eval=FALSE} 48 | nivel_educacional_biobio <- tbl(censo_conectar(), "zonas") %>% 49 | mutate( 50 | region = substr(as.character(geocodigo), 1, 2), 51 | comuna = substr(as.character(geocodigo), 1, 5) 52 | ) %>% 53 | filter(region == "08") %>% 54 | select(comuna, geocodigo, zonaloc_ref_id) %>% 55 | inner_join(select(tbl(censo_conectar(), "viviendas"), zonaloc_ref_id, vivienda_ref_id), by = "zonaloc_ref_id") %>% 56 | inner_join(select(tbl(censo_conectar(), "hogares"), vivienda_ref_id, hogar_ref_id), by = "vivienda_ref_id") %>% 57 | inner_join(select(tbl(censo_conectar(), "personas"), hogar_ref_id, nivel_educ = p15), by = "hogar_ref_id") %>% 58 | collect() 59 | ``` 60 | 61 | Con lo anterior, los niveles educacionales de las personas censadas se pueden agrupar por comuna y obtener la cuenta proporcionada en base a la suma total. 62 | ```{r, warning=FALSE, message=FALSE, eval=FALSE} 63 | nivel_educacional_biobio <- nivel_educacional_biobio %>% 64 | group_by(comuna, nivel_educ) %>% 65 | summarise(cuenta = n()) %>% 66 | group_by(comuna) %>% 67 | mutate(proporcion = cuenta / sum(cuenta)) 68 | ``` 69 | 70 | Vemos los datos antes de continuar. 71 | ```{r} 72 | nivel_educacional_biobio 73 | ``` 74 | 75 | Creamos la variable mapa_biobio haciendo un filtro para obtener unicamente los datos de la region con codigo "08" (region del Bio Bio) desde mapa_comunas. 
Luego de eso, haremos un left join (union de tablas manteniendo todas las filas de la tabla izquierda o inicial) desde la tabla chilemapas, donde obtendremos el mapa de la region, y la uniremos con los datos coincidentes segun el codigo_comuna de la tabla censo2017. 76 | ```{r, warning=FALSE, message=FALSE} 77 | mapa_biobio <- mapa_comunas %>% 78 | filter(codigo_region == "08") %>% 79 | left_join(nivel_educacional_biobio, by = c("codigo_comuna" = "comuna")) 80 | ``` 81 | 82 | Ahora que cargamos toda la informacion necesaria en R desde la base de datos, debemos cerrar la conexion SQL (importante). 83 | ```{r, warning=FALSE, message=FALSE} 84 | censo_desconectar() 85 | ``` 86 | 87 | Finalmente procedemos a generar el mapa. 88 | 89 | Primero, creamos la variable colors, en la que incluiremos los codigos hexadecimales de los colores que utilizaremos al momento de crear el mapa. 90 | 91 | Hecho esto, utilizamos geom_sf del paquete ggplot2, que se usa para visualizar objetos de caracteristicas simples (sf = simple features). Geom_sf dibujara diferentes objetos geometricos a partir de una columna de tipo 'sf' que debe estar presente en los datos. 92 | 93 | Seleccionamos el codigo_comuna y geometry (que contiene los poligonos que componen cada region) desde el mapa_biobio, que creamos anteriormente. Volvemos a hacer un left_join ahora de mapa_biobio seleccionando unicamente codigo_comuna, nivel_educ y proporcion.
94 | ```{r, fig.width=10, warning=FALSE, message=FALSE} 95 | colors <- c("#DCA761","#C6C16D","#8B9C94","#628CA5","#5A6C7A") 96 | 97 | g <- ggplot() + 98 | geom_sf(data = mapa_biobio %>% 99 | select(codigo_comuna, geometry) %>% 100 | left_join( 101 | mapa_biobio %>% 102 | filter(nivel_educ == 14) %>% 103 | select(codigo_comuna, nivel_educ, proporcion), 104 | by = "codigo_comuna" 105 | ), 106 | aes(fill = proporcion, geometry = geometry), 107 | size = 0.1) + 108 | scale_fill_gradientn(colours = rev(colors), name = "Porcentaje") + 109 | labs(title = "Porcentaje de habitantes con el grado de doctor\npor comuna en la Region del Bio Bio") + 110 | theme_minimal(base_size = 13) 111 | 112 | g 113 | ``` 114 | 115 | Notas: 116 | 117 | * El uso de `tbl()` y `collect()` en la primera parte se podra entender mejor leyendo, por ejemplo, [A Crash Course on PostgreSQL for R Users](https://pacha.dev/blog/2020/08/09/a-crash-course-on-postgresql-for-r-users/). 118 | * En la segunda parte se usa `censo_tabla()` ya que SQL almacena la columna `geometry` (de tipo poligono) como cadena de texto mientras que R lee poligonos sin problema. 119 | * En la tercera parte hago un join entre el mapa completo y la tabla con quienes tienen el grado de doctor. Este paso, aunque pueda parecer redundante, es necesario si quiero mostrar las zonas con 0 doctores y si lo omito se borran algunas zonas del mapa. 120 | * El mapa que se genero usando las funciones de chilemapas podria haberse generado con las cartografias oficiales del Censo (ver repositorio [Cartografias Censo2017](https://github.com/ropensci/censo2017-cartografias)). Esta alternativa entrega un mayor nivel de detalle, pero requiere mayor esfuerzo para leer las cartografias y el tiempo requerido para generar los mapas aumenta fuertemente.
Explorar la columna `p10` en la tabla `personas` y realizar un grafico que de cuenta de la poblacion efectiva de la comuna (e.g. mejorando el problema de personas que podrian no ser de la comuna en el ejemplo). 126 | 3. Agregar datos al mapa sin usar `chilemapas`. Una forma de hacerlo es la siguiente: 127 | 128 | ```{r, warning=FALSE, message=FALSE, eval=FALSE} 129 | mapa_biobio <- censo_tabla("mapa_comunas") %>% 130 | filter(region == "08") %>% 131 | left_join(nivel_educacional_biobio, by = "comuna") 132 | ``` 133 | -------------------------------------------------------------------------------- /data-raw/02-create-local-db-duckdb.R: -------------------------------------------------------------------------------- 1 | # Copies every table of data-raw/censo2017.sqlite into data-raw/censo2017.duckdb 2 | # packages ---- 3 | library(dplyr) 4 | library(DBI) 5 | library(duckdb) 6 | library(RSQLite) 7 | 8 | # connections: SQLite source (con), DuckDB target (con2) ---- 9 | 10 | con <- dbConnect(SQLite(), "data-raw/censo2017.sqlite") 11 | tablas <- dbListTables(con) 12 | 13 | con2 <- dbConnect(duckdb(), "data-raw/censo2017.duckdb") 14 | 15 | # comunas (dbExecute is used for DDL so no result set is left open) ---- 16 | 17 | dbExecute(con2, "DROP TABLE IF EXISTS comunas") 18 | 19 | dbExecute( 20 | con2, 21 | "CREATE TABLE comunas ( 22 | comuna_ref_id INTEGER NOT NULL, 23 | provincia_ref_id INTEGER NULL, 24 | idcomuna VARCHAR NULL, 25 | redcoden VARCHAR(5) NOT NULL, 26 | nom_comuna VARCHAR NULL)" 27 | ) 28 | 29 | # hogares ---- 30 | 31 | dbExecute(con2, "DROP TABLE IF EXISTS hogares") 32 | 33 | dbExecute( 34 | con2, 35 | "CREATE TABLE hogares ( 36 | hogar_ref_id INTEGER NOT NULL, 37 | vivienda_ref_id INTEGER NULL, 38 | nhogar INTEGER NULL, 39 | tipo_hogar INTEGER NULL, 40 | ncu_yern_nuer INTEGER NULL, 41 | n_herm_cun INTEGER NULL, 42 | nuc_herm_cun INTEGER NULL, 43 | num_sueg_pad_abu INTEGER NULL, 44 | nuc_pad_sueg_abu INTEGER NULL, 45 | num_otros INTEGER NULL, 46 | nuc_otros INTEGER NULL, 47 | num_no_par INTEGER NULL, 48 | nuc_no_par INTEGER NULL, 49 | tot_nucleos INTEGER NULL)" 50 | ) 51 | 52 | # mapa comunas ---- 53 | 54 | dbExecute(con2,
"DROP TABLE IF EXISTS mapa_comunas") 55 | 56 | dbExecute( 57 | con2, 58 | "CREATE TABLE mapa_comunas ( 59 | geometry VARCHAR NULL, 60 | region VARCHAR(2) NULL, 61 | provincia VARCHAR(3) NULL, 62 | comuna VARCHAR(5) NOT NULL)" 63 | ) 64 | 65 | # mapa provincias ---- 66 | 67 | dbExecute(con2, "DROP TABLE IF EXISTS mapa_provincias") 68 | 69 | dbExecute( 70 | con2, 71 | "CREATE TABLE mapa_provincias ( 72 | geometry VARCHAR NULL, 73 | region VARCHAR(2) NULL, 74 | provincia VARCHAR(3) NOT NULL)" 75 | ) 76 | 77 | # mapa regiones ---- 78 | 79 | dbExecute(con2, "DROP TABLE IF EXISTS mapa_regiones") 80 | 81 | dbExecute( 82 | con2, 83 | "CREATE TABLE mapa_regiones ( 84 | geometry VARCHAR NULL, 85 | region VARCHAR(2) NOT NULL);" 86 | ) 87 | 88 | # mapa zonas ---- 89 | dbExecute(con2, "DROP TABLE IF EXISTS mapa_zonas") 90 | 91 | dbExecute( 92 | con2, 93 | "CREATE TABLE mapa_zonas ( 94 | geometry VARCHAR NULL, 95 | region VARCHAR(2) NULL, 96 | provincia VARCHAR(3) NULL, 97 | comuna VARCHAR(5) NULL, 98 | geocodigo VARCHAR(11) NOT NULL)" 99 | ) 100 | 101 | # personas ---- 102 | 103 | dbExecute(con2, "DROP TABLE IF EXISTS personas") 104 | 105 | dbExecute( 106 | con2, 107 | "CREATE TABLE personas ( 108 | persona_ref_id INTEGER NULL, 109 | hogar_ref_id INTEGER NULL, 110 | personan INTEGER NULL, 111 | p07 INTEGER NULL, 112 | p08 INTEGER NULL, 113 | p09 INTEGER NULL, 114 | p10 INTEGER NULL, 115 | p10comuna INTEGER NULL, 116 | p10pais INTEGER NULL, 117 | p10pais_grupo INTEGER NULL, 118 | p11 INTEGER NULL, 119 | p11comuna INTEGER NULL, 120 | p11pais INTEGER NULL, 121 | p11pais_grupo INTEGER NULL, 122 | p12 INTEGER NULL, 123 | p12comuna INTEGER NULL, 124 | p12pais INTEGER NULL, 125 | p12pais_grupo INTEGER NULL, 126 | p12a_llegada INTEGER NULL, 127 | p12a_tramo INTEGER NULL, 128 | p13 INTEGER NULL, 129 | p14 INTEGER NULL, 130 | p15 INTEGER NULL, 131 | p15a INTEGER NULL, 132 | p16 INTEGER NULL, 133 | p16a INTEGER NULL, 134 | p16a_otro INTEGER NULL, 135 | p16a_grupo INTEGER
NULL, 136 | p17 INTEGER NULL, 137 | p18 VARCHAR NULL, 138 | p19 INTEGER NULL, 139 | p20 INTEGER NULL, 140 | p21m INTEGER NULL, 141 | p21a INTEGER NULL, 142 | escolaridad INTEGER NULL, 143 | rec_parentesco INTEGER NULL)" 144 | ) 145 | 146 | # provincias ---- 147 | 148 | dbExecute(con2, "DROP TABLE IF EXISTS provincias") 149 | 150 | dbExecute( 151 | con2, 152 | "CREATE TABLE provincias ( 153 | provincia_ref_id INTEGER NULL, 154 | region_ref_id INTEGER NULL, 155 | idprovincia INTEGER NULL, 156 | redcoden VARCHAR(3) NOT NULL, 157 | nom_provincia VARCHAR NULL)" 158 | ) 159 | 160 | # regiones ---- 161 | 162 | dbExecute(con2, "DROP TABLE IF EXISTS regiones") 163 | 164 | dbExecute( 165 | con2, 166 | "CREATE TABLE regiones ( 167 | region_ref_id INTEGER NOT NULL, 168 | censo_ref_id INTEGER NULL, 169 | idregion VARCHAR NULL, 170 | redcoden VARCHAR(2) NOT NULL, 171 | nom_region VARCHAR NULL)" 172 | ) 173 | 174 | # viviendas ---- 175 | 176 | dbExecute(con2, "DROP TABLE IF EXISTS viviendas") 177 | 178 | dbExecute( 179 | con2, 180 | "CREATE TABLE viviendas ( 181 | vivienda_ref_id INTEGER NOT NULL, 182 | zonaloc_ref_id INTEGER NULL, 183 | nviv INTEGER NULL, 184 | p01 INTEGER NULL, 185 | p02 INTEGER NULL, 186 | p03a INTEGER NULL, 187 | p03b INTEGER NULL, 188 | p03c INTEGER NULL, 189 | p04 INTEGER NULL, 190 | p05 INTEGER NULL, 191 | cant_hog INTEGER NULL, 192 | cant_per INTEGER NULL, 193 | ind_hacin DOUBLE NULL, 194 | ind_hacin_rec INTEGER NULL, 195 | ind_material INTEGER NULL)" 196 | ) 197 | 198 | # zonas ---- 199 | 200 | dbExecute(con2, "DROP TABLE IF EXISTS zonas") 201 | 202 | dbExecute( 203 | con2, 204 | "CREATE TABLE zonas ( 205 | zonaloc_ref_id INTEGER NOT NULL, 206 | geocodigo VARCHAR NOT NULL, 207 | observacion VARCHAR NULL)" 208 | ) 209 | 210 | # disconnect: close DuckDB once the empty schema exists ---- 211 | 212 | duckdb::dbDisconnect(con2, shutdown = TRUE) 213 | gc() 214 | 215 | # metadata: duckdb package version and modification timestamp ---- 216 | 217 | metadatos <- data.frame(version_duckdb = utils::packageVersion("duckdb"), 218 |
fecha_modificacion = Sys.time()) 219 | metadatos$version_duckdb <- as.character(metadatos$version_duckdb) 220 | metadatos$fecha_modificacion <- as.character(metadatos$fecha_modificacion) 221 | 222 | # connect, copy table, disconnect and repeat ---- 223 | 224 | for (t in tablas) { 225 | message(t) 226 | d <- dbReadTable(con, t) 227 | 228 | con2 <- dbConnect(duckdb(), "data-raw/censo2017.duckdb") 229 | dbWriteTable(con2, t, d, append = TRUE, temporary = FALSE) 230 | dbDisconnect(con2, shutdown = TRUE) 231 | 232 | rm(d) # drop the copied table first so gc() can actually reclaim it 233 | gc() 234 | } 235 | 236 | gc() 237 | dbDisconnect(con) 238 | 239 | con2 <- dbConnect(duckdb(), "data-raw/censo2017.duckdb") 240 | copy_to(con2, metadatos, "metadatos", temporary = FALSE) 241 | dbDisconnect(con2, shutdown = TRUE) 242 | 243 | # create indexes (unique on id/code columns, non-unique on map tables) ---- 244 | 245 | con2 <- dbConnect(duckdb(), "data-raw/censo2017.duckdb") 246 | 247 | dbExecute(con2, "CREATE UNIQUE INDEX comunas_redcoden ON comunas (redcoden)") 248 | dbExecute(con2, "CREATE UNIQUE INDEX provincias_redcoden ON provincias (redcoden)") 249 | dbExecute(con2, "CREATE UNIQUE INDEX regiones_redcoden ON regiones (redcoden)") 250 | 251 | dbExecute(con2, "CREATE UNIQUE INDEX hogares_hogar_ref_id ON hogares (hogar_ref_id)") 252 | dbExecute(con2, "CREATE UNIQUE INDEX viviendas_vivienda_ref_id ON viviendas (vivienda_ref_id)") 253 | 254 | dbExecute(con2, "CREATE UNIQUE INDEX zonas_zonaloc_ref_id ON zonas (zonaloc_ref_id)") 255 | dbExecute(con2, "CREATE UNIQUE INDEX zonas_geocodigo ON zonas (geocodigo)") 256 | 257 | dbExecute(con2, "CREATE INDEX mapa_comunas_comuna ON mapa_comunas (comuna)") 258 | dbExecute(con2, "CREATE INDEX mapa_provincias_provincia ON mapa_provincias (provincia)") 259 | dbExecute(con2, "CREATE INDEX mapa_regiones_region ON mapa_regiones (region)") 260 | dbExecute(con2, "CREATE INDEX mapa_zonas_geocodigo ON mapa_zonas (geocodigo)") 261 | 262 | dbDisconnect(con2, shutdown = TRUE) 263 | 264 | # test: row and column counts must match between SQLite and DuckDB ---- 265 | 266 | for (t in tablas) { 267 | message(t) 268 | 269 |
con <- dbConnect(SQLite(), "data-raw/censo2017.sqlite") 270 | d <- dbReadTable(con, t) 271 | d <- c(nrow(d), ncol(d)) 272 | dbDisconnect(con) 273 | 274 | con2 <- dbConnect(duckdb(), "data-raw/censo2017.duckdb") 275 | d2 <- dbReadTable(con2, t) 276 | d2 <- c(nrow(d2), ncol(d2)) 277 | dbDisconnect(con2, shutdown = TRUE) 278 | stopifnot(d[1] == d2[1]) 279 | stopifnot(d[2] == d2[2]) 280 | 281 | message(paste(paste0("r", d[1], " c", d[2]), "vs", paste0("r", d2[1], " c", d2[2]))) 282 | } 283 | -------------------------------------------------------------------------------- /codemeta.json: -------------------------------------------------------------------------------- 1 | { 2 | "@context": [ 3 | "https://doi.org/10.5063/schema/codemeta-2.0", 4 | "http://schema.org" 5 | ], 6 | "@type": "SoftwareSourceCode", 7 | "identifier": "censo2017", 8 | "description": "Provee un acceso conveniente a mas de 17 millones de registros\n de la base de datos del Censo 2017. Los datos fueron importados desde\n el DVD oficial del INE usando el Convertidor REDATAM creado por Pablo De\n Grande. Esta paquete esta documentado intencionalmente en castellano\n asciificado para que funcione sin problema en diferentes plataformas.\n (Provides convenient access to more than 17 million records from the\n Chilean Census 2017 database.
The datasets were imported from the official\n DVD provided by the Chilean National Bureau of Statistics by using the\n REDATAM converter created by Pablo De Grande and in addition it includes the\n maps accompanying these datasets.)", 9 | "name": "censo2017: Base de Datos de Facil Acceso del Censo 2017 de Chile\n (2017 Chilean Census Easy Access Database)", 10 | "codeRepository": "https://github.com/pachadotdev/censo2017", 11 | "issueTracker": "https://github.com/ropensci/censo2017/issues/", 12 | "license": "https://spdx.org/licenses/CC0-1.0", 13 | "version": "0.6.1", 14 | "programmingLanguage": { 15 | "@type": "ComputerLanguage", 16 | "name": "R", 17 | "url": "https://r-project.org" 18 | }, 19 | "runtimePlatform": "R version 4.1.1 (2021-08-10)", 20 | "author": [ 21 | { 22 | "@type": "Person", 23 | "givenName": "Mauricio", 24 | "familyName": "Vargas", 25 | "email": "mavargas11@uc.cl", 26 | "@id": "https://orcid.org/0000-0003-1017-7574" 27 | } 28 | ], 29 | "contributor": [ 30 | { 31 | "@type": "Person", 32 | "givenName": "Juan", 33 | "familyName": "Correa" 34 | }, 35 | { 36 | "@type": "Organization", 37 | "name": "Instituto Nacional de Estadisticas (INE)" 38 | } 39 | ], 40 | "contributor.1": [ 41 | { 42 | "@type": "Person", 43 | "givenName": "Constanza", 44 | "familyName": "Manriquez", 45 | "email": "constanza.ima@gmail.com" 46 | } 47 | ], 48 | "copyrightHolder": {}, 49 | "funder": {}, 50 | "maintainer": [ 51 | { 52 | "@type": "Person", 53 | "givenName": "Mauricio", 54 | "familyName": "Vargas", 55 | "email": "mavargas11@uc.cl", 56 | "@id": "https://orcid.org/0000-0003-1017-7574" 57 | } 58 | ], 59 | "softwareSuggestions": [ 60 | { 61 | "@type": "SoftwareApplication", 62 | "identifier": "testthat", 63 | "name": "testthat", 64 | "provider": { 65 | "@id": "https://cran.r-project.org", 66 | "@type": "Organization", 67 | "name": "Comprehensive R Archive Network (CRAN)", 68 | "url": "https://cran.r-project.org" 69 | }, 70 | "sameAs": 
"https://CRAN.R-project.org/package=testthat" 71 | }, 72 | { 73 | "@type": "SoftwareApplication", 74 | "identifier": "covr", 75 | "name": "covr", 76 | "provider": { 77 | "@id": "https://cran.r-project.org", 78 | "@type": "Organization", 79 | "name": "Comprehensive R Archive Network (CRAN)", 80 | "url": "https://cran.r-project.org" 81 | }, 82 | "sameAs": "https://CRAN.R-project.org/package=covr" 83 | }, 84 | { 85 | "@type": "SoftwareApplication", 86 | "identifier": "knitr", 87 | "name": "knitr", 88 | "provider": { 89 | "@id": "https://cran.r-project.org", 90 | "@type": "Organization", 91 | "name": "Comprehensive R Archive Network (CRAN)", 92 | "url": "https://cran.r-project.org" 93 | }, 94 | "sameAs": "https://CRAN.R-project.org/package=knitr" 95 | }, 96 | { 97 | "@type": "SoftwareApplication", 98 | "identifier": "dplyr", 99 | "name": "dplyr", 100 | "provider": { 101 | "@id": "https://cran.r-project.org", 102 | "@type": "Organization", 103 | "name": "Comprehensive R Archive Network (CRAN)", 104 | "url": "https://cran.r-project.org" 105 | }, 106 | "sameAs": "https://CRAN.R-project.org/package=dplyr" 107 | }, 108 | { 109 | "@type": "SoftwareApplication", 110 | "identifier": "dbplyr", 111 | "name": "dbplyr", 112 | "provider": { 113 | "@id": "https://cran.r-project.org", 114 | "@type": "Organization", 115 | "name": "Comprehensive R Archive Network (CRAN)", 116 | "url": "https://cran.r-project.org" 117 | }, 118 | "sameAs": "https://CRAN.R-project.org/package=dbplyr" 119 | }, 120 | { 121 | "@type": "SoftwareApplication", 122 | "identifier": "ggplot2", 123 | "name": "ggplot2", 124 | "provider": { 125 | "@id": "https://cran.r-project.org", 126 | "@type": "Organization", 127 | "name": "Comprehensive R Archive Network (CRAN)", 128 | "url": "https://cran.r-project.org" 129 | }, 130 | "sameAs": "https://CRAN.R-project.org/package=ggplot2" 131 | }, 132 | { 133 | "@type": "SoftwareApplication", 134 | "identifier": "chilemapas", 135 | "name": "chilemapas", 136 | "provider": { 137 
| "@id": "https://cran.r-project.org", 138 | "@type": "Organization", 139 | "name": "Comprehensive R Archive Network (CRAN)", 140 | "url": "https://cran.r-project.org" 141 | }, 142 | "sameAs": "https://CRAN.R-project.org/package=chilemapas" 143 | } 144 | ], 145 | "softwareRequirements": [ 146 | { 147 | "@type": "SoftwareApplication", 148 | "identifier": "DBI", 149 | "name": "DBI", 150 | "provider": { 151 | "@id": "https://cran.r-project.org", 152 | "@type": "Organization", 153 | "name": "Comprehensive R Archive Network (CRAN)", 154 | "url": "https://cran.r-project.org" 155 | }, 156 | "sameAs": "https://CRAN.R-project.org/package=DBI" 157 | }, 158 | { 159 | "@type": "SoftwareApplication", 160 | "identifier": "duckdb", 161 | "name": "duckdb", 162 | "provider": { 163 | "@id": "https://cran.r-project.org", 164 | "@type": "Organization", 165 | "name": "Comprehensive R Archive Network (CRAN)", 166 | "url": "https://cran.r-project.org" 167 | }, 168 | "sameAs": "https://CRAN.R-project.org/package=duckdb" 169 | }, 170 | { 171 | "@type": "SoftwareApplication", 172 | "identifier": "httr", 173 | "name": "httr", 174 | "provider": { 175 | "@id": "https://cran.r-project.org", 176 | "@type": "Organization", 177 | "name": "Comprehensive R Archive Network (CRAN)", 178 | "url": "https://cran.r-project.org" 179 | }, 180 | "sameAs": "https://CRAN.R-project.org/package=httr" 181 | }, 182 | { 183 | "@type": "SoftwareApplication", 184 | "identifier": "tibble", 185 | "name": "tibble", 186 | "provider": { 187 | "@id": "https://cran.r-project.org", 188 | "@type": "Organization", 189 | "name": "Comprehensive R Archive Network (CRAN)", 190 | "url": "https://cran.r-project.org" 191 | }, 192 | "sameAs": "https://CRAN.R-project.org/package=tibble" 193 | }, 194 | { 195 | "@type": "SoftwareApplication", 196 | "identifier": "purrr", 197 | "name": "purrr", 198 | "provider": { 199 | "@id": "https://cran.r-project.org", 200 | "@type": "Organization", 201 | "name": "Comprehensive R Archive Network 
(CRAN)", 202 | "url": "https://cran.r-project.org" 203 | }, 204 | "sameAs": "https://CRAN.R-project.org/package=purrr" 205 | }, 206 | { 207 | "@type": "SoftwareApplication", 208 | "identifier": "cli", 209 | "name": "cli", 210 | "provider": { 211 | "@id": "https://cran.r-project.org", 212 | "@type": "Organization", 213 | "name": "Comprehensive R Archive Network (CRAN)", 214 | "url": "https://cran.r-project.org" 215 | }, 216 | "sameAs": "https://CRAN.R-project.org/package=cli" 217 | }, 218 | { 219 | "@type": "SoftwareApplication", 220 | "identifier": "crayon", 221 | "name": "crayon", 222 | "provider": { 223 | "@id": "https://cran.r-project.org", 224 | "@type": "Organization", 225 | "name": "Comprehensive R Archive Network (CRAN)", 226 | "url": "https://cran.r-project.org" 227 | }, 228 | "sameAs": "https://CRAN.R-project.org/package=crayon" 229 | }, 230 | { 231 | "@type": "SoftwareApplication", 232 | "identifier": "rstudioapi", 233 | "name": "rstudioapi", 234 | "provider": { 235 | "@id": "https://cran.r-project.org", 236 | "@type": "Organization", 237 | "name": "Comprehensive R Archive Network (CRAN)", 238 | "url": "https://cran.r-project.org" 239 | }, 240 | "sameAs": "https://CRAN.R-project.org/package=rstudioapi" 241 | }, 242 | { 243 | "@type": "SoftwareApplication", 244 | "identifier": "tools", 245 | "name": "tools" 246 | }, 247 | { 248 | "@type": "SoftwareApplication", 249 | "identifier": "R", 250 | "name": "R", 251 | "version": ">= 4.0" 252 | } 253 | ], 254 | "relatedLink": ["https://docs.ropensci.org/censo2017/", "https://CRAN.R-project.org/package=censo2017"], 255 | "readme": "https://github.com/pachadotdev/censo2017/blob/main/README.md", 256 | "fileSize": "46.407KB", 257 | "contIntegration": ["https://github.com/ropensci/censo2017/actions", "https://codecov.io/gh/ropensci/censo2017"], 258 | "developmentStatus": ["https://lifecycle.r-lib.org/articles/stages.html#stable-1", "https://www.tidyverse.org/lifecycle/#stable"], 259 | "keywords": [ 260 | "chile", 261 | 
"censo", 262 | "census", 263 | "demografia", 264 | "demographics", 265 | "rstats", 266 | "sqlite", 267 | "redatam", 268 | "duckdb" 269 | ], 270 | "provider": { 271 | "@id": "https://cran.r-project.org", 272 | "@type": "Organization", 273 | "name": "Comprehensive R Archive Network (CRAN)", 274 | "url": "https://cran.r-project.org" 275 | }, 276 | "releaseNotes": "https://github.com/pachadotdev/censo2017/blob/master/NEWS.md", 277 | "review": { 278 | "@type": "Review", 279 | "url": "https://github.com/ropensci/software-review/issues/414", 280 | "provider": "https://ropensci.org" 281 | } 282 | } 283 | -------------------------------------------------------------------------------- /data-raw/00-create-local-db-sqlite.R: -------------------------------------------------------------------------------- 1 | library(DBI) 2 | library(RPostgres) 3 | library(RSQLite) 4 | library(duckdb) 5 | library(dplyr) 6 | library(stringr) 7 | library(sf) 8 | 9 | # create initial schema ---- 10 | 11 | # I imported the SQL dump from databases.pacha.dev 12 | con <- dbConnect( 13 | Postgres(), 14 | user = "student", 15 | password = Sys.getenv("databases_student_pwd"), 16 | dbname = "censo", 17 | host = "databases.pacha.dev" 18 | ) 19 | 20 | tablas <- dbListTables(con) 21 | tablas <- grep("geometry_|geography_|raster_|spatial_", tablas, value = T, invert = T) 22 | tablas <- sort(tablas) 23 | 24 | con2 <- dbConnect(SQLite(), "data-raw/censo2017.sqlite") 25 | 26 | dbSendQuery( 27 | con2, 28 | "CREATE TABLE comunas ( 29 | comuna_ref_id float8 NULL, 30 | provincia_ref_id float8 NULL, 31 | idcomuna text NULL, 32 | redcoden float8 NOT NULL, 33 | nom_comuna text NULL, 34 | CONSTRAINT comunas_pk PRIMARY KEY (redcoden), 35 | CONSTRAINT comunas_un UNIQUE (comuna_ref_id))" 36 | ) 37 | 38 | dbSendQuery( 39 | con2, 40 | "CREATE TABLE hogares ( 41 | hogar_ref_id float8 NOT NULL, 42 | vivienda_ref_id float8 NULL, 43 | nhogar float8 NULL, 44 | tipo_hogar float8 NULL, 45 | ncu_yern_nuer float8 NULL, 46 | 
n_herm_cun float8 NULL, 47 | nuc_herm_cun float8 NULL, 48 | num_sueg_pad_abu float8 NULL, 49 | nuc_pad_sueg_abu float8 NULL, 50 | num_otros float8 NULL, 51 | nuc_otros float8 NULL, 52 | num_no_par float8 NULL, 53 | nuc_no_par float8 NULL, 54 | tot_nucleos float8 NULL, 55 | CONSTRAINT hogares_pk PRIMARY KEY (hogar_ref_id))" 56 | ) 57 | 58 | dbSendQuery( 59 | con2, 60 | "CREATE TABLE mapa_comunas ( 61 | geometry text NULL, 62 | region float8 NULL, 63 | provincia float8 NULL, 64 | comuna float8 NOT NULL, 65 | CONSTRAINT mapa_comunas_pk PRIMARY KEY (comuna))" 66 | ) 67 | 68 | dbSendQuery( 69 | con2, 70 | "CREATE TABLE mapa_provincias ( 71 | geometry text NULL, 72 | region float8 NULL, 73 | provincia float8 NOT NULL, 74 | CONSTRAINT mapa_provincias_pk PRIMARY KEY (provincia))" 75 | ) 76 | 77 | dbSendQuery( 78 | con2, 79 | "CREATE TABLE mapa_regiones ( 80 | geometry text NULL, 81 | region float8 NOT NULL, 82 | CONSTRAINT mapa_regiones_pk PRIMARY KEY (region))" 83 | ) 84 | 85 | dbSendQuery( 86 | con2, 87 | "CREATE TABLE mapa_zonas ( 88 | geometry text NULL, 89 | region float8 NULL, 90 | provincia float8 NULL, 91 | comuna float8 NULL, 92 | geocodigo float8 NOT NULL, 93 | CONSTRAINT mapa_zonas_pk PRIMARY KEY (geocodigo))" 94 | ) 95 | 96 | dbSendQuery( 97 | con2, 98 | "CREATE TABLE personas ( 99 | persona_ref_id float8 NULL, 100 | hogar_ref_id float8 NULL, 101 | personan int4 NULL, 102 | p07 int4 NULL, 103 | p08 int4 NULL, 104 | p09 int4 NULL, 105 | p10 int4 NULL, 106 | p10comuna int4 NULL, 107 | p10pais int4 NULL, 108 | p10pais_grupo int4 NULL, 109 | p11 int4 NULL, 110 | p11comuna int4 NULL, 111 | p11pais int4 NULL, 112 | p11pais_grupo int4 NULL, 113 | p12 int4 NULL, 114 | p12comuna int4 NULL, 115 | p12pais int4 NULL, 116 | p12pais_grupo int4 NULL, 117 | p12a_llegada int4 NULL, 118 | p12a_tramo int4 NULL, 119 | p13 int4 NULL, 120 | p14 int4 NULL, 121 | p15 int4 NULL, 122 | p15a int4 NULL, 123 | p16 int4 NULL, 124 | p16a int4 NULL, 125 | p16a_otro int4 NULL, 126 | p16a_grupo 
int4 NULL, 127 | p17 int4 NULL, 128 | p18 text NULL, 129 | p19 int4 NULL, 130 | p20 int4 NULL, 131 | p21m int4 NULL, 132 | p21a int4 NULL, 133 | escolaridad int4 NULL, 134 | rec_parentesco int4 NULL)" 135 | ) 136 | 137 | dbSendQuery( 138 | con2, 139 | "CREATE INDEX personas_idx ON personas (hogar_ref_id)" 140 | ) 141 | 142 | dbSendQuery( 143 | con2, 144 | "CREATE TABLE provincias ( 145 | provincia_ref_id float8 NULL, 146 | region_ref_id float8 NULL, 147 | idprovincia float8 NULL, 148 | redcoden float8 NOT NULL, 149 | nom_provincia text NULL, 150 | CONSTRAINT provincias_pk PRIMARY KEY (redcoden))" 151 | ) 152 | 153 | dbSendQuery( 154 | con2, 155 | "CREATE TABLE regiones ( 156 | region_ref_id float8 NOT NULL, 157 | censo_ref_id float8 NULL, 158 | idregion text NULL, 159 | redcoden float8 NOT NULL, 160 | nom_region text NULL, 161 | CONSTRAINT regiones_pk PRIMARY KEY (redcoden))" 162 | ) 163 | 164 | dbSendQuery( 165 | con2, 166 | "CREATE TABLE viviendas ( 167 | vivienda_ref_id float8 NOT NULL, 168 | zonaloc_ref_id float8 NULL, 169 | nviv int4 NULL, 170 | p01 int4 NULL, 171 | p02 int4 NULL, 172 | p03a int4 NULL, 173 | p03b int4 NULL, 174 | p03c int4 NULL, 175 | p04 int4 NULL, 176 | p05 int4 NULL, 177 | cant_hog int4 NULL, 178 | cant_per int4 NULL, 179 | ind_hacin float8 NULL, 180 | ind_hacin_rec int4 NULL, 181 | ind_material int4 NULL, 182 | CONSTRAINT viviendas_pk PRIMARY KEY (vivienda_ref_id))" 183 | ) 184 | 185 | dbSendQuery( 186 | con2, 187 | "CREATE TABLE zonas ( 188 | zonaloc_ref_id float8 NOT NULL, 189 | geocodigo float8 NULL, 190 | observacion text NULL, 191 | CONSTRAINT zonas_pk PRIMARY KEY (zonaloc_ref_id), 192 | CONSTRAINT zonas_un UNIQUE (geocodigo))" 193 | ) 194 | 195 | dbDisconnect(con2) 196 | 197 | for (i in seq_along(tablas)) { 198 | t <- tablas[i] 199 | message(t) 200 | 201 | d <- tbl(con, t) %>% collect() 202 | 203 | con2 <- dbConnect(SQLite(), "data-raw/censo2017.sqlite") 204 | 205 | dbWriteTable( 206 | con2, 207 | t, 208 | d, 209 | temporary = FALSE, 
210 | overwrite = FALSE, 211 | append = TRUE 212 | ) 213 | 214 | dbDisconnect(con2) 215 | 216 | rm(d) 217 | gc() 218 | } 219 | 220 | # fix varying geo codes ---- 221 | 222 | con2 <- dbConnect(SQLite(), "data-raw/censo2017.sqlite") 223 | 224 | zonas <- dbReadTable(con2, "zonas") 225 | 226 | zonas <- zonas %>% 227 | mutate( 228 | geocodigo = str_pad(geocodigo, 11, "left", "0") 229 | ) 230 | 231 | dbSendQuery(con2, "DROP TABLE zonas") 232 | 233 | dbSendQuery( 234 | con2, 235 | "CREATE TABLE zonas ( 236 | zonaloc_ref_id float8 NOT NULL, 237 | geocodigo char(11) NULL, 238 | observacion text NULL, 239 | CONSTRAINT zonas_pk PRIMARY KEY (zonaloc_ref_id), 240 | CONSTRAINT zonas_un UNIQUE (geocodigo))" 241 | ) 242 | 243 | dbWriteTable( 244 | con2, 245 | "zonas", 246 | zonas, 247 | temporary = FALSE, 248 | row.names = FALSE, 249 | overwrite = F, 250 | append = T 251 | ) 252 | 253 | mapa_zonas <- dbReadTable(con2, "mapa_zonas") 254 | 255 | mapa_zonas <- mapa_zonas %>% 256 | mutate( 257 | region = str_pad(region, 2, "left", "0"), 258 | provincia = str_pad(provincia, 3, "left", "0"), 259 | comuna = str_pad(comuna, 5, "left", "0"), 260 | geocodigo = str_pad(geocodigo, 11, "left", "0") 261 | ) 262 | 263 | dbSendQuery(con2, "DROP TABLE mapa_zonas") 264 | 265 | dbSendQuery( 266 | con2, 267 | "CREATE TABLE mapa_zonas ( 268 | geometry text NULL, 269 | region char(2) NULL, 270 | provincia char(3) NULL, 271 | comuna char(5) NULL, 272 | geocodigo char(11) NOT NULL, 273 | CONSTRAINT mapa_zonas_pk PRIMARY KEY (geocodigo))" 274 | ) 275 | 276 | dbWriteTable( 277 | con2, 278 | "mapa_zonas", 279 | mapa_zonas, 280 | temporary = FALSE, 281 | row.names = FALSE, 282 | overwrite = F, 283 | append = T 284 | ) 285 | 286 | comunas <- dbReadTable(con2, "comunas") 287 | 288 | comunas <- comunas %>% 289 | mutate( 290 | redcoden = stringr::str_pad(redcoden, 5, "left", "0") 291 | ) 292 | 293 | dbSendQuery(con2, "DROP TABLE comunas") 294 | 295 | dbSendQuery( 296 | con2, 297 | "CREATE TABLE comunas ( 298 | 
comuna_ref_id float8 NULL, 299 | provincia_ref_id float8 NULL, 300 | idcomuna text NULL, 301 | redcoden char(5) NOT NULL, 302 | nom_comuna text NULL, 303 | CONSTRAINT comunas_pk PRIMARY KEY (redcoden), 304 | CONSTRAINT comunas_un UNIQUE (comuna_ref_id))" 305 | ) 306 | 307 | dbWriteTable( 308 | con2, 309 | "comunas", 310 | comunas, 311 | temporary = FALSE, 312 | row.names = FALSE, 313 | overwrite = F, 314 | append = T 315 | ) 316 | 317 | mapa_comunas <- dbReadTable(con2, "mapa_comunas") 318 | 319 | mapa_comunas <- mapa_comunas %>% 320 | mutate( 321 | region = stringr::str_pad(region, 2, "left", "0"), 322 | provincia = stringr::str_pad(provincia, 3, "left", "0"), 323 | comuna = stringr::str_pad(comuna, 5, "left", "0") 324 | ) 325 | 326 | dbSendQuery(con2, "DROP TABLE mapa_comunas") 327 | 328 | dbSendQuery( 329 | con2, 330 | "CREATE TABLE mapa_comunas ( 331 | geometry text NULL, 332 | region char(2) NULL, 333 | provincia char(3) NULL, 334 | comuna char(5) NOT NULL, 335 | CONSTRAINT mapa_comunas_pk PRIMARY KEY (comuna))" 336 | ) 337 | 338 | dbWriteTable( 339 | con2, 340 | "mapa_comunas", 341 | mapa_comunas, 342 | temporary = FALSE, 343 | row.names = FALSE, 344 | overwrite = F, 345 | append = T 346 | ) 347 | 348 | provincias <- dbReadTable(con2, "provincias") 349 | 350 | provincias <- provincias %>% 351 | mutate( 352 | redcoden = stringr::str_pad(redcoden, 3, "left", "0") 353 | ) 354 | 355 | dbSendQuery(con2, "DROP TABLE provincias") 356 | 357 | dbSendQuery( 358 | con2, 359 | "CREATE TABLE provincias ( 360 | provincia_ref_id float8 NULL, 361 | region_ref_id float8 NULL, 362 | idprovincia float8 NULL, 363 | redcoden char(3) NOT NULL, 364 | nom_provincia text NULL, 365 | CONSTRAINT provincias_pk PRIMARY KEY (redcoden))" 366 | ) 367 | 368 | dbWriteTable( 369 | con2, 370 | "provincias", 371 | provincias, 372 | temporary = FALSE, 373 | row.names = FALSE, 374 | overwrite = F, 375 | append = T 376 | ) 377 | 378 | mapa_provincias <- dbReadTable(con2, "mapa_provincias") 379 | 
# Drop a table, recreate it from `ddl`, and append `datos` to the new table.
#
# con:    open DBI connection to the SQLite file.
# nombre: name of the table to rebuild. It is pasted into the DROP statement,
#         so only pass trusted literals.
# datos:  data frame whose columns match the schema declared in `ddl`.
# ddl:    complete CREATE TABLE statement for `nombre`.
#
# DDL is sent with DBI::dbExecute() instead of dbSendQuery(): dbSendQuery()
# hands back a result object that the caller has to clear, and none of these
# statements return rows.
recrear_tabla <- function(con, nombre, datos, ddl) {
  DBI::dbExecute(con, paste("DROP TABLE", nombre))
  DBI::dbExecute(con, ddl)
  DBI::dbWriteTable(
    con,
    nombre,
    datos,
    temporary = FALSE,
    row.names = FALSE,
    overwrite = FALSE,
    append = TRUE
  )
}

# mapa_provincias: store region/province codes as zero-padded text.
mapa_provincias <- mapa_provincias %>%
  mutate(
    region = stringr::str_pad(region, 2, "left", "0"),
    provincia = stringr::str_pad(provincia, 3, "left", "0")
  )

recrear_tabla(
  con2,
  "mapa_provincias",
  mapa_provincias,
  "CREATE TABLE mapa_provincias (
    geometry text NULL,
    region char(2) NULL,
    provincia char(3) NOT NULL,
    CONSTRAINT mapa_provincias_pk PRIMARY KEY (provincia))"
)

# regiones: the region code becomes a two-character text key.
regiones <- dbReadTable(con2, "regiones") %>%
  mutate(
    redcoden = stringr::str_pad(redcoden, 2, "left", "0")
  )

recrear_tabla(
  con2,
  "regiones",
  regiones,
  "CREATE TABLE regiones (
    region_ref_id float8 NOT NULL,
    censo_ref_id float8 NULL,
    idregion text NULL,
    redcoden char(2) NOT NULL,
    nom_region text NULL,
    CONSTRAINT regiones_pk PRIMARY KEY (redcoden))"
)

# mapa_regiones: same padding for the region code of the map table.
mapa_regiones <- dbReadTable(con2, "mapa_regiones") %>%
  mutate(
    region = stringr::str_pad(region, 2, "left", "0")
  )

recrear_tabla(
  con2,
  "mapa_regiones",
  mapa_regiones,
  "CREATE TABLE mapa_regiones (
    geometry text NULL,
    region char(2) NOT NULL,
    CONSTRAINT mapa_regiones_pk PRIMARY KEY (region))"
)

dbDisconnect(con2)
--------------------------------------------------------------------------------
/data-raw/03-convertir-xml-a-tidy.R:
--------------------------------------------------------------------------------
library(censo2017)
library(xml2)
library(tidyverse)

# Parsed XML data dictionary; every section below reaches its node by walking
# child positions from this document.
d <- read_xml("data-raw/censo2017-descripcion-variables.xml")

# Convert raw dictionary labels to title case with lower-case connectives.
#
# x: character vector of labels.
#
# Returns a character vector of the same length. Labels are re-encoded to
# UTF-8 and trimmed, empty strings become NA, every word is title-cased, and
# then the connectives listed below are put back in lower case.
cadena_a_titulo <- function(x) {
  # Regex pattern = replacement. With a named vector, str_replace_all() applies
  # the pairs one after another in this order, exactly like a chain of
  # single-pattern calls.
  ajustes <- c(
    "/A" = "/a",
    " De " = " de ",
    " De\\)" = " de)",
    " Del " = " del ",
    " O " = " o ",
    " Y " = " y ",
    " Por " = " por ",
    " En " = " en ",
    " U " = " u ",
    " El " = " el ",
    " La " = " la ",
    " Al " = " al ",
    "\\(Grupo" = " (Grupo",
    "_recode" = " recodificado/a"
  )

  x %>%
    iconv(to = "UTF-8") %>%
    str_trim() %>%
    ifelse(. == "", NA, .) %>%
    str_to_title() %>%
    str_replace_all(ajustes)
}

# explore ----

# Kept for reference: positional paths to each level of the dictionary.
#
# persona <-
#   xml_attrs(xml_child(xml_child(xml_child(
#     xml_child(xml_child(xml_child(
#       xml_child(xml_child(xml_child(xml_child(
#         d, 2
#       ), 2), 3), 3), 3
#     ), 2), 3), 2
#   ), 14), 13))
#
# hogar <-
#   xml_attrs(xml_child(xml_child(xml_child(
#     xml_child(xml_child(xml_child(
#       xml_child(xml_child(xml_child(d, 2), 2), 3), 3
#     ), 3), 2), 3
#   ), 2), 14))
#
# vivienda <-
#   xml_attrs(xml_child(xml_child(xml_child(
#     xml_child(xml_child(xml_child(
#       xml_child(xml_child(d, 2), 2), 3
#     ), 3), 3), 2
#   ), 3), 2))
#
# zonaloc <-
#   xml_attrs(xml_child(xml_child(xml_child(
#     xml_child(xml_child(xml_child(xml_child(
#       d, 2
#     ), 2), 3), 3), 3
#   ), 2), 3))
#
# distrito <-
#   xml_attrs(xml_child(xml_child(xml_child(
#     xml_child(xml_child(d, 2), 2), 3
#   ), 3), 3))
#
# comuna <-
#   xml_attrs(xml_child(xml_child(xml_child(xml_child(
#     d, 2
#   ), 2), 3), 3))
#
# provincia <- xml_attrs(xml_child(xml_child(xml_child(d, 2), 2), 3))
#
# region <- xml_attrs(xml_child(xml_child(d, 2), 2))
#
# censo <- xml_attrs(xml_child(xml_child(d, 2), 2))

# persons (personas) ----

# Child positions leading from the document root to the persons node; folding
# xml_child() over them is the same walk as nesting ten xml_child() calls.
ruta_personas <- c(2, 2, 3, 3, 3, 2, 3, 2, 14, 13)
personas <- purrr::reduce(ruta_personas, xml_child, .init = d)

# xml_attrs(xml_child(personas, 1))
# xml_attrs(xml_child(personas, 34))

# One row per variable, built from the attributes of each child node.
personas2 <- bind_rows(xml_attrs(xml_children(personas))) %>%
  select(
    variable = name, descripcion = label, tipo = type,
    tamanio = size, decimales = decimals, rango = range
  ) %>%
  mutate(
    rango = str_replace_all(rango, " TO ", " - "),
    descripcion = cadena_a_titulo(descripcion)
  ) %>%
  mutate_if(is.character, str_trim) %>%
  mutate(
    variable = str_to_lower(variable)
  )

# xml_attrs(xml_child(xml_child(xml_child(personas, 1), 1), 1))
# xml_attrs(xml_child(xml_child(xml_child(personas, 1), 1), 2))

# Value labels: for the i-th variable, collect the attributes of its
# grandchildren as (variable, valor, descripcion) rows.
personas_codificacion <- map_df(
  seq_along(personas2$variable),
  function(i) {
    # Named `etiquetas` so the XML document `d` is not shadowed in here.
    etiquetas <- bind_rows(
      xml_attrs(xml_children(xml_children(xml_child(personas, i))))
    )
    etiquetas$variable <- personas2$variable[[i]]

    # Same guards as the hogares/viviendas sections of this script: without
    # them select() fails when a variable carries no `name`/`value` attributes.
    if (!"name" %in% colnames(etiquetas)) {
      etiquetas$name <- NA_character_
    }
    if (!"value" %in% colnames(etiquetas)) {
      etiquetas$value <- NA_character_
    }

    etiquetas %>%
      select(variable, valor = name, descripcion = value) %>%
      mutate(
        descripcion = case_when(
          descripcion == "MISSING" ~ "Valor Perdido",
          descripcion == "NOTAPPLICABLE" ~ "No Aplica",
          TRUE ~ descripcion
        ),
        descripcion = str_trim(cadena_a_titulo(descripcion))
      )
  }
)

# persons (personas), continued ----

# Keep one label per (variable, valor) pair and tag the rows with their table.
personas_codificacion <- personas_codificacion %>%
  distinct(variable, valor, .keep_all = TRUE) %>%
  mutate(tabla = "personas") %>%
  select(tabla, everything())

# From here on `personas` is the variable-level metadata, not the XML node.
personas <- personas2 %>%
  mutate(
    rango = ifelse(rango == "", NA_character_, rango),
    tabla = "personas"
  ) %>%
  select(tabla, everything())

rm(personas2)

# helpers ----

# Read one row per variable from the attributes of the children of `nodo`.
#
# nodo: xml2 node whose children each describe one variable.
#
# Returns a tibble with columns variable, descripcion, tipo, tamanio,
# decimales and rango; descriptions are title-cased, every character column
# is trimmed and variable names are lower-cased.
leer_variables <- function(nodo) {
  bind_rows(xml_attrs(xml_children(nodo))) %>%
    select(
      variable = name, descripcion = label, tipo = type,
      tamanio = size, decimales = decimals, rango = range
    ) %>%
    mutate(
      rango = str_replace_all(rango, " TO ", " - "),
      descripcion = cadena_a_titulo(descripcion)
    ) %>%
    mutate_if(is.character, str_trim) %>%
    mutate(
      variable = str_to_lower(variable)
    )
}

# Read the value labels of every variable under `nodo`.
#
# nodo:      xml2 node whose i-th child corresponds to variables[[i]].
# variables: character vector of variable names, in child order.
# tabla:     table name written to the `tabla` column.
#
# Returns a tibble (tabla, variable, valor, descripcion) holding one row per
# distinct (variable, valor) pair.
leer_codificacion <- function(nodo, variables, tabla) {
  etiquetas <- map_df(
    seq_along(variables),
    function(i) {
      filas <- bind_rows(
        xml_attrs(xml_children(xml_children(xml_child(nodo, i))))
      )
      filas$variable <- variables[[i]]

      # Variables without value labels yield no `name`/`value` attributes;
      # add the columns so select() below does not fail.
      if (!"name" %in% colnames(filas)) {
        filas$name <- NA_character_
      }
      if (!"value" %in% colnames(filas)) {
        filas$value <- NA_character_
      }

      filas %>%
        select(variable, valor = name, descripcion = value) %>%
        mutate(
          descripcion = case_when(
            descripcion == "MISSING" ~ "Valor Perdido",
            descripcion == "NOTAPPLICABLE" ~ "No Aplica",
            TRUE ~ descripcion
          ),
          descripcion = str_trim(cadena_a_titulo(descripcion)),
          variable = str_to_lower(variable)
        )
    }
  )

  etiquetas %>%
    distinct(variable, valor, .keep_all = TRUE) %>%
    mutate(tabla = tabla) %>%
    select(tabla, everything())
}

# Final form of a variable table: drop rows without a description, turn empty
# ranges into NA and put the `tabla` tag first.
describir_tabla <- function(variables, tabla) {
  variables %>%
    drop_na(descripcion) %>%
    mutate(
      rango = ifelse(rango == "", NA_character_, rango),
      tabla = tabla
    ) %>%
    select(tabla, everything())
}

# households (hogares) ----

# Child positions from the document root to the households node.
nodo_hogares <- purrr::reduce(
  c(2, 2, 3, 3, 3, 2, 3, 2, 14), xml_child, .init = d
)

# xml_attrs(xml_child(nodo_hogares, 1))
# xml_attrs(xml_child(nodo_hogares, 34))
variables_hogares <- leer_variables(nodo_hogares)

# xml_attrs(xml_child(xml_child(xml_child(nodo_hogares, 1), 1), 1))
# xml_attrs(xml_child(xml_child(xml_child(nodo_hogares, 1), 1), 2))
hogares_codificacion <- leer_codificacion(
  nodo_hogares, variables_hogares$variable, "hogares"
)

hogares <- describir_tabla(variables_hogares, "hogares")

rm(nodo_hogares, variables_hogares)

# dwellings (viviendas) ----

# Child positions from the document root to the dwellings node.
nodo_viviendas <- purrr::reduce(
  c(2, 2, 3, 3, 3, 2, 3, 2), xml_child, .init = d
)

# xml_attrs(xml_child(nodo_viviendas, 1))
# xml_attrs(xml_child(nodo_viviendas, 34))
variables_viviendas <- leer_variables(nodo_viviendas)

# xml_attrs(xml_child(xml_child(xml_child(nodo_viviendas, 1), 1), 1))
# xml_attrs(xml_child(xml_child(xml_child(nodo_viviendas, 1), 1), 2))
viviendas_codificacion <- leer_codificacion(
  nodo_viviendas, variables_viviendas$variable, "viviendas"
)

# Tagged "viviendas" directly, so no later fix-up of a "vivienda" tag is needed.
viviendas <- describir_tabla(variables_viviendas, "viviendas")

rm(nodo_viviendas, variables_viviendas)

# zones (zonas) ----

# The zones table has a single documented variable, described by hand.
zonas <- tibble(
  tabla = "zonas",
  variable = "geocodigo",
  descripcion = "Sub-División Comunal de la forma RRPCCDDLLLL. RR = Región; RRP = Provincia; RRPCC = Comuna; RRPCCDD = Distrito Censal; RRPCCDDLLLL = Zona Censal.",
  tipo = "string",
  tamanio = "11",
  decimales = "0",
  rango = NA_character_
)

# communes (comunas) ----

# Child positions from the document root to the communes node.
nodo_comunas <- purrr::reduce(c(2, 2, 3, 3), xml_child, .init = d)

# xml_attrs(xml_child(nodo_comunas, 1))
# xml_attrs(xml_child(nodo_comunas, 34))
comunas <- describir_tabla(leer_variables(nodo_comunas), "comunas")

rm(nodo_comunas)

# combine ----

censo_variables <- bind_rows(personas, hogares, viviendas, zonas, comunas)

# The types declared in the XML are replaced by the R class each column has in
# the installed database, so the five tables are read once and looked up by
# name.
tablas_censo <- list(
  personas = censo_tabla("personas"),
  viviendas = censo_tabla("viviendas"),
  hogares = censo_tabla("hogares"),
  zonas = censo_tabla("zonas"),
  comunas = censo_tabla("comunas")
)

censo_variables <- censo_variables %>%
  mutate(
    tipo = purrr::map2_chr(
      tabla, variable,
      function(nombre_tabla, nombre_variable) {
        # `[[` is an exact lookup (no parsing of generated code, no partial
        # matching). A column missing from the table gives class(NULL), i.e.
        # "NULL". Only the first class is kept when an object has several.
        class(tablas_censo[[nombre_tabla]][[nombre_variable]])[1]
      }
    )
  ) %>%
  # Size and decimals from the XML are not exported.
  select(-c(tamanio, decimales)) %>%
  mutate(
    tabla = as_factor(tabla),
    tipo = as_factor(tipo)
  )

# Rows with any missing field are dropped before the value code is converted
# to integer.
censo_codificacion_variables <- bind_rows(
  personas_codificacion, hogares_codificacion, viviendas_codificacion
) %>%
  drop_na() %>%
  mutate(valor = as.integer(valor))

data.table::fwrite(censo_variables, "data-raw/variables.tsv", sep = "\t")
data.table::fwrite(censo_codificacion_variables, "data-raw/variables_codificacion.tsv", sep = "\t")