├── .gitignore ├── vignettes ├── fig1.png ├── fig2.png └── Introduction_to_sidrar.Rmd ├── CRAN-SUBMISSION ├── .Rbuildignore ├── NAMESPACE ├── cran-comments.md ├── sidrar.Rproj ├── man ├── search_sidra.Rd ├── info_sidra.Rd └── get_sidra.Rd ├── R ├── search_sidra.R ├── info_sidra.R └── get_sidra.R ├── NEWS.md ├── DESCRIPTION ├── README.Rmd └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .Rproj.user 2 | .Rhistory 3 | .RData 4 | .Ruserdata 5 | -------------------------------------------------------------------------------- /vignettes/fig1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rpradosiqueira/sidrar/HEAD/vignettes/fig1.png -------------------------------------------------------------------------------- /vignettes/fig2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rpradosiqueira/sidrar/HEAD/vignettes/fig2.png -------------------------------------------------------------------------------- /CRAN-SUBMISSION: -------------------------------------------------------------------------------- 1 | Version: 0.2.8 2 | Date: 2022-06-03 20:31:24 UTC 3 | SHA: 850aba36c77e2d66d4973674d0ec0a28260eb95e 4 | -------------------------------------------------------------------------------- /.Rbuildignore: -------------------------------------------------------------------------------- 1 | ^.*\.Rproj$ 2 | ^\.Rproj\.user$ 3 | ^README\.Rmd$ 4 | ^README-.*\.png$ 5 | ^cran-comments\.md$ 6 | ^CRAN-RELEASE$ 7 | ^CRAN-SUBMISSION$ 8 | -------------------------------------------------------------------------------- /NAMESPACE: -------------------------------------------------------------------------------- 1 | # Generated by roxygen2: do not edit by hand 2 | 3 | export(get_sidra) 4 | export(info_sidra) 5 | export(search_sidra) 6 | importFrom(magrittr,"%>%") 7 | 
-------------------------------------------------------------------------------- /cran-comments.md: -------------------------------------------------------------------------------- 1 | ## New R CMD check results 2 | 3 | 0 errors | 0 warnings | 0 note 4 | 5 | ## Test environments 6 | * local OS X install, R 3.3.2 7 | * win-builder (devel and release) 8 | 9 | ## R CMD check results 10 | 11 | 0 errors | 0 warnings | 0 note -------------------------------------------------------------------------------- /sidrar.Rproj: -------------------------------------------------------------------------------- 1 | Version: 1.0 2 | 3 | RestoreWorkspace: Default 4 | SaveWorkspace: Default 5 | AlwaysSaveHistory: Default 6 | 7 | EnableCodeIndexing: Yes 8 | UseSpacesForTab: Yes 9 | NumSpacesForTab: 2 10 | Encoding: UTF-8 11 | 12 | RnwWeave: Sweave 13 | LaTeX: pdfLaTeX 14 | 15 | BuildType: Package 16 | PackageUseDevtools: Yes 17 | PackageInstallArgs: --no-multiarch --with-keep.source 18 | -------------------------------------------------------------------------------- /man/search_sidra.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/search_sidra.R 3 | \name{search_sidra} 4 | \alias{search_sidra} 5 | \title{Search SIDRA's tables with determined term(s)} 6 | \usage{ 7 | search_sidra(x) 8 | } 9 | \arguments{ 10 | \item{x}{A character vector with the term(s)/word(s) to search.} 11 | } 12 | \value{ 13 | A character vector with the tables' names. 
14 | } 15 | \description{ 16 | It returns all SIDRA's tables with determined term 17 | } 18 | \examples{ 19 | \dontrun{ 20 | search_sidra("contas nacionais") 21 | } 22 | 23 | } 24 | \seealso{ 25 | \code{\link{get_sidra}} 26 | } 27 | \author{ 28 | Renato Prado Siqueira \email{rpradosiqueira@gmail.com} 29 | } 30 | \keyword{IBGE} 31 | \keyword{sidra} 32 | -------------------------------------------------------------------------------- /man/info_sidra.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/info_sidra.R 3 | \name{info_sidra} 4 | \alias{info_sidra} 5 | \title{Listing all the parameters of a SIDRA's table} 6 | \usage{ 7 | info_sidra(x, wb = FALSE) 8 | } 9 | \arguments{ 10 | \item{x}{A table from SIDRA's API.} 11 | 12 | \item{wb}{Logical. Should the metadata be open in the web browser? 13 | Default to FALSE.} 14 | } 15 | \value{ 16 | A list with the all table's parameters. 17 | } 18 | \description{ 19 | It returns the parameters and their descriptions of a SIDRA's table. 20 | } 21 | \examples{ 22 | \dontrun{ 23 | info_sidra(1419) 24 | } 25 | 26 | } 27 | \seealso{ 28 | \code{\link{get_sidra}} 29 | } 30 | \author{ 31 | Renato Prado Siqueira \email{rpradosiqueira@gmail.com} 32 | } 33 | \keyword{IBGE} 34 | \keyword{sidra} 35 | -------------------------------------------------------------------------------- /R/search_sidra.R: -------------------------------------------------------------------------------- 1 | #' Search SIDRA's tables with determined term(s) 2 | #' 3 | #' It returns all SIDRA's tables with determined term 4 | #' 5 | #' @param x A character vector with the term(s)/word(s) to search. 6 | #' @return A character vector with the tables' names. 
#' @details
#' Every term is trimmed, converted to UTF-8 and percent-encoded before it is
#' placed in the query string, so accented words (e.g. "população") and
#' reserved characters (e.g. "&") are searched literally. Terms must therefore
#' be given as plain text, not already percent-encoded. Multiple terms are
#' joined with an encoded space. Empty and \code{NA} terms are dropped.
#' @author Renato Prado Siqueira \email{rpradosiqueira@@gmail.com}
#' @seealso \code{\link{get_sidra}}
#' @examples
#' \dontrun{
#' search_sidra("contas nacionais")
#' }
#'
#' @keywords sidra IBGE
#' @importFrom magrittr %>%
#' @export

search_sidra <- function(x) {

  # Accept anything coercible to character (the previous gsub() call did the
  # same implicitly) and discard terms that cannot contribute to the query.
  terms <- trimws(as.character(x))
  terms <- terms[!is.na(terms) & nzchar(terms)]

  if (length(terms) == 0L) {
    stop("'x' must contain at least one non-empty search term", call. = FALSE)
  }

  # Percent-encode each term in full. Replacing only " " with "%20" left
  # accented and reserved characters raw in the URL.
  query <- paste0(xml2::url_escape(enc2utf8(terms)), collapse = "%20")

  page <- xml2::read_html(paste0("https://sidra.ibge.gov.br/Busca?q=", query))

  # Each search hit is rendered as an element with class "busca-link-tabela";
  # its text is the table's name.
  page %>%
    rvest::html_nodes(".busca-link-tabela") %>%
    rvest::html_text()
}

-------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # sidrar 0.2.9 2 | 3 | * Address SSL error 4 | 5 | # sidrar 0.2.8 6 | 7 | * Minor adjusts to address CRAN warnings 8 | 9 | # sidrar 0.2.7 10 | 11 | * Adjusts in the Vignette to CRAN 12 | 13 | # sidrar 0.2.6 14 | 15 | * Fixed bug in get_sidra (Issue #10) 16 | * Minor change in the Vignette 17 | 18 | # sidrar 0.2.5 19 | 20 | * Fixed bugs in get_sidra (Issue #5, #6) 21 | * "tidyr" and "dplyr" package dependency removed 22 | 23 | # sidrar 0.2.4 24 | 25 | * Fixed bug in search_sidra (Issue #2) 26 | * Better error messages 27 | * Vignette updated 28 | 29 | # sidrar 0.2.1 30 | 31 | * Fixed list-column in resultant data.frame in get_sidra (Issue #1) 32 | 33 | # sidrar 0.2.0 34 | 35 | * New argument in get_sidra ("api") 36 | 37 | # sidrar 0.1.1 38 | 39 | * Better messages depending of the input arguments 40 | 41 | # sidrar 0.1.0 42 | 43 | * Initial version 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /DESCRIPTION: -------------------------------------------------------------------------------- 1 | Package: sidrar 2 | Type: Package 3 | Title: An Interface to IBGE's SIDRA API 4 | Version: 0.2.9 5 | Authors@R: person("Renato", "Prado Siqueira", email = 
"rpradosiqueira@gmail.com", role = c("aut", "cre")) 6 | Description: Allows the user to connect with IBGE's (Instituto Brasileiro de 7 | Geografia e Estatistica, see for more information) 8 | SIDRA API in a flexible way. SIDRA is the acronym to "Sistema IBGE de 9 | Recuperacao Automatica" and is the system where IBGE turns available 10 | aggregate data from their researches. 11 | Depends: R (>= 3.2.0) 12 | License: GPL-3 13 | Encoding: UTF-8 14 | URL: https://github.com/rpradosiqueira/sidrar/ 15 | BugReports: https://github.com/rpradosiqueira/sidrar/issues/ 16 | Imports: 17 | magrittr, 18 | httr, 19 | rjson, 20 | rvest, 21 | stringr, 22 | utils, 23 | xml2 24 | RoxygenNote: 7.1.2 25 | Suggests: 26 | knitr, 27 | rmarkdown 28 | VignetteBuilder: knitr 29 | -------------------------------------------------------------------------------- /README.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | output: github_document 3 | --- 4 | 5 | 6 | 7 | ```{r, echo = FALSE} 8 | knitr::opts_chunk$set( 9 | collapse = TRUE, 10 | comment = "#>", 11 | fig.path = "README-" 12 | ) 13 | ``` 14 | 15 | # sidrar 16 | 17 | The goal of *sidrar* is to provide direct access to the data of IBGE's (Brazilian Institute of Geography and Statistics) SIDRA API within the R environment in an easy and flexible way. SIDRA is the acronym to "Sistema IBGE de Recuperação Automática" and it is the system where IBGE makes aggregate data from their researches available. 
18 | 19 | ## Installation 20 | 21 | Install the release version from CRAN: 22 | 23 | ```{r , eval = FALSE} 24 | install.packages("sidrar") 25 | ``` 26 | 27 | or the development version from github 28 | 29 | ```{r , eval = FALSE} 30 | # install.packages("devtools") 31 | devtools::install_github("rpradosiqueira/sidrar") 32 | ``` 33 | 34 | ## Functions 35 | 36 | For the time being, the "sidrar" package contains only three functions: 37 | 38 | ```{r, eval=FALSE} 39 | get_sidra It recovers data from the given table 40 | according to the parameters 41 | 42 | info_sidra It allows you to check what parameters 43 | are available for a table 44 | 45 | search_sidra It searches which tables have a particular 46 | word in their names 47 | ``` 48 | 49 | ## Example 50 | 51 | Let's assume that we want the IPCA (Índice de Preços ao Consumidor Amplo) for the city of Campo Grande/MS. However, we want to recover only the overall percentage rate in the last 12 months. To do this simply execute: 52 | 53 | ```{r, eval = FALSE} 54 | library(sidrar) 55 | 56 | get_sidra(x = 1419, 57 | variable = 63, 58 | period = c(last = "12"), 59 | geo = "City", 60 | geo.filter = 5002704, 61 | classific = "c315", 62 | category = list(7169), 63 | header = FALSE, 64 | format = 3) 65 | 66 | ``` 67 | 68 | To more examples, see the vignette ["Introduction to sidrar"](https://CRAN.R-project.org/package=sidrar/vignettes/Introduction_to_sidrar.html). 
69 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | sidrar 4 | ====== 5 | 6 | [![CRAN\_Status\_Badge](https://www.r-pkg.org/badges/version/sidrar)](https://CRAN.R-project.org/package=sidrar) [![CRAN\_Downloads](https://cranlogs.r-pkg.org/badges/grand-total/sidrar)](https://CRAN.R-project.org/package=sidrar) 7 | 8 | 9 | The goal of *sidrar* is to provide direct access to the data of IBGE's (Brazilian Institute of Geography and Statistics) SIDRA API within the R environment in an easy and flexible way. SIDRA is the acronym for "Sistema IBGE de Recuperação Automática" and it is the system where IBGE makes aggregate data from their researches available. 10 | 11 | Installation 12 | ------------ 13 | 14 | Install the release version from CRAN: 15 | 16 | ``` r 17 | install.packages("sidrar") 18 | ``` 19 | 20 | or the development version from github 21 | 22 | ``` r 23 | # install.packages("devtools") 24 | devtools::install_github("rpradosiqueira/sidrar") 25 | ``` 26 | 27 | Functions 28 | --------- 29 | 30 | For the time being, the "sidrar" package contains only three functions: 31 | 32 | ``` r 33 | get_sidra It recovers data from the given table 34 | according to the parameters 35 | 36 | info_sidra It allows you to check what parameters 37 | are available for a table 38 | 39 | search_sidra It searches which tables have a particular 40 | word in their names 41 | ``` 42 | 43 | Example 44 | ------- 45 | 46 | Let's assume that we want the IPCA (Índice de Preços ao Consumidor Amplo) for the city of Campo Grande/MS. However, we want to recover only the overall percentage rate in the last 12 months. 
To do this simply execute: 47 | 48 | ``` r 49 | library(sidrar) 50 | 51 | get_sidra(x = 1419, 52 | variable = 63, 53 | period = c(last = "12"), 54 | geo = "City", 55 | geo.filter = 5002704, 56 | classific = "c315", 57 | category = list(7169), 58 | header = FALSE, 59 | format = 3) 60 | ``` 61 | 62 | To more examples, see the vignette ["Introduction to sidrar"](https://CRAN.R-project.org/package=sidrar/vignettes/Introduction_to_sidrar.html). 63 | -------------------------------------------------------------------------------- /man/get_sidra.Rd: -------------------------------------------------------------------------------- 1 | % Generated by roxygen2: do not edit by hand 2 | % Please edit documentation in R/get_sidra.R 3 | \name{get_sidra} 4 | \alias{get_sidra} 5 | \title{Get SIDRA's table} 6 | \usage{ 7 | get_sidra(x, variable = "allxp", period = "last", geo = "Brazil", 8 | geo.filter = NULL, classific = "all", category = "all", header = TRUE, 9 | format = 4, digits = "default", api = NULL) 10 | } 11 | \arguments{ 12 | \item{x}{A table from IBGE's SIDRA API.} 13 | 14 | \item{variable}{An integer vector of the variables' codes to be returned. 15 | Defaults to all variables with exception of "Total".} 16 | 17 | \item{period}{A character vector describing the period of data. Defaults to 18 | the last available.} 19 | 20 | \item{geo}{A character vector describing the geographic levels of the data. 21 | Defauts to "Brazil".} 22 | 23 | \item{geo.filter}{A (named) list object with the specific item of the 24 | geographic level or all itens of a determined higher geografic level. It should 25 | be used when geo argument is provided, otherwise all geographic units of 26 | 'geo' argument are considered.} 27 | 28 | \item{classific}{A character vector with the table's classification(s). Defaults to 29 | all.} 30 | 31 | \item{category}{"all" or a list object with the categories of the classifications 32 | of \code{classific(s)} argument. 
Defaults to "all".} 33 | 34 | \item{header}{Logical. should the data frame be returned with the description 35 | names in header?} 36 | 37 | \item{format}{An integer ranging between 1 and 4. Default to 4. See more in details.} 38 | 39 | \item{digits}{An integer, "default" or "max". Default to "default" that returns the 40 | defaults digits to each variable.} 41 | 42 | \item{api}{A character with the api's parameters. Defaults to NULL.} 43 | } 44 | \value{ 45 | The function returns a data frame printed by default functions 46 | } 47 | \description{ 48 | This function allows the user to connect with IBGE's (Instituto Brasileiro de 49 | Geografia e Estatistica) SIDRA API in a flexible way. \acronym{SIDRA} is the 50 | acronym to "Sistema IBGE de Recuperação Automática" and it is the system where 51 | IBGE makes aggregate data from their researches available. 52 | } 53 | \details{ 54 | \code{period} can be a integer vector with names "first" and/or "last", 55 | or "all" or a simply character vector with date format %Y%m-%Y%m. 56 | 57 | The \code{geo} argument can be one of "Brazil", "Region", "State", 58 | "MesoRegion", "MicroRegion", "MetroRegion", "MetroRegionDiv", "IRD", 59 | "UrbAglo", "City", "District","subdistrict","Neighborhood","PopArrang". 60 | 'geo.filter' lists can/must be named with the same characters. 61 | 62 | When NULL, the arguments \code{classific} and \code{category} return all options 63 | available. 
64 | 65 | When argument \code{api} is not NULL, all others arguments informed are desconsidered 66 | 67 | The \code{format} argument can be set to: 68 | \itemize{ 69 | \item 1: Return only the descriptors' codes 70 | \item 2: Return only the descriptor's names 71 | \item 3: Return the codes and names of the geographic level and descriptors' names 72 | \item 4: Return the codes and names of the descriptors (Default) 73 | } 74 | } 75 | \examples{ 76 | \dontrun{ 77 | ## Requesting table 1419 (Consumer Price Index - IPCA) from the API 78 | ipca <- get_sidra(1419, 79 | variable = 69, 80 | period = c("201212","201401-201412"), 81 | geo = "City", 82 | geo.filter = list("State" = 50)) 83 | 84 | ## Urban population count from Census data (2010) for States and cities of Southest region. 85 | get_sidra(1378, 86 | variable = 93, 87 | geo = c("State","City"), 88 | geo.filter = list("Region" = 3, "Region" = 3), 89 | classific = c("c1"), 90 | category = list(1)) 91 | 92 | ## Number of informants by state in the Inventory Research (last data available) 93 | get_sidra(api = "/t/254/n1/all/n3/all/v/151/p/last\%201/c162/118423/c163/0") 94 | 95 | } 96 | 97 | } 98 | \seealso{ 99 | \code{\link{info_sidra}} 100 | } 101 | \author{ 102 | Renato Prado Siqueira \email{rpradosiqueira@gmail.com} 103 | } 104 | \keyword{IBGE} 105 | \keyword{sidra} 106 | -------------------------------------------------------------------------------- /vignettes/Introduction_to_sidrar.Rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Introduction to sidrar" 3 | author: "Renato Prado Siqueira" 4 | date: "`r Sys.Date()`" 5 | output: rmarkdown::html_vignette 6 | vignette: > 7 | %\VignetteIndexEntry{Introduction to sidrar} 8 | %\VignetteEngine{knitr::rmarkdown} 9 | \usepackage[utf8]{inputenc} 10 | --- 11 | 12 | ```{r setup, include=FALSE} 13 | knitr::opts_chunk$set(echo = TRUE, collapse = TRUE, comment = "#>") 14 | ``` 15 | 16 | 17 | ## R Interface to the SIDRA's 
API 18 | 19 | The "sidrar" R package seeks to provide direct access to the data of SIDRA - Sistema IBGE de Recuperação Automática - within the R environment in an easy and flexible way. 20 | 21 | ## Installation 22 | 23 | To install the version available on CRAN: 24 | 25 | ```{r, eval=FALSE} 26 | install.packages("sidrar") 27 | ``` 28 | 29 | To install the development version hosted on GitHub: 30 | 31 | ```{r, eval=FALSE} 32 | library(devtools) 33 | install_github("rpradosiqueira/sidrar") 34 | ``` 35 | 36 | ## Functions 37 | 38 | For the time being, the "sidrar" package contains only three functions: 39 | 40 | ```{r, eval=FALSE} 41 | get_sidra It recovers data from the given table 42 | according to the parameters 43 | 44 | info_sidra It allows you to check what parameters 45 | are available for a table via a web browser 46 | 47 | search_sidra It searches which tables have a particular 48 | word in their names 49 | ``` 50 | 51 | ## Examples 52 | ### get_sidra 53 | 54 | 1) Let's assume that we want the IPCA (Índice de Preços ao Consumidor Amplo) for the city of Campo Grande/MS. However, we will only recover the overall percentage rate in the last 12 months. To do this simply execute: 55 | 56 | ```{r, eval = FALSE} 57 | library(sidrar) 58 | 59 | get_sidra(x = 1419, 60 | variable = 63, 61 | period = c("last" = 12), 62 | geo = "City", 63 | geo.filter = 5002704, 64 | classific = "c315", 65 | category = list(7169), 66 | header = FALSE, 67 | format = 3) 68 | 69 | ``` 70 | 71 | ```{r, echo = FALSE, eval=FALSE} 72 | ## Tabela obtida 73 | library(sidrar) 74 | 75 | get_sidra(x = 1419, 76 | variable = 63, 77 | period = c(last = "12"), 78 | geo = "City", 79 | geo.filter = 5002704, 80 | classific = "c315", 81 | category = list(7169), 82 | header = FALSE, 83 | format = 3) 84 | 85 | ``` 86 | 87 |


88 | 89 | 2) In this example we will download the Gini index data for the 2014 GDP of the states, containing only the codes in the table (format = 1): 90 | 91 | ```{r, eval = FALSE} 92 | get_sidra(x = 5939, 93 | variable = 529, 94 | period = "2014", 95 | geo = "State", 96 | header = TRUE, 97 | format = 1) 98 | 99 | ``` 100 | 101 | ```{r, echo = FALSE, eval=FALSE} 102 | get_sidra(x = 5939, 103 | variable = 529, 104 | period = "2014", 105 | geo = "State", 106 | header = TRUE, 107 | format = 1) 108 | 109 | ``` 110 | 111 |


112 | 113 | 3) Finally, if you want to put the parameters of the API directly, just execute: 114 | 115 | ```{r, eval = FALSE} 116 | get_sidra(api = "/t/5938/n3/all/v/37/p/last%201/d/v37%200") 117 | 118 | ``` 119 | 120 | ```{r, echo = FALSE, eval=FALSE} 121 | get_sidra(api = "/t/5938/n3/all/v/37/p/last%201/d/v37%200") 122 | 123 | ``` 124 | 125 |


126 | 127 | For most users the data request is done via the online portal (<https://sidra.ibge.gov.br/>). In this case, if you want to save the parameters of the table selected in the portal for a later request of the same table via **sidrar**, you should copy the path in the red rectangle and pass it to the *api* argument in get_sidra: 128 | 129 |


130 | 131 | ![](fig2.png) 132 | 133 | ------- 134 | 135 | ### info_sidra 136 | 137 | In the previous examples we saw how to recover data from tables according to the parameters reported. However, if I do not know what the parameters are, how should I proceed? To verify the parameters (variables, classifications, periods, etc.) of a given table, simply use the function "info_sidra", informing the code of the table. The function returns a list with the possible parameters in the console. However, if wb = TRUE, the user can allow the result to be displayed in a web browser. 138 | 139 | ```{r, echo = FALSE, eval=FALSE} 140 | info_sidra(5939) 141 | ``` 142 | 143 | if **wb = TRUE**: 144 | 145 | ```{r, eval=FALSE} 146 | info_sidra(5939, wb = TRUE) 147 | ``` 148 | 149 | ```{r, echo=FALSE, error=TRUE} 150 | cat("Can the web browser be open? (y/n): ") 151 | ``` 152 | 153 | By placing **y**, we have in this example: 154 | 155 | ![](fig1.png) 156 | 157 | ------- 158 | 159 | ### search_sidra 160 | 161 | If the user wants to know if there is a table that contains a certain term / word, simply use the function **search_sidra** informing the words of interest. The function returns the tables containing these terms in their headings. 162 | 163 | ```{r, eval=FALSE} 164 | search_sidra(c("gini")) 165 | ``` 166 | -------------------------------------------------------------------------------- /R/info_sidra.R: -------------------------------------------------------------------------------- 1 | #' Listing all the parameters of a SIDRA's table 2 | #' 3 | #' It returns the parameters and their descriptions of a SIDRA's table. 4 | #' 5 | #' @param x A table from SIDRA's API. 6 | #' @param wb Logical. Should the metadata be open in the web browser? 7 | #' Default to FALSE. 8 | #' @return A list with the all table's parameters. 
#' @details
#' With \code{wb = FALSE} the table's description page is downloaded and
#' scraped into a list with elements \code{table}, \code{period},
#' \code{variable}, \code{classific_category} and \code{geo}. With
#' \code{wb = TRUE} the user is asked for permission and, if granted, the same
#' page is opened in the default browser on any platform; nothing is returned
#' in that case, and refusing permission raises an error.
#' @author Renato Prado Siqueira \email{rpradosiqueira@@gmail.com}
#' @seealso \code{\link{get_sidra}}
#' @examples
#' \dontrun{
#' info_sidra(1419)
#' }
#'
#' @keywords sidra IBGE
#' @export


info_sidra <- function(x, wb = FALSE) {

  # 'wb' drives an if(), so it has to be exactly one non-missing logical.
  if (!is.logical(wb) || length(wb) != 1L || is.na(wb)) {
    stop("'wb' argument must be TRUE or FALSE")
  }

  # Description page of table 'x'; used by both the browser and scrape paths.
  url <- paste0("http://api.sidra.ibge.gov.br/desctabapi.aspx?c=", x)

  if (wb) {

    answer <- readline(prompt = "Can the web browser be open? (y/n): ")

    if (!(answer %in% c("y", "Y"))) {
      stop(paste("Sorry, I need your permission to show the parameters of the table", x))
    }

    # browseURL() works on every platform; shell.exec() exists only on Windows.
    utils::browseURL(url)

    return(invisible(NULL))

  }

  a <- xml2::read_html(url)

  # Table number and title
  tab1 <- a %>%
    rvest::html_nodes("#lblNumeroTabela") %>%
    rvest::html_text()

  tab2 <- a %>%
    rvest::html_nodes("#lblNomeTabela") %>%
    rvest::html_text()

  table_info <- list("table" = paste0("Tabela ", tab1, ": ", tab2))


  # Period of availability
  p1 <- a %>%
    rvest::html_nodes("#lblPeriodoDisponibilidade") %>%
    rvest::html_text()

  period <- list("period" = p1)


  # Variables: taken from the second HTML table on the page. Each row is
  # split into its numeric code and the remaining description text.
  v2 <- a %>% rvest::html_table(fill = TRUE, trim = TRUE)
  v2 <- v2[[2]]

  v3 <- data.frame(cod = apply(v2, 1, stringr::str_extract,"[[:digit:]]+"),
                   desc = apply(v2, 1, stringr::str_replace_all, "([[:digit:]])", ""))
  v3$cod <- stringr::str_trim(v3$cod)
  v3$desc <- stringr::str_trim(v3$desc)
  # Once the digits are stripped, the "decimal places" note is an empty
  # template; remove it from the description.
  v3$desc <- stringr::str_replace(v3$desc, " - casas decimais: padr\uE3o = , m\uE1ximo =", "")

  variables <- list("variable" = v3)

  # Classifications and categories: classification codes appear as "C<number>"
  # somewhere in the page's tables.
  c1 <- rvest::html_nodes(a, "table") %>%
    rvest::html_table(fill = TRUE, trim = TRUE) %>%
    unlist() %>%
    stringr::str_extract("\\C[0-9]+") %>%
    stringr::str_subset("\\C[0-9]+") %>%
    base::tolower()

  if (length(c1) >= 1) {

    lc1 <- length(c1)

    c2 <- a %>% rvest::html_nodes(".tituloLinha:nth-child(4)") %>% rvest::html_text()

    c3 <- a %>% rvest::html_nodes(".tituloLinha:nth-child(5)") %>% rvest::html_text()

    c4 <- paste(c1, "=", c2, c3)

    c5 <- vector("list", lc1)

    for (i in seq_len(lc1)) {

      # The page numbers its classification blocks from 0.
      selector <- paste0("#lstClassificacoes_lblQuantidadeCategorias_", i - 1L, "+ ", "#tabPrincipal span")

      # Bracketed spans are replaced by "NA" and dropped; the remaining spans
      # alternate code, description, code, description, ...
      cats <- a %>% rvest::html_nodes(selector) %>%
        rvest::html_text() %>% stringr::str_replace("\\[[^]]*]", "NA")
      cats <- cats[cats != "NA"]

      c5[[i]] <- data.frame(cod = cats[seq(1, length(cats), 2)],
                            desc = cats[seq(2, length(cats), 2)])

    }

    names(c5) <- c4

    classific_category <- list("classific_category" = c5)

  } else {

    classific_category <- list("classific_category" = NULL)

  }



  # Territorial levels: maps the API's "n<number>" codes to the names this
  # package uses for geographic levels.
  trad.geo <- data.frame(cod = as.character(c("n1","n2","n3","n8","n9","n7","n13","n14","n15","n23","n6","n10",
                                              "n11","n102")),
                         cod2 = as.character(c("Brazil","Region","State","MesoRegion","MicroRegion",
                                               "MetroRegion","MetroRegionDiv","IRD","UrbAglo","PopArrang",
                                               "City", "District","subdistrict","Neighborhood")),
                         level = c(1:14),
                         order = c(1:5, 10:14, 6:9))


  # as.data.frame() on an unnamed piped vector yields a single column called
  # ".", which the merge() below joins on.
  n1 <- rvest::html_nodes(a, "table") %>%
    rvest::html_table(fill = TRUE, trim = TRUE) %>%
    unlist() %>%
    stringr::str_extract("N[0-9]+") %>%
    stringr::str_subset("N[0-9]+") %>%
    tolower() %>%
    as.data.frame()

  n2 <- a %>% rvest::html_nodes("p+ #tabPrincipal span:nth-child(4)") %>% rvest::html_text()
  n3 <- a %>% rvest::html_nodes("p+ #tabPrincipal span:nth-child(5)") %>% rvest::html_text()
  n4 <- data.frame(desc = paste(n2, n3))

  n5 <- cbind(n1, n4)

  ngeo <- merge(trad.geo, n5, by.x = "cod", by.y = ".")
  ngeo <- ngeo[c("cod2","desc")]
  names(ngeo) <- c("cod","desc")

  ngeo <- list(geo = ngeo)

  c(table_info, period, variables, classific_category, ngeo)

}

-------------------------------------------------------------------------------- /R/get_sidra.R: -------------------------------------------------------------------------------- 1 | #' Get SIDRA's table 2 | #' 3 | #' This function allows the user to connect with IBGE's (Instituto Brasileiro de 4 | #' Geografia e Estatistica) SIDRA API in a flexible way. \acronym{SIDRA} is the 5 | #' acronym to "Sistema IBGE de Recuperação Automática" and it is the system where 6 | #' IBGE makes aggregate data from their researches available. 7 | #' 8 | #' @usage get_sidra(x, variable = "allxp", period = "last", geo = "Brazil", 9 | #' geo.filter = NULL, classific = "all", category = "all", header = TRUE, 10 | #' format = 4, digits = "default", api = NULL) 11 | #' @param x A table from IBGE's SIDRA API. 12 | #' @param variable An integer vector of the variables' codes to be returned. 13 | #' Defaults to all variables with exception of "Total". 14 | #' @param period A character vector describing the period of data. Defaults to 15 | #' the last available. 16 | #' @param geo A character vector describing the geographic levels of the data. 17 | #' Defauts to "Brazil". 18 | #' @param geo.filter A (named) list object with the specific item of the 19 | #' geographic level or all itens of a determined higher geografic level. It should 20 | #' be used when geo argument is provided, otherwise all geographic units of 21 | #' 'geo' argument are considered. 22 | #' @param classific A character vector with the table's classification(s). Defaults to 23 | #' all. 
24 | #' @param category "all" or a list object with the categories of the classifications 25 | #' of \code{classific(s)} argument. Defaults to "all". 26 | #' @param header Logical. should the data frame be returned with the description 27 | #' names in header? 28 | #' @param format An integer ranging between 1 and 4. Default to 4. See more in details. 29 | #' @param digits An integer, "default" or "max". Default to "default" that returns the 30 | #' defaults digits to each variable. 31 | #' @param api A character with the api's parameters. Defaults to NULL. 32 | #' @details 33 | #' \code{period} can be a integer vector with names "first" and/or "last", 34 | #' or "all" or a simply character vector with date format %Y%m-%Y%m. 35 | #' 36 | #' The \code{geo} argument can be one of "Brazil", "Region", "State", 37 | #' "MesoRegion", "MicroRegion", "MetroRegion", "MetroRegionDiv", "IRD", 38 | #' "UrbAglo", "City", "District","subdistrict","Neighborhood","PopArrang". 39 | #' 'geo.filter' lists can/must be named with the same characters. 40 | #' 41 | #' When NULL, the arguments \code{classific} and \code{category} return all options 42 | #' available. 
43 | #' 44 | #' When argument \code{api} is not NULL, all others arguments informed are desconsidered 45 | #' 46 | #' The \code{format} argument can be set to: 47 | #' \itemize{ 48 | #' \item 1: Return only the descriptors' codes 49 | #' \item 2: Return only the descriptor's names 50 | #' \item 3: Return the codes and names of the geographic level and descriptors' names 51 | #' \item 4: Return the codes and names of the descriptors (Default) 52 | #' } 53 | #' @return The function returns a data frame printed by default functions 54 | #' @author Renato Prado Siqueira \email{rpradosiqueira@@gmail.com} 55 | #' @seealso \code{\link{info_sidra}} 56 | #' @examples 57 | #' \dontrun{ 58 | #' ## Requesting table 1419 (Consumer Price Index - IPCA) from the API 59 | #' ipca <- get_sidra(1419, 60 | #' variable = 69, 61 | #' period = c("201212","201401-201412"), 62 | #' geo = "City", 63 | #' geo.filter = list("State" = 50)) 64 | #' 65 | #' ## Urban population count from Census data (2010) for States and cities of Southest region. 
#' get_sidra(1378,
#'           variable = 93,
#'           geo = c("State","City"),
#'           geo.filter = list("Region" = 3, "Region" = 3),
#'           classific = c("c1"),
#'           category = list(1))
#'
#' ## Number of informants by state in the Inventory Research (last data available)
#' get_sidra(api = "/t/254/n1/all/n3/all/v/151/p/last%201/c162/118423/c163/0")
#'
#' }
#'
#' @keywords sidra IBGE
#' @export

get_sidra <- function(x,
                      variable = "allxp",
                      period = "last",
                      geo = "Brazil",
                      geo.filter = NULL,
                      classific = "all",
                      category = "all",
                      header = TRUE,
                      format = 4,
                      digits = "default",
                      api = NULL) {

  base_url <- "https://apisidra.ibge.gov.br/values"

  if (is.null(api)) {

    if (length(x) != 1) {
      stop("Only one table is allowed")
    }

    # Variables: several codes are sent as one comma-separated list.
    variable <- paste(variable, collapse = ",")

    # Each URL fragment is built (and validated) by a dedicated helper, in
    # the same order the arguments were originally checked.
    path_geo <- .sidra_geo_path(geo, geo.filter)
    path_classific <- .sidra_classific_path(x, classific, category)
    period <- .sidra_period_path(period)

    # header: guard against NULL / NA / vectors before comparing.
    header_ok <- length(header) == 1L && !is.na(header)
    if (header_ok && header == TRUE) {
      path_header <- "y"
    } else if (header_ok && header == FALSE) {
      path_header <- "n"
    } else {
      stop("Only TRUE or FALSE")
    }

    # format: codes and/or descriptions. An invalid value now really falls
    # back to the default instead of leaking the raw value into the URL.
    format_paths <- c("1" = "/f/c", "2" = "/f/n", "3" = "/f/u", "4" = "/f/a")
    if (is.null(format)) {
      format <- "/f/a"
    } else if (is.atomic(format) && length(format) == 1L &&
               as.character(format) %in% names(format_paths)) {
      format <- format_paths[[as.character(format)]]
    } else {
      warning("The format argument is misspecified. Considering defaut specification.")
      format <- "/f/a"
    }

    # digits: "default", "max" or a whole number between 0 and 9.
    if (is.null(digits) || .sidra_is_string(digits, "default")) {
      digits <- "/d/s"
    } else if (.sidra_is_string(digits, "max")) {
      digits <- "/d/m"
    } else {
      digits_num <- NA_real_
      if (is.atomic(digits) && length(digits) == 1L) {
        digits_num <- suppressWarnings(as.numeric(digits))
      }
      if (!is.na(digits_num) && digits_num >= 0 && digits_num <= 9 &&
          digits_num == round(digits_num)) {
        digits <- paste0("/d/", digits_num)
      } else {
        warning("The digits argument is misspecified. Considering defaut specification.")
        digits <- "/d/s"
      }
    }

    url <- paste0(base_url,
                  "/t/", x, "/",
                  path_geo,
                  "/p/", period,
                  "/v/", variable,
                  path_classific, format, "/h/",
                  path_header,
                  digits)

    variable_msg <- sprintf("The table %s does not contain the %s variable", x, variable)

    path <- httr::content(httr::GET(url), as = "text")

  } else {

    if (!is.character(api)) stop("The 'api' argument must be a character vector")
    if (length(api) != 1) stop("The 'api' argument must have the length equals to 1")

    message("All others arguments are desconsidered when 'api' is informed")

    path <- httr::content(httr::GET(paste0(base_url, api)), as = "text")

    # A raw API path always comes back with the header row.
    path_header <- "y"

    # 'x' and 'variable' are not meaningful here, so the server's own
    # message is reported if the variable is rejected.
    variable_msg <- NULL

  }

  .sidra_parse_response(path, path_header, variable_msg)

}

# TRUE only when `value` is a single, non-missing string equal to `target`.
# Safe to use inside `if` / `||` whatever the length or type of `value`.
.sidra_is_string <- function(value, target) {
  is.character(value) && length(value) == 1L && !is.na(value) && value == target
}

# Build the territorial fragment of the URL (e.g. "n6/in%20n3%2050") from the
# 'geo' levels and their optional 'geo.filter' restrictions.
.sidra_geo_path <- function(geo, geo.filter) {

  # Territorial levels, ordered from the broadest to the narrowest; the
  # position in these vectors is the level used to validate the filters.
  geo_cod <- c("n1", "n2", "n3", "n8", "n9", "n7", "n13", "n14", "n15", "n23",
               "n6", "n10", "n11", "n102")
  geo_desc <- c("Brazil", "Region", "State", "MesoRegion", "MicroRegion",
                "MetroRegion", "MetroRegionDiv", "IRD", "UrbAglo", "PopArrang",
                "City", "District", "subdistrict", "Neighborhood")

  unknown_geo <- geo[!(geo %in% geo_desc)]
  if (length(unknown_geo) > 0) {
    stop(paste("Some element in 'geo' argument is misspecified:",
               paste0(unknown_geo, collapse = " & ")))
  }

  if (length(geo) == 0L || .sidra_is_string(geo, "Brazil")) {
    if (!is.null(geo.filter)) {
      message("No filter is necessary in 'geo.filter' argument once 'geo' is set to 'Brazil' (default)")
    }
    return("n1/1")
  }

  if (length(geo.filter) > length(geo)) {
    stop("The geo.filter argument must have the same or less length than 'geo'")
  }

  # Normalise the filters to one entry per 'geo' element: missing entries
  # mean "all", and an unnamed entry filters on its own level.
  filters <- as.list(geo.filter)
  filter_names <- names(filters)
  if (is.null(filter_names)) {
    filter_names <- rep("", length(filters))
  }
  n_missing <- length(geo) - length(filters)
  filters <- c(filters, rep(list("all"), n_missing))
  filter_names <- c(filter_names, rep("", n_missing))

  unnamed <- is.na(filter_names) | filter_names == ""
  filter_names[unnamed] <- geo[unnamed]
  filters[vapply(filters, is.null, logical(1))] <- list("all")

  # match() keeps the caller's order (merge() would sort by key and pair the
  # wrong filter with the wrong level).
  geo_level <- match(geo, geo_desc)
  filter_level <- match(filter_names, geo_desc)

  # A filter must name a known level that is not narrower than its 'geo'.
  if (anyNA(filter_level) || any(geo_level < filter_level)) {
    stop("Some element in 'geo.filter' is misspecified")
  }

  pieces <- vapply(seq_along(geo), function(h) {
    codes <- paste(filters[[h]], collapse = ",")
    if (geo_level[h] == filter_level[h]) {
      paste0(geo_cod[filter_level[h]], "/", codes)
    } else {
      paste0(geo_cod[geo_level[h]], "/in%20", geo_cod[filter_level[h]], "%20", codes)
    }
  }, character(1))

  paste(pieces, collapse = "/")

}

# Build the classification/category fragment of the URL (e.g. "/c1/1/c2/all").
# With classific = "all" the table's classifications are read from the
# table description page of the API.
.sidra_classific_path <- function(x, classific, category) {

  category_is_all <- is.null(category) || .sidra_is_string(category, "all")

  if (is.null(classific) || .sidra_is_string(classific, "all")) {

    # Only tell the user when a category they actually supplied is ignored.
    if (!category_is_all) {
      message("Considering all categories once 'classific' was set to 'all' (default)")
    }

    found <- xml2::read_html(paste0("https://apisidra.ibge.gov.br/desctabapi.aspx?c=", x))
    found <- rvest::html_nodes(found, "table")
    found <- rvest::html_table(found, fill = TRUE, trim = TRUE)
    found <- unlist(found)
    found <- stringr::str_extract(found, "\\C[0-9]+")

    if (sum(!is.na(found)) == 0) {
      return("")
    }

    found <- stringr::str_subset(found, "\\C[0-9]+")
    found <- base::tolower(found)
    return(paste0("/", paste0(found, "/all", collapse = "/")))

  }

  if (category_is_all) {
    return(paste0("/", paste0(classific, "/all", collapse = "/")))
  }

  if (!is.list(category)) {
    stop("If not 'all', 'category' must be an object of type 'list'")
  }

  if (length(category) > length(classific)) {
    stop("The length of 'category' must be equal or less than 'classific' argument")
  }

  # One comma-separated selection per supplied category; classifications
  # without a matching category are requested in full.
  selected <- vapply(category, function(codes) {
    if (length(codes) == 0L) "all" else paste0(codes, collapse = ",")
  }, character(1), USE.NAMES = FALSE)
  selected <- c(selected, rep("all", length(classific) - length(category)))

  paste0("/", paste0(classific, "/", selected, collapse = "/"))

}

# Build the period fragment: either a comma-separated list of periods or,
# for a single element named 'last'/'first', "<name>%20<n>".
.sidra_period_path <- function(period) {

  period_names <- names(period)

  if (is.null(period_names)) {
    if (!is.character(period)) {
      stop("The 'period' argument must be an object of type character")
    }
    return(paste0(period, collapse = ","))
  }

  if (length(period) != 1) {
    stop("only one element is possible when named vector ('last' or 'first') is present")
  }

  if (!(period_names %in% c("last", "first"))) {
    stop("The element's 'name' attribute must be 'last' or 'first'")
  }

  paste0(period_names, "%20", period)

}

# Turn the raw text returned by the API into a data frame, translating the
# API's plain-text error replies into R errors. `variable_msg` is the error
# raised when the API rejects the variable; when NULL the server's own text
# is reported instead.
.sidra_parse_response <- function(path, path_header, variable_msg = NULL) {

  if (!is.character(path) || length(path) != 1L || is.na(path)) {
    stop("The SIDRA API returned an empty response")
  }

  # identical() keeps these checks NA-safe when the reply has few words.
  words <- strsplit(path, " ")[[1]]

  if (identical(words[2], "P")) {
    stop("The 'period' argument is misspecified.")
  }

  if (identical(words[1], "Tabela") && identical(words[3], "Tabela")) {
    stop("This table does not exists.")
  }

  if (identical(words[2], "V")) {
    stop(if (is.null(variable_msg)) path else variable_msg)
  }

  if (grepl("Server Error", path)) {
    stop("Server error: Some argument is misspecified or (probabily) The query will result in a table with more than 20k values.
         In this case, you may address to the SIDRA's site and request the data manually to be delivered by an email account.")
  }

  # Anything that is not JSON at this point is an error text from the API.
  parsed <- tryCatch(rjson::fromJSON(path), error = function(e) NULL)
  if (is.null(parsed)) {
    stop(path)
  }

  result <- as.data.frame(do.call("rbind", parsed))
  result <- as.data.frame(lapply(result, unlist), stringsAsFactors = FALSE)

  if (path_header == "y") {
    # The first record carries the column titles.
    colnames(result) <- unlist(result[1, ])
    result <- result[-1, , drop = FALSE]
  }

  # The value column comes as text; ".." marks a missing value.
  for (j in which(colnames(result) %in% c("V", "Valor"))) {
    values <- result[[j]]
    result[[j]] <- suppressWarnings(ifelse(values != "..", as.numeric(values), NA))
  }

  result

}