├── .gitignore
├── vignettes
├── fig1.png
├── fig2.png
└── Introduction_to_sidrar.Rmd
├── CRAN-SUBMISSION
├── .Rbuildignore
├── NAMESPACE
├── cran-comments.md
├── sidrar.Rproj
├── man
├── search_sidra.Rd
├── info_sidra.Rd
└── get_sidra.Rd
├── R
├── search_sidra.R
├── info_sidra.R
└── get_sidra.R
├── NEWS.md
├── DESCRIPTION
├── README.Rmd
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 |
--------------------------------------------------------------------------------
/vignettes/fig1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rpradosiqueira/sidrar/HEAD/vignettes/fig1.png
--------------------------------------------------------------------------------
/vignettes/fig2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rpradosiqueira/sidrar/HEAD/vignettes/fig2.png
--------------------------------------------------------------------------------
/CRAN-SUBMISSION:
--------------------------------------------------------------------------------
1 | Version: 0.2.8
2 | Date: 2022-06-03 20:31:24 UTC
3 | SHA: 850aba36c77e2d66d4973674d0ec0a28260eb95e
4 |
--------------------------------------------------------------------------------
/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | ^README\.Rmd$
4 | ^README-.*\.png$
5 | ^cran-comments\.md$
6 | ^CRAN-RELEASE$
7 | ^CRAN-SUBMISSION$
8 |
--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
1 | # Generated by roxygen2: do not edit by hand
2 |
3 | export(get_sidra)
4 | export(info_sidra)
5 | export(search_sidra)
6 | importFrom(magrittr,"%>%")
7 |
--------------------------------------------------------------------------------
/cran-comments.md:
--------------------------------------------------------------------------------
1 | ## New R CMD check results
2 |
3 | 0 errors | 0 warnings | 0 notes
4 |
5 | ## Test environments
6 | * local OS X install, R 3.3.2
7 | * win-builder (devel and release)
8 |
9 | ## R CMD check results
10 |
11 | 0 errors | 0 warnings | 0 notes
--------------------------------------------------------------------------------
/sidrar.Rproj:
--------------------------------------------------------------------------------
1 | Version: 1.0
2 |
3 | RestoreWorkspace: Default
4 | SaveWorkspace: Default
5 | AlwaysSaveHistory: Default
6 |
7 | EnableCodeIndexing: Yes
8 | UseSpacesForTab: Yes
9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 |
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 |
15 | BuildType: Package
16 | PackageUseDevtools: Yes
17 | PackageInstallArgs: --no-multiarch --with-keep.source
18 |
--------------------------------------------------------------------------------
/man/search_sidra.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/search_sidra.R
3 | \name{search_sidra}
4 | \alias{search_sidra}
5 | \title{Search SIDRA's tables with determined term(s)}
6 | \usage{
7 | search_sidra(x)
8 | }
9 | \arguments{
10 | \item{x}{A character vector with the term(s)/word(s) to search.}
11 | }
12 | \value{
13 | A character vector with the tables' names.
14 | }
15 | \description{
16 | It returns all SIDRA's tables with determined term
17 | }
18 | \examples{
19 | \dontrun{
20 | search_sidra("contas nacionais")
21 | }
22 |
23 | }
24 | \seealso{
25 | \code{\link{get_sidra}}
26 | }
27 | \author{
28 | Renato Prado Siqueira \email{rpradosiqueira@gmail.com}
29 | }
30 | \keyword{IBGE}
31 | \keyword{sidra}
32 |
--------------------------------------------------------------------------------
/man/info_sidra.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/info_sidra.R
3 | \name{info_sidra}
4 | \alias{info_sidra}
5 | \title{Listing all the parameters of a SIDRA's table}
6 | \usage{
7 | info_sidra(x, wb = FALSE)
8 | }
9 | \arguments{
10 | \item{x}{A table from SIDRA's API.}
11 |
12 | \item{wb}{Logical. Should the metadata be open in the web browser?
13 | Default to FALSE.}
14 | }
15 | \value{
16 | A list with the all table's parameters.
17 | }
18 | \description{
19 | It returns the parameters and their descriptions of a SIDRA's table.
20 | }
21 | \examples{
22 | \dontrun{
23 | info_sidra(1419)
24 | }
25 |
26 | }
27 | \seealso{
28 | \code{\link{get_sidra}}
29 | }
30 | \author{
31 | Renato Prado Siqueira \email{rpradosiqueira@gmail.com}
32 | }
33 | \keyword{IBGE}
34 | \keyword{sidra}
35 |
--------------------------------------------------------------------------------
/R/search_sidra.R:
--------------------------------------------------------------------------------
1 | #' Search SIDRA's tables with determined term(s)
2 | #'
3 | #' Sends the term(s) to the search page of the SIDRA website and returns the
4 | #' text of every table link found in the result page.
5 | #'
6 | #' @param x A character vector with the term(s)/word(s) to search. All
7 | #'   elements are combined into a single query.
8 | #' @return A character vector with the tables' names.
9 | #' @author Renato Prado Siqueira \email{rpradosiqueira@@gmail.com}
10 | #' @seealso \code{\link{get_sidra}}
11 | #' @examples
12 | #' \dontrun{
13 | #' search_sidra("contas nacionais")
14 | #' }
15 | #'
16 | #' @keywords sidra IBGE
17 | #' @importFrom magrittr %>%
18 | #' @export
19 |
20 | search_sidra <- function(x) {
21 |
22 |   # Percent-encode every term so that accented letters and reserved
23 |   # characters (e.g. "&", "#", "+") reach the server as part of the query
24 |   # instead of corrupting the URL. Terms are converted to UTF-8 first so the
25 |   # escapes do not depend on the native encoding of the session.
26 |   terms <- vapply(
27 |     enc2utf8(as.character(x)),
28 |     utils::URLencode,
29 |     character(1),
30 |     reserved = TRUE,
31 |     USE.NAMES = FALSE
32 |   )
33 |
34 |   # URLencode() leaves terms that already contain a %xx escape untouched, so
35 |   # any literal space left in them is still replaced by hand.
36 |   terms <- gsub(" ", "%20", terms, fixed = TRUE)
37 |
38 |   query <- paste0(terms, collapse = "%20")
39 |
40 |   page <- xml2::read_html(paste0("https://sidra.ibge.gov.br/Busca?q=", query))
41 |
42 |   page %>%
43 |     rvest::html_nodes(".busca-link-tabela") %>%
44 |     rvest::html_text()
45 | }
46 |
--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
1 | # sidrar 0.2.9
2 |
3 | * Address SSL error
4 |
5 | # sidrar 0.2.8
6 |
7 | * Minor adjustments to address CRAN warnings
8 |
9 | # sidrar 0.2.7
10 |
11 | * Adjustments in the vignette for CRAN
12 |
13 | # sidrar 0.2.6
14 |
15 | * Fixed bug in get_sidra (Issue #10)
16 | * Minor change in the Vignette
17 |
18 | # sidrar 0.2.5
19 |
20 | * Fixed bugs in get_sidra (Issue #5, #6)
21 | * "tidyr" and "dplyr" package dependency removed
22 |
23 | # sidrar 0.2.4
24 |
25 | * Fixed bug in search_sidra (Issue #2)
26 | * Better error messages
27 | * Vignette updated
28 |
29 | # sidrar 0.2.1
30 |
31 | * Fixed list-column in resultant data.frame in get_sidra (Issue #1)
32 |
33 | # sidrar 0.2.0
34 |
35 | * New argument in get_sidra ("api")
36 |
37 | # sidrar 0.1.1
38 |
39 | * Better messages depending on the input arguments
40 |
41 | # sidrar 0.1.0
42 |
43 | * Initial version
44 |
45 |
46 |
47 |
--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
1 | Package: sidrar
2 | Type: Package
3 | Title: An Interface to IBGE's SIDRA API
4 | Version: 0.2.9
5 | Authors@R: person("Renato", "Prado Siqueira", email = "rpradosiqueira@gmail.com", role = c("aut", "cre"))
6 | Description: Allows the user to connect with IBGE's (Instituto Brasileiro de
7 |     Geografia e Estatistica, see <https://www.ibge.gov.br/> for more information)
8 |     SIDRA API in a flexible way. SIDRA is the acronym for "Sistema IBGE de
9 |     Recuperacao Automatica" and is the system where IBGE makes aggregate data
10 |     from its surveys available.
11 | Depends: R (>= 3.2.0)
12 | License: GPL-3
13 | Encoding: UTF-8
14 | URL: https://github.com/rpradosiqueira/sidrar/
15 | BugReports: https://github.com/rpradosiqueira/sidrar/issues/
16 | Imports:
17 | magrittr,
18 | httr,
19 | rjson,
20 | rvest,
21 | stringr,
22 | xml2
23 | RoxygenNote: 7.1.2
24 | Suggests:
25 | knitr,
26 | rmarkdown
27 | VignetteBuilder: knitr
28 |
--------------------------------------------------------------------------------
/README.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | output: github_document
3 | ---
4 |
5 |
6 |
7 | ```{r, echo = FALSE}
8 | knitr::opts_chunk$set(
9 | collapse = TRUE,
10 | comment = "#>",
11 | fig.path = "README-"
12 | )
13 | ```
14 |
15 | # sidrar
16 |
17 | The goal of *sidrar* is to provide direct access to the data of IBGE's (Brazilian Institute of Geography and Statistics) SIDRA API within the R environment in an easy and flexible way. SIDRA is the acronym for "Sistema IBGE de Recuperação Automática" and it is the system where IBGE makes aggregate data from its surveys available.
18 |
19 | ## Installation
20 |
21 | Install the release version from CRAN:
22 |
23 | ```{r , eval = FALSE}
24 | install.packages("sidrar")
25 | ```
26 |
27 | or the development version from GitHub:
28 |
29 | ```{r , eval = FALSE}
30 | # install.packages("devtools")
31 | devtools::install_github("rpradosiqueira/sidrar")
32 | ```
33 |
34 | ## Functions
35 |
36 | For the time being, the "sidrar" package contains only three functions:
37 |
38 | ```{r, eval=FALSE}
39 | get_sidra It recovers data from the given table
40 | according to the parameters
41 |
42 | info_sidra It allows you to check what parameters
43 | are available for a table
44 |
45 | search_sidra It searches which tables have a particular
46 | word in their names
47 | ```
48 |
49 | ## Example
50 |
51 | Let's assume that we want the IPCA (Índice de Preços ao Consumidor Amplo) for the city of Campo Grande/MS. However, we want to recover only the overall percentage rate in the last 12 months. To do this simply execute:
52 |
53 | ```{r, eval = FALSE}
54 | library(sidrar)
55 |
56 | get_sidra(x = 1419,
57 | variable = 63,
58 | period = c(last = "12"),
59 | geo = "City",
60 | geo.filter = 5002704,
61 | classific = "c315",
62 | category = list(7169),
63 | header = FALSE,
64 | format = 3)
65 |
66 | ```
67 |
68 | For more examples, see the vignette ["Introduction to sidrar"](https://CRAN.R-project.org/package=sidrar/vignettes/Introduction_to_sidrar.html).
69 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | sidrar
4 | ======
5 |
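6 | [![CRAN status](https://www.r-pkg.org/badges/version/sidrar)](https://CRAN.R-project.org/package=sidrar) [![CRAN downloads](https://cranlogs.r-pkg.org/badges/sidrar)](https://CRAN.R-project.org/package=sidrar)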
7 |
8 |
9 | The goal of *sidrar* is to provide direct access to the data of IBGE's (Brazilian Institute of Geography and Statistics) SIDRA API within the R environment in an easy and flexible way. SIDRA is the acronym for "Sistema IBGE de Recuperação Automática" and it is the system where IBGE makes aggregate data from its surveys available.
10 |
11 | Installation
12 | ------------
13 |
14 | Install the release version from CRAN:
15 |
16 | ``` r
17 | install.packages("sidrar")
18 | ```
19 |
20 | or the development version from GitHub:
21 |
22 | ``` r
23 | # install.packages("devtools")
24 | devtools::install_github("rpradosiqueira/sidrar")
25 | ```
26 |
27 | Functions
28 | ---------
29 |
30 | For the time being, the "sidrar" package contains only three functions:
31 |
32 | ``` r
33 | get_sidra It recovers data from the given table
34 | according to the parameters
35 |
36 | info_sidra It allows you to check what parameters
37 | are available for a table
38 |
39 | search_sidra It searches which tables have a particular
40 | word in their names
41 | ```
42 |
43 | Example
44 | -------
45 |
46 | Let's assume that we want the IPCA (Índice de Preços ao Consumidor Amplo) for the city of Campo Grande/MS. However, we want to recover only the overall percentage rate in the last 12 months. To do this simply execute:
47 |
48 | ``` r
49 | library(sidrar)
50 |
51 | get_sidra(x = 1419,
52 | variable = 63,
53 | period = c(last = "12"),
54 | geo = "City",
55 | geo.filter = 5002704,
56 | classific = "c315",
57 | category = list(7169),
58 | header = FALSE,
59 | format = 3)
60 | ```
61 |
62 | For more examples, see the vignette ["Introduction to sidrar"](https://CRAN.R-project.org/package=sidrar/vignettes/Introduction_to_sidrar.html).
63 |
--------------------------------------------------------------------------------
/man/get_sidra.Rd:
--------------------------------------------------------------------------------
1 | % Generated by roxygen2: do not edit by hand
2 | % Please edit documentation in R/get_sidra.R
3 | \name{get_sidra}
4 | \alias{get_sidra}
5 | \title{Get SIDRA's table}
6 | \usage{
7 | get_sidra(x, variable = "allxp", period = "last", geo = "Brazil",
8 | geo.filter = NULL, classific = "all", category = "all", header = TRUE,
9 | format = 4, digits = "default", api = NULL)
10 | }
11 | \arguments{
12 | \item{x}{A table from IBGE's SIDRA API.}
13 |
14 | \item{variable}{An integer vector of the variables' codes to be returned.
15 | Defaults to all variables with exception of "Total".}
16 |
17 | \item{period}{A character vector describing the period of data. Defaults to
18 | the last available.}
19 |
20 | \item{geo}{A character vector describing the geographic levels of the data.
21 | Defauts to "Brazil".}
22 |
23 | \item{geo.filter}{A (named) list object with the specific item of the
24 | geographic level or all itens of a determined higher geografic level. It should
25 | be used when geo argument is provided, otherwise all geographic units of
26 | 'geo' argument are considered.}
27 |
28 | \item{classific}{A character vector with the table's classification(s). Defaults to
29 | all.}
30 |
31 | \item{category}{"all" or a list object with the categories of the classifications
32 | of \code{classific(s)} argument. Defaults to "all".}
33 |
34 | \item{header}{Logical. should the data frame be returned with the description
35 | names in header?}
36 |
37 | \item{format}{An integer ranging between 1 and 4. Default to 4. See more in details.}
38 |
39 | \item{digits}{An integer, "default" or "max". Default to "default" that returns the
40 | defaults digits to each variable.}
41 |
42 | \item{api}{A character with the api's parameters. Defaults to NULL.}
43 | }
44 | \value{
45 | The function returns a data frame printed by default functions
46 | }
47 | \description{
48 | This function allows the user to connect with IBGE's (Instituto Brasileiro de
49 | Geografia e Estatistica) SIDRA API in a flexible way. \acronym{SIDRA} is the
50 | acronym to "Sistema IBGE de Recuperação Automática" and it is the system where
51 | IBGE makes aggregate data from their researches available.
52 | }
53 | \details{
54 | \code{period} can be a integer vector with names "first" and/or "last",
55 | or "all" or a simply character vector with date format %Y%m-%Y%m.
56 |
57 | The \code{geo} argument can be one of "Brazil", "Region", "State",
58 | "MesoRegion", "MicroRegion", "MetroRegion", "MetroRegionDiv", "IRD",
59 | "UrbAglo", "City", "District","subdistrict","Neighborhood","PopArrang".
60 | 'geo.filter' lists can/must be named with the same characters.
61 |
62 | When NULL, the arguments \code{classific} and \code{category} return all options
63 | available.
64 |
65 | When argument \code{api} is not NULL, all others arguments informed are desconsidered
66 |
67 | The \code{format} argument can be set to:
68 | \itemize{
69 | \item 1: Return only the descriptors' codes
70 | \item 2: Return only the descriptor's names
71 | \item 3: Return the codes and names of the geographic level and descriptors' names
72 | \item 4: Return the codes and names of the descriptors (Default)
73 | }
74 | }
75 | \examples{
76 | \dontrun{
77 | ## Requesting table 1419 (Consumer Price Index - IPCA) from the API
78 | ipca <- get_sidra(1419,
79 | variable = 69,
80 | period = c("201212","201401-201412"),
81 | geo = "City",
82 | geo.filter = list("State" = 50))
83 |
84 | ## Urban population count from Census data (2010) for States and cities of Southest region.
85 | get_sidra(1378,
86 | variable = 93,
87 | geo = c("State","City"),
88 | geo.filter = list("Region" = 3, "Region" = 3),
89 | classific = c("c1"),
90 | category = list(1))
91 |
92 | ## Number of informants by state in the Inventory Research (last data available)
93 | get_sidra(api = "/t/254/n1/all/n3/all/v/151/p/last\%201/c162/118423/c163/0")
94 |
95 | }
96 |
97 | }
98 | \seealso{
99 | \code{\link{info_sidra}}
100 | }
101 | \author{
102 | Renato Prado Siqueira \email{rpradosiqueira@gmail.com}
103 | }
104 | \keyword{IBGE}
105 | \keyword{sidra}
106 |
--------------------------------------------------------------------------------
/vignettes/Introduction_to_sidrar.Rmd:
--------------------------------------------------------------------------------
1 | ---
2 | title: "Introduction to sidrar"
3 | author: "Renato Prado Siqueira"
4 | date: "`r Sys.Date()`"
5 | output: rmarkdown::html_vignette
6 | vignette: >
7 | %\VignetteIndexEntry{Introduction to sidrar}
8 | %\VignetteEngine{knitr::rmarkdown}
9 | \usepackage[utf8]{inputenc}
10 | ---
11 |
12 | ```{r setup, include=FALSE}
13 | knitr::opts_chunk$set(echo = TRUE, collapse = TRUE, comment = "#>")
14 | ```
15 |
16 |
17 | ## R Interface to the SIDRA's API
18 |
19 | The "sidrar" R package seeks to provide direct access to the data of SIDRA - Sistema IBGE de Recuperação Automática - within the R environment in an easy and flexible way.
20 |
21 | ## Installation
22 |
23 | To install the version available on CRAN:
24 |
25 | ```{r, eval=FALSE}
26 | install.packages("sidrar")
27 | ```
28 |
29 | To install the development version hosted on Github:
30 |
31 | ```{r, eval=FALSE}
32 | library(devtools)
33 | install_github("rpradosiqueira/sidrar")
34 | ```
35 |
36 | ## Functions
37 |
38 | For the time being, the "sidrar" package contains only three functions:
39 |
40 | ```{r, eval=FALSE}
41 | get_sidra It recovers data from the given table
42 | according to the parameters
43 |
44 | info_sidra It allows you to check what parameters
45 | are available for a table via a web browser
46 |
47 | search_sidra It searches which tables have a particular
48 | word in their names
49 | ```
50 |
51 | ## Examples
52 | ### get_sidra
53 |
54 | 1) Let's assume that we want the IPCA (Índice de Preços ao Consumidor Amplo) for the city of Campo Grande/MS. However, we will only recover the overall percentage rate in the last 12 months. To do this simply execute:
55 |
56 | ```{r, eval = FALSE}
57 | library(sidrar)
58 |
59 | get_sidra(x = 1419,
60 | variable = 63,
61 | period = c("last" = 12),
62 | geo = "City",
63 | geo.filter = 5002704,
64 | classific = "c315",
65 | category = list(7169),
66 | header = FALSE,
67 | format = 3)
68 |
69 | ```
70 |
71 | ```{r, echo = FALSE, eval=FALSE}
72 | ## Tabela obtida
73 | library(sidrar)
74 |
75 | get_sidra(x = 1419,
76 | variable = 63,
77 | period = c(last = "12"),
78 | geo = "City",
79 | geo.filter = 5002704,
80 | classific = "c315",
81 | category = list(7169),
82 | header = FALSE,
83 | format = 3)
84 |
85 | ```
86 |
87 |
88 |
89 | 2) In this example we will download the Gini index data for the 2014 GDP of the states, containing only the codes in the table (format = 1):
90 |
91 | ```{r, eval = FALSE}
92 | get_sidra(x = 5939,
93 | variable = 529,
94 | period = "2014",
95 | geo = "State",
96 | header = TRUE,
97 | format = 1)
98 |
99 | ```
100 |
101 | ```{r, echo = FALSE, eval=FALSE}
102 | get_sidra(x = 5939,
103 | variable = 529,
104 | period = "2014",
105 | geo = "State",
106 | header = TRUE,
107 | format = 1)
108 |
109 | ```
110 |
111 |
112 |
113 | 3) Finally, if you want to put the parameters of the API directly, just execute:
114 |
115 | ```{r, eval = FALSE}
116 | get_sidra(api = "/t/5938/n3/all/v/37/p/last%201/d/v37%200")
117 |
118 | ```
119 |
120 | ```{r, echo = FALSE, eval=FALSE}
121 | get_sidra(api = "/t/5938/n3/all/v/37/p/last%201/d/v37%200")
122 |
123 | ```
124 |
125 |
126 |
127 | For most users the data request is done via the online portal (<https://sidra.ibge.gov.br/>). In this case, if you want to save the parameters of the table selected in the portal for a later request of the same table via **sidrar**, you should copy the path in the red rectangle and pass it to the *api* argument of get_sidra:
128 |
129 |
130 |
131 | ![](fig1.png)
132 |
133 | -------
134 |
135 | ### info_sidra
136 |
137 | The previous examples showed how to retrieve data from tables according to the given parameters. But what if you do not know which parameters a table accepts? To check the parameters (variables, classifications, periods, etc.) of a given table, simply use the function "info_sidra", giving it the code of the table. The function returns a list with the possible parameters in the console. Alternatively, if wb = TRUE, the result is displayed in a web browser (after asking for the user's permission).
138 |
139 | ```{r, echo = FALSE, eval=FALSE}
140 | info_sidra(5939)
141 | ```
142 |
143 | if **wb = TRUE**:
144 |
145 | ```{r, eval=FALSE}
146 | info_sidra(5939, wb = TRUE)
147 | ```
148 |
149 | ```{r, echo=FALSE, error=TRUE}
150 | cat("Can the web browser be open? (y/n): ")
151 | ```
152 |
153 | By placing **y**, we have in this example:
154 |
155 | ![](fig2.png)
156 |
157 | -------
158 |
159 | ### search_sidra
160 |
161 | If the user wants to know whether there is a table that contains a certain term/word, simply use the function **search_sidra**, giving it the words of interest. The function returns the tables containing these terms in their headings.
162 |
163 | ```{r, eval=FALSE}
164 | search_sidra(c("gini"))
165 | ```
166 |
--------------------------------------------------------------------------------
/R/info_sidra.R:
--------------------------------------------------------------------------------
1 | #' Listing all the parameters of a SIDRA's table
2 | #'
3 | #' It returns the parameters and their descriptions of a SIDRA's table, read
4 | #' from the table's description page in the SIDRA API.
5 | #'
6 | #' @param x A table from SIDRA's API. Only one table is allowed.
7 | #' @param wb Logical. Should the metadata be opened in the web browser?
8 | #'   Defaults to FALSE.
9 | #' @return A list with all the table's parameters, with elements
10 | #'   \code{table}, \code{period}, \code{variable}, \code{classific_category}
11 | #'   and \code{geo}. When \code{wb = TRUE} the description page is opened in
12 | #'   the web browser instead and \code{NULL} is returned invisibly.
13 | #' @author Renato Prado Siqueira \email{rpradosiqueira@@gmail.com}
14 | #' @seealso \code{\link{get_sidra}}
15 | #' @examples
16 | #' \dontrun{
17 | #' info_sidra(1419)
18 | #' }
19 | #'
20 | #' @keywords sidra IBGE
21 | #' @export
22 |
23 |
24 | info_sidra <- function(x, wb = FALSE) {
25 |
26 |   # Reject NA and non-scalar values too: they used to crash inside `||`
27 |   # instead of producing this message.
28 |   if (!is.logical(wb) || length(wb) != 1L || is.na(wb)) {
29 |     stop("'wb' argument must be TRUE or FALSE")
30 |   }
31 |
32 |   # Same restriction (and message) as get_sidra(): the URL is built for a
33 |   # single table code.
34 |   if (length(x) != 1) {
35 |     stop("Only one table is allowed")
36 |   }
37 |
38 |   url <- paste0("http://api.sidra.ibge.gov.br/desctabapi.aspx?c=", x)
39 |
40 |   if (wb) {
41 |
42 |     p <- readline(prompt = "Can the web browser be open? (y/n): ")
43 |
44 |     if (!(p %in% c("y", "Y"))) {
45 |       stop(paste("Sorry, I need your permission to show the parameters of the table", x))
46 |     }
47 |
48 |     # shell.exec() is only available on Windows; browseURL() opens the page
49 |     # on every platform.
50 |     utils::browseURL(url)
51 |
52 |     return(invisible(NULL))
53 |
54 |   }
55 |
56 |   a <- xml2::read_html(url)
57 |
58 |   # Table number and title
59 |   tab1 <- a %>%
60 |     rvest::html_nodes("#lblNumeroTabela") %>%
61 |     rvest::html_text()
62 |
63 |   tab2 <- a %>%
64 |     rvest::html_nodes("#lblNomeTabela") %>%
65 |     rvest::html_text()
66 |
67 |   table <- list("table" = paste0("Tabela ", tab1, ": ", tab2))
68 |
69 |
70 |   # Period
71 |   p1 <- a %>%
72 |     rvest::html_nodes("#lblPeriodoDisponibilidade") %>%
73 |     rvest::html_text()
74 |
75 |   period <- list("period" = p1)
76 |
77 |
78 |   # Variables: taken from the second HTML table of the page; the digits of
79 |   # each row become the code and the remaining text the description.
80 |   v2 <- a %>% rvest::html_table(fill = TRUE, trim = TRUE)
81 |   v2 <- v2[[2]]
82 |
83 |   v3 <- data.frame(cod = apply(v2, 1, stringr::str_extract, "[[:digit:]]+"),
84 |                    desc = apply(v2, 1, stringr::str_replace_all, "([[:digit:]])", ""))
85 |   v3$cod <- stringr::str_trim(v3$cod)
86 |   v3$desc <- stringr::str_trim(v3$desc)
87 |   v3$desc <- stringr::str_replace(v3$desc, " - casas decimais: padr\uE3o = , m\uE1ximo =", "")
88 |
89 |   variables <- list("variable" = v3)
90 |
91 |
92 |   # Classifications and categories
93 |   c1 <- rvest::html_nodes(a, "table") %>%
94 |     rvest::html_table(fill = TRUE, trim = TRUE) %>%
95 |     unlist() %>%
96 |     stringr::str_extract("\\C[0-9]+") %>%
97 |     stringr::str_subset("\\C[0-9]+") %>%
98 |     base::tolower()
99 |
100 |   if (length(c1) >= 1) {
101 |
102 |     c2 <- a %>% rvest::html_nodes(".tituloLinha:nth-child(4)") %>% rvest::html_text()
103 |
104 |     c3 <- a %>% rvest::html_nodes(".tituloLinha:nth-child(5)") %>% rvest::html_text()
105 |
106 |     c4 <- paste(c1, "=", c2, c3)
107 |
108 |     c5 <- vector("list", length(c1))
109 |
110 |     for (i in seq_along(c1)) {
111 |
112 |       # The element ids in the page are numbered from zero.
113 |       selector <- paste0("#lstClassificacoes_lblQuantidadeCategorias_", i - 1L, "+ ", "#tabPrincipal span")
114 |
115 |       vals <- a %>%
116 |         rvest::html_nodes(selector) %>%
117 |         rvest::html_text() %>%
118 |         stringr::str_replace("\\[[^]]*]", "NA")
119 |       vals <- vals[vals != "NA"]
120 |
121 |       # The remaining values alternate code, description, code, ...
122 |       # Parity indexing (unlike seq(1, length(vals), 2)) also works when no
123 |       # category is found.
124 |       odd <- seq_along(vals) %% 2L == 1L
125 |       c5[[i]] <- data.frame(cod = vals[odd],
126 |                             desc = vals[!odd])
127 |
128 |     }
129 |
130 |     names(c5) <- c4
131 |
132 |     classific_category <- list("classific_category" = c5)
133 |
134 |   } else {
135 |
136 |     classific_category <- list("classific_category" = NULL)
137 |
138 |   }
139 |
140 |
141 |   # Territorial levels: maps the API codes found in the page to the names
142 |   # accepted by the 'geo' argument of get_sidra().
143 |   trad.geo <- data.frame(cod = as.character(c("n1","n2","n3","n8","n9","n7","n13","n14","n15","n23","n6","n10",
144 |                                               "n11","n102")),
145 |                          cod2 = as.character(c("Brazil","Region","State","MesoRegion","MicroRegion",
146 |                                                "MetroRegion","MetroRegionDiv","IRD","UrbAglo","PopArrang",
147 |                                                "City", "District","subdistrict","Neighborhood")))
148 |
149 |   # as.data.frame() on the piped vector yields a single column named ".",
150 |   # which is the merge key used below.
151 |   n1 <- rvest::html_nodes(a, "table") %>%
152 |     rvest::html_table(fill = TRUE, trim = TRUE) %>%
153 |     unlist() %>%
154 |     stringr::str_extract("N[0-9]+") %>%
155 |     stringr::str_subset("N[0-9]+") %>%
156 |     tolower() %>%
157 |     as.data.frame()
158 |
159 |   n2 <- a %>% rvest::html_nodes("p+ #tabPrincipal span:nth-child(4)") %>% rvest::html_text()
160 |   n3 <- a %>% rvest::html_nodes("p+ #tabPrincipal span:nth-child(5)") %>% rvest::html_text()
161 |   n4 <- data.frame(desc = paste(n2, n3))
162 |
163 |   n5 <- cbind(n1, n4)
164 |
165 |   ngeo <- merge(trad.geo, n5, by.x = "cod", by.y = ".")
166 |   ngeo <- ngeo[c("cod2","desc")]
167 |   names(ngeo) <- c("cod","desc")
168 |
169 |   ngeo <- list(geo = ngeo)
170 |
171 |   c(table, period, variables, classific_category, ngeo)
172 |
173 | }
174 |
--------------------------------------------------------------------------------
/R/get_sidra.R:
--------------------------------------------------------------------------------
1 | #' Get SIDRA's table
2 | #'
3 | #' This function allows the user to connect with IBGE's (Instituto Brasileiro de
4 | #' Geografia e Estatistica) SIDRA API in a flexible way. \acronym{SIDRA} is the
5 | #' acronym to "Sistema IBGE de Recuperação Automática" and it is the system where
6 | #' IBGE makes aggregate data from their researches available.
7 | #'
8 | #' @usage get_sidra(x, variable = "allxp", period = "last", geo = "Brazil",
9 | #' geo.filter = NULL, classific = "all", category = "all", header = TRUE,
10 | #' format = 4, digits = "default", api = NULL)
11 | #' @param x A table from IBGE's SIDRA API.
12 | #' @param variable An integer vector of the variables' codes to be returned.
13 | #' Defaults to all variables with exception of "Total".
14 | #' @param period A character vector describing the period of data. Defaults to
15 | #' the last available.
16 | #' @param geo A character vector describing the geographic levels of the data.
17 | #' Defaults to "Brazil".
18 | #' @param geo.filter A (named) list object with the specific item of the
19 | #' geographic level or all items of a determined higher geographic level. It should
20 | #' be used when geo argument is provided, otherwise all geographic units of
21 | #' 'geo' argument are considered.
22 | #' @param classific A character vector with the table's classification(s). Defaults to
23 | #' all.
24 | #' @param category "all" or a list object with the categories of the classifications
25 | #' of \code{classific(s)} argument. Defaults to "all".
26 | #' @param header Logical. should the data frame be returned with the description
27 | #' names in header?
28 | #' @param format An integer ranging between 1 and 4. Default to 4. See more in details.
29 | #' @param digits An integer, "default" or "max". Default to "default" that returns the
30 | #' defaults digits to each variable.
31 | #' @param api A character with the api's parameters. Defaults to NULL.
32 | #' @details
33 | #' \code{period} can be a integer vector with names "first" and/or "last",
34 | #' or "all" or a simply character vector with date format %Y%m-%Y%m.
35 | #'
36 | #' The \code{geo} argument can be one of "Brazil", "Region", "State",
37 | #' "MesoRegion", "MicroRegion", "MetroRegion", "MetroRegionDiv", "IRD",
38 | #' "UrbAglo", "City", "District","subdistrict","Neighborhood","PopArrang".
39 | #' 'geo.filter' lists can/must be named with the same characters.
40 | #'
41 | #' When NULL, the arguments \code{classific} and \code{category} return all options
42 | #' available.
43 | #'
44 | #' When argument \code{api} is not NULL, all other arguments are ignored.
45 | #'
46 | #' The \code{format} argument can be set to:
47 | #' \itemize{
48 | #' \item 1: Return only the descriptors' codes
49 | #' \item 2: Return only the descriptor's names
50 | #' \item 3: Return the codes and names of the geographic level and descriptors' names
51 | #' \item 4: Return the codes and names of the descriptors (Default)
52 | #' }
53 | #' @return The function returns a data frame printed by default functions
54 | #' @author Renato Prado Siqueira \email{rpradosiqueira@@gmail.com}
55 | #' @seealso \code{\link{info_sidra}}
56 | #' @examples
57 | #' \dontrun{
58 | #' ## Requesting table 1419 (Consumer Price Index - IPCA) from the API
59 | #' ipca <- get_sidra(1419,
60 | #' variable = 69,
61 | #' period = c("201212","201401-201412"),
62 | #' geo = "City",
63 | #' geo.filter = list("State" = 50))
64 | #'
65 | #' ## Urban population count from Census data (2010) for States and cities of Southeast region.
66 | #' get_sidra(1378,
67 | #' variable = 93,
68 | #' geo = c("State","City"),
69 | #' geo.filter = list("Region" = 3, "Region" = 3),
70 | #' classific = c("c1"),
71 | #' category = list(1))
72 | #'
73 | #' ## Number of informants by state in the Inventory Research (last data available)
74 | #' get_sidra(api = "/t/254/n1/all/n3/all/v/151/p/last%201/c162/118423/c163/0")
75 | #'
76 | #' }
77 | #'
78 | #' @keywords sidra IBGE
79 | #' @export
80 |
get_sidra <- function(x,
                      variable = "allxp",
                      period = "last",
                      geo = "Brazil",
                      geo.filter = NULL,
                      classific = "all",
                      category = "all",
                      header = TRUE,
                      format = 4,
                      digits = "default",
                      api = NULL) {

  # Entry point: builds the request URL for the SIDRA "values" endpoint
  # (or uses the ready-made 'api' path), downloads the response and returns
  # it as a data.frame. All argument validation that needs no network access
  # runs before the first request, so misspecified arguments fail fast.
  values_url <- "https://apisidra.ibge.gov.br/values"

  if (is.null(api)) {

    if (length(x) != 1) {
      stop("Only one table is allowed")
    }

    variable <- paste(variable, collapse = ",")

    path_geo <- .sidra_geo_path(geo, geo.filter)
    period <- .sidra_period_path(period)

    # 'header' must reduce to a single TRUE/FALSE; anything else is rejected
    # explicitly instead of failing inside `if` with an NA condition.
    header_flag <- if (length(header) == 1L) as.logical(header) else NA
    if (is.na(header_flag)) {
      stop("Only TRUE or FALSE")
    }
    path_header <- if (header_flag) "y" else "n"

    format <- .sidra_format_path(format)
    digits <- .sidra_digits_path(digits)

    # Done last: when 'classific' is "all" this step queries the table
    # description page over the network.
    path_classific <- .sidra_classific_path(x, classific, category)

    request_url <- paste0(values_url,
                          "/t/", x, "/",
                          path_geo,
                          "/p/", period,
                          "/v/", variable,
                          path_classific, format, "/h/",
                          path_header,
                          digits)

    variable_msg <- sprintf("The table %s does not contain the %s variable",
                            x, variable)

  } else {

    if (!is.character(api)) stop("The 'api' argument must be a character vector")
    if (length(api) != 1) stop("The 'api' argument must have the length equals to 1")

    message("All others arguments are desconsidered when 'api' is informed")

    request_url <- paste0(values_url, api)
    path_header <- "y"

    # 'x' and 'variable' are not meaningful in 'api' mode ('x' is usually
    # missing), so the variable error cannot mention them.
    variable_msg <- "The table does not contain the requested variable"

  }

  body <- httr::content(httr::GET(request_url), as = "text")

  .sidra_parse_response(body, path_header, variable_msg)

}

# Private helper: translates 'geo' / 'geo.filter' into the territorial part of
# the request path (e.g. "n1/1" or "n6/in%20n3%2050").
# - 'geo' must contain only known level descriptions.
# - 'geo.filter' may be shorter than 'geo'; missing positions mean "all".
# - An unnamed filter entry filters its own level; a named entry selects the
#   units of 'geo' contained in the named level.
# Levels are looked up with match() so results stay aligned with the order of
# 'geo' supplied by the caller.
.sidra_geo_path <- function(geo, geo.filter) {

  level_cod <- c("n1", "n2", "n3", "n8", "n9", "n7", "n13", "n14", "n15",
                 "n23", "n6", "n10", "n11", "n102")
  level_desc <- c("Brazil", "Region", "State", "MesoRegion", "MicroRegion",
                  "MetroRegion", "MetroRegionDiv", "IRD", "UrbAglo",
                  "PopArrang", "City", "District", "subdistrict",
                  "Neighborhood")

  unknown <- geo[!(geo %in% level_desc)]
  if (length(unknown) > 0) {
    stop(paste("Some element in 'geo' argument is misspecified:",
               paste0(unknown, collapse = " & ")),
         call. = FALSE)
  }

  if (length(geo) == 0L || (length(geo) == 1L && geo == "Brazil")) {
    if (!is.null(geo.filter)) {
      message("No filter is necessary in 'geo.filter' argument once 'geo' is set to 'Brazil' (default)")
    }
    return("n1/1")
  }

  n_geo <- length(geo)

  if (length(geo.filter) > n_geo) {
    stop("The geo.filter argument must have the same or less length than 'geo'",
         call. = FALSE)
  }

  # Normalise the filter to a list with exactly one entry per 'geo' element.
  filters <- as.list(geo.filter)
  filter_names <- names(filters)
  if (is.null(filter_names)) {
    filter_names <- rep("", length(filters))
  }
  filter_names[is.na(filter_names)] <- ""

  n_missing <- n_geo - length(filters)
  if (n_missing > 0) {
    filters <- c(filters, rep(list("all"), n_missing))
    filter_names <- c(filter_names, rep("", n_missing))
  }

  # An explicit NULL entry carries no codes; treat it as "all".
  empty <- vapply(filters, is.null, logical(1))
  filters[empty] <- rep(list("all"), sum(empty))

  unnamed <- filter_names == ""
  filter_names[unnamed] <- geo[unnamed]

  geo_idx <- match(geo, level_desc)
  filter_idx <- match(filter_names, level_desc)

  # A filter level must be known and must not be finer than the level it
  # filters (the position in 'level_desc' is used as the level rank).
  if (anyNA(filter_idx) || any(geo_idx < filter_idx)) {
    stop("Some element in 'geo.filter' is misspecified", call. = FALSE)
  }

  parts <- vapply(seq_len(n_geo), function(h) {
    codes <- paste(filters[[h]], collapse = ",")
    if (geo_idx[h] == filter_idx[h]) {
      paste0(level_cod[geo_idx[h]], "/", codes)
    } else {
      paste0(level_cod[geo_idx[h]], "/in%20", level_cod[filter_idx[h]],
             "%20", codes)
    }
  }, character(1))

  paste(parts, collapse = "/")

}

# Private helper: builds the classification part of the request path
# ("" or "/c1/all/c2/1,2"). When 'classific' is "all" (or NULL) the
# classification codes are scraped from the table description page, which
# requires network access. Otherwise 'category' must be "all"/NULL or a list
# no longer than 'classific'; classifications without a matching category
# entry use "all".
.sidra_classific_path <- function(x, classific, category) {

  is_all <- function(v) {
    is.null(v) ||
      (is.character(v) && length(v) == 1L && !is.na(v) && v == "all")
  }

  if (length(classific) == 0L || is_all(classific)) {

    # Only worth a message when the caller actually asked for categories.
    if (!is_all(category)) {
      message("Considering all categories once 'classific' was set to 'all' (default)")
    }

    page <- xml2::read_html(paste0("https://apisidra.ibge.gov.br/desctabapi.aspx?c=", x))
    tables <- rvest::html_nodes(page, "table")
    cells <- unlist(rvest::html_table(tables, fill = TRUE, trim = TRUE))
    codes <- stringr::str_extract(cells, "\\C[0-9]+")
    codes <- codes[!is.na(codes)]

    if (length(codes) == 0L) {
      return("")
    }

    return(paste0("/", paste0(base::tolower(codes), "/all", collapse = "/")))

  }

  if (is_all(category)) {
    return(paste0("/", paste0(classific, "/all", collapse = "/")))
  }

  if (!is.list(category)) {
    stop("If not 'all', 'category' must be an object of type 'list'",
         call. = FALSE)
  }

  if (length(category) > length(classific)) {
    stop("The length of 'category' must be equal or less than 'classific' argument",
         call. = FALSE)
  }

  # Every supplied category list is kept (not only the last one); the
  # remaining classifications default to "all".
  selected <- rep("all", length(classific))
  selected[seq_along(category)] <- vapply(category, paste0, character(1),
                                          collapse = ",", USE.NAMES = FALSE)

  paste0("/", paste0(classific, "/", selected, collapse = "/"))

}

# Private helper: validates 'period' and returns its path fragment. A single
# element named "last" or "first" becomes "<name>%20<value>"; otherwise
# 'period' must be character and its elements are joined with commas.
.sidra_period_path <- function(period) {

  period_names <- names(period)

  if (!is.null(period_names)) {

    if (length(period) != 1) {
      stop("only one element is possible when named vector ('last' or 'first') is present",
           call. = FALSE)
    }

    if (!(period_names %in% c("last", "first"))) {
      stop("The element's 'name' attribute must be 'last' or 'first'",
           call. = FALSE)
    }

    return(paste0(period_names, "%20", period))

  }

  if (!is.character(period)) {
    stop("The 'period' argument must be an object of type character",
         call. = FALSE)
  }

  paste0(period, collapse = ",")

}

# Private helper: maps 'format' (1 to 4, or NULL) to its path fragment.
# Any other value triggers a warning and really falls back to the default.
.sidra_format_path <- function(format) {

  fragments <- c("/f/c", "/f/n", "/f/u", "/f/a")

  if (is.null(format)) {
    return("/f/a")
  }

  if (is.atomic(format) && length(format) == 1L && !is.na(format) &&
      format %in% 1:4) {
    return(fragments[[as.integer(format)]])
  }

  warning("The format argument is misspecified. Considering defaut specification.",
          call. = FALSE)
  "/f/a"

}

# Private helper: maps 'digits' ("default", "max", an integer 0-9, or NULL) to
# its path fragment. Any other value triggers a warning and really falls back
# to the default. The range check is numeric, so "10" is not accepted.
.sidra_digits_path <- function(digits) {

  if (is.null(digits)) {
    return("/d/s")
  }

  if (is.atomic(digits) && length(digits) == 1L && !is.na(digits)) {

    if (is.character(digits) && digits == "default") {
      return("/d/s")
    }

    if (is.character(digits) && digits == "max") {
      return("/d/m")
    }

    n <- suppressWarnings(as.numeric(digits))
    if (!is.na(n) && n >= 0 && n <= 9 && n == trunc(n)) {
      return(paste0("/d/", n))
    }

  }

  warning("The digits argument is misspecified. Considering defaut specification.",
          call. = FALSE)
  "/d/s"

}

# Private helper: turns the raw response text into a data.frame.
# Known plain-text error responses are recognised by their leading words and
# reported with a specific message; any other non-JSON response is raised
# as-is. When 'path_header' is "y" the first record supplies the column names.
# The value column ("V" or "Valor") is converted to numeric; non-numeric
# markers such as ".." become NA.
.sidra_parse_response <- function(body, path_header, variable_msg) {

  if (!is.character(body) || length(body) != 1L || is.na(body)) {
    stop("Empty or unreadable response from the SIDRA API", call. = FALSE)
  }

  parsed <- try(rjson::fromJSON(body), silent = TRUE)

  words <- strsplit(body, " ")[[1]]
  # isTRUE() guards against responses with fewer words than the index.
  word_is <- function(i, value) isTRUE(words[i] == value)

  if (word_is(2, "P")) {
    stop("The 'period' argument is misspecified.", call. = FALSE)
  }

  if (word_is(1, "Tabela") && word_is(3, "Tabela")) {
    stop("This table does not exists.", call. = FALSE)
  }

  if (word_is(2, "V")) {
    stop(variable_msg, call. = FALSE)
  }

  if (grepl("Server Error", body)) {
    stop(
      "Server error: Some argument is misspecified or (probabily) The query will result in a table with more than 20k values.\n",
      "In this case, you may address to the SIDRA's site and request the data manually to be delivered by an email account.",
      call. = FALSE
    )
  }

  if (inherits(parsed, "try-error")) {
    stop(body, call. = FALSE)
  }

  result <- as.data.frame(do.call("rbind", parsed))
  result <- as.data.frame(lapply(result, unlist), stringsAsFactors = FALSE)

  if (path_header == "y") {
    colnames(result) <- unlist(result[1, ])
    result <- result[-1, , drop = FALSE]
  }

  value_cols <- which(colnames(result) %in% c("V", "Valor"))

  for (j in value_cols) {
    result[[j]] <- suppressWarnings(as.numeric(result[[j]]))
  }

  result

}
--------------------------------------------------------------------------------