├── .Rbuildignore
├── .gitignore
├── DESCRIPTION
├── GetITRData.Rproj
├── NAMESPACE
├── NEWS.md
├── R
    ├── gitrd_Bovespa_Data.R
    ├── gitrd_GetITRData.R
    ├── gitrd_export_ITR_Data.R
    ├── gitrd_fix_dataframes.R
    ├── gitrd_get_inflation_data.R
    ├── gitrd_get_info_companies.R
    ├── gitrd_read_fre_zip_file.R
    ├── gitrd_read_zip_file.R
    └── gitrd_utils.R
├── README.md
├── inst
    └── extdata
    │   ├── 434_ARAC_2008-09-30.zip
    │   ├── ExampleReport_Petrobras.RData
    │   └── ITRBPAE.001
├── man
    ├── gitrd.GetITRData.Rd
    ├── gitrd.convert.to.wide.Rd
    ├── gitrd.export.ITR.data.Rd
    ├── gitrd.fix.dataframes.Rd
    ├── gitrd.get.bovespa.data.Rd
    ├── gitrd.get.inflation.data.Rd
    ├── gitrd.get.info.companies.Rd
    ├── gitrd.read.fre.zip.file.Rd
    ├── gitrd.read.fwf.file.Rd
    ├── gitrd.read.zip.file.Rd
    ├── gitrd.read.zip.file.type.1.Rd
    ├── gitrd.read.zip.file.type.2.Rd
    ├── gitrd.read.zip.file.type.fre.Rd
    └── gitrd.search.company.Rd
├── tests
    ├── testthat.R
    └── testthat
    │   └── test_gitrd.R
└── vignettes
    ├── MyExcelData.xlsx
    ├── gitrd-vignette-introduction.R
    ├── gitrd-vignette-introduction.Rmd
    └── gitrd-vignette-introduction.html


/.Rbuildignore:
--------------------------------------------------------------------------------
1 | ^.*\.Rproj$
2 | ^\.Rproj\.user$
3 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .Rproj.user
2 | .Rhistory
3 | .RData
4 | .Ruserdata
5 | 


--------------------------------------------------------------------------------
/DESCRIPTION:
--------------------------------------------------------------------------------
 1 | Package: GetITRData
 2 | Title: Reading Financial Reports from Bovespa's ITR System
 3 | Version: 0.7
 4 | Date: 2017-10-21
 5 | Authors@R: person("Marcelo", "Perlin", email = "marceloperlin@gmail.com", role = c("aut", "cre"))
 6 | Description: Reads quarterly and annual financial reports including assets, liabilities, income and cash flow statements from Bovespa's ITR (informacoes trimestrais) system <http://www.bmfbovespa.com.br/en_us/products/listed-equities-and-derivatives/equities/listed-companies.htm>.
 7 |  The ITR/DFP system is a web based interface for all financial reports of companies traded at Bovespa. The package is especially designed for large scale data importation, keeping a tabular (long) structure for easier processing.  
 8 | Depends:
 9 |     R (>= 3.3.0)
10 | Imports: stringr,XML,dplyr, readr, reshape2, tibble, xlsx, stats, curl
11 | License: GPL-2
12 | BugReports: https://github.com/msperlin/GetITRData/issues
13 | URL: https://github.com/msperlin/GetITRData/
14 | LazyData: true
15 | RoxygenNote: 6.0.1
16 | Suggests: knitr,
17 |     rmarkdown,
18 |     testthat,
19 |     ggplot2
20 | VignetteBuilder: knitr
21 | 


--------------------------------------------------------------------------------
/GetITRData.Rproj:
--------------------------------------------------------------------------------
 1 | Version: 1.0
 2 | 
 3 | RestoreWorkspace: No
 4 | SaveWorkspace: No
 5 | AlwaysSaveHistory: No
 6 | 
 7 | EnableCodeIndexing: Yes
 8 | UseSpacesForTab: Yes
 9 | NumSpacesForTab: 2
10 | Encoding: UTF-8
11 | 
12 | RnwWeave: Sweave
13 | LaTeX: pdfLaTeX
14 | 
15 | AutoAppendNewline: Yes
16 | StripTrailingWhitespace: Yes
17 | 
18 | BuildType: Package
19 | PackageUseDevtools: Yes
20 | PackageInstallArgs: --no-multiarch --with-keep.source
21 | PackageRoxygenize: rd,collate,namespace,vignette
22 | 


--------------------------------------------------------------------------------
/NAMESPACE:
--------------------------------------------------------------------------------
 1 | # Generated by roxygen2: do not edit by hand
 2 | 
 3 | export(gitrd.GetITRData)
 4 | export(gitrd.convert.to.wide)
 5 | export(gitrd.export.ITR.data)
 6 | export(gitrd.fix.dataframes)
 7 | export(gitrd.get.bovespa.data)
 8 | export(gitrd.get.inflation.data)
 9 | export(gitrd.get.info.companies)
10 | export(gitrd.read.fre.zip.file)
11 | export(gitrd.read.fwf.file)
12 | export(gitrd.read.zip.file)
13 | export(gitrd.search.company)
14 | 


--------------------------------------------------------------------------------
/NEWS.md:
--------------------------------------------------------------------------------
 1 | ### Version 0.7 (2018-02-28)
 2 | 
 3 | - fixed bug in vignette
 4 | 
 5 | ### Version 0.6 (2017-10-21)
 6 | 
 7 | - fixed bug with NA dates and names of companies
 8 | - Implemented inflation adjusted values
 9 | - Added current capital composition
10 | - added tickers list for each company
11 | - Dividends history is now available
12 | - Yearly data (DFP system) is now also available
13 | - Users can now select max levels in account description (rows)
14 | 
15 | ### Version 0.5 (2017-09-29)
16 | 
17 | First version.
18 | 


--------------------------------------------------------------------------------
/R/gitrd_Bovespa_Data.R:
--------------------------------------------------------------------------------
  1 | #' Reads information for a company from B3 site
  2 | #'
  3 | #' Given a CVM code, this function scrapes information from the company page
  4 | #' (current stockholders, stock composition and listing segment) and from the
  5 | #' page with the history of cash dividends.
  6 | #'
  7 | #' @param my.id A CVM id
  8 | #' @return A list with dataframes \code{df.stock.composition},
  9 | #'   \code{df.stock.holders} and \code{df.dividends}, plus the character vector
 10 | #'   \code{company.segment} with the listing segment(s) found in the company
 11 | #'   page ('None' if no segment logo is found, NA if the page has no tables).
 12 | #'   A dataframe is empty whenever its table is not found.
 13 | #' @export
 14 | #'
 15 | #' @examples
 16 | #'
 17 | #' \dontrun{ # keep cran check fast
 18 | #' l.info.PETR <- gitrd.get.bovespa.data(my.id = 9512)
 19 | #' str(l.info.PETR)
 20 | #' }
 21 | gitrd.get.bovespa.data <- function(my.id) {
 22 | 
 23 |   # fct for cleaning numerical values (drops '.' and uses ',' as decimal mark)
 24 |   fix.num.cols <- function(x) {
 25 |     x <- as.character(x)
 26 |     x <- stringr::str_replace_all(x, stringr::fixed('.'),'')
 27 |     x <- stringr::str_replace_all(x, stringr::fixed(','),'.')
 28 | 
 29 |     as.numeric(x)
 30 |   }
 31 | 
 32 |   # fct for finding the first table with a given column name. Returns NULL
 33 |   # when there is none, so that a missing table yields an empty dataframe
 34 |   # instead of a subscript error
 35 |   find.table <- function(l.tables, col.name) {
 36 |     idx <- vapply(X = l.tables,
 37 |                   FUN = function(df.in) any(colnames(df.in) == col.name),
 38 |                   FUN.VALUE = logical(1))
 39 |     tbl.idx <- which(idx)
 40 | 
 41 |     if (length(tbl.idx) == 0) return(NULL)
 42 | 
 43 |     l.tables[[tbl.idx[1]]]
 44 |   }
 45 | 
 46 |   # default outputs, used when the company page has no data
 47 |   df.stock.holders <- data.frame()
 48 |   df.stock.composition <- data.frame()
 49 |   company.segment <- NA_character_
 50 | 
 51 |   # get data from bovespa
 52 |   my.link <- paste0('http://bvmf.bmfbovespa.com.br/pt-br/mercados/acoes/empresas/ExecutaAcaoConsultaInfoEmp.asp?CodCVM=',
 53 |                     my.id, '&ViewDoc=0#a')
 54 |   data.out <- XML::readHTMLTable(my.link,
 55 |                                  skip.rows = 1)
 56 | 
 57 |   if (length(data.out) != 0) {
 58 | 
 59 |     # current stockholders data
 60 |     temp.df <- find.table(data.out, '%ON')
 61 | 
 62 |     if (!is.null(temp.df)) {
 63 |       df.stock.holders <- temp.df
 64 |       names(df.stock.holders) <- c('name', 'ON.percent', 'PN.percent', 'total.percent')
 65 |       df.stock.holders$ON.percent <- fix.num.cols(df.stock.holders$ON.percent)
 66 |       df.stock.holders$PN.percent <- fix.num.cols(df.stock.holders$PN.percent)
 67 |       df.stock.holders$total.percent <- fix.num.cols(df.stock.holders$total.percent)
 68 |     }
 69 | 
 70 |     # current total stocks
 71 |     temp.df <- find.table(data.out, 'V1')
 72 | 
 73 |     if (!is.null(temp.df)) {
 74 |       df.stock.composition <- temp.df
 75 |       names(df.stock.composition) <- c('type.stock', 'number.of.stocks')
 76 |       df.stock.composition$number.of.stocks <- fix.num.cols(df.stock.composition$number.of.stocks)
 77 |     }
 78 | 
 79 |     # current listing segments
 80 | 
 81 |     my.html <- paste0(readLines(my.link, encoding = 'Latin1'), collapse = '\n')
 82 | 
 83 |     # build dict
 84 |     my.dict <- data.frame(segments.id = c('Bovespa Mais',
 85 |                                           'Bovespa Mais - Level 2',
 86 |                                           'Novo Mercado',
 87 |                                           'Corporate Governance - Level 2',
 88 |                                           'Corporate Governance - Level 1'),
 89 |                           pic.file = c('InfEmpSeloBovespaMaisBrasil.png',
 90 |                                        'img_logo-bovmaisn2.png',
 91 |                                        'InfEmpLogoNovoMercado.png',
 92 |                                        'InfEmpLogoMercadoNivel2.png',
 93 |                                        'InfEmpLogoMercadoNivel1.png'), stringsAsFactors = FALSE )
 94 | 
 95 |     segment.test <- vapply(X = my.dict$pic.file,
 96 |                            FUN = function(x) stringr::str_detect(my.html, x),
 97 |                            FUN.VALUE = logical(1))
 98 | 
 99 |     if (any(segment.test)) {
100 |       company.segment <- my.dict$segments.id[which(segment.test)]
101 |     } else {
102 |       company.segment <- 'None'
103 |     }
104 | 
105 |   }
106 | 
107 |   # cash dividends
108 |   my.url <- paste0('http://bvmf.bmfbovespa.com.br/cias-listadas/empresas-listadas/ResumoProventosDinheiro.aspx?codigoCvm=',
109 |                    my.id,'&tab=3.1&idioma=pt-br')
110 | 
111 |   l.dividends <- XML::readHTMLTable(my.url)
112 | 
113 |   if (length(l.dividends) != 0) {
114 | 
115 |     df.dividends <- l.dividends[[1]]
116 |     names(df.dividends) <- c('type.stock', 'date.aproval', 'value', 'unit.dividend', 'type.dividend',
117 |                              'last.day.with.dividend','last.day.price.with.dividend', 'last.price', 'last.price.unit',
118 |                              'dividend.by.price')
119 | 
120 |     df.dividends$type.stock <- as.character(df.dividends$type.stock)
121 |     df.dividends$date.aproval <- as.Date(df.dividends$date.aproval, '%d/%m/%Y')
122 |     df.dividends$value <-   fix.num.cols(df.dividends$value)
123 |     df.dividends$unit.dividend <-   fix.num.cols(df.dividends$unit.dividend)
124 |     df.dividends$type.dividend <-   as.character(df.dividends$type.dividend)
125 |     df.dividends$last.day.with.dividend <-   as.Date(df.dividends$last.day.with.dividend, '%d/%m/%Y')
126 |     df.dividends$last.day.price.with.dividend <-   as.Date(df.dividends$last.day.price.with.dividend, '%d/%m/%Y')
127 |     df.dividends$last.price <-   fix.num.cols(df.dividends$last.price)
128 |     df.dividends$last.price.unit <-   fix.num.cols(df.dividends$last.price.unit)
129 |     df.dividends$dividend.by.price <-   fix.num.cols(df.dividends$dividend.by.price)
130 | 
131 |   } else {
132 |     df.dividends <- data.frame()
133 |   }
134 | 
135 |   l.out <- list(df.stock.composition = df.stock.composition,
136 |                 df.stock.holders = df.stock.holders,
137 |                 df.dividends = df.dividends,
138 |                 company.segment = company.segment)
139 | 
140 |   return(l.out)
141 | 
142 | }
143 | 


--------------------------------------------------------------------------------
/R/gitrd_GetITRData.R:
--------------------------------------------------------------------------------
  1 | #' Downloads and reads financial reports from Bovespa
  2 | #'
  3 | #' Quarterly and annual financial reports are downloaded from B3 for a combination of companies and time period.
  4 | #' The easiest way to get started with gitrd.GetITRData is looking for the official name of traded companies using function gitrd.search.company('nametolookfor').
  5 | #' Alternatively, you can use function gitrd.get.info.companies('companies') to import a dataframe with information for all available companies and time periods.
  6 | #'
  7 | #' @param name.companies Official names of companies to get financial reports (e.g. 'ELETROPAULO METROPOLITANA EL.S.PAULO S.A').
  8 | #' Names of companies can be found using function gitrd.search.company('nametolookfor') or gitrd.get.info.companies('companies')
  9 | #' @param first.date First date (YYYY-MM-DD) to get data. Character or Date. E.g. first.date = '2010-01-01'.
 10 | #' @param last.date Last date (YYYY-MM-DD) to get data. Character or Date. E.g. last.date = '2017-01-01'.
 11 | #' @param type.info Type of financial statements, 'individual' (default) or 'consolidated'. Argument can be a single value or a vector with the same
 12 | #' length as name.companies. The individual type only includes financial statements from the company itself, while consolidated statements adds information
 13 | #' about controlled companies
 14 | #' @param periodicy.fin.report The frequency of financial reports: 'annual' (default) or 'quarterly'
 15 | #' @param inflation.index Sets the inflation index to use for finding inflation adjusted values of all reports. Possible values: 'dollar' (default) or 'IPCA', the brazilian main inflation index.
 16 | #' When using 'IPCA', the base date is set as the last date found in itr/dfp dataset.
 17 | #' @param max.levels Sets the maximum number of levels of accounting items in financial reports (equal to or higher than one)
 18 | #' @param folder.out Folder where to download and manipulate the zip files. Default = tempdir()
 19 | #' @param be.quiet Should the function omit the progress messages of each processed file? TRUE or FALSE (default)
 20 | #'
 21 | #' @return A tibble (dataframe with lists) object with all gathered financial statements, with each company as a row in the tibble.
 22 | #' @export
 23 | #'
 24 | #' @examples
 25 | #'
 26 | #' \dontrun{ #dontrun: keep cran check time short
 27 | #' name.companies <- 'ELETROPAULO METROPOLITANA EL.S.PAULO S.A'
 28 | #' first.date <- '2005-01-01'
 29 | #' last.date <-  '2006-01-01'
 30 | #'
 31 | #' df.statements <- gitrd.GetITRData(name.companies = name.companies,
 32 | #'                                   first.date = first.date,
 33 | #'                                   last.date = last.date)
 34 | #'  }
 35 | gitrd.GetITRData <- function(name.companies,
 36 |                              first.date = Sys.Date()-12*30,
 37 |                              last.date = Sys.Date(),
 38 |                              type.info = 'individual',
 39 |                              periodicy.fin.report = 'annual',
 40 |                              inflation.index = 'dollar',
 41 |                              max.levels = 3,
 42 |                              folder.out = tempdir(),
 43 |                              be.quiet = FALSE) {
 44 | 
 45 |   # progress messages for each file are only printed if be.quiet is FALSE
 46 |   cat.progress <- function(...) {
 47 |     if (!be.quiet) cat(...)
 48 |   }
 49 | 
 50 |   # sanity check: all elements of type.info must be valid, not just any of them
 51 |   possible.values <- c('individual', 'consolidated')
 52 |   if ( !all(type.info %in% possible.values) ){
 53 |     stop('Input type.info should be "individual" or "consolidated"')
 54 |   }
 55 | 
 56 |   if (length(type.info) == 1) {
 57 |     type.info <- rep(type.info, length(name.companies))
 58 |   }
 59 | 
 60 |   if (length(type.info) != length(name.companies)) {
 61 |     stop('Length of type.info does not match the length of name.companies')
 62 |   }
 63 | 
 64 |   if (!dir.exists(folder.out)) dir.create(folder.out, recursive = TRUE)
 65 | 
 66 |   # a single value is required, as it is used in switch() below
 67 |   possible.values <- c('annual', 'quarterly')
 68 |   if ( (length(periodicy.fin.report) != 1) || !(periodicy.fin.report %in% possible.values) ){
 69 |     stop('Input periodicy.fin.report should be "annual" or "quarterly"')
 70 |   }
 71 | 
 72 |   type.fin.report <- switch(periodicy.fin.report,
 73 |                             'annual' = 'dfp',
 74 |                             'quarterly' = 'itr')
 75 | 
 76 |   # check input inflation.index
 77 |   possible.values <- c('IPCA', 'dollar')
 78 |   if ( (length(inflation.index) != 1) || !(inflation.index %in% possible.values) ) {
 79 |     stop(paste0('Input inflation.index should be one of:\n' , paste0(possible.values, collapse = '\n') ) )
 80 |   }
 81 | 
 82 |   if (max.levels < 1) {
 83 |     stop('Input max.levels should be equal to or higher than one')
 84 |   }
 85 | 
 86 |   # check internet
 87 |   if (!curl::has_internet()) {
 88 |     stop('You need an active internet connection to download files from Bovespa.')
 89 |   }
 90 | 
 91 |   # get data from github
 92 |   df.info <- gitrd.get.info.companies(type.data = 'companies_files')
 93 |   unique.names <- unique(df.info$name.company)
 94 | 
 95 |   idx <- !(name.companies %in% unique.names)
 96 |   if (any( idx)) {
 97 |     stop(paste0('Name of companies: \n\n ', paste0(name.companies[idx], collapse = '\n'), '\n\n',
 98 |                 'not found in registry. Use df.info <- gitrd.get.info.companies() to find the names of all available companies.'))
 99 |   }
100 | 
101 |   # check dates. as.Date() throws its own error for malformed strings, so it
102 |   # is wrapped in tryCatch() for reaching the message below
103 |   first.date <- tryCatch(as.Date(first.date), error = function(e) as.Date(NA))
104 |   last.date <- tryCatch(as.Date(last.date), error = function(e) as.Date(NA))
105 | 
106 |   if ( (length(first.date) != 1) || (length(last.date) != 1) ||
107 |        is.na(first.date) || is.na(last.date) )  {
108 |     stop('Inputs first.date or last.date does not seem to be dates. Use format YYYY-MM-DD')
109 |   }
110 | 
111 |   if (last.date < first.date) {
112 |     stop('Your last.date is older than first.date. Did you mix them up?')
113 |   }
114 | 
115 |   # find available dates for selected companies
116 |   idx <- (df.info$name.company %in% name.companies)&
117 |     (df.info$id.date >= first.date)&(df.info$id.date <= last.date)
118 | 
119 |   df.to.process <- df.info[idx, ]
120 | 
121 |   # remove duplicates/NA and filter for type.data
122 |   idx <- !duplicated(df.to.process[, c('id.company', 'id.date', 'type.fin.report')])
123 |   df.to.process <- df.to.process[idx, ]
124 | 
125 |   idx <- !is.na(df.to.process$id.company)
126 |   df.to.process <- df.to.process[idx, ]
127 | 
128 |   idx <- !is.na(df.to.process$name.company)
129 |   df.to.process <- df.to.process[idx, ]
130 | 
131 |   if (nrow(df.to.process) == 0){
132 |     stop('Cannot find any dates related to companies in registry. You should try different dates and companies.')
133 |   }
134 | 
135 |   # msg to prompt
136 |   if (length(unique(type.info))==1){
137 |     msg.reach <- type.info[1]
138 |   } else {
139 |     # find most frequent
140 |     tbl <- sort(table(type.info), decreasing = TRUE)
141 |     msg.reach <- paste0('mostly ', names(tbl)[1])
142 |   }
143 | 
144 |   cat(paste0('\n\nDownloading data for ', length(name.companies), ' companies',
145 |              '\nType of financial reports: ', msg.reach,
146 |              '\nPeriodicy of financial reports: ', periodicy.fin.report, ' (',type.fin.report, ' system)',
147 |              '\nFirst Date: ',first.date,
148 |              '\nLaste Date: ',last.date,
149 |              '\nInflation index: ', inflation.index,
150 |              '\n\n') )
151 | 
152 |   cat(paste0('Downloading inflation data' ))
153 | 
154 |   # download inflation data using BETS
155 | 
156 |   df.inflation <- gitrd.get.inflation.data(inflation.index)
157 | 
158 |   cat('\tDone\n\n')
159 | 
160 |   # try to find company's names
161 |   idx <- !(name.companies %in% unique(df.to.process$name.company))
162 | 
163 |   if (any(idx)) {
164 |     cat(paste0('\nWARNING: Cant find available dates for ', paste0(name.companies[idx], collapse = ', ')))
165 |   }
166 | 
167 |   # warn user for lack of cash flow data
168 |   if (any(df.to.process$id.date < as.Date('2009-01-01'))) {
169 |     cat('\nWARNING: For data before 2009, the cash flow statements are not available\n\n')
170 |   }
171 | 
172 |   # start processing
173 |   cat(paste0('Inputs looking good! Starting download of files:\n' ) )
174 | 
175 |   for (i.company in unique(df.to.process$name.company) ) {
176 |     idx <- (df.to.process$name.company == i.company)&
177 |       (df.to.process$type.fin.report == type.fin.report)
178 |     temp.df <- df.to.process[idx, ]
179 |     cat(paste0('\n', i.company) )
180 | 
181 |     cat(paste0('\n\tAvailable periods: ', paste0(temp.df$id.date, collapse = '\t')) )
182 | 
183 |   }
184 | 
185 |   cat('\n\n')
186 | 
187 |   tibble.out <- tibble::tibble()
188 | 
189 |   for (i.company in unique(df.to.process$name.company)) {
190 | 
191 |     idx <- (df.to.process$name.company == i.company)&
192 |       (df.to.process$type.fin.report == type.fin.report)
193 |     temp.df <- df.to.process[idx,  ]
194 | 
195 |     # get data from Bovespa site
196 |     my.id <- temp.df$id.company[1]
197 | 
198 |     l.out.bov <- gitrd.get.bovespa.data(my.id)
199 | 
200 |     current.stock.composition <- l.out.bov$df.stock.composition
201 |     df.dividends <- l.out.bov$df.dividends
202 | 
203 |     # match() gives the first position only, so type.info.now is a single
204 |     # value even if a company is repeated in name.companies
205 |     type.info.now <- type.info[match(i.company, name.companies)]
206 | 
207 |     df.assets <- data.frame()
208 |     df.liabilities <- data.frame()
209 |     df.income <- data.frame()
210 |     df.cashflow <- data.frame()
211 |     df.fre.stock.holders <- data.frame()
212 |     df.fre.capital <- data.frame()
213 | 
214 |     for (i.date in as.character(temp.df$id.date) ) {
215 | 
216 |       temp.df2 <- temp.df[temp.df$id.date == i.date,  ]
217 | 
218 |       # cases for more than one file per quarter
219 |       if (nrow(temp.df2)> 1) {
220 |         # find id with highest value (most recent file)
221 |         temp.df2 <- temp.df2[which.max(temp.df2$id.file), ]
222 |       }
223 | 
224 |       cat.progress(paste0('\nProcessing ', i.company, ', Date = ', i.date  ) )
225 | 
226 |       # get dfp/itr data
227 |       dl.link <- temp.df2$dl.link
228 |       type.fin.report <- temp.df2$type.fin.report
229 | 
230 |       # fix file names for latin characters
231 |       my.filename <- iconv(temp.df2$name.company, to = 'ASCII//TRANSLIT')
232 |       my.filename <- stringr::str_replace_all(my.filename, stringr::fixed('?'), '_')
233 | 
234 |       temp.file <- file.path(folder.out, paste0(temp.df2$id.company, '_',
235 |                                                 stringr::str_sub(my.filename,1,4), '_',
236 |                                                 i.date, '.zip') )
237 | 
238 |       cat.progress(paste0('\n\tAcessing ', type.fin.report, ' data') )
239 | 
240 |       if (file.exists(temp.file)) {
241 |         cat.progress(' | file exists (no dl)')
242 |       } else {
243 |         cat.progress(' | downloading file')
244 | 
245 |         utils::download.file(url = dl.link,
246 |                              destfile = temp.file,
247 |                              quiet = TRUE,
248 |                              mode = 'wb')
249 |       }
250 | 
251 |       cat.progress(' | reading file')
252 | 
253 |       suppressWarnings({
254 |         l.out <- gitrd.read.zip.file(my.zip.file = temp.file, folder.to.unzip = tempdir(),
255 |                                      id.type = temp.df2$id.type, type.fin.report)
256 |       })
257 | 
258 |       # type.info was validated above, so it is either individual or consolidated
259 |       if (type.info.now == 'individual') {
260 |         out.df <- l.out$ind.dfs
261 |       } else {
262 |         out.df <- l.out$cons.dfs
263 |       }
264 | 
265 |       # set some cols for long format
266 |       out.df$df.assets$ref.date <- as.Date(i.date)
267 |       out.df$df.assets$company.name <- i.company
268 |       out.df$df.liabilities$ref.date <- as.Date(i.date)
269 |       out.df$df.liabilities$company.name <- i.company
270 |       out.df$df.income$ref.date <- as.Date(i.date)
271 |       out.df$df.income$company.name <- i.company
272 |       out.df$df.cashflow$company.name <- i.company
273 |       out.df$df.cashflow$ref.date <- as.Date(i.date)
274 | 
275 |       df.assets <- rbind(df.assets, out.df$df.assets)
276 |       df.liabilities <- rbind(df.liabilities, out.df$df.liabilities)
277 |       df.income <- rbind(df.income, out.df$df.income)
278 |       df.cashflow <- rbind(df.cashflow, out.df$df.cashflow)
279 | 
280 |       # get data from FRE
281 | 
282 |       cat.progress(paste0('\n\tAcessing fre data') )
283 | 
284 |       idx <- (df.to.process$name.company == i.company)&
285 |         (df.to.process$type.fin.report == 'fre')&
286 |         (df.to.process$id.date == temp.df2$id.date)
287 | 
288 |       temp.df.fre <- df.to.process[idx,  ]
289 | 
290 |       if (nrow(temp.df.fre) == 0) {
291 |         cat.progress('\n\t\tNo FRE file available..')
292 | 
293 |         next()
294 | 
295 |       }
296 | 
297 |       temp.file <- file.path(folder.out, paste0('FRE_', temp.df2$id.company, '_',
298 |                                                 stringr::str_sub(my.filename,1,4), '_',
299 |                                                 i.date, '.zip') )
300 | 
301 |       dl.link <- temp.df.fre$dl.link
302 | 
303 |       if (file.exists(temp.file)) {
304 |         cat.progress(' | file exists (no dl)')
305 |       } else {
306 |         cat.progress(' | downloading file')
307 | 
308 |         utils::download.file(url = dl.link,
309 |                              destfile = temp.file,
310 |                              quiet = TRUE,
311 |                              mode = 'wb')
312 |       }
313 | 
314 |       cat.progress(' | reading file')
315 | 
316 |       l.out.FRE <- gitrd.read.fre.zip.file(my.zip.file = temp.file,
317 |                                            folder.to.unzip = tempdir(),
318 |                                            l.other.info = list(company.name = temp.df2$name.company,
319 |                                                                ref.date = temp.df2$id.date))
320 | 
321 |       # set new cols and fix order
322 |       old.names <- names(l.out.FRE$df.stockholders)
323 |       l.out.FRE$df.stockholders$name.company <- temp.df2$name.company
324 |       l.out.FRE$df.stockholders$id.date <- temp.df2$id.date
325 | 
326 |       my.cols <- c('name.company', 'id.date', old.names)
327 |       l.out.FRE$df.stockholders <- l.out.FRE$df.stockholders[ ,my.cols]
328 | 
329 |       # set new cols and fix order
330 |       old.names <- names(l.out.FRE$df.capital)
331 |       l.out.FRE$df.capital$name.company <- temp.df2$name.company
332 |       l.out.FRE$df.capital$id.date <- temp.df2$id.date
333 | 
334 |       my.cols <- c('name.company', 'id.date', old.names)
335 |       l.out.FRE$df.capital <- l.out.FRE$df.capital[ ,my.cols]
336 | 
337 |       df.fre.stock.holders <- rbind(df.fre.stock.holders, l.out.FRE$df.stockholders)
338 |       df.fre.capital <- rbind(df.fre.capital, l.out.FRE$df.capital)
339 |     }
340 | 
341 |     # clean up dataframes before saving
342 |     df.assets <-      gitrd.fix.dataframes(stats::na.omit(df.assets), inflation.index, df.inflation,max.levels)
343 |     df.liabilities <- gitrd.fix.dataframes(stats::na.omit(df.liabilities), inflation.index, df.inflation,max.levels)
344 |     df.income <-      gitrd.fix.dataframes(stats::na.omit(df.income), inflation.index, df.inflation, max.levels)
345 |     df.cashflow <-    gitrd.fix.dataframes(stats::na.omit(df.cashflow), inflation.index, df.inflation, max.levels)
346 | 
347 |     tibble.company <- tibble::tibble(company.name = i.company,
348 |                                      company.code = temp.df$id.company[1],
349 |                                      type.info = type.info.now,
350 |                                      min.date = min(temp.df$id.date),
351 |                                      max.date = max(temp.df$id.date),
352 |                                      n.periods = length(temp.df$id.date),
353 |                                      current.stock.composition = list(current.stock.composition),
354 |                                      dividends.history = list(df.dividends),
355 |                                      fr.assets = list(df.assets),
356 |                                      fr.liabilities = list(df.liabilities),
357 |                                      fr.income = list(df.income),
358 |                                      fr.cashflow = list(df.cashflow),
359 |                                      history.stock.holders = list(df.fre.stock.holders),
360 |                                      history.capital = list(df.fre.capital) )
361 | 
362 |     tibble.out <- dplyr::bind_rows(tibble.out, tibble.company)
363 | 
364 |   }
365 | 
366 |   return(tibble.out)
367 | }
368 | 


--------------------------------------------------------------------------------
/R/gitrd_export_ITR_Data.R:
--------------------------------------------------------------------------------
  1 | #' Export tibble to an excel file
  2 | #'
  3 | #' @param data.in Tibble with financial information (output of gitrd.GetITRData)
  4 | #' @param base.file.name The basename of excel file (don't include extension)
  5 | #' @param type.export The extension of the desired format (only 'xlsx' implemented so far)
  6 | #' @param format.data The format of output. 'wide' (default) for a wide table and 'long' for a long table
  7 | #'
  8 | #' @return Nothing (invisible NULL). The function is called for writing the excel file.
  9 | #' @export
 10 | #'
 11 | #' @examples
 12 | #'
 13 | #' # get example data from RData file
 14 | #' my.f <- system.file('extdata/ExampleReport_Petrobras.RData', package = 'GetITRData')
 15 | #' load(my.f)
 16 | #'
 17 | #' \dontrun{ # dontrun: keep cran check time short
 18 | #' gitrd.export.ITR.data(df.reports, base.file.name = 'MyExcelFile', format.data = 'wide')
 19 | #' }
 20 | gitrd.export.ITR.data <- function(data.in,
 21 |                                   base.file.name = paste0('GetITRData_Export_',Sys.Date()),
 22 |                                   type.export = 'xlsx',
 23 |                                   format.data = 'wide') {
 24 | 
 25 |   # check args: each one must be a single value out of the possible options
 26 |   possible.exports <- c('xlsx')
 27 |   if ( (length(type.export) != 1) || !(type.export %in% possible.exports) ) {
 28 |     stop('input type.export should be "xlsx"')
 29 |   }
 30 | 
 31 |   possible.formats <- c('wide', 'long')
 32 |   if ( (length(format.data) != 1) || !(format.data %in% possible.formats) ) {
 33 |     stop('input format.data should be "wide" or "long"')
 34 |   }
 35 | 
 36 |   f.out <- paste0(base.file.name,'.', type.export)
 37 | 
 38 |   if (file.exists(f.out)) {
 39 |     cat('File ', f.out, ' already exists. Deleting it..')
 40 |     file.remove(f.out)
 41 |   }
 42 | 
 43 |   # fct for preparing a financial report: an empty report becomes a
 44 |   # placeholder, the others are converted to the wide format if required
 45 |   prepare.report <- function(df.report, report.label) {
 46 |     if (nrow(df.report) == 0) {
 47 |       cat('\n\tFound 0 row dataframe for ', report.label, '. Skipping it..', sep = '')
 48 |       return(data.frame(col = 'NO DATA'))
 49 |     }
 50 | 
 51 |     if (format.data == 'wide') {
 52 |       return(gitrd.convert.to.wide(df.report))
 53 |     }
 54 | 
 55 |     df.report
 56 |   }
 57 | 
 58 |   # fct for appending a dataframe as a new sheet of the output file
 59 |   write.sheet <- function(df.sheet, sheet.name) {
 60 |     xlsx::write.xlsx(x = df.sheet, file = f.out,
 61 |                      sheetName = sheet.name,
 62 |                      append = TRUE )
 63 |   }
 64 | 
 65 |   # copy metadata
 66 |   df.to.copy <- data.in[ ,c("company.name", "company.code", "type.info",
 67 |                             "min.date", "max.date", "n.periods")]
 68 |   write.sheet(df.to.copy, 'METADATA')
 69 | 
 70 |   for (i.company in data.in$company.name) {
 71 |     cat('\nCopying', format.data, 'data for', i.company)
 72 | 
 73 |     temp.df <- data.in[data.in$company.name == i.company, ]
 74 | 
 75 |     out.asset <- prepare.report(temp.df$fr.assets[[1]], 'assets')
 76 |     out.liability <- prepare.report(temp.df$fr.liabilities[[1]], 'liabilities')
 77 |     out.income <- prepare.report(temp.df$fr.income[[1]], 'income')
 78 |     out.cashflow <- prepare.report(temp.df$fr.cashflow[[1]], 'cashflow')
 79 | 
 80 |     if (length(temp.df$dividends.history[[1]]) == 0) {
 81 |       cat('\n\tFound NA for dividends history. Skipping it..')
 82 |       out.dividends <- data.frame(col = 'NO DATA')
 83 |     } else {
 84 |       out.dividends <- temp.df$dividends.history[[1]]
 85 |     }
 86 | 
 87 |     if (length(temp.df$history.stock.holders[[1]]) == 0) {
 88 |       cat('\n\tFound NA for current stock holders. Skipping it..')
 89 |       out.stock.holders <- data.frame(col = 'NO DATA')
 90 |     } else {
 91 |       out.stock.holders <- temp.df$history.stock.holders[[1]]
 92 |     }
 93 | 
 94 |     # copy financial reports. All sheet names of a company share the same
 95 |     # ending: its code and the first five characters of its name
 96 |     sheet.id <- paste0(temp.df$company.code,'-', stringr::str_sub(temp.df$company.name,1,5))
 97 | 
 98 |     write.sheet(out.asset, paste0('ASSETS ', sheet.id))
 99 |     write.sheet(out.liability, paste0('LIABIL ', sheet.id))
100 |     write.sheet(out.income, paste0('INCOME ', sheet.id))
101 |     write.sheet(out.cashflow, paste0('CASHFLOW ', sheet.id))
102 |     write.sheet(out.dividends, paste0('DIV HISTORY ', sheet.id))
103 |     write.sheet(out.stock.holders, paste0('STOCKHOLDERS ', sheet.id))
104 | 
105 |   }
106 | 
107 |   cat('\nExport sucessful')
108 | 
109 |   invisible(NULL)
110 | }
111 | 
112 | 
113 | 


--------------------------------------------------------------------------------
/R/gitrd_fix_dataframes.R:
--------------------------------------------------------------------------------
  1 | #' Fix dataframe for version issues and inflation measures (internal)
  2 | #'
  3 | #' @param df.in A dataframe with financial statements
  4 | #' @inheritParams gitrd.GetITRData
  5 | #' @param df.inflation Dataframe with inflation data
  6 | #' @return The fixed data.frame
  7 | #' @export
  8 | #'
  9 | #' @examples#'
 10 | #' # get example data from RData file
 11 | #' my.f <- system.file('extdata/ExampleReport_Petrobras.RData', package = 'GetITRData')
 12 | #' load(my.f)
 13 | #'
 14 | #' df.assets <- df.reports$fr.assets[[1]]
 15 | #'
 16 | #' df.assets.fixed <- gitrd.fix.dataframes(df.assets,
 17 | #'                                         inflation.index = 'none',
 18 | #'                                         df.inflation = data.frame())
 19 | gitrd.fix.dataframes <- function(df.in, inflation.index, df.inflation, max.levels = 3) {
 20 | 
 21 |   # if empty df
 22 |   if (nrow(df.in) == 0) {
 23 |     return(df.in)
 24 |   }
 25 | 
 26 |   # fix .00 in acc.number
 27 |   df.in$acc.number <- stringr::str_replace_all(df.in$acc.number, '.00', '')
 28 | 
 29 |   # fix change: 1.03 -> 1.02
 30 |   #browser()
 31 | 
 32 |   # fix names of acc.desc using latest info
 33 |   df.in$ref.date <- as.Date(df.in$ref.date)
 34 |   max.date <- max(df.in$ref.date)
 35 | 
 36 |   # fix names for cashflow statements (from 4.01 to 6.01)
 37 |   if (any(stringr::str_sub(df.in$acc.number, 1, 1) == '4') ) {
 38 |     substr(df.in$acc.number, 1, 1) <- "6"
 39 |   }
 40 | 
 41 |   # remove according to max.levels
 42 |   my.count <- function(x) {
 43 |     splitted <- stringr::str_split(x, stringr::fixed('.') )[[1]]
 44 |     return(length(splitted))
 45 |   }
 46 | 
 47 |   idx <- sapply(df.in$acc.number, my.count) <= max.levels
 48 |   df.in <- df.in[idx, ]
 49 | 
 50 |   # get reference table for substitution
 51 |   ref.table <- unique(df.in[df.in$ref.date == max.date, c('acc.number', 'acc.desc')])
 52 |   ref.table <- unique(df.in[ , c('acc.number', 'acc.desc', 'ref.date')])
 53 | 
 54 |   my.fct <- function(x, ref.table) {
 55 |     temp <- ref.table[ref.table$acc.number == x, ]
 56 | 
 57 |     idx <- which.max(temp$ref.date)
 58 |     return(temp$acc.desc[idx])
 59 |   }
 60 | 
 61 |   desc.to.use <- sapply(X = unique(df.in$acc.number), FUN = my.fct, ref.table = ref.table)
 62 | 
 63 |   # replace all
 64 |   idx <- match( df.in$acc.number, names(desc.to.use))
 65 |   df.in$acc.desc <- desc.to.use[idx]
 66 | 
 67 |   # fix inflation
 68 | 
 69 |   if (inflation.index == 'IPCA') {
 70 | 
 71 |     # get accumulated inflation index
 72 |     df.inflation$cum <- cumprod(df.inflation$Value/100 +1)
 73 | 
 74 |     # filter df.inflation for dates
 75 |     df.inflation <- df.inflation[df.inflation$Date<=max(df.in$ref.date), ]
 76 | 
 77 |     # use base date as last available date in df.inflation
 78 |     base.value <- df.inflation$cum[which.max(df.inflation$Date)]
 79 |     df.inflation$inflator <- df.inflation$cum/base.value
 80 | 
 81 |     # match time periods
 82 |     idx <- match(format(df.in$ref.date, '%Y-%m'), format(df.inflation$Date, '%Y-%m'))
 83 |     df.in$acc.value.infl.adj <- df.in$acc.value/df.inflation$inflator[idx]
 84 |   }
 85 | 
 86 |   if (inflation.index == 'dollar') {
 87 | 
 88 |     # find closest date for dollar
 89 |     match.neardate <- function(date.in, table.dates) {
 90 |       idx <- which.min(abs(date.in - table.dates))
 91 |       return(idx)
 92 |     }
 93 | 
 94 |     idx <- sapply(X = df.in$ref.date, FUN = match.neardate, table.dates = df.inflation$Date)
 95 | 
 96 |     df.in$acc.value.infl.adj <- df.in$acc.value/df.inflation$Value[idx]
 97 |   }
 98 | 
 99 |   # fix cols order
100 |   my.col <- c("company.name","ref.date", "acc.number", "acc.desc",
101 |               "acc.value", "acc.value.infl.adj")
102 |   df.in <- df.in[ , my.col]
103 | 
104 |   return(df.in)
105 | }
106 | 
107 | 


--------------------------------------------------------------------------------
/R/gitrd_get_inflation_data.R:
--------------------------------------------------------------------------------
 1 | #' Downloads and read inflation data from github
 2 | #'
 3 | #' Inflation data is available at git repo 'msperlin/GetITRData_auxiliary'
 4 | #'
 5 | #' @inheritParams gitrd.GetITRData
 6 | #'
 7 | #' @return A dataframe with inflation data
 8 | #' @export
 9 | #'
10 | #' @examples
11 | #'
12 | #' \dontrun{ # keep cran check fast
13 | #' df.inflation <- gitrd.get.inflation.data('IPCA')
14 | #' str(df.inflation)
15 | #' }
16 | gitrd.get.inflation.data <- function(inflation.index) {
17 | 
18 |   # error checking
19 |   possible.values <- c('dollar', 'IPCA')
20 |   if ( !(inflation.index %in% possible.values) ) {
21 |     stop('Input inflation.index should be one of:\n\n', paste0(possible.values, collapse = '\n'))
22 |   }
23 | 
24 |   # check if cache file exists
25 |   my.f.rdata <- file.path(tempdir(),paste0('df_inflation_', inflation.index, '.RData') )
26 | 
27 |   if (file.exists(my.f.rdata)) {
28 |     cat('\n\tFound cache file. Loading data..')
29 |     load(my.f.rdata)
30 |     return(df.inflation)
31 |   }
32 | 
33 |   # get data from github
34 | 
35 |   my.cols <- readr::cols(
36 |     Date = readr::col_date(format = ""),
37 |     Value = readr::col_double()
38 |   )
39 | 
40 |   link.file <- switch(inflation.index,
41 |                       'IPCA' = 'https://raw.githubusercontent.com/msperlin/GetitrData_auxiliary/master/Inflation-IPCA.csv',
42 |                       'dollar' = 'https://raw.githubusercontent.com/msperlin/GetitrData_auxiliary/master/Inflation-dollar.csv')
43 | 
44 |   df.inflation <- readr::read_csv(link.file, col_types = my.cols)
45 | 
46 |   cat('\n\tCaching inflation RDATA into tempdir()')
47 |   save('df.inflation', file = my.f.rdata)
48 | 
49 |   return(df.inflation)
50 | 
51 | }
52 | 


--------------------------------------------------------------------------------
/R/gitrd_get_info_companies.R:
--------------------------------------------------------------------------------
  1 | #' Reads up to date information about Bovespa companies from a github file
  2 | #'
  3 | #' A csv file with information about available companies and time periods is downloaded from github and read.
  4 | #' This file is updated periodically and manually by the author. When run for the first time in a R session, a .RDATA file
  5 | #' containing the output of the function is saved in tempdir() for caching.
  6 | #'
  7 | #' @param type.data A string that sets the type of information to be returned ('companies' or 'companies_files').
  8 | #' If 'companies', it will return a dataframe with several information about companies, but without download links.
  9 | #'
 10 | #' @return A dataframe with several information about Bovespa companies
 11 | #' @export
 12 | #'
 13 | #' @examples
 14 | #'
 15 | #' \dontrun{ # keep cran check fast
 16 | #' df.info <- gitrd.get.info.companies()
 17 | #' str(df.info)
 18 | #' }
 19 | gitrd.get.info.companies <- function(type.data = 'companies_files') {
 20 | 
 21 |   # error checking
 22 |   possible.values <- c('companies_files', 'companies')
 23 |   if ( !(type.data %in% possible.values) ) {
 24 |     stop('Input type.data should be one of:\n\n', paste0(possible.values, collapse = '\n'))
 25 |   }
 26 | 
 27 |   # check if cache file exists
 28 |   my.f.rdata <- file.path(tempdir(),paste0('df_info_', type.data, '.RData') )
 29 | 
 30 |   if (file.exists(my.f.rdata)) {
 31 |     cat('Found cache file. Loading data..')
 32 |     load(my.f.rdata)
 33 |     return(df.info)
 34 |   }
 35 | 
 36 |   # get data from github
 37 | 
 38 |   cat('\nReading info file from github')
 39 |   link.github <- 'https://raw.githubusercontent.com/msperlin/GetitrData_auxiliary/master/InfoBovespaCompanies.csv'
 40 | 
 41 |   my.cols <- readr::cols(
 42 |     id.company = readr::col_integer(),
 43 |     name.company = readr::col_character(),
 44 |     main.sector = readr::col_character(),
 45 |     sub.sector = readr::col_character(),
 46 |     segment = readr::col_character(),
 47 |     listing.segment = readr::col_character(),
 48 |     tickers = readr::col_character(),
 49 |     id.file = readr::col_integer(),
 50 |     dl.link = readr::col_character(),
 51 |     id.date = readr::col_date(),
 52 |     id.type = readr::col_character(),
 53 |     type.fin.report = readr::col_character(),
 54 |     situation = readr::col_character()
 55 |   )
 56 | 
 57 | 
 58 |   df.info <- readr::read_csv(link.github, col_types = my.cols)
 59 | 
 60 |   # remove rows without id for dates or situation
 61 |   idx <- (!is.na(df.info$id.date))&(!is.na(df.info$situation))
 62 |   df.info <- df.info[idx, ]
 63 | 
 64 |   n.actives <- sum(unique(df.info[ ,c('name.company', 'situation')])$situation == 'ATIVO')
 65 |   n.inactives <- sum(unique(df.info[ ,c('name.company', 'situation')])$situation != 'ATIVO' )
 66 | 
 67 |   cat('\nFound', nrow(df.info), 'lines for', length(unique(df.info$name.company)), 'companies ',
 68 |       '[Actives = ', n.actives, ' Inactives = ', n.inactives, ']')
 69 | 
 70 |   my.last.update <- readLines('https://raw.githubusercontent.com/msperlin/GetitrData_auxiliary/master/LastUpdate.txt')
 71 |   cat('\nLast file update: ', my.last.update)
 72 | 
 73 |   if (type.data == 'companies') {
 74 | 
 75 |     my.cols <- my.cols <- c("name.company","id.company", "situation", "listing.segment",
 76 |                             "main.sector", "sub.sector", "segment", "tickers")
 77 |     df.info.agg <- unique(df.info[, my.cols])
 78 | 
 79 |     my.fun <- function(df) {
 80 |       return(c(min(df$id.date), max(df$id.date)))
 81 |     }
 82 |     out <- by(data = df.info, INDICES = df.info$name.company, FUN = my.fun)
 83 | 
 84 |     df.temp <- data.frame(name.company = names(out),
 85 |                first.date = sapply(out, FUN = function(x) as.character(x[1])),
 86 |                last.date = sapply(out, FUN = function(x) as.character(x[2])),
 87 |     stringsAsFactors = F )
 88 | 
 89 |     df.info.agg <- merge(df.info.agg, df.temp, by = 'name.company')
 90 |     df.info.agg$first.date <- as.Date(df.info.agg$first.date)
 91 |     df.info.agg$last.date <- as.Date(df.info.agg$last.date)
 92 | 
 93 |     df.info <- df.info.agg
 94 |   }
 95 | 
 96 |   cat('\nCaching RDATA into tempdir()')
 97 |   save('df.info', file = my.f.rdata)
 98 | 
 99 |   return(df.info)
100 | 
101 | }
102 | 


--------------------------------------------------------------------------------
/R/gitrd_read_fre_zip_file.R:
--------------------------------------------------------------------------------
  1 | #' Reads a single zip file downloaded from Bovespa
  2 | #'
  3 | #' @param my.zip.file Full path to zip file
  4 | #' @param folder.to.unzip Folder to unzip files (default = tempdir())
  5 | #' @param l.other.info Other information as a list
  6 | #'
  7 | #' @return A list with several dataframes containing financial statements
  8 | #' @export
  9 | #'
 10 | #' @examples
 11 | #'
 12 | #' my.f <- system.file('extdata/434_ARAC_2008-09-30.zip', package = 'GetITRData')
 13 | #'
 14 | #' my.l <- gitrd.read.zip.file(my.f, id.type = 'before 2011', type.fin.report = 'itr')
 15 | #' print(my.l)
 16 | #'
 17 | gitrd.read.fre.zip.file <- function(my.zip.file,
 18 |                                     folder.to.unzip = tempdir(),
 19 |                                     l.other.info) {
 20 | 
 21 |   # sanity check
 22 |   if (tools::file_ext(my.zip.file) != 'zip') {
 23 |     stop(paste('File', my.zip.file, ' is not a zip file.') )
 24 |   }
 25 | 
 26 |   if (!file.exists(my.zip.file)) {
 27 |     stop(paste('File', my.zip.file, ' does not exists.') )
 28 |   }
 29 | 
 30 |   if (file.size(my.zip.file) == 0){
 31 |     stop(paste('File', my.zip.file, ' has size 0!') )
 32 |   }
 33 | 
 34 |   if (length(my.zip.file) != 1){
 35 |     stop('This function only works for a single zip file... check your inputs')
 36 |   }
 37 | 
 38 |   if (!dir.exists(folder.to.unzip)) {
 39 |     cat(paste('Folder', folder.to.unzip, 'does not exist. Creating it.'))
 40 |     dir.create(folder.to.unzip)
 41 |   }
 42 | 
 43 |   my.basename <- tools::file_path_sans_ext(basename(my.zip.file))
 44 |   rnd.folder.name <- file.path(folder.to.unzip, paste0('DIR-',my.basename))
 45 | 
 46 |   if (!dir.exists(rnd.folder.name)) dir.create(rnd.folder.name)
 47 | 
 48 |   utils::unzip(my.zip.file, exdir = rnd.folder.name, junkpaths = TRUE)
 49 | 
 50 |   # list files and check it
 51 |   my.files <- list.files(rnd.folder.name)
 52 | 
 53 |   if (length(my.files) == 0) {
 54 |     #browser()
 55 | 
 56 |     file.remove(my.zip.file)
 57 |     stop(paste0('Zipped file contains 0 files. ',
 58 |                 'This is likelly a problem with the downloaded file. ',
 59 |                 'Try running the code again as the corrupted zip file was deleted and will be downloaded again.',
 60 |                 '\n\nIf the problem persists, my suggestions is to remove the time period with problem.') )
 61 |   }
 62 | 
 63 | 
 64 |   my.l <- gitrd.read.zip.file.type.fre(rnd.folder.name, folder.to.unzip)
 65 | 
 66 |   return(my.l)
 67 | }
 68 | 
 69 | #' Reads folder for zip file post 2011 (internal)
 70 | #'
 71 | #' @inheritParams gitrd.read.zip.file
 72 | #' @param rnd.folder.name Folder where unzipped files are available
 73 | #'
 74 | #' @return A list with financial statements
 75 | #'
 76 | #' @examples
 77 | #' # no example (this functions not used directly)
 78 | gitrd.read.zip.file.type.fre <- function(rnd.folder.name, folder.to.unzip = tempdir()) {
 79 | 
 80 |   zipped.file <- file.path(rnd.folder.name, list.files(rnd.folder.name, pattern = '*.fre')[1])
 81 | 
 82 |   utils::unzip(zipped.file, exdir = rnd.folder.name)
 83 | 
 84 |   company.reg.file <- file.path(rnd.folder.name,'ControleAcionario.xml')
 85 | 
 86 |   xml_data <- XML::xmlToList(XML::xmlParse(company.reg.file))
 87 | 
 88 | 
 89 |   # get stock holders composition
 90 | 
 91 |   fix.fct <- function(x) {
 92 |     if (is.null(x)) x <- NA
 93 |     return(x)
 94 |   }
 95 | 
 96 |   my.fct <- function(l.in, l.other.info) {
 97 | 
 98 |     df.out <- data.frame(type.register = l.in$TipoRegistro,
 99 |                          id.stockholder = l.in$NumeroIdentificacaoAcionista,
100 |                          id.person = fix.fct(l.in$Pessoa$IdentificacaoPessoa),
101 |                          id.nationality = fix.fct(l.in$Nacionalidade),
102 |                          id.state = fix.fct(l.in$Estado$NomeEstado),
103 |                          id.country = fix.fct(l.in$Estado$Pais$NomePais),
104 |                          name.stockholder = fix.fct(l.in$Pessoa$NomePessoa),
105 |                          type.stockholder = fix.fct(l.in$Pessoa$TipoPessoa),
106 |                          qtd.ord.shares = l.in$QuantidadeAcoesOrdinarias,
107 |                          perc.ord.shares = l.in$PercentualAcoesOrdinarias,
108 |                          qtd.pref.shares = l.in$QuantidadeAcoesPreferenciais,
109 |                          perc.pref.shares = l.in$PercentualAcoesPreferenciais,
110 |                          controlling.stockholder = switch(l.in$AcionistaControlador,
111 |                                                        '1' = TRUE,
112 |                                                        '2' = FALSE,
113 |                                                        '0' = FALSE) )
114 | 
115 | 
116 |     return(df.out)
117 |   }
118 | 
119 |   df.stockholders <- do.call(what = rbind, lapply(xml_data, my.fct))
120 |   rownames(df.stockholders) <- NULL
121 | 
122 |   # stock composition and value of company
123 |   company.reg.file <- file.path(rnd.folder.name,'CapitalSocial.xml')
124 | 
125 |   xml_data <- XML::xmlToList(XML::xmlParse(company.reg.file))
126 | 
127 |   if (length(xml_data) < 3 ) {
128 |     df.capital <- data.frame(qtd.ord.shares = NA,
129 |                              qtd.pref.shares = NA,
130 |                              total.value = NA)
131 |   } else {
132 |     effective.capital <- xml_data[[3]]
133 | 
134 |     df.capital <- data.frame(qtd.ord.shares = effective.capital$QuantidadeAcoesOrdinarias,
135 |                              qtd.pref.shares = effective.capital$QuantidadeAcoesPreferenciais,
136 |                              total.value = effective.capital$ValorCapitalSocial)
137 |   }
138 | 
139 | 
140 |   my.l <- list(df.stockholders = df.stockholders,
141 |                df.capital = df.capital)
142 | 
143 |   return(my.l)
144 | }
145 | 
146 | 


--------------------------------------------------------------------------------
/R/gitrd_read_zip_file.R:
--------------------------------------------------------------------------------
  1 | #' Reads a single zip file downloaded from Bovespa
  2 | #'
  3 | #' @param my.zip.file Full path to zip file
  4 | #' @param folder.to.unzip Folder to unzip files (default = tempdir())
  5 | #' @param id.type The type of file structure ('after 2011' or 'before 2011')
  6 | #' @param type.fin.report Peridodicy of fin data ('itr' - quarterly or 'dfp' - annual)
  7 | #'
  8 | #' @return A list with several dataframes containing financial statements
  9 | #' @export
 10 | #'
 11 | #' @examples
 12 | #'
 13 | #' my.f <- system.file('extdata/434_ARAC_2008-09-30.zip', package = 'GetITRData')
 14 | #'
 15 | #' my.l <- gitrd.read.zip.file(my.f, id.type = 'before 2011', type.fin.report = 'itr')
 16 | #' print(my.l)
 17 | gitrd.read.zip.file <- function(my.zip.file,
 18 |                                 folder.to.unzip = tempdir(),
 19 |                                 id.type,
 20 |                                 type.fin.report) {
 21 | 
 22 |   # sanity check
 23 |   if (tools::file_ext(my.zip.file) != 'zip') {
 24 |     stop(paste('File', my.zip.file, ' is not a zip file.') )
 25 |   }
 26 | 
 27 |   if (!file.exists(my.zip.file)) {
 28 |     stop(paste('File', my.zip.file, ' does not exists.') )
 29 |   }
 30 | 
 31 |   if (file.size(my.zip.file) == 0){
 32 |     stop(paste('File', my.zip.file, ' has size 0!') )
 33 |   }
 34 | 
 35 |   if (length(my.zip.file) != 1){
 36 |     stop('This function only works for a single zip file... check your inputs')
 37 |   }
 38 | 
 39 |   if (!dir.exists(folder.to.unzip)) {
 40 |     cat(paste('Folder', folder.to.unzip, 'does not exist. Creating it.'))
 41 |     dir.create(folder.to.unzip)
 42 |   }
 43 | 
 44 |   my.basename <- tools::file_path_sans_ext(basename(my.zip.file))
 45 |   rnd.folder.name <- file.path(folder.to.unzip, paste0('DIR-',my.basename))
 46 | 
 47 |   if (!dir.exists(rnd.folder.name)) dir.create(rnd.folder.name)
 48 | 
 49 |   utils::unzip(my.zip.file, exdir = rnd.folder.name, junkpaths = TRUE)
 50 | 
 51 |   # list files and check it
 52 |   my.files <- list.files(rnd.folder.name)
 53 | 
 54 |   if (length(my.files) == 0) {
 55 |     #browser()
 56 | 
 57 |     file.remove(my.zip.file)
 58 |     stop(paste0('Zipped file contains 0 files. ',
 59 |                 'This is likelly a problem with the downloaded file. ',
 60 |                 'Try running the code again as the corrupted zip file was deleted and will be downloaded again.',
 61 |                 '\n\nIf the problem persists, my suggestions is to remove the time period with problem.') )
 62 |   }
 63 | 
 64 |   if (id.type == 'after 2011') {
 65 |     my.l <- gitrd.read.zip.file.type.1(rnd.folder.name, folder.to.unzip, type.fin.report)
 66 |   }
 67 | 
 68 |   if (id.type == 'before 2011') {
 69 |     my.l <- gitrd.read.zip.file.type.2(rnd.folder.name, folder.to.unzip, type.fin.report)
 70 |   }
 71 | 
 72 |   # check for empty dfs
 73 |   my.fct <- function(df.in) {
 74 |     if (nrow(df.in)==0) {
 75 |       df.out <- data.frame(acc.number = NA, acc.desc = NA, acc.value = NA)
 76 |     } else {
 77 |       df.out <- df.in
 78 |     }
 79 |     return(df.out)
 80 |   }
 81 | 
 82 |   my.l$cons.dfs <- lapply(my.l$cons.dfs, my.fct)
 83 |   my.l$ind.dfs  <- lapply(my.l$ind.dfs , my.fct)
 84 | 
 85 |   return(my.l)
 86 | }
 87 | 
 88 | #' Reads folder for zip file post 2011 (internal)
 89 | #'
 90 | #' @inheritParams gitrd.read.zip.file
 91 | #' @param rnd.folder.name Folder where unzipped files are available
 92 | #'
 93 | #' @return A list with financial statements
 94 | #'
 95 | #' @examples
 96 | #' # no example (this functions not used directly)
 97 | gitrd.read.zip.file.type.1 <- function(rnd.folder.name, folder.to.unzip = tempdir(), type.fin.report) {
 98 | 
 99 |   company.reg.file <- switch(type.fin.report,
100 |                              'itr' = file.path(rnd.folder.name,'FormularioDemonstracaoFinanceiraITR.xml'),
101 |                              'dfp' = file.path(rnd.folder.name,'FormularioDemonstracaoFinanceiraDFP.xml') )
102 | 
103 |   xml_data <- XML::xmlToList(XML::xmlParse(company.reg.file))
104 | 
105 |   # get basic info
106 | 
107 |   company.name = xml_data$CompanhiaAberta$NomeRazaoSocialCompanhiaAberta
108 |   company.cvm_code <- xml_data$CompanhiaAberta$CodigoCvm
109 |   company.SeqNumber <- xml_data$CompanhiaAberta$NumeroSequencialRegistroCvm
110 |   company.date.delivery <- xml_data$DataEntrega
111 |   date.docs <- as.Date(xml_data$DataReferenciaDocumento, format = '%Y-%m-%d')
112 | 
113 |   #cat('\nReading', my.zip.file, '-', company.name, '|', as.character(date.docs))
114 | 
115 |   zipped.file <- switch(type.fin.report,
116 |                              'itr' = file.path(rnd.folder.name, list.files(rnd.folder.name, pattern = '*.itr')[1]),
117 |                              'dfp' = file.path(rnd.folder.name, list.files(rnd.folder.name, pattern = '*.dfp')[1]) )
118 | 
119 |   utils::unzip(zipped.file, exdir = rnd.folder.name)
120 | 
121 |   fin.report.file <- file.path(rnd.folder.name, 'InfoFinaDFin.xml')
122 | 
123 |   if (!file.exists(fin.report.file)) {
124 |     stop('Cant find file', fin.report.file)
125 |   }
126 | 
127 |   xml_data <- XML::xmlToList(XML::xmlParse(fin.report.file))
128 | 
129 |   file.remove(fin.report.file)
130 | 
131 |   # function to get individual DF
132 |   my.fct <- function(x, type.df, info){
133 | 
134 |     if (type.df == 'individual') my.char = '1'
135 |     if (type.df == 'consolidated') my.char = '2'
136 | 
137 |     if (x$PlanoConta$VersaoPlanoConta$CodigoTipoInformacaoFinanceira == my.char){
138 | 
139 |       if (info == 'Descricao') return(x$DescricaoConta1)
140 |       if (info == 'Valor') {
141 | 
142 | 
143 |         my.value <- switch(type.fin.report,
144 |                            'itr' = as.numeric(c(x$ValorConta2, x$ValorConta3, x$ValorConta4)),
145 |                            'dfp' = as.numeric(c(x$ValorConta1, x$ValorConta2, x$ValorConta3,x$ValorConta4))
146 |                            )
147 | 
148 |         my.value <- my.value[my.value != 0]
149 |         if (length(my.value)==0) {
150 |           my.value <- 0
151 |         } else {
152 |           my.value <- my.value[1]
153 |         }
154 | 
155 |         return(my.value)
156 |       }
157 |       if (info == 'id') return(x$PlanoConta$NumeroConta)
158 | 
159 |     } else {
160 |       return(NA)
161 |     }
162 |   }
163 | 
164 |   # get individual dfs
165 |   type.df <- 'individual'
166 |   acc.desc  <- as.character(sapply(xml_data, my.fct, type.df = type.df, info = 'Descricao'))
167 |   acc.value <-   as.numeric(sapply(xml_data, my.fct, type.df = type.df, info = 'Valor'))
168 |   acc.number <- as.character(sapply(xml_data, my.fct, type.df = type.df, info = 'id'))
169 | 
170 |   ind.df <- data.frame(acc.number,acc.desc,acc.value)
171 | 
172 |   # save info
173 |   df.assets <- stats::na.omit(ind.df[stringr::str_sub(ind.df$acc.number,1,1) == '1', ])
174 |   df.liabilities <- stats::na.omit(ind.df[stringr::str_sub(ind.df$acc.number,1,1) == '2', ])
175 |   df.income    <- stats::na.omit(ind.df[stringr::str_sub(ind.df$acc.number,1,1) == '3', ])
176 |   df.cashflow    <- stats::na.omit(ind.df[stringr::str_sub(ind.df$acc.number,1,1) == '6', ])
177 | 
178 |   l.individual.dfs <- list(df.assets = df.assets,
179 |                            df.liabilities = df.liabilities,
180 |                            df.income = df.income,
181 |                            df.cashflow = df.cashflow)
182 | 
183 |   # get consolidated dfs
184 |   type.df <- 'consolidated'
185 |   acc.desc  <- as.character(sapply(xml_data, my.fct, type.df = type.df, info = 'Descricao'))
186 |   acc.value <-   as.numeric(sapply(xml_data, my.fct, type.df = type.df, info = 'Valor'))
187 |   acc.number <- as.character(sapply(xml_data, my.fct, type.df = type.df, info = 'id'))
188 | 
189 |   consolidated.df <- data.frame(acc.number,acc.desc,acc.value)
190 | 
191 |   # save info
192 |   df.assets <- stats::na.omit(consolidated.df[stringr::str_sub(consolidated.df$acc.number,1,1) == '1', ])
193 |   df.liabilities <- stats::na.omit(consolidated.df[stringr::str_sub(consolidated.df$acc.number,1,1) == '2', ])
194 |   df.income    <- stats::na.omit(consolidated.df[stringr::str_sub(consolidated.df$acc.number,1,1) == '3', ])
195 |   df.cashflow    <- stats::na.omit(consolidated.df[stringr::str_sub(consolidated.df$acc.number,1,1) == '6', ])
196 | 
197 |   l.consolidated.dfs <- list(df.assets = df.assets,
198 |                              df.liabilities = df.liabilities,
199 |                              df.income = df.income,
200 |                              df.cashflow = df.cashflow)
201 | 
202 |   my.l <- list(ind.dfs = l.individual.dfs,
203 |                cons.dfs = l.consolidated.dfs)
204 | 
205 |   return(my.l)
206 | }
207 | 
#' Reads folder for zip file pre 2011 (internal)
#'
#' Each financial statement is stored in its own file. The files are found by
#' name pattern (one pattern for 'itr', another one for 'dfp') and read with
#' \code{gitrd.read.fwf.file}.
#'
#' @inheritParams gitrd.read.zip.file
#' @param rnd.folder.name Folder where unzipped files are available
#'
#' @return A list with financial statements: \code{ind.dfs} (individual) and
#'   \code{cons.dfs} (consolidated), each one a list with dataframes
#'   \code{df.assets}, \code{df.liabilities}, \code{df.income} and
#'   \code{df.cashflow}
#'
#' @examples
#' # no example (this functions not used directly)
gitrd.read.zip.file.type.2 <- function(rnd.folder.name, folder.to.unzip = tempdir(), type.fin.report) {

  # finds the file(s) of one statement, using the pattern of the report type
  find.files <- function(itr.pattern, dfp.pattern) {
    switch(type.fin.report,
           'itr' = list.files(rnd.folder.name, pattern = itr.pattern, full.names = TRUE),
           'dfp' = list.files(rnd.folder.name, pattern = dfp.pattern, full.names = TRUE))
  }

  # reads one statement from the file(s) matching the patterns
  read.statement <- function(itr.pattern, dfp.pattern) {
    gitrd.read.fwf.file(find.files(itr.pattern, dfp.pattern), type.fin.report)
  }

  # same as read.statement, but the file of the cashflow statement may not
  # exist. In that case a single row of NA values is used
  read.cashflow <- function(itr.pattern, dfp.pattern) {
    cashflow.files <- find.files(itr.pattern, dfp.pattern)

    if (length(cashflow.files) == 0) {
      return(data.frame(acc.desc  = NA,
                        acc.value = NA,
                        acc.number = NA))
    }

    gitrd.read.fwf.file(cashflow.files, type.fin.report)
  }

  # get individual fin statements
  l.individual.dfs <- list(df.assets      = read.statement('ITRBPAE', 'DFPBPAE'),
                           df.liabilities = read.statement('ITRBPP', 'DFPBPPE'),
                           df.income      = read.statement('ITRDERE', 'DFPDEREE'),
                           df.cashflow    = read.cashflow('ITRDFCE', 'DFPDFCEE'))

  # get consolidated fin statements
  l.consolidated.dfs <- list(df.assets      = read.statement('ITRCBPA', 'DFPCBPAE'),
                             df.liabilities = read.statement('ITRCBPP', 'DFPCBPPE'),
                             df.income      = read.statement('ITRCDER', 'DFPCDERE'),
                             df.cashflow    = read.cashflow('ITRCDFCE', 'DFPCDFCEE'))

  list(ind.dfs = l.individual.dfs,
       cons.dfs = l.consolidated.dfs)
}
304 | 


--------------------------------------------------------------------------------
/R/gitrd_utils.R:
--------------------------------------------------------------------------------
  1 | #' Converts a dataframe from gitr_GetITRData to the wide format
  2 | #'
  3 | #' @param data.in Data frame with financial information
  4 | #' @param data.in.cols Which data to go in rows values ('original' or 'inflation adjusted')
  5 | #'
  6 | #' @return A dataframe in the wide format
  7 | #' @export
  8 | #'
  9 | #' @examples
 10 | #'
 11 | #' # get example data from RData file
 12 | #' my.f <- system.file('extdata/ExampleReport_Petrobras.RData', package = 'GetITRData')
 13 | #' load(my.f)
 14 | #'
 15 | #' df.assets <- df.reports$fr.assets[[1]]
 16 | #' df.assets.wide <- gitrd.convert.to.wide(df.assets)
 17 | gitrd.convert.to.wide <- function(data.in, data.in.cols = 'original') {
 18 | 
 19 |   possible.types <- c('original','inflation adjusted')
 20 |   if ( !any(data.in.cols %in% possible.types) ) {
 21 |     stop('ERROR: input data.in.cols must be either "original" or "inflation adjusted"')
 22 |   }
 23 | 
 24 |   if (!any('data.frame' %in% class(data.in))) {
 25 |     stop('input data.in does not seems to be a dataframe..')
 26 |   }
 27 | 
 28 |   value.var <- switch(data.in.cols,
 29 |                       'original' = 'acc.value',
 30 |                       'inflation adjusted' =  'acc.value.infl.adj')
 31 | 
 32 |   df.wide <- reshape2::dcast(data = data.in,
 33 |                    formula = acc.number + acc.desc + company.name  ~ ref.date,
 34 |                    value.var = value.var, fill = 0)
 35 | 
 36 |   return(df.wide)
 37 | 
 38 | }
 39 | 
 40 | #' Helps users search for a company name
 41 | #'
 42 | #' @param char.to.search Character for partial matching
 43 | #'
 44 | #' @return Names of found companies
 45 | #' @export
 46 | #'
 47 | #' @examples
 48 | #'
 49 | #' \dontrun{ # dontrun: keep cran check fast
 50 | #' gitrd.search.company('GERDAU')
 51 | #' }
 52 | gitrd.search.company <- function(char.to.search) {
 53 | 
 54 |   df.info <- gitrd.get.info.companies()
 55 | 
 56 |   unique.names <- unique(df.info$name.company)
 57 |   char.target <- iconv(stringr::str_to_lower(unique.names),to='ASCII//TRANSLIT')
 58 |   char.to.search <- iconv(stringr::str_to_lower(char.to.search),to='ASCII//TRANSLIT')
 59 | 
 60 |   idx <- stringr::str_detect(char.target, pattern = stringr::fixed(char.to.search))
 61 | 
 62 |   char.out <- stats::na.omit(unique.names[idx])
 63 | 
 64 |   temp.df <- unique(df.info[df.info$name.company %in% char.out, c('name.company', 'id.date', 'situation')])
 65 | 
 66 |   cat('\n\nFound', length(char.out), 'companies:')
 67 | 
 68 |   for (i.company in char.out) {
 69 | 
 70 |     temp.df <- df.info[which(df.info$name.company == i.company), ]
 71 | 
 72 |     first.date <- min(stats::na.omit(temp.df$id.date))
 73 |     last.date  <- max(stats::na.omit(temp.df$id.date))
 74 | 
 75 |     cat(paste0('\n', paste0(i.company, paste0(rep(' ', max(nchar(char.out)) - nchar(i.company)),
 76 |                                               collapse = '' ),
 77 |                             ' | situation = ', temp.df$situation[1],
 78 |                             ' | first date = ', first.date,
 79 |                             ' | last date - ',  last.date) ) )
 80 |   }
 81 | 
 82 |   cat('\n\n')
 83 | 
 84 | }
 85 | 
 86 | #' Reads FWF file from bovespa (internal)
 87 | #'
 88 | #' @param my.f File to be read
 89 | #' @inheritParams gitrd.GetITRData
 90 | #' @inheritParams gitrd.read.zip.file
 91 | #' @return A dataframe with data
 92 | #' @export
 93 | #' @examples
 94 | #'
 95 | #' my.f <- system.file('extdata/ITRBPAE.001', package = 'GetITRData')
 96 | #'
 97 | #' df.assets <- gitrd.read.fwf.file(my.f, type.fin.report = 'itr')
 98 | gitrd.read.fwf.file <- function(my.f, type.fin.report) {
 99 | 
100 |   if (file.size(my.f) ==0 ) {
101 |     df.out <- data.frame(acc.number= NA,
102 |                          acc.desc = NA,
103 |                          acc.value = NA)
104 |     return(df.out)
105 |   }
106 | 
107 |   # set cols for fwf
108 |   if (type.fin.report == 'itr') {
109 | 
110 |   my.col.types <- readr::cols(
111 |     acc.number = readr::col_character(),
112 |     acc.desc = readr::col_character(),
113 |     acc.value = readr::col_integer()
114 |   )
115 | 
116 |   my.col.names<-  c('acc.number', 'acc.desc', 'acc.value')
117 |   my.pos <- readr::fwf_positions(start = c(15, 28, 74), end = c(27, 67, 82),
118 |                                  col_names = my.col.names)
119 | 
120 |   }
121 | 
122 |   if (type.fin.report == 'dfp') {
123 | 
124 |     my.col.types <- readr::cols(
125 |       acc.number = readr::col_character(),
126 |       acc.desc = readr::col_character(),
127 |       acc.value1 = readr::col_integer(),
128 |       acc.value2 = readr::col_integer(),
129 |       acc.value = readr::col_integer()
130 |     )
131 | 
132 |     my.col.names<-  c('acc.number', 'acc.desc', 'acc.value1','acc.value2','acc.value')
133 |     my.pos <- readr::fwf_positions(start = c(15, 28, 74,89,89+14+1), end = c(27, 67, 82,97,112),
134 |                                    col_names = my.col.names)
135 | 
136 |   }
137 | 
138 |   df.out <- readr::read_fwf(my.f, my.pos,
139 |                                locale = readr::locale(encoding = 'Latin1'), col_types =  my.col.types)
140 | 
141 | 
142 |   if (type.fin.report == 'dfp') {
143 |     df.out <- df.out[, c('acc.number', 'acc.desc', 'acc.value')]
144 |   }
145 |   # fix for empty data
146 |   if (nrow(df.out) == 0) {
147 |     df.out <- tibble::tibble(acc.number = NA,
148 |                              acc.desc = NA,
149 |                              acc.value = NA)
150 |   }
151 | 
152 |   return(df.out)
153 | 
154 | }
155 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # CRAN Package GetITRData - Reading Quarterly Financial Reports from Bovespa
 2 | 
 3 | **BE AWARE that this package is no longer in development. The code is kept here as a reference.**
 4 | 
 5 | Financial statements of companies traded at B3 (formerly Bovespa), the Brazilian stock exchange, are available in its [website](http://www.bmfbovespa.com.br/). Accessing the data for a single company is straightforward. On the website one can find a simple interface for accessing this dataset. An example is given [here](https://www.rad.cvm.gov.br/ENETCONSULTA/frmGerenciaPaginaFRE.aspx?NumeroSequencialDocumento=67775&CodigoTipoInstituicao=2). However, gathering and organizing the data for large-scale research, with many companies and many quarters, is painful. Quarterly reports must be downloaded or copied individually and later aggregated. Changes in the accounting format throughout time can make this process slow, unreliable and irreproducible.
 6 | 
 7 | Package `GetITRData` provides an R interface to all financial statements available in the website. It not only downloads the data but also organizes it in a tabular format. Users can simply select companies and a time period to download all available data. Information about current companies, such as sector and available quarters, is also within reach. The main purpose of the package is to make it easy to access quarterly financial statements in large-scale research, facilitating the reproducibility of such studies.
 8 | 
 9 | 
10 | # Installation
11 | 
12 | The package is available in CRAN (release version) and in Github (development version). You can install any of those with the following code:
13 | 
14 | ```
15 | # Release version in CRAN
16 | install.packages('GetDFPData') 
17 | 
18 | # Development version in Github
19 | devtools::install_github('msperlin/GetDFPData')
20 | ```
21 | 
22 | # How to use GetDFPData
23 | 
24 | See manual and vignette in CRAN.
25 | 


--------------------------------------------------------------------------------
/inst/extdata/434_ARAC_2008-09-30.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msperlin/GetITRData/4714f9f6bfe4f12ac1858d67ef557abd86194af3/inst/extdata/434_ARAC_2008-09-30.zip


--------------------------------------------------------------------------------
/inst/extdata/ExampleReport_Petrobras.RData:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msperlin/GetITRData/4714f9f6bfe4f12ac1858d67ef557abd86194af3/inst/extdata/ExampleReport_Petrobras.RData


--------------------------------------------------------------------------------
/inst/extdata/ITRBPAE.001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msperlin/GetITRData/4714f9f6bfe4f12ac1858d67ef557abd86194af3/inst/extdata/ITRBPAE.001


--------------------------------------------------------------------------------
/man/gitrd.GetITRData.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_GetITRData.R
 3 | \name{gitrd.GetITRData}
 4 | \alias{gitrd.GetITRData}
 5 | \title{Downloads and reads financial reports from Bovespa}
 6 | \usage{
 7 | gitrd.GetITRData(name.companies, first.date = Sys.Date() - 12 * 30,
 8 |   last.date = Sys.Date(), type.info = "individual",
 9 |   periodicy.fin.report = "annual", inflation.index = "dollar",
10 |   max.levels = 3, folder.out = tempdir(), be.quiet = FALSE)
11 | }
12 | \arguments{
13 | \item{name.companies}{Official names of companies to get financial reports (e.g. 'ELETROPAULO METROPOLITANA EL.S.PAULO S.A').
14 | Names of companies can be found using function gitrd.search.company('nametolookfor') or gitrd.get.info.companies('companies')}
15 | 
16 | \item{first.date}{First date (YYYY-MM-DD) to get data. Character or Date. E.g. first.date = '2010-01-01'.}
17 | 
18 | \item{last.date}{Last date (YYYY-MM-DD) to get data. Character or Date. E.g. last.date = '2017-01-01'.}
19 | 
20 | \item{type.info}{Type of financial statements, 'individual' (default) or 'consolidated'. Argument can be a single value or a vector with the same
21 | length as name.companies. The individual type only includes financial statements from the company itself, while consolidated statements adds information
22 | about controlled companies}
23 | 
24 | \item{periodicy.fin.report}{The frequency of financial reports: 'annual' (default) or 'quarterly'}
25 | 
26 | \item{inflation.index}{Sets the inflation index to use for finding inflation adjusted values of all reports. Possible values: 'dollar' (default) or 'IPCA', the brazilian main inflation index.
27 | When using 'IPCA', the base date is set as the last date found in itr/dfp dataset.}
28 | 
29 | \item{max.levels}{Sets the maximum number of levels of accounting items in financial reports}
30 | 
31 | \item{folder.out}{Folder where to download and manipulate the zip files. Default = tempdir()}
32 | 
33 | \item{be.quiet}{Should the function output information about progress? TRUE or FALSE (default)}
34 | }
35 | \value{
36 | A tibble (dataframe with lists) object with all gathered financial statements, with each company as a row in the tibble.
37 | }
38 | \description{
39 | Quarterly and annual financial reports are downloaded from B3 for a combination of companies and time period.
40 | The easiest way to get started with gitrd.GetITRData is looking for the official name of traded companies using function gitrd.search.company('nametolookfor').
41 | Alternatively, you can use function gitrd.get.info.companies('companies') to import a dataframe with information for all available companies and time periods.
42 | }
43 | \examples{
44 | 
45 | \dontrun{ #dontrun: keep cran check time short
46 | name.companies <- 'ELETROPAULO METROPOLITANA EL.S.PAULO S.A'
47 | first.date <- '2005-01-01'
48 | last.date <-  '2006-01-01'
49 | 
50 | df.statements <- gitrd.GetITRData(name.companies = name.companies,
51 |                                   first.date = first.date,
52 |                                   last.date = last.date)
53 |  }
54 | }
55 | 


--------------------------------------------------------------------------------
/man/gitrd.convert.to.wide.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_utils.R
 3 | \name{gitrd.convert.to.wide}
 4 | \alias{gitrd.convert.to.wide}
 5 | \title{Converts a dataframe from gitrd.GetITRData to the wide format}
 6 | \usage{
 7 | gitrd.convert.to.wide(data.in, data.in.cols = "original")
 8 | }
 9 | \arguments{
10 | \item{data.in}{Data frame with financial information}
11 | 
12 | \item{data.in.cols}{Which data to go in rows values ('original' or 'inflation adjusted')}
13 | }
14 | \value{
15 | A dataframe in the wide format
16 | }
17 | \description{
18 | Converts a dataframe from gitrd.GetITRData to the wide format
19 | }
20 | \examples{
21 | 
22 | # get example data from RData file
23 | my.f <- system.file('extdata/ExampleReport_Petrobras.RData', package = 'GetITRData')
24 | load(my.f)
25 | 
26 | df.assets <- df.reports$fr.assets[[1]]
27 | df.assets.wide <- gitrd.convert.to.wide(df.assets)
28 | }
29 | 


--------------------------------------------------------------------------------
/man/gitrd.export.ITR.data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_export_ITR_Data.R
 3 | \name{gitrd.export.ITR.data}
 4 | \alias{gitrd.export.ITR.data}
 5 | \title{Export tibble to an excel file}
 6 | \usage{
 7 | gitrd.export.ITR.data(data.in, base.file.name = paste0("GetITRData_Export_",
 8 |   Sys.Date()), type.export = "xlsx", format.data = "wide")
 9 | }
10 | \arguments{
11 | \item{data.in}{Tibble with financial information (output of gitrd.GetITRData)}
12 | 
13 | \item{base.file.name}{The basename of excel file (don't include extension)}
14 | 
15 | \item{type.export}{The extension of the desired format (only 'xlsx' implemented so far)}
16 | 
17 | \item{format.data}{The format of output. 'wide' (default) for a wide table and 'long' for a long table}
18 | }
19 | \value{
20 | nothing
21 | }
22 | \description{
23 | Export tibble to an excel file
24 | }
25 | \examples{
26 | 
27 | # get example data from RData file
28 | my.f <- system.file('extdata/ExampleReport_Petrobras.RData', package = 'GetITRData')
29 | load(my.f)
30 | 
31 | \dontrun{ # dontrun: keep cran check time short
32 | gitrd.export.ITR.data(df.reports, base.file.name = 'MyExcelFile', format.data = 'wide')
33 | }
34 | }
35 | 


--------------------------------------------------------------------------------
/man/gitrd.fix.dataframes.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_fix_dataframes.R
 3 | \name{gitrd.fix.dataframes}
 4 | \alias{gitrd.fix.dataframes}
 5 | \title{Fix dataframe for version issues and inflation measures (internal)}
 6 | \usage{
 7 | gitrd.fix.dataframes(df.in, inflation.index, df.inflation, max.levels = 3)
 8 | }
 9 | \arguments{
10 | \item{df.in}{A dataframe with financial statements}
11 | 
12 | \item{inflation.index}{Sets the inflation index to use for finding inflation adjusted values of all reports. Possible values: 'dollar' (default) or 'IPCA', the brazilian main inflation index.
13 | When using 'IPCA', the base date is set as the last date found in itr/dfp dataset.}
14 | 
15 | \item{df.inflation}{Dataframe with inflation data}
16 | 
17 | \item{max.levels}{Sets the maximum number of levels of accounting items in financial reports}
18 | }
19 | \value{
20 | The fixed data.frame
21 | }
22 | \description{
23 | Fix dataframe for version issues and inflation measures (internal)
24 | }
25 | \examples{
26 | #'
27 | # get example data from RData file
28 | my.f <- system.file('extdata/ExampleReport_Petrobras.RData', package = 'GetITRData')
29 | load(my.f)
30 | 
31 | df.assets <- df.reports$fr.assets[[1]]
32 | 
33 | df.assets.fixed <- gitrd.fix.dataframes(df.assets,
34 |                                         inflation.index = 'none',
35 |                                         df.inflation = data.frame())
36 | }
37 | 


--------------------------------------------------------------------------------
/man/gitrd.get.bovespa.data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_Bovespa_Data.R
 3 | \name{gitrd.get.bovespa.data}
 4 | \alias{gitrd.get.bovespa.data}
 5 | \title{Reads information for a company from B3 site}
 6 | \usage{
 7 | gitrd.get.bovespa.data(my.id)
 8 | }
 9 | \arguments{
10 | \item{my.id}{A CVM id}
11 | }
12 | \value{
13 | A list with several dataframes
14 | }
15 | \description{
16 | Given a CVM code, this function scrapes information from the company page.
17 | }
18 | \examples{
19 | 
20 | \dontrun{ # keep cran check fast
21 | l.info.PETR <- gitrd.get.bovespa.data(my.id = 9512)
22 | str(l.info.PETR)
23 | }
24 | }
25 | 


--------------------------------------------------------------------------------
/man/gitrd.get.inflation.data.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_get_inflation_data.R
 3 | \name{gitrd.get.inflation.data}
 4 | \alias{gitrd.get.inflation.data}
 5 | \title{Downloads and read inflation data from github}
 6 | \usage{
 7 | gitrd.get.inflation.data(inflation.index)
 8 | }
 9 | \arguments{
10 | \item{inflation.index}{Sets the inflation index to use for finding inflation adjusted values of all reports. Possible values: 'dollar' (default) or 'IPCA', the brazilian main inflation index.
11 | When using 'IPCA', the base date is set as the last date found in itr/dfp dataset.}
12 | }
13 | \value{
14 | A dataframe with inflation data
15 | }
16 | \description{
17 | Inflation data is available at git repo 'msperlin/GetITRData_auxiliary'
18 | }
19 | \examples{
20 | 
21 | \dontrun{ # keep cran check fast
22 | df.inflation <- gitrd.get.inflation.data('IPCA')
23 | str(df.inflation)
24 | }
25 | }
26 | 


--------------------------------------------------------------------------------
/man/gitrd.get.info.companies.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_get_info_companies.R
 3 | \name{gitrd.get.info.companies}
 4 | \alias{gitrd.get.info.companies}
 5 | \title{Reads up to date information about Bovespa companies from a github file}
 6 | \usage{
 7 | gitrd.get.info.companies(type.data = "companies_files")
 8 | }
 9 | \arguments{
10 | \item{type.data}{A string that sets the type of information to be returned ('companies' or 'companies_files').
11 | If 'companies', it will return a dataframe with several information about companies, but without download links.}
12 | }
13 | \value{
14 | A dataframe with several information about Bovespa companies
15 | }
16 | \description{
17 | A csv file with information about available companies and time periods is downloaded from github and read.
18 | This file is updated periodically and manually by the author. When run for the first time in a R session, a .RDATA file
19 | containing the output of the function is saved in tempdir() for caching.
20 | }
21 | \examples{
22 | 
23 | \dontrun{ # keep cran check fast
24 | df.info <- gitrd.get.info.companies()
25 | str(df.info)
26 | }
27 | }
28 | 


--------------------------------------------------------------------------------
/man/gitrd.read.fre.zip.file.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_read_fre_zip_file.R
 3 | \name{gitrd.read.fre.zip.file}
 4 | \alias{gitrd.read.fre.zip.file}
 5 | \title{Reads a single zip file downloaded from Bovespa}
 6 | \usage{
 7 | gitrd.read.fre.zip.file(my.zip.file, folder.to.unzip = tempdir(),
 8 |   l.other.info)
 9 | }
10 | \arguments{
11 | \item{my.zip.file}{Full path to zip file}
12 | 
13 | \item{folder.to.unzip}{Folder to unzip files (default = tempdir())}
14 | 
15 | \item{l.other.info}{Other information as a list}
16 | }
17 | \value{
18 | A list with several dataframes containing financial statements
19 | }
20 | \description{
21 | Reads a single zip file downloaded from Bovespa
22 | }
23 | \examples{
24 | 
25 | my.f <- system.file('extdata/434_ARAC_2008-09-30.zip', package = 'GetITRData')
26 | 
27 | my.l <- gitrd.read.zip.file(my.f, id.type = 'before 2011', type.fin.report = 'itr')
28 | print(my.l)
29 | 
30 | }
31 | 


--------------------------------------------------------------------------------
/man/gitrd.read.fwf.file.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_utils.R
 3 | \name{gitrd.read.fwf.file}
 4 | \alias{gitrd.read.fwf.file}
 5 | \title{Reads FWF file from bovespa (internal)}
 6 | \usage{
 7 | gitrd.read.fwf.file(my.f, type.fin.report)
 8 | }
 9 | \arguments{
10 | \item{my.f}{File to be read}
11 | 
12 | \item{type.fin.report}{Periodicity of financial data ('itr' - quarterly or 'dfp' - annual)}
13 | }
14 | \value{
15 | A dataframe with data
16 | }
17 | \description{
18 | Reads FWF file from bovespa (internal)
19 | }
20 | \examples{
21 | 
22 | my.f <- system.file('extdata/ITRBPAE.001', package = 'GetITRData')
23 | 
24 | df.assets <- gitrd.read.fwf.file(my.f, type.fin.report = 'itr')
25 | }
26 | 


--------------------------------------------------------------------------------
/man/gitrd.read.zip.file.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_read_zip_file.R
 3 | \name{gitrd.read.zip.file}
 4 | \alias{gitrd.read.zip.file}
 5 | \title{Reads a single zip file downloaded from Bovespa}
 6 | \usage{
 7 | gitrd.read.zip.file(my.zip.file, folder.to.unzip = tempdir(), id.type,
 8 |   type.fin.report)
 9 | }
10 | \arguments{
11 | \item{my.zip.file}{Full path to zip file}
12 | 
13 | \item{folder.to.unzip}{Folder to unzip files (default = tempdir())}
14 | 
15 | \item{id.type}{The type of file structure ('after 2011' or 'before 2011')}
16 | 
17 | \item{type.fin.report}{Periodicity of financial data ('itr' - quarterly or 'dfp' - annual)}
18 | }
19 | \value{
20 | A list with several dataframes containing financial statements
21 | }
22 | \description{
23 | Reads a single zip file downloaded from Bovespa
24 | }
25 | \examples{
26 | 
27 | my.f <- system.file('extdata/434_ARAC_2008-09-30.zip', package = 'GetITRData')
28 | 
29 | my.l <- gitrd.read.zip.file(my.f, id.type = 'before 2011', type.fin.report = 'itr')
30 | print(my.l)
31 | }
32 | 


--------------------------------------------------------------------------------
/man/gitrd.read.zip.file.type.1.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_read_zip_file.R
 3 | \name{gitrd.read.zip.file.type.1}
 4 | \alias{gitrd.read.zip.file.type.1}
 5 | \title{Reads folder for zip file post 2011 (internal)}
 6 | \usage{
 7 | gitrd.read.zip.file.type.1(rnd.folder.name, folder.to.unzip = tempdir(),
 8 |   type.fin.report)
 9 | }
10 | \arguments{
11 | \item{rnd.folder.name}{Folder where unzipped files are available}
12 | 
13 | \item{folder.to.unzip}{Folder to unzip files (default = tempdir())}
14 | 
15 | \item{type.fin.report}{Periodicity of financial data ('itr' - quarterly or 'dfp' - annual)}
16 | }
17 | \value{
18 | A list with financial statements
19 | }
20 | \description{
21 | Reads folder for zip file post 2011 (internal)
22 | }
23 | \examples{
24 | # no example (this function is not used directly)
25 | }
26 | 


--------------------------------------------------------------------------------
/man/gitrd.read.zip.file.type.2.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_read_zip_file.R
 3 | \name{gitrd.read.zip.file.type.2}
 4 | \alias{gitrd.read.zip.file.type.2}
 5 | \title{Reads folder for zip file pre 2011 (internal)}
 6 | \usage{
 7 | gitrd.read.zip.file.type.2(rnd.folder.name, folder.to.unzip = tempdir(),
 8 |   type.fin.report)
 9 | }
10 | \arguments{
11 | \item{rnd.folder.name}{Folder where unzipped files are available}
12 | 
13 | \item{folder.to.unzip}{Folder to unzip files (default = tempdir())}
14 | 
15 | \item{type.fin.report}{Periodicity of financial data ('itr' - quarterly or 'dfp' - annual)}
16 | }
17 | \value{
18 | A list with financial statements
19 | }
20 | \description{
21 | Reads folder for zip file pre 2011 (internal)
22 | }
23 | \examples{
24 | # no example (this function is not used directly)
25 | }
26 | 


--------------------------------------------------------------------------------
/man/gitrd.read.zip.file.type.fre.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_read_fre_zip_file.R
 3 | \name{gitrd.read.zip.file.type.fre}
 4 | \alias{gitrd.read.zip.file.type.fre}
 5 | \title{Reads folder for zip file post 2011 (internal)}
 6 | \usage{
 7 | gitrd.read.zip.file.type.fre(rnd.folder.name, folder.to.unzip = tempdir())
 8 | }
 9 | \arguments{
10 | \item{rnd.folder.name}{Folder where unzipped files are available}
11 | 
12 | \item{folder.to.unzip}{Folder to unzip files (default = tempdir())}
13 | }
14 | \value{
15 | A list with financial statements
16 | }
17 | \description{
18 | Reads folder for zip file post 2011 (internal)
19 | }
20 | \examples{
21 | # no example (this function is not used directly)
22 | }
23 | 


--------------------------------------------------------------------------------
/man/gitrd.search.company.Rd:
--------------------------------------------------------------------------------
 1 | % Generated by roxygen2: do not edit by hand
 2 | % Please edit documentation in R/gitrd_utils.R
 3 | \name{gitrd.search.company}
 4 | \alias{gitrd.search.company}
 5 | \title{Helps users search for a company name}
 6 | \usage{
 7 | gitrd.search.company(char.to.search)
 8 | }
 9 | \arguments{
10 | \item{char.to.search}{Character for partial matching}
11 | }
12 | \value{
13 | Names of found companies
14 | }
15 | \description{
16 | Helps users search for a company name
17 | }
18 | \examples{
19 | 
20 | \dontrun{ # dontrun: keep cran check fast
21 | gitrd.search.company('GERDAU')
22 | }
23 | }
24 | 


--------------------------------------------------------------------------------
/tests/testthat.R:
--------------------------------------------------------------------------------
1 | library(testthat)
2 | library(GetITRData)
3 | 
4 | test_check("GetITRData")
5 | 


--------------------------------------------------------------------------------
/tests/testthat/test_gitrd.R:
--------------------------------------------------------------------------------
 1 | library(testthat)
 2 | library(GetITRData)
 3 | 
 4 | #test_that(desc = 'Test of download function',{
 5 | #          expect_equal(1, 1) } )
 6 | 
 7 | # my.assets <- c('ABEVA20', 'PETRL78')
 8 | # out.file <- system.file("extdata", 'NEG_OPCOES_20151126.zip', package = "GetHFData")
 9 | # 
10 | # df.out <- ghfd_read_file(out.file, my.assets)
11 | # 
12 | # test_that(desc = 'Test of read function',{
13 | #   expect_true(nrow(df.out)>0)
14 | #   } )
15 | 
16 | #cat('\nDeleting test folder')
17 | #unlink(dl.folder, recursive = T)
18 | 
19 | 


--------------------------------------------------------------------------------
/vignettes/MyExcelData.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/msperlin/GetITRData/4714f9f6bfe4f12ac1858d67ef557abd86194af3/vignettes/MyExcelData.xlsx


--------------------------------------------------------------------------------
/vignettes/gitrd-vignette-introduction.R:
--------------------------------------------------------------------------------
 1 | ## ---- eval=FALSE---------------------------------------------------------
 2 | #  # Release version in CRAN
 3 | #  install.packages('GetITRData') # not in CRAN yet
 4 | #  
 5 | #  # Development version in Github
 6 | #  devtools::install_github('msperlin/GetITRData')
 7 | 
 8 | ## ------------------------------------------------------------------------
 9 | library(GetITRData)
10 | library(tibble)
11 | 
12 | gitrd.search.company('petrobras')
13 | 
14 | ## ------------------------------------------------------------------------
15 | df.info <- gitrd.get.info.companies(type.data = 'companies')
16 | 
17 | glimpse(df.info)
18 | 
19 | ## ------------------------------------------------------------------------
20 | name.companies <- 'PETRÓLEO BRASILEIRO  S.A.  - PETROBRAS'
21 | first.date <- '2004-01-01'
22 | last.date  <- '2006-01-01'
23 | type.statements <- 'individual'
24 | periodicy.fin.report <- 'annual'
25 | 
26 | df.reports <- gitrd.GetITRData(name.companies = name.companies, 
27 |                                periodicy.fin.report = periodicy.fin.report, 
28 |                                first.date = first.date,
29 |                                last.date = last.date,
30 |                                type.info = type.statements)
31 | 
32 | ## ------------------------------------------------------------------------
33 | glimpse(df.reports)
34 | 
35 | ## ------------------------------------------------------------------------
36 | df.income.long <- df.reports$fr.income[[1]]
37 | 
38 | glimpse(df.income.long)
39 | 
40 | ## ------------------------------------------------------------------------
41 | df.income.wide <- gitrd.convert.to.wide(df.income.long)
42 | 
43 | knitr::kable(df.income.wide )
44 | 
45 | ## ------------------------------------------------------------------------
46 | set.seed(2)
47 | my.companies <- sample(unique(df.info$name.company), 5)
48 | 
49 | first.date <- '2008-01-01'
50 | last.date  <- '2010-01-01'
51 | type.statements <- 'individual'
52 | periodicy.fin.report <- 'annual'
53 | 
54 | df.reports <- gitrd.GetITRData(name.companies = my.companies, 
55 |                                periodicy.fin.report = periodicy.fin.report,
56 |                                first.date = first.date,
57 |                                last.date = last.date,
58 |                                type.info = type.statements)
59 | 
60 | ## ------------------------------------------------------------------------
61 | glimpse(df.reports)
62 | 
63 | ## ------------------------------------------------------------------------
64 | df.assets <- do.call(what = rbind, args = df.reports$fr.assets)
65 | df.liabilities <- do.call(what = rbind, args = df.reports$fr.liabilities)
66 | 
67 | df.assets.liabilities <- rbind(df.assets, df.liabilities)
68 | 
69 | ## ------------------------------------------------------------------------
70 | library(dplyr)
71 | 
72 | my.tab <- df.assets.liabilities %>%
73 |   group_by(company.name, ref.date) %>%
74 |   summarise(Liq.Index = acc.value[acc.number == '1.01']/ acc.value[acc.number == '2.01'])
75 | 
76 | my.tab
77 | 
78 | ## ------------------------------------------------------------------------
79 | library(ggplot2)
80 | 
81 | p <- ggplot(my.tab, aes(x = ref.date, y = Liq.Index, fill = company.name)) +
82 |   geom_col(position = 'dodge' )
83 | print(p)
84 | 
85 | ## ---- eval=FALSE---------------------------------------------------------
86 | #  my.basename <- 'MyExcelData'
87 | #  my.format <- 'xlsx' # only supported so far
88 | #  gitrd.export.ITR.data(data.in = df.reports,
89 | #                        base.file.name = my.basename,
90 | #                        type.export = my.format,
91 | #                        format.data = 'long')
92 | 
93 | 


--------------------------------------------------------------------------------
/vignettes/gitrd-vignette-introduction.Rmd:
--------------------------------------------------------------------------------
  1 | ---
  2 | title: "Using GetITRData to obtain quarterly financial reports from Bovespa"
  3 | author: "Marcelo Perlin"
  4 | date: "`r Sys.Date()`"
  5 | output: rmarkdown::html_vignette
  6 | vignette: >
  7 |   %\VignetteIndexEntry{Using GetITRData to obtain quarterly financial reports from Bovespa}
  8 |   %\VignetteEngine{knitr::rmarkdown}
  9 |   %\VignetteEncoding{UTF-8}
 10 | ---
 11 |   
 12 | Financial statements of companies traded at B3 (formerly Bovespa), the Brazilian stock exchange, are available on its [website](http://www.bmfbovespa.com.br/). Accessing the data for a single company is straightforward: the website offers a simple interface for browsing this dataset. An example is given [here](https://www.rad.cvm.gov.br/ENETCONSULTA/frmGerenciaPaginaFRE.aspx?NumeroSequencialDocumento=67775&CodigoTipoInstituicao=2). However, gathering and organizing the data for large-scale research, with many companies and many dates, is painful. Financial reports must be downloaded or copied individually and later aggregated. Changes in the accounting format over time can make this process slow, unreliable and irreproducible.
 13 | 
 14 | Package `GetITRData` provides an R interface to all financial statements available on the website. It not only downloads the data but also organizes it in a tabular format and allows the use of inflation indexes. Users can select companies and a time period to download all available data. Information about current companies, such as sector and available quarters, is also within reach. The main purpose of the package is to make it easy to access quarterly financial statements in large-scale research, facilitating the reproducibility of corporate finance studies with B3 data.
 15 | 
 16 | # Installation
 17 |   
 18 | The development version of the package is available on Github. A release version on CRAN is planned but, as noted in the code below, not yet published. Install the package with the following code:
 19 |   
 20 | ```{r, eval=FALSE}
 21 | # Release version in CRAN
 22 | install.packages('GetITRData') # not in CRAN yet
 23 | 
 24 | # Development version in Github
 25 | devtools::install_github('msperlin/GetITRData')
 26 | ```
 27 | 
 28 | 
 29 | # How to use `GetITRData`
 30 | 
 31 | The starting point of `GetITRData` is to find the official names of companies in Bovespa. Function `gitrd.search.company` serves this purpose. Given a string (text), it will search for partial matches in company names. As an example, let's find the _official_ name of Petrobras, one of the largest companies in Brazil:
 32 | 
 33 | ```{r}
 34 | library(GetITRData)
 35 | library(tibble)
 36 | 
 37 | gitrd.search.company('petrobras')
 38 | ```
 39 | 
 40 | Its official name in Bovespa records is `PETRÓLEO BRASILEIRO  S.A.  - PETROBRAS`. Data for quarterly and annual statements are available from 1998 to 2017. The situation of the company, active or canceled, is also given. This helps verify the availability of data.
 41 | 
 42 | The content of all available quarterly statements can be accessed with function `gitrd.get.info.companies`. It will read and parse a .csv file from my [github repository](https://github.com/msperlin/GetITRData_auxiliary). This will be periodically updated for new quarterly statements. Let's try it out:
 43 | 
 44 | ```{r}
 45 | df.info <- gitrd.get.info.companies(type.data = 'companies')
 46 | 
 47 | glimpse(df.info)
 48 | ```
 49 | 
 50 | This file includes several pieces of information gathered from Bovespa: names of companies, sectors, dates of quarterly statements and, most importantly, the links to download the files. The resulting dataframe can be used to filter and gather information for large-scale research, such as downloading financial data for a specific sector.
 51 | 
 52 | 
 53 | ## Downloading financial information for ONE company
 54 | 
 55 | All you need to download financial data with `GetITRData` are the official names of companies, which can be found with `gitrd.search.company`, the desired starting and ending dates and the type of financial information (individual or consolidated). Let's try it for PETROBRAS:
 56 | 
 57 | ```{r}
 58 | name.companies <- 'PETRÓLEO BRASILEIRO  S.A.  - PETROBRAS'
 59 | first.date <- '2004-01-01'
 60 | last.date  <- '2006-01-01'
 61 | type.statements <- 'individual'
 62 | periodicy.fin.report <- 'annual'
 63 | 
 64 | df.reports <- gitrd.GetITRData(name.companies = name.companies, 
 65 |                                periodicy.fin.report = periodicy.fin.report, 
 66 |                                first.date = first.date,
 67 |                                last.date = last.date,
 68 |                                type.info = type.statements)
 69 | ```
 70 | 
 71 | The resulting object is a `tibble`, a data.frame type of object that allows for list columns. Let's have a look at its content:
 72 | 
 73 | ```{r}
 74 | glimpse(df.reports)
 75 | ```
 76 | 
 77 | Object `df.reports` only has one row since we only asked for data of one company. The number of rows increases with the number of companies, as we will soon learn with the next example. All financial statements for the different years are available within `df.reports`. For example, the income statements for all desired years of PETROBRAS are:
 78 | 
 79 | ```{r}
 80 | df.income.long <- df.reports$fr.income[[1]]
 81 | 
 82 | glimpse(df.income.long)
 83 | ```
 84 | 
 85 | The resulting dataframe is in the long format, ready for processing. In the long format, financial statements of different periods (quarters or years) are stacked row-wise. In the wide format, each period's reference date becomes its own column. If you want the wide format, which I believe is most common in financial analysis, you can use function `gitrd.convert.to.wide`. See an example next:
 86 | 
 87 | ```{r}
 88 | df.income.wide <- gitrd.convert.to.wide(df.income.long)
 89 | 
 90 | knitr::kable(df.income.wide )
 91 | ```
 92 | 
 93 | 
 94 | ## Downloading financial information for SEVERAL companies
 95 | 
 96 | If you are doing serious research, it is likely that you need financial statements for more than one company. Package `GetITRData` is specially designed for handling large-scale downloads of data. Let's build a case with 5 randomly selected companies:
 97 | 
 98 | ```{r}
 99 | set.seed(2)
100 | my.companies <- sample(unique(df.info$name.company), 5)
101 | 
102 | first.date <- '2008-01-01'
103 | last.date  <- '2010-01-01'
104 | type.statements <- 'individual'
105 | periodicy.fin.report <- 'annual'
106 | 
107 | df.reports <- gitrd.GetITRData(name.companies = my.companies, 
108 |                                periodicy.fin.report = periodicy.fin.report,
109 |                                first.date = first.date,
110 |                                last.date = last.date,
111 |                                type.info = type.statements)
112 | ```
113 | 
114 | And now we can check the resulting `tibble`:
115 | 
116 | ```{r}
117 | glimpse(df.reports)
118 | ```
119 | 
120 | Every row of `df.reports` will provide information for one company. Metadata about the corresponding dataframes such as min/max dates is available in the first columns. Keeping a tabular structure facilitates the organization and future processing of all financial data. We can use tibble `df.reports` for creating other dataframes in the long format containing data for all companies. See next, where we create dataframes with the assets and liabilities of all companies:
121 | 
122 | ```{r}
123 | df.assets <- do.call(what = rbind, args = df.reports$fr.assets)
124 | df.liabilities <- do.call(what = rbind, args = df.reports$fr.liabilities)
125 | 
126 | df.assets.liabilities <- rbind(df.assets, df.liabilities)
127 | ```
128 | 
129 | As an example, let's use the resulting dataframe to calculate and analyze, over time, a simple liquidity index for each company: the total of current (liquid) assets (_Ativo Circulante_) divided by the total of current (short-term) liabilities (_Passivo Circulante_).
130 | 
131 | ```{r}
132 | library(dplyr)
133 | 
134 | my.tab <- df.assets.liabilities %>%
135 |   group_by(company.name, ref.date) %>%
136 |   summarise(Liq.Index = acc.value[acc.number == '1.01']/ acc.value[acc.number == '2.01'])
137 | 
138 | my.tab
139 | ```
140 | 
141 | Now we can visualize the information using `ggplot2`:
142 | 
143 | ```{r}
144 | library(ggplot2)
145 | 
146 | p <- ggplot(my.tab, aes(x = ref.date, y = Liq.Index, fill = company.name)) +
147 |   geom_col(position = 'dodge' )
148 | print(p)
149 | ```
150 | 
151 | As we can see, we could not find available data for all companies. Nonetheless, JPSP is the company with the highest liquidity, being able to pay its short-term debt with its current assets in all years. We can certainly do many more interesting studies based on this data set.
152 | 
153 | 
154 | ## Exporting financial data
155 | 
156 | The package includes function `gitrd.export.ITR.data` for exporting the financial data to an Excel file. Users can choose between the long and wide formats. See next:
157 | 
158 | ```{r, eval=FALSE}
159 | my.basename <- 'MyExcelData'
160 | my.format <- 'xlsx' # only supported so far
161 | gitrd.export.ITR.data(data.in = df.reports, 
162 |                       base.file.name = my.basename,
163 |                       type.export = my.format,
164 |                       format.data = 'long')
165 | ```
166 | 
167 | The resulting Excel file contains all data available in `df.reports`.
168 | 


--------------------------------------------------------------------------------
/vignettes/gitrd-vignette-introduction.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | 
  3 | <html xmlns="http://www.w3.org/1999/xhtml">
  4 | 
  5 | <head>
  6 | 
  7 | <meta charset="utf-8" />
  8 | <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
  9 | <meta name="generator" content="pandoc" />
 10 | 
 11 | <meta name="viewport" content="width=device-width, initial-scale=1">
 12 | 
 13 | <meta name="author" content="Marcelo Perlin" />
 14 | 
 15 | <meta name="date" content="2018-02-28" />
 16 | 
 17 | <title>Using GetITRData to obtain quarterly financial reports from Bovespa</title>
 18 | 
 19 | 
 20 | 
 21 | <style type="text/css">code{white-space: pre;}</style>
 22 | <style type="text/css">
 23 | div.sourceCode { overflow-x: auto; }
 24 | table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
 25 |   margin: 0; padding: 0; vertical-align: baseline; border: none; }
 26 | table.sourceCode { width: 100%; line-height: 100%; }
 27 | td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
 28 | td.sourceCode { padding-left: 5px; }
 29 | code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
 30 | code > span.dt { color: #902000; } /* DataType */
 31 | code > span.dv { color: #40a070; } /* DecVal */
 32 | code > span.bn { color: #40a070; } /* BaseN */
 33 | code > span.fl { color: #40a070; } /* Float */
 34 | code > span.ch { color: #4070a0; } /* Char */
 35 | code > span.st { color: #4070a0; } /* String */
 36 | code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
 37 | code > span.ot { color: #007020; } /* Other */
 38 | code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
 39 | code > span.fu { color: #06287e; } /* Function */
 40 | code > span.er { color: #ff0000; font-weight: bold; } /* Error */
 41 | code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
 42 | code > span.cn { color: #880000; } /* Constant */
 43 | code > span.sc { color: #4070a0; } /* SpecialChar */
 44 | code > span.vs { color: #4070a0; } /* VerbatimString */
 45 | code > span.ss { color: #bb6688; } /* SpecialString */
 46 | code > span.im { } /* Import */
 47 | code > span.va { color: #19177c; } /* Variable */
 48 | code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
 49 | code > span.op { color: #666666; } /* Operator */
 50 | code > span.bu { } /* BuiltIn */
 51 | code > span.ex { } /* Extension */
 52 | code > span.pp { color: #bc7a00; } /* Preprocessor */
 53 | code > span.at { color: #7d9029; } /* Attribute */
 54 | code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
 55 | code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
 56 | code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
 57 | code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
 58 | </style>
 59 | 
 60 | 
 61 | 
 62 | <link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20both%3B%0Amargin%3A%200%200%2010px%2010px%3B%0Apadding%3A%204px%3B%0Awidth%3A%20400px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Aborder%2Dradius%3A%205px%3B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Afont%2Dsize%3A%2013px%3B%0Aline%2Dheight%3A%201%2E3%3B%0A%7D%0A%23TOC%20%2Etoctitle%20%7B%0Afont%2Dweight%3A%20bold%3B%0Afont%2Dsize%3A%2015px%3B%0Amargin%2Dleft%3A%205px%3B%0A%7D%0A%23TOC%20ul%20%7B%0Apadding%2Dleft%3A%2040px%3B%0Amargin%2Dleft%3A%20%2D1%2E5em%3B%0Amargin%2Dtop%3A%205px%3B%0Amargin%2Dbottom%3A%205px%3B%0A%7D%0A%23TOC%20ul%20ul%20%7B%0Amargin%2Dleft%3A%20%2D2em%3B%0A%7D%0A%23TOC%20li%20%7B%0Aline%2Dheight%3A%2016px%3B%0A%7D%0Atable%20%7B%0Amargin%3A%201em%20auto%3B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dcolor%3A%20%23DDDDDD%3B%0Aborder%2Dstyle%3A%20outset%3B%0Aborder%2Dcollapse%3A%20collapse%3B%0A%7D%0Atable%20th%20%7B%0Aborder%2Dwidth%3A%202px%3B%0Apadding%3A%205px%3B%0Aborder%2Dstyle%3A%20inset%3B%0A%7D%0Atable%20td%20%7B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dstyle%3A%20inset%3B%0Aline%2Dheight%3A%2018px%3B%0Apadding%3A%205px%205px%3B%0A%7D%0Atable%2C%20table%20th%2C%20table%20td%20%7B%0Aborder%2Dleft%2Dstyle%3A%20none%3B%0Aborder%2Dright%2Dstyle%3A%20none%3B%0A%7D%0Atable%20thead%2C%20table%20tr%2Eeven%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Ap%20%7B%0Amargin%3A%200%2E5em%200%3B%0A%7D%0Ablockquote%20%7B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Apadding%3A%200%2E25em%200%2E75em%3B%0A%7D%0Ahr%20%7B%0Aborder%2Dstyle%3A%20solid%3B%0Aborder%3A%20none%3B%0
Aborder%2Dtop%3A%201px%20solid%20%23777%3B%0Amargin%3A%2028px%200%3B%0A%7D%0Adl%20%7B%0Amargin%2Dleft%3A%200%3B%0A%7D%0Adl%20dd%20%7B%0Amargin%2Dbottom%3A%2013px%3B%0Amargin%2Dleft%3A%2013px%3B%0A%7D%0Adl%20dt%20%7B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Aul%20%7B%0Amargin%2Dtop%3A%200%3B%0A%7D%0Aul%20li%20%7B%0Alist%2Dstyle%3A%20circle%20outside%3B%0A%7D%0Aul%20ul%20%7B%0Amargin%2Dbottom%3A%200%3B%0A%7D%0Apre%2C%20code%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0Aborder%2Dradius%3A%203px%3B%0Acolor%3A%20%23333%3B%0Awhite%2Dspace%3A%20pre%2Dwrap%3B%20%0A%7D%0Apre%20%7B%0Aborder%2Dradius%3A%203px%3B%0Amargin%3A%205px%200px%2010px%200px%3B%0Apadding%3A%2010px%3B%0A%7D%0Apre%3Anot%28%5Bclass%5D%29%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Acode%20%7B%0Afont%2Dfamily%3A%20Consolas%2C%20Monaco%2C%20%27Courier%20New%27%2C%20monospace%3B%0Afont%2Dsize%3A%2085%25%3B%0A%7D%0Ap%20%3E%20code%2C%20li%20%3E%20code%20%7B%0Apadding%3A%202px%200px%3B%0A%7D%0Adiv%2Efigure%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0Aimg%20%7B%0Abackground%2Dcolor%3A%20%23FFFFFF%3B%0Apadding%3A%202px%3B%0Aborder%3A%201px%20solid%20%23DDDDDD%3B%0Aborder%2Dradius%3A%203px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Amargin%3A%200%205px%3B%0A%7D%0Ah1%20%7B%0Amargin%2Dtop%3A%200%3B%0Afont%2Dsize%3A%2035px%3B%0Aline%2Dheight%3A%2040px%3B%0A%7D%0Ah2%20%7B%0Aborder%2Dbottom%3A%204px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Apadding%2Dbottom%3A%202px%3B%0Afont%2Dsize%3A%20145%25%3B%0A%7D%0Ah3%20%7B%0Aborder%2Dbottom%3A%202px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Afont%2Dsize%3A%20120%25%3B%0A%7D%0Ah4%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23f7f7f7%3B%0Amargin%2Dleft%3A%208px%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Ah5%2C%20h6%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23ccc%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Aa%20%7B%0Acolor%3A%20%230033dd%3B%0Atext%2Ddecoration%3A%20none%3B%0A%7D%0Aa%3Ahover%20%7B%0Acolor%3A%20%236666ff%3B%20%7D%0Aa%3Avisited%20%7B%0Acolor%3A%20%
23800080%3B%20%7D%0Aa%3Avisited%3Ahover%20%7B%0Acolor%3A%20%23BB00BB%3B%20%7D%0Aa%5Bhref%5E%3D%22http%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%3B%20%7D%0Aa%5Bhref%5E%3D%22https%3A%22%5D%20%7B%0Atext%2Ddecoration%3A%20underline%3B%20%7D%0A%0Acode%20%3E%20span%2Ekw%20%7B%20color%3A%20%23555%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%0Acode%20%3E%20span%2Edt%20%7B%20color%3A%20%23902000%3B%20%7D%20%0Acode%20%3E%20span%2Edv%20%7B%20color%3A%20%2340a070%3B%20%7D%20%0Acode%20%3E%20span%2Ebn%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Efl%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Ech%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Est%20%7B%20color%3A%20%23d14%3B%20%7D%20%0Acode%20%3E%20span%2Eco%20%7B%20color%3A%20%23888888%3B%20font%2Dstyle%3A%20italic%3B%20%7D%20%0Acode%20%3E%20span%2Eot%20%7B%20color%3A%20%23007020%3B%20%7D%20%0Acode%20%3E%20span%2Eal%20%7B%20color%3A%20%23ff0000%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%0Acode%20%3E%20span%2Efu%20%7B%20color%3A%20%23900%3B%20font%2Dweight%3A%20bold%3B%20%7D%20%20code%20%3E%20span%2Eer%20%7B%20color%3A%20%23a61717%3B%20background%2Dcolor%3A%20%23e3d2d2%3B%20%7D%20%0A" rel="stylesheet" type="text/css" />
 63 | 
 64 | </head>
 65 | 
 66 | <body>
 67 | 
 68 | 
 69 | 
 70 | 
 71 | <h1 class="title toc-ignore">Using GetITRData to obtain quarterly financial reports from Bovespa</h1>
 72 | <h4 class="author"><em>Marcelo Perlin</em></h4>
 73 | <h4 class="date"><em>2018-02-28</em></h4>
 74 | 
 75 | 
 76 | 
 77 | <p>Financial statements of companies traded at B3 (formerly Bovespa), the Brazilian stock exchange, are available in its <a href="http://www.bmfbovespa.com.br/">website</a>. Accessing the data for a single company is straightforward. In the website one can find a simple interface for accessing this dataset. An example is given <a href="https://www.rad.cvm.gov.br/ENETCONSULTA/frmGerenciaPaginaFRE.aspx?NumeroSequencialDocumento=67775&amp;CodigoTipoInstituicao=2">here</a>. However, gathering and organizing the data for a large scale research, with many companies and many dates, is painful. Financial reports must be downloaded or copied individually and later aggregated. Changes in the accounting format thoughout time can make this process slow, unreliable and irreproducible.</p>
 78 | <p>Package <code>GetITRData</code> provides a R interface to all financial statements available in the website. It not only downloads the data but also organizes it in a tabular format and allows the use of inflation indexes. Users can select companies and a time period to download all available data. Several information about current companies, such as sector and available quarters are also at reach. The main purpose of the package is to make it easy to access quarterly financial statements in large scale research, facilitating the reproducibility of corporate finance studies with B3 data.</p>
 79 | <div id="installation" class="section level1">
 80 | <h1>Installation</h1>
 81 | <p>The package is available in CRAN (release version) and in Github (development version). You can install any of those with the following code:</p>
 82 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Release version in CRAN</span>
 83 | <span class="kw">install.packages</span>(<span class="st">'GetITRData'</span>) <span class="co"># not in CRAN yet</span>
 84 | 
 85 | <span class="co"># Development version in Github</span>
 86 | devtools<span class="op">::</span><span class="kw">install_github</span>(<span class="st">'msperlin/GetITRData'</span>)</code></pre></div>
 87 | </div>
 88 | <div id="how-to-use-getitrdata" class="section level1">
 89 | <h1>How to use <code>GetITRData</code></h1>
 90 | <p>The starting point of <code>GetITRData</code> is to find the official names of companies in Bovespa. Function <code>gitrd.search.company</code> serves this purpose. Given a string (text), it will search for a partial matches in companies names. As an example, let’s find the <em>official</em> name of Petrobras, one of the largest companies in Brazil:</p>
 91 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(GetITRData)
 92 | <span class="kw">library</span>(tibble)
 93 | 
 94 | <span class="kw">gitrd.search.company</span>(<span class="st">'petrobras'</span>)</code></pre></div>
 95 | <pre><code>## 
 96 | ## Reading info file from github
 97 | ## Found 44338 lines for 690 companies  [Actives =  518  Inactives =  173 ]
 98 | ## Last file update:  2017-10-19
 99 | ## Caching RDATA into tempdir()
100 | ## 
101 | ## Found 2 companies:
102 | ## PETROBRAS DISTRIBUIDORA SA             | situation = ATIVO | first date = 2016-12-31 | last date - 2017-12-31
103 | ## PETRÓLEO BRASILEIRO  S.A.  - PETROBRAS | situation = ATIVO | first date = 1998-09-30 | last date - 2017-12-31</code></pre>
104 | <p>Its official name in Bovespa records is <code>PETRÓLEO BRASILEIRO  S.A.  - PETROBRAS</code>. Data for quarterly and annual statements are available from 1998 to 2017. The situation of the company, active or canceled, is also given. This helps verifying the availability of data.</p>
105 | <p>The content of all available quarterly statements can be accessed with function <code>gitrd.get.info.companies</code>. It will read and parse a .csv file from my <a href="https://github.com/msperlin/GetITRData_auxiliary">github repository</a>. This will be periodically updated for new quarterly statements. Let’s try it out:</p>
106 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df.info &lt;-<span class="st"> </span><span class="kw">gitrd.get.info.companies</span>(<span class="dt">type.data =</span> <span class="st">'companies'</span>)</code></pre></div>
107 | <pre><code>## 
108 | ## Reading info file from github
109 | ## Found 44338 lines for 690 companies  [Actives =  518  Inactives =  173 ]
110 | ## Last file update:  2017-10-19
111 | ## Caching RDATA into tempdir()</code></pre>
112 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">glimpse</span>(df.info)</code></pre></div>
113 | <pre><code>## Observations: 692
114 | ## Variables: 10
115 | ## $ name.company    &lt;chr&gt; &quot;521 PARTICIPAÇOES S.A. - EM LIQUIDAÇÃO EXTRAJ...
116 | ## $ id.company      &lt;int&gt; 16330, 16284, 21725, 19313, 18350, 18970, 1810...
117 | ## $ situation       &lt;chr&gt; &quot;ATIVO&quot;, &quot;ATIVO&quot;, &quot;ATIVO&quot;, &quot;ATIVO&quot;, &quot;CANCELADA...
118 | ## $ listing.segment &lt;chr&gt; NA, &quot;Tradicional&quot;, &quot;Tradicional&quot;, &quot;Tradicional...
119 | ## $ main.sector     &lt;chr&gt; NA, &quot;Financeiro e Outros&quot;, &quot;Saúde&quot;, &quot;Financeir...
120 | ## $ sub.sector      &lt;chr&gt; NA, &quot;Outros&quot;, &quot;Serv.Méd.Hospit..Análises e Dia...
121 | ## $ segment         &lt;chr&gt; NA, &quot;Outros&quot;, &quot;Serv.Méd.Hospit..Análises e Dia...
122 | ## $ tickers         &lt;chr&gt; NA, &quot;QVQP3B&quot;, &quot;ADHM3&quot;, &quot;AELP3&quot;, NA, &quot;TIET11;TI...
123 | ## $ first.date      &lt;date&gt; 1998-12-31, 2001-09-30, 2008-12-31, 2001-12-3...
124 | ## $ last.date       &lt;date&gt; 2016-12-31, 2017-09-30, 2017-12-31, 2017-12-3...</code></pre>
125 | <p>This file includes several information that are gathered from Bovespa: names of companies, sectors, dates quarterly statements and, most importantly, the links to download the files. The resulting dataframe can be used to filter and gather information for large scale research such as downloading financial data for a specific sector.</p>
126 | <div id="downloading-financial-information-for-one-company" class="section level2">
127 | <h2>Downloading financial information for ONE company</h2>
128 | <p>All you need to download financial data with <code>GetITRData</code> are the official names of companies, which can be found with <code>gitrd.search.company</code>, the desired starting and ending dates and the type of financial information (individual or consolidated). Let’s try it for PETROBRAS:</p>
129 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">name.companies &lt;-<span class="st"> 'PETRÓLEO BRASILEIRO  S.A.  - PETROBRAS'</span>
130 | first.date &lt;-<span class="st"> '2004-01-01'</span>
131 | last.date  &lt;-<span class="st"> '2006-01-01'</span>
132 | type.statements &lt;-<span class="st"> 'individual'</span>
133 | periodicy.fin.report &lt;-<span class="st"> 'annual'</span>
134 | 
135 | df.reports &lt;-<span class="st"> </span><span class="kw">gitrd.GetITRData</span>(<span class="dt">name.companies =</span> name.companies, 
136 |                                <span class="dt">periodicy.fin.report =</span> periodicy.fin.report, 
137 |                                <span class="dt">first.date =</span> first.date,
138 |                                <span class="dt">last.date =</span> last.date,
139 |                                <span class="dt">type.info =</span> type.statements)</code></pre></div>
140 | <pre><code>## Found cache file. Loading data..
141 | ## 
142 | ## Downloading data for 1 companies
143 | ## Type of financial reports: individual
144 | ## Periodicy of financial reports: annual (dfp system)
145 | ## First Date: 2004-01-01
146 | ## Laste Date: 2006-01-01
147 | ## Inflation index: dollar
148 | ## 
149 | ## Downloading inflation data
150 | ##  Caching inflation RDATA into tempdir()  Done
151 | ## 
152 | ## 
153 | ## WARNING: For data before 2009, the cash flow statements are not available
154 | ## 
155 | ## Inputs looking good! Starting download of files:
156 | ## 
157 | ## PETRÓLEO BRASILEIRO  S.A.  - PETROBRAS
158 | ##  Available periods: 2005-12-31   2004-12-31
159 | ## 
160 | ## 
161 | ## Processing PETRÓLEO BRASILEIRO  S.A.  - PETROBRAS, Date = 2005-12-31
162 | ##  Acessing dfp data | downloading file | reading file
163 | ##  Acessing fre data
164 | ##      No FRE file available..
165 | ## Processing PETRÓLEO BRASILEIRO  S.A.  - PETROBRAS, Date = 2004-12-31
166 | ##  Acessing dfp data | downloading file | reading file
167 | ##  Acessing fre data
168 | ##      No FRE file available..</code></pre>
169 | <p>The resulting object is a <code>tibble</code>, a data.frame type of object that allows for list columns. Let’s have a look in its content:</p>
170 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">glimpse</span>(df.reports)</code></pre></div>
171 | <pre><code>## Observations: 1
172 | ## Variables: 14
173 | ## $ company.name              &lt;chr&gt; &quot;PETRÓLEO BRASILEIRO  S.A.  - PETROB...
174 | ## $ company.code              &lt;int&gt; 9512
175 | ## $ type.info                 &lt;chr&gt; &quot;individual&quot;
176 | ## $ min.date                  &lt;date&gt; 2004-12-31
177 | ## $ max.date                  &lt;date&gt; 2005-12-31
178 | ## $ n.periods                 &lt;int&gt; 2
179 | ## $ current.stock.composition &lt;list&gt; [&lt;c(&quot;Ordinárias&quot;, &quot;Preferenciais&quot;, ...
180 | ## $ dividends.history         &lt;list&gt; [&lt;c(&quot;ON&quot;, &quot;ON&quot;, &quot;ON&quot;, &quot;ON&quot;, &quot;ON&quot;, &quot;...
181 | ## $ fr.assets                 &lt;list&gt; [&lt;# A tibble: 28 x 6,    company.na...
182 | ## $ fr.liabilities            &lt;list&gt; [&lt;# A tibble: 46 x 6,    company.na...
183 | ## $ fr.income                 &lt;list&gt; [&lt;# A tibble: 48 x 6,    company.na...
184 | ## $ fr.cashflow               &lt;list&gt; [&lt;character(0), character(0), chara...
185 | ## $ history.stock.holders     &lt;list&gt; [&lt;&gt;]
186 | ## $ history.capital           &lt;list&gt; [&lt;&gt;]</code></pre>
187 | <p>Object <code>df.reports</code> only has one row since we only asked for data of one company. The number of rows increases with the number of companies, as we will soon learn with the next example. All financial statements for the different years are available within <code>df.reports</code>. For example, the income statements for all desired years of PETROBRAS are:</p>
188 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df.income.long &lt;-<span class="st"> </span>df.reports<span class="op">$</span>fr.income[[<span class="dv">1</span>]]
189 | 
190 | <span class="kw">glimpse</span>(df.income.long)</code></pre></div>
191 | <pre><code>## Observations: 48
192 | ## Variables: 6
193 | ## $ company.name       &lt;chr&gt; &quot;PETRÓLEO BRASILEIRO  S.A.  - PETROBRAS&quot;, &quot;...
194 | ## $ ref.date           &lt;date&gt; 2005-12-31, 2005-12-31, 2005-12-31, 2005-1...
195 | ## $ acc.number         &lt;chr&gt; &quot;3.01&quot;, &quot;3.02&quot;, &quot;3.03&quot;, &quot;3.04&quot;, &quot;3.05&quot;, &quot;3....
196 | ## $ acc.desc           &lt;chr&gt; &quot;Receita Bruta de Vendas e/ou Serviços&quot;, &quot;D...
197 | ## $ acc.value          &lt;int&gt; 143665730, -37843204, 105822526, -57512113,...
198 | ## $ acc.value.infl.adj &lt;dbl&gt; 61398234.97, -16173000.56, 45225234.41, -24...</code></pre>
199 | <p>The resulting dataframe is in the long format, ready for processing. In the long format, financial statements of different quarters are stacked. In the wide format, we have the quarters as dates. If you want the wide format, which I believe is most common in financial analysis, you can use function <code>gitrd.convert.to.wide</code>. See an example next:</p>
200 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df.income.wide &lt;-<span class="st"> </span><span class="kw">gitrd.convert.to.wide</span>(df.income.long)
201 | 
202 | knitr<span class="op">::</span><span class="kw">kable</span>(df.income.wide )</code></pre></div>
203 | <table>
204 | <thead>
205 | <tr class="header">
206 | <th align="left">acc.number</th>
207 | <th align="left">acc.desc</th>
208 | <th align="left">company.name</th>
209 | <th align="right">2004-12-31</th>
210 | <th align="right">2005-12-31</th>
211 | </tr>
212 | </thead>
213 | <tbody>
214 | <tr class="odd">
215 | <td align="left">3.01</td>
216 | <td align="left">Receita Bruta de Vendas e/ou Serviços</td>
217 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
218 | <td align="right">120024727</td>
219 | <td align="right">143665730</td>
220 | </tr>
221 | <tr class="even">
222 | <td align="left">3.02</td>
223 | <td align="left">Deduções da Receita Bruta</td>
224 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
225 | <td align="right">-34450292</td>
226 | <td align="right">-37843204</td>
227 | </tr>
228 | <tr class="odd">
229 | <td align="left">3.03</td>
230 | <td align="left">Receita Líquida de Vendas e/ou Serviços</td>
231 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
232 | <td align="right">85574435</td>
233 | <td align="right">105822526</td>
234 | </tr>
235 | <tr class="even">
236 | <td align="left">3.04</td>
237 | <td align="left">Custo de Bens e/ou Serviços Vendidos</td>
238 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
239 | <td align="right">-48607576</td>
240 | <td align="right">-57512113</td>
241 | </tr>
242 | <tr class="odd">
243 | <td align="left">3.05</td>
244 | <td align="left">Resultado Bruto</td>
245 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
246 | <td align="right">36966859</td>
247 | <td align="right">48310413</td>
248 | </tr>
249 | <tr class="even">
250 | <td align="left">3.06</td>
251 | <td align="left">Despesas/Receitas Operacionais</td>
252 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
253 | <td align="right">-11110540</td>
254 | <td align="right">-14810467</td>
255 | </tr>
256 | <tr class="odd">
257 | <td align="left">3.06.01</td>
258 | <td align="left">Com Vendas</td>
259 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
260 | <td align="right">-2858630</td>
261 | <td align="right">-4195157</td>
262 | </tr>
263 | <tr class="even">
264 | <td align="left">3.06.02</td>
265 | <td align="left">Gerais e Administrativas</td>
266 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
267 | <td align="right">-2599552</td>
268 | <td align="right">-3453753</td>
269 | </tr>
270 | <tr class="odd">
271 | <td align="left">3.06.03</td>
272 | <td align="left">Financeiras</td>
273 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
274 | <td align="right">-1019901</td>
275 | <td align="right">126439</td>
276 | </tr>
277 | <tr class="even">
278 | <td align="left">3.06.04</td>
279 | <td align="left">Outras Receitas Operacionais</td>
280 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
281 | <td align="right">0</td>
282 | <td align="right">0</td>
283 | </tr>
284 | <tr class="odd">
285 | <td align="left">3.06.05</td>
286 | <td align="left">Outras Despesas Operacionais</td>
287 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
288 | <td align="right">-5982336</td>
289 | <td align="right">-9070019</td>
290 | </tr>
291 | <tr class="even">
292 | <td align="left">3.06.06</td>
293 | <td align="left">Resultado da Equivalência Patrimonial</td>
294 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
295 | <td align="right">1349879</td>
296 | <td align="right">1782023</td>
297 | </tr>
298 | <tr class="odd">
299 | <td align="left">3.07</td>
300 | <td align="left">Resultado Operacional</td>
301 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
302 | <td align="right">25856319</td>
303 | <td align="right">33499946</td>
304 | </tr>
305 | <tr class="even">
306 | <td align="left">3.08</td>
307 | <td align="left">Resultado Não Operacional</td>
308 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
309 | <td align="right">-550694</td>
310 | <td align="right">-199982</td>
311 | </tr>
312 | <tr class="odd">
313 | <td align="left">3.08.01</td>
314 | <td align="left">Receitas</td>
315 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
316 | <td align="right">46611</td>
317 | <td align="right">1256194</td>
318 | </tr>
319 | <tr class="even">
320 | <td align="left">3.08.02</td>
321 | <td align="left">Despesas</td>
322 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
323 | <td align="right">-597305</td>
324 | <td align="right">-1456176</td>
325 | </tr>
326 | <tr class="odd">
327 | <td align="left">3.09</td>
328 | <td align="left">Resultado Antes Tributação/Participações</td>
329 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
330 | <td align="right">25305625</td>
331 | <td align="right">33299964</td>
332 | </tr>
333 | <tr class="even">
334 | <td align="left">3.10</td>
335 | <td align="left">Provisão para IR e Contribuição Social</td>
336 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
337 | <td align="right">-5199166</td>
338 | <td align="right">-8581490</td>
339 | </tr>
340 | <tr class="odd">
341 | <td align="left">3.11</td>
342 | <td align="left">IR Diferido</td>
343 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
344 | <td align="right">-1692288</td>
345 | <td align="right">-422392</td>
346 | </tr>
347 | <tr class="even">
348 | <td align="left">3.12</td>
349 | <td align="left">Participações/Contribuições Estatutárias</td>
350 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
351 | <td align="right">-660000</td>
352 | <td align="right">-846000</td>
353 | </tr>
354 | <tr class="odd">
355 | <td align="left">3.12.01</td>
356 | <td align="left">Participações</td>
357 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
358 | <td align="right">-660000</td>
359 | <td align="right">-846000</td>
360 | </tr>
361 | <tr class="even">
362 | <td align="left">3.12.02</td>
363 | <td align="left">Contribuições</td>
364 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
365 | <td align="right">0</td>
366 | <td align="right">0</td>
367 | </tr>
368 | <tr class="odd">
369 | <td align="left">3.13</td>
370 | <td align="left">Reversão dos Juros sobre Capital Próprio</td>
371 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
372 | <td align="right">0</td>
373 | <td align="right">0</td>
374 | </tr>
375 | <tr class="even">
376 | <td align="left">3.15</td>
377 | <td align="left">Lucro/Prejuízo do Exercício</td>
378 | <td align="left">PETRÓLEO BRASILEIRO S.A. - PETROBRAS</td>
379 | <td align="right">17754171</td>
380 | <td align="right">23450082</td>
381 | </tr>
382 | </tbody>
383 | </table>
384 | </div>
385 | <div id="downloading-financial-information-for-several-companies" class="section level2">
386 | <h2>Downloading financial information for SEVERAL companies</h2>
387 | <p>If you are doing serious research, it is likely that you need financial statements for more than one company. Package <code>GetITRData</code> is specifically designed for handling large-scale downloads of data. Let’s build a case with 5 randomly selected companies:</p>
388 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">set.seed</span>(<span class="dv">2</span>)
389 | my.companies &lt;-<span class="st"> </span><span class="kw">sample</span>(<span class="kw">unique</span>(df.info<span class="op">$</span>name.company), <span class="dv">5</span>)
390 | 
391 | first.date &lt;-<span class="st"> '2008-01-01'</span>
392 | last.date  &lt;-<span class="st"> '2010-01-01'</span>
393 | type.statements &lt;-<span class="st"> 'individual'</span>
394 | periodicy.fin.report &lt;-<span class="st"> 'annual'</span>
395 | 
396 | df.reports &lt;-<span class="st"> </span><span class="kw">gitrd.GetITRData</span>(<span class="dt">name.companies =</span> my.companies, 
397 |                                <span class="dt">periodicy.fin.report =</span> periodicy.fin.report,
398 |                                <span class="dt">first.date =</span> first.date,
399 |                                <span class="dt">last.date =</span> last.date,
400 |                                <span class="dt">type.info =</span> type.statements)</code></pre></div>
401 | <pre><code>## Found cache file. Loading data..
402 | ## 
403 | ## Downloading data for 5 companies
404 | ## Type of financial reports: individual
405 | ## Periodicy of financial reports: annual (dfp system)
406 | ## First Date: 2008-01-01
407 | ## Laste Date: 2010-01-01
408 | ## Inflation index: dollar
409 | ## 
410 | ## Downloading inflation data
411 | ##  Found cache file. Loading data..    Done
412 | ## 
413 | ## 
414 | ## WARNING: Cant find available dates for BSI CAPITAL SECURITIZADORA S.A.
415 | ## WARNING: For data before 2009, the cash flow statements are not available
416 | ## 
417 | ## Inputs looking good! Starting download of files:
418 | ## 
419 | ## LIGA FUTEBOL SA
420 | ##  Available periods: 
421 | ## ULTRAPAR PARTICIPAÇÕES SA
422 | ##  Available periods: 2009-12-31   2008-12-31
423 | ## BROOKFIELD INCORPORAÇÕES S.A.
424 | ##  Available periods: 2009-12-31   2008-12-31
425 | ## OURINVEST SECURITIZADORA S.A.
426 | ##  Available periods:</code></pre>
427 | <pre><code>## Warning in min.default(structure(numeric(0), class = &quot;Date&quot;), na.rm =
428 | ## FALSE): no non-missing arguments to min; returning Inf</code></pre>
429 | <pre><code>## Warning in max.default(structure(numeric(0), class = &quot;Date&quot;), na.rm =
430 | ## FALSE): no non-missing arguments to max; returning -Inf</code></pre>
431 | <pre><code>## 
432 | ## Processing ULTRAPAR PARTICIPAÇÕES SA, Date = 2009-12-31
433 | ##  Acessing dfp data | downloading file | reading file
434 | ##  Acessing fre data | downloading file | reading file
435 | ## Processing ULTRAPAR PARTICIPAÇÕES SA, Date = 2008-12-31
436 | ##  Acessing dfp data | downloading file | reading file
437 | ##  Acessing fre data
438 | ##      No FRE file available..
439 | ## Processing BROOKFIELD INCORPORAÇÕES S.A., Date = 2009-12-31
440 | ##  Acessing dfp data | downloading file | reading file
441 | ##  Acessing fre data | downloading file | reading file
442 | ## Processing BROOKFIELD INCORPORAÇÕES S.A., Date = 2008-12-31
443 | ##  Acessing dfp data | downloading file | reading file
444 | ##  Acessing fre data
445 | ##      No FRE file available..</code></pre>
446 | <pre><code>## Warning in min.default(structure(numeric(0), class = &quot;Date&quot;), na.rm =
447 | ## FALSE): no non-missing arguments to min; returning Inf
448 | 
449 | ## Warning in max.default(structure(numeric(0), class = &quot;Date&quot;), na.rm =
450 | ## FALSE): no non-missing arguments to max; returning -Inf</code></pre>
451 | <p>And now we can check the resulting <code>tibble</code>:</p>
452 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">glimpse</span>(df.reports)</code></pre></div>
453 | <pre><code>## Observations: 4
454 | ## Variables: 14
455 | ## $ company.name              &lt;chr&gt; &quot;LIGA FUTEBOL SA&quot;, &quot;ULTRAPAR PARTICI...
456 | ## $ company.code              &lt;int&gt; NA, 18465, 20265, NA
457 | ## $ type.info                 &lt;chr&gt; &quot;individual&quot;, &quot;individual&quot;, &quot;individ...
458 | ## $ min.date                  &lt;date&gt; NA, 2008-12-31, 2008-12-31, NA
459 | ## $ max.date                  &lt;date&gt; NA, 2009-12-31, 2009-12-31, NA
460 | ## $ n.periods                 &lt;int&gt; 0, 2, 2, 0
461 | ## $ current.stock.composition &lt;list&gt; [&lt;&gt;, &lt;c(&quot;Ordinárias&quot;, &quot;Preferenciai...
462 | ## $ dividends.history         &lt;list&gt; [&lt;&gt;, &lt;c(&quot;ON&quot;, &quot;ON&quot;, &quot;ON&quot;, &quot;ON&quot;, &quot;ON...
463 | ## $ fr.assets                 &lt;list&gt; [&lt;&gt;, &lt;# A tibble: 18 x 6,    compan...
464 | ## $ fr.liabilities            &lt;list&gt; [&lt;&gt;, &lt;# A tibble: 42 x 6,    compan...
465 | ## $ fr.income                 &lt;list&gt; [&lt;&gt;, &lt;# A tibble: 48 x 6,    compan...
466 | ## $ fr.cashflow               &lt;list&gt; [&lt;&gt;, &lt;character(0), character(0), c...
467 | ## $ history.stock.holders     &lt;list&gt; [&lt;&gt;, &lt;c(&quot;ULTRAPAR PARTICIPAÇÕES SA&quot;...
468 | ## $ history.capital           &lt;list&gt; [&lt;&gt;, &lt;ULTRAPAR PARTICIPAÇÕES SA, 20...</code></pre>
469 | <p>Every row of <code>df.reports</code> will provide information for one company. Metadata about the corresponding dataframes such as min/max dates is available in the first columns. Keeping a tabular structure facilitates the organization and future processing of all financial data. We can use tibble <code>df.reports</code> for creating other dataframes in the long format containing data for all companies. See next, where we create dataframes with the assets and liabilities of all companies:</p>
470 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">df.assets &lt;-<span class="st"> </span><span class="kw">do.call</span>(<span class="dt">what =</span> rbind, <span class="dt">args =</span> df.reports<span class="op">$</span>fr.assets)
471 | df.liabilities &lt;-<span class="st"> </span><span class="kw">do.call</span>(<span class="dt">what =</span> rbind, <span class="dt">args =</span> df.reports<span class="op">$</span>fr.liabilities)
472 | 
473 | df.assets.liabilities &lt;-<span class="st"> </span><span class="kw">rbind</span>(df.assets, df.liabilities)</code></pre></div>
474 | <p>As an example, let’s use the resulting dataframe for calculating and analyzing a simple liquidity index of a company, the total of current (liquid) assets (<em>Ativo circulante</em>) divided by the total of current short term liabilities (<em>Passivo Circulante</em>), over time.</p>
475 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(dplyr)</code></pre></div>
476 | <pre><code>## 
477 | ## Attaching package: 'dplyr'</code></pre>
478 | <pre><code>## The following objects are masked from 'package:stats':
479 | ## 
480 | ##     filter, lag</code></pre>
481 | <pre><code>## The following objects are masked from 'package:base':
482 | ## 
483 | ##     intersect, setdiff, setequal, union</code></pre>
484 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">my.tab &lt;-<span class="st"> </span>df.assets.liabilities <span class="op">%&gt;%</span>
485 | <span class="st">  </span><span class="kw">group_by</span>(company.name, ref.date) <span class="op">%&gt;%</span>
486 | <span class="st">  </span><span class="kw">summarise</span>(<span class="dt">Liq.Index =</span> acc.value[acc.number <span class="op">==</span><span class="st"> '1.01'</span>]<span class="op">/</span><span class="st"> </span>acc.value[acc.number <span class="op">==</span><span class="st"> '2.01'</span>])
487 | 
488 | my.tab</code></pre></div>
489 | <pre><code>## # A tibble: 4 x 3
490 | ## # Groups:   company.name [?]
491 | ##   company.name                  ref.date   Liq.Index
492 | ##   &lt;chr&gt;                         &lt;date&gt;         &lt;dbl&gt;
493 | ## 1 BROOKFIELD INCORPORAÇÕES S.A. 2008-12-31     0.482
494 | ## 2 BROOKFIELD INCORPORAÇÕES S.A. 2009-12-31     4.41 
495 | ## 3 ULTRAPAR PARTICIPAÇÕES SA     2008-12-31     0.684
496 | ## 4 ULTRAPAR PARTICIPAÇÕES SA     2009-12-31     1.24</code></pre>
497 | <p>Now we can visualize the information using <code>ggplot2</code>:</p>
498 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(ggplot2)
499 | 
500 | p &lt;-<span class="st"> </span><span class="kw">ggplot</span>(my.tab, <span class="kw">aes</span>(<span class="dt">x =</span> ref.date, <span class="dt">y =</span> Liq.Index, <span class="dt">fill =</span> company.name)) <span class="op">+</span>
501 | <span class="st">  </span><span class="kw">geom_col</span>(<span class="dt">position =</span> <span class="st">'dodge'</span> )
502 | <span class="kw">print</span>(p)</code></pre></div>
503 | <p><img src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAASAAAAEgCAMAAAAjXV6yAAAC/VBMVEUAAAAAv8QBAQECAgIDAwMFBQUGBgYHBwcICAgJCQkKCgoLCwsMDAwNDQ0ODg4PDw8QEBARERESEhITExMUFBQVFRUWFhYXFxcYGBgZGRkaGhobGxscHBwdHR0eHh4fHx8gICAhISEiIiIjIyMkJCQlJSUmJiYnJycoKCgpKSkqKiorKyssLCwtLS0uLi4vLy8wMDAxMTEyMjIzMzM0NDQ1NTU2NjY3Nzc4ODg5OTk6Ojo7Ozs8PDw9PT0+Pj4/Pz9AQEBBQUFCQkJDQ0NERERFRUVGRkZHR0dISEhJSUlKSkpLS0tMTExNTU1OTk5PT09QUFBRUVFSUlJTU1NUVFRVVVVWVlZXV1dYWFhZWVlaWlpbW1tcXFxdXV1eXl5fX19gYGBhYWFiYmJjY2NkZGRlZWVmZmZnZ2doaGhpaWlqampra2tsbGxtbW1ubm5vb29wcHBxcXFycnJzc3N0dHR1dXV2dnZ3d3d4eHh5eXl6enp7e3t8fHx+fn5/f3+AgICBgYGCgoKEhISFhYWGhoaHh4eIiIiJiYmKioqLi4uMjIyNjY2Ojo6Pj4+QkJCRkZGSkpKTk5OUlJSVlZWWlpaXl5eYmJiZmZmampqbm5ucnJydnZ2enp6fn5+goKChoaGioqKjo6OkpKSlpaWmpqanp6eoqKipqamqqqqrq6usrKytra2urq6vr6+wsLCxsbGysrKzs7O0tLS1tbW2tra3t7e4uLi5ubm6urq7u7u8vLy9vb2+vr6/v7/AwMDBwcHCwsLDw8PExMTFxcXGxsbHx8fIyMjJycnKysrLy8vMzMzNzc3Ozs7Pz8/Q0NDR0dHS0tLT09PU1NTV1dXW1tbX19fY2NjZ2dna2trb29vc3Nzd3d3e3t7f39/g4ODh4eHi4uLj4+Pk5OTl5eXm5ubn5+fo6Ojp6enq6urr6+vs7Ozt7e3u7u7v7+/w8PDx8fHy8vLz8/P09PT19fX29vb39/f4dm34+Pj5+fn6+vr7+/v8/Pz9/f3+/v7///95jgojAAAACXBIWXMAAA7DAAAOwwHHb6hkAAAOd0lEQVR4nO2dfVwUdRrA55SLCnTBlV2tOzPfS80KK8u3zLfUmwXxLDs5TNE8xcwzyYouLQ5TT6/ypSjNtNI0OzNTc1VMBbNTMe3AfIlTNBQuAQM5kGWez837Djq7z8267k7u8/2D/c38fs/8nvl+5mV3HlgYILzCBDsBs0OCEEgQAglCIEEIJAiBBCGQIAQShECCEEgQAglCIEEIJAiBBCGQIAQShECCEEgQAglCIEEIJAiBBCGQIAQShECCEEgQAglCIEEIJAiBBCGQIAQShKAvaN/YAKdhXnQFnRudHOg8TIueoEupThKkoCdo0fpCUVDp3r17SwKcj+nQEZQ1i5ME7Y6Njf0i0AmZDR1B80Ylj4pLPg/wc15eXlngUzIX+nexQroGKZAgBHqjiECCEEgQAglCIEEIJAiBBCGQIAQShECCEEgQAglCIEEIJAiBBCGQIAQShECCEEgQAglCIEEIJAiBBCGQIAQShECCEEgQAglCIEEIJAiBBCGQIAQShECCEEgQAglCIEEIJAiBBCGQIAQShECCEHQEccuTkl4+H/hUzImOoNwx5bV/eyvwqZgTHUGn86Hq3RWBT8Wc6F6DdiaMq+BfjkyfPn1/gPMxHfoX6aq30/mfBxMTE3cHNh3zoSNoy3aAE6MDn4o50RG0c2IV9/5rgU/FnOjd5peOTs4I+b9VVaA3iggkCIEEIZAgBBKEQIIQSBCCKOgn4++bi4Zb7SlVcG6EzT7iLEDExocb9z71jK3p65Br3dWtce9DAPn9G0f2PAAQnjM0qtUaSO3Jh71yJydEy6vUIZpwqJjUInJQvn/38ipg/gncx7b7jIa5Og7IXh7
zQl3XB7J2dLunDiLu/Cqr+Y0v5Sc3LM+94faVO38XVQr39nJu7dGVt3HXqsOPh1/8pkERcB0yxHB5lTpEEw7xD23Libeb5nkUE/kc2+gNl9Gw9Tf9BJA5akfYSYCTDbMgIhNgIn90/MDk5zIfAFy0vV436yjACitvYybAEeYYd9tiONDglBgur1KHaMLzw4oBamLW+31PfYR5kWE+Mh6WESu+vNVa+Hn7YojYApA2GKBUEPQjvy5+PNRmLxxtEwRtBChhjsG0vjCtjxSurFKGaMLXMBaehvP8tYNXC9PsHwsix5caDXvlfvFFEtT6TYhw8ns4RBbEX5MgIbmy5x1pXywRBGVJNvaGFd+6VAqXV6lDNOErowoEzHOK8ZmceLi50bA1kfyn2RV9ssIKAU6FbasvaC1AVbO5G26qBlioEcTdlnRjuRQur1KHaMIPMnn8a1Kev3fUV8S7WN0Co2E1bQbnrGnxdN293XZIF2mtoBaf7HJY/pPFzM1Oj7lhnyoIpjEj+NDM+aogdYgmnOvXeUvW4NbVft9TH2HWzzufX2M87qQjqtmkSjj7uM0m3ua1gjZ0ieyZC1xa0ybDjnfv4xaUzfCXHhgYqwpSh2jCoXRMc4vjhN931FcYW8OCpDY/+HGLuUyVh56l9lo/zhMgmBpLwflB8X7coidBpZvb/sWP0wQKBiwFsD3Kj1v0JOhwVLynQ8vMiII23xrsNMwLU2wpyOtE323rESaMsTD96RG9R5i81SsPcobDuEt61F2DBIMN4xQxGub6rx6XrkWGQYZhmAZMREejYSEkaFNVtbPNOqNhISRI+LGus9GwEBO0w2I07BcqqHjEwMfOGAthygHK+w0wOtMvVJAPMNEDBkTbvzca5llQCWOzN+qyHcDexG5r+kQF1M6523bHcxWgNkoY/jPrxltywGK12+0DoKNTat3Cf5iXPoxEWO3W7tJvJnV0FjLCw5g5KeDK6GJtt8i9HXUGPjqq3ac6HVDTtL3wFqZu9t1N2i3WDJDng5yezexDC8SJNE0lTBK0ePLUdyoMe/UmqBZqZrfls8zlj80OGfDEo8e5s0/e71Ibwog1v/2WT/OY7EBpqYKOgevzqCOyoPCYo6KgsUOKave0WuPeoDKDJR9qV99cemUHfHJn1B5+IxMH/8hHfuIeIM9XHfXppbKM7pc11TBJ0DER/wr6+ZV4SRA8k7L71p/517quy9QGP2JZK36fvQoCSP2TLMi66EEXL+j7aOHBsHOJuh1lBkEQv7EDV3bAkNmjxwMcjxYe4H6Z6R4gz1ccXgRQ9d5lTSVMFiThT0E2e8TNh0RB3JH7Ns8dJQbMHK82Spi/h4nP5MUD/Wv1FHukvqDVvRRBXP9ZvKAP+0pTq9tRZhAEVS1pVH5lx5lfF26yXIRVfS6PlOeDmbaRbx+XOjVNOUwWdKREwJ+CaoHbGXFBuBDEMCPrMqRvf89IUhslTP+vrf8C7AhyC4JTtkNzUt6T7yTqdpQZhN2N7rRRp2PWI3ApeiW8++jlkRbllCnbOL3FhMubcpgsyKia/0cQQMvt4nF+pu2Gra2EneZ6L1AbJUwlvN2pEj3FJqiCYPnd6SkHrBcEbfep21FmkE4xgCs6uPb29u0tAyHHxk8G63q4B8jzbRXKmJXW7+o3lbBrKWh/+GnpQpD8V46NK+DOTehUrTbEY2z4OOQivUG9SPOCuPjmKZDAFsLRDgvU7SgzKIKu6MiO4K9a+xqe5nqNvABn75ntHiDP9711c13l5qgibTO/Ugnjm4KgmRuzePwpyGaPabVKvkhn9IHqVztZ200pA7UhKixt+ZFWkHBNsB8u5WPt9mzxNv/gLnALgnMxKVD9Ygdrm7mce4PKDLKgKzqSU4S1D2ZAWXLL21qludwD5PlgU7cmTXpsFYOVZtguNSxMSIEJF/GfIHNyrvfHPkb6/RQzJZXzpxgu/cmEhqCrgOkoYjQshARNFzEaFkKCfAsLbUFbxo96CXlqEmK
C5sdqV51OOFu7fBoSx+lxzbIMIqKgzIHaVQffAjg1km/UlpeXezgouFo9rsuyj+7amteEv1n18i/8XL/S47o8xZwKmpU5494Rfq3z7Nq1a0/qh4WQIKEu1iCCR13FLUo9hYWFkKBNF6u3ttmgXbX/afy3gkNIkPCjfl3sfUdCQkKi9zDzC/KhwqOLVBdrZDTM/IL8hVQXe9homBdBjPD8Ke0Z+eHXULudsdpvv6ISo5RqlGW132OXsqWOTrUAJD1JqjfYUIVHW97xKEioi0UfumaChB0WnmddXolRSjXKstrvrUvcEm9FKQAJgi4bbKjCoy3veBS09Nmn3zT8i/a+CdJUYpRSjbqs9nvrkgUpBSBZkHawsQqPtrzjUVBWlk+PXH0QpK3EKKUadVnpB29dsiClACQL0g42VOGpV97xKMjHR66GBdWvxCilGnVZ6QdvXbIgpQAkX4O0gw1VeOqVdzwKMqoGFRQu/Jb8lOd1TjFNJUatRCjLSgkHvHXJgpQCkHqKuQcbqfDUL+94FfRtFz8KGpJcwB24ZZveNchdiVF2TF1WSjhuQTpdykVaLgBpBPlS4VEnwAXtMV569iyo+MlmTe76AJQSjlaQphIj75i6rJRw3IJ0uhRBSgHILciXCo8aE2BBwcb3Co8u152gq6jw6HLdCfI3YlUjkQR5xNe6WOgI8i2MBCGQIATOpcf1WPehLzdBIEEIJAiBBCGQIAQShECCEEgQAglCIEEIJAiBBCGQIAQShECCEEgQAglCIEEIJAiBBCGQIAQShECCEEgQgq4g17jyQOdhWvQEbZzGkiAFPUHffZNAghT0r0HDRUGnly1bZp4vdg4S3gR5+YvD0MGbIAJIEAoJQqA3iggkCIEEIZAgBBKEQIIQSBACCUIgQQgkCIEEIZAgBBKEQIIQSBACCUIgQQgkCIEEIZAgBBKEQIIQSBACCUIgQQgkCIEEIZAgBBKEQIIQSBACCUIgQQgkCIEEIZAgBBKEQIIQSBACCUIgQQgkCEFP0P6JY16vCXgmJkVHUOUfClzpawOfijnREbTzZYDcqQHPxKToCPp0AcDpJL5xMDExcXegEzIbngT9kW8cT09PN/xvka439E6xGfzB8+fAp2JOdARVjCzi5tBFWkbvNr9vcsobdJuXoTeKCCQIgQQhkCAEEoRAghAQQbtSywKTh2lBBA2J3R6YPEwLCUIgQQiIoFk9fghMHqaF7mIIJAjBLSiRZdmlAFNY1lEIMJ9lh/8EkMQv5Un9UsckVuBAkLINAqqgTHZlyVAWDjvmlzmGQyGbwCZMgDfZ94ocCWK/3AFzhrMzHgtauoFHFTQtHmARC2MdAFNZmBH3ITsrDkY5ztSMY8V+uQM+/5CNPxu0dAOP9hpU6IiHxxzCwQQTR4BjFQtPsezv4yRBcgcPmxaMRIOFRtBTbFyZ6OFdSdDHLKQ64hMSWf5AcjwrdwB8yZ4MWrZBQBV0KZ5dzL+M5R08y59iQ8HxmgOmDAN4nhX/t6PcITSQ/699faEKmsKKH0tz2Q8u8dfif7M7HMMmwEJ2dbFjGGg6wOVwhKagoeL9G2CydJuf5xBu8xx/74+Tv61U7ljJhqggQh8ShECCEEgQAglCIEEIJAiBBCHoC/rqN73cCxHOwKRiTjx84faQIveCRlDvjGudj+nQFzRE+0SDBImE7+kXBxWTWkQOyoe+DPOAuDK/X+POn/GC8vs3jux5AGIZpq8yJlRwC+qcugPiH9qWE28/7xr0Qq2w7oKt3461LRs44d5ezq09ukJtr1ddypjgpRxY3IL4syo/rBigJma9cootji4H2MQ462YdBVhhFU8xdUyI4Bb0OcAaxsLTcJ4iaPIA/sdFxgm12QtH2yRB6pgQwS0oC2BlVIHAeUXQVEFQdQNnZc870r5YIglSx4QI9QQdZPIASpPyFEGZ0RcAtjLODTdVAyyUBKljQoR6grh+nbdkDW5dLQrKnA8V9v5ffdY2wpnFzM1Oj7l
hHzwypkgdEyLUEwSlY5pbHCek90EDYwGO9GvcaV1HJ5fWtMmw4937wPKmceqYEIE+iyGQIAQShECCEEgQAglCIEEI/wOAjN2KzKNhXgAAAABJRU5ErkJggg==" /><!-- --></p>
504 | <p>As we can see, data was not available for all of the selected companies. For the two companies with data, the liquidity index was below 1 in 2008 and above 1 in 2009, meaning that only in 2009 were they able to cover their short term debt with their current assets. BROOKFIELD INCORPORAÇÕES S.A. shows the highest liquidity in 2009. We can certainly do a lot more interesting studies based on this data set.</p>
505 | </div>
506 | <div id="exporting-financial-data" class="section level2">
507 | <h2>Exporting financial data</h2>
508 | <p>The package includes function <code>gitrd.export.ITR.data</code> for exporting the financial data to an Excel file. Users can choose between the long and wide format. See next:</p>
509 | <div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">my.basename &lt;-<span class="st"> 'MyExcelData'</span>
510 | my.format &lt;-<span class="st"> 'xlsx'</span> <span class="co"># only supported so far</span>
511 | <span class="kw">gitrd.export.ITR.data</span>(<span class="dt">data.in =</span> df.reports, 
512 |                       <span class="dt">base.file.name =</span> my.basename,
513 |                       <span class="dt">type.export =</span> my.format,
514 |                       <span class="dt">format.data =</span> <span class="st">'long'</span>)</code></pre></div>
515 | <p>The resulting Excel file contains all data available in <code>df.reports</code>.</p>
516 | </div>
517 | </div>
518 | 
519 | 
520 | 
521 | <!-- dynamically load mathjax for compatibility with self-contained -->
522 | <script>
523 |   (function () {
524 |     var script = document.createElement("script");
525 |     script.type = "text/javascript";
526 |     script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
527 |     document.getElementsByTagName("head")[0].appendChild(script);
528 |   })();
529 | </script>
530 | 
531 | </body>
532 | </html>
533 | 


--------------------------------------------------------------------------------